工具类HanLP调用常用接口

分词

分词结果可以用两种方式输出:逐个遍历并打印每个词及其词性,或者直接打印整个分词结果:

from pyhanlp import *
# Sample sentence used by the segmentation demo.
content = (
    "虽然原始的食材便具有食物原始的风情,云初还是认为,"
    "最美味的食物还是需要经过分割,烹调,处置,最后端上桌的食物才是最符合大唐人肠胃的食物。"
)

# Segment the sentence with HanLP.
terms = HanLP.segment(content)

# Output style 1: one line per term, showing its `word` and `nature` attributes.
for item in terms:
    print(item.word, item.nature)

# Output style 2: print the whole segmentation result at once.
print(terms)

hanlp和jieba对比_analyzer

hanlp和jieba对比_句法分析_02

依存句法分析

from pyhanlp import *

# Sample sentence used by the dependency-parsing demo.
content = (
    "虽然原始的食材便具有食物原始的风情,云初还是认为,"
    "最美味的食物还是需要经过分割,烹调,处置,最后端上桌的食物才是最符合大唐人肠胃的食物。"
)

# Parse the sentence's dependency structure and print the returned result.
dependency_result = HanLP.parseDependency(content)
print(dependency_result)

hanlp和jieba对比_Dependency_03

关键词提取

from pyhanlp import *

# Sample sentence used by the keyword-extraction demo.
content = (
    "虽然原始的食材便具有食物原始的风情,云初还是认为,"
    "最美味的食物还是需要经过分割,烹调,处置,最后端上桌的食物才是最符合大唐人肠胃的食物。"
)

# Ask HanLP for keywords; the second argument (10) is the requested count.
keywords = HanLP.extractKeyword(content, 10)
print(keywords)

hanlp和jieba对比_hanlp和jieba对比_04

摘要提取

from pyhanlp import *

# Sample sentence used by the summary-extraction demo.
content = (
    "虽然原始的食材便具有食物原始的风情,云初还是认为,"
    "最美味的食物还是需要经过分割,烹调,处置,最后端上桌的食物才是最符合大唐人肠胃的食物。"
)

# Extract key sentences from the text. The second argument is the number of
# key sentences to return (here 1), not a maximum summary length; use
# HanLP.getSummary(document, max_length) to cap the summary by length instead.
words = HanLP.extractSummary(content, 1)
print(words)

hanlp和jieba对比_hanlp和jieba对比_05

感知机词法分析器

from pyhanlp import *

# Sample sentence used by the perceptron lexical-analyzer demo.
content = (
    "虽然原始的食材便具有食物原始的风情,云初还是认为,"
    "最美味的食物还是需要经过分割,烹调,处置,最后端上桌的食物才是最符合大唐人肠胃的食物。"
)

# Resolve the Java class by its fully qualified name, then instantiate it.
PerceptronLexicalAnalyzer = JClass(
    'com.hankcs.hanlp.model.perceptron.PerceptronLexicalAnalyzer'
)
analyzer = PerceptronLexicalAnalyzer()

# Analyze the sentence and print whatever the analyzer returns.
analysis = analyzer.analyze(content)
print(analysis)

hanlp和jieba对比_句法分析_06

中国人名识别

from pyhanlp import *

# Sample sentence used by the Chinese person-name recognition demo.
content = (
    "虽然原始的食材便具有食物原始的风情,云初还是认为,"
    "最美味的食物还是需要经过分割,烹调,处置,最后端上桌的食物才是最符合大唐人肠胃的食物。"
)

# Create a new segmenter and switch on name recognition before segmenting.
base_segmenter = HanLP.newSegment()
name_segmenter = base_segmenter.enableNameRecognize(True)
segmented = name_segmenter.seg(content)
print(segmented)

hanlp和jieba对比_Dependency_07

音译名识别

from pyhanlp import *

# Sample sentence used by the transliterated-name recognition demo.
content = (
    "虽然原始的食材便具有食物原始的风情,珍妮还是认为,"
    "最美味的食物还是需要经过分割,烹调,处置,最后端上桌的食物才是最符合大唐人肠胃的食物。"
)

# Create a new segmenter and switch on translated-name recognition before segmenting.
base_segmenter = HanLP.newSegment()
translated_name_segmenter = base_segmenter.enableTranslatedNameRecognize(True)
segmented = translated_name_segmenter.seg(content)
print(segmented)

hanlp和jieba对比_analyzer_08

短语提取

from pyhanlp import *

# Sample sentence used by the phrase-extraction demo.
content = (
    "虽然原始的食材便具有食物原始的风情,珍妮还是认为,"
    "最美味的食物还是需要经过分割,烹调,处置,最后端上桌的食物才是最符合大唐人肠胃的食物。"
)

# Ask HanLP for phrases; the second argument (3) is the requested count.
phrases = HanLP.extractPhrase(content, 3)
print(phrases)

hanlp和jieba对比_Dependency_09

简繁转换

from pyhanlp import *

# Simplified-Chinese sample sentence.
content = (
    "虽然原始的食材便具有食物原始的风情,云初还是认为,"
    "最美味的食物还是需要经过分割,烹调,处置,最后端上桌的食物才是最符合大唐人肠胃的食物。"
)
# The same sentence written in Traditional Chinese.
traditional_content = (
    "雖然原始的食材便具有食物原始的風情,雲初還是認爲,"
    "最美味的食物還是需要經過分割,烹調,處置,最後端上桌的食物纔是最符合大唐人腸胃的食物。"
)

# Simplified -> Traditional.
as_traditional = HanLP.convertToTraditionalChinese(content)
print(as_traditional)

# Traditional -> Simplified.
as_simplified = HanLP.convertToSimplifiedChinese(traditional_content)
print(as_simplified)

hanlp和jieba对比_python_10

拼音转换

from pyhanlp import *

# Sample sentence used by the pinyin-conversion demo.
content = (
    "虽然原始的食材便具有食物原始的风情,云初还是认为,"
    "最美味的食物还是需要经过分割,烹调,处置,最后端上桌的食物才是最符合大唐人肠胃的食物。"
)

# Convert the sentence to a list of pinyin entries and print the list.
pinyin_list = HanLP.convertToPinyinList(content)
print(pinyin_list)

hanlp和jieba对比_Dependency_11

声母、韵母

from pyhanlp import *

# Sample sentence used by the initial/final (shengmu/yunmu) demo.
content = (
    "虽然原始的食材便具有食物原始的风情,云初还是认为,"
    "最美味的食物还是需要经过分割,烹调,处置,最后端上桌的食物才是最符合大唐人肠胃的食物。"
)

# Convert to pinyin entries, then print each entry's shengmu and yunmu on its own line.
pinyin_list = HanLP.convertToPinyinList(content)
for pinyin in pinyin_list:
    print(pinyin.getShengmu(), pinyin.getYunmu())

hanlp和jieba对比_hanlp和jieba对比_12

pyhanlp可视化

在命令行中运行 `hanlp serve` 命令,即可启动 pyhanlp 的可视化服务。

hanlp和jieba对比_python_13


hanlp和jieba对比_句法分析_14