英文词性分类
目前英文词性分类主流方法和工具为:
1.中文普通任务直接用现成的分词/词性标注工具就够了,例如jieba、HanLP、Ansj、StanfordNLP;
2.自己准备语料训练模型:传统统计方法有HMM、CRF(可以用CRF++工具,可以构建特征);现在用神经网络训练的话,一般就是CNN-LSTM(简单些)、Bi-LSTM+CRF(用得最多),或者是BERT+BiLSTM+CRF(新)。
我自己尝试了Stanfordnlp和spacy,可以使用。
import spacy
# Load the small English pipeline and parse one example sentence.
nlp = spacy.load('en_core_web_sm')
doc = nlp('This is a sentence .')

# Print every token with its coarse part-of-speech tag: `pos_` is the
# readable label, `pos` is the integer ID spaCy stores for that label.
for token in doc:
    print(token, token.pos_, token.pos)

# Print the noun chunks (base noun phrases) found in the sentence.
for nounc in doc.noun_chunks:
    print(nounc)
def getFileContent(path):
    """Return the entire content of the text file at ``path`` as one string.

    The file is decoded as UTF-8 explicitly, so the result does not depend
    on the platform's default encoding. The handle is closed as soon as the
    content has been read.
    """
    with open(path, 'r', encoding='UTF-8') as f:
        return f.read()
# Parse the full text of ./demo.txt and print each noun chunk found in it.
demo = nlp(getFileContent('./demo.txt'))
for nounc in demo.noun_chunks:
    print(nounc)
or
# Read demo.txt inside a `with` block so the handle is always closed, and
# decode it as UTF-8 instead of relying on the platform default encoding.
with open("demo.txt", "r", encoding="utf-8") as file:
    doc = nlp(file.read())

# The loop variable is named `chunk` rather than `np`, which is the
# conventional alias for NumPy.
for chunk in doc.noun_chunks:
    print(chunk.text)
or
import spacy
# The "en" shortcut link was removed in spaCy 3; load the pipeline by its
# full package name instead.
nlp = spacy.load("en_core_web_sm")
doc = nlp("We try to explicitly describe the geometry of the edges of the images.")

# Printing a noun-chunk span shows its text, so `.text` is not needed here.
for chunk in doc.noun_chunks:
    print(chunk)
print()
# Recursively combine nouns: merge each noun together with its whole
# dependency subtree into one token, then print the objects of the sentence.
# 'We' is actually a pronoun but is wanted in the output as well, hence the
# token.pos_ == "PRON" test in the final condition. Extracting pronouns
# separately, like the noun chunks, would be the cleaner approach.

# Positions of all NOUN tokens in the parsed sentence (`token.i` is the
# token's index within its doc).
nounIndices = [token.i for token in doc if token.pos_ == 'NOUN']
print(nounIndices)

# Re-parse the same text the indices were computed from.
source_text = doc.text

for idxValue in nounIndices:
    # Merging rewrites the tokenization in place, so every noun starts from
    # a fresh parse; otherwise the saved indices would no longer line up.
    doc = nlp(source_text)
    span = doc[doc[idxValue].left_edge.i : doc[idxValue].right_edge.i + 1]
    # Span.merge() was removed in spaCy 3; the retokenizer context manager
    # is its replacement.
    with doc.retokenize() as retokenizer:
        retokenizer.merge(span)

    # Print direct objects, prepositional objects and pronouns of the
    # re-tokenized sentence.
    for token in doc:
        if token.dep_ in ('dobj', 'pobj') or token.pos_ == "PRON":
            print(token.text)
or
from stanfordcorenlp import StanfordCoreNLP
import nltk
from nltk.tree import Tree as nltkTree
# Start the CoreNLP backend from the local distribution directory.
nlp = StanfordCoreNLP('./StanfordNLP工具包/stanford-corenlp-full-2018-02-27')
try:
    sentence = 'person removes plate out of cabinet'  # input sentence
    sen_tag = nlp.pos_tag(sentence)  # part-of-speech tagging
    print(sen_tag)
finally:
    # The wrapper keeps a backend server running until it is closed, so
    # shut it down even if tagging fails.
    nlp.close()

# Keep only the words whose tag is exactly 'NN'.
# NOTE(review): plural and proper nouns (NNS / NNP / NNPS) are skipped by
# this test -- confirm whether that is intended.
# Each entry of sen_tag is assumed to be a (word, tag) pair.
noun_word = [word for word, tag in sen_tag if tag == 'NN']
print(noun_word)
参考文章:
https://blog.csdn.net/lsp1991/article/details/22733619
https://my.oschina.net/u/4232146/blog/4626208
http://www.manongjc.com/article/40737.html
https://spacy.io/models
https://zhuanlan.zhihu.com/p/51425975
https://www.it1352.com/1683784.html
http://www.mamicode.com/info-detail-2686258.html
http://stanfordnlp.github.io/CoreNLP/
https://github.com/stanfordnlp/CoreNLP
https://www.zhihu.com/question/29940957
https://www.jianshu.com/p/931c49830754
https://www.cnblogs.com/maoerbao/p/13019276.html
https://www.it610.com/article/1188489230905614336.htm
https://zhuanlan.zhihu.com/p/137226095
https://blog.csdn.net/weixin_34613450/article/details/84317158
https://zhuanlan.zhihu.com/p/44180488