※mecab-python3-0.996.2で動作確認済み。
(mecab-python3の0.996ではnode.surfaceのバグがあって思わぬ挙動になります。)
get_meishi_or_doushi.py
import MeCab
# Shared tagger used by the functions below; "-d" points MeCab at the
# mecab-ipadic-neologd dictionary directory.
_tagger_options = "-d /usr/local/lib/mecab/dic/mecab-ipadic-neologd"
mecab = MeCab.Tagger(_tagger_options)
def get_meishi_and_doushi(document, stopwords):
    """Return the surface forms of the nouns and verbs found in ``document``.

    The text is tokenised with the module-level ``mecab`` tagger. A token is
    kept when the first field of its comma-separated feature string is
    "名詞" (noun) or "動詞" (verb) and its surface form is not contained in
    ``stopwords``.

    Args:
        document: Text handed to ``mecab.parseToNode``.
        stopwords: Iterable of surface forms to exclude from the result.
            ``None`` (or an empty iterable) disables the filtering.

    Returns:
        A list of surface strings in order of appearance (duplicates kept).
    """
    # The previous implementation ignored this argument and silently read the
    # module-level ``stop_words`` list; honour what the caller passed in.
    # A set gives O(1) membership tests instead of a scan per token.
    excluded = frozenset(stopwords or ())
    target_pos = ("名詞", "動詞")

    # Kept from the original: parse an empty string before the real parse.
    # NOTE(review): presumably a workaround for the node.surface issue
    # mentioned in the note at the top of this file - confirm before removing.
    mecab.parse("")

    # Start from the second node: the first one returned by parseToNode is
    # skipped, as in the original code.
    node = mecab.parseToNode(document).next

    meishi = []
    while node:
        part_of_speech = node.feature.split(",")[0]
        if part_of_speech in target_pos:
            surface = node.surface
            if surface not in excluded:
                meishi.append(surface)
        node = node.next
    return meishi
# Surface forms that should be dropped from the extracted nouns/verbs.
# Order and duplicates are kept exactly as originally written.
stop_words = [
    "で", "?", "も", "は", "か", "た", "て", "の",
    "し", "だ", "よ", "0", "ん", "な", "!", "…",
    "ー", "に", "お", "ぜ", "ここ", "こと", "は", "ーーー",
]
do.py
# `text` is described by the author as an arbitrary one-dimensional array.
# NOTE(review): the argument is passed straight to mecab.parseToNode inside
# get_meishi_and_doushi - confirm whether a single str is expected instead.
get_meishi_and_doushi(text, stop_words)