"""
37. 「猫」と共起頻度の高い上位10語
「猫」とよく共起する(共起頻度が高い)10語とその出現頻度をグラフ(例えば棒グラフなど)で表示せよ.
sentence_list:
[[{'surface': '', 'base': '*', 'pos': 'BOS/EOS', 'pos1': '*'},
{'surface': '一', 'base': '一', 'pos': '名詞', 'pos1': '数'},
{'surface': '', 'base': '*', 'pos': 'BOS/EOS', 'pos1': '*'}],
[{'surface': '', 'base': '*', 'pos': 'BOS/EOS', 'pos1': '*'},
{'surface': '吾輩', 'base': '吾輩', 'pos': '名詞', 'pos1': '代名詞'},
{'surface': 'は', 'base': 'は', 'pos': '助詞', 'pos1': '係助詞'},
{'surface': '猫', 'base': '猫', 'pos': '名詞', 'pos1': '一般'},
{'surface': 'で', 'base': 'だ', 'pos': '助動詞', 'pos1': '*'},
{'surface': 'ある', 'base': 'ある', 'pos': '助動詞', 'pos1': '*'},
{'surface': '。', 'base': '。', 'pos': '記号', 'pos1': '句点'},
{'surface': '', 'base': '*', 'pos': 'BOS/EOS', 'pos1': '*'}],
Memo:
- 共起頻度: https://www.jtp.co.jp/techport/2018-04-18-001/
"""
from collections import defaultdict
from typing import List
import matplotlib.pyplot as plt
import utils
# Apply matplotlib's "ggplot" style sheet to every figure drawn below.
plt.style.use("ggplot")
# Select a font family with Japanese glyphs so Japanese labels render;
# the named font has to be installed on the machine running the script.
plt.rcParams["font.family"] = "Hiragino Mincho ProN"  # Japanese text support
def get_co_occurrence(sentence_list: List[List[dict]], target: str = "猫") -> list:
    """Count the words that appear in the same sentence as ``target``.

    Each sentence is a list of morpheme dicts. The first and last entries
    (the BOS/EOS markers in the sample data) are dropped and the remaining
    ``"surface"`` strings are taken as the sentence's words. For every
    sentence that contains ``target``, each word occurrence in that sentence
    is tallied.

    Args:
        sentence_list: Sentences as lists of dicts that have a ``"surface"`` key.
        target: Word whose co-occurring words are counted (defaults to "猫").

    Returns:
        A list of ``(word, count)`` tuples sorted by descending count, with
        ``target`` itself excluded. Words with equal counts keep the order in
        which they were first seen. The list is empty when no sentence
        contains ``target``.
    """
    counter = defaultdict(int)
    for sent in sentence_list:
        # Strip the sentence boundary markers at both ends.
        words = [word["surface"] for word in sent[1:-1]]
        if target not in words:
            continue
        for word in words:
            counter[word] += 1
    # pop() with a default instead of `del`: `del` raises KeyError when the
    # target never occurred (defaultdict does not create keys on deletion).
    counter.pop(target, None)
    # sorted() is stable, so ties stay in first-seen (insertion) order.
    return sorted(counter.items(), key=lambda item: item[1], reverse=True)
def plot_co_occurrence(x: list, y: list) -> None:
    """Display a bar chart of the frequencies ``y`` for the terms ``x``.

    Args:
        x: Term labels, one per bar.
        y: Frequency of each term, in the same order as ``x``.
    """
    tick_positions = list(range(len(x)))
    plt.bar(x, y)
    plt.title("Co-occurrence with '猫'")
    plt.xlabel("Term")
    plt.ylabel("Frequency")
    # Put one tick under each bar, labelled with its term.
    plt.xticks(tick_positions, x)
    plt.show()
# Load the parsed sentences and tally the words that co-occur with '猫'.
sentence_list = utils.read_json("30_neko_mecab.json")
counter = get_co_occurrence(sentence_list)
# e.g. [('の', 391), ('は', 272), ('、', 252), ('に', 250), ('を', 232)]

# Split the ten most frequent (term, count) pairs into the two plot axes.
x, y = [], []
for term, freq in counter[:10]:
    x.append(term)
    y.append(freq)
plot_co_occurrence(x, y)
# ![image-20200527193140109](https://raw.githubusercontent.com/LearnXu/images/master/imgs/image-20200527193140109.png)
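# NOTE: the lines below are footer text carried over from the web page this
# script was copied from; they are kept as comments so the file stays valid Python.
# More than 3 years have passed since last update.
# Register as a new user and use Qiita more conveniently
# - You get articles that match your needs
# - You can efficiently read back useful information
# - You can use dark theme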