言語処理100本ノック 2015
79. 適合率-再現率グラフの描画
http://www.cl.ecei.tohoku.ac.jp/nlp100/
「ロジスティック回帰モデルの分類の閾値を変化させることで,適合率-再現率グラフを描画せよ.」
素人の言語処理100本ノック:79
https://qiita.com/segavvy/items/8f93187ec89f4831d863
# ./p79.py
Traceback (most recent call last):
File "./p79.py", line 105, in <module>
plt.plot(thresholds, accuracys, color='green', linestyle='--', label='正解率')
File "/opt/conda/lib/python3.7/site-packages/matplotlib/pyplot.py", line 3352, in plot
ax = gca()
File "/opt/conda/lib/python3.7/site-packages/matplotlib/pyplot.py", line 969, in gca
return gcf().gca(**kwargs)
File "/opt/conda/lib/python3.7/site-packages/matplotlib/pyplot.py", line 586, in gcf
return figure()
File "/opt/conda/lib/python3.7/site-packages/matplotlib/pyplot.py", line 533, in figure
**kwargs)
File "/opt/conda/lib/python3.7/site-packages/matplotlib/backend_bases.py", line 161, in new_figure_manager
return cls.new_figure_manager_given_figure(num, fig)
File "/opt/conda/lib/python3.7/site-packages/matplotlib/backend_bases.py", line 167, in new_figure_manager_given_figure
canvas = cls.FigureCanvas(figure)
File "/opt/conda/lib/python3.7/site-packages/matplotlib/backends/backend_qt5agg.py", line 24, in __init__
super(FigureCanvasQTAgg, self).__init__(figure=figure)
File "/opt/conda/lib/python3.7/site-packages/matplotlib/backends/backend_qt5.py", line 234, in __init__
_create_qApp()
File "/opt/conda/lib/python3.7/site-packages/matplotlib/backends/backend_qt5.py", line 125, in _create_qApp
raise RuntimeError('Invalid DISPLAY variable')
RuntimeError: Invalid DISPLAY variable
ソースは下記(コマンドとして直接実行したいので、1行目にshebangを追記した)
#!/usr/bin/env python
# coding: utf-8
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
fname_result = 'result.txt'
fname_work = 'work.txt'
def score(fname):
    '''Compute classification scores from a result file.

    Each line is split on tabs. Lines with fewer than three columns are
    skipped. Column 0 is the gold label and column 1 is the predicted
    label; the value '+1' is counted as positive and anything else as
    negative. The third column is not used here.

    Args:
        fname: Path of the result file to read.

    Returns:
        A tuple (accuracy, precision, recall, f1). A score whose
        denominator is zero (for example precision when no line was
        predicted as '+1', or every score for an empty file) is returned
        as 0.0 instead of raising ZeroDivisionError.
    '''

    def ratio(numerator, denominator):
        # An undefined ratio is reported as 0.0 so that sweeping the
        # threshold to an extreme value does not abort the whole run.
        return numerator / denominator if denominator else 0.0

    # Confusion-matrix counters
    TP = 0      # True-Positive:  predicted +1, gold +1
    FP = 0      # False-Positive: predicted +1, gold -1
    FN = 0      # False-Negative: predicted -1, gold +1
    TN = 0      # True-Negative:  predicted -1, gold -1

    with open(fname) as data_file:
        for line in data_file:
            cols = line.split('\t')
            if len(cols) < 3:
                continue

            if cols[0] == '+1':         # gold label
                if cols[1] == '+1':     # predicted label
                    TP += 1
                else:
                    FN += 1
            else:
                if cols[1] == '+1':
                    FP += 1
                else:
                    TN += 1

    # Derive the scores from the counters
    accuracy = ratio(TP + TN, TP + FP + FN + TN)
    precision = ratio(TP, TP + FP)
    recall = ratio(TP, TP + FN)
    f1 = ratio(2 * recall * precision, recall + precision)

    return accuracy, precision, recall, f1
# Load the result file. The stored probability is turned back into the raw
# hypothesis() value: for a '-1' prediction it is flipped with 1.0 - value.
results = []
with open(fname_result) as data_file:
    for line in data_file:
        cols = line.split('\t')
        if len(cols) < 3:
            continue

        # cols[0]: gold label, cols[1]: predicted label, cols[2]: probability
        probability = float(cols[2])
        predict = 1.0 - probability if cols[1] == '-1' else probability
        results.append((cols[0], predict))

# Sweep the threshold and collect one value per score for plotting
thresholds = []
accuracys = []
precisions = []
recalls = []
f1s = []
for threshold in np.arange(0.02, 1.0, 0.02):

    # score() reads from a file, so the predictions re-labelled with this
    # threshold are written to a work file first
    with open(fname_work, 'w') as file_out:
        for label, predict in results:
            if predict > threshold:
                row = (label, '+1', predict)
            else:
                row = (label, '-1', 1 - predict)
            file_out.write('{}\t{}\t{}\n'.format(*row))

    # Compute the scores for this threshold
    accuracy, precision, recall, f1 = score(fname_work)

    # Append each value to its series
    for series, value in zip(
        (thresholds, accuracys, precisions, recalls, f1s),
        (threshold, accuracy, precision, recall, f1),
    ):
        series.append(value)

# Font used for the text in the graph (author's note: Japanese cannot be
# displayed with the default font)
fp = FontProperties(fname='/Library/Fonts/Times New Roman Bold Italic.ttf')

# One line per score
plt.plot(thresholds, accuracys, color='green', linestyle='--', label='正解率')
plt.plot(thresholds, precisions, color='red', linewidth=3, label='適合率')
plt.plot(thresholds, recalls, color='blue', linewidth=3, label='再現率')
plt.plot(thresholds, f1s, color='magenta', linestyle='--', label='F1スコア')

# Fix both axes to the range 0..1
plt.xlim(xmin=0, xmax=1.0)
plt.ylim(ymin=0, ymax=1.0)

# Title and axis labels, all drawn with the font above
plt.title('79. 適合率-再現率グラフの描画', fontproperties=fp)
plt.xlabel('ロジスティック回帰モデルの分類の閾値', fontproperties=fp)
plt.ylabel('精度', fontproperties=fp)

# Grid on both axes
plt.grid(axis='both')

# Legend
plt.legend(loc='lower left', prop=fp)

# Show the graph in a window
plt.show()
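DISPLAYのない環境でも動くように、先頭(mpl.use('Agg')でバックエンドを指定)、途中(fig = plt.figure())、最後(plt.show()の代わりにfig.savefig('p79.png'))を加筆。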
#!/usr/bin/env python
# coding: utf-8
import numpy as np
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
fname_result = 'result.txt'
fname_work = 'work.txt'
def score(fname):
    '''Compute classification scores from a result file.

    Each line is split on tabs. Lines with fewer than three columns are
    skipped. Column 0 is the gold label and column 1 is the predicted
    label; the value '+1' is counted as positive and anything else as
    negative. The third column is not used here.

    Args:
        fname: Path of the result file to read.

    Returns:
        A tuple (accuracy, precision, recall, f1). A score whose
        denominator is zero (for example precision when no line was
        predicted as '+1', or every score for an empty file) is returned
        as 0.0 instead of raising ZeroDivisionError.
    '''

    def ratio(numerator, denominator):
        # An undefined ratio is reported as 0.0 so that sweeping the
        # threshold to an extreme value does not abort the whole run.
        return numerator / denominator if denominator else 0.0

    # Confusion-matrix counters
    TP = 0      # True-Positive:  predicted +1, gold +1
    FP = 0      # False-Positive: predicted +1, gold -1
    FN = 0      # False-Negative: predicted -1, gold +1
    TN = 0      # True-Negative:  predicted -1, gold -1

    with open(fname) as data_file:
        for line in data_file:
            cols = line.split('\t')
            if len(cols) < 3:
                continue

            if cols[0] == '+1':         # gold label
                if cols[1] == '+1':     # predicted label
                    TP += 1
                else:
                    FN += 1
            else:
                if cols[1] == '+1':
                    FP += 1
                else:
                    TN += 1

    # Derive the scores from the counters
    accuracy = ratio(TP + TN, TP + FP + FN + TN)
    precision = ratio(TP, TP + FP)
    recall = ratio(TP, TP + FN)
    f1 = ratio(2 * recall * precision, recall + precision)

    return accuracy, precision, recall, f1
# Load the result file. The stored probability is turned back into the raw
# hypothesis() value: for a '-1' prediction it is flipped with 1.0 - value.
results = []
with open(fname_result) as data_file:
    for line in data_file:
        cols = line.split('\t')
        if len(cols) < 3:
            continue

        # cols[0]: gold label, cols[1]: predicted label, cols[2]: probability
        probability = float(cols[2])
        predict = 1.0 - probability if cols[1] == '-1' else probability
        results.append((cols[0], predict))

# Create the figure explicitly so it can be saved to a file at the end
fig = plt.figure()

# Sweep the threshold and collect one value per score for plotting
thresholds = []
accuracys = []
precisions = []
recalls = []
f1s = []
for threshold in np.arange(0.02, 1.0, 0.02):

    # score() reads from a file, so the predictions re-labelled with this
    # threshold are written to a work file first
    with open(fname_work, 'w') as file_out:
        for label, predict in results:
            if predict > threshold:
                row = (label, '+1', predict)
            else:
                row = (label, '-1', 1 - predict)
            file_out.write('{}\t{}\t{}\n'.format(*row))

    # Compute the scores for this threshold
    accuracy, precision, recall, f1 = score(fname_work)

    # Append each value to its series
    for series, value in zip(
        (thresholds, accuracys, precisions, recalls, f1s),
        (threshold, accuracy, precision, recall, f1),
    ):
        series.append(value)

# Font used for the text in the graph (author's note: Japanese cannot be
# displayed with the default font)
fp = FontProperties(fname='/Library/Fonts/Times New Roman Bold Italic.ttf')

# One line per score
plt.plot(thresholds, accuracys, color='green', linestyle='--', label='正解率')
plt.plot(thresholds, precisions, color='red', linewidth=3, label='適合率')
plt.plot(thresholds, recalls, color='blue', linewidth=3, label='再現率')
plt.plot(thresholds, f1s, color='magenta', linestyle='--', label='F1スコア')

# Fix both axes to the range 0..1
plt.xlim(xmin=0, xmax=1.0)
plt.ylim(ymin=0, ymax=1.0)

# Title and axis labels, all drawn with the font above
plt.title(
    '79. 適合率-再現率グラフの描画',
    fontproperties=fp    # font to use
)
plt.xlabel(
    'ロジスティック回帰モデルの分類の閾値',
    fontproperties=fp    # font to use
)
plt.ylabel(
    '精度',
    fontproperties=fp    # font to use
)

# Grid on both axes
plt.grid(axis='both')

# Legend
plt.legend(loc='lower left', prop=fp)

# Write the figure to a PNG file; plt.show() is intentionally not called
fig.savefig('p79.png')
# ./p79.py
Traceback (most recent call last):
File "./p79.py", line 144, in <module>
fig.savefig('p79.png')
File "/opt/conda/lib/python3.7/site-packages/matplotlib/figure.py", line 2062, in savefig
self.canvas.print_figure(fname, **kwargs)
File "/opt/conda/lib/python3.7/site-packages/matplotlib/backend_bases.py", line 2263, in print_figure
**kwargs)
File "/opt/conda/lib/python3.7/site-packages/matplotlib/backends/backend_agg.py", line 517, in print_png
FigureCanvasAgg.draw(self)
File "/opt/conda/lib/python3.7/site-packages/matplotlib/backends/backend_agg.py", line 437, in draw
self.figure.draw(self.renderer)
File "/opt/conda/lib/python3.7/site-packages/matplotlib/artist.py", line 55, in draw_wrapper
return draw(artist, renderer, *args, **kwargs)
File "/opt/conda/lib/python3.7/site-packages/matplotlib/figure.py", line 1493, in draw
renderer, self, artists, self.suppressComposite)
File "/opt/conda/lib/python3.7/site-packages/matplotlib/image.py", line 141, in _draw_list_compositing_images
a.draw(renderer)
File "/opt/conda/lib/python3.7/site-packages/matplotlib/artist.py", line 55, in draw_wrapper
return draw(artist, renderer, *args, **kwargs)
File "/opt/conda/lib/python3.7/site-packages/matplotlib/axes/_base.py", line 2635, in draw
mimage._draw_list_compositing_images(renderer, self, artists)
File "/opt/conda/lib/python3.7/site-packages/matplotlib/image.py", line 141, in _draw_list_compositing_images
a.draw(renderer)
File "/opt/conda/lib/python3.7/site-packages/matplotlib/artist.py", line 55, in draw_wrapper
return draw(artist, renderer, *args, **kwargs)
File "/opt/conda/lib/python3.7/site-packages/matplotlib/axis.py", line 1204, in draw
self.label.draw(renderer)
File "/opt/conda/lib/python3.7/site-packages/matplotlib/artist.py", line 55, in draw_wrapper
return draw(artist, renderer, *args, **kwargs)
File "/opt/conda/lib/python3.7/site-packages/matplotlib/text.py", line 706, in draw
bbox, info, descent = textobj._get_layout(renderer)
File "/opt/conda/lib/python3.7/site-packages/matplotlib/text.py", line 300, in _get_layout
ismath=False)
File "/opt/conda/lib/python3.7/site-packages/matplotlib/backends/backend_agg.py", line 245, in get_text_width_height_descent
font = self._get_agg_font(prop)
File "/opt/conda/lib/python3.7/site-packages/matplotlib/backends/backend_agg.py", line 280, in _get_agg_font
font = get_font(fname)
File "/opt/conda/lib/python3.7/site-packages/matplotlib/font_manager.py", line 1389, in get_font
return _get_font(filename, hinting_factor)
FileNotFoundError: [Errno 2] No such file or directory: '/Library/Fonts/Times New Roman Bold Italic.ttf'
フォントの指定(fp = FontProperties(...))をコメントアウトすると、fpを参照している箇所で次のエラーになる。
# ./p79.py
Traceback (most recent call last):
File "./p79.py", line 125, in <module>
fontproperties=fp # 使うフォント情報
NameError: name 'fp' is not defined
指定しているフォントファイルはmacOSのパスなので、docker側に存在するフォントを指定しないといけないのかも。
最後までおよみいただきありがとうございました。
いいね 💚、フォローをお願いします。
Thank you very much for reading to the last sentence.
Please press the like icon 💚 and follow me for your happy life.