LoginSignup
0
1

今日の作業記録 python error(言語処理100本ノック:79)未解決

Last updated at Posted at 2019-01-23

言語処理100本ノック 2015

79. 適合率-再現率グラフの描画

http://www.cl.ecei.tohoku.ac.jp/nlp100/
「ロジスティック回帰モデルの分類の閾値を変化させることで,適合率-再現率グラフを描画せよ.」
素人の言語処理100本ノック:79
https://qiita.com/segavvy/items/8f93187ec89f4831d863

# ./p79.py
Traceback (most recent call last):
  File "./p79.py", line 105, in <module>
    plt.plot(thresholds, accuracys, color='green', linestyle='--', label='正解率')
  File "/opt/conda/lib/python3.7/site-packages/matplotlib/pyplot.py", line 3352, in plot
    ax = gca()
  File "/opt/conda/lib/python3.7/site-packages/matplotlib/pyplot.py", line 969, in gca
    return gcf().gca(**kwargs)
  File "/opt/conda/lib/python3.7/site-packages/matplotlib/pyplot.py", line 586, in gcf
    return figure()
  File "/opt/conda/lib/python3.7/site-packages/matplotlib/pyplot.py", line 533, in figure
    **kwargs)
  File "/opt/conda/lib/python3.7/site-packages/matplotlib/backend_bases.py", line 161, in new_figure_manager
    return cls.new_figure_manager_given_figure(num, fig)
  File "/opt/conda/lib/python3.7/site-packages/matplotlib/backend_bases.py", line 167, in new_figure_manager_given_figure
    canvas = cls.FigureCanvas(figure)
  File "/opt/conda/lib/python3.7/site-packages/matplotlib/backends/backend_qt5agg.py", line 24, in __init__
    super(FigureCanvasQTAgg, self).__init__(figure=figure)
  File "/opt/conda/lib/python3.7/site-packages/matplotlib/backends/backend_qt5.py", line 234, in __init__
    _create_qApp()
  File "/opt/conda/lib/python3.7/site-packages/matplotlib/backends/backend_qt5.py", line 125, in _create_qApp
    raise RuntimeError('Invalid DISPLAY variable')
RuntimeError: Invalid DISPLAY variable

ソースは下記(コマンドとして直接実行したいので、1行目にshebangを追記)。

#!/usr/bin/env python
# coding: utf-8

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties

fname_result = 'result.txt'
fname_work = 'work.txt'


def score(fname):
    '''Compute accuracy, precision, recall and F1 score from a result file.

    Each line is expected to hold at least three tab-separated columns:
    the correct label, the predicted label, and a third column that is not
    read here. Lines with fewer than three columns are skipped. A label
    equal to '+1' counts as positive; any other value counts as negative.

    Args:
        fname: path of the result file to read.

    Returns:
        A tuple (accuracy, precision, recall, f1). A ratio whose denominator
        is zero (for example precision when nothing was predicted '+1', or
        every score when the file has no valid line) is reported as 0.0
        instead of raising ZeroDivisionError.
    '''
    def ratio(numerator, denominator):
        # A zero denominator means the metric is undefined for this data;
        # report 0.0 so that a threshold sweep can keep going.
        return numerator / denominator if denominator else 0.0

    # Confusion-matrix counters
    TP = 0      # True-Positive:  predicted +1, correct label +1
    FP = 0      # False-Positive: predicted +1, correct label not +1
    FN = 0      # False-Negative: predicted not +1, correct label +1
    TN = 0      # True-Negative:  predicted not +1, correct label not +1

    with open(fname) as data_file:
        for line in data_file:
            cols = line.split('\t')

            # Skip lines that do not carry all three columns.
            if len(cols) < 3:
                continue

            actual_positive = cols[0] == '+1'
            predicted_positive = cols[1] == '+1'

            if actual_positive and predicted_positive:
                TP += 1
            elif actual_positive:
                FN += 1
            elif predicted_positive:
                FP += 1
            else:
                TN += 1

    accuracy = ratio(TP + TN, TP + FP + FN + TN)
    precision = ratio(TP, TP + FP)
    recall = ratio(TP, TP + FN)
    f1 = ratio(2 * recall * precision, recall + precision)

    return accuracy, precision, recall, f1


# Load the result file into a list of (correct label, probability) pairs.
# For rows whose predicted label is '-1', the stored value is subtracted from
# 1.0 so that every entry is on the same scale (the original note describes
# this as restoring the value of the hypothesis function).
results = []
with open(fname_result) as result_file:
    for row in result_file:
        fields = row.split('\t')

        # Only rows with all three columns are used.
        if len(fields) >= 3:
            stored = float(fields[2])
            # fields[0] is the correct label, fields[1] the predicted label.
            restored = 1.0 - stored if fields[1] == '-1' else stored
            results.append((fields[0], restored))

# Sweep the classification threshold, compute the scores at each step and
# collect them into the lists used for plotting.
thresholds = []
accuracys = []
precisions = []
recalls = []
f1s = []
for threshold in np.arange(0.02, 1.0, 0.02):

    # score() takes a file name, so the predictions for this threshold are
    # written to a work file first.
    with open(fname_work, 'w') as work_file:
        for truth, prob in results:
            is_positive = prob > threshold
            work_file.write('{}\t{}\t{}\n'.format(
                truth,
                '+1' if is_positive else '-1',
                prob if is_positive else 1 - prob,
            ))

    # Compute the four scores from the work file.
    scores = score(fname_work)

    # Record the threshold and append each score to its own series.
    thresholds.append(threshold)
    for series, value in zip((accuracys, precisions, recalls, f1s), scores):
        series.append(value)


# Font used for the title, axis labels and legend (the original note says
# Japanese text cannot be displayed with the default font).
fp = FontProperties(fname='/Library/Fonts/Times New Roman Bold Italic.ttf')

# One line per metric, plotted against the threshold.
# NOTE: the traceback reported for this script was raised at the first
# plt.plot() call (RuntimeError: Invalid DISPLAY variable, Qt5Agg backend).
for values, line_options in (
        (accuracys, dict(color='green', linestyle='--', label='正解率')),
        (precisions, dict(color='red', linewidth=3, label='適合率')),
        (recalls, dict(color='blue', linewidth=3, label='再現率')),
        (f1s, dict(color='magenta', linestyle='--', label='F1スコア')),
):
    plt.plot(thresholds, values, **line_options)

# Fix both axes to the range 0 to 1.0.
plt.xlim(xmin=0, xmax=1.0)
plt.ylim(ymin=0, ymax=1.0)

# Title and axis labels, all drawn with the font selected above.
plt.title('79. 適合率-再現率グラフの描画', fontproperties=fp)
plt.xlabel('ロジスティック回帰モデルの分類の閾値', fontproperties=fp)
plt.ylabel('精度', fontproperties=fp)

# Grid lines on both axes.
plt.grid(axis='both')

# Legend in the lower-left corner.
plt.legend(loc='lower left', prop=fp)

# Show the figure.
plt.show()

先頭(mpl.use('Agg')の指定)と途中(fig = plt.figure())と最後(fig.savefig('p79.png'))を加筆。

#!/usr/bin/env python
# coding: utf-8

import numpy as np
import matplotlib as mpl
mpl.use('Agg')
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties

fname_result = 'result.txt'
fname_work = 'work.txt'


def score(fname):
    '''Compute accuracy, precision, recall and F1 score from a result file.

    Each line is expected to hold at least three tab-separated columns:
    the correct label, the predicted label, and a third column that is not
    read here. Lines with fewer than three columns are skipped. A label
    equal to '+1' counts as positive; any other value counts as negative.

    Args:
        fname: path of the result file to read.

    Returns:
        A tuple (accuracy, precision, recall, f1). A ratio whose denominator
        is zero (for example precision when nothing was predicted '+1', or
        every score when the file has no valid line) is reported as 0.0
        instead of raising ZeroDivisionError.
    '''
    def ratio(numerator, denominator):
        # A zero denominator means the metric is undefined for this data;
        # report 0.0 so that a threshold sweep can keep going.
        return numerator / denominator if denominator else 0.0

    # Confusion-matrix counters
    TP = 0      # True-Positive:  predicted +1, correct label +1
    FP = 0      # False-Positive: predicted +1, correct label not +1
    FN = 0      # False-Negative: predicted not +1, correct label +1
    TN = 0      # True-Negative:  predicted not +1, correct label not +1

    with open(fname) as data_file:
        for line in data_file:
            cols = line.split('\t')

            # Skip lines that do not carry all three columns.
            if len(cols) < 3:
                continue

            actual_positive = cols[0] == '+1'
            predicted_positive = cols[1] == '+1'

            if actual_positive and predicted_positive:
                TP += 1
            elif actual_positive:
                FN += 1
            elif predicted_positive:
                FP += 1
            else:
                TN += 1

    accuracy = ratio(TP + TN, TP + FP + FN + TN)
    precision = ratio(TP, TP + FP)
    recall = ratio(TP, TP + FN)
    f1 = ratio(2 * recall * precision, recall + precision)

    return accuracy, precision, recall, f1


# Read the result file and build the list of (correct label, probability)
# pairs. When the predicted label is '-1' the stored value is converted with
# 1.0 - value, which the original note describes as restoring the value of
# the hypothesis function.
results = []
with open(fname_result) as result_file:
    for raw_line in result_file:
        parts = raw_line.split('\t')
        if len(parts) < 3:
            # Not a complete three-column row.
            continue

        correct_label, predicted_label = parts[0], parts[1]
        value = float(parts[2])
        if predicted_label == '-1':
            value = 1.0 - value

        results.append((correct_label, value))

# Create the figure that is saved at the end of the script.
fig = plt.figure()

# Sweep the classification threshold and collect the scores for plotting.
thresholds = []
accuracys = []
precisions = []
recalls = []
f1s = []
for threshold in np.arange(0.02, 1.0, 0.02):

    # score() reads from a file, so build this threshold's rows and write
    # them to the work file.
    rows = []
    for truth, prob in results:
        if prob > threshold:
            rows.append('{}\t{}\t{}\n'.format(truth, '+1', prob))
        else:
            rows.append('{}\t{}\t{}\n'.format(truth, '-1', 1 - prob))
    with open(fname_work, 'w') as work_file:
        work_file.write(''.join(rows))

    # Compute the scores for this threshold.
    acc, prec, rec, f1_value = score(fname_work)

    # Store the results.
    thresholds.append(threshold)
    accuracys.append(acc)
    precisions.append(prec)
    recalls.append(rec)
    f1s.append(f1_value)


# Font used for the title, axis labels and legend (the original note says
# Japanese text cannot be displayed with the default font).
# NOTE: this is a macOS font location; the run reported for this script
# failed in savefig() with FileNotFoundError for exactly this path.
fp = FontProperties(fname='/Library/Fonts/Times New Roman Bold Italic.ttf')

# One line per metric, plotted against the threshold.
for values, line_options in (
        (accuracys, dict(color='green', linestyle='--', label='正解率')),
        (precisions, dict(color='red', linewidth=3, label='適合率')),
        (recalls, dict(color='blue', linewidth=3, label='再現率')),
        (f1s, dict(color='magenta', linestyle='--', label='F1スコア')),
):
    plt.plot(thresholds, values, **line_options)

# Fix both axes to the range 0 to 1.0.
plt.xlim(xmin=0, xmax=1.0)
plt.ylim(ymin=0, ymax=1.0)

# Title and axis labels, all drawn with the font selected above.
plt.title('79. 適合率-再現率グラフの描画', fontproperties=fp)
plt.xlabel('ロジスティック回帰モデルの分類の閾値', fontproperties=fp)
plt.ylabel('精度', fontproperties=fp)

# Grid lines on both axes.
plt.grid(axis='both')

# Legend in the lower-left corner.
plt.legend(loc='lower left', prop=fp)

# Save the figure to a PNG file instead of calling plt.show().
fig.savefig('p79.png')

# ./p79.py
Traceback (most recent call last):
  File "./p79.py", line 144, in <module>
    fig.savefig('p79.png')
  File "/opt/conda/lib/python3.7/site-packages/matplotlib/figure.py", line 2062, in savefig
    self.canvas.print_figure(fname, **kwargs)
  File "/opt/conda/lib/python3.7/site-packages/matplotlib/backend_bases.py", line 2263, in print_figure
    **kwargs)
  File "/opt/conda/lib/python3.7/site-packages/matplotlib/backends/backend_agg.py", line 517, in print_png
    FigureCanvasAgg.draw(self)
  File "/opt/conda/lib/python3.7/site-packages/matplotlib/backends/backend_agg.py", line 437, in draw
    self.figure.draw(self.renderer)
  File "/opt/conda/lib/python3.7/site-packages/matplotlib/artist.py", line 55, in draw_wrapper
    return draw(artist, renderer, *args, **kwargs)
  File "/opt/conda/lib/python3.7/site-packages/matplotlib/figure.py", line 1493, in draw
    renderer, self, artists, self.suppressComposite)
  File "/opt/conda/lib/python3.7/site-packages/matplotlib/image.py", line 141, in _draw_list_compositing_images
    a.draw(renderer)
  File "/opt/conda/lib/python3.7/site-packages/matplotlib/artist.py", line 55, in draw_wrapper
    return draw(artist, renderer, *args, **kwargs)
  File "/opt/conda/lib/python3.7/site-packages/matplotlib/axes/_base.py", line 2635, in draw
    mimage._draw_list_compositing_images(renderer, self, artists)
  File "/opt/conda/lib/python3.7/site-packages/matplotlib/image.py", line 141, in _draw_list_compositing_images
    a.draw(renderer)
  File "/opt/conda/lib/python3.7/site-packages/matplotlib/artist.py", line 55, in draw_wrapper
    return draw(artist, renderer, *args, **kwargs)
  File "/opt/conda/lib/python3.7/site-packages/matplotlib/axis.py", line 1204, in draw
    self.label.draw(renderer)
  File "/opt/conda/lib/python3.7/site-packages/matplotlib/artist.py", line 55, in draw_wrapper
    return draw(artist, renderer, *args, **kwargs)
  File "/opt/conda/lib/python3.7/site-packages/matplotlib/text.py", line 706, in draw
    bbox, info, descent = textobj._get_layout(renderer)
  File "/opt/conda/lib/python3.7/site-packages/matplotlib/text.py", line 300, in _get_layout
    ismath=False)
  File "/opt/conda/lib/python3.7/site-packages/matplotlib/backends/backend_agg.py", line 245, in get_text_width_height_descent
    font = self._get_agg_font(prop)
  File "/opt/conda/lib/python3.7/site-packages/matplotlib/backends/backend_agg.py", line 280, in _get_agg_font
    font = get_font(fname)
  File "/opt/conda/lib/python3.7/site-packages/matplotlib/font_manager.py", line 1389, in get_font
    return _get_font(filename, hinting_factor)
FileNotFoundError: [Errno 2] No such file or directory: '/Library/Fonts/Times New Roman Bold Italic.ttf'

fontの指定(fp = FontProperties(...))をコメントアウトすると

# ./p79.py
Traceback (most recent call last):
  File "./p79.py", line 125, in <module>
    fontproperties=fp   # 使うフォント情報
NameError: name 'fp' is not defined

docker側のfont指定をしないといけないのかも。

最後までお読みいただきありがとうございました。

いいね 💚、フォローをお願いします。

Thank you very much for reading to the last sentence.

Please press the like icon 💚 and follow me for your happy life.

0
1
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
1