
Plotting per-class precision, recall, and f1 graphs for every epoch in Keras



I tried following [Keras] クラスごとのAccuracy, Precision, Recall, F-measureをmetricsを利用してTensorBoardで確認する to get per-class metrics through Keras metrics, but the values came out completely different because those metrics are computed per batch_size. So I wrote a Callback that computes them at the end of every epoch instead.
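As a rough illustration of the discrepancy (made-up numbers, not from the referenced article): averaging a metric over batches is not the same as computing it over the whole dataset at once.

import numpy as np
from sklearn.metrics import precision_score

y_true = np.array([1, 0, 0, 0, 1, 1, 1, 1])
y_pred = np.array([1, 1, 0, 0, 0, 1, 1, 1])

# Per-batch precision (batch_size=4), then averaged -- roughly how batch-wise metrics behave
batch_precisions = [precision_score(y_true[i:i + 4], y_pred[i:i + 4]) for i in (0, 4)]
print(np.mean(batch_precisions))        # 0.75  ((0.5 + 1.0) / 2)

# Precision over the whole epoch at once -- what the callback below computes
print(precision_score(y_true, y_pred))  # 0.8   (4 true positives / 5 predicted positives)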

Define the MetricHistory class.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from keras.callbacks import Callback
from sklearn.metrics import classification_report
class MetricHistory(Callback):
    def __init__(self, model, X_train, y_train, X_test=None, y_test=None, labels=None, verbose=False):
        super(MetricHistory, self).__init__()
        self.model = model
        self.X_train = X_train
        self.y_train = y_train
        self.X_test = X_test
        self.y_test = y_test
        self.verbose = verbose
        if labels is not None:
            self.nb_classes = len(labels)
            self.labels = labels
        else:
            self.nb_classes = len(np.unique(np.append(y_train, y_test)))
            self.labels = [str(i) for i in range(self.nb_classes)]
        # history is a dict of per-epoch lists, keyed like "precision-<label>" and
        # "val_f1-score-macro", mirroring how keras keeps its own logs in History.history.
        self.history = {}
        self.metrics = []
        for i in range(self.nb_classes):
            for key in ["precision", "val_precision", "recall", "val_recall", "f1-score", "val_f1-score"]:
                if self.X_test is None and "val_" in key:
                    continue
                self.metrics.append("{}-{}".format(key, self.labels[i]))
        for key in ["precision", "val_precision", "recall", "val_recall", "f1-score", "val_f1-score"]:
            if self.X_test is None and "val_" in key:
                continue
            for avgclass in ["micro", "macro"]:
                self.metrics.append("{}-{}".format(key, avgclass))
        for metric in self.metrics:
            self.history[metric] = []

    def my_round(self, val, digit=0):
        # Round half up to `digit` decimal places (the built-in round() uses banker's rounding).
        p = 10 ** digit
        return (val * p * 2 + 1) // 2 / p

    def on_epoch_end(self, epoch, logs=None):
        # Predict on the full train / validation data once per epoch and build classification
        # reports (per-class and averaged precision, recall, f1-score).
        if self.X_test is not None:
            y_pred = np.argmax(self.model.predict(self.X_test), axis=1)
            report = classification_report(np.argmax(self.y_test, axis=1), y_pred, output_dict=True)
        y_pred_train = np.argmax(self.model.predict(self.X_train), axis=1)
        print(self.X_train.shape, np.argmax(self.y_train, axis=1).shape, y_pred_train.shape)  # shape check; shows up in the log below
        report_train = classification_report(np.argmax(self.y_train, axis=1), y_pred_train, output_dict=True)

        for key in ["precision", "recall", "f1-score"]:
            for i in range(self.nb_classes):
                keyclass = "{}-{}".format(key, self.labels[i])
                keyclassval = "val_{}-{}".format(key, self.labels[i])
                if str(i) in report_train:
                    self.history[keyclass].append(report_train[str(i)][key])
                else:
                    self.history[keyclass].append(0)
                if self.X_test is not None:
                    if str(i) in report:
                        self.history[keyclassval].append(report[str(i)][key])
                    else:
                        self.history[keyclassval].append(0)
        for key in ["precision", "recall", "f1-score"]:
            for avgclass in ["micro", "macro"]:
                keyclass= key + "-" + avgclass
                self.history[keyclass].append(report_train[avgclass+" avg"][key])
                if self.X_test is not None:
                    keyclassval= "val_" + keyclass
                    self.history[keyclassval].append(report[avgclass+" avg"][key])
        if self.verbose:
            line = []
            for label in self.metrics:
                line.append("{}: {}".format(label, self.my_round(self.history[label][-1], 4)))
            print(" - ", " - ".join(line))


Now let's train. The MetricHistory instance is referenced later, so define it outside the callbacks list. X_val, y_val, and labels can be omitted.

labels = ["T-shirt/top",
            "Trouser",
            "Pullover",
            "Dress",
            "Coat",
            "Sandal",
            "Shirt",
            "Sneaker",
            "Bag",
            "Ankle boot",]
metric = MetricHistory(model, X_train, y_train, X_val, y_val, labels, verbose=True)
callbacks = [
             metric,
            ]

train_model = model.fit(X_train, y_train,
                  batch_size=BATCH_SIZE,
                  epochs=NO_EPOCHS,
                  verbose=1,
                  validation_data=(X_val, y_val),
                  callbacks=callbacks)

Run the training. With verbose=True, the metrics are printed to the log at the end of each epoch.

46592/48000 [============================>.] - ETA: 0s - loss: 0.0293 - acc: 0.9895(48000, 28, 28, 1) (48000,) (48000,)
-  precision-T-shirt/top: 0.9994 - val_precision-T-shirt/top: 0.8858 - recall-T-shirt/top: 0.9975 - val_recall-T-shirt/top: 0.8333 - f1-score-T-shirt/top: 0.9984 - val_f1-score-T-shirt/top: 0.8587 - precision-Trouser: 0.9998 - val_precision-Trouser: 0.9904 - recall-Trouser: 1.0 - val_recall-Trouser: 0.9825 - f1-score-Trouser: 0.9999 - val_f1-score-Trouser: 0.9864 - precision-Pullover: 0.9979 - val_precision-Pullover: 0.8628 - recall-Pullover: 0.9971 - val_recall-Pullover: 0.8857 - f1-score-Pullover: 0.9975 - val_f1-score-Pullover: 0.8741 - precision-Dress: 0.9959 - val_precision-Dress: 0.8741 - recall-Dress: 0.999 - val_recall-Dress: 0.9143 - f1-score-Dress: 0.9974 - val_f1-score-Dress: 0.8938 - precision-Coat: 0.9979 - val_precision-Coat: 0.8702 - recall-Coat: 0.9944 - val_recall-Coat: 0.863 - f1-score-Coat: 0.9961 - val_f1-score-Coat: 0.8666 - precision-Sandal: 0.9998 - val_precision-Sandal: 0.9816 - recall-Sandal: 0.9998 - val_recall-Sandal: 0.979 - f1-score-Sandal: 0.9998 - val_f1-score-Sandal: 0.9803 - precision-Shirt: 0.9952 - val_precision-Shirt: 0.7656 - recall-Shirt: 0.9973 - val_recall-Shirt: 0.7543 - f1-score-Shirt: 0.9962 - val_f1-score-Shirt: 0.7599 - precision-Sneaker: 0.9996 - val_precision-Sneaker: 0.9622 - recall-Sneaker: 0.9992 - val_recall-Sneaker: 0.9742 - f1-score-Sneaker: 0.9994 - val_f1-score-Sneaker: 0.9682 - precision-Bag: 0.9985 - val_precision-Bag: 0.9608 - recall-Bag: 0.9998 - val_recall-Bag: 0.9855 - f1-score-Bag: 0.9992 - val_f1-score-Bag: 0.973 - precision-Ankle boot: 0.9998 - val_precision-Ankle boot: 0.9716 - recall-Ankle boot: 0.9998 - val_recall-Ankle boot: 0.9616 - f1-score-Ankle boot: 0.9998 - val_f1-score-Ankle boot: 0.9666 - precision-micro: 0.9984 - precision-macro: 0.9984 - val_precision-micro: 0.9128 - val_precision-macro: 0.9125 - recall-micro: 0.9984 - recall-macro: 0.9984 - val_recall-micro: 0.9128 - val_recall-macro: 0.9134 - f1-score-micro: 0.9984 - f1-score-macro: 0.9984 - val_f1-score-micro: 0.9128 - val_f1-score-macro: 0.9128


def plot_history(label, metric, ax, val=True):
    # `metric` is anything with a .history dict of per-epoch lists: the History object
    # returned by fit(), or the MetricHistory callback defined above.
    ax.plot(metric.history[label], label="{} for train".format(label), color="blue")
    title = label
    val_label = "val_" + label
    if val and val_label in metric.history:
        lmax = np.argmax(metric.history[val_label])
        lmin = np.argmin(metric.history[val_label])
        ax.plot(metric.history[val_label], label="{} for valid".format(label), color="orange")
        ax.scatter([lmax, lmin], [metric.history[val_label][lmax], metric.history[val_label][lmin]], color="orange")
        title += ' (val_min:{}, val_max:{})'.format(lmin, lmax)
    ax.set_title(title)
    ax.set_xlabel('epoch')
    ax.set_ylabel(label)
    ax.legend(loc='best')

nrows = len(labels)+3
fig, ax = plt.subplots(nrows=nrows, ncols=3, figsize=(15,6*nrows))
plot_history("loss", train_model, ax[0, 0])
plot_history("acc", train_model, ax[0, 1])


print(ax.shape)
for i, label in enumerate(labels):
    for j, key in enumerate(["precision", "recall", "f1-score"]):
        plot_history(key+"-"+label, metric, ax[i+1, j])


for i, avgclass in enumerate(["micro", "macro"]):
    for j, key in enumerate(["precision", "recall", "f1-score"]):
        plot_history(key + "-" + avgclass, metric, ax[len(labels) + 1 + i, j])
plt.show()

Now draw the graphs. Keras keeps its logs in train_model.history by default, so metric.history stores its logs in the same format.
For the val_ series, markers are drawn at the min and max, and the title shows (val_min: n, val_max: n) with the epoch indices of those points.
Incidentally, Keras' ModelCheckpoint counts epochs starting from 1, so when you load_weights from models saved every epoch, it seems you have to load the model numbered n+1.
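For example, to reload the weights from the epoch with the best validation macro f1, a small sketch, assuming a hypothetical ModelCheckpoint filepath of weights.{epoch:02d}.hdf5 that saved every epoch:

best_epoch = int(np.argmax(metric.history["val_f1-score-macro"]))  # 0-based epoch index, as in the graphs
model.load_weights("weights.{:02d}.hdf5".format(best_epoch + 1))   # ModelCheckpoint numbers epochs from 1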

The top row is loss and accuracy. The middle rows are precision, recall, and f1-score for each class. The bottom two rows are the micro and macro averages; with class imbalance, the macro average is the one you want to watch.
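A quick illustration with made-up labels (not from this article) of why macro matters under imbalance: the micro average is dominated by the majority class, while the macro average weights every class equally.

from sklearn.metrics import f1_score

y_true = [0] * 95 + [1] * 5
y_pred = [0] * 100                                 # the minority class is never predicted
print(f1_score(y_true, y_pred, average="micro"))   # 0.95  -- looks fine
print(f1_score(y_true, y_pred, average="macro"))   # ~0.49 -- exposes the ignored class
# (scikit-learn warns that precision is ill-defined for the class that is never predicted)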

This assumes X_train, y_train, X_test, and y_test all fit in memory, so it does not support streaming input like fit_generator.
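If the data came from a keras.utils.Sequence rather than in-memory arrays, one rough way to adapt on_epoch_end would be to collect labels and predictions batch by batch (val_seq below is a hypothetical Sequence that yields (x, y) batches):

def collect_labels_and_preds(model, seq):
    # Iterate a keras.utils.Sequence and gather true / predicted class indices batch by batch.
    y_true, y_pred = [], []
    for i in range(len(seq)):
        batch_x, batch_y = seq[i]
        y_true.append(np.argmax(batch_y, axis=1))
        y_pred.append(np.argmax(model.predict(batch_x), axis=1))
    return np.concatenate(y_true), np.concatenate(y_pred)

# y_true, y_pred = collect_labels_and_preds(self.model, val_seq)
# report = classification_report(y_true, y_pred, output_dict=True)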

Class imbalance is scary. Keep turning things into graphs.

keras_graph.png
