[TF2.0] Building a Custom Training Loop in Keras to Solve CIFAR-10

Posted at 2019-12-03

Introduction

In this post, I'll build a custom training loop from scratch.
With a custom loop, the tf.data.Dataset pipeline introduced yesterday can really show its strength.

Building a custom training loop

Let's start with the basics.

Basics

Note 🚨

The code here runs on TensorFlow 2.0. It relies on eager mode, so it will not work on TF 1.15 and the like.
It also assumes you have gone through Day 1, Day 2, and Day 3 of this Advent Calendar,
so if you find yourself wondering "what does this code mean...? 🤔", please go back and review those posts first.
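
As a quick environment check (my addition, not part of the original article), you can print the TensorFlow version and confirm that eager execution is on before running anything else:

version_check
import tensorflow as tf

print(tf.__version__)          # expected: 2.0.x
print(tf.executing_eagerly())  # expected: True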

import

imports
import numpy as np 
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow_addons as tfa
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.model_selection import train_test_split

All of the imports shown here are used, so don't forget to pip install whatever you are missing.

Preparing the model

Let's prepare the DNN model that will actually do the computation.
The architecture is based on the article CIFAR-10でaccuracy95%--CNNで精度を上げるテクニック--.

model
class ConvLayer1(keras.layers.Layer):
    def __init__(self, output_filter=64, **kwargs):
        super(ConvLayer1, self).__init__(**kwargs)
        self.conv1_1 = keras.layers.Conv2D(output_filter,3,padding="same",activation="relu",kernel_initializer='he_normal')
        self.conv1_2 = keras.layers.Conv2D(output_filter,3,padding="same",activation="relu",kernel_initializer='he_normal')
        self.BN = keras.layers.BatchNormalization()
        self.conv2 = keras.layers.Conv2D(output_filter,3,padding="same",activation="relu",kernel_initializer='he_normal')
        self.MaxPool = keras.layers.MaxPool2D()
        self.dropout = keras.layers.Dropout(0.25)

    def call(self, input_x, training=False):
        x = self.conv1_1(input_x)
        x = self.conv1_2(x)
        x = self.BN(x,training=training)
        x = self.conv2(x)
        x = self.MaxPool(x)
        x = self.dropout(x,training=training)

        return x

class ConvLayer2(keras.layers.Layer):
    def __init__(self, output_filter=256, **kwargs):
        super(ConvLayer2, self).__init__(**kwargs)
        self.conv1_1 = keras.layers.Conv2D(output_filter,3,padding="same",activation="relu",kernel_initializer='he_normal')
        self.conv1_2 = keras.layers.Conv2D(output_filter,3,padding="same",activation="relu",kernel_initializer='he_normal')
        self.BN1 = keras.layers.BatchNormalization()
        self.conv2_1 = keras.layers.Conv2D(output_filter,3,padding="same",activation="relu",kernel_initializer='he_normal')
        self.conv2_2 = keras.layers.Conv2D(output_filter,3,padding="same",activation="relu",kernel_initializer='he_normal')
        self.conv2_3 = keras.layers.Conv2D(output_filter,3,padding="same",activation="relu",kernel_initializer='he_normal')
        self.BN2 = keras.layers.BatchNormalization()
        self.conv3_1 = keras.layers.Conv2D(output_filter*2,3,padding="same",activation="relu",kernel_initializer='he_normal')
        self.conv3_2 = keras.layers.Conv2D(output_filter*2,3,padding="same",activation="relu",kernel_initializer='he_normal')

    def call(self, input_x, training=False):

        x = self.conv1_1(input_x)
        x = self.conv1_2(x)
        x = self.BN1(x, training=training)
        x = self.conv2_1(x)
        x = self.conv2_2(x)
        x = self.conv2_3(x)
        x = self.BN2(x, training=training)
        x = self.conv3_1(x)
        x = self.conv3_2(x)

        return x

class DNNmodel(keras.models.Model):
    def __init__(self, **kwargs):
        super(DNNmodel,self).__init__(**kwargs)
        self.FirstConv = ConvLayer1(64, name="FirstConv")
        self.SecondConv = ConvLayer1(128, name="SecondConv")
        self.ThirdConv = ConvLayer2(256, name="ThirdConv")
        self.GAP = keras.layers.GlobalAvgPool2D(name="GAP")
        self.Dense1 = keras.layers.Dense(1024,activation="relu",name="hidden1")
        self.Dropout1 = keras.layers.Dropout(0.4,name="dropout1")
        self.Dense2 = keras.layers.Dense(1024,activation="relu",name="hidden2")
        self.Dropout2 = keras.layers.Dropout(0.4,name="dropout2")
        self.Dense3 = keras.layers.Dense(10,activation="softmax",name="output_layer")

    def call(self, input_x, training=False):

        x = self.FirstConv(input_x, training=training)
        x = self.SecondConv(x, training=training)
        x = self.ThirdConv(x, training=training)
        x = self.GAP(x)
        x = self.Dense1(x)
        x = self.Dropout1(x, training=training)
        x = self.Dense2(x)
        x = self.Dropout2(x, training=training)
        x = self.Dense3(x)

        return x

    def build_graph(self, input_shape):
        # Build the model and run one symbolic forward pass so that
        # model.summary() can report output shapes for this subclassed model.
        input_shape_nobatch = input_shape[1:]
        self.build(input_shape)
        inputs = tf.keras.Input(shape=input_shape_nobatch)

        if not hasattr(self, 'call'):
            raise AttributeError("User should define 'call' method in sub-class model!")

        _ = self.call(inputs)

model = DNNmodel(name="DNNmodel")
model.build_graph((None,32,32,3))
model.summary()
Result
Model: "DNNmodel"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
FirstConv (ConvLayer1)       (None, 16, 16, 64)        75904     
_________________________________________________________________
SecondConv (ConvLayer1)      (None, 8, 8, 128)         369536    
_________________________________________________________________
ThirdConv (ConvLayer2)       (None, 8, 8, 512)         6197504   
_________________________________________________________________
GAP (GlobalAveragePooling2D) (None, 512)               0         
_________________________________________________________________
hidden1 (Dense)              (None, 1024)              525312    
_________________________________________________________________
dropout1 (Dropout)           (None, 1024)              0         
_________________________________________________________________
hidden2 (Dense)              (None, 1024)              1049600   
_________________________________________________________________
dropout2 (Dropout)           (None, 1024)              0         
_________________________________________________________________
output_layer (Dense)         (None, 10)                10250     
=================================================================
Total params: 8,228,106
Trainable params: 8,226,698
Non-trainable params: 1,408
_________________________________________________________________

Getting output from the model

Because TF2.0 runs in eager mode, you can write this as intuitively as:

model_output
model(train_x[:128])
Result
<tf.Tensor: id=6157, shape=(128, 10), dtype=float32, numpy=
array([[0.08526948, 0.11089919, 0.08213641, ..., 0.11136722, 0.0868127 ,
        0.09018778],
       [0.08159084, 0.11315602, 0.07852338, ..., 0.11361311, 0.08396991,
        0.08619042],
       [0.08103222, 0.10935334, 0.07987671, ..., 0.11274508, 0.08338705,
        0.08537409],
       ...,
       [0.08247326, 0.11167498, 0.08143697, ..., 0.11255328, 0.08266811,
        0.08789377],
       [0.08227389, 0.11054476, 0.08104812, ..., 0.11199851, 0.08548176,
        0.08526304],
       [0.07933465, 0.11304733, 0.07648235, ..., 0.11548454, 0.0835701 ,
        0.08446465]], dtype=float32)>

You can see that the model indeed outputs probabilities with shape (128, 10).
Building on this, we will compute gradients and run the backward pass.
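
As a quick sanity check (a sketch of my own, assuming train_x has already been loaded and normalized as in the dataset section further below), each row of the output should sum to 1 because the final layer is a softmax:

softmax_check
probs = model(train_x[:128])                  # shape (128, 10)
row_sums = tf.reduce_sum(probs, axis=1)       # each entry should be ~1.0
print(tf.reduce_max(tf.abs(row_sums - 1.0)))  # only a tiny numerical error remains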

Computing gradients

We use tf.GradientTape to compute the gradients. For details, please see the official guide, but briefly, it:

  • can keep watching everything the model does
  • records the TF operations from the model's input to its output
  • and from that record, automatic differentiation can recover the gradients of every parameter

That's the gist of it. (Truly mysterious technology...)
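
Before applying it to our model, here is a minimal self-contained sketch (my addition) of what GradientTape does for a single variable:

gradient_tape_example
x = tf.Variable(3.0)
with tf.GradientTape() as tape:
    y = x * x                # operations on x are recorded on the tape
dy_dx = tape.gradient(y, x)  # differentiate the recorded operations
print(dy_dx)                 # tf.Tensor(6.0, ...) = d(x^2)/dx at x = 3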

Written out as a concrete training step, it looks like this:

training_loop
loss_object = tf.keras.losses.SparseCategoricalCrossentropy() # CategoricalCrossentropy for sparse (integer) labels
optimizer = tf.keras.optimizers.Adam()
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

@tf.function
def train_step(image, label):
    # Training step for one batch
    with tf.GradientTape() as tape:
        predictions = model(image,training=True) # Forward pass in training mode to get predictions
        loss = loss_object(label,predictions) # Concrete loss value from the predictions and the true labels
    gradients = tape.gradient(loss, model.trainable_variables) # Differentiate the recorded operations to get the gradients
    optimizer.apply_gradients(zip(gradients, model.trainable_variables)) # Apply the gradients

    train_loss(loss) # Record the loss
    train_accuracy(label, predictions) # Record the accuracy

This is how the DNN gets trained.
Writing the val_step and test_step as well and putting everything together:

all_steps
loss_object = tf.keras.losses.SparseCategoricalCrossentropy() # CategoricalCrossentropy for sparse (integer) labels
optimizer = tf.keras.optimizers.Adam()

train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')

val_loss = tf.keras.metrics.Mean(name='val_loss')
val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')

test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

@tf.function
def train_step(image, label):
    # Training step for one batch
    with tf.GradientTape() as tape:
        predictions = model(image,training=True)
        print(label.shape,predictions.shape) # debug print; under @tf.function this only runs at trace time
        loss = loss_object(label,predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))

    train_loss(loss)
    train_accuracy(label, predictions)

@tf.function
def valid_step(image, label):
    predictions = model(image)
    t_loss = loss_object(label, predictions)

    val_loss(t_loss)
    val_accuracy(label, predictions)

@tf.function
def test_step(image, label):
    # One test step
    predictions = model(image)
    t_loss = loss_object(label, predictions)

    test_loss(t_loss)
    test_accuracy(label, predictions)
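
One thing to be aware of (my note, not from the original article): these tf.keras.metrics objects keep accumulating until reset_states() is called, so the numbers reported below are running averages over everything seen so far. If you want strictly per-epoch values, a sketch of the reset, placed at the top of each epoch, would look like this:

metrics_reset
# Clear the accumulated state of every metric so each epoch is reported independently.
for metric in [train_loss, train_accuracy, val_loss, val_accuracy, test_loss, test_accuracy]:
    metric.reset_states()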

Using these, let's build the loop.

Training loop

manual_train_loop
from tqdm import tqdm 
epochs = 50

model = DNNmodel(name="test_model")
model.build_graph((None,32,32,3))
for epoch in range(epochs):
    with tf.device("GPU:0"):
        with tqdm(total = train_x.shape[0]) as pb:

            for image, label in train_ds:
                train_step(image, label)
                pb.update(image.shape[0])

            for val_image, val_label in val_ds:
                valid_step(val_image, val_label)

            for test_img, test_label in test_ds:
                test_step(test_img,test_label)


    template = 'Epoch {}, Loss: {}, Accuracy: {}, val_loss:{}, val_Acc:{} Test Loss: {}, Test Accuracy: {}'
    print (template.format(
        epoch+1,
        train_loss.result(),
        train_accuracy.result()*100,
        val_loss.result(),
        val_accuracy.result()*100,
        test_loss.result(),
        test_accuracy.result()*100
        )
    )

Result
100%|██████████| 40000/40000 [00:11<00:00, 3418.37it/s]
Epoch 1, Loss: 1.8097996711730957, Accuracy: 29.344999313354492, val_loss:2.7008612155914307, val_Acc:24.649999618530273 Test Loss: 2.706587314605713, Test Accuracy: 24.279998779296875
100%|██████████| 40000/40000 [00:10<00:00, 3988.10it/s]
Epoch 2, Loss: 1.6335889101028442, Accuracy: 37.598751068115234, val_loss:2.240044593811035, val_Acc:34.13999938964844 Test Loss: 2.250936985015869, Test Accuracy: 33.86000061035156
100%|██████████| 40000/40000 [00:10<00:00, 3988.72it/s]
Epoch 3, Loss: 1.5132713317871094, Accuracy: 42.996665954589844, val_loss:1.948573350906372, val_Acc:40.78666687011719 Test Loss: 1.9632114171981812, Test Accuracy: 40.22666931152344
100%|██████████| 40000/40000 [00:10<00:00, 3993.19it/s]
Epoch 4, Loss: 1.424739956855774, Accuracy: 46.77124786376953, val_loss:1.7964352369308472, val_Acc:43.849998474121094 Test Loss: 1.8081798553466797, Test Accuracy: 43.36000061035156
100%|██████████| 40000/40000 [00:10<00:00, 3988.21it/s]
Epoch 5, Loss: 1.351781964302063, Accuracy: 49.8129997253418, val_loss:1.641222357749939, val_Acc:47.59000015258789 Test Loss: 1.6527938842773438, Test Accuracy: 47.11399841308594
100%|██████████| 40000/40000 [00:10<00:00, 3969.50it/s]
Epoch 6, Loss: 1.291505217552185, Accuracy: 52.34041976928711, val_loss:1.5141568183898926, val_Acc:51.125 Test Loss: 1.525038480758667, Test Accuracy: 50.7066650390625
100%|██████████| 40000/40000 [00:10<00:00, 3946.69it/s]
Epoch 7, Loss: 1.239457368850708, Accuracy: 54.49071502685547, val_loss:1.4405416250228882, val_Acc:53.121429443359375 Test Loss: 1.4522910118103027, Test Accuracy: 52.70143127441406
100%|██████████| 40000/40000 [00:10<00:00, 3972.89it/s]
Epoch 8, Loss: 1.1929171085357666, Accuracy: 56.4275016784668, val_loss:1.3709797859191895, val_Acc:55.23999786376953 Test Loss: 1.3852379322052002, Test Accuracy: 54.77625274658203
100%|██████████| 40000/40000 [00:10<00:00, 3941.13it/s]
Epoch 9, Loss: 1.1516916751861572, Accuracy: 58.09833526611328, val_loss:1.3137990236282349, val_Acc:56.988887786865234 Test Loss: 1.3274644613265991, Test Accuracy: 56.56444549560547
100%|██████████| 40000/40000 [00:10<00:00, 3948.50it/s]
Epoch 10, Loss: 1.114467740058899, Accuracy: 59.61674880981445, val_loss:1.2772949934005737, val_Acc:58.217002868652344 Test Loss: 1.289846420288086, Test Accuracy: 57.849002838134766

You can see that it is learning properly.

A small improvement

With tqdm's .set_postfix_str you can display the current training metrics on the progress bar in real time.
Using this,

improved_version
from tqdm import tqdm 
epochs = 10
template_train = "Loss: {}, Acc: {}"
model = DNNmodel(name="test_model")
model.build_graph((None,32,32,3))
for epoch in range(epochs):
    with tqdm(total = train_x.shape[0])as pb:

        for image, label in train_ds:
            train_step(image, label)
            pb.update(image.shape[0])
            pb.set_postfix_str(template_train.format(
                train_loss.result(),
                train_accuracy.result()*100
            ))

        for val_image, val_label in val_ds:
            valid_step(val_image, val_label)

        for test_img, test_label in test_ds:
            test_step(test_img,test_label)


    template = 'Epoch {}, Loss: {}, Accuracy: {}, val_loss:{}, val_Acc:{} Test Loss: {}, Test Accuracy: {}'
    print (template.format(
        epoch+1,
        train_loss.result(),
        train_accuracy.result()*100,
        val_loss.result(),
        val_accuracy.result()*100,
        test_loss.result(),
        test_accuracy.result()*100
        )
    )

This is even nicer.

Further improvements

  • early stopping
  • plotting the learning curves
  • selecting the best model by val_loss

After adding these and a few other things, here is the resulting (somewhat messy) TrainLoop:

TrainLoop
from tqdm import tqdm
from time import time
import matplotlib.pyplot as plt



def train(input_model,model_name="temp"):
    save_weight="/tmp/checkpoints/"+model_name

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
    #train_auc = tf.keras.metrics.AUC(name="train_auc")

    val_loss = tf.keras.metrics.Mean(name='val_loss')
    val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')
    #val_auc = tf.keras.metrics.AUC(name="val_auc")

    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')    
    #test_auc = tf.keras.metrics.AUC(name="test_auc")

    @tf.function
    def train_step(image, label):


        with tf.GradientTape() as tape:
            predictions = input_model(image,training=True)
            loss = loss_object(label,predictions)
        gradients = tape.gradient(loss, input_model.trainable_variables)
        optimizer.apply_gradients(zip(gradients, input_model.trainable_variables))

        train_loss(loss)
        train_accuracy(label, predictions)
        #train_auc.update_state(label, predictions)


    @tf.function
    def val_step(image, label):
        # One validation step
        predictions = input_model(image)
        t_loss = loss_object(label, predictions)

        val_loss(t_loss)
        val_accuracy(label, predictions)
        #val_auc.update_state(label, predictions)

    #max_auc=0.5
    min_loss =10000.0
    early_stop_count=0
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
    optimizer = tf.keras.optimizers.Adam()
    stat_train_loss = []
    stat_train_acc = []
    #stat_train_auc = []
    stat_val_loss = []
    stat_val_acc = []
    #stat_val_auc = []
    stat_isbest = 0
    early_stop_limit = 30
    for epoch in range(1,250):
        if early_stop_count ==early_stop_limit and epoch >=50:
            print("early stopping!!")
            break
        with tqdm(total=len(train_x))as pb:

            for train_x_, train_y_ in train_ds:
                train_step(train_x_,train_y_)

                template = 'Train_Loss: {:.5f}, Train_Accuracy: {:.3f}'
                suffix=template.format(
                train_loss.result(),
                train_accuracy.result()*100,
                #train_auc.result(),
                )

                pb.set_postfix_str(suffix)
                pb.update(len(train_x_))
            stat_train_loss.append(train_loss.result())
            stat_train_acc.append(train_accuracy.result()*100)
            for val_x_,val_y_ in val_ds:
                val_step(val_x_,val_y_)
            if val_loss.result()<min_loss:
                input_model.save_weights(save_weight)
                min_loss = val_loss.result()
                stat_isbest+=early_stop_count+1 # stat_isbest ends up equal to the epoch index of the current best model
                early_stop_count = 0
                print("the best mode!:loss{:.5f},acc{:.3f}".format(val_loss.result(),val_accuracy.result()*100))
            else:
                print("not best mode!:loss{:.5f},acc{:.3f} stopcount:{}".format(val_loss.result(),val_accuracy.result()*100,early_stop_count+1))
                early_stop_count+=1

            stat_val_loss.append(val_loss.result())
            stat_val_acc.append(val_accuracy.result()*100)
            #stat_val_auc.append(val_auc.result())

            val_loss.reset_states()
            val_accuracy.reset_states()


    input_model.load_weights(save_weight)
    #test_auc = tf.keras.metrics.AUC(name="test_auc")
    @tf.function
    def test_step(image, label):
        # One test step
        predictions = input_model(image)
        t_loss = loss_object(label, predictions)

        test_loss(t_loss)
        test_accuracy(label, predictions)
        #test_auc.update_state(label, predictions)

    for test_img, test_label in test_ds:
        test_step(test_img,test_label)
    print("the best score:")
    template = '\n Test Loss: {}, Test Accuracy: {}'# Test_AUC :{}'
    print(template.format(
        test_loss.result(),
        test_accuracy.result()*100,
        #test_auc.result()
        )
    )
    input_model.save_weights(save_weight+".acc{}".format(test_accuracy.result()*100))

    plt.figure(figsize=(10,5),facecolor="white")
    x_axis= list(range(1,len(stat_train_acc)+1))
    plt.subplot(121)
    plt.xlabel("batch-epoch")
    plt.ylabel("SparseCategorical-CrossEntropy-Loss")
    plt.ylim(0,1.5)
    plt.plot(x_axis,stat_train_loss)
    plt.plot(x_axis,stat_val_loss)
    plt.plot([stat_isbest],[stat_val_loss[stat_isbest-1]],marker="*")
    plt.legend(['Train', 'Val', 'bestPoint'], loc='upper right')


    plt.subplot(122)
    plt.xlabel("batch-epoch")
    plt.ylabel("Accuracy")
    plt.plot(x_axis,stat_train_acc)
    plt.plot(x_axis,stat_val_acc)
    plt.plot([stat_isbest],[stat_val_acc[stat_isbest-1]],marker="*")
    plt.legend(['Train', 'Val', 'bestPoint'], loc='lower right')

    """
    plt.subplot(133)
    plt.xlabel("batch-epoch")
    plt.ylabel("AUC")
    plt.plot(x_axis,stat_train_auc)
    plt.plot(x_axis,stat_val_auc,"r")
    plt.plot([stat_isbest],[stat_val_auc[stat_isbest]],marker="*")
    plt.suptitle(model_name)"""
    plt.show()
    print(stat_isbest)

model = DNNmodel(name="temp")
train(model,"temp")

Honestly, it feels rather low-level...
This works not only for CIFAR10 but for any classification model with sparse (integer) labels.
For binary classification you can hook up the commented-out AUC metrics, so feel free to use them.
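
For reference, here is a minimal sketch of how the AUC metric could be wired in (my addition; it assumes a hypothetical binary model whose output layer is Dense(2, activation="softmax") and 0/1 integer labels):

binary_auc_sketch
val_auc = tf.keras.metrics.AUC(name="val_auc")

@tf.function
def val_step_binary(image, label):
    predictions = binary_model(image)                # hypothetical model; output shape (batch, 2)
    val_auc.update_state(label, predictions[:, 1])   # AUC takes the probability of the positive class

# After each epoch:
# print("val AUC:", val_auc.result().numpy()); val_auc.reset_states()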

DataAugmentation

First, build the plain (un-augmented) dataset.

dataset
(train_x, train_y), (test_x, test_y) =  keras.datasets.cifar10.load_data()
train_x, test_x = train_x/255.0, test_x/255.0
train_x,val_x,train_y,val_y = train_test_split(train_x,train_y,test_size=0.2,shuffle=True)
train_ds = tf.data.Dataset.from_tensor_slices((train_x,train_y)).shuffle(40000)
train_ds = train_ds.batch(128)
val_ds = tf.data.Dataset.from_tensor_slices((val_x,val_y))
val_ds = val_ds.batch(128)
test_ds = tf.data.Dataset.from_tensor_slices((test_x,test_y))
test_ds = test_ds.batch(128)
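
As an optional tweak (my addition, not in the original article), the training pipeline could also prefetch batches in the background, in the spirit of the tf.data.Dataset post referenced at the top:

prefetched_pipeline
AUTOTUNE = tf.data.experimental.AUTOTUNE
train_ds = (tf.data.Dataset.from_tensor_slices((train_x, train_y))
            .shuffle(40000)
            .batch(128)
            .prefetch(AUTOTUNE))  # prepare the next batch while the current one trains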

Let's train on this first.

Learning
model = DNNmodel(name="DNNmodel")
train(model,"firstmodel")
Result

100%|██████████| 40000/40000 [00:11<00:00, 3520.87it/s, Train_Loss: 1.75304, Train_Accuracy: 32.510]
the best mode!:loss1.99360,acc29.490
100%|██████████| 40000/40000 [00:09<00:00, 4031.21it/s, Train_Loss: 1.48647, Train_Accuracy: 44.206]
not best mode!:loss2.00988,acc41.300 stopcount:1
100%|██████████| 40000/40000 [00:09<00:00, 4000.77it/s, Train_Loss: 1.31734, Train_Accuracy: 51.237]
the best mode!:loss1.00378,acc64.390
... (snip) ...

not best mode!:loss0.79759,acc84.200 stopcount:32
100%|██████████| 40000/40000 [00:10<00:00, 3920.37it/s, Train_Loss: 0.29922, Train_Accuracy: 89.642]
not best mode!:loss0.74358,acc84.560 stopcount:33
100%|██████████| 40000/40000 [00:10<00:00, 3911.42it/s, Train_Loss: 0.29429, Train_Accuracy: 89.818]
not best mode!:loss0.74700,acc84.880 stopcount:34
100%|██████████| 40000/40000 [00:10<00:00, 3930.98it/s, Train_Loss: 0.28953, Train_Accuracy: 89.988]
not best mode!:loss0.78008,acc84.290 stopcount:35

the best score:

 Test Loss: 0.5882744193077087, Test Accuracy: 81.87999725341797

[Figure: CIFAR10-manual-training-base.png — train/val loss and accuracy curves]

Now let's strengthen this with data augmentation.
For the augmentation policy I referred to the blog post
データのお気持ちを考えながらData Augmentationする,
and for how to implement it I referred to
NumPyでの画像のData Augmentationまとめ.

Due to time constraints, however, this time I will only apply augmentations that are easy to implement...
I plan to cover augmentation in more detail another day.
(Going into detail here feels like it would be a digression,
and fast TF-native augmentation is hard to get right, so I will put it on hold for now.)

DataAugmentation
@tf.function
def rotate_tf(image,label):
    # Rotate by a random angle in [-30, +30] degrees (converted to radians).
    if len(image.shape) == 4:
        random_angles = tf.random.uniform(shape=(tf.shape(image)[0],), minval=-30*np.pi/180, maxval=30*np.pi/180)
    if len(image.shape) == 3:
        random_angles = tf.random.uniform(shape=(), minval=-30*np.pi/180, maxval=30*np.pi/180)

    return tfa.image.rotate(image,random_angles),label

@tf.function
def flip_left_right(image,label):
    return tf.image.random_flip_left_right(image),label

@tf.function
def flip_up_down(image,label):
    return tf.image.random_flip_up_down(image),label



(train_x, train_y), (test_x, test_y) =  keras.datasets.cifar10.load_data()
train_x, test_x = train_x/255.0, test_x/255.0
train_x,val_x,train_y,val_y = train_test_split(train_x,train_y,test_size=0.2,shuffle=True)
train_ds = tf.data.Dataset.from_tensor_slices((train_x,train_y)).shuffle(40000)
train_ds = train_ds.batch(128).map(flip_up_down).map(flip_left_right).map(rotate_tf)
val_ds = tf.data.Dataset.from_tensor_slices((val_x,val_y))
val_ds = val_ds.batch(128)
test_ds = tf.data.Dataset.from_tensor_slices((test_x,test_y))
test_ds = test_ds.batch(128)

model = DNNmodel(name="temp")
train(model,"temp")

Result
... (snip) ...
100%|██████████| 40000/40000 [00:10<00:00, 3858.39it/s, Train_Loss: 0.50215, Train_Accuracy: 82.597]
not best mode!:loss0.46391,acc86.160 stopcount:27
100%|██████████| 40000/40000 [00:10<00:00, 3861.20it/s, Train_Loss: 0.50036, Train_Accuracy: 82.661]
not best mode!:loss0.44775,acc87.090 stopcount:28
100%|██████████| 40000/40000 [00:10<00:00, 3867.10it/s, Train_Loss: 0.49860, Train_Accuracy: 82.723]
not best mode!:loss0.41001,acc87.680 stopcount:29
100%|██████████| 40000/40000 [00:10<00:00, 3871.51it/s, Train_Loss: 0.49685, Train_Accuracy: 82.785]not best mode!:loss0.40719,acc87.510 stopcount:30
early stopping!!

the best score:

 Test Loss: 0.42540156841278076, Test Accuracy: 86.48999786376953

[Figure: CIFAR10-augmentation — train/val loss and accuracy curves with augmentation]

The loss blowing up partway through is a little concerning...

Still, we got very good accuracy. If we are aiming for 90%, though, it looks like we need a bit more ingenuity.
Next, let's try changing the learning rate from epoch to epoch.

Learning rate decay

Changing the learning rate is also easy: simply swap the optimizer partway through training.

change_learning_rate


def train(input_model,model_name="temp"):
    save_weight="/tmp/checkpoints/"+model_name

    train_loss = tf.keras.metrics.Mean(name='train_loss')
    train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
    #train_auc = tf.keras.metrics.AUC(name="train_auc")

    val_loss = tf.keras.metrics.Mean(name='val_loss')
    val_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='val_accuracy')
    #val_auc = tf.keras.metrics.AUC(name="val_auc")

    test_loss = tf.keras.metrics.Mean(name='test_loss')
    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')    
    #test_auc = tf.keras.metrics.AUC(name="test_auc")



    @tf.function
    def val_step(image, label):
        # One validation step
        predictions = input_model(image)
        t_loss = loss_object(label, predictions)

        val_loss(t_loss)
        val_accuracy(label, predictions)
        #val_auc.update_state(label, predictions)

    #max_auc=0.5
    min_loss =10000.0
    early_stop_count=0
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
    stat_train_loss = []
    stat_train_acc = []
    #stat_train_auc = []
    stat_val_loss = []
    stat_val_acc = []
    #stat_val_auc = []
    stat_isbest = 0
    early_stop_limit = 30
    for epoch in range(1,250):
        if early_stop_count ==early_stop_limit and epoch >=50:
            print("early stopping!!")
            break

        # Step-decay schedule: lr = 1e-3 for epochs < 75, 1e-4 for 75-149, 1e-5 from 150 onward.
        # (A new Adam instance is created every epoch, so its moment estimates restart each epoch.)
        if epoch <75:
            optimizer = tf.keras.optimizers.Adam(0.001)
        elif epoch >=75 and epoch <150 :
            optimizer = tf.keras.optimizers.Adam(0.0001)
        elif epoch >=150:
            optimizer = tf.keras.optimizers.Adam(0.00001)

        @tf.function
        def train_step(image, label):
            with tf.GradientTape() as tape:
                predictions = input_model(image,training=True)
                loss = loss_object(label,predictions)
            gradients = tape.gradient(loss, input_model.trainable_variables)
            optimizer.apply_gradients(zip(gradients, input_model.trainable_variables))

            train_loss(loss)
            train_accuracy(label, predictions)
            #train_auc.update_state(label, predictions)


        with tqdm(total=len(train_x))as pb:
            pb.set_description_str("Epoch:{}".format(epoch+1))
            for train_x_, train_y_ in train_ds:
                train_step(train_x_,train_y_)

                template = 'Train_Loss: {:.5f}, Train_Accuracy: {:.3f}'
                suffix=template.format(
                train_loss.result(),
                train_accuracy.result()*100,
                #train_auc.result(),
                )

                pb.set_postfix_str(suffix)
                pb.update(len(train_x_))
            stat_train_loss.append(train_loss.result())
            stat_train_acc.append(train_accuracy.result()*100)
            for val_x_,val_y_ in val_ds:
                val_step(val_x_,val_y_)
            if val_loss.result()<min_loss:
                input_model.save_weights(save_weight)
                min_loss = val_loss.result()
                stat_isbest+=early_stop_count+1
                early_stop_count = 0
                print("the best mode!:loss{:.5f},acc{:.3f}".format(val_loss.result(),val_accuracy.result()*100))
            else:
                print("not best mode!:loss{:.5f},acc{:.3f} stopcount:{}".format(val_loss.result(),val_accuracy.result()*100,early_stop_count+1))
                early_stop_count+=1

            stat_val_loss.append(val_loss.result())
            stat_val_acc.append(val_accuracy.result()*100)
            #stat_val_auc.append(val_auc.result())

            val_loss.reset_states()
            val_accuracy.reset_states()


    input_model.load_weights(save_weight)
    #test_auc = tf.keras.metrics.AUC(name="test_auc")
    @tf.function
    def test_step(image, label):
        # One test step
        predictions = input_model(image)
        t_loss = loss_object(label, predictions)

        test_loss(t_loss)
        test_accuracy(label, predictions)
        #test_auc.update_state(label, predictions)

    for test_img, test_label in test_ds:
        test_step(test_img,test_label)
    print("the best score:")
    template = '\n Test Loss: {}, Test Accuracy: {}'# Test_AUC :{}'
    print(template.format(
        test_loss.result(),
        test_accuracy.result()*100,
        #test_auc.result()
        )
    )
    input_model.save_weights(save_weight+".acc{}".format(test_accuracy.result()*100))

    plt.figure(figsize=(10,5),facecolor="white")
    x_axis= list(range(1,len(stat_train_acc)+1))
    plt.subplot(121)
    plt.xlabel("batch-epoch")
    plt.ylabel("SparseCategorical-CrossEntropy-Loss")
    plt.ylim(0,1.5)
    plt.plot(x_axis,stat_train_loss)
    plt.plot(x_axis,stat_val_loss)
    plt.plot([stat_isbest],[stat_val_loss[stat_isbest-1]],marker="*")
    plt.legend(['Train', 'Val', 'bestPoint'], loc='upper right')

    plt.subplot(122)
    plt.xlabel("batch-epoch")
    plt.ylabel("Accuracy")
    plt.plot(x_axis,stat_train_acc)
    plt.plot(x_axis,stat_val_acc)
    plt.plot([stat_isbest],[stat_val_acc[stat_isbest-1]],marker="*")
    plt.legend(['Train', 'Val', 'bestPoint'], loc='lower right')

    """
    plt.subplot(133)
    plt.xlabel("batch-epoch")
    plt.ylabel("AUC")
    plt.plot(x_axis,stat_train_auc)
    plt.plot(x_axis,stat_val_auc,"r")
    plt.plot([stat_isbest],[stat_val_auc[stat_isbest]],marker="*")
    plt.suptitle(model_name)"""
    plt.show()
    print(stat_isbest)


model = DNNmodel(name="temp")
train(model,"temp")
Result
not best mode!:loss0.40854,acc87.810 stopcount:28
Epoch:128: 100%|██████████| 40000/40000 [00:11<00:00, 3523.69it/s, Train_Loss: 0.53335, Train_Accuracy: 81.554]
not best mode!:loss0.40124,acc87.610 stopcount:29
Epoch:129: 100%|██████████| 40000/40000 [00:11<00:00, 3428.10it/s, Train_Loss: 0.53132, Train_Accuracy: 81.625]not best mode!:loss0.40338,acc87.740 stopcount:30
early stopping!!

the best score:

 Test Loss: 0.398386687040329, Test Accuracy: 87.97000122070312

[Figure: CIFAR10-learning-late.png — train/val loss and accuracy curves with learning-rate decay]

We have climbed to 88%.
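
As a side note (my addition, not from the article): because the loop above recreates the Adam optimizer, its moment estimates restart every epoch. An alternative would be to create the optimizer once with a step-decay schedule such as tf.keras.optimizers.schedules.PiecewiseConstantDecay; a sketch assuming roughly 313 training steps per epoch (40000 samples / batch size 128):

lr_schedule_sketch
steps_per_epoch = 313  # ceil(40000 / 128), based on the dataset above
lr_schedule = tf.keras.optimizers.schedules.PiecewiseConstantDecay(
    boundaries=[75 * steps_per_epoch, 150 * steps_per_epoch],
    values=[1e-3, 1e-4, 1e-5])
optimizer = tf.keras.optimizers.Adam(lr_schedule)  # created once; Adam's state is kept across epochs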

Conclusion

With this approach, we can control as much of the training process as we like.
tqdm gives us a progress bar, and we can even reproduce behavior similar to the high-level API.
Please use this training loop as a base for your own experiments and research.

For now, this is where my hands-on posts for this Advent Calendar end. If I find time to write more, I may take an open calendar slot and post a follow-up
(e.g. a training loop for GANs, or more on DataAugmentation).
