More than 5 years have passed since last update.

TF-Slimを使ってTensorFlowのCNNモデルをリファクタリングしてみた

Last updated at 2017-06-28 / Posted at 2017-06-28

はじめに

DeepLearningには複数のライブラリがありますが、TensorFlowは他のライブラリに比べて、モデルを作成する際の記法が複雑になりがちに見受けられました。
今回は、以前作成したモデルをTF-Slimというライブラリを使って、リファクタリングしてみました。
リファクタリングといっても、記法を置き換えただけです。Pythonのコーディング規約があるとしても、それは無視して改行などを入れています。
モデルだけクラスを分けてはいないので、若干読みにくいですがご了承ください。

ソースコード

slim_network.py

# !/usr/local/bin/python
# -*- coding: utf-8 -*-

import cv2
import numpy as np
import tensorflow as tf
import tensorflow.python.platform
import tensorflow.contrib.slim as slim

# Number of classification labels
# (Zuckerberg: 0, Elon Musk: 1, Bill Gates: 2, hence 3).
NUM_CLASSES = 3

# Edge length in pixels that every image is resized to for training.
IMAGE_SIZE = 28

# Length of one flattened image vector:
# IMAGE_SIZE * IMAGE_SIZE pixels * 3 colour channels.
IMAGE_PIXELS = IMAGE_SIZE*IMAGE_SIZE*3

# Command-line flags for the data locations and the training scale.
# tf.app.flags lets each flag carry a default value and a help text.
flags = tf.app.flags
FLAGS = flags.FLAGS

# List file describing the training data.
flags.DEFINE_string('train', '/Users/neriai/Develops/workspace/dir/train/data.txt', 'File name of train data')

# List file describing the held-out test data.
# (The help text used to say "train data", copy-pasted from the flag above.)
flags.DEFINE_string('test', '/Users/neriai/Develops/workspace/dir/test/data.txt', 'File name of test data')

# Directory handed to the TensorBoard summary writer.
flags.DEFINE_string('train_dir', '/Users/neriai/Develops/workspace/dir/data', 'Directory to put the training data.')

# Number of training iterations (passes of the outer training loop).
flags.DEFINE_integer('max_steps', 100, 'Number of steps to run trainer.')

# Number of images fed to a single optimisation step.
flags.DEFINE_integer('batch_size', 20, 'Batch size Must divide evenly into the dataset sizes.')

# Learning rate handed to the optimizer. Too small and training barely
# progresses; too large and the loss may fail to converge or diverge.
flags.DEFINE_float('learning_rate', 1e-4, 'Initial learning rate.')

# Build the neural-network part of the learning model.
# x_image: batch of images as a 4-D tensor, keep_prob: value handed to the
# dropout layer. Returns the softmax-normalised score of every label.
def model(x_image, keep_prob):
    """Assemble conv -> pool -> conv -> pool -> fc -> dropout -> fc -> softmax.

    Args:
        x_image: image batch tensor fed to the first convolution.
        keep_prob: tensor or float forwarded to `slim.dropout`.

    Returns:
        The softmax of the final fully connected layer, one row per image
        and NUM_CLASSES columns.
    """
    # Shared defaults for every conv / fully connected layer created while
    # the scope is active: ReLU, truncated-normal weights, constant biases.
    layer_defaults = slim.arg_scope(
        [slim.conv2d, slim.fully_connected],
        activation_fn=tf.nn.relu,
        weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
        biases_initializer=tf.constant_initializer(0.1)
    )
    # Pooling layers created inside the scope use 'SAME' padding.
    pooling_defaults = slim.arg_scope([slim.max_pool2d], padding='SAME')

    with layer_defaults:
        with pooling_defaults:
            # Convolution 1 (32 filters, 5x5) followed by 2x2 max pooling.
            net = slim.conv2d(x_image, 32, [5, 5])
            net = slim.max_pool2d(net, [2, 2])

            # Convolution 2 (64 filters, 5x5) followed by 2x2 max pooling.
            net = slim.conv2d(net, 64, [5, 5])
            net = slim.max_pool2d(net, [2, 2])

            # Fully connected layer 1 on the flattened feature maps.
            net = slim.flatten(net)
            net = slim.fully_connected(net, 1024)

            # Dropout on the hidden representation.
            net = slim.dropout(net, keep_prob)

    # Fully connected layer 2 (one output per class, no activation).
    # NOTE(review): this layer is created outside the arg_scope above, so the
    # initializers declared there are not applied to it -- confirm that is
    # intended.
    scores = slim.fully_connected(net, NUM_CLASSES, activation_fn=None)

    # Normalise the scores with softmax.
    return tf.nn.softmax(scores)

# Compute how far the predictions are from the correct labels.
# labels_placeholder: one-hot labels  - [batch_size, NUM_CLASSES]
# model: predicted class probabilities - [batch_size, NUM_CLASSES]
def loss(labels_placeholder, model):
    """Return the summed cross entropy between labels and predictions.

    Args:
        labels_placeholder: tensor of one-hot labels.
        model: tensor of predicted probabilities (its logarithm is taken
            directly, so it must already be normalised, e.g. by softmax).

    Returns:
        A scalar tensor holding the cross entropy summed over the batch.
    """
    # Keep the probabilities away from 0 before taking the logarithm:
    # log(0) is -inf and 0 * -inf is NaN, which would poison the loss and
    # every gradient derived from it.
    safe_probabilities = tf.clip_by_value(model, 1e-10, 1.0)

    cross_entropy = -tf.reduce_sum(labels_placeholder*tf.log(safe_probabilities))

    # Register the value so that it shows up in TensorBoard.
    tf.summary.scalar("cross_entropy", cross_entropy)

    return cross_entropy

# Build the op that trains the model from the loss.
# The optimizer adjusts the model's variables so that the loss gets smaller.
def training(learning_rate, loss):
    """Return an op that applies one Adam update minimising `loss`.

    Args:
        learning_rate: learning rate handed to `tf.train.AdamOptimizer`.
        loss: scalar tensor to minimise.

    Returns:
        The op produced by the optimizer's `minimize` call.
    """
    optimizer = tf.train.AdamOptimizer(learning_rate)
    return optimizer.minimize(loss)

# Compute the fraction of predictions made by the model that are correct.
def accuracy(model, labels_placeholder):
    """Return a scalar tensor with the ratio of correctly predicted rows.

    Args:
        model: tensor of per-class scores, one row per sample.
        labels_placeholder: tensor of one-hot labels, one row per sample.

    Returns:
        The mean of the per-row hits (1.0 for a match, 0.0 otherwise).
    """
    # Index of the highest score per row, for predictions and for labels.
    predicted_class = tf.argmax(model, 1)
    expected_class = tf.argmax(labels_placeholder, 1)

    # Boolean matches become 1.0 / 0.0 so that their mean is the hit ratio.
    hits = tf.cast(tf.equal(predicted_class, expected_class), tf.float32)
    hit_ratio = tf.reduce_mean(hits)

    # Register the value so that it shows up in TensorBoard.
    tf.summary.scalar("accuracy", hit_ratio)

    return hit_ratio

# Load a list file whose lines look like "<image path> <label index>".
def _load_dataset(list_path):
    """Read the images and labels listed in `list_path`.

    Every image is resized to IMAGE_SIZE x IMAGE_SIZE, flattened into one
    row and converted to float32 divided by 255. Every label becomes a
    one-hot vector of length NUM_CLASSES.

    Args:
        list_path: path of the whitespace-separated list file.

    Returns:
        A tuple (images, labels) of numpy arrays with one row per listed image.

    Raises:
        IOError: if a listed image cannot be read.
    """
    images = []
    labels = []

    # `with` closes the file even if an image turns out to be unreadable.
    with open(list_path, 'r') as list_file:
        for line in list_file:
            fields = line.split()

            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not fields:
                continue

            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than inside cv2.resize.
            img = cv2.imread(fields[0])
            if img is None:
                raise IOError('Could not read image: %s' % fields[0])

            img = cv2.resize(img, (IMAGE_SIZE, IMAGE_SIZE))

            # Flatten to one row and scale the pixel values by 1/255.
            images.append(img.flatten().astype(np.float32)/255.0)

            # One-hot (1-of-k) encoding of the label index.
            one_hot = np.zeros(NUM_CLASSES)
            one_hot[int(fields[1])] = 1
            labels.append(one_hot)

    return np.asarray(images), np.asarray(labels)


if __name__ == '__main__':

    # Training data and held-out test data.
    train_image, train_label = _load_dataset(FLAGS.train)
    test_image, test_label = _load_dataset(FLAGS.test)

    # Build everything in a fresh graph (this is what TensorBoard displays).
    with tf.Graph().as_default():

        # Flattened images: any number of rows of IMAGE_PIXELS values.
        images_placeholder = tf.placeholder(tf.float32, shape=(None, IMAGE_PIXELS))

        # One-hot labels: any number of rows of NUM_CLASSES values.
        labels_placeholder = tf.placeholder(tf.float32, shape=(None, NUM_CLASSES))

        # Turn the flat vectors back into IMAGE_SIZE x IMAGE_SIZE images with
        # 3 colour channels (it would be 1 for monochrome images).
        x_image = tf.reshape(images_placeholder, [-1, IMAGE_SIZE, IMAGE_SIZE, 3])

        # Value fed to the dropout layer (0.5 while training, 1.0 for evaluation).
        keep_prob = tf.placeholder(tf.float32)

        # Build the graph. The results get their own names so that the
        # model() / loss() / accuracy() functions are not overwritten.
        probabilities = model(x_image, keep_prob)
        cross_entropy = loss(labels_placeholder, probabilities)
        train_step = training(FLAGS.learning_rate, cross_entropy)
        accuracy_op = accuracy(probabilities, labels_placeholder)

        # Prepare for saving the trained variables.
        saver = tf.train.Saver()

        # Merge every summary registered while building the graph.
        summary_step = tf.summary.merge_all()

        # The session is closed automatically when the block is left.
        with tf.Session() as sess:

            # Variables must be initialised before anything else is run.
            sess.run(tf.global_variables_initializer())

            # TensorBoard logs are written below FLAGS.train_dir.
            summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)

            # Integer division: samples that do not fill a whole batch are
            # skipped, and range() receives an int on Python 2 and 3 alike.
            batches_per_step = len(train_image) // FLAGS.batch_size

            for step in range(FLAGS.max_steps):
                for i in range(batches_per_step):

                    # Offset of the first sample of this batch.
                    batch = FLAGS.batch_size*i

                    # feed_dict supplies the data for the placeholders.
                    sess.run(
                        train_step,
                        feed_dict={
                            images_placeholder: train_image[batch:batch+FLAGS.batch_size],
                            labels_placeholder: train_label[batch:batch+FLAGS.batch_size],
                            keep_prob: 0.5
                        }
                    )

                # After every step, evaluate the accuracy and the TensorBoard
                # summaries on the whole training set in a single run.
                train_accuracy, summary_str = sess.run(
                    [accuracy_op, summary_step],
                    feed_dict={
                        images_placeholder: train_image,
                        labels_placeholder: train_label,
                        keep_prob: 1.0
                    }
                )

                print("step %d, training accuracy %g" % (step, train_accuracy))

                summary_writer.add_summary(summary_str, step)

            # Flush pending summaries to disk.
            summary_writer.close()

            # Once training is finished, report the accuracy on the test data.
            test_accuracy = sess.run(
                accuracy_op,
                feed_dict={
                    images_placeholder: test_image,
                    labels_placeholder: test_label,
                    keep_prob: 1.0
                }
            )
            print("test accuracy %g" % test_accuracy)

            # Save the trained model; "model.ckpt" is the output file name.
            save_path = saver.save(sess, "model.ckpt")

比較

初期化・層の作成

Before

def weight_variable(shape):
    """Return a variable of `shape` initialised from a truncated normal (stddev 0.1)."""
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    """Return a variable of `shape` with every element initialised to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))

def conv2d(x, W):
    """Convolve `x` with the filter `W` using stride 1 and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_2x2(x):
    """Max-pool `x` with a 2x2 window, stride 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

After

with slim.arg_scope(
        [slim.conv2d, slim.fully_connected],
        activation_fn=tf.nn.relu,
        weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
        biases_initializer=tf.constant_initializer(0.1)
    ):
        with slim.arg_scope([slim.max_pool2d], padding='SAME'):

畳み込み層1

Before

W_conv1 = weight_variable([5, 5, 3, 32])
b_conv1 = bias_variable([32])
conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)

After

conv1 = slim.conv2d(x_image, 32, [5, 5])

プーリング層1

Before

pool1 = max_pool_2x2(conv1)

After

pool1 = slim.max_pool2d(conv1, [2, 2])

畳み込み層2

Before

# Second convolution layer: filter of shape [5, 5, 32, 64] plus 64 biases.
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
# The input is the first pooling layer's output, which is named `pool1`
# (the excerpt previously referred to an undefined `h_pool1`).
conv2 = tf.nn.relu(conv2d(pool1, W_conv2) + b_conv2)

After

conv2 = slim.conv2d(pool1, 64, [5, 5])

プーリング層2

Before

pool2 = max_pool_2x2(conv2)

After

pool2 = slim.max_pool2d(conv2, [2, 2])

全結合層1

Before

W_fc1 = weight_variable([7*7*64, 1024])
b_fc1 = bias_variable([1024])
pool2_flat = tf.reshape(pool2, [-1, 7*7*64])
fc1 = tf.nn.relu(tf.matmul(pool2_flat, W_fc1) + b_fc1)

After

pool2_flat = slim.flatten(pool2)
fc1 = slim.fully_connected(pool2_flat, 1024)

ドロップアウト

Before

dropout = tf.nn.dropout(fc1, keep_prob)

After

dropout = slim.dropout(fc1, keep_prob)

全結合層2 + 正規化

Before

W_fc2 = weight_variable([1024, NUM_CLASSES])
b_fc2 = bias_variable([NUM_CLASSES])
y_conv = tf.nn.softmax(tf.matmul(dropout, W_fc2) + b_fc2)

After

y_conv = slim.fully_connected(dropout, NUM_CLASSES, activation_fn=None)
y_conv = tf.nn.softmax(y_conv)

まとめ

ちょっとスリムになった感覚だけど、SSDクラスの層になると効果がでてくるのかもしれない。
置き換える作業は、理解するというよりパズル感覚。
ほかにも置き換えられそうな箇所はありそうでしたが、色々エラーが出たり計算数値が壊れたりするので、model内部だけに留めました。
パクリソースだったのが、少し自分のモノになった気がする。

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up