GeForce GTX 1070 (8GB)
ASRock Z170M Pro4S [Intel Z170 chipset]
Ubuntu 14.04 LTS desktop amd64
TensorFlow v0.11
cuDNN v5.1 for Linux
CUDA v8.0
Python 2.7.6
IPython 5.1.0 -- An enhanced Interactive Python.
gcc (Ubuntu 4.8.4-2ubuntu1~14.04.3) 4.8.4
GNU bash, version 4.3.8(1)-release (x86_64-pc-linux-gnu)
Using TensorFlow, I am training a network with 100 input nodes and 100 output nodes.
Related: http://qiita.com/7of9/items/7394f4326a88500a67b9
This time I switched the activation function of the output layer and compared the resulting differences.
API doc: https://www.tensorflow.org/versions/r0.11/api_docs/python/nn/activation_functions_
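For reference, the two non-linear output activations compared below are softplus and the logistic sigmoid. A minimal NumPy sketch of their definitions (tf.nn.softplus and tf.nn.sigmoid compute the same functions element-wise):

import numpy as np

def softplus(x):
    # softplus(x) = log(1 + exp(x)); smooth, positive, unbounded above
    return np.log1p(np.exp(x))

def sigmoid(x):
    # sigmoid(x) = 1 / (1 + exp(-x)); range (0, 1), matching the [0, 1) data
    return 1.0 / (1.0 + np.exp(-x))

x = np.linspace(-3.0, 3.0, 7)
print(softplus(x))
print(sigmoid(x))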
Data generation code > v0.9
Jupyter code.
In v0.9, both the input and output values are in the range [0, 1).
Running the code below generates test_in.csv and test_out.csv.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import os
'''
v0.9 Mar. 12, 2017
  - change [DATA_FACTOR] to [1.0]
v0.8 Mar. 05, 2017
  - add [DATA_FACTOR] to change range of the data
v0.7 Feb. 16, 2017
  - increase [NUM_FILEOUT] from 100 to 10000
v0.6 Feb. 14, 2017
  - fix bug > savetxt() was without 'delimiter=','
v0.5 Feb. 06, 2017
  - add [FILENAME_*_CSV],[FILENAME_*_BAK]
  - output to csv with append mode
v0.4 Jan. 21, 2017
  - set size of figures
v0.3 Jan. 21, 2017
  - show 2 images in one figure
v0.2 Jan. 14, 2017
  - calcOutput() return in numpy.array
  - add saveToCsvFile()
v0.1 Jan. 14, 2017
  - add calcOutput()
  - add showIn2D()
  - show 1d in 2d format
'''
'''
codingrule:PEP8
'''
XDIM = 10
YDIM = 10
INDIM = XDIM * YDIM
FILENAME_IN_CSV = 'test_in.csv'
FILENAME_OUT_CSV = 'test_out.csv'
FILENAME_IN_BAK = 'test_in.bak'
FILENAME_OUT_BAK = 'test_out.bak'
DATA_FACTOR = 1.0
def saveToCsvFile(data_1d, filename):
    wrk_1d = data_1d.reshape(1, INDIM)
    # np.savetxt(filename, wrk_1d, delimiter=',')
    with open(filename, 'a') as fd:
        np.savetxt(fd, wrk_1d, delimiter=',')
def calcOutput(in_1d):
    # output is the input reversed: out[idx] = in[len - idx - 1]
    len_1d = XDIM * YDIM
    out_1d = [0.0] * len_1d
    for idx in range(0, in_1d.size):
        out_1d[idx] = in_1d[len_1d - idx - 1]
    return np.array(out_1d)
def showIn2D(data_1d):
    # print(data_1d)
    data_2d = np.reshape(data_1d, (XDIM, YDIM))
    plt.imshow(data_2d, extent=(0, XDIM, 0, YDIM), cmap=cm.gist_rainbow)
    plt.show()
def showIn2D_2image_subplot_each(data1_1d, data2_1d):
    data1_2d = np.reshape(data1_1d, (XDIM, YDIM))
    data2_2d = np.reshape(data2_1d, (XDIM, YDIM))
    fig1 = plt.figure(1)
    fig1.set_size_inches(3.14, 3.14)
    plt.subplot(121)
    plt.title('input node')
    plt.imshow(data1_2d, extent=(0, XDIM, 0, YDIM), cmap=cm.gist_rainbow)
    plt.subplot(122)
    plt.title('output node')
    plt.imshow(data2_2d, extent=(0, XDIM, 0, YDIM), cmap=cm.gist_rainbow)
    plt.show()
def showIn2D_2image_subplot_first(data1_1d, data2_1d):
    data1_2d = np.reshape(data1_1d, (XDIM, YDIM))
    data2_2d = np.reshape(data2_1d, (XDIM, YDIM))
    fig, (axL, axR) = plt.subplots(ncols=2, figsize=(10, 4))
    axL.imshow(data1_2d, extent=(0, XDIM, 0, YDIM), cmap=cm.gist_rainbow)
    axL.grid(True)
    axR.imshow(data2_2d, extent=(0, XDIM, 0, YDIM), cmap=cm.gist_rainbow)
    axR.grid(True)
    fig.show()
if __name__ == '__main__':
    # back up the original csv files
    if os.path.exists(FILENAME_IN_CSV):
        os.rename(FILENAME_IN_CSV, FILENAME_IN_BAK)
    if os.path.exists(FILENAME_OUT_CSV):
        os.rename(FILENAME_OUT_CSV, FILENAME_OUT_BAK)

    # append to csv
    NUM_FILEOUT = 10000
    NUM_DISPLAY = 5
    for loop in range(NUM_FILEOUT):
        # in_1d = np.random.rand(INDIM)
        in_1d = np.random.rand(INDIM) * DATA_FACTOR
        out_1d = calcOutput(in_1d)
        saveToCsvFile(in_1d, FILENAME_IN_CSV)
        saveToCsvFile(out_1d, FILENAME_OUT_CSV)
        if loop < NUM_DISPLAY:
            showIn2D_2image_subplot_each(in_1d, out_1d)
            showIn2D_2image_subplot_first(in_1d, out_1d)
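A quick sanity check of the generated files (a minimal sketch, run after the generator above): each row of test_out.csv should be the corresponding row of test_in.csv reversed, since calcOutput() flips the 1-D index.

import numpy as np

ins = np.loadtxt('test_in.csv', delimiter=',')
outs = np.loadtxt('test_out.csv', delimiter=',')
print(ins.shape)                        # (10000, 100)
print(np.allclose(outs, ins[:, ::-1]))  # True if the reversal holds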
Training code v0.8
The activation function in
prediction = slim.fully_connected()
is switched among three choices (None, tf.nn.softplus, tf.nn.sigmoid), and the script is run once for each.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
import tensorflow as tf
import tensorflow.contrib.slim as slim
import numpy as np
'''
v0.8 Mar. 13, 2017
  - toying with output activation function
    + [None], [tf.nn.softplus], [tf.nn.sigmoid]
v0.7 Feb. 22, 2017
  - treat each hidden layer to handle dropout instead of slim.stack()
v0.6 Feb. 19, 2017
  - add sys.stdout.flush() to immediately print
v0.5 Feb. 18, 2017
  - add fc_drop()
v0.4 Feb. 15, 2017
  - fix bug > ZeroDivisionError: float division by zero @ shuffle_batch()
v0.3 Feb. 15, 2017
  - tweak [batch_size] for shuffle_batch()
v0.2 Feb. 15, 2017
  - fix bug > different dimensions for placeholder and network
v0.1 Feb. 06, 2017
  - read [test_in.csv],[test_out.csv]
'''
'''
codingrule:PEP8
'''
def fc_drop(inputs, *args, **kwargs):
    # Thanks to: http://qiita.com/shngt/items/f532601b4f059ce8584f
    net = slim.fully_connected(inputs, *args, **kwargs)
    return slim.dropout(net, 0.9)
filename_inp = tf.train.string_input_producer(["test_in.csv"])
filename_out = tf.train.string_input_producer(["test_out.csv"])
NUM_INP_NODE = 100
NUM_OUT_NODE = 100
# parse csv
# a. input node
reader = tf.TextLineReader()
key, value = reader.read(filename_inp)
deflist = [[0.] for idx in range(NUM_INP_NODE)]
input1 = tf.decode_csv(value, record_defaults=deflist)
# b. output node
key, value = reader.read(filename_out)
deflist = [[0.] for idx in range(NUM_OUT_NODE)]
output1 = tf.decode_csv(value, record_defaults=deflist)
# c. pack
# inputs = tf.pack([input1])
inputs = input1
# outputs = tf.pack([output1])
outputs = output1
batch_size = 2
inputs_batch, output_batch = tf.train.shuffle_batch(
    [inputs, outputs], batch_size, capacity=10, min_after_dequeue=batch_size)
input_ph = tf.placeholder("float", [None, 100])
output_ph = tf.placeholder("float", [None, 100])
# network
# a. with stack
# hiddens = slim.stack(input_ph, slim.fully_connected, [7, 7, 7],
#                      activation_fn=tf.nn.sigmoid, scope="hidden")
# a2. larger network
hiddens = slim.stack(input_ph, slim.fully_connected, [100, 100, 100],
                     activation_fn=tf.nn.sigmoid, scope="hidden")
# b. without stack
# sgm = tf.nn.sigmoid
# hiddens = slim.fully_connected(input_ph, 7, activation_fn=sgm, scope='hidden1')
# hiddens = slim.fully_connected(hiddens, 7, activation_fn=sgm, scope='hidden2')
# hiddens = slim.fully_connected(hiddens, 7, activation_fn=sgm, scope='hidden3')
# c. without stack, with dropout
# sgm = tf.nn.sigmoid
# hiddens = slim.fully_connected(input_ph, 7, activation_fn=sgm, scope='hidden1')
# drpout = slim.stack(hiddens, fc_drop, [7, 7], scope='hd_fc1')
# hiddens = slim.fully_connected(drpout, 7, activation_fn=sgm, scope='hidden2')
# drpout = slim.stack(hiddens, fc_drop, [7, 7], scope='hd_fc2')
# hiddens = slim.fully_connected(drpout, 7, activation_fn=sgm, scope='hidden3')
# a. None (linear)
prediction = slim.fully_connected(
    hiddens, 100, activation_fn=None, scope="output")
# b. softplus
# prediction = slim.fully_connected(
#     hiddens, 100, activation_fn=tf.nn.softplus, scope="output")
# c. sigmoid
# prediction = slim.fully_connected(
#     hiddens, 100, activation_fn=tf.nn.sigmoid, scope="output")
loss = tf.contrib.losses.mean_squared_error(prediction, output_ph)
train_op = slim.learning.create_train_op(loss, tf.train.AdamOptimizer(0.000001))
init_op = tf.initialize_all_variables()
with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        sess.run(init_op)
        for idx in range(90000):
            # for idx in range(100000):
            inpbt, outbt = sess.run([inputs_batch, output_batch])
            _, t_loss = sess.run(
                [train_op, loss],
                feed_dict={input_ph: inpbt, output_ph: outbt})
            if (idx+1) % 100 == 0:
                print("%d,%f" % (idx+1, t_loss))
                sys.stdout.flush()
    finally:
        coord.request_stop()
        coord.join(threads)
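The three result files read by the plotting code below correspond to one run per output activation, with the printed step,loss lines saved to a file for each run. As an alternative to editing the a/b/c block between runs, a hypothetical sketch that selects the output activation from a command-line argument (the names here are assumptions, not part of the original script):

# Hypothetical: pick the output activation at run time instead of
# editing the source between the three runs.
import sys
import tensorflow as tf
import tensorflow.contrib.slim as slim

ACTIVATIONS = {
    'none': None,                 # a. linear output
    'softplus': tf.nn.softplus,   # b.
    'sigmoid': tf.nn.sigmoid,     # c.
}
act_fn = ACTIVATIONS[sys.argv[1]] if len(sys.argv) > 1 else None

# then build the output layer with the selected function:
# prediction = slim.fully_connected(
#     hiddens, 100, activation_fn=act_fn, scope="output")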
Results
Jupyter code. data1 through data3 below load the step,loss pairs printed by the training script, saved once per output activation.
%matplotlib inline
'''
Last update: Mar. 12, 2017
'''
import numpy as np
import matplotlib.pyplot as plt
# data1 = np.loadtxt('res.learn.170215', delimiter=',')
# data1 = np.loadtxt('res.learn.N=1000_170216', delimiter=',')
# data1 = np.loadtxt('res.withStack_170222', delimiter=',')
# data1 = np.loadtxt('res.wordUpsideDown_1000word_170305', delimiter=',')
# data1 = np.loadtxt('res.learn.N=1000_factor1000_170305', delimiter=',')
# data2 = np.loadtxt('res.learn.N=1000_dropout@output_170218', delimiter=',')
# data2 = np.loadtxt('res.learn.N=1000_dropout@output_longrun_170218', delimiter=',')
# data2 = np.loadtxt('res.withoutStack_170222', delimiter=',')
# data2 = np.loadtxt('res.withoutStack_withDropout_170222', delimiter=',')
# data2 = np.loadtxt('res.wordUpsideDown_1000word_normalized_170305', delimiter=',')
# data2 = np.loadtxt('res.learn.N=1000_factor100_170305', delimiter=',')
data1 = np.loadtxt('res.learn.output:None_170312', delimiter=',')
data2 = np.loadtxt('res.learn.output:softplus_170312', delimiter=',')
data3 = np.loadtxt('res.learn.output:sigmoid_170312', delimiter=',')
input1 = data1[:,0]
output1 = data1[:,1]
input2 = data2[:,0]
output2 = data2[:,1]
input3 = data3[:,0]
output3 = data3[:,1]
fig = plt.figure(figsize=(8,8))
ax1 = fig.add_subplot(3,1,1)
ax2 = fig.add_subplot(3,1,2)
ax3 = fig.add_subplot(3,1,3)
ax1.scatter(input1,output1,s=2, label='output:None(Linear)')
ax2.scatter(input2,output2,s=2, label='output:softplus')
ax3.scatter(input3,output3,s=2, label='output:sigmoid')
ax1.set_xlabel('step')
ax1.set_ylabel('Error')
ax1.set_xlim(0, 90000)
ax1.set_ylim(0, 0.3)
ax1.grid(True)
ax1.legend(loc='upper right')
ax2.set_xlabel('step')
ax2.set_ylabel('Error')
ax2.set_xlim(0, 90000)
ax2.set_ylim(0, 0.3)
ax2.grid(True)
ax2.legend(loc='upper right')
ax3.set_xlabel('step')
ax3.set_ylabel('Error')
ax3.set_xlim(0, 90000)
ax3.set_ylim(0, 0.3)
ax3.grid(True)
ax3.legend(loc='upper right')
fig.tight_layout()
fig.show()
Discussion
- The final error reached is about the same for all three activation functions
- sigmoid converges fastest, followed by softplus
I read a comment on stackoverflow (I have lost the URL) saying that softplus (and the related softmax) pushes one output toward 1 and the others toward 0, but it is unclear whether that helps in the kind of function approximation done here.
The softmax "squishes" the inputs so that sum(input) = 1; it's a way of normalizing.
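As a quick check of that quoted description: softmax normalizes a vector so its elements sum to 1, with the largest input taking most of the mass (a minimal NumPy sketch):

import numpy as np

def softmax(x):
    # subtract the max for numerical stability, then exponentiate and normalize
    e = np.exp(x - np.max(x))
    return e / e.sum()

p = softmax(np.array([1.0, 2.0, 5.0]))
print(p)        # the largest input dominates
print(p.sum())  # 1.0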
The hidden layers used sigmoid in all cases. Could changing the hidden-layer activation reduce the final error further?
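A minimal sketch of that follow-up experiment, assuming tf.nn.relu as one candidate (only the hidden-layer line in the training code changes; relu is an assumption here, not something tested above):

# hidden layers with ReLU instead of sigmoid (assumed candidate)
hiddens = slim.stack(input_ph, slim.fully_connected, [100, 100, 100],
                     activation_fn=tf.nn.relu, scope="hidden")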