GeForce GTX 1070 (8GB)
ASRock Z170M Pro4S [Intel Z170 chipset]
Ubuntu 14.04 LTS desktop amd64
TensorFlow v0.11
cuDNN v5.1 for Linux
CUDA v8.0
Python 2.7.6
IPython 5.1.0 -- An enhanced Interactive Python.
gcc (Ubuntu 4.8.4-2ubuntu1~14.04.3) 4.8.4
GNU bash, version 4.3.8(1)-release (x86_64-pc-linux-gnu)
Using TensorFlow, I am training a network with 100 input nodes and 100 output nodes.
Related: http://qiita.com/7of9/items/7394f4326a88500a67b9
This time I switched the activation function of the output layer and compared the resulting differences.
API doc: https://www.tensorflow.org/versions/r0.11/api_docs/python/nn/activation_functions_
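For reference, the two non-linear output activations compared below are softplus and the logistic sigmoid. A minimal NumPy sketch of their definitions (tf.nn.softplus and tf.nn.sigmoid compute the same functions element-wise):

import numpy as np

def softplus(x):
    # softplus(x) = log(1 + exp(x)); smooth, positive, unbounded above
    return np.log1p(np.exp(x))

def sigmoid(x):
    # sigmoid(x) = 1 / (1 + exp(-x)); range (0, 1), matching the [0, 1) data
    return 1.0 / (1.0 + np.exp(-x))

x = np.linspace(-3.0, 3.0, 7)
print(softplus(x))
print(sigmoid(x))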
Data generation code > v0.9
Jupyter code.
In v0.9, both the input and output values are in the range [0, 1).
Running the code below generates test_in.csv and test_out.csv.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import os
'''
v0.9 Mar. 12, 2017
  - change [DATA_FACTOR] to [1.0]
v0.8 Mar. 05, 2017
  - add [DATA_FACTOR] to change range of the data
v0.7 Feb. 16, 2017
  - increase [NUM_FILEOUT] from 100 to 10000
v0.6 Feb. 14, 2017
  - fix bug > savetxt() was without 'delimiter=','
v0.5 Feb. 06, 2017
  - add [FILENAME_*_CSV],[FILENAME_*_BAK]
  - output to csv with append mode
v0.4 Jan. 21, 2017
  - set size of figures
v0.3 Jan. 21, 2017
  - show 2 images in one figure
v0.2 Jan. 14, 2017
  - calcOutput() return in numpy.array
  - add saveToCsvFile()
v0.1 Jan. 14, 2017
  - add calcOutput()
  - add showIn2D()
  - show 1d in 2d format
'''
'''
codingrule:PEP8
'''
XDIM = 10
YDIM = 10
INDIM = XDIM * YDIM
FILENAME_IN_CSV = 'test_in.csv'
FILENAME_OUT_CSV = 'test_out.csv'
FILENAME_IN_BAK = 'test_in.bak'
FILENAME_OUT_BAK = 'test_out.bak'
DATA_FACTOR = 1.0
def saveToCsvFile(data_1d, filename):
    wrk_1d = data_1d.reshape(1, INDIM)
    # np.savetxt(filename, wrk_1d, delimiter=',')
    with open(filename, 'a') as fd:
        np.savetxt(fd, wrk_1d, delimiter=',')
def calcOutput(in_1d):
    # output is the input reversed: out[idx] = in[len - idx - 1]
    len_1d = XDIM * YDIM
    out_1d = [0.0] * len_1d
    for idx in range(0, in_1d.size):
        out_1d[idx] = in_1d[len_1d - idx - 1]
    return np.array(out_1d)
def showIn2D(data_1d):
    # print(data_1d)
    data_2d = np.reshape(data_1d, (XDIM, YDIM))
    plt.imshow(data_2d, extent=(0, XDIM, 0, YDIM), cmap=cm.gist_rainbow)
    plt.show()
def showIn2D_2image_subplot_each(data1_1d, data2_1d):
    data1_2d = np.reshape(data1_1d, (XDIM, YDIM))
    data2_2d = np.reshape(data2_1d, (XDIM, YDIM))
    fig1 = plt.figure(1)
    fig1.set_size_inches(3.14, 3.14)
    plt.subplot(121)
    plt.title('input node')
    plt.imshow(data1_2d, extent=(0, XDIM, 0, YDIM), cmap=cm.gist_rainbow)
    plt.subplot(122)
    plt.title('output node')
    plt.imshow(data2_2d, extent=(0, XDIM, 0, YDIM), cmap=cm.gist_rainbow)
    plt.show()
def showIn2D_2image_subplot_first(data1_1d, data2_1d):
    data1_2d = np.reshape(data1_1d, (XDIM, YDIM))
    data2_2d = np.reshape(data2_1d, (XDIM, YDIM))
    fig, (axL, axR) = plt.subplots(ncols=2, figsize=(10, 4))
    axL.imshow(data1_2d, extent=(0, XDIM, 0, YDIM), cmap=cm.gist_rainbow)
    axL.grid(True)
    axR.imshow(data2_2d, extent=(0, XDIM, 0, YDIM), cmap=cm.gist_rainbow)
    axR.grid(True)
    fig.show()
if __name__ == '__main__':
    # back up the original csv files
    if os.path.exists(FILENAME_IN_CSV):
        os.rename(FILENAME_IN_CSV, FILENAME_IN_BAK)
    if os.path.exists(FILENAME_OUT_CSV):
        os.rename(FILENAME_OUT_CSV, FILENAME_OUT_BAK)

    # append to csv
    NUM_FILEOUT = 10000
    NUM_DISPLAY = 5
    for loop in range(NUM_FILEOUT):
        # in_1d = np.random.rand(INDIM)
        in_1d = np.random.rand(INDIM) * DATA_FACTOR
        out_1d = calcOutput(in_1d)
        saveToCsvFile(in_1d, FILENAME_IN_CSV)
        saveToCsvFile(out_1d, FILENAME_OUT_CSV)
        if loop < NUM_DISPLAY:
            showIn2D_2image_subplot_each(in_1d, out_1d)
            showIn2D_2image_subplot_first(in_1d, out_1d)
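A quick sanity check of the generated files (a minimal sketch, run after the generator above): each row of test_out.csv should be the corresponding row of test_in.csv reversed, since calcOutput() flips the 1-D index.

import numpy as np

ins = np.loadtxt('test_in.csv', delimiter=',')
outs = np.loadtxt('test_out.csv', delimiter=',')
print(ins.shape)                        # (10000, 100)
print(np.allclose(outs, ins[:, ::-1]))  # True if the reversal holds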
Training code v0.8
The activation function in
prediction = slim.fully_connected()
is switched among three choices (None, tf.nn.softplus, tf.nn.sigmoid), and the script is run once for each.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
import tensorflow as tf
import tensorflow.contrib.slim as slim
import numpy as np
'''
v0.8 Mar. 13, 2017
  - toying with output activation function
    + [None], [tf.nn.softplus], [tf.nn.sigmoid]
v0.7 Feb. 22, 2017
  - treat each hidden layer to handle dropout instead of slim.stack()
v0.6 Feb. 19, 2017
  - add sys.stdout.flush() to immediately print
v0.5 Feb. 18, 2017
  - add fc_drop()
v0.4 Feb. 15, 2017
  - fix bug > ZeroDivisionError: float division by zero @ shuffle_batch()
v0.3 Feb. 15, 2017
  - tweak [batch_size] for shuffle_batch()
v0.2 Feb. 15, 2017
  - fix bug > different dimensions for placeholder and network
v0.1 Feb. 06, 2017
  - read [test_in.csv],[test_out.csv]
'''
'''
codingrule:PEP8
'''
def fc_drop(inputs, *args, **kwargs):
    # Thanks to: http://qiita.com/shngt/items/f532601b4f059ce8584f
    net = slim.fully_connected(inputs, *args, **kwargs)
    return slim.dropout(net, 0.9)
filename_inp = tf.train.string_input_producer(["test_in.csv"])
filename_out = tf.train.string_input_producer(["test_out.csv"])
NUM_INP_NODE = 100
NUM_OUT_NODE = 100
# parse csv
# a. input node
reader = tf.TextLineReader()
key, value = reader.read(filename_inp)
deflist = [[0.] for idx in range(NUM_INP_NODE)]
input1 = tf.decode_csv(value, record_defaults=deflist)
# b. output node
key, value = reader.read(filename_out)
deflist = [[0.] for idx in range(NUM_OUT_NODE)]
output1 = tf.decode_csv(value, record_defaults=deflist)
# c. pack
# inputs = tf.pack([input1])
inputs = input1
# outputs = tf.pack([output1])
outputs = output1
batch_size = 2
inputs_batch, output_batch = tf.train.shuffle_batch(
    [inputs, outputs], batch_size, capacity=10, min_after_dequeue=batch_size)
input_ph = tf.placeholder("float", [None, 100])
output_ph = tf.placeholder("float", [None, 100])
# network
# a. with stack
# hiddens = slim.stack(input_ph, slim.fully_connected, [7, 7, 7],
#                      activation_fn=tf.nn.sigmoid, scope="hidden")
# a2. larger network
hiddens = slim.stack(input_ph, slim.fully_connected, [100, 100, 100],
                     activation_fn=tf.nn.sigmoid, scope="hidden")
# b. without stack
# sgm = tf.nn.sigmoid
# hiddens = slim.fully_connected(input_ph, 7, activation_fn=sgm, scope='hidden1')
# hiddens = slim.fully_connected(hiddens, 7, activation_fn=sgm, scope='hidden2')
# hiddens = slim.fully_connected(hiddens, 7, activation_fn=sgm, scope='hidden3')
# c. without stack, with dropout
# sgm = tf.nn.sigmoid
# hiddens = slim.fully_connected(input_ph, 7, activation_fn=sgm, scope='hidden1')
# drpout = slim.stack(hiddens, fc_drop, [7, 7], scope='hd_fc1')
# hiddens = slim.fully_connected(drpout, 7, activation_fn=sgm, scope='hidden2')
# drpout = slim.stack(hiddens, fc_drop, [7, 7], scope='hd_fc2')
# hiddens = slim.fully_connected(drpout, 7, activation_fn=sgm, scope='hidden3')
# a. None (linear)
prediction = slim.fully_connected(
    hiddens, 100, activation_fn=None, scope="output")
# b. softplus
# prediction = slim.fully_connected(
#     hiddens, 100, activation_fn=tf.nn.softplus, scope="output")
# c. sigmoid
# prediction = slim.fully_connected(
#     hiddens, 100, activation_fn=tf.nn.sigmoid, scope="output")
loss = tf.contrib.losses.mean_squared_error(prediction, output_ph)
train_op = slim.learning.create_train_op(loss, tf.train.AdamOptimizer(0.000001))
init_op = tf.initialize_all_variables()
with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        sess.run(init_op)
        for idx in range(90000):
            # for idx in range(100000):
            inpbt, outbt = sess.run([inputs_batch, output_batch])
            _, t_loss = sess.run(
                [train_op, loss],
                feed_dict={input_ph: inpbt, output_ph: outbt})
            if (idx+1) % 100 == 0:
                print("%d,%f" % (idx+1, t_loss))
                sys.stdout.flush()
    finally:
        coord.request_stop()
        coord.join(threads)
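The three result files read by the plotting code below correspond to one run per output activation, with the printed step,loss lines saved to a file for each run. As an alternative to editing the a/b/c block between runs, a hypothetical sketch that selects the output activation from a command-line argument (the names here are assumptions, not part of the original script):

# Hypothetical: pick the output activation at run time instead of
# editing the source between the three runs.
import sys
import tensorflow as tf
import tensorflow.contrib.slim as slim

ACTIVATIONS = {
    'none': None,                 # a. linear output
    'softplus': tf.nn.softplus,   # b.
    'sigmoid': tf.nn.sigmoid,     # c.
}
act_fn = ACTIVATIONS[sys.argv[1]] if len(sys.argv) > 1 else None

# then build the output layer with the selected function:
# prediction = slim.fully_connected(
#     hiddens, 100, activation_fn=act_fn, scope="output")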
Results
Jupyter code. data1 through data3 below load the step,loss pairs printed by the training script, saved once per output activation.
%matplotlib inline
'''
Last update: Mar. 12, 2017
'''
import numpy as np
import matplotlib.pyplot as plt
# data1 = np.loadtxt('res.learn.170215', delimiter=',')
# data1 = np.loadtxt('res.learn.N=1000_170216', delimiter=',')
# data1 = np.loadtxt('res.withStack_170222', delimiter=',')
# data1 = np.loadtxt('res.wordUpsideDown_1000word_170305', delimiter=',')
# data1 = np.loadtxt('res.learn.N=1000_factor1000_170305', delimiter=',')
# data2 = np.loadtxt('res.learn.N=1000_dropout@output_170218', delimiter=',')
# data2 = np.loadtxt('res.learn.N=1000_dropout@output_longrun_170218', delimiter=',')
# data2 = np.loadtxt('res.withoutStack_170222', delimiter=',')
# data2 = np.loadtxt('res.withoutStack_withDropout_170222', delimiter=',')
# data2 = np.loadtxt('res.wordUpsideDown_1000word_normalized_170305', delimiter=',')
# data2 = np.loadtxt('res.learn.N=1000_factor100_170305', delimiter=',')
data1 = np.loadtxt('res.learn.output:None_170312', delimiter=',')
data2 = np.loadtxt('res.learn.output:softplus_170312', delimiter=',')
data3 = np.loadtxt('res.learn.output:sigmoid_170312', delimiter=',')
input1 = data1[:,0]
output1 = data1[:,1]
input2 = data2[:,0]
output2 = data2[:,1]
input3 = data3[:,0]
output3 = data3[:,1]
fig = plt.figure(figsize=(8,8))
ax1 = fig.add_subplot(3,1,1)
ax2 = fig.add_subplot(3,1,2)
ax3 = fig.add_subplot(3,1,3)
ax1.scatter(input1,output1,s=2, label='output:None(Linear)')
ax2.scatter(input2,output2,s=2, label='output:softplus')
ax3.scatter(input3,output3,s=2, label='output:sigmoid')
ax1.set_xlabel('step')
ax1.set_ylabel('Error')
ax1.set_xlim(0, 90000)
ax1.set_ylim(0, 0.3)
ax1.grid(True)
ax1.legend(loc='upper right')
ax2.set_xlabel('step')
ax2.set_ylabel('Error')
ax2.set_xlim(0, 90000)
ax2.set_ylim(0, 0.3)
ax2.grid(True)
ax2.legend(loc='upper right')
ax3.set_xlabel('step')
ax3.set_ylabel('Error')
ax3.set_xlim(0, 90000)
ax3.set_ylim(0, 0.3)
ax3.grid(True)
ax3.legend(loc='upper right')
fig.tight_layout()
fig.show()
Discussion
- The final error reached is about the same for all three activation functions
- sigmoid converges fastest, followed by softplus
I read a comment on stackoverflow (I have lost the URL) saying that softplus (and the related softmax) pushes one output toward 1 and the others toward 0, but it is unclear whether that helps in the kind of function approximation done here.
The softmax "squishes" the inputs so that sum(input) = 1; it's a way of normalizing.
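As a quick check of that quoted description: softmax normalizes a vector so its elements sum to 1, with the largest input taking most of the mass (a minimal NumPy sketch):

import numpy as np

def softmax(x):
    # subtract the max for numerical stability, then exponentiate and normalize
    e = np.exp(x - np.max(x))
    return e / e.sum()

p = softmax(np.array([1.0, 2.0, 5.0]))
print(p)        # the largest input dominates
print(p.sum())  # 1.0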
The hidden layers used sigmoid in all cases. Could changing the hidden-layer activation reduce the final error further?
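A minimal sketch of that follow-up experiment, assuming tf.nn.relu as one candidate (only the hidden-layer line in the training code changes; relu is an assumption here, not something tested above):

# hidden layers with ReLU instead of sigmoid (assumed candidate)
hiddens = slim.stack(input_ph, slim.fully_connected, [100, 100, 100],
                     activation_fn=tf.nn.relu, scope="hidden")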