動作環境
GeForce GTX 1070 (8GB)
ASRock Z170M Pro4S [Intel Z170chipset]
Ubuntu 16.04 LTS desktop amd64
TensorFlow v1.1.0
cuDNN v5.1 for Linux
CUDA v8.0
Python 3.5.2
IPython 6.0.0 -- An enhanced Interactive Python.
gcc (Ubuntu 5.4.0-6ubuntu1~16.04.4) 5.4.0 20160609
GNU bash, version 4.3.48(1)-release (x86_64-pc-linux-gnu)
100万サンプル程度の数値セットを扱い、function approximationを行おうとしている。
csv形式で読み込むのでなくTFRecordsを使うのが良いだろうか。
TFRecordsの参考
- 学習データをTFRecordにした話 : TensorFlow将棋ソフト開発日誌 #12
- http://qiita.com/YusukeSuzuki@github/items/1388534bc274bc64b9b2
- TFRecords取り扱い時の注意事項が記載されている
- https://www.quora.com/What-is-the-best-way-to-read-data-into-Tensorflow
- サンプルコードを回答している方がおられる
読み込みファイル
IntField-Y
http://qiita.com/7of9/items/930cde32700320a8db63#initfield-y
v0.1 > 保存のみ [失敗]
toTFRecord_170701.py
import tensorflow as tf
import numpy as np
'''
v0.1 Jul. 01, 2017
  - read 'IntField-Y' then output as TFRecord
'''

# codingrule: PEP8

# Column layout of 'IntField-Y', as given by its 1st (text) line:
#   x y z |E|^2 Ex.r Ex.i Ey.r Ey.i Ez.r Ez.i
# That 1st line is text, so np.genfromtxt() cannot parse it as numbers and
# row 0 of `data` holds NaN instead of real values.
data = np.genfromtxt('IntField-Y', delimiter=' ')

# Skip the header row; otherwise an all-NaN record is written first.
rows = data[1:]

# Output key -> column index. Column 3 (|E|^2) is intentionally not stored.
FIELD_COLUMNS = (
    ('Exr', 4), ('Exi', 5),
    ('Eyr', 6), ('Eyi', 7),
    ('Ezr', 8), ('Ezi', 9),
)

with tf.python_io.TFRecordWriter("sample_170701.tfrecords") as tf_writer:
    for row in rows:
        example = tf.train.Example()
        feature = example.features.feature
        # Position (x, y, z) goes into one 3-element float list.
        feature["feature"].float_list.value.extend(row[:3].tolist())
        # Each field component goes into its own single-element float list.
        for key, col in FIELD_COLUMNS:
            feature[key].float_list.value.append(float(row[col]))
        tf_writer.write(example.SerializeToString())
run
$ python3 toTFRecord_170701.py
$
$ ls -l toTFRecord_170701.py
-rw-rw-r-- 1 xxx xxx 1337 7月 1 11:10 toTFRecord_170701.py
ファイルは作成されたが、きちんと値が入っているかはReader処理を実装するまで不明。
理解が浅いので間違っている可能性が高い。
(追記 2017/07/01)
読み込みがどうもうまくいかない。
v0.2 > 314(int64)の保存と読み込み
http://qiita.com/YusukeSuzuki@github/items/1388534bc274bc64b9b2
のリンク先の下記を参考にしました。
http://warmspringwinds.github.io/tensorflow/tf-slim/2016/12/21/tfrecords-guide/
code
toTFRecord_170701b.py
import tensorflow as tf
import numpy as np
def _bytes_feature(value):
    """Wrap one bytes value in a tf.train.Feature (bytes_list)."""
    bytes_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=bytes_list)
def _int64_feature(value):
    """Wrap one integer value in a tf.train.Feature (int64_list)."""
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)
OUT_FILE = 'sample_170701b.tfrecords'
pi = 314

with tf.python_io.TFRecordWriter(OUT_FILE) as tf_writer:
    # One record holding the integer under the key 'pi'.
    feature_map = {'pi': _int64_feature(pi)}
    features = tf.train.Features(feature=feature_map)
    example = tf.train.Example(features=features)
    serialized = example.SerializeToString()
    tf_writer.write(serialized)
fromTFRecord_170701b.py
import tensorflow as tf
import numpy as np
INP_FILE = 'sample_170701b.tfrecords'

record_iterator = tf.python_io.tf_record_iterator(path=INP_FILE)
for record in record_iterator:
    # Each record is parsed as a serialized tf.train.Example.
    example = tf.train.Example()
    example.ParseFromString(record)
    # Take the first entry of the int64 list stored under 'pi'.
    int64_values = example.features.feature['pi'].int64_list.value
    pi = int(int64_values[0])
    print(pi)
$ python3 toTFRecord_170701b.py
$ python3 fromTFRecord_170701b.py
314
int64型の数値(314)に関しての保存と読み込みはできた。
v0.3 > 3.14(np.float32)の保存と読み出し
toTFRecord_170701c.py
import tensorflow as tf
import numpy as np
import sys
def _bytes_feature(value):
    # Single bytes object -> one-element BytesList -> Feature.
    wrapped = tf.train.BytesList(value=[value])
    feature = tf.train.Feature(bytes_list=wrapped)
    return feature
def _int64_feature(value):
    # Single integer -> one-element Int64List -> Feature.
    wrapped = tf.train.Int64List(value=[value])
    feature = tf.train.Feature(int64_list=wrapped)
    return feature
# NOTE: same file name as the v0.2 sample, so running this overwrites it.
OUT_FILE = 'sample_170701b.tfrecords'

# One-element float32 array; its raw memory is stored as a bytes feature.
pi = np.array([3.14], dtype=np.float32)
# tobytes() replaces the deprecated ndarray.tostring() alias and returns
# the same raw bytes.
pi_raw = pi.tobytes()

with tf.python_io.TFRecordWriter(OUT_FILE) as tf_writer:
    example = tf.train.Example(features=tf.train.Features(feature={
        'pi_raw': _bytes_feature(pi_raw)}))
    tf_writer.write(example.SerializeToString())
fromTFRecord_170701c.py
import tensorflow as tf
import numpy as np
INP_FILE = 'sample_170701b.tfrecords'

record_iterator = tf.python_io.tf_record_iterator(path=INP_FILE)
for record in record_iterator:
    example = tf.train.Example()
    example.ParseFromString(record)
    # Raw bytes of the float32 array stored under 'pi_raw'.
    pi_raw = example.features.feature['pi_raw'].bytes_list.value[0]
    # np.frombuffer replaces the deprecated binary mode of np.fromstring.
    # It returns a read-only view over pi_raw, which is enough for printing.
    pi_1d = np.frombuffer(pi_raw, dtype=np.float32)
    reconst = pi_1d.reshape([1, -1])
    print(reconst)
$ python3 toTFRecord_170701c.py
$ python3 fromTFRecord_170701c.py
[[ 3.1400001]]
v0.4 > numpy.ndarray(np.float32)
toTFRecord_170701d.py
import tensorflow as tf
import numpy as np
# on Python 3.5.2
# codingrule: PEP8
def _bytes_feature(value):
    """Return a tf.train.Feature whose bytes_list holds only `value`."""
    return tf.train.Feature(
        bytes_list=tf.train.BytesList(
            value=[value],
        ),
    )
def _int64_feature(value):
    """Return a tf.train.Feature whose int64_list holds only `value`."""
    return tf.train.Feature(
        int64_list=tf.train.Int64List(
            value=[value],
        ),
    )
# NOTE: same file name as the v0.2 sample, so running this overwrites it.
OUT_FILE = 'sample_170701b.tfrecords'

data = np.genfromtxt('IntField-Y', delimiter=' ')
# 1st line is text string
# (e.g. "x y z |E|^2 Ex.r Ex.i Ey.r Ey.i Ez.r Ez.i")
linenum = len(data[:, 0]) - 1  # 9329 - 1 (1: text line)
print(linenum)

# Column 0 without the text line, converted to float32.
xpos = np.array(data[1:, 0], dtype=np.float32)
print(xpos)
# tobytes() replaces the deprecated ndarray.tostring() alias and returns
# the same raw bytes.
xpos_raw = xpos.tobytes()

with tf.python_io.TFRecordWriter(OUT_FILE) as tf_writer:
    # A single record: the element count plus the raw float32 buffer, so
    # that the reader can reshape the buffer again.
    example = tf.train.Example(features=tf.train.Features(feature={
        'linenum': _int64_feature(linenum),
        'xpos_raw': _bytes_feature(xpos_raw)}))
    tf_writer.write(example.SerializeToString())
fromTFRecord_170701d.py
import tensorflow as tf
import numpy as np
# on Python 3.5.2
# codingrule: PEP8
INP_FILE = 'sample_170701b.tfrecords'

record_iterator = tf.python_io.tf_record_iterator(path=INP_FILE)
for record in record_iterator:
    example = tf.train.Example()
    example.ParseFromString(record)
    features = example.features.feature
    # Element count stored next to the raw buffer.
    linenum = int(features['linenum'].int64_list.value[0])
    xpos_raw = features['xpos_raw'].bytes_list.value[0]
    # np.frombuffer replaces the deprecated binary mode of np.fromstring.
    # It returns a read-only view over xpos_raw, which is enough for printing.
    xpos_1d = np.frombuffer(xpos_raw, dtype=np.float32)
    # Reshape into `linenum` rows; the column count is inferred.
    xpos_org = xpos_1d.reshape([linenum, -1])
    print(linenum)
    print(xpos_org)
$ python3 toTFRecord_170701d.py
9328
[-0.23620997 0.23620997 -1.18104982 ..., 1.18104982 -0.23620997
0.23620997]
$ python3 fromTFRecord_170701d.py
9328
[[-0.23620997]
[ 0.23620997]
[-1.18104982]
...,
[ 1.18104982]
[-0.23620997]
[ 0.23620997]]