動作環境
GeForce GTX 1070 (8GB)
ASRock Z170M Pro4S [Intel Z170chipset]
Ubuntu 16.04 LTS desktop amd64
TensorFlow v1.1.0
cuDNN v5.1 for Linux
CUDA v8.0
Python 3.5.2
IPython 6.0.0 -- An enhanced Interactive Python.
gcc (Ubuntu 5.4.0-6ubuntu1~16.04.4) 5.4.0 20160609
GNU bash, version 4.3.48(1)-release (x86_64-pc-linux-gnu)
概要
This article is related to ADDA (light scattering simulator based on the discrete dipole approximation).
bash > 複数ディレクトリ内のファイルのTFRecords化 (個別処理)
において作成した複数のTFRecordsファイルを1つのTFRecordsファイルにまとめる。
複数ファイルをそのままTensorFlowで読込んで学習させる方法もありそうだが、今回は1つのファイルにしてみる。
ファイル構成
$ ls ../run3*/IntField-Y*tfrecords
../run353_sphere_g26_m1.33/IntField-Y_170709.tfrecords ../run361_sphere_g26_m1.4/IntField-Y_170709.tfrecords ../run369_sphere_g26_m1.45/IntField-Y_170709.tfrecords
../run354_sphere_g26_m1.33/IntField-Y_170709.tfrecords ../run362_sphere_g26_m1.4/IntField-Y_170709.tfrecords ../run370_sphere_g26_m1.45/IntField-Y_170709.tfrecords
../run355_sphere_g26_m1.33/IntField-Y_170709.tfrecords ../run363_sphere_g26_m1.4/IntField-Y_170709.tfrecords ../run371_sphere_g26_m1.5/IntField-Y_170709.tfrecords
../run356_sphere_g26_m1.33/IntField-Y_170709.tfrecords ../run364_sphere_g26_m1.4/IntField-Y_170709.tfrecords ../run372_sphere_g26_m1.5/IntField-Y_170709.tfrecords
../run357_sphere_g26_m1.33/IntField-Y_170709.tfrecords ../run365_sphere_g26_m1.45/IntField-Y_170709.tfrecords ../run373_sphere_g26_m1.5/IntField-Y_170709.tfrecords
../run358_sphere_g26_m1.33/IntField-Y_170709.tfrecords ../run366_sphere_g26_m1.45/IntField-Y_170709.tfrecords ../run374_sphere_g26_m1.5/IntField-Y_170709.tfrecords
../run359_sphere_g26_m1.4/IntField-Y_170709.tfrecords ../run367_sphere_g26_m1.45/IntField-Y_170709.tfrecords ../run375_sphere_g26_m1.5/IntField-Y_170709.tfrecords
../run360_sphere_g26_m1.4/IntField-Y_170709.tfrecords ../run368_sphere_g26_m1.45/IntField-Y_170709.tfrecords ../run376_sphere_g26_m1.5/IntField-Y_170709.tfrecords
code v0.2
combine_TFRecords_170722.py
import numpy as np
import tensorflow as tf
import glob
import sys
"""
v0.2 Jul. 22, 2017
- add combineTFRecords()
- add _bytes_feature()
- add _int64_feature()
- add convert_to_raw()
- add get_feature_float32()
v0.1 Jul. 22, 2017
- get file list
"""
# on
# Ubuntu 16.04 LTS
# TensorFlow v1.1
# Python 3.5.2
# codingrule:PEP8
OUT_FILE = 'combined_IntField-Y_170722.tfrecords'
def get_feature_float32(example, feature_name):
    """Decode one bytes feature of a parsed Example into a float32 row vector.

    Args:
        example: parsed ``tf.train.Example`` (or any object exposing the same
            ``features.feature[name].bytes_list.value`` layout).
        feature_name: key of the feature to decode.

    Returns:
        A writable ``np.float32`` array of shape ``(1, N)`` built from the
        first bytes value stored under ``feature_name``.
    """
    wrk_raw = (example.features.feature[feature_name]
               .bytes_list
               .value[0])
    # np.fromstring() is deprecated for binary input; np.frombuffer() is the
    # supported replacement. frombuffer() returns a read-only view of the
    # bytes object, so copy it to keep the writable array callers got before.
    wrk_1d = np.frombuffer(wrk_raw, dtype=np.float32).copy()
    return wrk_1d.reshape([1, -1])
def _bytes_feature(value):
    """Wrap a single bytes value in a ``tf.train.Feature``."""
    bytes_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=bytes_list)
def _int64_feature(value):
    """Wrap a single integer value in a ``tf.train.Feature``."""
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)
def convert_to_raw(orgval):
    """Serialize values to the raw bytes of a float32 array.

    Args:
        orgval: array-like of numbers; it is converted to ``np.float32``
            before serialization.

    Returns:
        ``bytes`` holding the float32 data in C order and native byte order.
    """
    wrk_org = np.array(orgval, dtype=np.float32)
    # ndarray.tostring() is a deprecated alias that actually returns bytes;
    # tobytes() is the supported spelling and yields the same data.
    return wrk_org.tobytes()
def combineTFRecords(inputfile, tfwriter):
    """Copy every record of one TFRecords file to an already-open writer.

    Each record is parsed as a ``tf.train.Example``. Its eleven ``*_raw``
    features are decoded to float32 arrays, re-encoded to raw bytes, and the
    rebuilt Example is written through ``tfwriter``.

    Args:
        inputfile: path of the TFRecords file to read.
        tfwriter: object with a ``write(serialized)`` method; the script
            passes a ``tf.python_io.TFRecordWriter``.
    """
    # Feature keys carried over from the input records.
    # NOTE(review): presumably position (x/y/z), refractive index (mr/mi)
    # and real/imaginary parts of Ex/Ey/Ez -- confirm against the producer.
    feature_names = (
        'xpos_raw', 'ypos_raw', 'zpos_raw',
        'mr_raw', 'mi_raw',
        'exr_raw', 'exi_raw',
        'eyr_raw', 'eyi_raw',
        'ezr_raw', 'ezi_raw',
    )
    for serialized in tf.python_io.tf_record_iterator(path=inputfile):
        # --- 1. read: decode all features before anything is written
        parsed = tf.train.Example()
        parsed.ParseFromString(serialized)
        decoded = [(name, get_feature_float32(parsed, name))
                   for name in feature_names]
        # --- 2. output: re-encode under the same keys
        rebuilt = tf.train.Example(features=tf.train.Features(feature={
            name: _bytes_feature(convert_to_raw(values))
            for name, values in decoded
        }))
        tfwriter.write(rebuilt.SerializeToString())
# Collect the per-run TFRecords files. glob.glob() returns them in
# arbitrary order, so the idx printed below does not follow the run number.
input_paths = glob.glob("../run*/IntField-Y_170709.tfrecords")
with tf.python_io.TFRecordWriter(OUT_FILE) as tf_writer:
    # For debugging, call sys.exit() here once idx reaches 2 to stop early.
    for idx, path in enumerate(input_paths):
        print("idx%d:%s" % (idx, path))
        combineTFRecords(path, tf_writer)
テスト読込みcode
まとめたファイルがきちんと読めるか確認するためのコード。
test_readCombined_170722.py
import numpy as np
import tensorflow as tf
"""
v0.2 Jul. 09, 2017
- read [mr] and [mi]
v0.1 Jul. 09, 2017
- read position and Ex, Ey, Ez
+ add get_feature_float32()
"""
# on
# Ubuntu 16.04 LTS
# TensorFlow v1.1
# Python 3.5.2
# codingrule: PEP8
def get_feature_float32(example, feature_name):
    """Decode one bytes feature of a parsed Example into a float32 row vector.

    Args:
        example: parsed ``tf.train.Example`` (or any object exposing the same
            ``features.feature[name].bytes_list.value`` layout).
        feature_name: key of the feature to decode.

    Returns:
        A writable ``np.float32`` array of shape ``(1, N)`` built from the
        first bytes value stored under ``feature_name``.
    """
    wrk_raw = (example.features.feature[feature_name]
               .bytes_list
               .value[0])
    # np.fromstring() is deprecated for binary input; np.frombuffer() is the
    # supported replacement. frombuffer() returns a read-only view of the
    # bytes object, so copy it to keep the writable array callers got before.
    wrk_1d = np.frombuffer(wrk_raw, dtype=np.float32).copy()
    return wrk_1d.reshape([1, -1])
INP_FILE = 'combined_IntField-Y_170722.tfrecords'
# Features are printed in this order, one output line per record.
FEATURE_NAMES = (
    'xpos_raw', 'ypos_raw', 'zpos_raw', 'mr_raw', 'mi_raw',
    'exr_raw', 'exi_raw', 'eyr_raw', 'eyi_raw', 'ezr_raw', 'ezi_raw',
)
for record in tf.python_io.tf_record_iterator(path=INP_FILE):
    example = tf.train.Example()
    example.ParseFromString(record)
    # Each decoded feature has shape (1, N); iterating it yields its rows,
    # which are printed side by side on a single line.
    rows = [row
            for name in FEATURE_NAMES
            for row in get_feature_float32(example, name)]
    print(*rows)
実行
$ python3 combine_TFRecords_170722.py
idx0:../run366_sphere_g26_m1.45/IntField-Y_170709.tfrecords
idx1:../run375_sphere_g26_m1.5/IntField-Y_170709.tfrecords
idx2:../run354_sphere_g26_m1.33/IntField-Y_170709.tfrecords
idx3:../run362_sphere_g26_m1.4/IntField-Y_170709.tfrecords
idx4:../run364_sphere_g26_m1.4/IntField-Y_170709.tfrecords
idx5:../run368_sphere_g26_m1.45/IntField-Y_170709.tfrecords
idx6:../run370_sphere_g26_m1.45/IntField-Y_170709.tfrecords
idx7:../run376_sphere_g26_m1.5/IntField-Y_170709.tfrecords
idx8:../run367_sphere_g26_m1.45/IntField-Y_170709.tfrecords
idx9:../run356_sphere_g26_m1.33/IntField-Y_170709.tfrecords
idx10:../run372_sphere_g26_m1.5/IntField-Y_170709.tfrecords
idx11:../run355_sphere_g26_m1.33/IntField-Y_170709.tfrecords
idx12:../run360_sphere_g26_m1.4/IntField-Y_170709.tfrecords
idx13:../run373_sphere_g26_m1.5/IntField-Y_170709.tfrecords
idx14:../run365_sphere_g26_m1.45/IntField-Y_170709.tfrecords
idx15:../run369_sphere_g26_m1.45/IntField-Y_170709.tfrecords
idx16:../run353_sphere_g26_m1.33/IntField-Y_170709.tfrecords
idx17:../run357_sphere_g26_m1.33/IntField-Y_170709.tfrecords
idx18:../run371_sphere_g26_m1.5/IntField-Y_170709.tfrecords
idx19:../run359_sphere_g26_m1.4/IntField-Y_170709.tfrecords
idx20:../run358_sphere_g26_m1.33/IntField-Y_170709.tfrecords
idx21:../run374_sphere_g26_m1.5/IntField-Y_170709.tfrecords
idx22:../run363_sphere_g26_m1.4/IntField-Y_170709.tfrecords
idx23:../run361_sphere_g26_m1.4/IntField-Y_170709.tfrecords
$ ls -l combined_IntField-Y_170722.tfrecords
-rw-rw-r-- 1 xxx xxx 56191872 7月 22 09:31 combined_IntField-Y_170722.tfrecords
$ python3 test_readCombined_170722.py | head -n 5
[-0.21666151] [-1.51663053] [-5.41653776] [ 1.45000005] [ 0.001] [-0.02894282] [ 0.02603715] [ 0.17363222] [ 0.70102149] [-0.09119416] [-0.12842615]
[ 0.21666151] [-1.51663053] [-5.41653776] [ 1.45000005] [ 0.001] [ 0.02894282] [-0.02603715] [ 0.17363222] [ 0.70102149] [-0.09119417] [-0.12842615]
[-1.0833075] [-1.0833075] [-5.41653776] [ 1.45000005] [ 0.001] [-0.11953837] [ 0.11888158] [ 0.32258603] [ 0.59490114] [-0.06808119] [-0.09587516]
[-0.64998454] [-1.0833075] [-5.41653776] [ 1.45000005] [ 0.001] [-0.11078756] [ 0.05728684] [ 0.17507839] [ 0.67870367] [-0.06764975] [-0.1068899]
[-0.21666151] [-1.0833075] [-5.41653776] [ 1.45000005] [ 0.001] [-0.04282469] [ 0.00599804] [ 0.12292478] [ 0.8378399] [-0.0632441] [-0.10424931]
Traceback (most recent call last):
File "test_readCombined_170722.py", line 50, in <module>
print(*list_pos, *list_e)
BrokenPipeError: [Errno 32] Broken pipe
$ python3 test_readCombined_170722.py | tail -n 5
[ 0.22439376] [ 1.12196875] [ 5.60984373] [ 1.39999998] [ 0.01] [-0.03255619] [ 0.01280934] [-3.08298993] [-0.54999858] [-0.09020738] [ 1.30495083]
[ 0.67318124] [ 1.12196875] [ 5.60984373] [ 1.39999998] [ 0.01] [-0.22019333] [ 0.02005777] [-2.49291277] [-0.39416489] [-0.08645402] [ 1.29670262]
[ 1.12196875] [ 1.12196875] [ 5.60984373] [ 1.39999998] [ 0.01] [-0.42727378] [ 0.01380414] [-2.04840279] [-0.19202116] [-0.09770373] [ 1.21033025]
[-0.22439376] [ 1.57075632] [ 5.60984373] [ 1.39999998] [ 0.01] [ 0.09677245] [-0.01133237] [-2.4711504] [-0.21153302] [-0.13845336] [ 1.68478251]
[ 0.22439376] [ 1.57075632] [ 5.60984373] [ 1.39999998] [ 0.01] [-0.09677245] [ 0.01133237] [-2.4711504] [-0.21153302] [-0.13845336] [ 1.68478251]
refractive indexの値を見ると、まとめることができているようだ。
サンプル数
$ python3 test_readCombined_170722.py | wc
223872 3916020 32910582