LoginSignup
0
1

More than 5 years have passed since last update.

TensorFlow > 複数のTFRecordsファイルを1つにまとめる v0.1,v0.2

Last updated at Posted at 2017-07-22
動作環境
GeForce GTX 1070 (8GB)
ASRock Z170M Pro4S [Intel Z170chipset]
Ubuntu 16.04 LTS desktop amd64
TensorFlow v1.1.0
cuDNN v5.1 for Linux
CUDA v8.0
Python 3.5.2
IPython 6.0.0 -- An enhanced Interactive Python.
gcc (Ubuntu 5.4.0-6ubuntu1~16.04.4) 5.4.0 20160609
GNU bash, version 4.3.48(1)-release (x86_64-pc-linux-gnu)

概要

This article is related to ADDA (light scattering simulator based on the discrete dipole approximation).

bash > 複数ディレクトリ内のファイルのTFRecords化 (個別処理)
において作成した複数のTFRecordsファイルを1つのTFRecordsファイルにまとめる。

複数ファイルをそのままTensorFlowで読込んで学習させる方法もありそうだが、今回は1つのファイルにしてみる。

ファイル構成

$ ls ../run3*/IntField-Y*tfrecords
../run353_sphere_g26_m1.33/IntField-Y_170709.tfrecords  ../run361_sphere_g26_m1.4/IntField-Y_170709.tfrecords   ../run369_sphere_g26_m1.45/IntField-Y_170709.tfrecords
../run354_sphere_g26_m1.33/IntField-Y_170709.tfrecords  ../run362_sphere_g26_m1.4/IntField-Y_170709.tfrecords   ../run370_sphere_g26_m1.45/IntField-Y_170709.tfrecords
../run355_sphere_g26_m1.33/IntField-Y_170709.tfrecords  ../run363_sphere_g26_m1.4/IntField-Y_170709.tfrecords   ../run371_sphere_g26_m1.5/IntField-Y_170709.tfrecords
../run356_sphere_g26_m1.33/IntField-Y_170709.tfrecords  ../run364_sphere_g26_m1.4/IntField-Y_170709.tfrecords   ../run372_sphere_g26_m1.5/IntField-Y_170709.tfrecords
../run357_sphere_g26_m1.33/IntField-Y_170709.tfrecords  ../run365_sphere_g26_m1.45/IntField-Y_170709.tfrecords  ../run373_sphere_g26_m1.5/IntField-Y_170709.tfrecords
../run358_sphere_g26_m1.33/IntField-Y_170709.tfrecords  ../run366_sphere_g26_m1.45/IntField-Y_170709.tfrecords  ../run374_sphere_g26_m1.5/IntField-Y_170709.tfrecords
../run359_sphere_g26_m1.4/IntField-Y_170709.tfrecords   ../run367_sphere_g26_m1.45/IntField-Y_170709.tfrecords  ../run375_sphere_g26_m1.5/IntField-Y_170709.tfrecords
../run360_sphere_g26_m1.4/IntField-Y_170709.tfrecords   ../run368_sphere_g26_m1.45/IntField-Y_170709.tfrecords  ../run376_sphere_g26_m1.5/IntField-Y_170709.tfrecords

code v0.2

combine_TFRecords_170722.py
import numpy as np
import tensorflow as tf
import glob
import sys

"""
v0.2 Jul. 22, 2017
    - add combineTFRecords()
    - add _bytes_feature()
    - add _int64_feature()
    - add convert_to_raw()
    - add get_feature_float32()
v0.1 Jul. 22, 2017
    - get file list
"""

# on
#   Ubuntu 16.04 LTS
#   TensorFlow v1.1
#   Python 3.5.2

# codingrule:PEP8


# Path of the single TFRecords file that this script writes all records to.
OUT_FILE = 'combined_IntField-Y_170722.tfrecords'


def get_feature_float32(example, feature_name):
    """Decode a raw-bytes feature of a parsed example into a float32 row.

    Args:
        example: parsed example exposing
            ``features.feature[feature_name].bytes_list.value``.
        feature_name: key of the bytes feature to decode.

    Returns:
        A writable float32 ndarray of shape (1, N) decoded from the first
        bytes value stored under ``feature_name``.

    Raises:
        IndexError: if the feature holds no bytes value.
    """
    wrk_raw = (example.features.feature[feature_name]
               .bytes_list
               .value[0])
    # np.fromstring is deprecated for binary input; np.frombuffer is the
    # supported replacement.  frombuffer yields a read-only view onto the
    # bytes object, so copy it to keep the result writable as before.
    wrk_1d = np.frombuffer(wrk_raw, dtype=np.float32).copy()
    wrk_org = wrk_1d.reshape([1, -1])
    return wrk_org


def _bytes_feature(value):
    """Wrap one bytes value in a tf.train.Feature holding a BytesList."""
    bytes_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=bytes_list)


def _int64_feature(value):
    """Wrap one integer value in a tf.train.Feature holding an Int64List."""
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)


def convert_to_raw(orgval):
    """Serialize values as raw float32 bytes.

    Args:
        orgval: array-like of numbers; it is converted to a float32
            ndarray before serialization.

    Returns:
        bytes containing the float32 data of the converted array.
    """
    wrk_org = np.array(orgval, dtype=np.float32)
    # ndarray.tostring() is a deprecated alias of tobytes(); tobytes()
    # produces the same byte string.
    wrk_raw = wrk_org.tobytes()
    return wrk_raw


def combineTFRecords(inputfile, tfwriter):
    """Copy every record of one TFRecords file into an open writer.

    Each record is parsed as a tf.train.Example, its eleven raw features
    are decoded to float32 arrays, re-encoded to bytes, and the rebuilt
    example is serialized and handed to ``tfwriter.write``.

    Args:
        inputfile: path of the TFRecords file to read.
        tfwriter: writer object whose ``write`` method receives each
            serialized example.
    """
    feature_names = (
        'xpos_raw', 'ypos_raw', 'zpos_raw',
        'mr_raw', 'mi_raw',
        'exr_raw', 'exi_raw',
        'eyr_raw', 'eyi_raw',
        'ezr_raw', 'ezi_raw',
    )
    for serialized in tf.python_io.tf_record_iterator(path=inputfile):
        # --- 1. read: decode all features before building the output
        parsed = tf.train.Example()
        parsed.ParseFromString(serialized)
        decoded = [(name, get_feature_float32(parsed, name))
                   for name in feature_names]

        # --- 2. output: re-encode under the same feature names
        rebuilt = tf.train.Example(features=tf.train.Features(feature={
            name: _bytes_feature(convert_to_raw(values))
            for name, values in decoded
            }))
        tfwriter.write(rebuilt.SerializeToString())


# Gather the per-run TFRecords files (in whatever order glob returns them)
# and append the records of each one to OUT_FILE.
pattern = "../run*/IntField-Y_170709.tfrecords"
res = glob.glob(pattern)
with tf.python_io.TFRecordWriter(OUT_FILE) as tf_writer:
    for idx, elem in enumerate(res):
        # Debug hint: call sys.exit() here once idx >= 2 to process only
        # the first two files.
        progress = "idx%d:%s" % (idx, elem)
        print(progress)
        combineTFRecords(elem, tf_writer)

テスト読込みcode

まとめたファイルがきちんと読めるか確認するためのコード。

test_readCombined_170722.py
import numpy as np
import tensorflow as tf

"""
v0.2 Jul. 09, 2017
  - read [mr] and [mi]
v0.1 Jul. 09, 2017
  - read position and Ex, Ey, Ez
     + add get_feature_float32()
"""

# on
#   Ubuntu 16.04 LTS
#   TensorFlow v1.1
#   Python 3.5.2

# codingrule: PEP8


def get_feature_float32(example, feature_name):
    """Decode a raw-bytes feature of a parsed example into a float32 row.

    Args:
        example: parsed example exposing
            ``features.feature[feature_name].bytes_list.value``.
        feature_name: key of the bytes feature to decode.

    Returns:
        A writable float32 ndarray of shape (1, N) decoded from the first
        bytes value stored under ``feature_name``.

    Raises:
        IndexError: if the feature holds no bytes value.
    """
    wrk_raw = (example.features.feature[feature_name]
               .bytes_list
               .value[0])
    # np.fromstring is deprecated for binary input; np.frombuffer is the
    # supported replacement.  frombuffer yields a read-only view onto the
    # bytes object, so copy it to keep the result writable as before.
    wrk_1d = np.frombuffer(wrk_raw, dtype=np.float32).copy()
    wrk_org = wrk_1d.reshape([1, -1])
    return wrk_org

# TFRecords file to read back and print, one line per record.
INP_FILE = 'combined_IntField-Y_170722.tfrecords'

# Feature keys printed first (position and refractive index) ...
POS_KEYS = ('xpos_raw', 'ypos_raw', 'zpos_raw', 'mr_raw', 'mi_raw')
# ... followed by the real/imaginary parts of the field components.
E_KEYS = ('exr_raw', 'exi_raw', 'eyr_raw', 'eyi_raw', 'ezr_raw', 'ezi_raw')

for record in tf.python_io.tf_record_iterator(path=INP_FILE):
    example = tf.train.Example()
    example.ParseFromString(record)

    # Each decoded feature is a 2-D array; collect its rows in key order.
    list_pos = tuple(row
                     for key in POS_KEYS
                     for row in get_feature_float32(example, key))
    list_e = tuple(row
                   for key in E_KEYS
                   for row in get_feature_float32(example, key))

    print(*list_pos, *list_e)

実行

$ python3 combine_TFRecords_170722.py 
idx0:../run366_sphere_g26_m1.45/IntField-Y_170709.tfrecords
idx1:../run375_sphere_g26_m1.5/IntField-Y_170709.tfrecords
idx2:../run354_sphere_g26_m1.33/IntField-Y_170709.tfrecords
idx3:../run362_sphere_g26_m1.4/IntField-Y_170709.tfrecords
idx4:../run364_sphere_g26_m1.4/IntField-Y_170709.tfrecords
idx5:../run368_sphere_g26_m1.45/IntField-Y_170709.tfrecords
idx6:../run370_sphere_g26_m1.45/IntField-Y_170709.tfrecords
idx7:../run376_sphere_g26_m1.5/IntField-Y_170709.tfrecords
idx8:../run367_sphere_g26_m1.45/IntField-Y_170709.tfrecords
idx9:../run356_sphere_g26_m1.33/IntField-Y_170709.tfrecords
idx10:../run372_sphere_g26_m1.5/IntField-Y_170709.tfrecords
idx11:../run355_sphere_g26_m1.33/IntField-Y_170709.tfrecords
idx12:../run360_sphere_g26_m1.4/IntField-Y_170709.tfrecords
idx13:../run373_sphere_g26_m1.5/IntField-Y_170709.tfrecords
idx14:../run365_sphere_g26_m1.45/IntField-Y_170709.tfrecords
idx15:../run369_sphere_g26_m1.45/IntField-Y_170709.tfrecords
idx16:../run353_sphere_g26_m1.33/IntField-Y_170709.tfrecords
idx17:../run357_sphere_g26_m1.33/IntField-Y_170709.tfrecords
idx18:../run371_sphere_g26_m1.5/IntField-Y_170709.tfrecords
idx19:../run359_sphere_g26_m1.4/IntField-Y_170709.tfrecords
idx20:../run358_sphere_g26_m1.33/IntField-Y_170709.tfrecords
idx21:../run374_sphere_g26_m1.5/IntField-Y_170709.tfrecords
idx22:../run363_sphere_g26_m1.4/IntField-Y_170709.tfrecords
idx23:../run361_sphere_g26_m1.4/IntField-Y_170709.tfrecords
$ ls -l combined_IntField-Y_170722.tfrecords 
-rw-rw-r-- 1 xxx xxx 56191872  7月 22 09:31 combined_IntField-Y_170722.tfrecords
$ python3 test_readCombined_170722.py | head -n 5
[-0.21666151] [-1.51663053] [-5.41653776] [ 1.45000005] [ 0.001] [-0.02894282] [ 0.02603715] [ 0.17363222] [ 0.70102149] [-0.09119416] [-0.12842615]
[ 0.21666151] [-1.51663053] [-5.41653776] [ 1.45000005] [ 0.001] [ 0.02894282] [-0.02603715] [ 0.17363222] [ 0.70102149] [-0.09119417] [-0.12842615]
[-1.0833075] [-1.0833075] [-5.41653776] [ 1.45000005] [ 0.001] [-0.11953837] [ 0.11888158] [ 0.32258603] [ 0.59490114] [-0.06808119] [-0.09587516]
[-0.64998454] [-1.0833075] [-5.41653776] [ 1.45000005] [ 0.001] [-0.11078756] [ 0.05728684] [ 0.17507839] [ 0.67870367] [-0.06764975] [-0.1068899]
[-0.21666151] [-1.0833075] [-5.41653776] [ 1.45000005] [ 0.001] [-0.04282469] [ 0.00599804] [ 0.12292478] [ 0.8378399] [-0.0632441] [-0.10424931]
Traceback (most recent call last):
  File "test_readCombined_170722.py", line 50, in <module>
    print(*list_pos, *list_e)
BrokenPipeError: [Errno 32] Broken pipe
$ python3 test_readCombined_170722.py | tail -n 5
[ 0.22439376] [ 1.12196875] [ 5.60984373] [ 1.39999998] [ 0.01] [-0.03255619] [ 0.01280934] [-3.08298993] [-0.54999858] [-0.09020738] [ 1.30495083]
[ 0.67318124] [ 1.12196875] [ 5.60984373] [ 1.39999998] [ 0.01] [-0.22019333] [ 0.02005777] [-2.49291277] [-0.39416489] [-0.08645402] [ 1.29670262]
[ 1.12196875] [ 1.12196875] [ 5.60984373] [ 1.39999998] [ 0.01] [-0.42727378] [ 0.01380414] [-2.04840279] [-0.19202116] [-0.09770373] [ 1.21033025]
[-0.22439376] [ 1.57075632] [ 5.60984373] [ 1.39999998] [ 0.01] [ 0.09677245] [-0.01133237] [-2.4711504] [-0.21153302] [-0.13845336] [ 1.68478251]
[ 0.22439376] [ 1.57075632] [ 5.60984373] [ 1.39999998] [ 0.01] [-0.09677245] [ 0.01133237] [-2.4711504] [-0.21153302] [-0.13845336] [ 1.68478251]

refractive indexの値を見ると、まとめることができているようだ。

サンプル数

$ python3 test_readCombined_170722.py | wc
 223872 3916020 32910582
0
1
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
1