LoginSignup
3
5

More than 5 years have passed since last update.

TFRecords > 保存と読み込み > int型 | np.float32型 | numpy.ndarray(np.float32)

Last updated at Posted at 2017-07-01
動作環境
GeForce GTX 1070 (8GB)
ASRock Z170M Pro4S [Intel Z170chipset]
Ubuntu 16.04 LTS desktop amd64
TensorFlow v1.1.0
cuDNN v5.1 for Linux
CUDA v8.0
Python 3.5.2
IPython 6.0.0 -- An enhanced Interactive Python.
gcc (Ubuntu 5.4.0-6ubuntu1~16.04.4) 5.4.0 20160609
GNU bash, version 4.3.48(1)-release (x86_64-pc-linux-gnu)

100万サンプル程度の数値セットを扱い、function approximationを行おうとしている。
csv形式で読み込むのではなく、TFRecordsを使うのが良いだろうか。

TFRecordsの参考

読み込みファイル

InitField-Y
http://qiita.com/7of9/items/930cde32700320a8db63#initfield-y

v0.1 > 保存のみ [失敗]

toTFRecord_170701.py
import tensorflow as tf
import numpy as np
'''
v0.1 Jul. 01, 2017
   - read 'IntField' then output as TFRecord
'''

# codingrule: PEP8

data = np.genfromtxt('IntField-Y', delimiter=' ')
# The 1st line of the file is a text string
# (e.g. "x y z |E|^2 Ex.r Ex.i Ey.r Ey.i Ez.r Ez.i").
# genfromtxt cannot parse it as numbers and yields a row of NaN, so that
# row is dropped here instead of being written out as a bogus sample.
samples = data[1:]

# Column layout (taken from the header line):
#   0-2: x, y, z      3: |E|^2 (not stored)
#   4,5: Ex.r, Ex.i   6,7: Ey.r, Ey.i   8,9: Ez.r, Ez.i

linenum = len(samples)  # 9329 lines in the file - 1 header line

# One tf.train.Example is written per data row.
with tf.python_io.TFRecordWriter("sample_170701.tfrecords") as tf_writer:
    for row in samples:
        xpos, ypos, zpos = row[0], row[1], row[2]
        Exr, Exi = row[4], row[5]
        Eyr, Eyi = row[6], row[7]
        Ezr, Ezi = row[8], row[9]
        #
        xyz = [xpos, ypos, zpos]
        example = tf.train.Example()
        # The position goes into one 3-element float list; each field
        # component goes into its own single-element float list.
        example.features.feature["feature"].float_list.value.extend(xyz)
        example.features.feature["Exr"].float_list.value.append(Exr)
        example.features.feature["Exi"].float_list.value.append(Exi)
        example.features.feature["Eyr"].float_list.value.append(Eyr)
        example.features.feature["Eyi"].float_list.value.append(Eyi)
        example.features.feature["Ezr"].float_list.value.append(Ezr)
        example.features.feature["Ezi"].float_list.value.append(Ezi)
        #
        tf_writer.write(example.SerializeToString())

run

$ python3 toTFRecord_170701.py
$
$ ls -l toTFRecord_170701.py 
-rw-rw-r-- 1 xxx xxx 1337  7月  1 11:10 toTFRecord_170701.py

ファイルは作成されたが、きちんと値が入っているかはReader処理を実装するまで不明。

理解が浅いので間違っている可能性が高い。

(追記 2017/07/01)
読み込みがどうもうまくいかない。

v0.2 > 314(int64)の保存と読み込み

http://qiita.com/YusukeSuzuki@github/items/1388534bc274bc64b9b2
のリンク先にある下記の記事を参考にした。
http://warmspringwinds.github.io/tensorflow/tf-slim/2016/12/21/tfrecords-guide/

code

toTFRecord_170701b.py
import tensorflow as tf
import numpy as np

def _bytes_feature(value):
    """Wrap a single bytes value in a tf.train.Feature holding a BytesList."""
    bytes_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=bytes_list)

def _int64_feature(value):
    """Wrap a single integer in a tf.train.Feature holding an Int64List."""
    int64_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int64_list)

# Destination TFRecords file.
OUT_FILE = 'sample_170701b.tfrecords'

# The integer 314 is stored as an int64 feature under the key 'pi'.
pi = 314
with tf.python_io.TFRecordWriter(OUT_FILE) as tf_writer:
    feature_map = {'pi': _int64_feature(pi)}
    features = tf.train.Features(feature=feature_map)
    example = tf.train.Example(features=features)
    # A single serialized Example is written to the file.
    serialized = example.SerializeToString()
    tf_writer.write(serialized)
fromTFRecord_170701b.py
import tensorflow as tf
import numpy as np

# TFRecords file to read back.
INP_FILE = 'sample_170701b.tfrecords'

record_iterator = tf.python_io.tf_record_iterator(path=INP_FILE)

# Each record is parsed into an Example, then the first value of the
# int64 list stored under 'pi' is printed.
for record in record_iterator:
    example = tf.train.Example()
    example.ParseFromString(record)

    pi_feature = example.features.feature['pi']
    pi_values = pi_feature.int64_list.value
    pi = int(pi_values[0])

    print(pi)
$ python3 toTFRecord_170701b.py 
$ python3 fromTFRecord_170701b.py 
314

int64型の数値(314)に関しての保存と読み込みはできた。

v0.3 > 3.14(np.float32)の保存と読み込み

toTFRecord_170701c.py
import tensorflow as tf
import numpy as np
import sys 

def _bytes_feature(value):
    """Return a tf.train.Feature whose bytes_list contains only `value`."""
    wrapped = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=wrapped)

def _int64_feature(value):
    """Return a tf.train.Feature whose int64_list contains only `value`."""
    wrapped = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=wrapped)

# Destination TFRecords file.
OUT_FILE = 'sample_170701b.tfrecords'

# The float is stored as the raw bytes of a 1-element np.float32 array.
pi = np.array([3.14], dtype=np.float32)
# tobytes() yields the same bytes as tostring(), which is a deprecated alias.
pi_raw = pi.tobytes()

with tf.python_io.TFRecordWriter(OUT_FILE) as tf_writer:
    # A single Example with one bytes feature, 'pi_raw', is written.
    example = tf.train.Example(features=tf.train.Features(feature={
        'pi_raw': _bytes_feature(pi_raw)}))
    tf_writer.write(example.SerializeToString())
fromTFRecord_170701c.py
import tensorflow as tf
import numpy as np

# TFRecords file to read back.
INP_FILE = 'sample_170701b.tfrecords'

record_iterator = tf.python_io.tf_record_iterator(path=INP_FILE)

for record in record_iterator:
    example = tf.train.Example()
    example.ParseFromString(record)

    # Raw bytes stored under the key 'pi_raw'.
    pi_raw = (example.features.feature['pi_raw']
              .bytes_list
              .value[0])
    # frombuffer() decodes the raw bytes as float32; fromstring() is
    # deprecated for binary input.
    pi_1d = np.frombuffer(pi_raw, dtype=np.float32)
    # Reshape to a single row before printing.
    reconst = pi_1d.reshape([1, -1])
    print(reconst)
$ python3 toTFRecord_170701c.py 
$ python3 fromTFRecord_170701c.py 
[[ 3.1400001]]

v0.4 > numpy.ndarray(np.float32)

toTFRecord_170701d.py
import tensorflow as tf
import numpy as np

# on Python 3.5.2
# codingrule: PEP8


def _bytes_feature(value):
    """Build a tf.train.Feature carrying `value` as a one-item BytesList."""
    payload = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=payload)


def _int64_feature(value):
    """Build a tf.train.Feature carrying `value` as a one-item Int64List."""
    payload = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=payload)

# Destination TFRecords file.
OUT_FILE = 'sample_170701b.tfrecords'

data = np.genfromtxt('IntField-Y', delimiter=' ')
# 1st line is text string
# (e.g. "x y z |E|^2 Ex.r Ex.i Ey.r Ey.i Ez.r Ez.i")

# Number of data rows, excluding the text header line.
linenum = len(data[:, 0]) - 1  # 9329 - 1 (1: text line)
print(linenum)

# Column 0 of every data row (header row skipped), as float32.
xpos = np.array(data[1:, 0], dtype=np.float32)
print(xpos)
# tobytes() yields the same bytes as tostring(), which is a deprecated alias.
xpos_raw = xpos.tobytes()

with tf.python_io.TFRecordWriter(OUT_FILE) as tf_writer:
    # The row count is stored next to the raw bytes so that the reader
    # can reshape the decoded array.
    example = tf.train.Example(features=tf.train.Features(feature={
        'linenum': _int64_feature(linenum),
        'xpos_raw': _bytes_feature(xpos_raw)}))
    tf_writer.write(example.SerializeToString())

fromTFRecord_170701d.py
import tensorflow as tf
import numpy as np

# on Python 3.5.2
# codingrule: PEP8

# TFRecords file to read back.
INP_FILE = 'sample_170701b.tfrecords'

record_iterator = tf.python_io.tf_record_iterator(path=INP_FILE)

for record in record_iterator:
    example = tf.train.Example()
    example.ParseFromString(record)

    # Row count stored under the key 'linenum'.
    linenum = int(example.features.feature['linenum']
                  .int64_list
                  .value[0])
    # Raw float32 bytes stored under the key 'xpos_raw'.
    xpos_raw = (example.features.feature['xpos_raw']
                .bytes_list
                .value[0])
    # frombuffer() decodes the raw bytes as float32; fromstring() is
    # deprecated for binary input.
    xpos_1d = np.frombuffer(xpos_raw, dtype=np.float32)
    # Reshape into `linenum` rows before printing.
    xpos_org = xpos_1d.reshape([linenum, -1])
    print(linenum)
    print(xpos_org)

$ python3 toTFRecord_170701d.py 
9328
[-0.23620997  0.23620997 -1.18104982 ...,  1.18104982 -0.23620997
  0.23620997]
$ python3 fromTFRecord_170701d.py 
9328
[[-0.23620997]
 [ 0.23620997]
 [-1.18104982]
 ..., 
 [ 1.18104982]
 [-0.23620997]
 [ 0.23620997]]
3
5
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
3
5