こちらは個人メモレベルです。調べたことを箇条書き的に記載しています。
####■目的
数十GBのファイルを読み込みデータ解析する。
(1)(再)読み込み速度を速くする。
(2)メモリ消費量を抑える。
(3)解析性能を向上させる。
####■メモリ消費量調査方法
import memory_profiler
#メモリ消費を見たいメソッドの上に@profileを記述
#以下はクラスメソッドに記述した例
@classmethod
@memory_profiler.profile
def main_process(cls, f_filter,...):
Line# Mem usage Increment Line Contents
314 29.8 MiB 0.0 MiB @classmethod
315 @memory_profiler.profile
316 def main_process(cls,
329 29.8 MiB 0.0 MiB cls.section = section
330 29.8 MiB 0.0 MiB cls.section_map.clear()
※ループの中で何回も呼び出される行のメモリ消費量は最大値が出力される模様
####■オブジェクトサイズ調査方法
from pympler import asizeof
print('size of 10 = %d' % asizeof.asizeof(10)) #バイトサイズが返ってくる
24
####■要素の集合を定義する方法
('abc_1234561', datetime.datetime(2019, 1, 20, 10, 20), 12345, 67890, 1234.5678)
#書き込み不可
#要素名で参照不可
nt = namedtuple("REPORT","name count value1 value2 value3")
nt('abc_123456', datetime.datetime(2019, 1, 20, 10, 20), 12345, 67890, 1234.5678)
#書き込み不可
#要素名で参照可
import array
array.array('I',[123, 456, day])
#全ての要素が同一の型であること
import numpy
numpy.array([1,2, 3])
#要素がすべて数値であること
ls=['abc_1234561', datetime.datetime(2019, 1, 20, 10, 20), 12345, 67890, 1234.5678]
#要素名でアクセス不可
class Report_slot:
    """Five-field report record whose attributes are fixed by ``__slots__``.

    Declaring ``__slots__`` limits instances to the listed attribute names,
    so assigning any other attribute raises ``AttributeError``.

    Args:
        name: Record identifier.
        count: Second field of the record.
        value1: Third field of the record.
        value2: Fourth field of the record.
        value3: Fifth field of the record.
    """

    # Every name assigned in __init__ must appear here; a misspelt name is
    # not a slot and makes the constructor fail.
    __slots__ = ('name', 'count', 'value1', 'value2', 'value3')

    def __init__(self, name, count, value1, value2, value3):
        self.name = name
        self.count = count
        self.value1 = value1
        self.value2 = value2
        self.value3 = value3
#要素名でアクセス可能
#__slots__でプロパティを限定することでオブジェクトサイズを減らす。
####■要素の集合の容量、アクセス性能
要素数は5とし、以下の型で定義した場合のオブジェクトサイズ、アクセス速度(sec/100000回)を調査。
'abc_123456', datetime.datetime(2019, 1, 20, 10, 20), 12345, 67890, 1234.5678
種類 | オブジェクトサイズ | 生成 | 参照 |
---|---|---|---|
tuple | 168 | 0.048 | 0.026 |
named_tuple | 80 | 0.159 | 0.064 |
list | 176 | 0.080 | 0.026 |
class | 424 | 0.176 | 0.034 |
class(slot) | 224 | 0.146 | 0.038 |
####■クラスについて
Javaのstaticメソッド、abstractメソッド相当の定義方法
from abc import ABC, abstractmethod


class DataAnalysis(ABC):
    """Abstract base whose subclasses must provide ``master_map``.

    Inheriting from ``ABC`` is what makes ``@abstractmethod`` effective:
    a subclass that does not override ``master_map`` cannot be instantiated.
    """

    # @classmethod must be the outer decorator when combined with
    # @abstractmethod.
    @classmethod
    @abstractmethod
    def master_map(cls, file, target_filter):
        """Class-level hook to be overridden by subclasses.

        Args:
            file: Input passed through to the implementation.
            target_filter: Filter passed through to the implementation.
        """
        pass
####■オブジェクトのシリアライズ
同一のファイルを何度も読み込む必要がある場合、読み込んだ結果を
バイナリファイルとして書き出すことで性能を向上させることができる。
#####(1)生成
書き込み単位を分けることでchunkを指定することができる。
import pickle

# chunk1: 'wb' creates (or truncates) the file and writes the first object.
with open('pickle_file', 'wb') as f:
    pickle.dump([[1, 2], [3, 4], [5, 6]], f)
# chunk2: 'ab' appends a second, independent pickle after the first one.
# Re-opening with 'wb' here would discard chunk1.
with open('pickle_file', 'ab') as f:
    pickle.dump([[7, 8], [9, 10], [11, 12]], f)
#####(2)読み込み
書き出したオブジェクトの合成が不要なケース。
# f_xx is the path of the pickle file to read.
# The with-block closes the file even if unpickling fails part-way.
with open(f_xx, 'rb') as f:
    unpickler = pickle.Unpickler(f)
    while True:
        try:
            # Each load() returns the next object that was dumped to the file.
            list_items = unpickler.load()
        except EOFError:
            # No more pickled objects in the file.
            break
        # If the chunks must be merged, extend an accumulator list here.
        print(list_items)
print 1回目: [[1, 2], [3, 4], [5, 6]]
print 2回目: [[7, 8], [9, 10], [11, 12]]
####■pythonファイルのパッケージ化
複数のpythonファイルをパッケージ化する。
#####(1)フォルダ構成
`__init__.py`を定義すること。
Pycharm project/
hoge(ルート)/
__init__.py
hogesum.py
submodule/
__init__.py
hogesum_a.py
hogesum_b.py
hogesum_c.py
#####(2)明示的な相対import
Pycharmではimport文を絶対参照で記述する場合、プロジェクト直下にパッケージを配置しなければならない。
また配置を変更したりパッケージ名を変更した場合、importのパスが通らなくなる。
このため、パッケージがどこに配置されても良いように、パッケージ内のモジュールは相対importする。
######(a)hogesum.pyでsubmodule下のhogesum_cをimportする。
from submodule import hogesum_c as panalysis
※Pycharmで「Unresolved reference」の警告が表示される場合、
hogeフォルダを右クリックして「Mark Directory As」→ 「Source Root」を選択すると警告が消える。
######(b)hogesum_c.pyでhogesum_a.pyで定義されたクラスをimportする
from .hogesum_a import DataAnalysis
from .hogesum_a import Filter
####■テスト用のコード(追記)
######(1)numpy.arrayなどのテスト
# -*- coding: utf-8 -*-
# Work with Python 3.5
# Script numpy_test.py
from pympler import asizeof
import numpy as np
import memory_profiler
@memory_profiler.profile
def main():
    """Build one 1000 x 1000 integer table as a numpy array and as nested lists.

    Each table is also transposed, and the row count, column count and
    pympler size of each original table are printed. The function is
    decorated with memory_profiler so that per-line memory use is reported.
    """
    side = 1000

    # numpy version of the table and its transpose.
    arry = np.arange(side * side).reshape((side, side))
    arry_t = arry.T
    print('arry row = %d col = %d size = %d' % (len(arry), len(arry[0]), asizeof.asizeof(arry)))

    print('...processing. It takes a minute or more.')

    # Same values as nested Python lists, then transposed via zip.
    arry2 = [[col + row * side for col in range(side)] for row in range(side)]
    arry2_t = [list(column) for column in zip(*arry2)]
    print('arry2 row = %d col = %d size = %d' % (len(arry2), len(arry2[0]), asizeof.asizeof(arry2)))


if __name__ == "__main__":
    main()
######(2)各種要素定義のテスト
# -*- coding: utf-8 -*-
# Work with Python 3.5
# Script array_test.py
import array
import sys
import datetime
import pickle
from pympler import asizeof
from collections import namedtuple
import time
from collections import deque
class hoge:
    """Three-field sample record stored in slots.

    ``a`` is always 123; ``b`` and ``c`` take the constructor arguments.
    """

    __slots__ = ('a', 'b', 'c')

    def __init__(self, day, f):
        self.a, self.b, self.c = 123, day, f
class hoge2:
    """Three-field sample record using a regular instance ``__dict__``.

    ``a`` is always 123; ``b`` and ``c`` take the constructor arguments.
    """

    def __init__(self, day, f):
        self.a, self.b, self.c = 123, day, f
class hoge3:
    """Four-field sample record using a regular instance ``__dict__``.

    ``a`` is always 123; ``b``, ``c`` and ``d`` take the constructor
    arguments ``day``, ``f`` and ``d``.
    """

    def __init__(self, d, day, f):
        self.a, self.b, self.c = 123, day, f
        self.d = d
def main(argv):
    """Print object sizes and bulk-construction timings for several record layouts.

    Sizes are measured with ``asizeof.asizeof``. Each timing section appends
    4,000,000 records to a fresh container and prints the elapsed wall-clock
    seconds.

    Args:
        argv: Command-line arguments; accepted for the script entry point
            but not used.
    """
    x_day = datetime.datetime.strptime('2019/1/20 10:20', '%Y/%m/%d %H:%M')
    # The same timestamp as integer epoch seconds.
    day = int(time.mktime(x_day.timetuple()))
    f1 = float('123.33')

    # Two small containers: rows holding a typed array, and plain-list rows.
    x1 = []
    x2 = []
    for i in range(0, 2):
        x1.append([array.array('i', [123, 456, day]), f1])
    for i in range(0, 2):
        x2.append([123, 456, day, f1])

    # pickle round trip of a string and of the array-based rows.
    binary = pickle.dumps('Hello, World!')
    bb = pickle.loads(binary)
    print(bb)
    del bb
    print('---')
    y1 = pickle.dumps(x1)
    print(pickle.loads(y1))

    z1 = hoge(day, f1)
    z2 = hoge2(day, f1)
    # objgraph.show_refs([x1], filename='sample-graph.png')
    # objgraph.show_most_common_types()

    # --- object sizes ---
    print('x_day size=%d' % asizeof.asizeof(x_day))
    print('day size=%d' % asizeof.asizeof(day))
    print('x1 size=%d' % asizeof.asizeof(x1))
    print('x2 size=%d' % asizeof.asizeof(x2))
    print('array element size=%d' % asizeof.asizeof([array.array('l', [123, 456, day]), f1]))
    print('list element size=%d' % asizeof.asizeof([123, 456, day, f1]))
    print('class slot element size=%d' % asizeof.asizeof(z1))
    print('class element size=%d' % asizeof.asizeof(z2))
    Foo = namedtuple("xoo", "A B C D")
    ff = Foo(123, 456, day, f1)
    print('namedtuple size=%d' % asizeof.asizeof(ff))
    print('A=%d' % ff.A)
    print('a element size=%d' % asizeof.asizeof('a'))
    # {123, 456, day, f1} is a set literal, so it is labelled as a set.
    print('set element size=%d' % asizeof.asizeof({123, 456, day, f1}))
    print('dict with key element size=%d' % asizeof.asizeof({'A':123, 'D':456, 'B':day, 'C':f1}))
    print('a=%d' % x1[0][0][0])
    print('123 size=%d' % asizeof.asizeof(123))
    print('x1[0][0][0] size=%d' % asizeof.asizeof(x1[0][0][0]))

    # --- construction timings ---
    # list rows, calling x4.append through attribute lookup each time.
    x4 = []
    start = time.time()
    for i in range(0,4000000):
        x4.append([i, 456, day, f1])
    elapsed_time = time.time() - start
    print("list = %f sec"%elapsed_time)

    # namedtuple rows, with append bound to a local name.
    x5 = []
    start = time.time()
    x5_append = x5.append
    for i in range(0, 4000000):
        x5_append(Foo(i, 456, day, f1))
    elapsed_time = time.time() - start
    print("namedtuple = %f sec" % elapsed_time)

    # class-instance rows.
    x6 = []
    start = time.time()
    for i in range(0, 4000000):
        x6.append(hoge3(i, day, f1))
    elapsed_time = time.time() - start
    print("class = %f sec" % elapsed_time)

    # list rows again, this time with append bound to a local name; the
    # label differs from the first list timing so the two can be told apart.
    x7 = []
    start = time.time()
    x7_append = x7.append
    for i in range(0, 4000000):
        x7_append([i, 456, day, f1])
    elapsed_time = time.time() - start
    print("list (bound append) = %f sec" % elapsed_time)

    # list rows collected in a deque instead of a list.
    x8 = deque()
    start = time.time()
    x8_append = x8.append
    for i in range(0, 4000000):
        x8_append([i, 456, day, f1])
    elapsed_time = time.time() - start
    print("deque = %f sec" % elapsed_time)
    # muppy.print_summary()


if __name__ == "__main__":
    main(sys.argv[1:])
######(3)各種データセーブ/ロードのテスト
# -*- coding: utf-8 -*-
# Work with Python 3.5
# Script elements_type_and_save_test.py
import sys
import pickle
from collections import namedtuple
import time
info_tuple = namedtuple('info_tuple', ['col0', 'col1', 'col2'])


def save_tuple():
    """Pickle 100000 ``info_tuple`` rows three times into one file and time it.

    The first dump truncates the file; the next two append, so the file ends
    up holding three consecutive pickles of the same list. The elapsed time
    for building the list and writing it is printed.
    """
    began = time.time()
    records = [info_tuple(n, 'abc', 'def') for n in range(100000)]
    target = 'C:\\temp\\tuple.pickle'
    for mode in ('wb', 'ab', 'ab'):
        with open(target, mode=mode) as out:
            pickle.dump(records, out)
    print("save tuple = %f sec" % (time.time() - began))
def load_tuple():
    """Load the first pickled list from the tuple file and time a full scan.

    Prints the time taken by ``pickle.load`` and then the time taken to read
    ``col0`` of every row, together with the last ``col0`` seen (-1 when the
    list is empty).
    """
    began = time.time()
    with open('C:\\temp\\tuple.pickle', mode='rb') as src:
        records = pickle.load(src)
    print("load tuple = %f sec" % (time.time() - began))

    began = time.time()
    last_col0 = -1
    for record in records:
        last_col0 = record.col0
    print("read tuple = %f sec count = %d" % (time.time() - began, last_col0))
def save_list():
    """Pickle 100000 three-item list rows three times into one file and time it.

    The first dump truncates the file; the next two append, so the file ends
    up holding three consecutive pickles of the same list. The elapsed time
    for building the list and writing it is printed.
    """
    began = time.time()
    rows = [[n, 'abc', 'def'] for n in range(100000)]
    target = 'C:\\temp\\list.pickle'
    for mode in ('wb', 'ab', 'ab'):
        with open(target, mode=mode) as out:
            pickle.dump(rows, out)
    print("save list = %f sec" % (time.time() - began))
def load_list():
    """Load the first pickled list from the list file and time a full scan.

    Prints the time taken by ``pickle.load`` and then the time taken to read
    item 0 of every row, together with the last value seen (-1 when the list
    is empty).
    """
    began = time.time()
    with open('C:\\temp\\list.pickle', mode='rb') as src:
        rows = pickle.load(src)
    print("load list = %f sec" % (time.time() - began))

    began = time.time()
    last_first = -1
    for row in rows:
        last_first = row[0]
    print("read list = %f sec count = %d" % (time.time() - began, last_first))
def save_csv(path='C:\\temp\\csv_data.csv', rows=100000):
    """Write ``rows`` lines of the form ``<index>,abc,def`` to a CSV file and time it.

    Args:
        path: Destination file; it is created or truncated. Defaults to the
            location used by the rest of this script.
        rows: Number of lines to write, numbered from 0.
    """
    start = time.time()
    # The with-block closes the file even if a write fails.
    with open(path, 'w') as f:
        # writelines() expects an iterable of lines; hand it one complete
        # line per item instead of a single string.
        f.writelines('%d,abc,def\n' % idx for idx in range(rows))
    elapsed_time = time.time() - start
    print("save csv = %f sec" % elapsed_time)
def load_csv(path='C:\\temp\\csv_data.csv'):
    """Read a comma-separated file into a list of rows and time a full scan.

    Prints the time taken to read and split every line, then the time taken
    to convert column 0 of every row to ``int``, together with the last
    value converted (-1 when the file is empty).

    Args:
        path: File to read. Defaults to the location used by the rest of
            this script.

    Raises:
        ValueError: If column 0 of some row is not an integer.
    """
    start = time.time()
    # The with-block closes the file even if reading fails.
    with open(path, 'r') as f:
        # Strip only the line terminator; slicing off the last character
        # would corrupt a final line that has no trailing newline.
        t_list = [line.rstrip('\n').split(',') for line in f]
    elapsed_time = time.time() - start
    print("load csv = %f sec" % elapsed_time)

    start = time.time()
    count = -1
    for data in t_list:
        count = int(data[0])
    elapsed_time = time.time() - start
    print("read csv = %f sec count = %d" % (elapsed_time, count))
def main(argv):
    """Run the three save benchmarks, then the three load benchmarks.

    Args:
        argv: Command-line arguments; accepted but not used.
    """
    steps = (save_tuple, save_list, save_csv, load_tuple, load_list, load_csv)
    for step in steps:
        step()


if __name__ == "__main__":
    main(sys.argv[1:])
######(4)データロードと入力のテスト
# -*- coding: utf-8 -*-
# Work with Python 3.5
# elements_type_and_save_test.pyの実行結果を使用する
# Script load_and_read_test.py
import sys
import pickle
import time
def pickle_load(unpickler):
    """Return a list of every object still readable from ``unpickler``.

    ``load()`` is called repeatedly until it raises ``EOFError``; the objects
    are returned in the order they were read.
    """
    loaded = []
    while True:
        try:
            item = unpickler.load()
        except EOFError:
            # Nothing left in the underlying stream.
            return loaded
        loaded.append(item)
def load_list():
    """Read the multi-chunk list pickle in several ways and print counts and timings.

    Pass 1 loads every chunk through ``pickle_load``, flattens them into one
    list of rows and prints how many rows there are. Pass 2 loads the chunks
    one at a time, printing each chunk's length and keeping the last one.
    The last chunk is then scanned twice: once inside the overall "load"
    timing and once for the separate "read" timing.
    """
    path = 'C:\\temp\\list.pickle'
    start = time.time()
    t_list = []

    # Pass 1: all chunks at once, flattened to rows.
    with open(path, 'rb') as f:
        count = len([flatten
                     for inner in pickle_load(pickle.Unpickler(f))
                     for flatten in inner])
    print('count=%d' % count)

    # Pass 2: chunk by chunk; t_list ends up as the last chunk in the file.
    with open(path, 'rb') as f:
        unpickler = pickle.Unpickler(f)
        while True:
            try:
                t_list = unpickler.load()
            except EOFError:
                break
            print('len=%d' % len(t_list))

    # Scan the retained chunk. No file access is needed for this step.
    count = 0
    for t_items in t_list:
        count = t_items[0]
    print('count=%d' % count)
    elapsed_time = time.time() - start
    print("load list = %f sec" % elapsed_time)

    start = time.time()
    count = -1
    for data in t_list:
        count = data[0]
    elapsed_time = time.time() - start
    print("read list = %f sec count = %d" % (elapsed_time, count))
def load_csv(path='C:\\temp\\csv_data.csv'):
    """Read a comma-separated file into a list of rows and time a full scan.

    Prints the time taken to read and split every line, then the time taken
    to convert column 0 of every row to ``int``, together with the last
    value converted (-1 when the file is empty).

    Args:
        path: File to read. Defaults to the file produced by
            elements_type_and_save_test.py.

    Raises:
        ValueError: If column 0 of some row is not an integer.
    """
    start = time.time()
    t_list = []
    # The with-block closes the file even if reading fails.
    with open(path, 'r') as f:
        for line in f:
            # Strip only the line terminator; slicing off the last character
            # would corrupt a final line that has no trailing newline.
            t_list.append(line.rstrip('\n').split(','))
    elapsed_time = time.time() - start
    print("load csv = %f sec" % elapsed_time)

    start = time.time()
    count = -1
    for data in t_list:
        count = int(data[0])
    elapsed_time = time.time() - start
    print("read csv = %f sec count = %d" % (elapsed_time, count))
def main(argv):
    """Run the pickle-list benchmark followed by the CSV benchmark.

    Args:
        argv: Command-line arguments; accepted but not used.
    """
    for step in (load_list, load_csv):
        step()


if __name__ == "__main__":
    main(sys.argv[1:])
######(4)メモリプロファイル
# -*- coding: utf-8 -*-
# Work with Python 3.5
# elements_type_and_save_test.pyの実行結果を使用する
# Script memory_profile_test.py -m memory_profiler
import sys
import pickle
import time
import memory_profiler
def pickle_load(unpickler):
    """Drain ``unpickler`` and return the objects it yields, in read order.

    Reading stops when ``load()`` raises ``EOFError``.
    """
    chunks = []
    try:
        while True:
            chunks.append(unpickler.load())
    except EOFError:
        # End of the underlying stream: everything has been read.
        pass
    return chunks
def load_list():
    """Read the multi-chunk list pickle in several ways and print counts and timings.

    Pass 1 loads every chunk through ``pickle_load``, flattens them into one
    list of rows and prints how many rows there are. Pass 2 loads the chunks
    one at a time, printing each chunk's length and keeping the last one.
    Pass 3 loads only the first chunk and prints its row count. Finally the
    chunk kept from pass 2 is scanned for the "read" timing.
    """
    path = 'C:\\temp\\list.pickle'
    start = time.time()
    t_list = []

    # Pass 1: all chunks at once, flattened to rows.
    with open(path, 'rb') as f:
        count = len([flatten
                     for inner in pickle_load(pickle.Unpickler(f))
                     for flatten in inner])
    print('count=%d' % count)

    # Pass 2: chunk by chunk; t_list ends up as the last chunk in the file.
    with open(path, 'rb') as f:
        unpickler = pickle.Unpickler(f)
        while True:
            try:
                t_list = unpickler.load()
            except EOFError:
                break
            print('len=%d' % len(t_list))

    # Pass 3: count the rows of the first chunk. The loop variable must not
    # be named t_list: rebinding it to a single row would make the scan
    # below index into scalars and raise TypeError.
    with open(path, 'rb') as f:
        count = 0
        for _row in pickle.Unpickler(f).load():
            count += 1
    print('count=%d' % count)
    elapsed_time = time.time() - start
    print("load list = %f sec" % elapsed_time)

    start = time.time()
    count = -1
    for data in t_list:
        count = data[0]
    elapsed_time = time.time() - start
    print("read list = %f sec count = %d" % (elapsed_time, count))
@memory_profiler.profile
def load_csv(path='C:\\temp\\csv_data.csv'):
    """Read a comma-separated file into a list of rows under the memory profiler.

    After timing the load and a full scan of column 0, the function
    repeatedly builds and discards temporary lists and finally deletes the
    loaded rows, so the profiler's per-line report shows how memory use
    changes at each step.

    Args:
        path: File to read. Defaults to the file produced by
            elements_type_and_save_test.py.

    Raises:
        ValueError: If column 0 of some row is not an integer.
    """
    start = time.time()
    t_list = []
    # The with-block closes the file even if reading fails.
    with open(path, 'r') as f:
        for line in f:
            # Strip only the line terminator; slicing off the last character
            # would corrupt a final line that has no trailing newline.
            cols = line.rstrip('\n').split(',')
            t_list.append(cols)
    elapsed_time = time.time() - start
    print("load csv = %f sec" % elapsed_time)

    start = time.time()
    count = -1
    for data in t_list:
        count = int(data[0])
    elapsed_time = time.time() - start
    print("read csv = %f sec count = %d" % (elapsed_time, count))

    print("...processing. It may take over 30 minutes.")
    # Allocate and release lists of shrinking length while t_list is alive.
    for i in range(10000):
        p = [j for j in range(10001-i)]
        p.clear()
        del p
    del t_list
def main(argv):
    """Run the profiled CSV benchmark.

    Args:
        argv: Command-line arguments; accepted but not used.
    """
    # The pickle benchmark is currently switched off:
    # load_list()
    load_csv()


if __name__ == "__main__":
    main(sys.argv[1:])
######(5)オブジェクトサイズの調査
# -*- coding: utf-8 -*-
# Work with Python 3.5
# Script size_test.py
import array
import sys
import datetime
from pympler import asizeof
from collections import namedtuple
import time
# Named-tuple layout of the five-field "data1" record.
nt = namedtuple("DATA1", ("id", "count", "value1", "value2", "value3"))
class Data1_slot:
    """Five-field "data1" record whose attributes are fixed by ``__slots__``."""

    __slots__ = ('id', 'count', 'value1', 'value2', 'value3')

    def __init__(self, id, count, value1, value2, value3):
        self.id, self.count = id, count
        self.value1, self.value2, self.value3 = value1, value2, value3
class Data1:
    """Five-field "data1" record stored in a regular instance ``__dict__``."""

    def __init__(self, id, count, value1, value2, value3):
        self.id, self.count = id, count
        self.value1, self.value2, self.value3 = value1, value2, value3
class hoge2:
    """Sample record with a fixed ``a`` of 123 plus ``b`` and ``c`` from the caller."""

    def __init__(self, day, f):
        # a is a constant; the other two mirror the arguments.
        self.a = 123
        self.b, self.c = day, f
class hoge3:
    """Sample record with a fixed ``a`` of 123 plus ``b``, ``c`` and ``d`` from the caller."""

    def __init__(self, d, day, f):
        # a is a constant; b, c and d mirror day, f and d respectively.
        self.a = 123
        self.b, self.c, self.d = day, f, d
def element_list_measure(a, b, c, d, e):
    """Report size, creation time and read time of a five-item list record.

    Prints the pympler size of ``[a, b, int(c), d, float(e)]``, then times
    100000 constructions (the fourth item is the loop index) and 100000
    rounds of reading all five items by position.

    Returns:
        The list built by the last iteration of the creation loop.
    """
    record = [a, b, int(c), d, float(e)]
    print('element list size = %d byte : \'%s\'' % (asizeof.asizeof(record), record))

    t0 = time.time()
    for serial in range(100000):
        record = [a, b, int(c), serial, float(e)]
    print("time %f sec : create list element * 100000" % (time.time() - t0))

    t0 = time.time()
    for _ in range(100000):
        # The values are read only to time the access; they are not used.
        e1 = record[0]
        e2 = record[1]
        e3 = record[2]
        e4 = record[3]
        e5 = record[4]
    print("time %f sec : read list element * 100000" % (time.time() - t0))
    return record
def element_tuple_measure(a, b, c, d, e):
    """Report size, creation time and read time of a five-item tuple record.

    Prints the pympler size of ``(a, b, int(c), d, float(e))``, then times
    100000 constructions (the fourth item is the loop index) and 100000
    rounds of reading all five items by position.

    Returns:
        The tuple built by the last iteration of the creation loop.
    """
    record = (a, b, int(c), d, float(e))
    print('element tuple size = %d byte : \'%s\'' % (asizeof.asizeof(record), record))

    t0 = time.time()
    for serial in range(100000):
        record = (a, b, int(c), serial, float(e))
    print("time %f sec : create tuple element * 100000" % (time.time() - t0))

    t0 = time.time()
    for _ in range(100000):
        # The values are read only to time the access; they are not used.
        e1 = record[0]
        e2 = record[1]
        e3 = record[2]
        e4 = record[3]
        e5 = record[4]
    print("time %f sec : read tuple element * 100000" % (time.time() - t0))
    return record
def element_namedtuple_measure(a, b, c, d, e):
    """Report size, creation time and read time of an ``nt`` named-tuple record.

    Prints the pympler size of ``nt(a, b, int(c), d, float(e))``, then times
    100000 constructions (the fourth field is the loop index) and 100000
    rounds of reading all five fields by name.

    Returns:
        The named tuple built by the last iteration of the creation loop.
    """
    record = nt(a, b, int(c), d, float(e))
    print('element namedtuple size = %d byte : \'%s\'' % (asizeof.asizeof(record), record))

    t0 = time.time()
    for serial in range(100000):
        record = nt(a, b, int(c), serial, float(e))
    print("time %f sec : create namedtuple element * 100000" % (time.time() - t0))

    t0 = time.time()
    for _ in range(100000):
        # The values are read only to time the access; they are not used.
        e1 = record.id
        e2 = record.count
        e3 = record.value1
        e4 = record.value2
        e5 = record.value3
    print("time %f sec : read namedtuple element * 100000" % (time.time() - t0))
    return record
def element_class_measure(a, b, c, d, e):
    """Report size, creation time and read time of a ``Data1`` instance.

    Prints the pympler size of ``Data1(a, b, int(c), d, float(e))``, then
    times 100000 constructions (the fourth field is the loop index) and
    100000 rounds of reading all five attributes.

    Returns:
        The instance built by the last iteration of the creation loop.
    """
    record = Data1(a, b, int(c), d, float(e))
    print('element class size = %d byte : \'%s\'' % (asizeof.asizeof(record), record))

    t0 = time.time()
    for serial in range(100000):
        record = Data1(a, b, int(c), serial, float(e))
    print("time %f sec : create class element * 100000" % (time.time() - t0))

    t0 = time.time()
    for _ in range(100000):
        # The values are read only to time the access; they are not used.
        e1 = record.id
        e2 = record.count
        e3 = record.value1
        e4 = record.value2
        e5 = record.value3
    print("time %f sec : read class element * 100000" % (time.time() - t0))
    return record
def element_class_slot_measure(a, b, c, d, e):
    """Report size, creation time and read time of a ``Data1_slot`` instance.

    Prints the pympler size of ``Data1_slot(a, b, int(c), d, float(e))``,
    then times 100000 constructions (the fourth field is the loop index) and
    100000 rounds of reading all five attributes.

    Returns:
        The instance built by the last iteration of the creation loop.
    """
    record = Data1_slot(a, b, int(c), d, float(e))
    print('element class slot size = %d byte : \'%s\'' % (asizeof.asizeof(record), record))

    t0 = time.time()
    for serial in range(100000):
        record = Data1_slot(a, b, int(c), serial, float(e))
    print("time %f sec : create class slot element * 100000" % (time.time() - t0))

    t0 = time.time()
    for _ in range(100000):
        # The values are read only to time the access; they are not used.
        e1 = record.id
        e2 = record.count
        e3 = record.value1
        e4 = record.value2
        e5 = record.value3
    print("time %f sec : read class slot element * 100000" % (time.time() - t0))
    return record
def list_measure(e_name, obj):
    """Report the size and speed of lists that hold ``obj`` repeatedly.

    For several lengths, a list is grown by appending ``obj`` and its pympler
    size is printed. Then three timings over 1000000 items are printed:
    appending via ``ls.append``, appending via a pre-bound ``append``, and
    reading every item by index.

    Args:
        e_name: Label used in the size lines.
        obj: Object appended to every list position.
    """
    for length in (1, 2, 10, 100, 1000, 10000):
        # Grown by append on purpose, so the measured list has been built
        # the same way as in the timing sections below.
        bucket = []
        for _ in range(length):
            bucket.append(obj)
        print('list[%s] size %d byte : list * %d \'%s\'' % (e_name, asizeof.asizeof(bucket),length, bucket[0:1]))

    # Append with the method looked up on every call.
    t0 = time.time()
    bucket = []
    for _ in range(1000000):
        bucket.append(obj)
    print("time %f sec : list append" % (time.time() - t0))

    # Append through a name bound once before the loop.
    t0 = time.time()
    bucket = []
    push = bucket.append
    for _ in range(1000000):
        push(obj)
    print("time %f sec : list_append" % (time.time() - t0))

    # Indexed read of every item of the list built just above.
    t0 = time.time()
    for pos in range(1000000):
        a = bucket[pos]
    print("time %f sec : list read" % (time.time() - t0))
def map_measure(e_name, obj):
    """Report the size and fill time of dicts keyed by ``'ABC_<number>'``.

    For several lengths, a dict is filled from the first items of ``obj``
    and its pympler size is printed. Then filling a dict with 100000 items
    is timed.

    Args:
        e_name: Label used in the size lines.
        obj: Indexable sequence supplying the values; it is indexed up to
            position 99999.
    """
    for length in (1, 2, 10, 100, 1000, 10000):
        table = {}
        for pos in range(length):
            table['ABC_' + str(pos + 10000)] = obj[pos]
        print('map[%s] size %d byte len = %d ' % (e_name, asizeof.asizeof(table), length))

    t0 = time.time()
    table = {}
    for pos in range(100000):
        table['ABC_' + str(pos + 100000)] = obj[pos]
    print("time %f sec : map append * 100000" % (time.time() - t0))
def main(argv):
    """Measure size and speed of the candidate record layouts and print a report.

    Three sample rows are parsed from comma-separated text. The first row is
    run through each ``element_*_measure`` helper (list, tuple, namedtuple,
    class, class with slots); the resulting records are then measured inside
    lists (``list_measure``) and dicts (``map_measure``). A few stand-alone
    object sizes are printed at the end.

    Args:
        argv: Command-line arguments; accepted for the script entry point
            but not used.
    """
    x_day = datetime.datetime.strptime('2019/1/20 10:20', '%Y/%m/%d %H:%M')
    # The same timestamp as integer epoch seconds.
    day = int(time.mktime(x_day.timetuple()))
    f1 = float('123.33')
    x1 = []
    x2 = []

    # Sample input rows. Only data1 ends with a trailing comma.
    data1 = 'abc_123456,2019/1/20 10:20,12345,67890,1234.5678,1,2,'
    data2 = 'xyz_123456,xyz,123456,ABC,DEF'
    data3 = 'def_123456,prop3,2019/1/21 10:20'

    print('list size')
    # Drop data1's trailing comma before splitting.
    cols1 = data1[:-1].split(',')
    d1 = datetime.datetime.strptime(cols1[1], '%Y/%m/%d %H:%M')
    n_element = element_list_measure(cols1[0], d1, int(cols1[2]), int(cols1[3]), float(cols1[4]))
    # data2 and data3 have no trailing separator, so they are split as they
    # are; slicing off the last character would shorten the final field.
    cols2 = data2.split(',')
    m_element = [cols2[0],cols2[1],cols2[2],cols2[3],cols2[4]]
    cols3 = data3.split(',')
    d3 = datetime.datetime.strptime(cols3[2], '%Y/%m/%d %H:%M')
    r_element = [cols3[0],cols3[1],d3]

    # Size of each individual item of the data1 list record.
    print('要素サイズ \'%s\': %dbyte' % (n_element[0], asizeof.asizeof(n_element[0]) ))
    print('要素サイズ \'%s\': %dbyte' % (str(n_element[1]), asizeof.asizeof(n_element[1])))
    print('要素サイズ \'%d\': %dbyte' % (n_element[2], asizeof.asizeof(n_element[2])))
    print('要素サイズ \'%d\': %dbyte' % (n_element[3], asizeof.asizeof(n_element[3])))
    print('要素サイズ \'%f\': %dbyte' % (n_element[4], asizeof.asizeof(n_element[4])))
    print('listサイズ データ2 = %dbyte : %s' % (asizeof.asizeof(m_element), m_element))
    print('listサイズ データ3 = %dbyte : %s' % (asizeof.asizeof(r_element), r_element))

    # Tuple records.
    t_n_element = element_tuple_measure(cols1[0], d1, int(cols1[2]), int(cols1[3]), float(cols1[4]))
    t_m_element = (cols2[0], cols2[1], cols2[2], cols2[3], cols2[4])
    t_r_element = (cols3[0], cols3[1], d3)
    print('tupleサイズ データ1 = %dbyte : %s' % (asizeof.asizeof(t_n_element), t_n_element))
    print('tupleサイズ データ2 = %dbyte : %s' % (asizeof.asizeof(t_m_element), t_m_element))
    print('tupleサイズ データ3 = %dbyte : %s' % (asizeof.asizeof(t_r_element), t_r_element))

    # Named-tuple records.
    mt = namedtuple("DATA2", "id sub1 sub2 item1 item2")
    rt = namedtuple("DATA3", "id item3 date")
    nt_n_element = element_namedtuple_measure(cols1[0], d1, int(cols1[2]), int(cols1[3]), float(cols1[4]))
    nt_m_element = mt(cols2[0], cols2[1], cols2[2], cols2[3], cols2[4])
    nt_r_element = rt(cols3[0], cols3[1], d3)
    print('namedtupleサイズ データ1 = %dbyte : %s' % (asizeof.asizeof(nt_n_element), nt_n_element))
    print('namedtupleサイズ データ2 = %dbyte : %s' % (asizeof.asizeof(nt_m_element), nt_m_element))
    print('namedtupleサイズ データ3 = %dbyte : %s' % (asizeof.asizeof(nt_r_element), nt_r_element))

    # Class records, without and with __slots__.
    c_n_element = element_class_measure(cols1[0], d1, int(cols1[2]), int(cols1[3]), float(cols1[4]))
    print('classサイズ データ1 = %dbyte : %s' % (asizeof.asizeof(c_n_element), c_n_element))
    cs_n_element = element_class_slot_measure(cols1[0], d1, int(cols1[2]), int(cols1[3]), float(cols1[4]))
    print('class_slotサイズ データ1 = %dbyte : %s' % (asizeof.asizeof(cs_n_element), cs_n_element))

    # Lists holding each kind of record.
    list_measure('None', None)
    list_measure('data1 list', n_element)
    list_measure('data1 tuple', t_n_element)
    list_measure('data1 namedtuple', nt_n_element)
    list_measure('data1 class', c_n_element)
    list_measure('data1 class slot', cs_n_element)
    list_measure('data2 list]', m_element)
    list_measure('data3 list]', r_element)

    nlist1 = [n_element for i in range(10000)]
    nlist1_2 = [n_element for i in range(20000)]
    mlist1 = [m_element for i in range(10000)]
    rlist1 = [r_element for i in range(10000)]
    nlist2 = [t_n_element for i in range(10000)]
    mlist2 = [t_m_element for i in range(10000)]
    rlist2 = [t_r_element for i in range(10000)]
    nlist3 = [nt_n_element for i in range(10000)]
    mlist3 = [nt_m_element for i in range(10000)]
    rlist3 = [nt_r_element for i in range(10000)]
    # The string literal below is a disabled block of extra size reports;
    # the lists above are kept so that it can be re-enabled as-is.
    '''
    print('list[list]*20000 データ1 = %dbyte : %s' % (asizeof.asizeof(nlist1_2), nlist1_2[0:1]))
    print('list[tuple]*10000 データ1 = %dbyte : %s' % (asizeof.asizeof(nlist2), nlist2[0:1]))
    print('list[namedtuple]*10000 データ1 = %dbyte : %s' % (asizeof.asizeof(nlist3), nlist3[0:1]))
    print('list[list]*10000 データ2 = %dbyte : %s' % (asizeof.asizeof(mlist1), mlist1[0:1]))
    print('list[tuple]*10000 データ2 = %dbyte : %s' % (asizeof.asizeof(mlist2), mlist2[0:1]))
    print('list[namedtuple]*10000 データ2 = %dbyte : %s' % (asizeof.asizeof(mlist3), mlist3[0:1]))
    print('list[list]*10000 データ3 = %dbyte : %s' % (asizeof.asizeof(rlist1), rlist1[0:1]))
    print('list[tuple]*10000 データ3 = %dbyte : %s' % (asizeof.asizeof(rlist2), rlist2[0:1]))
    print('list[namedtuple]*10000 データ3 = %dbyte : %s' % (asizeof.asizeof(rlist3), rlist3[0:1]))
    '''

    # Dicts holding each kind of record.
    map_measure('None', [None for i in range(100000)])
    map_measure('list[list]', [n_element for i in range(100000)])
    map_measure('list[named_tuple]', [nt_n_element for i in range(100000)])
    nmap1 = {}
    for i in range(10000):
        nmap1['ABC_' + str(i + 10000)] = nlist1[i]
    print('map[list]*10000 = %dbyte : %s ......' % (asizeof.asizeof(nmap1), list(nmap1.items())[0:5]))
    nmap2 = {}
    for i in range(10000):
        nmap2['ABC_' + str(i + 10000)] = nlist2[i]
    print('map[tuple]*10000 = %dbyte : %s ......' % (asizeof.asizeof(nmap2), list(nmap2.items())[0:5]))
    nmap3 = {}
    for i in range(10000):
        nmap3['ABC_' + str(i + 10000)] = nlist3[i]
    print('map[namedtuple]*10000(推定byte=216*length*40) = %dbyte : %s ......' % (asizeof.asizeof(nmap3), list(nmap3.items())[0:5]))
    # objgraph.show_refs([x1], filename='sample-graph.png')
    # objgraph.show_most_common_types()

    # Stand-alone object sizes.
    print('x_day size=%d' % asizeof.asizeof(x_day))
    print('day size=%d' % asizeof.asizeof(day))
    print('x1 size=%d' % asizeof.asizeof(x1))
    print('x2 size=%d' % asizeof.asizeof(x2))
    print('array element size=%d' % asizeof.asizeof([array.array('l', [123, 456, day]), f1]))
    print('list element size=%d' % asizeof.asizeof([123, 456, day, f1]))
    Foo = namedtuple("xyz", "A B C D")
    ff = Foo(123, 456, day, f1)
    print('nametuple size=%d' % asizeof.asizeof(ff))
    print('A=%d' % ff.A)
    print('a element size=%d' % asizeof.asizeof('a'))
    print('data2 element size=%d' % asizeof.asizeof({123, 456, day, f1}))
    print('data2 with key element size=%d' % asizeof.asizeof({'A':123, 'D':456, 'B':day, 'C':f1}))
    print('123 size=%d' % asizeof.asizeof(123))


if __name__ == "__main__":
    main(sys.argv[1:])