過去N点のデータを入力とし、未来N点のデータを予測(出力)とする場合のデータを生成します。(※過去と未来のステップ数が等しい場合のみ有効です。)
関数
import pandas as pd
import numpy as np
from keras.preprocessing.sequence import TimeseriesGenerator
def create_timeseries(data, lookback):
    """Build (past window, future window) pairs from a time series.

    For every valid start index ``i`` the pair is::

        X[i] = data[i : i + lookback]
        y[i] = data[i + lookback : i + 2 * lookback]

    so the input and the target always span the same number of steps.
    These are the windows obtained by pairing ``generator[i]`` with
    ``generator[i + lookback]`` of
    ``TimeseriesGenerator(data, data, length=lookback, batch_size=1)``.
    They are sliced directly with NumPy so that the number of pairs does not
    depend on ``len(generator)`` and so that ``lookback == 1`` works (the
    former ``[:-lookback+1]`` slice was empty in that case).

    Args:
        data: Array-like whose first axis is time, e.g. (steps, features).
        lookback: Number of steps in each input window and in each target
            window. Must be at least 1.

    Returns:
        Tuple ``(re_X, re_y)``; both arrays have shape
        ``(len(data) - 2 * lookback, lookback, *data.shape[1:])``.

    Raises:
        ValueError: If ``lookback`` is smaller than 1, or ``data`` has fewer
            than ``2 * lookback + 1`` steps.
    """
    data = np.asarray(data)
    if lookback < 1:
        raise ValueError("lookback must be >= 1, got {}".format(lookback))

    # The last row is deliberately left out: this keeps the result identical
    # to the generator-based version (19 pairs for 25 rows with lookback=3).
    n_pairs = len(data) - 2 * lookback
    if n_pairs < 1:
        raise ValueError(
            "data needs at least {} steps for lookback={}, got {}".format(
                2 * lookback + 1, lookback, len(data)
            )
        )

    # starts[i, j] == i + j, i.e. row i lists the indices of input window i.
    starts = np.arange(n_pairs)[:, None] + np.arange(lookback)
    re_X = data[starts]
    # The target window begins exactly `lookback` steps after the input.
    re_y = data[starts + lookback]
    return re_X, re_y
# 実行
# Demo input: the integers 0..99 arranged as 25 time steps x 4 features.
data = np.arange(100).reshape(-1, 4)
print(data.shape)
print(data[:5])

# Build the paired windows with 3 steps on each side.
X, y = create_timeseries(data, 3)

# Show the first and the last (input, target) pair.
print(X[0], y[0], sep="\n")
print("=====")
print(X[-1], y[-1], sep="\n")
# 出力
(25, 4)
[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]
[12 13 14 15]
[16 17 18 19]]
0 [[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]]] shape=(1, 3, 4)
=> [[[12 13 14 15]
[16 17 18 19]
[20 21 22 23]]] shape=(1, 3, 4)
=====================
[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]]
[[12 13 14 15]
[16 17 18 19]
[20 21 22 23]]
=====
[[72 73 74 75]
[76 77 78 79]
[80 81 82 83]]
[[84 85 86 87]
[88 89 90 91]
[92 93 94 95]]
解説
準備
import pandas as pd
import numpy as np
from keras.preprocessing.sequence import TimeseriesGenerator
# Use 3 time steps per window.
lookback = 3
使うデータ
# Sample series: 0..99 reshaped into rows of 4 values (one row per time step).
data = np.reshape(np.arange(100), (-1, 4))
print(data.shape)
# Notebook-style display of the first five rows.
data[:5]
Generator生成
# `data` is passed as both the samples and the targets; length=lookback sets
# the window size and batch_size=1 makes every item hold a single window.
generator = TimeseriesGenerator(data, data, length=lookback, batch_size=1)
生成されるデータ
# x is a window of `lookback` rows; y is the row that follows it.
for x, y in generator:
    print(x, "=>", y)
生成されるデータからLSTM用に抽出
# Pair every input window with the window that starts `lookback` steps later.
# The pair count comes from the data length instead of slicing
# range(len(generator))[:-lookback+1]: that slice is empty when lookback == 1
# (its stop becomes 0) and its length depends on what len(generator) reports.
X_li, y_li = list(), list()
for i in range(len(data) - 2 * lookback):
    x, y = generator[i]  # x: rows i .. i+lookback-1 (y is not used here)
    x2, y2 = generator[i + lookback]  # x2: the following `lookback` rows
    X_li.append(x)
    y_li.append(x2)  # the next window, not the single-row y, is the target
    print(i, x, "shape={}".format(x.shape))
    print("=>", x2, "shape={}".format(x2.shape))
    print("=====================")
抽出したデータを結合
# Join the collected single-window batches along the first axis, one array
# for the inputs and one for the targets.
re_X, re_y = (np.concatenate(chunks, axis=0) for chunks in (X_li, y_li))
# re_X.shape, re_y.shape => (19, 3, 4), (19, 3, 4)
完成したデータ
# Print each input window next to its own target window. The target printed
# is the loop variable `y`; printing `x2` would repeat the stale value left
# over from the extraction loop for every pair.
for x, y in zip(re_X, re_y):
    print(x)
    print("=>", y)
    print("=====================")