概要
Kerasによる時系列データの機械学習。
時系列データをTimeseriesGeneratorを使って準備する
使い方を理解するためのサンプル
keras.preprocessing.sequence.TimeseriesGenerator
環境
- Google Colaboratory
- Keras
参考 - https://machinelearningmastery.com/how-to-use-the-timeseriesgenerator-for-time-series-forecasting-in-keras/
Univariate Time Series Example (一変量時系列の例)
TimeseriesGenerator(data, targets, length=n_input, batch_size=1)
data:連続したデータ
targets: ターゲット用データ（各入力ウィンドウに対応する目的値）
length: 出力シーケンスの長さ
Simple MLP Example
# univariate one step problem with mlp
# Univariate one-step problem with an MLP.
from numpy import array
from keras.models import Sequential
from keras.layers import Dense
from keras.preprocessing.sequence import TimeseriesGenerator
import matplotlib.pyplot as plt

# Define the dataset: a simple increasing sequence.
series = array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

# Define the generator: each sample is a window of n_input consecutive
# values and the target is the value that immediately follows the window.
n_input = 2
generator = TimeseriesGenerator(series, series, length=n_input, batch_size=1)
print('Samples: %d' % len(generator))

# Print each (input window, target) sample produced by the generator.
# NOTE: the loop body must be indented — the original paste had lost it.
for i in range(len(generator)):
    x, y = generator[i]
    print('%s => %s' % (x, y))
output
Samples: 8
[[1 2]] => [3]
[[2 3]] => [4]
[[3 4]] => [5]
[[4 5]] => [6]
[[5 6]] => [7]
[[6 7]] => [8]
[[7 8]] => [9]
[[8 9]] => [10]
# Build and train a small MLP, then forecast one step beyond the series.
model = Sequential([
    Dense(100, activation='relu', input_dim=n_input),
    Dense(1),
])
model.compile(optimizer='adam', loss='mse')

# Train from the generator: one batch per epoch, 200 epochs.
model.fit_generator(generator, steps_per_epoch=1, epochs=200, verbose=0)

# Plot the training-loss curve.
loss_history = model.history.history['loss']
plt.plot(range(len(loss_history)), loss_history)

# One-step out-of-sample prediction: the value expected to follow [9, 10].
x_input = array([9, 10]).reshape((1, n_input))
yhat = model.predict(x_input, verbose=0)
print(yhat)
output
[[11.610671]]
-- グラフ省略--
LSTM Example
- LSTMの入力データのshapeは[samples, timesteps, features]にする必要がある
- Generatorへの入力データのshapeは[samples, features]とする
series = series.reshape((len(series), n_features))
# univariate one step problem with lstm
# Univariate one-step problem with an LSTM.
from numpy import array
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.preprocessing.sequence import TimeseriesGenerator
import matplotlib.pyplot as plt

# Define the dataset.
series = array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])

# LSTM input must be [samples, timesteps, features]; the generator expects
# its data argument shaped [samples, features], so reshape [10] -> [10, 1].
n_features = 1
series = series.reshape((len(series), n_features))

# Define the generator.
n_input = 2
generator = TimeseriesGenerator(series, series, length=n_input, batch_size=1)
print('Samples: %d' % len(generator))

# Print each (input window, target) sample.
# NOTE: the loop body must be indented — the original paste had lost it.
for i in range(len(generator)):
    x, y = generator[i]
    print('%s => %s' % (x, y))
output
Samples: 8
[[[1]
[2]]] => [[3]]
[[[2]
[3]]] => [[4]]
[[[3]
[4]]] => [[5]]
[[[4]
[5]]] => [[6]]
[[[5]
[6]]] => [[7]]
[[[6]
[7]]] => [[8]]
[[[7]
[8]]] => [[9]]
[[[8]
[9]]] => [[10]]
# Build and train the LSTM, then make a one-step out-of-sample forecast.
model = Sequential([
    LSTM(100, activation='relu', input_shape=(n_input, n_features)),
    Dense(1),
])
model.compile(optimizer='adam', loss='mse')

# Train directly from the generator (one pass over all samples per epoch).
model.fit_generator(generator, epochs=100, verbose=0)

# Plot the training-loss curve.
loss_history = model.history.history['loss']
plt.plot(range(len(loss_history)), loss_history)

# Predict the value that follows [9, 10]; the input must be reshaped to
# (samples, timesteps, features) to match the LSTM's expected rank.
x_input = array([9, 10]).reshape((1, n_input, n_features))
yhat = model.predict(x_input, verbose=0)
print(yhat)
output
[[11.014828]]
-- グラフ省略--
Multivariate Time Series Example (多変量時系列の例)
LSTM Example (多変量)
多変量を扱うには hstackでまとめる
dataset = hstack((in_seq1, in_seq2))
# multivariate one step problem
# Multivariate one-step problem: two input series, one output series.
from numpy import array
from numpy import hstack
from numpy import insert, delete  # NOTE(review): unused in this snippet — kept in case later cells rely on them
from keras.preprocessing.sequence import TimeseriesGenerator
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
import matplotlib.pyplot as plt

# Define the dataset: out_seq[t] = in_seq1[t] + in_seq2[t].
in_seq1 = array([10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
in_seq2 = array([15, 25, 35, 45, 55, 65, 75, 85, 95, 105])
out_seq = array([25, 45, 65, 85, 105, 125, 145, 165, 185, 205])

# Reshape each series to [samples, 1] so they can be stacked as columns.
in_seq1 = in_seq1.reshape((len(in_seq1), 1))
in_seq2 = in_seq2.reshape((len(in_seq2), 1))
out_seq = out_seq.reshape((len(out_seq), 1))

# Horizontally stack the columns into one [samples, features] dataset.
dataset = hstack((in_seq1, in_seq2))

# Define the generator: input windows of n_input rows from `dataset`,
# targets taken from the separate `out_seq` array.
n_input = 3
n_features = dataset.shape[1]
generator = TimeseriesGenerator(dataset, out_seq, length=n_input, batch_size=1)

# Print each (input window, target) sample.
# NOTE: the loop body must be indented — the original paste had lost it.
for i in range(len(generator)):
    x, y = generator[i]
    print('%s => %s' % (x, y))
output
Using TensorFlow backend.
[[[10 15]
[20 25]
[30 35]]] => [[85]]
[[[20 25]
[30 35]
[40 45]]] => [[105]]
[[[30 35]
[40 45]
[50 55]]] => [[125]]
[[[40 45]
[50 55]
[60 65]]] => [[145]]
[[[50 55]
[60 65]
[70 75]]] => [[165]]
[[[60 65]
[70 75]
[80 85]]] => [[185]]
[[[70 75]
[80 85]
[90 95]]] => [[205]]
# Build and train the multivariate LSTM, then forecast one step ahead.
model = Sequential([
    LSTM(100, activation='relu', input_shape=(n_input, n_features)),
    Dense(1),
])
model.compile(optimizer='adam', loss='mse')

# Train from the generator.
model.fit_generator(generator, steps_per_epoch=2, epochs=50, verbose=0)

# Plot the training loss and report its final value.
loss_history = model.history.history['loss']
print('loss:', loss_history[-1])
plt.plot(range(len(loss_history)), loss_history)

# One-step out-of-sample prediction for the window following the data.
x_input = array([[80, 85], [90, 95], [100, 105]]).reshape((1, n_input, n_features))
yhat = model.predict(x_input, verbose=1)
print(yhat)
output
loss: 3.0511040687561035
1/1 [==============================] - 0s 178ms/step
[[231.18332]]
(グラフ略)