はじめに
時系列データに対する交差検証(クロスバリデーション)のやり方は意外と情報が見つからなかったのでまとめてみました。
sklearn.model_selection.TimeSeriesSplit
を用いた実行例を以下に示します。
実行例
import numpy as np
import xgboost as xgb
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV

# Toy dataset: 10 samples, 2 features (the .T gives shape (10, 2)).
X = np.array([[40, 15, 36, 11, 100, 22, 21, 76, 3, 32],
              [3.1, 3.5, 1.3, 2.1, 8.3, 1.1, 3.4, 2.0, 2.4, 5.6]]).T
y = np.array([3, 21, 13, 7, 4, 1, 19, 2, 1, 1])

# TimeSeriesSplit never trains on samples that come after the test fold:
# each successive split extends the training window forward in time.
tscv = TimeSeriesSplit(n_splits=3)
for train_idx, test_idx in tscv.split(X):
    print(train_idx, test_idx)
[0 1 2 3] [4 5]
[0 1 2 3 4 5] [6 7]
[0 1 2 3 4 5 6 7] [8 9]
XGBoostでの実行例
import numpy as np
import xgboost as xgb
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV

# Same toy dataset as above: 10 samples, 2 features.
X = np.array([[40, 15, 36, 11, 100, 22, 21, 76, 3, 32],
              [3.1, 3.5, 1.3, 2.1, 8.3, 1.1, 3.4, 2.0, 2.4, 5.6]]).T
y = np.array([3, 21, 13, 7, 4, 1, 19, 2, 1, 1])

model = xgb.XGBRegressor()
params = {
    "learning_rate" : [0.1,0.3,0.5],
    "max_depth" : [2,3,5],
}

# NOTE: .split(X) returns a one-shot generator; it is consumed by the
# single fit() call below, so build a fresh one before refitting.
tscv = TimeSeriesSplit(n_splits=3).split(X)
gscv = GridSearchCV(estimator=model,
                    cv=tscv,
                    param_grid=params)

# Bug fix: the original snippet never ran the search. fit() executes the
# grid search and (as the last expression) prints the fitted-estimator
# repr shown in the output below.
gscv.fit(X, y)
GridSearchCV(cv=<generator object TimeSeriesSplit.split at 0x7f2a5ee6c830>,
error_score='raise',
estimator=XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
colsample_bytree=1, gamma=0, learning_rate=0.1, max_delta_step=0,
max_depth=3, min_child_weight=1, missing=None, n_estimators=100,
n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
silent=True, subsample=1),
fit_params={}, iid=True, n_jobs=1,
param_grid={'learning_rate': [0.1, 0.3, 0.5], 'max_depth': [2, 3, 5]},
pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
scoring=None, verbose=0)
参考