Purpose of this article
We take a look at how the ARMA model, a standard time-series analysis method, is implemented in the Python library statsmodels. In particular, we trace where the fitting actually happens.
References
For how to use the library, this blog post is a good reference:
Pythonによる時系列分析の基礎
For the theory of ARMA (plus ARIMA and SARIMA), at least as an introduction, a book by the author of the blog above is helpful:
時系列分析と状態空間モデルの基礎: RとStanで学ぶ理論と実装 (commonly known as the ハヤブサ本)
Into the source code
Location
The statsmodels source code is published on GitHub (https://github.com/statsmodels/statsmodels).
The ARMA class
The fit part
# l.424
class ARMA(tsbase.TimeSeriesModel):

    __doc__ = tsbase._tsa_doc % {"model": _arma_model,
                                 "params": _arma_params, "extra_params": "",
                                 "extra_sections": _armax_notes %
                                 {"Model": "ARMA"}}

    def __init__(self, endog, order, exog=None, dates=None, freq=None,
                 missing='none'):
        super(ARMA, self).__init__(endog, exog, dates, freq, missing=missing)
        exog = self.data.exog  # get it after it's gone through processing
        _check_estimable(len(self.endog), sum(order))
        self.k_ar = k_ar = order[0]
        self.k_ma = k_ma = order[1]
        self.k_lags = max(k_ar, k_ma + 1)
        if exog is not None:
            if exog.ndim == 1:
                exog = exog[:, None]
            k_exog = exog.shape[1]  # number of exog. variables excl. const
        else:
            k_exog = 0
        self.k_exog = k_exog

    # (snip)

    # l.1074 ~ 1164
    def fit(self, start_params=None, trend='c', method="css-mle",
            transparams=True, solver='lbfgs', maxiter=500, full_output=1,
            disp=5, callback=None, start_ar_lags=None, **kwargs):
        mlefit = super(ARIMA, self).fit(start_params, trend,
                                        method, transparams, solver,
                                        maxiter, full_output, disp,
                                        callback, start_ar_lags, **kwargs)
        normalized_cov_params = None  # TODO: fix this?
        arima_fit = ARIMAResults(self, mlefit._results.params,
                                 normalized_cov_params)
        arima_fit.k_diff = self.k_diff
        arima_fit.mle_retvals = mlefit.mle_retvals
        arima_fit.mle_settings = mlefit.mle_settings
        return ARIMAResultsWrapper(arima_fit)

    # (rest omitted)
The ARMA class inherits from tsbase.TimeSeriesModel. (Strictly speaking, the fit quoted above at l.1074~ is ARIMA's, as the super(ARIMA, self) call gives away; it delegates upward through this same inheritance chain.)
Among the arguments, method is the objective to maximize:
css (conditional sum of squares), mle (maximum likelihood estimation), or a hybrid of the two.
With mle, the Kalman filter is used.
method : str {'css-mle','mle','css'}
    This is the loglikelihood to maximize. If "css-mle", the
    conditional sum of squares likelihood is maximized and its values
    are used as starting values for the computation of the exact
    likelihood via the Kalman filter. If "mle", the exact likelihood
    is maximized via the Kalman Filter. If "css" the conditional sum
    of squares likelihood is maximized. All three methods use
    `start_params` as starting parameters. See above for more
    information.
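To make the method options concrete, here is a minimal usage sketch. Assumptions: an older statsmodels (0.12 or earlier, where statsmodels.tsa.arima_model.ARMA still exists; it has since been removed) and simulated data.

import numpy as np
from statsmodels.tsa.arima_process import arma_generate_sample
from statsmodels.tsa.arima_model import ARMA

np.random.seed(0)
# Simulate 500 points of an ARMA(1, 1) process; note the lag-polynomial
# convention [1, -phi] / [1, theta] expected by arma_generate_sample.
y = arma_generate_sample(ar=[1, -0.6], ma=[1, 0.3], nsample=500)

model = ARMA(y, order=(1, 1))
# method picks the likelihood: 'css', 'mle', or the default 'css-mle'.
res = model.fit(method='css-mle', disp=0)
print(res.params)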
For solver, BFGS (Broyden-Fletcher-Goldfarb-Shanno), Newton-Raphson, Nelder-Mead, and others are available.
BFGS is apparently a kind of quasi-Newton method.
solver : str or None, optional
    Solver to be used. The default is 'lbfgs' (limited memory
    Broyden-Fletcher-Goldfarb-Shanno). Other choices are 'bfgs',
    'newton' (Newton-Raphson), 'nm' (Nelder-Mead), 'cg' -
    (conjugate gradient), 'ncg' (non-conjugate gradient), and
    'powell'. By default, the limited memory BFGS uses m=12 to
    approximate the Hessian, projected gradient tolerance of 1e-8 and
    factr = 1e2. You can change these by using kwargs.
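Continuing that hypothetical sketch, solver switches the algorithm, and the l-bfgs settings mentioned at the end of the docstring can apparently be overridden through kwargs:

# Same model object as the previous sketch (statsmodels <= 0.12 assumed).
res_nm = model.fit(solver='nm', disp=0)  # Nelder-Mead instead of l-bfgs
# Override the m / pgtol / factr defaults described in the docstring:
res_lbfgs = model.fit(solver='lbfgs', disp=0, pgtol=1e-10, factr=1e1)
print(res_nm.params)
print(res_lbfgs.params)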
TimeSeriesModel
Let's look at the parent class, tsbase.TimeSeriesModel.
https://github.com/statsmodels/statsmodels/blob/master/statsmodels/tsa/base/tsa_model.py
# l.40
class TimeSeriesModel(base.LikelihoodModel):

    __doc__ = _tsa_doc % {"model": _model_doc, "params": _generic_params,
                          "extra_params": _missing_param_doc,
                          "extra_sections": ""}

    def __init__(self, endog, exog=None, dates=None, freq=None,
                 missing='none', **kwargs):
        super(TimeSeriesModel, self).__init__(endog, exog, missing=missing,
                                              **kwargs)

        # Date handling in indexes
        self._init_dates(dates, freq)

    # (rest omitted)
It does not define a fit method of its own.
→ On to its parent, LikelihoodModel.
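The chain is easy to confirm from the Python side (same statsmodels version assumption as the earlier sketch):

from statsmodels.tsa.arima_model import ARMA

# The MRO is the path an attribute lookup climbs when a class does not
# define the attribute (e.g. fit) itself.
for cls in ARMA.__mro__:
    print(cls.__name__)
# Expected order includes: ARMA, TimeSeriesModel, LikelihoodModel, Model, object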
LikelihoodModel
# l.206~
class LikelihoodModel(Model):
    """
    Likelihood model is a subclass of Model.
    """

    def __init__(self, endog, exog=None, **kwargs):
        super(LikelihoodModel, self).__init__(endog, exog, **kwargs)
        self.initialize()

    # (snip)

    # l.254~
    def fit(self, start_params=None, method='newton', maxiter=100,
            full_output=True, disp=True, fargs=(), callback=None, retall=False,
            skip_hessian=False, **kwargs):
        Hinv = None  # JP error if full_output=0, Hinv not defined
        if start_params is None:
            if hasattr(self, 'start_params'):
                start_params = self.start_params
            elif self.exog is not None:
                # fails for shape (K,)?
                start_params = [0] * self.exog.shape[1]
            else:
                raise ValueError("If exog is None, then start_params should "
                                 "be specified")

        # TODO: separate args from nonarg taking score and hessian, ie.,
        # user-supplied and numerically evaluated estimate frprime doesn't take
        # args in most (any?) of the optimize function

        nobs = self.endog.shape[0]
        # f = lambda params, *args: -self.loglike(params, *args) / nobs

        def f(params, *args):
            return -self.loglike(params, *args) / nobs

        if method == 'newton':
            # TODO: why are score and hess positive?
            def score(params, *args):
                return self.score(params, *args) / nobs

            def hess(params, *args):
                return self.hessian(params, *args) / nobs
        else:
            def score(params, *args):
                return -self.score(params, *args) / nobs

            def hess(params, *args):
                return -self.hessian(params, *args) / nobs

        warn_convergence = kwargs.pop('warn_convergence', True)
        optimizer = Optimizer()
        xopt, retvals, optim_settings = optimizer._fit(f, score, start_params,
                                                       fargs, kwargs,
                                                       hessian=hess,
                                                       method=method,
                                                       disp=disp,
                                                       maxiter=maxiter,
                                                       callback=callback,
                                                       retall=retall,
                                                       full_output=full_output)

        # NOTE: this is for fit_regularized and should be generalized
        cov_params_func = kwargs.setdefault('cov_params_func', None)
        if cov_params_func:
            Hinv = cov_params_func(self, xopt, retvals)
        elif method == 'newton' and full_output:
            Hinv = np.linalg.inv(-retvals['Hessian']) / nobs
        elif not skip_hessian:
            H = -1 * self.hessian(xopt)
            invertible = False
            if np.all(np.isfinite(H)):
                eigvals, eigvecs = np.linalg.eigh(H)
                if np.min(eigvals) > 0:
                    invertible = True

            if invertible:
                Hinv = eigvecs.dot(np.diag(1.0 / eigvals)).dot(eigvecs.T)
                Hinv = np.asfortranarray((Hinv + Hinv.T) / 2.0)
            else:
                from warnings import warn
                warn('Inverting hessian failed, no bse or cov_params '
                     'available', HessianInversionWarning)
                Hinv = None

        if 'cov_type' in kwargs:
            cov_kwds = kwargs.get('cov_kwds', {})
            kwds = {'cov_type': kwargs['cov_type'], 'cov_kwds': cov_kwds}
        else:
            kwds = {}
        if 'use_t' in kwargs:
            kwds['use_t'] = kwargs['use_t']
        # TODO: add Hessian approximation and change the above if needed
        mlefit = LikelihoodModelResults(self, xopt, Hinv, scale=1., **kwds)

        # TODO: hardcode scale?
        if isinstance(retvals, dict):
            mlefit.mle_retvals = retvals
            if warn_convergence and not retvals['converged']:
                from warnings import warn
                from statsmodels.tools.sm_exceptions import ConvergenceWarning
                warn("Maximum Likelihood optimization failed to converge. "
                     "Check mle_retvals", ConvergenceWarning)

        mlefit.mle_settings = optim_settings
        return mlefit

    # (rest omitted)
The culprit is probably this part:

# l.458~
optimizer = Optimizer()
xopt, retvals, optim_settings = optimizer._fit(f, score, start_params,
                                               fargs, kwargs,
                                               hessian=hess,
                                               method=method,
                                               disp=disp,
                                               maxiter=maxiter,
                                               callback=callback,
                                               retall=retall,
                                               full_output=full_output)

→ On to from statsmodels.base.optimizer import Optimizer.
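Before following the import, the core pattern of LikelihoodModel.fit is worth isolating: negate the average log-likelihood and hand it to a minimizer. Below is a self-contained sketch of that pattern using only numpy and scipy; the Gaussian model is my own toy example, not statsmodels code.

import numpy as np
from scipy import optimize

rng = np.random.RandomState(0)
x = rng.normal(loc=1.5, scale=2.0, size=1000)
nobs = x.shape[0]

def loglike(params):
    # Gaussian log-likelihood; optimize over log(sigma) to keep sigma > 0.
    mu, log_sigma = params
    sigma = np.exp(log_sigma)
    return np.sum(-0.5 * np.log(2 * np.pi) - log_sigma
                  - 0.5 * ((x - mu) / sigma) ** 2)

def f(params, *args):
    # Same trick as LikelihoodModel.fit: negate and scale by nobs, turning
    # maximization into a well-scaled minimization.
    return -loglike(params) / nobs

xopt, fopt, d = optimize.fmin_l_bfgs_b(f, [0.0, 0.0], approx_grad=True)
print(xopt)            # ~ [1.5, log(2.0)]
print(d['warnflag'])   # 0 means converged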
Optimizer
# l.5~
from __future__ import print_function
import numpy as np
from scipy import optimize

# (snip)

# l.17~
class Optimizer(object):
    def _fit(self, objective, gradient, start_params, fargs, kwargs,
             hessian=None, method='newton', maxiter=100, full_output=True,
             disp=True, callback=None, retall=False):
        #TODO: generalize the regularization stuff
        # Extract kwargs specific to fit_regularized calling fit
        extra_fit_funcs = kwargs.setdefault('extra_fit_funcs', dict())

        methods = ['newton', 'nm', 'bfgs', 'lbfgs', 'powell', 'cg', 'ncg',
                   'basinhopping', 'minimize']
        methods += extra_fit_funcs.keys()
        method = method.lower()
        _check_method(method, methods)

        fit_funcs = {
            'newton': _fit_newton,
            'nm': _fit_nm,  # Nelder-Mead
            'bfgs': _fit_bfgs,
            'lbfgs': _fit_lbfgs,
            'cg': _fit_cg,
            'ncg': _fit_ncg,
            'powell': _fit_powell,
            'basinhopping': _fit_basinhopping,
            'minimize': _fit_minimize  # wrapper for scipy.optimize.minimize
        }

        #NOTE: fit_regularized checks the methods for these but it should be
        # moved up probably
        if extra_fit_funcs:
            fit_funcs.update(extra_fit_funcs)

        func = fit_funcs[method]
        xopt, retvals = func(objective, gradient, start_params, fargs, kwargs,
                             disp=disp, maxiter=maxiter, callback=callback,
                             retall=retall, full_output=full_output,
                             hess=hessian)

        optim_settings = {'optimizer': method, 'start_params': start_params,
                          'maxiter': maxiter, 'full_output': full_output,
                          'disp': disp, 'fargs': fargs, 'callback': callback,
                          'retall': retall}
        optim_settings.update(kwargs)
        # set as attributes or return?
        return xopt, retvals, optim_settings

    # (rest omitted)
# l.171~
fit_funcs = {
    'newton': _fit_newton,
    'nm': _fit_nm,  # Nelder-Mead
    'bfgs': _fit_bfgs,
    'lbfgs': _fit_lbfgs,
    'cg': _fit_cg,
    'ncg': _fit_ncg,
    'powell': _fit_powell,
    'basinhopping': _fit_basinhopping,
    'minimize': _fit_minimize  # wrapper for scipy.optimize.minimize
}
This selects the algorithm to use, and then

# l.188~
func = fit_funcs[method]
xopt, retvals = func(objective, gradient, start_params, fargs, kwargs,
                     disp=disp, maxiter=maxiter, callback=callback,
                     retall=retall, full_output=full_output,
                     hess=hessian)

performs the fit.
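The dispatch pattern itself (a dict from method name to a fitting function with a uniform signature) can be reproduced in isolation. A toy sketch; the _demo names are mine, not statsmodels':

from scipy import optimize

def _fit_nm_demo(objective, x0):
    # Nelder-Mead, via the classic scipy wrapper
    return optimize.fmin(objective, x0, disp=False)

def _fit_bfgs_demo(objective, x0):
    return optimize.fmin_bfgs(objective, x0, disp=False)

fit_funcs_demo = {
    'nm': _fit_nm_demo,
    'bfgs': _fit_bfgs_demo,
}

# Pick the function by name, then call it -- just like Optimizer._fit.
func = fit_funcs_demo['nm']
xopt = func(lambda p: float((p[0] - 3.0) ** 2), x0=[0.0])
print(xopt)  # ~ [3.0]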
So where do the fit functions themselves live? They turn out to be defined one after another below the Optimizer class, as module-level functions.
As an example, let's look at _fit_lbfgs, which implements the default 'lbfgs' (limited memory Broyden-Fletcher-Goldfarb-Shanno).
# l.347~
def _fit_lbfgs(f, score, start_params, fargs, kwargs, disp=True,
               maxiter=100, callback=None, retall=False,
               full_output=True, hess=None):
    # (setup omitted in this quote: bounds, extra_kwargs, approx_grad,
    # epsilon and loglike_and_score are all extracted from kwargs first)
    if epsilon and not approx_grad:
        raise ValueError('a finite-differences epsilon was provided '
                         'even though we are not using approx_grad')
    if approx_grad and loglike_and_score:
        raise ValueError('gradient approximation was requested '
                         'even though an analytic loglike_and_score function '
                         'was given')
    if loglike_and_score:
        func = lambda p, *a: tuple(-x for x in loglike_and_score(p, *a))
    elif score:
        func = f
        extra_kwargs['fprime'] = score
    elif approx_grad:
        func = f

    retvals = optimize.fmin_l_bfgs_b(func, start_params, maxiter=maxiter,
                                     callback=callback, args=fargs,
                                     bounds=bounds, disp=disp,
                                     **extra_kwargs)

    if full_output:
        xopt, fopt, d = retvals
        # The warnflag is
        # 0 if converged
        # 1 if too many function evaluations or too many iterations
        # 2 if stopped for another reason, given in d['task']
        warnflag = d['warnflag']
        converged = (warnflag == 0)
        gopt = d['grad']
        fcalls = d['funcalls']
        iterations = d['nit']
        retvals = {'fopt': fopt, 'gopt': gopt, 'fcalls': fcalls,
                   'warnflag': warnflag, 'converged': converged,
                   'iterations': iterations}
    else:
        xopt = retvals[0]
        retvals = None

    return xopt, retvals
As I suspected,

# l.407~
retvals = optimize.fmin_l_bfgs_b(func, start_params, maxiter=maxiter,
                                 callback=callback, args=fargs,
                                 bounds=bounds, disp=disp,
                                 **extra_kwargs)

this uses scipy, imported at the top of the file with from scipy import optimize.
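Closing the loop at the user level: the retvals dict assembled in _fit_lbfgs is what surfaces as mle_retvals on the fitted result (continuing the earlier hypothetical sketch; statsmodels <= 0.12 assumed):

res = ARMA(y, order=(1, 1)).fit(solver='lbfgs', disp=0)
# 'fopt', 'gopt', 'warnflag', 'converged', 'iterations' come straight
# from the dict built in _fit_lbfgs above.
print(res.mle_retvals)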
Afterword
In the ARMA model, method specified the objective to optimize and solver the optimization algorithm, yet by the time we reached LikelihoodModel, 'method' had quietly become the optimization algorithm. Where did the switch happen? (Presumably somewhere inside ARMA.fit, which would pass its solver down as LikelihoodModel.fit's method argument, but I did not trace that here.)
And where did the optimization objective go? The Kalman filter is apparently used for it, so I'm curious about that too.
Compared to scikit-learn, statsmodels has a more complicated structure and looser naming conventions, which made it tough to trace.
※ This article is material prepared for scikit-learn、statsmodelsのライブラリ実装の読解と議論|OSS輪読会 #5, with some additions.