1
4

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?

More than 1 year has passed since last update.

R機械学習/カルマンフィルタによる状態空間モデル2

Last updated at Posted at 2022-05-16

Introduction

カルマンフィルタを使用した状態空間モデルをRで実装する方法についてのメモ.
今回は外生変数を組み込んだな時変係数モデルを実装する.
参考図書:時系列分析と状態空間モデルの基礎: RとStanで学ぶ理論と実装

ライブラリインポート

library(KFAS)  # モデル作成
library(ggplot2)  # グラフ描画
library(patchwork)  # グラフ配置

シミュレーションデータの作成

2つの外生変数と目的変数からなるデータを作成.

set.seed(1)
N=100

# 各種標準偏差
sigma_v = 10
sigma_w = 5
sigma_t1 = 0.6
sigma_t2 = 0.2

# 説明変数
x1 = rnorm(n=N, mean=5, sd=10)
x2 = rnorm(n=N, mean=10, sd=5)

# 切片,回帰係数,観測値
mu = cumsum(rnorm(n=N, mean=0, sd=sigma_w))
b1 = cumsum(rnorm(n=N, mean=0, sd=sigma_t1)) + 10
b2 = cumsum(rnorm(n=N, mean=0, sd=sigma_t1)) + 5
y = mu + b1*x1 + b2*x2 + rnorm(n=N, mean=0, sd=sigma_v)

dataset = data.frame(y=y, x1=x1, x2=x2)
coefset = data.frame(b1=b1, b2=b2, mu=mu)

データの確認

# 外生変数と目的変数
par(mfrow=c(3,1))
plot(x1)
plot(x2)
plot(y)

Out:外生変数と目的変数
1.png

# 回帰係数と切片
par(mfrow=c(3,1))
plot(b1)
plot(b2)
plot(mu)

Out:回帰係数と切片
2.png

テストデータと訓練データに分割

p = 80
train = dataset[1:p,]
test = dataset[81:N,]

train_coef = coefset[1:p,]
test_coef = coefset[81:N,]

モデリング

2つの外生変数をもつ時変係数モデルは以下のように定式化される.

$$
\begin{eqnarray}
\beta_{1,t} &=& \beta_{1,t-1} + \tau_{1,t}, \ \ \ \ &\tau_{1,t}&~Normal(0,\sigma^2_{\tau_1}) \\
\beta_{2,t} &=& \beta_{2,t-1} + \tau_{2,t}, \ \ &\tau_{2,t}&~Normal(0,\sigma^2_{\tau_2}) \\
\mu_t &=& \mu_{t-1} + w_t, \ \ &w_t&~Normal(0,\sigma^2_w) \\
y_t &=& \mu_t + \beta_{1,t}x_{1,t} + \beta_{2,t}x_{2,t} + v_t, \ \ \ \ &v_t&~Normal(0,\sigma^2_v)
\end{eqnarray}
$$

  • $\beta_t$:回帰係数
  • $\mu_t$:切片
  • $x_t$:外生変数
  • $y_t$:目的変数
  • $\tau_t$:回帰係数誤差
  • $w_t$:過程誤差
  • $v_t$:観測誤差
# Step1:モデルの構造を決める
build_kfas = SSModel(H = NA, train$y ~ SSMtrend(degree = 1, Q = NA) +
             SSMregression( ~ train$x1 + train$x2 , Q = diag(rep(NA, 2))))

# Step2:パラメータ推定
fit_kfas = fitSSM(build_kfas, inits = c(1, 1, 1, 1))

# Step3:フィルタリング
result_kfas = KFS(fit_kfas$model, filtering=c('state', 'mean'), smoothing=c('state', 'mean'))

モデル評価

訓練データの結果を図示.

# グラフ用データフレーム作成
df_kfas = data.frame(time=1:p, y=train$y, y_pred=result_kfas$m[1:length(train$y)])
df_kfas$y_pred[1] = NA

ggplot(data=df_kfas, aes(x=time, y=y))+
  geom_point(alpha=0.5)+
  geom_line(aes(y=y_pred),size=1.2, col = 'red')

Out:黒点が観測値,赤線が予測値
3.png

切片および回帰係数の推定結果

df_kfas_coef = data.frame(time=1:p, b1=train_coef$b1, b2=train_coef$b2, mu=train_coef$mu,
                          b1_pred = coef(result_kfas)[,1],
                          b2_pred = coef(result_kfas)[,2],
                          mu_pred = coef(result_kfas)[,3])

g1 = ggplot(data=df_kfas_coef, aes(x=time, y=b1)) +
      geom_point(alpha=0.5) +
      geom_line(aes(y=b1_pred), size=1, col='red')

g2 = ggplot(data=df_kfas_coef, aes(x=time, y=b2)) +
      geom_point(alpha=0.5) +
      geom_line(aes(y=b2_pred), size=1, col='red')

g3 = ggplot(data=df_kfas_coef, aes(x=time, y=mu)) +
      geom_point(alpha=0.5) +
      geom_line(aes(y=mu_pred), size=1, col='red')

g1+g2+g3+plot_layout(ncol=1)

Out:黒点が真値,赤線が推定値
4.png

逐次更新

訓練データで推定したパラメータを使用し,テストデータにおいても状態の更新をしたい.
少し強引な方法ではあるがKFASパッケージを使って逐次更新する方法について記す.

データ準備
テストデータのサイズ分訓練データ内の目的変数にNAを代入する.

train2 = dataset
train2[81:N,1] = NA

KFASパッケージでパラメタの推定まではこれまでと同様に行う.

# Step1:モデルの構造を決める
build_kfas2 = SSModel(H = NA, train2$y ~ SSMtrend(degree = 1, Q = NA) +
              SSMregression( ~ train2$x1 + train2$x2 , Q = diag(rep(NA, 2))))

# Step2:パラメータ推定
fit_kfas2 = fitSSM(build_kfas2, inits = c(1, 1, 1, 1))

パラメータ推定結果を格納したオブジェクトに全データを格納しフィルタリングを行う.

fit_kfas2[["model"]][["y"]][1:N] = dataset$y
# Step3:フィルタリング
result_kfas2 = KFS(fit_kfas2$model, filtering=c('state', 'mean'), smoothing=c('state', 'mean'))

目的変数と外生変数を図示

df_kfas2 = data.frame(time=1:N, y=dataset$y, y_pred=result_kfas2$m[1:N],
                       x1=dataset$x1, x2=dataset$x2)
df_kfas2$y_pred[1] = NA


g1 = ggplot(data=df_kfas2, aes(x=time, y=y))+
      geom_point(alpha=0.5)+
      geom_line(aes(y=y_pred),size=1.2, col = 'red') +
      geom_vline(xintercept=p, col = 'purple', linetype='dashed')

g2 = ggplot(data=df_kfas2, aes(x=time, y=x1))+
      geom_point(alpha=0.5) +
      geom_vline(xintercept=p, col = 'purple', linetype='dashed')

g3 = ggplot(data=df_kfas2, aes(x=time, y=x2))+
      geom_point(alpha=0.5) +
      geom_vline(xintercept=p, col = 'purple', linetype='dashed')

g1+g2+g3+plot_layout(ncol=1)

Out:紫垂直線の右がテストデータ
5.png

切片と回帰係数を図示

df_kfas_coef = data.frame(time=1:N, b1=coefset$b1, b2=coefset$b2, mu=coefset$mu,
                          b1_pred = coef(result_kfas2)[,1],
                          b2_pred = coef(result_kfas2)[,2],
                          mu_pred = coef(result_kfas2)[,3])

g1 = ggplot(data=df_kfas_coef, aes(x=time, y=b1)) +
      geom_point(alpha=0.5) +
      geom_line(aes(y=b1_pred), size=1, col='red') +
      geom_vline(xintercept=p, col = 'purple', linetype='dashed')

g2 = ggplot(data=df_kfas_coef, aes(x=time, y=b2)) +
      geom_point(alpha=0.5) +
      geom_line(aes(y=b2_pred), size=1, col='red') +
      geom_vline(xintercept=p, col = 'purple', linetype='dashed')

g3 = ggplot(data=df_kfas_coef, aes(x=time, y=mu)) +
      geom_point(alpha=0.5) +
      geom_line(aes(y=mu_pred), size=1, col='red') +
      geom_vline(xintercept=p, col = 'purple', linetype='dashed')

g1+g2+g3+plot_layout(ncol=1)

Out:紫垂直線の右がテストデータ
6.png

Conclusion

今回は時変係数モデルの構築,予測,逐次更新を実装した.
状態空間モデルは時系列データを柔軟に表現できるモデルとされているため,今後も学習を続けていこうと思う.



Code

# ライブラリインポート
library(KFAS)  # モデル作成
library(ggplot2)  # グラフ描画
library(patchwork)  # グラフ配置

# シミュレーションデータの作成
set.seed(1)
N=100

sigma_v = 10
sigma_w = 5
sigma_t1 = 0.6
sigma_t2 = 0.2

x1 = rnorm(n=N, mean=5, sd=10)
x2 = rnorm(n=N, mean=10, sd=5)

mu = cumsum(rnorm(n=N, mean=0, sd=sigma_w))
b1 = cumsum(rnorm(n=N, mean=0, sd=sigma_t1)) + 10
b2 = cumsum(rnorm(n=N, mean=0, sd=sigma_t1)) + 5
y = mu + b1*x1 + b2*x2 + rnorm(n=N, mean=0, sd=sigma_v)

dataset = data.frame(y=y, x1=x1, x2=x2)
coefset = data.frame(b1=b1, b2=b2, mu=mu)


par(mfrow=c(3,1))
plot(x1)
plot(x2)
plot(y)

par(mfrow=c(3,1))
plot(b1)
plot(b2)
plot(mu)

p = 80
train = dataset[1:p,]
test = dataset[81:N,]

train_coef = coefset[1:p,]
test_coef = coefset[81:N,]

# モデリング
build_kfas = SSModel(H = NA, train$y ~ SSMtrend(degree = 1, Q = NA) +
             SSMregression( ~ train$x1 + train$x2 , Q = diag(rep(NA, 2))))

fit_kfas = fitSSM(build_kfas, inits = c(1, 1, 1, 1))

result_kfas = KFS(fit_kfas$model, filtering=c('state', 'mean'), smoothing=c('state', 'mean'))

# モデル評価
df_kfas = data.frame(time=1:p, y=train$y, y_pred=result_kfas$m[1:length(train$y)])
df_kfas$y_pred[1] = NA

ggplot(data=df_kfas, aes(x=time, y=y))+
  geom_point(alpha=0.5)+
  geom_line(aes(y=y_pred),size=1.2, col = 'red')

df_kfas_coef = data.frame(time=1:p, b1=train_coef$b1, b2=train_coef$b2, mu=train_coef$mu,
                          b1_pred = coef(result_kfas)[,1],
                          b2_pred = coef(result_kfas)[,2],
                          mu_pred = coef(result_kfas)[,3])

g1 = ggplot(data=df_kfas_coef, aes(x=time, y=b1)) +
      geom_point(alpha=0.5) +
      geom_line(aes(y=b1_pred), size=1, col='red')

g2 = ggplot(data=df_kfas_coef, aes(x=time, y=b2)) +
      geom_point(alpha=0.5) +
      geom_line(aes(y=b2_pred), size=1, col='red')

g3 = ggplot(data=df_kfas_coef, aes(x=time, y=mu)) +
      geom_point(alpha=0.5) +
      geom_line(aes(y=mu_pred), size=1, col='red')

g1+g2+g3+plot_layout(ncol=1)

# 逐次更新
train2 = dataset
train2[81:N,1] = NA

build_kfas2 = SSModel(H = NA, train2$y ~ SSMtrend(degree = 1, Q = NA) +
              SSMregression( ~ train2$x1 + train2$x2 , Q = diag(rep(NA, 2))))

fit_kfas2 = fitSSM(build_kfas2, inits = c(1, 1, 1, 1))

fit_kfas2[["model"]][["y"]][1:N] = dataset$y
result_kfas2 = KFS(fit_kfas2$model, filtering=c('state', 'mean'), smoothing=c('state', 'mean'))

df_kfas2 = data.frame(time=1:N, y=dataset$y, y_pred=result_kfas2$m[1:N],
                       x1=dataset$x1, x2=dataset$x2)
df_kfas2$y_pred[1] = NA


g1 = ggplot(data=df_kfas2, aes(x=time, y=y))+
      geom_point(alpha=0.5)+
      geom_line(aes(y=y_pred),size=1.2, col = 'red') +
      geom_vline(xintercept=p, col = 'purple', linetype='dashed')

g2 = ggplot(data=df_kfas2, aes(x=time, y=x1))+
      geom_point(alpha=0.5) +
      geom_vline(xintercept=p, col = 'purple', linetype='dashed')

g3 = ggplot(data=df_kfas2, aes(x=time, y=x2))+
      geom_point(alpha=0.5) +
      geom_vline(xintercept=p, col = 'purple', linetype='dashed')

g1+g2+g3+plot_layout(ncol=1)

df_kfas_coef = data.frame(time=1:N, b1=coefset$b1, b2=coefset$b2, mu=coefset$mu,
                          b1_pred = coef(result_kfas2)[,1],
                          b2_pred = coef(result_kfas2)[,2],
                          mu_pred = coef(result_kfas2)[,3])

g1 = ggplot(data=df_kfas_coef, aes(x=time, y=b1)) +
      geom_point(alpha=0.5) +
      geom_line(aes(y=b1_pred), size=1, col='red') +
      geom_vline(xintercept=p, col = 'purple', linetype='dashed')

g2 = ggplot(data=df_kfas_coef, aes(x=time, y=b2)) +
      geom_point(alpha=0.5) +
      geom_line(aes(y=b2_pred), size=1, col='red') +
      geom_vline(xintercept=p, col = 'purple', linetype='dashed')

g3 = ggplot(data=df_kfas_coef, aes(x=time, y=mu)) +
      geom_point(alpha=0.5) +
      geom_line(aes(y=mu_pred), size=1, col='red') +
      geom_vline(xintercept=p, col = 'purple', linetype='dashed')

g1+g2+g3+plot_layout(ncol=1)
1
4
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
1
4

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?