Edited at

線形モデル・GLMの動画

More than 3 years have passed since last update.

berobero11さんが作った緑本(データ解析のための統計モデリング入門)のGLMのスライド(分布から見た線形モデル・GLM・GLMM)が素晴らしかったので、コードを参考にしつつアニメーションバージョンを作ってみた。データは緑本の第3章「一般化線形モデル(GLM)」図3.9で使ったもの。ggplot2を動画で保存する方法がわからなかったので、ImageMagickでGIF化した。GLMMはまだ作成中。


線形モデル(正規分布 / 恒等リンク関数 / β_0 + β_1 x)

library(ggplot2)

# データ読み込み
# wget http://hosho.ees.hokudai.ac.jp/~kubo/stat/iwanamibook/kubobook_2012.zip
# unzip kubobook_2012.zip
# cd kubobook2012/chapter03
# 済みとする
source("d0.Rdata")

fit <- lm(y ~ x, data=d0)
fit <- glm(y ~ x, data=d0, family=gaussian(link=identity))

xrange <- seq(0, 2, by=0.05)
for(j in 1:length(xrange)){
x.v <- c(0 + xrange[j])
input <- data.frame(x=x.v, y=-1.424+2.890*x.v)
df <- data.frame()
x0 <- -50:50/10 + input$y
y0 <- dnorm(x0, mean=input$y, sd=1)
m <- as.matrix(data.frame(x=x0, y=2*y0))
z <- m %*% matrix(c(0, 1, -1, 0), 2, 2, byrow=T)
d <- data.frame(x=z[,1]+input$x, y=z[,2])
df <- rbind(df, d)
df.point <- data.frame(x=x.v, y=input$y)

# 左の図
p <- ggplot(df, aes(x=x, y=y))
p <- p + geom_polygon(data=df, fill="#F8766D", alpha=.9)
p <- p + geom_abline(intercept = -1.424, slope = 2.890, colour = "green", size=1.5)
p <- p + geom_point(data=df.point, size=5, colour = "green")
p <- p + geom_point(data=d0, size=5)
p <- p + theme(
axis.title.x = element_text(size=16),
axis.title.y = element_text(size=16, angle=0),
axis.text.x = element_text(size=14),
axis.text.y = element_text(size=14)
)
p <- p + xlim(-1.5, 2)
p <- p + ylim(-4, 8)

# 右の図
p2 <- ggplot(df, aes(x=y, y=dnorm(df$y, mean=mean(df$y), sd=1)))
p2 <- p2 + geom_polygon(fill="#F8766D", alpha=.9) + xlab("y")
p2 <- p2 + ylab("Density")
p2 <- p2 + annotate("text", x = mean(df$y) + 3, y = 0.4, label = "y ~ N(u, σ^2)\nu = a x + b", size=5)
p2 <- p2 + annotate("text", x = mean(df$y) + 3, y = 0.35, label = "↓", size=5)
p2 <- p2 + annotate("text", x = mean(df$y) + 3, y = 0.3, label = "y ~ N(u, 1)\nu = 2.890 x - 1.424", size=5)

png(file=paste0("FrameNormal", j, ".png"), width=960, height=480)
grid.newpage()
pushViewport(viewport(layout=grid.layout(1, 2)))
plot(p, vp=viewport(layout.pos.row=1, layout.pos.col=1))
plot(p2, vp=viewport(layout.pos.row=1, layout.pos.col=2))
dev.off()
}

# ImageMagickがインストールされている前提
system("convert FrameNormal1.png FrameNormal2.png FrameNormal3.png FrameNormal4.png FrameNormal5.png FrameNormal6.png FrameNormal7.png FrameNormal8.png FrameNormal9.png FrameNormal10.png FrameNormal11.png FrameNormal12.png FrameNormal13.png FrameNormal14.png FrameNormal15.png FrameNormal16.png FrameNormal17.png FrameNormal18.png FrameNormal19.png FrameNormal20.png FrameNormal21.png FrameNormal22.png FrameNormal23.png FrameNormal24.png FrameNormal25.png FrameNormal26.png FrameNormal27.png FrameNormal28.png FrameNormal29.png FrameNormal30.png FrameNormal31.png FrameNormal32.png FrameNormal33.png FrameNormal34.png FrameNormal35.png FrameNormal36.png FrameNormal37.png FrameNormal38.png FrameNormal39.png FrameNormal40.png FrameNormal41.png Normal.gif")

Normal.gif


一般化線形モデル(ポワソン分布 / 対数リンク関数 / β_0 + β_1 x)


(ポワソン回帰モデル)

fit <- glm(y ~ x, data=d0, family=poisson(link=log))

xrange <- seq(0, 2, by=0.05)
for(j in 1:length(xrange)){
x.v <- c(0 + xrange[j])
input <- data.frame(x=x.v, y=exp(-3.859+2.981*x.v))
df <- data.frame()
x0 <- seq(0, 8)
y0 <- dpois(x0, lambda=input$y)
df.point <- data.frame(x=x.v, y=input$y)
df <- data.frame(x=x0, y=y0, xmin=(x.v-y0), xmax=x.v, ymin=x0-0.3, ymax=x0+0.3)

# 左の図
p <- ggplot(df, aes(x=x, y=y))
p <- p + geom_rect(fill="#F8766D", aes(xmin=df$xmin, xmax=df$xmax, ymin=df$ymin, ymax=df$ymax) , alpha=.9)
p <- p + stat_function(fun = function(x) exp(-3.859+2.981*x), color="green")
p <- p + geom_point(data=df.point, size=5, color="green")
p <- p + geom_point(data=d0, size=5)
p <- p + theme(
axis.title.x = element_text(size=16),
axis.title.y = element_text(size=16, angle=0),
axis.text.x = element_text(size=14),
axis.text.y = element_text(size=14)
)
p <- p + xlim(-1.5, 2)
p <- p + ylim(-4, 8)

# 右の図
p2 <- ggplot(df, aes(x=x, y=y))
p2 <- p2 + geom_rect(fill="#F8766D", aes(xmin=(df$x-0.3), xmax=(df$x+0.3), ymin=0, ymax=y) , alpha=.9)
p2 <- p2 + ylab("Density")
p2 <- p2 + annotate("text", x = 6, y = 0.8, label = "y ~ Poisson(λ)\nλ = β_0 + β_1 x", size=5)
p2 <- p2 + annotate("text", x = 6, y = 0.7, label = "↓", size=5)
p2 <- p2 + annotate("text", x = 6, y = 0.6, label = "y ~ Poisson(λ)\nλ = -3.859 + 2.981 x", size=5)
p2 <- p2 + xlim(-1, 8)
p2 <- p2 + ylim(0, 1)

png(file=paste0("FramePoisson", j, ".png"), width=960, height=480)
grid.newpage()
pushViewport(viewport(layout=grid.layout(1, 2)))
plot(p, vp=viewport(layout.pos.row=1, layout.pos.col=1))
plot(p2, vp=viewport(layout.pos.row=1, layout.pos.col=2))
dev.off()
}

# ImageMagickがインストールされている前提
system("convert FramePoisson1.png FramePoisson2.png FramePoisson3.png FramePoisson4.png FramePoisson5.png FramePoisson6.png FramePoisson7.png FramePoisson8.png FramePoisson9.png FramePoisson10.png FramePoisson11.png FramePoisson12.png FramePoisson13.png FramePoisson14.png FramePoisson15.png FramePoisson16.png FramePoisson17.png FramePoisson18.png FramePoisson19.png FramePoisson20.png FramePoisson21.png FramePoisson22.png FramePoisson23.png FramePoisson24.png FramePoisson25.png FramePoisson26.png FramePoisson27.png FramePoisson28.png FramePoisson29.png FramePoisson30.png FramePoisson31.png FramePoisson32.png FramePoisson33.png FramePoisson34.png FramePoisson35.png FramePoisson36.png FramePoisson37.png FramePoisson38.png FramePoisson39.png FramePoisson40.png FramePoisson41.png Poisson.gif")

Poisson.gif


一般化線形モデル(二項分布 / ロジットリンク関数 / β_0 + β_1 x)


(ロジスティック回帰モデル)

fit <- glm(cbind(y,8-y)~x, data=d0, family=binomial(link=logit))

xrange <- seq(0, 2, by=0.05)
for(j in 1:length(xrange)){
x.v <- c(0 + xrange[j])
input <- data.frame(x=x.v, y=exp(-7.732+4.554*x.v)/(1+exp(-7.732+4.554*x.v)))
df <- data.frame()
x0 <- seq(0, 8)
y0 <- dbinom(x0, 8, input$y)
df.point <- data.frame(x=x.v, y=input$y)
df <- data.frame(x=x0, y=y0, xmin=(x.v-y0), xmax=x.v, ymin=x0-0.3, ymax=x0+0.3)

# 左の図
p <- ggplot(df, aes(x=x, y=y))
p <- p + geom_rect(fill="#F8766D", aes(xmin=df$xmin, xmax=df$xmax, ymin=df$ymin, ymax=df$ymax) , alpha=.9)
p <- p + stat_function(fun = function(x) exp(-7.732+4.554*x)/(1+exp(-7.732+4.554*x)), color="green")
p <- p + geom_point(data=df.point, size=5, color="green")
p <- p + geom_point(data=d0, size=5)
p <- p + theme(
axis.title.x = element_text(size=16),
axis.title.y = element_text(size=16, angle=0),
axis.text.x = element_text(size=14),
axis.text.y = element_text(size=14)
)
p <- p + xlim(-1.5, 2)
p <- p + ylim(-4, 8)

# 右の図
p2 <- ggplot(df, aes(x=x, y=y))
p2 <- p2 + geom_rect(fill="#F8766D", aes(xmin=(df$x-0.3), xmax=(df$x+0.3), ymin=0, ymax=y) , alpha=.9)
p2 <- p2 + ylab("Density")
p2 <- p2 + annotate("text", x = 6, y = 0.8, label = "y ~ Bin(N, q)\nq = β_0 + β_1 x", size=5)
p2 <- p2 + annotate("text", x = 6, y = 0.7, label = "↓", size=5)
p2 <- p2 + annotate("text", x = 6, y = 0.6, label = "y ~ Bin(8, q)\nq = -7.732 + 4.554 x", size=5)
p2 <- p2 + xlim(-1, 8)
p2 <- p2 + ylim(0, 1)

png(file=paste0("FrameLogistic", j, ".png"), width=960, height=480)
grid.newpage()
pushViewport(viewport(layout=grid.layout(1, 2)))
plot(p, vp=viewport(layout.pos.row=1, layout.pos.col=1))
plot(p2, vp=viewport(layout.pos.row=1, layout.pos.col=2))
dev.off()
}

# ImageMagickがインストールされている前提
system("convert FrameLogistic1.png FrameLogistic2.png FrameLogistic3.png FrameLogistic4.png FrameLogistic5.png FrameLogistic6.png FrameLogistic7.png FrameLogistic8.png FrameLogistic9.png FrameLogistic10.png FrameLogistic11.png FrameLogistic12.png FrameLogistic13.png FrameLogistic14.png FrameLogistic15.png FrameLogistic16.png FrameLogistic17.png FrameLogistic18.png FrameLogistic19.png FrameLogistic20.png FrameLogistic21.png FrameLogistic22.png FrameLogistic23.png FrameLogistic24.png FrameLogistic25.png FrameLogistic26.png FrameLogistic27.png FrameLogistic28.png FrameLogistic29.png FrameLogistic30.png FrameLogistic31.png FrameLogistic32.png FrameLogistic33.png FrameLogistic34.png FrameLogistic35.png FrameLogistic36.png FrameLogistic37.png FrameLogistic38.png FrameLogistic39.png FrameLogistic40.png FrameLogistic41.png Logistic.gif")

Logistic.gif

ロジスティック回帰モデルが思ったほど、シグモイドにならなかった。。。

(↑期待値の計算を間違えてました)