More than 5 years have passed since last update.

【統計学】二項分布、幾何分布をRでシミュレーション

Last updated at 2019-11-18Posted at 2019-09-27

自分用メモも兼ねて初投稿です。
Rを使ったモンテカルロシミュレーションで、ベルヌーイ試行から二項分布と幾何分布を作りました。
自分でシミュレーションを考えたり、パラメーターをいじったりすることで、確率分布の理解がより深まると思います。

追記

コメントでご指摘をいただいたので記事を書き直しました。
ありがとうございました。

参考

こちらのスライドが作るきっかけになりました
指数分布とポアソン分布のいけない関係

ベルヌーイ試行、二項分布、幾何分布の関係

一番わかりやすいベルヌーイ試行の例として、コイン投げを想定しましょう。
表が出たら成功とします。このとき
「成功回数」が従うのが二項分布、
「初めて成功した回数」が従うのが幾何分布になります。

シミュレーションを実行

まずはシミュレーション回数とパラメータを決めます。

N <- 10000 # シミュレーション回数
n <- 10 #　コイン投げの回数
p <- 0.5 # 表の出る確率

シミュレーションを行う関数を作ります。

bernoulli <- function() {
	k.sim <- integer(N)
	j.sim <- integer(N)

	for (i in 1:N) {
	
		k <- 0 # 成功回数
		j = Inf # 初めて成功した回
		counter <- 0 # 試行回数のカウンター		
			
		## n回に満たない,または成功していない場合は処理を繰り返す	
		while (counter < n | j == Inf) {

			counter <- counter + 1

			##　ベルヌーイ試行のシミュレーション
			sim <- sample(0:1, size = 1, replace = TRUE, prob = c(1-p, p))
			
			##　試行回数n回目までは二項分布のkをカウント
			if (counter <= n) {
				k <- k + sim
			}
			
			 ##　成功して、かつ　試行回数が j より小さいとき、j　を更新
			if (counter < j && sim == 1) {
				 j <- counter
			}

		}
	k.sim[i] <- k
	j.sim[i] <- j
	}

	return(list(k = k.sim, j = j.sim))
}

シミュレーションを実行して結果を保存。

sim <- bernoulli()
k.sim <- sim$k
j.sim <- sim$j

二項分布

シミュレーション結果のデータを整理。

### 二項分布

# シミュレーション結果
binom.result <- as.data.frame(table(k.sim)/N)

# シミュレーション結果を入れるデータフレームの作成
binom.sim <- data.frame(k = 0:n, freq = integer(n+1))

# 結果を入れる
for (j in 1:(n+1)) {
	for (i in 1:nrow(binom.result)) {
		if (binom.result[i, 1] == binom.sim$k[j]) {
			binom.sim$freq[j] <- binom.result[i, 2]
		}
	}
}

# 二項分布の理論値
binom.theo <- as.data.frame(dbinom(0:n, size = n, prob = p))
binom.theo$k <- 0:n
colnames(binom.theo) <- c("freq", "k")

## 平均の計算
# 理論値
mean.binom.theo <- n*p
# シミュレーション
mean.binom.sim <- sum(k.sim)/N

## 分散の計算
# 理論値
var.binom.theo <- n*p*(1-p)
# シミュレーション
var.binom.sim <- var(k.sim)

幾何分布

同じ要領で幾何分布もやってみます。

# シミュレーション結果
geo.result <- as.data.frame(table(j.sim)/N)

geo.result$j.sim <- as.numeric(as.character(geo.result$j.sim))

nj <- max(geo.result$j.sim)

geo.sim <- data.frame(freq = rep(0, nj), j = 1:nj)

for (j in 1:nj) {
	for (i in 1:nrow(geo.result)) {
		if (geo.result[i, 1] == geo.sim$j[j]) {
			geo.sim$freq[j] <- geo.result[i, 2]
		}
	}
}
geo.sim <- temp2

# 幾何分布の理論値
geo.theo <- as.data.frame(dgeom(1:nj -1, prob = p)) 
# dgeom(x, p) ではx は失敗回数であることに注意

geo.theo$j <- (1:nj)
colnames(geo.theo) <- c("freq", "j")

## 平均の計算
# 理論値
mean.geo.theo <- 1/p
# シミュレーション
mean.geo.sim <- sum(j.sim)/N

## 分散の計算
# 理論値
var.geo.theo <- (1-p)/p^2
# シミュレーション
var.geo.sim <- var(j.sim)

結果の出力

binom.merge <- merge(binom.sim, binom.theo, by = "k")
binom.plot <- rbind(binom.merge$freq.x, binom.merge$freq.y)
colnames(binom.plot) <- c(0:n)

geo.merge <- merge(geo.sim, geo.theo, by = "j")
geo.plot <- rbind(geo.merge$freq.x, geo.merge$freq.y)
colnames(geo.plot) <- c(1:nj)

par(mfrow = c(2,1)) # 上下に画面を分割

barplot(binom.plot,
	col = c("violetred1", "slateblue4"),
	legend.text = c("シミュレーション", "理論値"),
	args.legend = list(x = "topright"),
	beside = TRUE,
	main = "二項分布",
	xlab = "成功回数", ylab = "確率")


barplot(geo.plot,
	col = c("violetred1", "slateblue4"),
	legend.text = c("シミュレーション", "理論値"),
	args.legend = list(x = "topright"),
	beside = TRUE,
	main = "幾何分布",
	xlab = "初めて成功した回", ylab = "確率")


## 結果の出力
result <- function() {
	cat("二項分布\n シミュレーション\n  平均", mean.binom.sim,
		"\n  分散", var.binom.sim,

		"\n 理論値 \n  平均", mean.binom.theo,
		"\n  分散", var.binom.theo,

		"\n\n幾何分布\n シミュレーション\n  平均", mean.geo.sim,
		"\n  分散", var.geo.sim,

		"\n 理論値\n  平均", mean.geo.theo,
		"\n  分散", var.geo.theo)
}
result()

> result()
二項分布
 シミュレーション
  平均 5.0004 
  分散 2.573257 
 理論値 
  平均 5 
  分散 2.5 

幾何分布
 シミュレーション
  平均 1.9983 
  分散 2.053903 
 理論値
  平均 2 
  分散 2>

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up