3
2

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?

More than 3 years have passed since last update.

確率・統計(「経済分析のための統計的方法 岩田 暁一先生」)

Last updated at Posted at 2018-07-09

#確率・統計(「経済分析のための統計的方法」)

#cochran theorem

library(dplyr);library(mvtnorm)

Q=0;S=0;n=20000

r=0;Q_vec=c();

X=cbind(rnorm(n,82,5),rnorm(n,58,9))

average=apply(X,2,mean);sigma=cov(X)

X=rmvnorm(n,mean=average,sigma=sigma)

random=c()

for(j in 1:n){

S=S+sigma

Q=Q+(X[j,]-average)%*%solve(sigma)%*%t(t(X[j,]-average))

Q_vec=c(Q_vec,(X[j,]-average)%*%solve(sigma)%*%t(t(X[j,]-average)))

random=c(random,rchisq(1,df=2))

}

Q_values=data.frame(num=1:n,Q=round(Q_vec))

summary_Q=Q_values %>% group_by(Q) %>% summarise(n=n())

plot(summary_Q$Q,summary_Q$n,type="o")

randoms=data.frame(num=1:n,random=round(random)) %>% group_by(random) %>% summarise(n=n())

plot(randoms$random,randoms$n,type="o")




#p.44

#二項分布の計算

#度数
n=10

#確率
p=0.3

#二項分布
density_func=dbinom(c(0:n),size=n,prob=p)

#二項分布平均
mu=sum(c(0:n)*dbinom(c(0:n),size=n,prob=p))

#二項分布分散
sigma2=sum(((c(0:n)-mu)^2)*dbinom(c(0:n),size=n,prob=p))

#y=a+b*x;xはdbinom(c(0:n),size=n,prob=p)に従う。
a=1;b=2

#乱数を発生させてプロットする
N=1000

plot(c(1:N),a+b*rbinom(N,size=n,prob=p),xlab="乱数発生数",ylab="値",col=2,type="p")

#対数の法則を用いた平均値(a+b*x)の計算方法
mean(a+b*rbinom(N,size=n,prob=p))

#実際の値の平均値(a+b*x)
a+b*mu

#a+b*xの分散

#対数の法則を用いた分散(a+b*x)の計算方法
var(a+b*rbinom(N,size=n,prob=p))

#実際の値の分散(a+b*x)
(b^2)*sigma2

#chebychev inequality

lambda=2

chebychev_data=data.frame(value=abs(rbinom(N,size=n,prob=p)-mu),upper=1/(lambda^2))

print(paste0("lambda*sqrt(sigma2)は",lambda*sqrt(sigma2)))

chebychev_data=chebychev_data %>% dplyr::filter(value>=lambda*sqrt(sigma2))

print(paste0("左辺の確率は",nrow(chebychev_data)/N,"で<=の値は",1/(lambda^2)))




#poisson dis;p.47

#少数の法則

n=100;lambda=1;

#二項分布

binom=function(x){
  
z<-(prod(c(1:n))/(prod(c(1:(n-x)))*prod(c(1:x))))*((lambda/n)^x)*((1-lambda/n)^(n-x))

return(z)
  
}

#ポアソン分布

pois=function(x){
  
z<-exp(-lambda)*(lambda^x)/prod(c(1:x))

return(z)
  
}

binom_pois_dis=data.frame(x=c(1:(n-1)),binom=0,pois=0)

for(j in 1:nrow(binom_pois_dis)){
  
binom_pois_dis$binom[j]=binom(binom_pois_dis$x[j])  
  
binom_pois_dis$pois[j]=pois(binom_pois_dis$x[j])  

}

#出力

plot(binom_pois_dis$x,binom_pois_dis$binom,type="p",col=2,xlab="x",ylab="density",main="binom")

plot(binom_pois_dis$x,binom_pois_dis$pois,type="p",col=3,xlab="x",ylab="density",main="pois")

#ラザフォード・ガイガーの実験結果p.49

data=data.frame(x=c(0:14),freq=c(57,203,383,525,532,408,273,139,45,27,10,4,0,1,1),predict=0)

lambda=sum(data$x*data$freq)/sum(data$freq)

for(j in 1:nrow(data)){
  
data$predict[j]=sum(data$freq)*dpois(data$x[j],lambda=lambda)
  
}

#結合分布p.50

#x1をさいころの目の数、x2をさいころを振った回数コインを投げたときの表の数

f=function(x,y){
  
z<-(1/6)*(gamma(x+1)/(gamma(y+1)*gamma(x-y+1)))*(1/2)^x

return(z)  
  
}


prob_matrix=array(0,dim=c(6,7))

colnames(prob_matrix)=c("0","1","2","3","4","5","6")

rownames(prob_matrix)=c("1","2","3","4","5","6")

for(i in 0:6){
for(j in 1:6){  
  
prob_matrix[j,(i+1)]=f(j,i) 
   
}  
}

#表3.4(p.51)
prob_matrix[is.nan(prob_matrix)==T]=0

#2次元正規分布のプロット

n=10

dnorm_data=data.frame(x=0,y=0,density=0)

for(j in (-n):n){
  
 data=data.frame(x=j,y=c((-n):n),density=0)
 
 dnorm_data=rbind(dnorm_data,data)
  
}

dnorm_data=tail(dnorm_data,nrow(dnorm_data)-1)

for(j in 1:nrow(dnorm_data)){
  
var=matrix(c(1,0.2,0.5,1),nrow=2,ncol=2)

mean=c(1,2);X=c(dnorm_data$x[j],dnorm_data$y[j])
  
dnorm_data$density[j]=(1/(2*pi*sqrt(det(var))))*exp(-(1/2)*(X-mean)%*%solve(var)%*%(X-mean))
  
}

library(rgl)

plot3d(dnorm_data$x,dnorm_data$y,dnorm_data$density,type="s",col=2,xlab="x",ylab="y",zlab="density",main="normal_dis")



#第4章

#標本平均の平均、分散

#定理4.1より、具体的に表出してみる

#正規乱数よりサンプル

N=100

x=rnorm(N,mean=1,sd=1)

#サンプル数をn、抽出回数をtimesとする
n=10;times=100

sampling_data=data.frame(times=c(1:times),mean=0)

for(j in 1:nrow(sampling_data)){
  
samples=sample(x,n)  
  
sampling_data$mean[j]=mean(samples)

}

#samplingによって発生した平均値の平均、分散
sampling_mean=mean(sampling_data$mean);sampling_sd=sd(sampling_data$mean)

#理論値(定理4.1)
mean=mean(x);sd=sqrt((N-n)/(N-1))*(sd(x)/sqrt(n))


#例題4.2.2

#ある村に成年男子200人居て、そのうち160人がたばこを吸う習慣がある。今その中から無作為に50人の成年男子を選び、たばこを吸う人の割合を調べる。標本における喫煙者率をpとすれば、pの平均と分散はどうなるか。

#二項母集団からの抽出

p=160/200;q=1-p

mu=1*p+0*(1-p)

var=p*q

n=50;N=200

sampling_ave=p;sampling_var=((N-n)/(N-1))*(p*q/n)

#正規化

mean=8;sd=2

N=1000

x=rnorm(N,mean=mean,sd=sd)

#正規乱数のプロット

plot(x,main=paste0("平均",mean,";分散",sd,"の正規乱数"))

#正規化後のプロット

plot((x-mean)/sd,main=paste0("正規化後の平均",mean,";分散",sd,"の正規乱数"))

#対数の法則による平均値と標準偏差

print(paste0("平均は",mean((x-mean)/sd),";標準偏差は",sd((x-mean)/sd)))

#区間推定

alpha=0.01

mean=1;sd=5;

under=mean+qnorm(alpha)*sd

upper=mean+qnorm(1-alpha)*sd


plot(c(-100:100),dnorm(c(-100:100),mean=mean,sd=sd),type="o",col=2,xlab="x",ylab="prob",xlim=c(-100,100),ylim=c(0,0.1),main=paste0((1-2*alpha)*100,"%区間"))

par(new=T)

plot(rep(under,2),c(-100,100),type="o",col=3,xlab="x",ylab="prob",xlim=c(-100,100),ylim=c(0,0.1),main=paste0((1-2*alpha)*100,"%区間"))

par(new=T)

plot(rep(upper,2),c(-100,100),type="o",col=3,xlab="x",ylab="prob",xlim=c(-100,100),ylim=c(0,0.1),main=paste0((1-2*alpha)*100,"%区間"))


#p.80 

#演習問題1

xy=array(0,dim=c(6,6))

for(j in 1:nrow(xy)){
for(i in 1:ncol(xy)){  
  
xy[i,j]=i+j 
   
}
}

#E[x]

average=sum(xy*(1/6)^2)

#sigma2

sigma2=sum((xy^2)*(1/6)^2)-average^2





#区間推定(中心極限定理)

n=100

alpha=0.025

x=rnorm(N,mean=2,sd=3)

mean=mean(x);sd=sd(x);

under=mean+qnorm(alpha)*sd/sqrt(n)

upper=mean+qnorm(1-alpha)*sd/sqrt(n)

print(paste0((1-2*alpha)*100,"% 区間は【",under," , ",upper,"】"))



#非復元抽出での正規区間の推定

#平均と分散の確認

N=1000;n=100

x=rnorm(N,mean=2,sd=3)

times=100

times_data=data.frame(times=c(1:times),mean=0)

for(j in 1:nrow(times_data)){
  
times_data$mean[j]=mean(sample(x,n) ) 
  
}

#反復抽出による平均と分散

sampling_ave=mean(times_data$mean)

sampling_var=var(times_data$mean)

#理論値

ave=mean(x)

var=((N-n)/(N-1))*(var(x)/n)

#区間推定

alpha=0.05

under=ave+qnorm(alpha)*sqrt(var)

upper=ave+qnorm(1-alpha)*sqrt(var)

print(paste0((1-2*alpha)*100,"% 区間は【",under," , ",upper,"】"))




#例題4.4.3 p.91

N=500;n=25;mu=104;sd=4.5

sd_ave=sqrt((N-n)/(N-1))*sd/sqrt(n)

print(paste0("確率は",1-pnorm((106-mu)/sd_ave)))

#演習問題1

#(a)
pnorm(110,mean=100,sd=20)

#(b)

1-pnorm(95,mean=100,sd=20)

#(c)

pnorm(130,mean=100,sd=20)-pnorm(80,mean=100,sd=20)

#演習問題2






#一変数関数の積分

#simpson積分公式による積分値の計算(一変数関数の積分)
func=function(x){
  y<-x*sin(x)*cos(x^2)+x+4*x+x^2
  return(y)
}

a=0;H=42;n=100000

Y=(H/3)*(1/(n-1))*(func(a)+func(a+H));

for(j in 1:(n-2)){
  coefficient=ifelse(j-floor(j/2)*2==0,4,2)
  Y=Y+((H*coefficient)/(3*(n-1)))*func(a+(j*H)/(n-1))

}
#simpson積分値
print(Y)
#packageによる計算
integrate(func,a,a+H)






#他変数関数の積分

f=function(x){

  z<-exp(-(x[1]*x[3]+sqrt(x[1]*x[2]*x[4])+sin(x[2]*x[3])+cos(x[4]*x[5])+x[5]^2))

  return(z)

}

N=10000

h=0.01

#積分領域
x11=0;x12=1
x21=0;x22=1
x31=0;x32=1
x41=0;x42=1
x51=0;x52=1

f_value=c();


for(j in 1:N){

x=array(0,dim=c(1,5))

x[1]=sample(seq(x11,x12,h),1)

x[2]=sample(seq(x21,x22,h),1)

x[3]=sample(seq(x31,x32,h),1)

x[4]=sample(seq(x41,x42,h),1)

x[5]=sample(seq(x51,x52,h),1)

f_value=c(f_value,f(x))


}



alpha=0.95



print(paste0(alpha*100,"%区間は(",(1/N)*sum(f_value)+qnorm((1-alpha)/2)*(1/sqrt(N))*(sqrt(sum((f_value-(1/N)*sum(f_value))^2)/(N-1)))," , ",(1/N)*sum(f_value)+qnorm(alpha+(1-alpha)/2)*(1/sqrt(N))*(sqrt(sum((f_value-(1/N)*sum(f_value))^2)/(N-1))),")"))

print(paste0("推定誤差は",qnorm(alpha+(1-alpha)/2)*(1/sqrt(N))*(sqrt(sum((f_value-(1/N)*sum(f_value))^2)/(N-1)))))

print(paste0("推定値は",(1/N)*sum(f_value)))

#p.105 積率母関数

t=2

func=function(x){
  y<-(1/sqrt(2*pi*10))*exp(-((x-2)^2)/(2*10))
  return(exp(t*x)*y)
}

a=-100;H=200;n=100000

Y=(H/3)*(1/(n-1))*(func(a)+func(a+H));

for(j in 1:(n-2)){
  coefficient=ifelse(j-floor(j/2)*2==0,4,2)
  Y=Y+((H*coefficient)/(3*(n-1)))*func(a+(j*H)/(n-1))

}



#simpson積分値

print(Y)

#packageによる計算

integrate(func,a,a+H)

#実際のlaplace変換の値

exp(2*t+(t^2)*10/2)

#積率母関数の微分

h=0.01

t=h

func=function(x){
  y<-(1/sqrt(2*pi*10))*exp(-((x-2)^2)/(2*10))
  return(exp(t*x)*y)
}

a=-100;H=200;n=100000

Y=(H/3)*(1/(n-1))*(func(a)+func(a+H));

for(j in 1:(n-2)){
  coefficient=ifelse(j-floor(j/2)*2==0,4,2)
  Y=Y+((H*coefficient)/(3*(n-1)))*func(a+(j*H)/(n-1))

}

Y1=Y

t=0

func=function(x){
  y<-(1/sqrt(2*pi*10))*exp(-((x-2)^2)/(2*10))
  return(exp(t*x)*y)
}

a=-100;H=200;n=100000

Y=(H/3)*(1/(n-1))*(func(a)+func(a+H));

for(j in 1:(n-2)){
  coefficient=ifelse(j-floor(j/2)*2==0,4,2)
  Y=Y+((H*coefficient)/(3*(n-1)))*func(a+(j*H)/(n-1))

}

Y2=Y

t=-h

func=function(x){
  y<-(1/sqrt(2*pi*10))*exp(-((x-2)^2)/(2*10))
  return(exp(t*x)*y)
}

a=-100;H=200;n=100000

Y=(H/3)*(1/(n-1))*(func(a)+func(a+H));

for(j in 1:(n-2)){
  coefficient=ifelse(j-floor(j/2)*2==0,4,2)
  Y=Y+((H*coefficient)/(3*(n-1)))*func(a+(j*H)/(n-1))

}

real_y=Y

print(paste0("積率母関数の原点での微分もしくは平均値は",(floor((Y1-Y2)*10^3)*10^(-3))/h))

print(paste0("積率母関数の原点での2回微分もしくは分散は",(floor((Y1-2*Y2+real_y)*10^3)*10^(-3))/(h^2)))



#p.112

#密度関数fに対するlaplace transformの収束

t=0.2

#二項分布

p=0.5;NN=100;N=100

#(25)

func=function(x){

z<-exp(t*(x)/(sqrt(N)))*dexp(x,rate=p)
  
return(z)
  
}

#積分値

floor(integrate(func,0,Inf)$value)^NN

#実際の値

floor(exp((t^2)/2))


#定理5.3

#二項分布における中心極限定理

N=10000;n=100;p=0.4

samples=rbinom(N,n,p)

nums=unique(samples)

data=data.frame(nums=nums,prob=0)

for(j in 1:nrow(data)){
  
data$prob[j]=length(samples[samples==data$nums[j]])/N  
  
}

mean=n*p;sd=sqrt(n*p*(1-p))

data=data %>% dplyr::mutate(norm=dnorm(nums,mean,sd))

#赤線が二項分布、緑の点が正規分布

plot(data$nums,data$prob,xlab="nums",ylab="prob",col=2,type="h",xlim=c(min(data$nums),max(data$nums)),ylim=c(min(data$prob,data$norm),max(data$prob,data$norm)))

par(new=T)

plot(data$nums,data$norm,xlab="nums",ylab="prob",col=3,type="p",xlim=c(min(data$nums),max(data$nums)),ylim=c(min(data$prob,data$norm),max(data$prob,data$norm)))




#多項分布

N=10000;n=100

prob=c(0.1,0.2,0.3,0.2,0.2)

samples=rmultinom(N,n,prob=prob)

nums=unique(as.numeric(samples))

data=data.frame(nums=nums,prob=0)

for(j in 1:nrow(data)){
  
data$prob[j]=length(samples[samples==data$nums[j]])/N  
  
}

plot(data$nums,data$prob,type="p",main=paste0("全度数の標本平均は",mean(n*prob)),xlim=c(min(nums),max(nums)),ylim=c(0,max(data$prob)),xlab="nums",ylab="prob",col=2)

params=5



moment=function(t,p,n){
  
return(sum(p*exp(t))^n)  
  
}

#E(ni)

n=10

p=c(0.1,0.1,0.1,0.1,0.6)

t=rep(0,5);h=0.0001

t_h=t;t_h[5]=t_h[5]+h

#E(ni)=n*p

floor((moment(t_h,p,n)-moment(t,p,n))/h)==floor(n*p)[5]

#E(ni^2)

t=rep(0,5);h=0.0001

t_plus=t;t_minus=t;t_plus[5]=t_plus[5]+h;t_minus[5]=t_minus[5]-h

#E(ni^2)=n*pi+n*(n-1)*pi^2

floor((moment(t_plus,p,n)-2*moment(t,p,n)+moment(t_minus,p,n))/(h^2))==floor(n*p+n*(n-1)*p^2)[5]

#sigma_ni^2

floor((moment(t_plus,p,n)-2*moment(t,p,n)+moment(t_minus,p,n))/(h^2))-floor((moment(t_h,p,n)-moment(t,p,n))/h)^2==floor(n*p*(1-p))[5]

#E(ni*nj)





#p.122

N=100

samples=rnorm(N,mean=2,sd=3)

plot(samples,type="p",col=2,main="平均2、分散9の正規乱数")

plot((samples-mean(samples))/sd(samples),col=2,type="p",main="正規化された乱数")

times=100000

kai_data=data.frame(num=1:times,kai=0)

mean=mean(samples);sd=sd(samples)

N=10

for(j in 1:times){

samples=rnorm(N,mean=2,sd=3)  
  
kai_data$kai[j]=sum((samples-mean)^2/(sd^2))  
  
}

kai_data$kai=round(kai_data$kai)

nums=sort(unique(round(kai_data$kai)))

kai_num=data.frame(nums=nums,freq=0)

for(j in 1:nrow(kai_num)){
  
kai_num$freq[j]=(sum(kai_data$kai==nums[j])/times ) 
  
}

kai_num=kai_num %>% dplyr::mutate(prob=dchisq(nums,N))


#乱数と実際のカイ二乗の密度

plot(kai_num$nums,kai_num$freq,col=2,type="h",xlab="num",ylab="prob",xlim=c(min(nums),max(nums)),ylim=c(min(kai_num$freq,kai_num$prob),max(kai_num$freq,kai_num$prob)))

par(new=T)

plot(kai_num$nums,kai_num$prob,col=3,type="p",xlab="num",ylab="prob",xlim=c(min(nums),max(nums)),ylim=c(min(kai_num$freq,kai_num$prob),max(kai_num$freq,kai_num$prob)))


#カイ二乗検定

#図6.3

m=5

kai_data=data.frame(x=seq(0,15,0.01),density=dchisq(seq(0,15,0.01),m),prob=pchisq(seq(0,15,0.01),m))

kai_data=kai_data[order(kai_data$x),]

laplace_kai=function(s,N,z){

func=function(x){
  y<-exp(s*x)*dchisq(x,N)
  return(y)
}

a=0;H=z;n=100000

Y=(H/3)*(1/(n-1))*(func(a)+func(a+H));

for(j in 1:(n-2)){
  coefficient=ifelse(j-floor(j/2)*2==0,4,2)
  Y=Y+((H*coefficient)/(3*(n-1)))*func(a+(j*H)/(n-1))
}

return(Y)

}

#自由度2のカイ二乗分布の平均値

(laplace_kai(0.001,2,1000)-laplace_kai(0,2,1000))/0.001

#自由度2のカイ二乗分布の分散

(laplace_kai(0.001,2,1000)-2*laplace_kai(0,2,1000)+laplace_kai(-0.001,2,1000))/(0.001)^2-((laplace_kai(0.001,2,1000)-laplace_kai(0,2,1000))/0.001)^2

#カイ二乗分布の標準化した乱数

N=1000000;m=5

samples=(rchisq(N,m)-m)/sqrt(2*m)

samples=round(samples*10)/10

nums=sort(unique(samples))

kai_data=data.frame(nums=nums,freq=0,normal=0)

for(j in 1:nrow(kai_data)){
  
kai_data$freq[j]=length(samples[samples<=kai_data$nums[j]])/N 

kai_data$normal[j]=pnorm(kai_data$nums[j])
  
}

#平均値

mean(samples)

#標準偏差

sd(samples)


#カイ二乗分布の標準化した乱数による分布と正規分布による分布

plot(kai_data$nums,kai_data$freq,xlim=c(min(nums),max(nums)),ylim=c(min(kai_data$freq,kai_data$normal),max(kai_data$freq,kai_data$normal)),xlab="x",ylab="prob",type="h",col=2)

par(new=T)

plot(kai_data$nums,kai_data$normal,xlim=c(min(nums),max(nums)),ylim=c(min(kai_data$freq,kai_data$normal),max(kai_data$freq,kai_data$normal)),xlab="x",ylab="prob",type="p",col=3)



#(23)

N=10000

samples=(rnorm(N,mean=2,sd=5)-rnorm(N,mean=2,sd=5))/(sqrt(2)*5)

#平均値

mean(samples)

#分散

var(samples)




#p.137

#定理6.3

N=10000

M=5

samples=rnorm(N)/sqrt(rchisq(N,M)/M)

plot(samples,col=2,type="p",main=paste0("t;平均、",round(mean(samples)),",標準偏差、",floor(sd(samples))))

plot(rt(N,M),col=3,type="p",main=paste0("student t分布の乱数;平均、",floor(abs(mean(rt(N,M)))),",標準偏差、",floor(sd(rt(N,M)))))

samples=round(samples*100)/100

values=unique(samples)

values_data=data.frame(values=values,freq=0)

for(j in 1:nrow(values_data)){
  
values_data$freq[j]=length(samples[samples==values_data$values[j]])/N  
  
}

values_data=values_data[order(values_data$values),]

values_data=values_data %>% dplyr::mutate(prob=cumsum(freq))

#実際のサンプルから作った分布と比較

plot(values_data$values,values_data$prob,type="h",col=2,xlim=c(min(values_data$values),max(values_data$values)),ylim=c(0,1),xlab="x",ylab="prob")

par(new=T)

plot(values_data$values,pt(values_data$values,M),type="p",col=3,xlim=c(min(values_data$values),max(values_data$values)),ylim=c(0,1),xlab="x",ylab="prob")

t_moment=function(s){
  
 func=function(x){
  y<-(1/((2^(M/2))*gamma(M/2)))*(x^(s+M/2-1))*exp(-(1/2)*x)
  return(y)
}

a=0;H=200;n=100000

Y=(H/3)*(1/(n-1))*(func(a)+func(a+H));

for(j in 1:(n-2)){
  coefficient=ifelse(j-floor(j/2)*2==0,4,2)
  Y=Y+((H*coefficient)/(3*(n-1)))*func(a+(j*H)/(n-1))

} 

return(Y)  
  
}

#s>=-1

s=-1

#実際の値モーメント(student t):

(2^s)*gamma((M/2)+s)/gamma(M/2)

#数値積分の値(student t):

t_moment(s)

#2r次の積率

norm_moment=function(r){
  
  func=function(x){
  
  z<-(x^(2*r))*(1/sqrt(2*pi))*exp(-(1/2)*x^2)
  
  return(z)
  
  }
  
  a=-100;H=200;n=100000

Y=(H/3)*(1/(n-1))*(func(a)+func(a+H));

for(j in 1:(n-2)){
  coefficient=ifelse(j-floor(j/2)*2==0,4,2)
  Y=Y+((H*coefficient)/(3*(n-1)))*func(a+(j*H)/(n-1))

} 

return(Y)

}

#E(t^(2*r))

moment_t_func=function(x){
  
z<-(M^x)*(gamma(1/2+x)*gamma(M/2-x))/(gamma(1/2)*gamma(M/2))

return(z)
  
}

#Snedecor`s F distribution

N=1000000

m1=10;m2=5

samples=(rchisq(N,m1)/m1)/(rchisq(N,m2)/m2)

#実際の値(平均値)

mean(samples)

#理論値(平均値)

m2/(m2-2)

#実際の値(標準偏差)

sd(samples)

#理論値(標準偏差)

sqrt((2*(m2^2)*(m1+m2-2))/(m1*((m2-2)^2)*(m2-4)))



samples=round(samples*100)/100

nums=sort(unique(samples))

F_data=data.frame(nums=nums,freq=0,prob=0)

for(j in 1:nrow(F_data)){
  
F_data$freq[j]=length(samples[samples==F_data$nums[j]])/N  

F_data$prob[j]=pf(F_data$nums[j],m1,m2)
  
}

F_data=F_data %>% dplyr::mutate(dis=cumsum(freq))

F_data=head(F_data,nrow(F_data)-50)



#F distribution plot

plot(F_data$nums,F_data$dis,type="h",col=2,xlim=c(min(F_data$nums),max(F_data$nums)),ylim=c(min(F_data$prob,F_data$dis),max(F_data$dis,F_data$prob)),xlab="F",ylab="f(F)")

par(new=T)

plot(F_data$nums,F_data$prob,type="p",col=3,xlim=c(min(F_data$nums),max(F_data$nums)),ylim=c(min(F_data$prob,F_data$dis),max(F_data$dis,F_data$prob)),xlab="F",ylab="f(F)")



#p.148

#演習問題1

#標本数

n1=10;n2=20

#pf(2,n1,n2)は2以下の値をとる確率だから1-pf(2,n1,n2)は2より大きい確率

(1-pf(2,n1,n2))>0.05

#乱数から抽出してやってみる!

times=100000

freq=0

for(j in 1:times){

s1_2=var(rnorm(10,mean=8,sd=5));s2_2=var(rnorm(20,mean=0,sd=5))

if((s1_2/s2_2)>2){
  
 freq=freq+1
  
}

}

floor(100*freq/times)/100


#p.148

#演習問題2

times=1000

m=10

samples=rt(times,m)

samples=round(100*samples^2)/100

nums=sort(unique(samples))

data=data.frame(nums=nums,freq=0,dis=0)

for(j in 1:nrow(data)){
  
data$freq[j]=length(samples[samples==nums[j]])/times  

data$dis[j]=pf(data$nums[j],1,m)  
  
}

data=data %>% dplyr::mutate(frequency=cumsum(freq))

plot(data$nums,data$frequency,type="h",col=2,xlab="x^2",ylab="F_dis",xlim=c(min(nums),max(nums)),ylim=c(0,1))

par(new=T)

plot(data$nums,data$dis,type="p",col=3,xlab="x^2",ylab="F_dis",xlim=c(min(nums),max(nums)),ylim=c(0,1))


#the moment of F distribution 

k=2;m1=3;m2=5

moment_chisq1=function(x){
  
z<-(x^k)*(1/(2*gamma(m1/2)))*((x/2)^(m1/2-1))*exp(-(1/2)*x)  

return(z)

}

moment_chisq2=function(x){
  
z<-(x^(-k))*(1/(2*gamma(m2/2)))*((x/2)^(m2/2-1))*exp(-(1/2)*x)  

return(z)

}

#(12)から求めた値

(((m2/m1)^k)*integrate(moment_chisq1,0,Inf)$value*integrate(moment_chisq2,0,Inf)$value)

#(13)から求めた値

(((m2/m1)^k)*(gamma(m1/2+k)*gamma(m2/2-k)))/(gamma(m1/2)*gamma(m2/2))



#p.152

#Remark

#通常の区間推定

N=100

samples=rnorm(N,mean=3,sd=4)

average=mean(samples);sd=sd(samples)

alpha=0.05

under=average+qnorm(alpha/2)*sd/sqrt(N)

upper=average+qnorm(1-alpha/2)*sd/sqrt(N)

#データの平均値との絶対誤差の大きさhを決めることにより、サンプル数を決定する。

h=2

n=(sd*qnorm(1-alpha/2)/h)^2

#標本抽出による区間推定

N=1000;n=10

samples=rnorm(N,mean=3,sd=4)

average=mean(samples);sd=sd(samples)

alpha=0.05

under=average+qnorm(alpha/2)*(sd/sqrt(N))*(sqrt((N-n)/(N-1)))

upper=average+qnorm(1-alpha/2)*(sd/sqrt(N))*(sqrt((N-n)/(N-1)))

#小標本の推定(通常はNが25未満)

N=10;

samples=rnorm(N,mean=3,sd=4)

average=mean(samples);sd=sd(samples)

alpha=0.1

under=average+qt(alpha/2,N)*sd(samples)/sqrt(N)

upper=average+qt(1-alpha/2,N)*sd(samples)/sqrt(N)

#例題7.3.2

samples=c(2.1,3.5,-1.8,7.3,4)

x_bar=mean(samples)

S2=(1/(length(samples)-1))*(sum(samples^2)-length(samples)*mean(samples)^2)

s=sqrt(S2)

alpha=0.05

under=x_bar+qt(alpha/2,df=length(samples)-1)*s/sqrt(length(samples))

upper=x_bar+qt(1-alpha/2,df=length(samples)-1)*s/sqrt(length(samples))

t_dis=data.frame(x=seq(-10,10,0.1),density=dt(seq(-10,10,0.1),length(samples)-1))

#t分布プロット

plot(t_dis$x,t_dis$density)

#母分散の推定

#全母集団の中からn個取り出した標準偏差sの区間推定

var=c();kai2=c()

times=1000

mu=3;sigma=3;N=20

for(j in 1:times){

samples=rnorm(N,mean=mu,sd=sigma)

var=c(var,var(samples))

kai2=c(kai2,((N-1)*var(samples))/(sigma^2))

}

#標本標準偏差の平均

mean(var)

#区間推定:s2

alpha=0.025

under=(qchisq(alpha,df=N-1)*sigma^2)/(N-1)

upper=(qchisq(1-alpha,df=N-1)*sigma^2)/(N-1)

#区間推定:sigma2

alpha=0.025

samples=rnorm(N,mean=mu,sd=sigma)

s2=var(samples)

under=((N-1)*s2)/qchisq(1-alpha,df=N-1)

upper=((N-1)*s2)/qchisq(alpha,df=N-1)

#カイ二乗分布のプロット

chisq_data=data.frame(x=seq(0,200,1),density=dchisq(seq(0,200,1),N-1))

plot(chisq_data$x,chisq_data$density,type="o")



3
2
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
3
2

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?