#データが多変量正規分布に従うと見たとき、独立的な変数とみなしてt検定したほうがいいのか、そうでないのか
library(mvtnorm)
sigma_vec<- matrix(c(5,7,7,3), ncol=2)
n=50
data=rmvnorm(n,mean=c(0,0),sigma=sigma_vec)
data=data.frame(data)
ave=apply(data,2,mean);ave_diff=ave*c(1,-1)
S2_paired=sum(apply(t(t(as.matrix(data))-ave_diff),1,sum)^2)/(n-1)
S2_ind=sum(t(t(as.matrix(data))-ave_diff)^2)/(2*(n-1))
T_paired=sum(ave_diff)/sqrt(S2_paired/n)
T_ind=sum(ave_diff)/sqrt(2*S2_ind/n)
alpha=0.05
1-(qt(alpha/2,df=2*(n-1))/qt(alpha/2,df=n-1))^2
T_paired/qt(1-alpha/2,df=n-1)
T_ind/qt(1-alpha/2,df=2*(n-1))
#分散が既知のとき
library(mvtnorm)
sigma_vec<- matrix(c(4,0,0,3), ncol=2)
n=500
samples=rmvnorm(n,mean=c(1,2),sigma=sigma_vec)
mean=c(3,2)
dmvnorm(c(1,2),mean=mean,sigma=sigma_vec)
apply(samples,2,mean)
#kai^2 value
(apply(samples,2,mean)-mean)%*%solve(sigma_vec)%*%(apply(samples,2,mean)-mean)
alpha=0.05
#chisq point
qchisq(alpha,2)
#non-central chisq point
qchisq(alpha,2,(apply(samples,2,mean)-mean)%*%solve(sigma_vec)%*%(apply(samples,2,mean)-mean))
qchisq(1-alpha,2,(apply(samples,2,mean)-mean)%*%solve(sigma_vec)%*%(apply(samples,2,mean)-mean))
#分散が未知のとき
library(mvtnorm)
sigma_vec<- matrix(c(4,0,0,3), ncol=2)
n=500
samples=rmvnorm(n,mean=c(1,2),sigma=sigma_vec)
mean=c(1,2)
dmvnorm(c(1,2),mean=mean,sigma=sigma_vec)
apply(samples,2,mean)
var=diag(0,2)
for(j in 1:n){
var=var+(samples[j,]-mean)%*%t(samples[j,]-mean)
}
#S
var=(1/(n-1))*var
test_value=(apply(samples,2,mean)-mean)%*%solve(var)%*%(apply(samples,2,mean)-mean)
test_value=test_value*((n-2)/((n-1)*2))
#F test
alpha=0.01
qf(alpha,2,n-2)
#p.123 H0:mu1=mu2
#2-variable normal dis
sigma1<- matrix(c(40,1,1,30), ncol=2)
sigma2<- matrix(c(10,1,1,30), ncol=2)
n=100
mean1=c(1,2);mean2=c(1,2)
samples1=rmvnorm(n,mean=mean1,sigma=sigma1)
samples2=rmvnorm(n,mean=mean2,sigma=sigma2)
ave1=(apply(samples1,2,mean)*10)/10
ave2=(apply(samples2,2,mean)*10)/10
ave1
ave2
(ave1-ave2)%*%solve(sigma1/n+sigma2/n)%*%(ave1-ave2)*1/2
alpha=0.2
qchisq(alpha,2)
qchisq(alpha,2,(ave1-ave2)%*%solve(sigma1/n+sigma2/n)%*%(ave1-ave2))
#p.123 H0:mu1=mu2 (sigma is not known)
#2-variable normal dis
sigma1<- matrix(c(40,1,1,30), ncol=2)
sigma2<- matrix(c(10,1,1,30), ncol=2)
n1=100;n2=200
mean1=c(1,2);mean2=c(1,2)
samples1=rmvnorm(n1,mean=mean1,sigma=sigma1)
samples2=rmvnorm(n2,mean=mean2,sigma=sigma2)
ave1=(apply(samples1,2,mean)*10)/10
ave2=(apply(samples2,2,mean)*10)/10
var1=diag(0,2)
for(j in 1:n){
var1=var1+(samples1[j,]-ave1)%*%t(samples1[j,]-ave1)
}
var2=diag(0,2)
for(j in 1:n){
var2=var2+(samples2[j,]-ave2)%*%t(samples2[j,]-ave2)
}
p=2
S=(1/(n1+n2-2))*(var1+var2)
f=((n1*n2)/(n1+n2))*(ave1-ave2)%*%solve(S)%*%(ave1-ave2)*((n1+n2-p-1)/((n1+n2-2)*p))
ave1
ave2
f
alpha=0.2
qf(alpha,p,n1+n2-p-1)