#多重比較検定法
#p.41
library(dplyr)
#Two-sample test of means (pooled-variance t-test), p.41
group1=rnorm(100,mean=5.5,sd=4)
group2=rnorm(50,mean=6,sd=5)
#t-test
#Pooled variance: sigma2 = (SS1 + SS2)/(n1 + n2 - 2).
#Fixed: R's var() already divides by (n-1), so each within-group sum of
#squares is (n-1)*var(), not n*var() as the original had.
sigma2=(1/(length(group1)+length(group2)-2))*((length(group1)-1)*var(group1)+(length(group2)-1)*var(group2))
#t = sqrt(n1*n2/(n1+n2)) * (xbar1 - xbar2) / sigma_hat
t=sqrt(length(group1)*length(group2))*(mean(group1)-mean(group2))/sqrt((length(group1)+length(group2))*sigma2)
alpha=0.05
#Two-sided critical value with n1+n2-2 degrees of freedom
qt(alpha/2,df=length(group1)+length(group2)-2)
#TRUE => reject H0: mu1 == mu2
abs(t)>abs(qt(alpha/2,df=length(group1)+length(group2)-2))
#Total sample size N = n1 + n2; the t distribution below then has N-2
#degrees of freedom, consistent with qt(..., df=n1+n2-2) used above.
#Fixed: the original set N = n1 + n2 - 2, which made the density kernel,
#its normalizing constant, and the CI all use df off by two.
N=length(group1)+length(group2)
#Unnormalized Student-t density kernel with N-2 degrees of freedom
func=function(x){
y<-(1+x^2/(N-2))^(-(N-1)/2)
return(y)
}
#Two-sided p-value computed "by hand": 2 * tail integral of the kernel,
#times the normalizing constant gamma((N-1)/2)/(sqrt(pi*(N-2))*gamma((N-2)/2))
p_value=2*integrate(func,abs(t),Inf)$value*gamma((N-1)/2)/(sqrt(pi*(N-2))*gamma((N-2)/2))
#TRUE => H0 rejected at level alpha
p_value<=alpha
#confidence interval for mu1 - mu2 (qt(alpha/2,.) is negative, hence the signs)
under=mean(group1)-mean(group2)+sqrt(N*sigma2)*qt(alpha/2,df=N-2)/sqrt(length(group1)*length(group2))
upper=mean(group1)-mean(group2)-sqrt(N*sigma2)*qt(alpha/2,df=N-2)/sqrt(length(group1)*length(group2))
#wilcoxson test
#Wilcoxon rank-sum (Mann-Whitney) statistic, centered: over all (X,Y) pairs,
#count 1{X > Y} and subtract the null mean n1*n2/2 (ties ignored).
sum_S=0
for(j in 1:length(group1)){
X=group1[j]
sum_S=sum_S+sum(X>group2)-(1/2)*length(group2)
}
t_hat=sum_S
#Normal approximation. The standard null variance is n1*n2*(n1+n2+1)/12;
#N here is carried over from the t-test section above (N = n1+n2-2 as
#written there), so N+1 is slightly off the textbook value -- confirm.
Z_hat=t_hat/sqrt(length(group1)*length(group2)*(N+1)/12)
#NOTE(review): a two-sided test should compare abs(Z_hat); as written a
#large negative Z_hat is never flagged -- confirm against the textbook.
Z_hat>abs(qnorm(alpha/2))
#p.49 theorem 3.3
#Empirically compare the centered rank-sum statistic with its projection
#(Hajek) approximation for two large samples from the same normal law.
n=10000
lambda=0.7
#Group sizes: n1 = lambda*n, n2 = (1-lambda)*n
n1=n*lambda
n2=n-n1
sample1=rnorm(n1,mean=0,sd=5)
sample2=rnorm(n2,mean=0,sd=5)
#Projection term built from the common CDF (here N(0,25)).
#NOTE(review): the second pnorm() centers sample2 by mean(sample1);
#presumably mean(sample2) was intended -- confirm against theorem 3.3.
vp=n2*sum(pnorm(sample1-mean(sample1),mean=0,sd=5)-1/2)-n1*sum(pnorm(sample2-mean(sample1),mean=0,sd=5)-1/2)
#Centered Mann-Whitney counts, one contribution per observation of sample1
sum_S=c()
for(j in 1:length(sample1)){
X=sample1[j]
sum_S=c(sum_S,sum(X>sample2)-(1/2)*length(sample2))
}
t_hat=sum(sum_S)
#Difference between the scaled statistic and its projection (should be small),
#then the scaled statistic itself
sqrt(n)*t_hat/(n1*n2)-sqrt(n)*vp/(n1*n2)
sqrt(n)*t_hat/(n1*n2)
#Show empirically that sqrt(n)*T_hat/(n1*n2) follows
#N(0, 1/(12*lambda*(1-lambda))) when both samples share the same law.
n=10000
lambda=0.7
n1=n*lambda
n2=n-n1
values=c()
times=1000
#Monte Carlo replications of the scaled rank statistic
for(j in 1:times){
sample1=rnorm(n1,mean=3,sd=5)
sample2=rnorm(n2,mean=3,sd=5)
#Projection approximation (same construction as the theorem 3.3 section)
vp=n2*sum(pnorm(sample1-mean(sample1),mean=3,sd=5)-1/2)-n1*sum(pnorm(sample2-mean(sample1),mean=3,sd=5)-1/2)
sum_S=0
#NOTE(review): this inner loop reuses the outer loop's index name j.
#R's for() takes the next value from its own precomputed sequence each
#iteration, so the outer loop still advances correctly, but a different
#index name would be clearer.
for(j in 1:length(sample1)){
X=sample1[j]
sum_S=sum_S+sum(X>sample2)-(1/2)*length(sample2)
}
t_hat=sum_S
print(sqrt(n)*t_hat/(n1*n2)-sqrt(n)*vp/(n1*n2))
values=c(values,sqrt(n)*t_hat/(n1*n2))
}
#Reference sample from the predicted limit N(0, 1/(12*lambda*(1-lambda)))
sample=rnorm(times,mean=0,sd=sqrt(1/(12*lambda*(1-lambda))))
#Z_hat: the same statistic standardized to N(0,1)
Z_hat=sqrt((12*n1*n2)/(n*(n+1)))*values
#plot(t_hat)
sample=rnorm(times,mean=0,sd=sqrt(1/(12*lambda*(1-lambda))))
#Overlay simulated values (red, col=2) and the predicted-law sample (green, col=3)
plot(values,type="p",col=2,xlim=c(1,times),ylim=c(min(c(values,sample)),max(c(values,sample))),xlab="index",ylab="values")
par(new=T)
plot(sample,type="p",col=3,xlim=c(1,times),ylim=c(min(c(values,sample)),max(c(values,sample))),xlab="index",ylab="values")
#plot(Z_hat)
#Same comparison for the N(0,1)-standardized Z_hat
sample=rnorm(times,mean=0,sd=1)
plot(Z_hat,type="p",col=2,xlim=c(1,times),ylim=c(min(c(values,sample)),max(c(values,sample))),xlab="index",ylab="values")
par(new=T)
plot(sample,type="p",col=3,xlim=c(1,times),ylim=c(min(c(values,sample)),max(c(values,sample))),xlab="index",ylab="values")
#hodges-leman p.56
#Hodges-Lehmann estimation: scan a grid of candidate shifts sita and, for
#each, record the Mann-Whitney count U of the shifted sample, its
#proportion, and the median of pairwise differences (the H-L estimate).
library(dplyr)
n=1000
lambda=0.7
n1=n*lambda
n2=n-n1
#True shift between the two samples is 4
sample=rnorm(n1,mean=4,sd=5)
sample2=rnorm(n2,mean=0,sd=5)
#All pairwise differences sample[i] - sample2[j]
vec=c()
for(j in 1:length(sample)){
vec=c(vec,sample[j]-sample2)
}
#Grid of candidate shifts covering the range of pairwise differences
data=data.frame(sita=seq(min(vec),max(vec),0.1),U=0,prob=0,delta=0)
for(i in 1:nrow(data)){
#Shift the first sample by the current candidate
sample1=sample+data$sita[i]
#Pool both samples; sign marks membership (1 = shifted sample1, 0 = sample2);
#rows are ordered decreasingly and given sequential order numbers
order_data=data.frame(sign=0,order=0,sample=c(sample1,sample2))
order_data$sign[1:length(sample1)]=1
order_data=order_data[order(order_data$sample,decreasing = T),]
order_data$order=c(1:nrow(order_data))
order_data_x=order_data %>% filter(sign==1)
order_data_y=order_data %>% filter(sign==0)
U=0;values=c()
for(j in 1:nrow(order_data_x)){
x_value=order_data_x$sample[j]
#U counts pairs with x > y; values collects all pairwise differences
U=U+length(order_data_y$sample[order_data_y$sample<x_value])
values=c(values,x_value-order_data_y$sample)
}
#Rank-sum and centered statistic (computed for reference; not used below)
sum_R=U+(n1*(n1+1))/2
t_hat=sum_R-(n1*(n+1))/2
#Median of pairwise differences = Hodges-Lehmann shift estimate
delta=median(values)
D=sort(values)
mean(sample1)-mean(sample2)
data$U[i]=U;data$prob[i]=U/(n1*n2);data$delta[i]=delta
}
#Linear rank statistics with score function a_n(x) = exp(-x):
#simulate sum(a_n(R)) - n1*mean(a_n(1:n)) and the studentized statistic
#t_vec, then compare their mean/variance with the theoretical values.
n=10000
#Score function applied to ranks
a_n=function(x){
z<-exp(-x)
return(z)
}
lambda=0.7
#Small shift applied to sample1 before ranking
sita=0.01
n1=n*lambda
n2=n-n1
times=1000
t_hat=c()
t_vectors=c()
for(k in 1:times){
sample1=rnorm(n1,mean=0,sd=5)
sample2=rnorm(n2,mean=0,sd=5)
sample=sample1-sita
R=c()
#Rank of each shifted observation within the pooled sample
#(>= on sample1 so each observation counts itself)
for(j in 1:length(sample)){
value=sample[j];value2=sample1[j]
R=c(R,length(sample2[value>sample2])+length(sample1[value2>=sample1]))
}
#Centered rank-sum, studentized as in (3.44)
t_vec=sum(R)-(n1*(n+1))/2
t_vec=t_vec/sqrt(((n1*n2)/(n*(n-1)))*sum((R-(n+1)/2)^2))
t_vectors=c(t_vectors,t_vec)
print(sum(a_n(R))-n1*mean(a_n(c(1:n))))
t_hat=c(t_hat,sum(a_n(R))-n1*mean(a_n(c(1:n))))
}
#Empirical moments of the score statistic vs. the theoretical ones
print(paste0("The average of t_hat:",mean(t_hat),";The variance of t_hat:",var(t_hat)))
print(paste0("The estimated variance:",(n1*n2/(n*(n-1)))*sum((a_n(1:n)-mean(a_n(c(1:n))))^2)))
print(paste0("The estimated average:",0))
#t_vectors follows N(0,1) by (3.44).
print(paste0("t_vectorsの平均は",mean(t_vectors),"で","分散は",var(t_vectors)))
# One-way ANOVA (p.80): F test for equality of the three factories' means.
data=data.frame(
  num=1:6,
  factory1=c(3.1,4.1,3.3,3.9,3.7,2.4),
  factory2=c(4.7,5.6,4.3,5.9,6.1,4.2),
  factory3=c(5.1,3.7,4.5,6,3.9,5.4)
)
# Per-factory means and their grand mean
ave_x=colMeans(data[,2:ncol(data)])
X_bar=mean(ave_x)
X=as.matrix(data[,2:ncol(data)])
k=ncol(X)
n=prod(dim(X))
# Within-group sum of squares: deviations of each column from its own mean
SSW=sum(sweep(X,2,ave_x)^2)
# Total sum of squares around the overall mean; between = total - within
SST=sum((X-mean(X))^2)
SSB=SST-SSW
# F statistic with (k-1, n-k) degrees of freedom
TS=(SSB/(k-1))/(SSW/(n-k))
# TRUE => reject equality of the factory means at the 5% level
TS>qf(1-0.05,k-1,n-k)
p_value=1-pf(TS,k-1,n-k)
#Build n normal samples of unequal sizes N[j], stored as the columns of a
#matrix padded with NA: column j holds N[j] draws from a normal whose mean
#and sd are themselves drawn from 1:10 with the given weights.
n=10
N=sample(100:1000,n,replace=T)
mat=array(0,dim=c(max(N),n))
for(j in seq_len(n)){
#Renamed from `var`, which shadowed stats::var for the rest of the script
draws=rnorm(N[j],mean=sample(1:10,1,prob=c(0.2,0.1,0.1,0,0.1,0.2,0,0.1,0.1,0.1)),sd=sample(1:10,1,prob=c(0.2,0.1,0.1,0,0.1,0.2,0,0.1,0.1,0.1)))
mat[seq_len(N[j]),j]=draws
}
#Unfilled padding cells are still 0 (a continuous draw is never exactly 0),
#so mark them as missing
mat[mat==0]=NA
#Simultaneous confidence intervals: sweep the shift grid sita = 0, h, 2h, ...
#and, for every ordered pair of groups (k,l) from `mat`, record
#Mann-Whitney-type counts at that shift; all matrices are dumped to CSV.
file_names=paste0("~/mat_csv_同時信頼区間/",list.files("~/mat_csv_同時信頼区間/"))
file.remove(file_names)
sita=0;h=0.1;num=500
alpha=0.5
max_sita=1
min_sita=1
min_p_val=0
max_p_val=0
for(m in 1:num){
lambda_vec=N/sum(N)
#Pairwise matrices, refilled from scratch at each shift value
num_mat=array(0,dim=c(length(N),length(N)))
U_mat=array(0,dim=c(length(N),length(N)))
T_mat=array(0,dim=c(length(N),length(N)))
p_U_mat=array(0,dim=c(length(N),length(N)))
abs_U_mat=array(0,dim=c(length(N),length(N)))
sigma_mat=array(0,dim=c(length(N),length(N)))
for(k in 1:length(N)){
for(l in 1:length(N)){
if(k!=l){
#Strip the NA padding from each group's column
sam1=mat[,k];sam2=mat[,l]
sam1=sam1[is.na(sam1)==F];sam2=sam2[is.na(sam2)==F]
U=c();abs_U=c()
for(i in 1:length(sam1)){
value=sam1[i]
#Count pairs whose signed / absolute difference exceeds the current shift
U=c(U,length(sam2[(value-sam2)>sita]))
abs_U=c(abs_U,length(sam2[abs(value-sam2)>sita]))
}
U=sum(U)
U_mat[k,l]=U
#Centered statistic and its null standard deviation
T_mat[k,l]=U-N[k]*N[l]/2
sigma_mat[k,l]=sqrt((N[k]*N[l]*(N[k]+N[l]+1))/12)
num_mat[k,l]=N[k]*N[l]
p_U_mat[k,l]=U/(N[k]*N[l])
abs_U_mat[k,l]=sum(abs_U)/(N[k]*N[l])
}
}
}
write.csv(U_mat,paste0("~/mat_csv_同時信頼区間/U_mat_",sita,".csv"))
write.csv(T_mat,paste0("~/mat_csv_同時信頼区間/T_mat_",sita,".csv"))
write.csv(p_U_mat,paste0("~/mat_csv_同時信頼区間/p_U_mat_",sita,".csv"))
write.csv(num_mat,paste0("~/mat_csv_同時信頼区間/num_mat_",sita,".csv"))
write.csv(sigma_mat,paste0("~/mat_csv_同時信頼区間/sigma_mat_",sita,".csv"))
write.csv(abs_U_mat,paste0("~/mat_csv_同時信頼区間/abs_U_mat_",sita,".csv"))
sita=sita+h
min_p=min(p_U_mat[p_U_mat!=0])
max_p=max(p_U_mat[p_U_mat!=0])
if(m==1){
min_sita=sita
max_sita=sita
min_p_val=min_p
max_p_val=max_p
}else{
#Keep the shift whose min/max proportion is closest to alpha/2 resp. 1-alpha/2.
#NOTE(review): these comparisons mix the sita scale with the p scale
#(abs(min_sita - alpha/2)); confirm the intended selection rule.
min_sita=ifelse(min(abs(min_p-alpha/2),abs(min_sita-alpha/2))==abs(min_p-alpha/2),sita,min_sita)
max_sita=ifelse(min(abs(max_p-(1-alpha/2)),abs(max_sita-(1-alpha/2)))==abs(max_p-(1-alpha/2)),sita,max_sita)
#Fixed: the two lines below originally assigned to min_sita/max_sita again,
#so min_p_val/max_p_val were never updated after m==1.
min_p_val=ifelse(min(abs(min_p-alpha/2),abs(min_sita-alpha/2))==abs(min_p-alpha/2),min_p,min_p_val)
max_p_val=ifelse(min(abs(max_p-(1-alpha/2)),abs(max_sita-(1-alpha/2)))==abs(max_p-(1-alpha/2)),max_p,max_p_val)
}
}
#steel-dwass
library(dplyr)
n=100;k=10
#A(s): Steel-Dwass studentized-range kernel,
#integral over y of n*phi(y)*(Phi(y)-Phi(y-sqrt(2)*s))^(n-1).
A=function(s){
fun=function(y){
return(n*dnorm(y)*(pnorm(y)-pnorm(y-sqrt(2)*s))^(n-1))
}
return(integrate(fun,-Inf,Inf)$value)
}
#Tabulate A over a grid of t and plot it
t_data=data.frame(t=seq(0,10,0.01),A=0)
for(j in 1:nrow(t_data)){
t_data$A[j]=A(t_data$t[j])
}
plot(t_data$t,t_data$A,type="p",col=1)
#Random data: each column is a normal sample with random mean and sd,
#rounded to one decimal so ties occur
X=array(0,dim=c(n,k))
for(j in 1:k){
X[,j]=rnorm(n,sample(1:10,1),sample(1:10,1))
}
X=round(X*10)/10
#Pairwise Wilcoxon rank-sum Z statistics between rows of X
t_mat=array(0,dim=c(n,n))
for(j in 1:nrow(X)){
for(i in 1:nrow(X)){
if(j!=i){
X1=X[j,];X2=X[i,]
X_vec=c(X1,X2)
#Midranks: tied values get the average of their ranks
num_data=data.frame(no=c(1:length(X_vec)),values=sort(X_vec))
num_data=num_data %>% group_by(values) %>% summarise(no=mean(no))
for(l in 1:nrow(num_data)){
X_vec[X_vec==num_data$values[l]]=num_data$no[l]
}
#Fixed: X_vec must keep its original order so the first length(X1)
#entries are group 1's ranks; the original sorted the ranks here and
#therefore always summed the smallest ranks instead.
R=X_vec
t=sum(R[1:length(X1)])-(length(X1))*(length(X1)+length(X2))/2
sigma=sqrt(length(X1)*length(X2)*(length(X1)+length(X2)+1)/12)
Z=t/sigma
t_mat[j,i]=Z
}
}
}
t_mat=abs(t_mat)
#Maximum pairwise statistic (to compare with the Steel-Dwass critical value)
max(t_mat)
#sheffe method
library(dplyr)
n=100;k=10
#k groups of n observations; column j ~ N(random mean, random sd), then
#transposed so rows are groups
X=array(0,dim=c(n,k))
for(j in 1:k){
X[,j]=rnorm(n,sample(1:10,1),sample(1:10,1))
}
X=round(X*10)/10
X=t(X)
#Contrast coefficients (sum to zero)
c=c(1,-1,1,-1,1,-1,1,-1,1,-1)
#Pooled within-group variance: SSW / (total observations - k)
V_E=sum((X-apply(X,1,mean))^2)/(prod(dim(X))-k)
F_c=(sum((c*apply(X,1,mean)))^2/(k-1))/(V_E*sum(c^2/n))
alpha=0.01
#Scheffe critical value: F with (k-1, N-k) df where N = total observations.
#Fixed: the original used df2 = n-k = 90, inconsistent with the
#prod(dim(X))-k = 990 degrees of freedom used for V_E above.
qf(1-alpha,k-1,prod(dim(X))-k)
#confidence interval for the contrast sum(c_i * mean_i)
under=sum(c*apply(X,1,mean))-sqrt((k-1)*qf(1-alpha,k-1,prod(dim(X))-k)*V_E*sum(c^2/n))
upper=sum(c*apply(X,1,mean))+sqrt((k-1)*qf(1-alpha,k-1,prod(dim(X))-k)*V_E*sum(c^2/n))
#p.156
#t-test
#Trend-type t test: a levels, r replications per level (columns filled with
#one random normal distribution each).
a=100;r=10
y=array(0,dim=c(a,r))
for(j in 1:r){
y[,j]=rnorm(a,sample(1:10,1),sample(1:10,1))
}
#Column means (length r)
mu=apply(y,2,mean)
#Pooled variance estimate with a*(r-1) degrees of freedom
sigma=sum((y-apply(y,1,mean))^2)/(a*(r-1))
#非心度 (noncentrality): standardized linear trend contrast.
#NOTE(review): mu has length r (=10) but is multiplied against c(1:a)
#(length 100); R recycles mu silently. Likely apply(y,1,mean) was intended,
#as in the score-method section below -- confirm against p.156.
abs(sqrt(r)*sum(-c(1:a)*(mu-mean(y)))/sqrt(sigma*(a*(a^2-1))/12))
alpha=0.01
#One-sided t critical value
qt(1-alpha,df=a*(r-1))
#score method
#Compare two score vectors for an ordered trend test:
#C = centered linear (Wilcoxon-type) scores, C1 = sqrt-based scores.
a=10;r=10
y=array(0,dim=c(a,r))
for(j in 1:r){
y[,j]=rnorm(a,sample(1:10,1),sample(1:10,1))
}
#Per-level means (row means) and pooled variance
mu=apply(y,1,mean)
sigma=sum((y-apply(y,1,mean))^2)/(a*(r-1))
C1=-sqrt((c(1:a)-1)*(1-(c(1:a)-1)/a))+sqrt(c(1:a)*(1-c(1:a)/a))
#Centered linear scores: (a+1)/2 - i
C=-c(1:a)+(a+1)/2
#(duplicate of the C1 definition above; kept as in the original)
C1=-sqrt((c(1:a)-1)*(1-(c(1:a)-1)/a))+sqrt(c(1:a)*(1-c(1:a)/a))
#Correlation between each score vector and the level means
cor=cor(C,mu);cor1=cor(C1,mu)
#非心度 (noncentrality parameter)
gamma=r*sum((mu-mean(mu))^2)*(cor^2)/sigma
#Trend statistic with linear scores
t=sqrt(r)*sum(C*apply(y,1,mean))/sqrt(sum(C^2)*sigma)
#Efficiency-style constant from the textbook
2/(2+log(a-1))
#Bartholomew likelihood-ratio test p.168
#n groups with unequal replication counts r[j]; pool adjacent order
#violators to get the restricted mean estimate under hypothesis H2.
r=c();n=100
for(j in 1:n){
r=c(r,sample(c(1:20),1))
}
#y[j,] holds r[j] observations padded with NA
y=array(0,dim=c(n,max(r)))
for(j in 1:n){
vec=c(rnorm(r[j],sample(1:10,1),sample(1:10,1)))
vec=c(vec,rep(NA,max(r)-r[j]))
y[j,]=vec
}
#H2
#Unconstrained group means
y_ave=c()
for(j in 1:n){
vec=y[j,]
vec=vec[is.na(vec)!=T]
y_ave=c(y_ave,mean(vec))
}
mu=y_ave
#Repeated pair-pooling sweeps (crude PAVA-style algorithm): whenever an
#adjacent pair violates the order, replace both by their r-weighted mean.
times=100000
for(k in 1:times){
changed=FALSE
for(j in length(mu):2){
mu_vec=mu[(j-1):(j)];r_vec=r[(j-1):j]
if(mu_vec[1]<mu_vec[2]){
mu_vec=rep((r_vec[1]*mu_vec[1]+r_vec[2]*mu_vec[2])/sum(r_vec),2)
mu[(j-1):j]=mu_vec
changed=TRUE
}
}
#Stop once a full sweep makes no change; remaining sweeps would be no-ops,
#so the result is identical and much faster
if(!changed) break
}
#Chi-bar-square numerator; l1 = number of distinct fitted levels
#(values compared after truncation to 6 decimals)
kai2_1=sum(r*(mu-mean(y[is.na(y)!=T]))^2)
l1=length(unique(floor(mu*10^6)/10^6))
#NOTE(review): sigma and a are carried over from the score-method section
#above -- confirm this reuse is intended.
f1=kai2_1/((l1-1)*sigma)
alpha=0.01
#Fixed: the original referenced the stale loop index l here; l1 (the number
#of distinct fitted levels) is what the degrees-of-freedom formula uses.
qf1=qf(1-alpha,a*max(r)-l1,a*(max(r)-1))
mu1=mu
#H3
#Same procedure as H2 with the opposite order constraint: sweep forward and
#pool adjacent pairs that violate a non-decreasing order.
mu=y_ave
times=100000
for(k in 1:times){
changed=FALSE
for(j in 2:length(mu)){
mu_vec=mu[(j-1):(j)];r_vec=r[(j-1):j]
if(mu_vec[1]>mu_vec[2]){
mu_vec=rep((r_vec[1]*mu_vec[1]+r_vec[2]*mu_vec[2])/sum(r_vec),2)
mu[(j-1):j]=mu_vec
changed=TRUE
}
}
#Stop once a full sweep makes no change (identical result, much faster)
if(!changed) break
}
kai2_2=sum(r*(mu-mean(y[is.na(y)!=T]))^2)
l2=length(unique(floor(mu*10^6)/10^6))
#NOTE(review): sigma and a come from the score-method section above.
f2=kai2_2/((l2-1)*sigma)
alpha=0.01
#Fixed: the original referenced the undefined/stale index l; use l2.
qf2=qf(1-alpha,a*max(r)-l2,a*(max(r)-1))
mu2=mu
#TA(t);quantile calculating p.94
#Studentized-range-type distribution: TA(t) = k * E_s[ integral over x of
#(Phi(x)-Phi(x-sqrt(2)*t*s))^(k-1) phi(x) dx ] with s^2 ~ Gamma(m/2, m/2),
#m = N-k.
N=200;m=10
X=array(0,dim=c(N,m))
for(j in 1:nrow(X)){
X[j,]=rpois(m,sample(1:10,1))
}
k=5
X_dat=data.frame(X,clust=kmeans(X,k)$cluster)
TA_data=data.frame(t=seq(0,5,0.01),ta=0)
for(l in 1:nrow(TA_data)){
t=TA_data$t[l]
#Outer integral over s via the composite Simpson rule (one-variable
#quadrature); the inner integral over x is delegated to integrate().
func=function(s){
f=function(x){
return(((pnorm(x)-pnorm(x-sqrt(2)*t*s))^(k-1))*dnorm(x))
}
m=N-k;
#Density of s when s^2 ~ Gamma(m/2, m/2): f_s(s) = 2*s*f_gamma(s^2)
g=(2*s)*dgamma(s^2,m/2,m/2)
return(integrate(f,-Inf,Inf)$value*g)
}
#Simpson on [a, a+H] with n sample points, step H/(n-1); endpoints weight 1
a=0;H=100;n=100000
Y=(H/3)*(1/(n-1))*(func(a)+func(a+H));
for(j in 1:(n-2)){
#Fixed: Simpson weights are 4 at odd interior indices and 2 at even ones;
#the original had the parity reversed.
#NOTE(review): n-1 = 99999 intervals is odd, so the last panel is not an
#exact Simpson pair; the error is negligible at this resolution.
coefficient=ifelse(j%%2==1,4,2)
Y=Y+((H*coefficient)/(3*(n-1)))*func(a+(j*H)/(n-1))
}
TA_data$ta[l]=k*Y
print(l)
}
plot(TA_data$t,TA_data$ta,type="p",col=2)