#biostatistical analysis
library(dplyr)
#sample1
# Descriptive statistics for sample 1 via the machine formula.
data1 <- data.frame(X = c(1.2, 1.4, 1.6, 1.8, 2, 2.2, 2.4))
data1$X2 <- data1$X^2            # squared observations
sum_X1 <- sum(data1$X)
sum_X1_2 <- sum(data1$X2)
n1 <- nrow(data1)
X_ave <- mean(data1$X)
SS1 <- sum_X1_2 - sum_X1^2/n1    # sum of squares (machine formula)
S1_2 <- SS1/(n1 - 1)             # sample variance
s1 <- sqrt(S1_2)                 # sample standard deviation
V1 <- s1/X_ave                   # coefficient of variation
#sample2
# Descriptive statistics for sample 2 via the machine formula.
data2 <- data.frame(X = c(1.2, 1.6, 1.7, 1.8, 1.9, 2, 2.4))
data2$X2 <- data2$X^2            # squared observations
sum_X2 <- sum(data2$X)
sum_X2_2 <- sum(data2$X2)
n2 <- nrow(data2)
X_ave <- mean(data2$X)
SS2 <- sum_X2_2 - sum_X2^2/n2    # sum of squares (machine formula)
S2_2 <- SS2/(n2 - 1)             # sample variance
s2 <- sqrt(S2_2)                 # sample standard deviation
V2 <- s2/X_ave                   # coefficient of variation
#interval estimation of the pth percentile Xp
# Distribution-free bounds for each percentile p of the combined sample.
data=rbind(data1,data2)
# population-style sd: divides by N, not N-1
sigma=sqrt((sum(data$X2)-sum(data$X)^2/nrow(data))/nrow(data))
mu=mean(data$X)
p=c(1:99)
# Vectorized bounds. The original grew the data.frame with rbind() inside a
# loop starting from a dummy sentinel row (O(n^2) anti-pattern) and printed
# each bound as debug output; the resulting table is identical.
half_width=sigma*sqrt((1-p/100)/(p/100))
under_upper=data.frame(p=p,under=mu-half_width,upper=mu+half_width)
#species diversity and evenness index J
#category1
# Shannon diversity (base-10 logs, grouped-data form) and evenness J.
category1 <- data.frame(terms = c("vines", "eaves", "branches", "cavities"),
                        samples = c(5, 5, 5, 5))
categories1 <- nrow(category1)
n1 <- sum(category1$samples)
H1 <- (n1*log10(n1) - sum(category1$samples*log10(category1$samples)))/n1
H1_max <- log10(categories1)   # maximum possible diversity
J1 <- H1/H1_max                # evenness (1 = perfectly even)
#category2
# Same diversity/evenness computation on a highly uneven distribution.
category2 <- data.frame(terms = c("vines", "eaves", "branches", "cavities"),
                        samples = c(1, 1, 1, 17))
categories2 <- nrow(category2)
n2 <- sum(category2$samples)
H2 <- (n2*log10(n2) - sum(category2$samples*log10(category2$samples)))/n2
H2_max <- log10(categories2)
J2 <- H2/H2_max
#category3
# Same computation; doubling all counts leaves H and J unchanged vs category2.
category3 <- data.frame(terms = c("vines", "eaves", "branches", "cavities"),
                        samples = c(2, 2, 2, 34))
categories3 <- nrow(category3)
n3 <- sum(category3$samples)
H3 <- (n3*log10(n3) - sum(category3$samples*log10(category3$samples)))/n3
H3_max <- log10(categories3)
J3 <- H3/H3_max
#one sample hypothesis p.91
samples=c(25.8,24.6,26.1,22.9,25.1,27.3,24,24.5,23.9,26.2,24.3,24.6,23.3,25.5,28.1,24.8,23.5,26.3,25.4,25.5,23.9,27,24.8,22.9,25.4)
#H0:mu=24.3
mu=24.3
alpha=0.05
n=length(samples)
X_ave=mean(samples)
SS=sum(samples^2)-sum(samples)^2/n   # sum of squares (machine formula)
S2=SS/(n-1)                          # sample variance
s_x=sqrt(S2/n)                       # standard error of the mean
t=(X_ave-mu)/s_x
v=n-1
upper=qt(1-alpha/2,v)
under=-qt(1-alpha/2,v)
#necessary sample size
d=qt(1-alpha,n-1)*s_x                # target half-width
# n = s^2 * t^2 / d^2.  FIX: the original computed (S2*t)^2/d^2, squaring the
# variance a second time.
n=S2*qt(1-alpha,v)^2/(d^2)
#confidence interval of sigma
# FIX: bounds were swapped -- the lower limit of sigma uses the UPPER
# chi-square quantile (qchisq(alpha/2) is small, so SS/qchisq(alpha/2) is the
# larger value).
sigma_under=sqrt(SS/qchisq(1-alpha/2,v))
sigma_upper=sqrt(SS/qchisq(alpha/2,v))
#H0:sigma2<=sigma0
# Newton-Raphson search for the sample size x at which the chi-square quantile
# ratio equals (sigma0/s2)^2 (sample-size planning for a variance test).
# The full iteration history is kept in `newton_raphson`.
samples=c(25.8,24.6,26.1,22.9,25.1,27.3,24,24.5,23.9,26.2,24.3,24.6,23.3,25.5,28.1,24.8,23.5,26.3,25.4,25.5,23.9,27,24.8,22.9,25.4)
n=length(samples);v=n-1
times=100 # fixed iteration count; there is no convergence check
newton_raphson=data.frame(times=1:times,value=0,f=0,df=0) # iteration trace
value=20;h=0.01 # starting guess and step size for the numerical derivative
alpha=0.05;beta=0.15;sigma0=1.5
# f(x): recomputes the sample variance as if the sample size were x.
# NOTE(review): v stays fixed at n-1 while x varies -- confirm this matches
# the intended textbook formula.
fun=function(x){
s2=(sum(samples^2)-sum(samples)^2/x)/(x-1)
return((qchisq(1-beta,v)/qchisq(alpha,v))^2-(sigma0/s2)^2)
}
for(j in 1:nrow(newton_raphson)){
f<-fun(value) # function value at the current estimate
df<-(fun(value+h)-fun(value))/h # forward-difference derivative
newton_raphson$f[j]=f
newton_raphson$df[j]=df
value=-f/df+value # Newton-Raphson update
newton_raphson$value[j]=value
}
solution=newton_raphson$value[nrow(newton_raphson)] # final iterate
#p.113
# D'Agostino tests for skewness and kurtosis on simulated Poisson data.
# NOTE(review): rpois() is unseeded, so results differ between runs.
n=1000
samples=rpois(n,lambda=5)
hist(samples)
#H0: the sampled distribution is not skewed (skewness test)
X_ave=mean(samples)
SS=sum(samples^2)-sum(samples)^2/n # sum of squares (machine formula)
S2=SS/(n-1) # sample variance
# grouped frequency table with powers of each distinct value
summary_samples=data.frame(samples) %>% group_by(samples) %>% summarise(n=n()) %>% mutate(X=samples,X2=samples^2,X3=samples^3)
# k3: third k-statistic computed from the grouped frequencies
k3=(n*sum(summary_samples$n*summary_samples$X3)-3*sum(summary_samples$n*summary_samples$X)*sum(summary_samples$n*summary_samples$X2)+(2*sum(summary_samples$n*summary_samples$X)^3)/n)/((n-1)*(n-2))
g1=k3/sqrt(S2^3);b1=(((n-2)*g1)/(sqrt(n*(n-1)))) # sample skewness measures
A=(b1)*sqrt(((n+1)*(n+3))/(6*(n-2)))
B=(3*(n^2+27*n-70)*(n+1)*(n+3))/((n-2)*(n+5)*(n+7)*(n+9))
C=sqrt(2*(B-1))-1
D=sqrt(C);E=1/sqrt(log(D));f=A/sqrt(2/(C-1))
Z_g1=E*log(f+sqrt(f^2+1)) # approximately standard normal under H0
#H0: the distribution has normal kurtosis (kurtosis test)
# k4: fourth k-statistic from the grouped frequencies
k4=(n+1)*(n*sum(summary_samples$n*summary_samples$X^4)-4*sum(summary_samples$n*summary_samples$X)*sum(summary_samples$n*summary_samples$X3)+6*sum(summary_samples$n*summary_samples$X2)*sum(summary_samples$n*summary_samples$X)^2/n-3*sum(summary_samples$n*summary_samples$X)^4/(n^2))/((n-1)*(n-2)*(n-3))
k4=k4-3*(SS^2)/((n-2)*(n-3))
g2=k4/(S2^2) # sample kurtosis
G=(24*n*(n-2)*(n-3))/((n+3)*(n+5)*(n+1)^2)
H=(n-2)*(n-3)*abs(g2)/((n^2-1)*sqrt(G))
J=(6*(n^2-5*n+2))*sqrt((6*(n+3)*(n+5))/(n*(n-2)*(n-3)))/((n+7)*(n+9))
K=6+8*(2/J+sqrt(1+4/(J^2)))/J
L=(1-2/K)/(1+H*sqrt(2/(K-4)))
Z_g2=(1-2/(9*K)-L^(1/3))/sqrt(2/(9*K)) # approximately standard normal under H0
Z_g1^2+Z_g2^2 # omnibus K^2 statistic
qchisq(1-0.01,2) # chi-square critical value, df=2
#p.123 two sample hypotheses sigma1!=simga2
# Pooled-variance two-sample t test and several confidence intervals.
drug_B <- c(8.8, 8.4, 7.9, 8.7, 9.1, 9.6)
drug_G <- c(9.9, 9, 11.1, 9.6, 8.7, 10.4, 9.5)
n1 <- length(drug_B)
n2 <- length(drug_G)
v1 <- n1 - 1
v2 <- n2 - 1
X1 <- mean(drug_B)
X2 <- mean(drug_G)
# machine-formula sums of squares
SS1 <- sum(drug_B^2) - sum(drug_B)^2/n1
SS2 <- sum(drug_G^2) - sum(drug_G)^2/n2
sp_2 <- (SS1 + SS2)/(v1 + v2)        # pooled variance
s <- sqrt(sp_2*(1/n1 + 1/n2))        # SE of the difference in means
t <- abs(X1 - X2)/s
qt(1 - 0.05/2, v1 + v2)              # two-sided critical value
#confidence interval
t_crit <- qt(1 - 0.05/2, v1 + v2)
upper_X1 <- X1 + t_crit*sqrt(sp_2/n1)
under_X1 <- X1 - t_crit*sqrt(sp_2/n1)
#confidence interval
upper_X2 <- X2 + t_crit*sqrt(sp_2/n2)
under_X2 <- X2 - t_crit*sqrt(sp_2/n2)
#confidence interval
upper <- X1 - X2 + t_crit*s
under <- X1 - X2 - t_crit*s
#weighted confidence interval
Xp <- (n1*X1 + n2*X2)/(n1 + n2)      # weighted grand mean
upper <- Xp + t_crit*sqrt(sp_2/(n1 + n2))
under <- Xp - t_crit*sqrt(sp_2/(n1 + n2))
#minimum detectabale differences
#population
sqrt((2*sp_2)/(n1 + n2))*(qt(1 - 0.05, v1 + v2) + qt(1 - 0.01, v1 + v2))
#power of the test
#p.138 example 8.8
# Variance-ratio (F) test for two samples, then a required sample size.
type1 <- c(41, 34, 33, 36, 40, 25, 31, 37, 34, 30, 38)
type2 <- c(52, 57, 62, 55, 64, 57, 56, 55)
n1 <- length(type1)
n2 <- length(type2)
v1 <- n1 - 1
v2 <- n2 - 1
SS1 <- sum(type1^2) - sum(type1)^2/n1
SS2 <- sum(type2^2) - sum(type2)^2/n2
s1 <- var(type1)   # NB: these hold variances despite the s1/s2 names
s2 <- var(type2)
f <- s1/s2                      # variance-ratio statistic
qf(1 - 0.05/2, v1, v2)          # two-tailed critical value
sp_2 <- (SS1 + SS2)/(v1 + v2)   # pooled variance
alpha <- 0.01
beta <- 0.05
#required sample size
((qnorm(1 - alpha) + qnorm(1 - beta))/log(f))^2 + 2
#example 8.12
# Z test for equality of two coefficients of variation (sigma/mu).
weight <- c(72.5, 71.7, 60.8, 63.2, 71.4, 73.1, 77.9, 75.7, 72, 69)
height <- c(183, 172, 180, 190, 191, 169, 166, 177, 184, 187, 179)
n1 <- length(weight)
n2 <- length(height)
v1 <- n1 - 1
v2 <- n2 - 1
X1 <- mean(weight)
X2 <- mean(height)
s1 <- var(weight)
s2 <- var(height)
V1 <- sqrt(s1)/X1              # coefficients of variation
V2 <- sqrt(s2)/X2
# F test on the log-transformed data as a check
s1_log <- var(log10(weight))
s2_log <- var(log10(height))
f <- s1_log/s2_log
qf(1 - 0.05/2, v1, v2)
#Z-test sigma1/mu1=sigma2/mu2
V_p <- ((n1 - 1)*V1 + (n2 - 1)*V2)/(n1 + n2 - 2)   # pooled CV
V_p2 <- V_p^2
Z <- (V1 - V2)/sqrt(V_p2*(1/(n1 - 1) + 1/(n2 - 1))*(0.5 + V_p2))
qnorm(1 - 0.05)
#Mann-Whitney test p.146
#example 8.13
height_males <- c(193, 188, 185, 183, 180, 178, 170)
height_females <- c(175, 173, 168, 165, 163)
combined <- c(height_females, height_males)
rank(combined)   # ascending ranks (displayed when run interactively)
# descending ranks with first-occurrence tie-breaking; equivalent to
# dplyr::row_number(desc(.)) used originally
desc_rank <- rank(-combined, ties.method = "first")
female_rank <- desc_rank[seq_along(height_females)]
male_rank <- desc_rank[(length(height_females) + 1):length(combined)]
R2 <- sum(female_rank)   # rank sums per group
R1 <- sum(male_rank)
n1 <- length(height_males)
n2 <- length(height_females)
U1 <- n1*n2 + (n1*(n1 + 1))/2 - R1   # Mann-Whitney U statistics
U2 <- n1*n2 - U1
#Mann-Whitney test (normal approximation)
#example 8.17
# Inputs are already rank values, with ties shared across groups.
A_grade <- c(3, 3, 3, 6, 10, 10, 13.5, 13.5, 16.5, 16.5, 19.5)
B_grade <- c(3, 3, 7.5, 7.5, 10, 12, 16.5, 16.5, 19.5, 22.5, 22.5, 22.5, 22.5, 25)
n1 <- length(A_grade)
n2 <- length(B_grade)
R1 <- sum(A_grade)   # rank sums
R2 <- sum(B_grade)
N <- n1 + n2
U1 <- n1*n2 + (n1*(n1 + 1))/2 - R1
U2 <- n1*n2 - U1
mu_U <- (n1*n2)/2
# tie counts per distinct rank value (base-R replacement for group_by/summarise)
tie_tab <- table(c(A_grade, B_grade))
rank <- data.frame(ranks = as.numeric(names(tie_tab)), n = as.vector(tie_tab))
t <- rank$n
sum_t <- sum(t^3 - t)
# tie-corrected standard error of U
sigma_U <- sqrt((n1*n2*(N^3 - N - sum_t))/(12*(N^2 - N)))
#sigma_U=sqrt((n1*n2*(N+1))/12)
Z <- abs(U2 - mu_U)/sigma_U
qnorm(1 - 0.05)
#p.156 testing for difference between two diversity indices
# Hutcheson-style t test comparing two Shannon indices (base-10 logs).
Michigan <- data.frame(f = c(47, 35, 7, 5, 3, 2))
Michigan$f_log <- Michigan$f*log10(Michigan$f)
Michigan$f_log2 <- Michigan$f*log10(Michigan$f)^2
Louisiana <- data.frame(f = c(48, 23, 11, 13, 8, 2))
Louisiana$f_log <- Louisiana$f*log10(Louisiana$f)
Louisiana$f_log2 <- Louisiana$f*log10(Louisiana$f)^2
n1 <- sum(Michigan$f)
H1 <- (n1*log10(n1) - sum(Michigan$f_log))/n1   # Shannon index, sample 1
s_H1 <- (sum(Michigan$f_log2) - sum(Michigan$f_log)^2/n1)/(n1^2)   # its variance
n2 <- sum(Louisiana$f)
H2 <- (n2*log10(n2) - sum(Louisiana$f_log))/n2
s_H2 <- (sum(Louisiana$f_log2) - sum(Louisiana$f_log)^2/n2)/(n2^2)
s_H1_H2 <- sqrt(s_H1 + s_H2)   # SE of the difference
t <- (H1 - H2)/s_H1_H2
# Welch-style approximate degrees of freedom
v <- ((s_H1 + s_H2)^2)/(s_H1^2/n1 + s_H2^2/n2)
qt(1 - 0.05/2, v)
#paired sample hypothesis p.161
# Paired t test on hindleg vs foreleg lengths of 10 deer.
data=data.frame(Deer=c(1:10),hindleg=c(142,140,144,144,142,146,149,150,142,148),foreleg=c(138,136,147,139,143,141,143,145,136,146))
data$d=data$hindleg-data$foreleg   # paired differences
n=nrow(data)
d_bar=mean(data$d)
s2_d=var(data$d)                   # variance of the differences (was `sd`, which shadowed base::sd)
s_d_bar=sqrt(s2_d/n)               # standard error of the mean difference
# t = d_bar / SE(d_bar).  FIX: the original divided by sqrt(var(d)), the
# standard deviation of d, instead of the standard error of d_bar.
t=d_bar/s_d_bar
qt(1-0.05/2,n-1)
#confidence interval
upper=d_bar+qt(1-0.05/2,n-1)*s_d_bar
under=d_bar-qt(1-0.05/2,n-1)*s_d_bar
#example 9.3 p.166 wilcoxon paired sample test
# Uses `data` (with column d) and `n` from the paired-sample section above.
# rank of |d|, first-occurrence tie-breaking
data=data %>% mutate(rank_d=row_number((abs(d))))
# average tied ranks within equal d, then attach the sign of d.
# NOTE(review): grouping is by signed d, so ties in |d| with opposite signs
# would not be averaged together -- confirm against the textbook procedure.
rank=data %>% group_by(d) %>% mutate(rank_d=mean(rank_d),rank_sign_d=sign(d)*rank_d)
t_plus=sum(rank$rank_sign_d[rank$rank_sign_d>0]) # sum of positive signed ranks
t_minus=sum(rank$rank_sign_d[rank$rank_sign_d<0]) # sum of negative signed ranks (a negative number)
#normal approximation
mu_t=(n*(n+1))/4 # mean of T under H0
sigma_t=sqrt((n*(n+1)*(2*n+1))/24) # sd of T under H0
# NOTE(review): t_minus is negative, so max(t_plus,t_minus) is always t_plus;
# the larger of T+ and |T-| may be intended -- verify.
Z=abs(max(t_plus,t_minus)-mu_t)/sigma_t
#example 9.4 p.170
# McNemar's test with continuity correction on a 2x2 paired table.
mat <- matrix(0, nrow = 2, ncol = 2,
              dimnames = list(c("relief", "no relief"), c("relief", "no relief")))
mat["relief", "relief"] <- 11
mat["relief", "no relief"] <- 6
mat["no relief", "relief"] <- 10
mat["no relief", "no relief"] <- 24
# chi-square on the discordant pair only
kai2 <- (abs(mat[1, 2] - mat[2, 1]) - 1)^2/(mat[1, 2] + mat[2, 1])
qchisq(1 - 0.05, 1)
#example 9.7 p.173
# Bowker's test of symmetry on a 3x3 square table.
mat <- rbind(c(173, 20, 7),
             c(15, 51, 2),
             c(5, 3, 24))
# sum (n_ij - n_ji)^2/(n_ij + n_ji) over cells above the diagonal
kai2 <- 0
for (i in 1:(nrow(mat) - 1)) {
  for (j in (i + 1):ncol(mat)) {
    kai2 <- kai2 + (mat[i, j] - mat[j, i])^2/(mat[i, j] + mat[j, i])
  }
}
v <- nrow(mat)*(nrow(mat) - 1)/2   # df = number of off-diagonal pairs
qchisq(1 - 0.05, v)
#p.177~180
#example 10.1
# One-way ANOVA with unequal group sizes (feed3 has one missing value).
data <- data.frame(feed1 = c(60.8, 57, 65, 58.6, 61.7),
                   feed2 = c(68.7, 67.7, 74, 66.3, 69.8),
                   feed3 = c(102.6, 102.1, 100.2, 96.5, NA),
                   feed4 = c(87.9, 84.2, 83.1, 85.7, 90.3))
ni <- c(5, 5, 4, 5)
N <- sum(ni)
mat <- as.matrix(data)
mat[is.na(mat)] <- 0          # zero the missing cell so column sums ignore it
X_s <- apply(mat, 2, sum)     # group totals
X_n <- apply(mat, 2, sum)^2/ni
X_bar <- c()
for (j in 1:ncol(data)) {
  vec <- data[, j]
  X_bar <- c(X_bar, mean(vec[!is.na(vec)]))   # group means, NAs dropped
}
C <- sum(mat)^2/N             # correction term
group_MS <- sum(apply(mat, 2, sum)^2/ni) - C  # groups SS (becomes MS below)
error_MS <- sum(mat^2) - C - group_MS         # error SS (becomes MS below)
group_MS <- group_MS/(ncol(mat) - 1)
error_MS <- error_MS/(N - ncol(mat))
f <- group_MS/error_MS
qf(1 - 0.05, ncol(mat) - 1, N - ncol(mat))
#example 10.2 p.186
# One-way ANOVA with equal group sizes.
data <- data.frame(one = c(34, 36, 34, 35, 34),
                   two = c(37, 36, 35, 37, 37),
                   three = c(34, 37, 35, 37, 36),
                   four = c(36, 34, 37, 34, 35))
sum <- sum(data)     # grand total (a variable; sum() calls still resolve to the function)
sum2 <- sum(data^2)
N <- prod(dim(data))
C <- sum^2/N         # correction term
group_SS <- sum(apply(data, 2, sum)^2/nrow(data)) - C
total_SS <- sum2 - C
error_SS <- total_SS - group_SS
f <- (group_SS/(ncol(data) - 1))/(error_SS/(N - 1 - (ncol(data) - 1)))
qf(1 - 0.05, ncol(data) - 1, N - 1 - (ncol(data) - 1))
#multiple welch test
# Welch's ANOVA statistic on k simulated groups.
# NOTE(review): sample()/rnorm() are unseeded, so results differ between runs.
k=4
s2=c();ni=c();ave=c()
for(j in 1:k){
n=sample(50:100,1) # random group size
ni=c(ni,n)
x=rnorm(n,sample(1:10,1),sample(1:10,1)) # random mean and sd per group
s2=c(s2,var(x));ave=c(ave,mean(x))
}
ci=ni/s2;C=sum(ci) # precision weights and their total
X_w=sum(ci*ave)/C # weighted grand mean
A=sum((1-ci/C)^2/(ni-1))
f_dash=sum(ci*(ave-X_w)^2)/((k-1)*(1+(2*A*(k-2))/(k^2-1))) # Welch F'
#10.2 confidence limit for population means p.189
# Bartlett's test on simulated groups; if variances look homogeneous,
# compute group confidence intervals.
# NOTE(review): sample()/rnorm() are unseeded, so results differ between runs.
k=4
s2=c();ni=c();ave=c();X=c();SS=c()
for(j in 1:k){
n=sample(50:100,1) # random group size
ni=c(ni,n)
x=rnorm(n,sample(1:10,1),4) # common sd of 4 across groups
X=c(X,x)
s2=c(s2,var(x));ave=c(ave,mean(x))
SS=c(SS,sum((x-mean(x))^2))
}
sp2=sum(SS)/sum(ni-1) # pooled variance
B=2.30259*(log10(sp2)*sum(ni-1)-sum((ni-1)*log10(s2))) # 2.30259 = ln(10)
C=1+(sum(1/(ni-1))-1/sum(ni-1))/(3*(k-1)) # Bartlett correction factor
B_C=B/C # corrected Bartlett statistic
alpha=0.05
if(B_C<qchisq(1-alpha,k-1)){
# NOTE(review): these vectorized CIs use var(X) over the pooled data with
# per-group ni -- confirm sp2 was not intended instead of var(X).
under=ave-qt(1-alpha/2,sum(ni)-1)*sqrt(var(X)/ni)
upper=ave+qt(1-alpha/2,sum(ni)-1)*sqrt(var(X)/ni)
}
#power of test
# phi statistic for ANOVA power on k simulated equal-size groups.
# NOTE(review): sample()/rnorm() are unseeded, so results differ between runs.
k=4
s2=c();ni=c();ave=c();X=c();SS=c() # ni stays empty here (never appended)
n=sample(50:100,1) # one common group size
for(j in 1:k){
x=rnorm(n,sample(1:10,1),4)
X=c(X,x)
s2=c(s2,var(x));ave=c(ave,mean(x))
SS=c(SS,sum((x-mean(x))^2))
}
# NOTE(review): var(X) is the total variance of the pooled data, not the
# error mean square usually used for phi -- confirm.
pthi=sqrt(n*sum((ave-mean(ave))^2)/(k*var(X)))
#example 10.5
#H0:mu1=mu2=mu3
# F test and phi from a pre-computed ANOVA summary table.
data <- data.frame(source_of_variation = c("Among groups", "Error"),
                   SS = c(10.37, 16.55),
                   DF = c(2, 10))
data$MS <- data$SS/data$DF      # mean squares
f <- data$MS[1]/data$MS[2]
k <- data$DF[1] + 1             # number of groups
qf(1 - 0.05, data$DF[1], data$DF[2])
# phi for power estimation
pthi <- sqrt((k - 1)*(data$MS[1] - data$MS[2])/(k*data$MS[2]))
#example 10.10
#Kruscal-Wallis test p.197
data <- data.frame(species = c(rep("Herbs", 5), rep("Shrubs", 5), rep("Trees", 5)),
                   values = c(14, 12.1, 9.6, 8.2, 10.2, 8.4, 5.1, 5.5, 6.6, 6.3, 6.9, 7.3, 5.8, 4.1, 5.4))
# overall ranks; first-occurrence tie-breaking matches dplyr::row_number
data$order <- rank(data$values, ties.method = "first")
# rank sums and group sizes (groups sorted alphabetically, as group_by did)
R_sp <- tapply(data$order, data$species, sum)
n_sp <- tapply(data$order, data$species, length)
summary <- data.frame(species = names(R_sp), R = as.vector(R_sp), n = as.vector(n_sp))
N <- sum(summary$n)
H <- (12/(N*(N + 1)))*sum(summary$R^2/summary$n) - 3*(N + 1)   # Kruskal-Wallis H
qchisq(1 - 0.05, 2)
#example 10.11
#Kruscal-Wallis test (tied rank)
data=data.frame(pond1=c(7.68,7.69,7.7,7.7,7.72,7.73,7.73,7.76),pond2=c(7.71,7.73,7.74,7.74,7.78,7.78,7.8,7.81),pond3=c(7.74,7.75,7.77,7.78,7.8,7.81,7.84,NA),pond4=c(7.71,7.71,7.74,7.79,7.81,7.85,7.87,7.91))
# ranks over all non-NA values; logical-matrix subsetting returns values
# column by column, so the rank vector is ordered pond1, pond2, pond3, pond4
orders=rank(c(data[is.na(data)!=T]),ties.method = c("average"))
orders2=rank(c(data[is.na(data)!=T]),ties.method = c("min")) # used below to find tie groups
ranks=data
sub_orders=orders
ns=c();R=c()
# walk the columns, consuming each pond's ranks from the front of sub_orders
for(j in 1:ncol(ranks)){
values=data[,j]
values=values[is.na(values)!=T]
ns=c(ns,length(values))
ranks[1:length(values),j]=c(sub_orders[1:length(values)])
R=c(R,sum(sub_orders[1:length(values)])) # rank sum per pond
sub_orders=sub_orders[-c(1:length(values))]
}
N=sum(ns)
H=(12/(N*(N+1)))*sum(R^2/ns)-3*(N+1) # Kruskal-Wallis H
order2=data.frame(order=orders2) %>% group_by(order) %>% summarise(n=n()) %>% filter(n>1) # tie groups only
t=order2$n
C=1-sum(t^3-t)/(N^3-N) # tie correction factor
H_C=H/C # tie-corrected H
v=ncol(data)-1;k=ncol(data)
# NOTE(review): the Kruskal-Wallis df is k-1 (= v); this uses k -- verify.
qchisq(1-0.05,k)
f=((N-k)*H_C)/((k-1)*(N-1-H_C)) # F approximation of H
# NOTE(review): denominator df N-k-1 differs from the N-k used above -- verify.
qf(1-0.05,v,N-k-1)
#example 10.12
#the multisample median test p.201
data <- data.frame(north = c(7.1, 7.2, 7.4, 7.6, 7.6, 7.7, 7.7, 7.9, 8.1, 8.4, 8.5, 8.8),
                   east = c(6.9, 7, 7.1, 7.2, 7.3, 7.3, 7.4, 7.6, 7.8, 8.1, 8.3, 8.5),
                   south = c(7.8, 7.9, 8.1, 8.3, 8.3, 8.4, 8.4, 8.4, 8.6, 8.9, 9.2, 9.4),
                   west = c(6.4, 6.6, 6.7, 7.1, 7.6, 7.8, 8.2, 8.4, 8.6, 8.7, 8.8, 8.9))
medians <- apply(data, 2, median)         # per-direction medians
grand_median <- median(as.matrix(data))   # median of all 48 observations
#Homogeneity of variances
#example 10.13
#H0:sigma1=sigma2=sigma3=sigma4
# Bartlett's test on the feed data of example 10.1.
data <- data.frame(feed1 = c(60.8, 57, 65, 58.6, 61.7),
                   feed2 = c(68.7, 67.7, 74, 66.3, 69.8),
                   feed3 = c(102.6, 102.1, 100.2, 96.5, NA),
                   feed4 = c(87.9, 84.2, 83.1, 85.7, 90.3))
K <- ncol(data)
SS <- c(); df <- c(); variances <- c(); average <- c()
for (j in 1:ncol(data)) {
  vec <- data[, j]
  vec <- vec[!is.na(vec)]
  df <- c(df, length(vec))   # NB: stores group sizes, not degrees of freedom
  SS <- c(SS, sum((vec - mean(vec))^2))
  variances <- c(variances, var(vec))
  average <- c(average, mean(vec))
}
v <- df - 1
s2_p <- sum(SS)/sum(v)   # pooled variance
# 2.30259 = ln(10): converts the base-10 logs to natural logs
B <- 2.30259*(log10(s2_p)*sum(v) - sum(v*log10(variances)))
C <- 1 + (sum(1/v) - 1/sum(v))/(3*(K - 1))   # correction factor
B_C <- B/C
qchisq(1 - 0.05, K - 1)
#example 10.14
#testing for homogeneity of coefficients of variation, using the data of example 10.1
#H0:mu1/simga1=mu2/sigma2=mu3/sigma3
# Uses variances/average/v/K computed in the Bartlett section above.
V <- sqrt(variances)/average        # per-group coefficients of variation
V_p <- sum(v*V)/sum(v)              # pooled CV
kai2 <- (sum(v*V^2) - sum(v*V)^2/sum(v))/((V_p^2 + 0.5)*V_p^2)
qchisq(1 - 0.05, K - 1)
#example 11.1 p.210
#tukey multiple comparison test with equal sample sizes. the data are strontium concentrations in five different bodies of water
#H0:mu1=mu2=mu3=mu4=mu5
data <- data.frame(Graysons_pond = c(28.2, 33.2, 36.4, 34.6, 29.1, 31),
                   Beaver_lake = c(39.6, 40.8, 37.9, 37.1, 43.6, 42.4),
                   Anglers_cov = c(46.3, 42.1, 43.5, 48.8, 43.7, 40.1),
                   Appletree_lake = c(41, 44.1, 46.4, 40.2, 38.6, 36.3),
                   Rock_river = c(56.3, 54.1, 59.4, 62.7, 60, 57.3))
K <- ncol(data)
SS <- c(); df <- c(); variances <- c(); average <- c()
for (j in 1:ncol(data)) {
  vec <- data[, j]
  vec <- vec[!is.na(vec)]
  df <- c(df, length(vec))   # group sizes
  SS <- c(SS, sum((vec - mean(vec))^2))
  variances <- c(variances, var(vec))
  average <- c(average, mean(vec))
}
group_SS <- sum(apply(data, 2, sum)^2/nrow(data)) - sum(data)^2/prod(dim(data))
group_MS <- group_SS/(ncol(data) - 1)
error_MS <- sum(SS)/(ncol(data)*(nrow(data) - 1))
SE <- sqrt(error_MS/nrow(data))   # SE for the Tukey q statistic (equal n)
f <- group_MS/error_MS
qf(1 - 0.05, K - 1, ncol(data)*(nrow(data) - 1))
#example 11.1 (continued)
#H0:mu_B=mu_A
# pairwise q statistics |x_i - x_j|/SE
mat <- array(0, dim = c(K, K))
for (i in 1:K) {
  for (j in 1:K) {
    mat[i, j] <- abs(average[i] - average[j])/SE
  }
}
#tables
mat > 4.166   # compare against the tabled q value
#example 11.2 p.213
# Tukey-Kramer multiple comparisons with unequal sample sizes (feed data).
data <- data.frame(feed1 = c(60.8, 57, 65, 58.6, 61.7),
                   feed2 = c(68.7, 67.7, 74, 66.3, 69.8),
                   feed3 = c(102.6, 102.1, 100.2, 96.5, NA),
                   feed4 = c(87.9, 84.2, 83.1, 85.7, 90.3))
K <- ncol(data)
SS <- c(); df <- c(); variances <- c(); average <- c()
for (j in 1:ncol(data)) {
  vec <- data[, j]
  vec <- vec[!is.na(vec)]
  df <- c(df, length(vec))   # group sizes
  SS <- c(SS, sum((vec - mean(vec))^2))
  variances <- c(variances, var(vec))
  average <- c(average, mean(vec))
}
error_MS <- sum(SS)/sum(df - 1)   # pooled error mean square
error_DF <- sum(df - 1)
mat <- array(0, dim = c(K, K))
for (j in 1:K) {
  for (i in 1:K) {
    # Tukey-Kramer SE for a pair with unequal n
    SE <- sqrt(error_MS*(1/df[i] + 1/df[j])/2)
    mat[i, j] <- abs(average[i] - average[j])/SE
    print(SE)
  }
}
mat > 4.076   # compare against the tabled q value
#confidence intervals following multiple comparisons p.215
# Uses df/average/error_MS/K from example 11.2 above. Note each pass of the
# loop appends the full vector of group bounds (original behavior preserved).
under_mat <- c()
upper_mat <- c()
ave_under <- c()
ave_upper <- c()
Xw <- sum(df*average)/sum(df)   # weighted grand mean
for (j in 1:K) {
  t_j <- qt(1 - 0.05/2, df[j] - 1)
  under_mat <- c(under_mat, average - t_j*sqrt(error_MS/df[j]))
  upper_mat <- c(upper_mat, average + t_j*sqrt(error_MS/df[j]))
  ave_under <- c(ave_under, Xw - t_j*sqrt(error_MS/sum(df)))
  ave_upper <- c(ave_upper, Xw + t_j*sqrt(error_MS/sum(df)))
}
#The Scheffes test for the hypothesises and data of examples p.220
#example 11.6
data=data.frame(Graysons_pond=c(28.2,33.2,36.4,34.6,29.1,31),Beaver_lake=c(39.6,40.8,37.9,37.1,43.6,42.4),Anglers_cov=c(46.3,42.1,43.5,48.8,43.7,40.1),Appletree_lake=c(41,44.1,46.4,40.2,38.6,36.3),Rock_river=c(56.3,54.1,59.4,62.7,60,57.3))
K=ncol(data);SS=c();df=c();variances=c();average=c()
for(j in 1:ncol(data)){
vec=data[,j];vec=vec[!is.na(vec)]
df=c(df,length(vec))   # group sizes
SS=c(SS,sum((vec-mean(vec))^2))
variances=c(variances,var(vec))
average=c(average,mean(vec))
}
group_SS=sum(apply(data,2,sum)^2/nrow(data))-sum(data)^2/prod(dim(data))
group_MS=group_SS/(ncol(data)-1)
error_MS=sum(SS)/(ncol(data)*(nrow(data)-1))   # pooled error mean square
# FIX: S_a was used inside the loop below but only assigned AFTER it, so a
# fresh run errored on an undefined S_a. Compute N and the Scheffe critical
# value before the loop.
N=sum(df)
S_a=sqrt((K-1)*qf(1-0.05,K-1,N-K))
mat=array(0,dim=c(K,K))
under=array(0,dim=c(K,K))
upper=array(0,dim=c(K,K))
for(j in 1:K){
for(i in 1:K){
if(i!=j){
SE=sqrt(error_MS*(1/df[i]+1/df[j]))   # SE for the pairwise contrast
mat[i,j]=abs(average[i]-average[j])/SE
under[i,j]=average[i]-average[j]-S_a*sqrt(error_MS*(1/df[i]+1/df[j]))
upper[i,j]=average[i]-average[j]+S_a*sqrt(error_MS*(1/df[i]+1/df[j]))
}
}
}
#multiple contrasts
c_vec=c(1/3,1/3,1/3,-1)   # mean of groups 2-4 vs group 5
ave=c(average[2:4],average[5])
abs(sum(c_vec*ave))/sqrt(error_MS*sum(c_vec^2/df[c(2:5)]))
S_a
#95% confidence interval p.222
sum(c_vec*ave)+S_a*sqrt(error_MS*sum(c_vec^2/df[c(2:5)]))
#example 11.9 p.223
#nonparametric tukey-type multiple comparisons,using the nemenyi test
#example 10.10
data <- data.frame(species = c(rep("Herbs", 5), rep("Shrubs", 5), rep("Trees", 5)),
                   values = c(14, 12.1, 9.6, 8.2, 10.2, 8.4, 5.1, 5.5, 6.6, 6.3, 6.9, 7.3, 5.8, 4.1, 5.4))
# overall ranks; first-occurrence tie-breaking matches dplyr::row_number
data$order <- rank(data$values, ties.method = "first")
R_sp <- tapply(data$order, data$species, sum)
n_sp <- tapply(data$order, data$species, length)
summary <- data.frame(species = names(R_sp), R = as.vector(R_sp), n = as.vector(n_sp))
n <- unique(summary$n)   # common group size
k <- nrow(summary)
R <- summary$R
SE <- sqrt((n*n*k*(n*k + 1))/12)   # Nemenyi SE for equal group sizes
# pairwise statistics |R_i - R_j|/SE
mat <- array(0, dim = c(k, k))
for (i in 1:k) {
  for (j in 1:k) {
    mat[i, j] <- abs(R[i] - R[j])/SE
  }
}
#table
mat > 3.314   # compare against the tabled critical value
#example 11.10
#nonparametric multiple comparisons with unequal sample sizes. The data are those from example 10.11, where the Kruscal-Wallis test rejected the null hypothesis that water pH was the same in all four ponds examined.
#Kruscal-Wallis test (tied rank)
# NOTE(review): lines below duplicate the example 10.11 section verbatim;
# consider factoring the shared rank computation into a function.
data=data.frame(pond1=c(7.68,7.69,7.7,7.7,7.72,7.73,7.73,7.76),pond2=c(7.71,7.73,7.74,7.74,7.78,7.78,7.8,7.81),pond3=c(7.74,7.75,7.77,7.78,7.8,7.81,7.84,NA),pond4=c(7.71,7.71,7.74,7.79,7.81,7.85,7.87,7.91))
# ranks over all non-NA values, ordered pond1..pond4 (column-wise subsetting)
orders=rank(c(data[is.na(data)!=T]),ties.method = c("average"))
orders2=rank(c(data[is.na(data)!=T]),ties.method = c("min")) # used to find tie groups
ranks=data
sub_orders=orders
ns=c();R=c()
# walk the columns, consuming each pond's ranks from the front of sub_orders
for(j in 1:ncol(ranks)){
values=data[,j]
values=values[is.na(values)!=T]
ns=c(ns,length(values))
ranks[1:length(values),j]=c(sub_orders[1:length(values)])
R=c(R,sum(sub_orders[1:length(values)])) # rank sum per pond
sub_orders=sub_orders[-c(1:length(values))]
}
N=sum(ns)
H=(12/(N*(N+1)))*sum(R^2/ns)-3*(N+1) # Kruskal-Wallis H
order2=data.frame(order=orders2) %>% group_by(order) %>% summarise(n=n()) %>% filter(n>1) # tie groups only
t=order2$n
sum_t=sum(t^3-t) # tie correction term
R_bar=R/ns # mean rank per pond
# pairwise Dunn-type statistics with tie-corrected SE
mat_t=array(0,dim=c(length(R_bar),length(R_bar)))
for(j in 1:length(R_bar)){
for(i in 1:length(R_bar)){
SE=sqrt(((N*(N+1))/12-sum_t/(12*(N-1)))*(1/ns[i]+1/ns[j]))
mat_t[i,j]=abs(R_bar[i]-R_bar[j])/SE
}
}
Q_0.05_4=2.639 # tabled critical value for alpha=0.05, k=4
#p.232 two-factor analysis of variance with equal replication
#example 12.1 Hypothesises and data for a Model I, two-factor analysis of variance with equal replication
data=data.frame(treat_ment=c("No Hormone Treatment","No Hormone Treatment","Hormone Treatment","Hormone Treatment"),sex=c("female","male","female","male"))
# one row per treatment x sex cell, one column per replicate
mat=matrix(c(16.5,14.5,39.1,32,18.4,11,26.2,23.8,12.7,10.8,21.3,28.8,14,14.3,35.8,25,12.8,10,40.2,29.3),ncol=5)
Data=data.frame(data,(mat))
sum_X=apply(mat,1,sum)   # cell totals
ave_X=apply(mat,1,mean)  # cell means
# per-factor totals and means; tapply sorts group labels alphabetically,
# matching the ordering group_by/summarise produced
sum_by_treat=tapply(sum_X,data$treat_ment,sum)
ave_by_treat=tapply(ave_X,data$treat_ment,mean)
summary_treat_ment=data.frame(treat_ment=names(sum_by_treat),sum_X=as.vector(sum_by_treat),ave_X=as.vector(ave_by_treat))
sum_by_sex=tapply(sum_X,data$sex,sum)
ave_by_sex=tapply(ave_X,data$sex,mean)
summary_sex=data.frame(sex=names(sum_by_sex),sum_X=as.vector(sum_by_sex),ave_X=as.vector(ave_by_sex))
a=2;b=2;n=ncol(mat)   # factor levels and replicates per cell
N=a*b*n
C=sum(mat)^2/N        # correction term
total_SS=sum(mat^2)-C
total_DF=N-1
# FIX: divide cell totals by n replicates per cell; the original used
# length(sum_X)+1, which equals n only by coincidence (4 cells, 5 replicates)
cells_SS=sum(sum_X^2)/n-C
cells_DF=a*b-1
with_in_cells_SS=total_SS-cells_SS
with_in_cells_DF=a*b*(n-1)
# FIX: each level of factor A totals b*n observations (and each level of B
# totals a*n); the original divisors were swapped, harmless only because a=b
factor_A_SS=sum(summary_treat_ment$sum_X^2)/(b*n)-C
factor_B_SS=sum(summary_sex$sum_X^2)/(a*n)-C
factor_B_DF=b-1
factor_A_DF=a-1
AB_interaction_SS=cells_SS-factor_A_SS-factor_B_SS
AB_interaction_DF=factor_A_DF*factor_B_DF
#H0:There is no effect of hormone treatment on the mean plasma calcium concentration of birds i the population sampled.
f=factor_A_SS/(with_in_cells_SS/with_in_cells_DF)
qf(1-0.05,a-1,with_in_cells_DF)
#H0:There is no difference in mean plasma calcium concentration between male and female birds in the population sampled.
f=factor_B_SS/(with_in_cells_SS/with_in_cells_DF)
qf(1-0.05,b-1,with_in_cells_DF)
#H0:There is no interaction of sex and hormone treatment affecting the mean plasma calcium concentration of birds in the population sampled.
f=AB_interaction_SS/(with_in_cells_SS/with_in_cells_DF)
qf(1-0.05,AB_interaction_DF,with_in_cells_DF)
#example 12.3
#we concluded that mean plasma calcium concentration is different between birds with the hormone treatment and those without
XS=summary_treat_ment$ave_X   # treatment-level means
under=XS-qt(1-0.05/2,a*b*(n-1))*sqrt((with_in_cells_SS/with_in_cells_DF)/(b*n))
upper=XS+qt(1-0.05/2,a*b*(n-1))*sqrt((with_in_cells_SS/with_in_cells_DF)/(b*n))
X_p=sum(mat)/prod(dim(mat))   # grand mean
under=X_p-qt(1-0.05/2,a*b*(n-1))*sqrt(with_in_cells_SS/(with_in_cells_DF*prod(dim(mat))))
upper=X_p+qt(1-0.05/2,a*b*(n-1))*sqrt(with_in_cells_SS/(with_in_cells_DF*prod(dim(mat))))