Biostatistics (Biostatistical Analysis, Zar, Prentice Hall)

Posted at 2019-07-03
#Biostatistical analysis

library(dplyr)

#sample1

data1=data.frame(X=c(1.2,1.4,1.6,1.8,2,2.2,2.4)) %>% mutate(X2=X^2)

sum_X1=sum(data1$X);sum_X1_2=sum(data1$X2)

n1=nrow(data1);X_ave=mean(data1$X)

SS1=sum_X1_2-sum_X1^2/n1

S1_2=SS1/(n1-1);s1=sqrt(S1_2)

V1=s1/mean(data1$X)
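
#note: these hand computations should agree with base R's var() and sd(),
#which also use the n-1 denominator

all.equal(S1_2,var(data1$X));all.equal(s1,sd(data1$X))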

#sample2

data2=data.frame(X=c(1.2,1.6,1.7,1.8,1.9,2,2.4)) %>% mutate(X2=X^2)

sum_X2=sum(data2$X);sum_X2_2=sum(data2$X2)

n2=nrow(data2);X_ave=mean(data2$X)

SS2=sum_X2_2-sum_X2^2/n2

S2_2=SS2/(n2-1);s2=sqrt(S2_2)

V2=s2/mean(data2$X)


#interval estimation of the pth percentile Xp

data=rbind(data1,data2)

sigma=sqrt((sum(data$X2)-sum(data$X)^2/nrow(data))/nrow(data))

mu=mean(data$X)

p=c(1:99)

under_upper=data.frame(p=0,under=0,upper=0)

for(j in 1:length(p)){

under=mu-sigma*sqrt((1-p[j]/100)/(p[j]/100))

upper=mu+sigma*sqrt((1-p[j]/100)/(p[j]/100))

print(under);print(upper)

under_upper=rbind(under_upper,data.frame(p=p[j],under=under,upper=upper))

}

under_upper=under_upper[2:nrow(under_upper),]
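
#the loop above can also be written in vectorized form (same table, without the dummy first row)

p=1:99

under_upper2=data.frame(p=p,under=mu-sigma*sqrt((1-p/100)/(p/100)),upper=mu+sigma*sqrt((1-p/100)/(p/100)))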




#measures of species diversity and evenness J

#category1

category1=data.frame(terms=c("vines","eaves","branches","cavities"),samples=c(5,5,5,5))

categories1=nrow(category1)

n1=sum(category1$samples)

H1=(n1*log10(n1)-sum(category1$samples*log10(category1$samples)))/n1

H1_max=log10(categories1)

J1=H1/H1_max


#category2

category2=data.frame(terms=c("vines","eaves","branches","cavities"),samples=c(1,1,1,17))

categories2=nrow(category2)

n2=sum(category2$samples)

H2=(n2*log10(n2)-sum(category2$samples*log10(category2$samples)))/n2

H2_max=log10(categories2)

J2=H2/H2_max


#category3

category3=data.frame(terms=c("vines","eaves","branches","cavities"),samples=c(2,2,2,34))

categories3=nrow(category3)

n3=sum(category3$samples)

H3=(n3*log10(n3)-sum(category3$samples*log10(category3$samples)))/n3

H3_max=log10(categories3)

J3=H3/H3_max
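
#the three categories differ only in their counts, so a small helper function
#(my own, not from the book) avoids repeating the computation

evenness=function(samples){
  n=sum(samples)
  H=(n*log10(n)-sum(samples*log10(samples)))/n
  c(H=H,J=H/log10(length(samples)))
}

evenness(c(5,5,5,5));evenness(c(1,1,1,17));evenness(c(2,2,2,34))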



#one sample hypothesis p.91

samples=c(25.8,24.6,26.1,22.9,25.1,27.3,24,24.5,23.9,26.2,24.3,24.6,23.3,25.5,28.1,24.8,23.5,26.3,25.4,25.5,23.9,27,24.8,22.9,25.4)

#H0:mu=24.3

mu=24.3

alpha=0.05

n=length(samples)

X_ave=mean(samples)

SS=sum(samples^2)-sum(samples)^2/n

S2=SS/(n-1)

s_x=sqrt(S2/n)

t=(X_ave-mu)/s_x

v=n-1

upper=qt(1-alpha/2,v)

under=-qt(1-alpha/2,v)
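
#as a check, the built-in one-sample t-test should reproduce t, the degrees of freedom, and the decision

t.test(samples,mu=24.3)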

#necessary sample size

d=qt(1-alpha,n-1)*s_x

#required sample size: n = s^2 * t^2 / d^2
n=S2*qt(1-alpha,v)^2/(d^2)


#confidence interval of sigma

#the lower limit for sigma comes from the upper chi-square quantile and vice versa
sigma_under=sqrt(SS/qchisq(1-alpha/2,v))

sigma_upper=sqrt(SS/qchisq(alpha/2,v))

#required sample size for testing H0: sigma^2 <= sigma0^2, found by Newton-Raphson

samples=c(25.8,24.6,26.1,22.9,25.1,27.3,24,24.5,23.9,26.2,24.3,24.6,23.3,25.5,28.1,24.8,23.5,26.3,25.4,25.5,23.9,27,24.8,22.9,25.4)

n=length(samples);v=n-1

times=100
newton_raphson=data.frame(times=1:times,value=0,f=0,df=0)
value=20;h=0.01

alpha=0.05;beta=0.15;sigma0=1.5

fun=function(x){

s2=(sum(samples^2)-sum(samples)^2/x)/(x-1)

return((qchisq(1-beta,v)/qchisq(alpha,v))^2-(sigma0/s2)^2)  

}


for(j in 1:nrow(newton_raphson)){

  f=fun(value)
  df=(fun(value+h)-fun(value))/h  #forward-difference approximation to the derivative

  newton_raphson$f[j]=f
  newton_raphson$df[j]=df
  value=value-f/df  #Newton-Raphson update

  newton_raphson$value[j]=value
}

solution=newton_raphson$value[nrow(newton_raphson)]
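
#cross-check of the Newton-Raphson result with uniroot, assuming fun() changes
#sign on the chosen interval (widen the interval if it does not)

uniroot(fun,c(2,200))$root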





#p.113

n=1000

samples=rpois(n,lambda=5)

hist(samples)

#H0: the population is symmetric (skewness sqrt(b1) = 0)

X_ave=mean(samples)

SS=sum(samples^2)-sum(samples)^2/n

S2=SS/(n-1)

summary_samples=data.frame(samples) %>% group_by(samples) %>% summarise(n=n()) %>% mutate(X=samples,X2=samples^2,X3=samples^3)

k3=(n*sum(summary_samples$n*summary_samples$X3)-3*sum(summary_samples$n*summary_samples$X)*sum(summary_samples$n*summary_samples$X2)+(2*sum(summary_samples$n*summary_samples$X)^3)/n)/((n-1)*(n-2))

g1=k3/sqrt(S2^3);b1=(((n-2)*g1)/(sqrt(n*(n-1))))

A=(b1)*sqrt(((n+1)*(n+3))/(6*(n-2)))

B=(3*(n^2+27*n-70)*(n+1)*(n+3))/((n-2)*(n+5)*(n+7)*(n+9))

C=sqrt(2*(B-1))-1

D=sqrt(C);E=1/sqrt(log(D));f=A/sqrt(2/(C-1))

Z_g1=E*log(f+sqrt(f^2+1))

#H0: the population has normal kurtosis (b2 = 3)

k4=(n+1)*(n*sum(summary_samples$n*summary_samples$X^4)-4*sum(summary_samples$n*summary_samples$X)*sum(summary_samples$n*summary_samples$X3)+6*sum(summary_samples$n*summary_samples$X2)*sum(summary_samples$n*summary_samples$X)^2/n-3*sum(summary_samples$n*summary_samples$X)^4/(n^2))/((n-1)*(n-2)*(n-3))

k4=k4-3*(SS^2)/((n-2)*(n-3))

g2=k4/(S2^2)

G=(24*n*(n-2)*(n-3))/((n+3)*(n+5)*(n+1)^2)

H=(n-2)*(n-3)*abs(g2)/((n^2-1)*sqrt(G))

J=(6*(n^2-5*n+2))*sqrt((6*(n+3)*(n+5))/(n*(n-2)*(n-3)))/((n+7)*(n+9))

K=6+8*(2/J+sqrt(1+4/(J^2)))/J

L=(1-2/K)/(1+H*sqrt(2/(K-4)))

Z_g2=(1-2/(9*K)-L^(1/3))/sqrt(2/(9*K))

#omnibus test: K2 = Z_g1^2 + Z_g2^2 is compared to a chi-square with 2 df
Z_g1^2+Z_g2^2

qchisq(1-0.01,2)
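
#for comparison, a built-in normality test (the sample is Poisson, so rejection is expected)

shapiro.test(samples)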


#p.123 two-sample hypotheses, H0: mu1 = mu2 (pooled-variance t-test)

drug_B=c(8.8,8.4,7.9,8.7,9.1,9.6)
drug_G=c(9.9,9,11.1,9.6,8.7,10.4,9.5)

n1=length(drug_B);n2=length(drug_G)

v1=n1-1;v2=n2-1

X1=mean(drug_B);X2=mean(drug_G)

SS1=sum(drug_B^2)-sum(drug_B)^2/n1

SS2=sum(drug_G^2)-sum(drug_G)^2/n2

sp_2=(SS1+SS2)/(v1+v2)

s=sqrt(sp_2*(1/n1+1/n2))

t=abs(X1-X2)/s

qt(1-0.05/2,v1+v2)
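
#check: the pooled-variance two-sample t-test is t.test with var.equal=TRUE

t.test(drug_B,drug_G,var.equal=TRUE)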

#confidence interval

upper_X1=X1+qt(1-0.05/2,v1+v2)*sqrt(sp_2/n1)

under_X1=X1-qt(1-0.05/2,v1+v2)*sqrt(sp_2/n1)

#confidence interval

upper_X2=X2+qt(1-0.05/2,v1+v2)*sqrt(sp_2/n2)

under_X2=X2-qt(1-0.05/2,v1+v2)*sqrt(sp_2/n2)

#confidence interval

upper=X1-X2+qt(1-0.05/2,v1+v2)*s

under=X1-X2-qt(1-0.05/2,v1+v2)*s

#weighted confidence interval

Xp=(n1*X1+n2*X2)/(n1+n2)

upper=Xp+qt(1-0.05/2,v1+v2)*sqrt(sp_2/(n1+n2))

under=Xp-qt(1-0.05/2,v1+v2)*sqrt(sp_2/(n1+n2))




#minimum detectable difference

#population

sqrt((2*sp_2)/(n1+n2))*(qt(1-0.05,v1+v2)+qt(1-0.01,v1+v2))

#power of the test

#p.138 example 8.8

type1=c(41,34,33,36,40,25,31,37,34,30,38)

type2=c(52,57,62,55,64,57,56,55)

n1=length(type1);n2=length(type2)

v1=n1-1;v2=n2-1

SS1=sum(type1^2)-sum(type1)^2/n1

SS2=sum(type2^2)-sum(type2)^2/n2

s1=var(type1);s2=var(type2)

f=s1/s2

qf(1-0.05/2,v1,v2)
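
#check: var.test performs the same F test of the two variances

var.test(type1,type2)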

sp_2=(SS1+SS2)/(v1+v2)

alpha=0.01;beta=0.05

#necessary sample size

((qnorm(1-alpha)+qnorm(1-beta))/log(f))^2+2

#example 8.12

weight=c(72.5,71.7,60.8,63.2,71.4,73.1,77.9,75.7,72,69);height=c(183,172,180,190,191,169,166,177,184,187,179)

n1=length(weight);n2=length(height)

v1=n1-1;v2=n2-1

X1=mean(weight);X2=mean(height)

s1=var(weight);s2=var(height)

V1=sqrt(s1)/X1;V2=sqrt(s2)/X2

s1_log=var(log10(weight))

s2_log=var(log10(height))

f=s1_log/s2_log

qf(1-0.05/2,v1,v2)

#Z-test sigma1/mu1=sigma2/mu2

V_p=((n1-1)*V1+(n2-1)*V2)/(n1+n2-2)

V_p2=V_p^2

Z=(V1-V2)/sqrt(V_p2*(1/(n1-1)+1/(n2-1))*(0.5+V_p2))

qnorm(1-0.05)

#Mann-Whitney test p.146

#example 8.13

height_males=c(193,188,185,183,180,178,170)

height_females=c(175,173,168,165,163)

rank(c(height_females,height_males))

female_rank=row_number(desc(c(height_females,height_males)))[1:length(height_females)]

male_rank=row_number(desc(c(height_females,height_males)))[(length(height_females)+1):(length(height_females)+length(height_males))]

R2=sum(female_rank);R1=sum(male_rank)

n1=length(height_males);n2=length(height_females)

U1=n1*n2+(n1*(n1+1))/2-R1

U2=n1*n2-U1
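
#check: wilcox.test reports W, which corresponds to one of the U statistics above
#(which one depends on the argument order and the rank direction)

wilcox.test(height_males,height_females)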

#Mann-Whitney test (normal approximation)

#example 8.17

A_grade=c(3,3,3,6,10,10,13.5,13.5,16.5,16.5,19.5)

B_grade=c(3,3,7.5,7.5,10,12,16.5,16.5,19.5,22.5,22.5,22.5,22.5,25)

n1=length(A_grade);n2=length(B_grade)

R1=sum(A_grade);R2=sum(B_grade)

N=n1+n2

U1=n1*n2+(n1*(n1+1))/2-R1

U2=n1*n2-U1

mu_U=(n1*n2)/2

rank=data.frame(ranks=c(A_grade,B_grade)) %>% group_by(ranks) %>% summarise(n=n())

t=rank$n

sum_t=sum(t^3-t)

sigma_U=sqrt((n1*n2*(N^3-N-sum_t))/(12*(N^2-N)))

#without ties this simplifies to sigma_U=sqrt((n1*n2*(N+1))/12)

Z=abs(U2-mu_U)/sigma_U

qnorm(1-0.05)

#p.156 testing for difference between two diversity indices

Michigan=data.frame(f=c(47,35,7,5,3,2)) %>% mutate(f_log=f*log10(f),f_log2=f*log10(f)^2)

Louisiana=data.frame(f=c(48,23,11,13,8,2)) %>% mutate(f_log=f*log10(f),f_log2=f*log10(f)^2)

n1=sum(Michigan$f)

H1=(n1*log10(n1)-sum(Michigan$f_log))/n1

s_H1=(sum(Michigan$f_log2)-sum(Michigan$f_log)^2/n1)/(n1^2)

n2=sum(Louisiana$f)

H2=(n2*log10(n2)-sum(Louisiana$f_log))/n2

s_H2=(sum(Louisiana$f_log2)-sum(Louisiana$f_log)^2/n2)/(n2^2)

s_H1_H2=sqrt(s_H1+s_H2)

t=(H1-H2)/s_H1_H2

v=((s_H1+s_H2)^2)/(s_H1^2/n1+s_H2^2/n2)

qt(1-0.05/2,v)




#paired sample hypothesis p.161

data=data.frame(Deer=c(1:10),hindleg=c(142,140,144,144,142,146,149,150,142,148),foreleg=c(138,136,147,139,143,141,143,145,136,146)) %>% mutate(d=hindleg-foreleg)

n=nrow(data);d_bar=mean(data$d);s2_d=var(data$d)

s_d_bar=sqrt(s2_d/n)  #standard error of the mean difference

t=d_bar/s_d_bar

qt(1-0.05/2,n-1)

#confidence interval

upper=d_bar+qt(1-0.05/2,n-1)*s_d_bar

under=d_bar-qt(1-0.05/2,n-1)*s_d_bar
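
#check: the built-in paired t-test gives the same t and confidence interval

t.test(data$hindleg,data$foreleg,paired=TRUE)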

#example 9.3 p.166 Wilcoxon paired-sample test

data=data %>% mutate(rank_d=row_number(abs(d)))

#ties in |d| share the average rank, whatever their signs
rank=data %>% group_by(abs(d)) %>% mutate(rank_d=mean(rank_d),rank_sign_d=sign(d)*rank_d)

t_plus=sum(rank$rank_sign_d[rank$rank_sign_d>0])

t_minus=sum(rank$rank_sign_d[rank$rank_sign_d<0])


#normal approximation

mu_t=(n*(n+1))/4

sigma_t=sqrt((n*(n+1)*(2*n+1))/24)

Z=abs(max(t_plus,t_minus)-mu_t)/sigma_t
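
#check: the built-in signed-rank test (it switches to a normal approximation when ties are present)

wilcox.test(data$hindleg,data$foreleg,paired=TRUE)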

#example 9.4 p.170

mat=array(0,dim=c(2,2))

rownames(mat)=colnames(mat)=c("relief","no relief")

mat[1,1]=11;mat[1,2]=6;mat[2,1]=10;mat[2,2]=24

kai2=(abs(mat[1,2]-mat[2,1])-1)^2/(mat[1,2]+mat[2,1])

qchisq(1-0.05,1)
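
#check: mcnemar.test applies the same continuity correction for a 2x2 table

mcnemar.test(mat)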

#example 9.7 p.173

mat=array(0,dim=c(3,3))

mat[1,]=c(173,20,7)
mat[2,]=c(15,51,2)
mat[3,]=c(5,3,24)

kai2=c()

for(j in 1:ncol(mat)){
for(i in 1:nrow(mat)){  
if(j>i){

kai2=c(kai2,(mat[i,j]-mat[j,i])^2/(mat[i,j]+mat[j,i]))

}   
}
}

kai2=sum(kai2)

v=nrow(mat)*(nrow(mat)-1)/2

qchisq(1-0.05,v)
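
#check: for a k x k table mcnemar.test computes the same symmetry (Bowker) statistic

mcnemar.test(mat)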


#p.177~180

#example 10.1

data=data.frame(feed1=c(60.8,57,65,58.6,61.7),feed2=c(68.7,67.7,74,66.3,69.8),feed3=c(102.6,102.1,100.2,96.5,NA),feed4=c(87.9,84.2,83.1,85.7,90.3))

ni=c(5,5,4,5);N=sum(ni)

mat=as.matrix(data);mat[is.na(mat)]=0  #zero out the missing value so column sums work

X_s=apply(mat,2,sum)

X_bar=c();X_n=apply(mat,2,sum)^2/ni

for(j in 1:ncol(data)){

vec=data[,j];vec=vec[is.na(vec)==0]  

X_bar=c(X_bar,mean(vec))  

}

C=sum(mat)^2/N

group_MS=sum(apply(mat,2,sum)^2/ni)-C

error_MS=sum(mat^2)-C-group_MS

group_MS=group_MS/(ncol(mat)-1)

error_MS=error_MS/(N-ncol(mat))

f=group_MS/error_MS

qf(1-0.05,ncol(mat)-1,N-ncol(mat))
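
#check with the built-in ANOVA: reshape to long format and drop the missing feed3 value

long=na.omit(data.frame(y=unlist(data),feed=rep(names(data),each=nrow(data))))

summary(aov(y~feed,data=long))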

#example 10.2 p.186

data=data.frame(one=c(34,36,34,35,34),two=c(37,36,35,37,37),three=c(34,37,35,37,36),four=c(36,34,37,34,35))

total=sum(data)  #avoid shadowing base::sum

sum2=sum(data^2)

N=prod(dim(data))

C=total^2/N

group_SS=sum(apply(data,2,sum)^2/nrow(data))-C

total_SS=sum2-C

error_SS=total_SS-group_SS

f=(group_SS/(ncol(data)-1))/(error_SS/(prod(dim(data))-1-(ncol(data)-1)))

qf(1-0.05,ncol(data)-1,prod(dim(data))-1-(ncol(data)-1))

#Welch's ANOVA for multiple groups with unequal variances

k=4

s2=c();ni=c();ave=c()

for(j in 1:k){

n=sample(50:100,1)  

ni=c(ni,n)

x=rnorm(n,sample(1:10,1),sample(1:10,1))  

s2=c(s2,var(x));ave=c(ave,mean(x))

}

ci=ni/s2;C=sum(ci)

X_w=sum(ci*ave)/C

A=sum((1-ci/C)^2/(ni-1))

f_dash=sum(ci*(ave-X_w)^2)/((k-1)*(1+(2*A*(k-2))/(k^2-1)))
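
#the built-in Welch ANOVA is oneway.test with var.equal=FALSE; it needs the raw
#values, so this variant of the simulation keeps them (X and g are new names)

X=c();g=c()

for(j in 1:k){
  x=rnorm(sample(50:100,1),sample(1:10,1),sample(1:10,1))
  X=c(X,x);g=c(g,rep(j,length(x)))
}

oneway.test(X~factor(g),var.equal=FALSE)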

#10.2 confidence limit for population means p.189

k=4

s2=c();ni=c();ave=c();X=c();SS=c()

for(j in 1:k){

n=sample(50:100,1)  

ni=c(ni,n)

x=rnorm(n,sample(1:10,1),4)  

X=c(X,x)

s2=c(s2,var(x));ave=c(ave,mean(x))

SS=c(SS,sum((x-mean(x))^2))

}

sp2=sum(SS)/sum(ni-1)

B=2.30259*(log10(sp2)*sum(ni-1)-sum((ni-1)*log10(s2)))

C=1+(sum(1/(ni-1))-1/sum(ni-1))/(3*(k-1))

B_C=B/C

alpha=0.05

if(B_C<qchisq(1-alpha,k-1)){

#variances judged homogeneous: use the pooled variance sp2 with error df = sum(ni-1)
under=ave-qt(1-alpha/2,sum(ni-1))*sqrt(sp2/ni)

upper=ave+qt(1-alpha/2,sum(ni-1))*sqrt(sp2/ni)

}

#power of test

k=4

s2=c();ni=c();ave=c();X=c();SS=c()

n=sample(50:100,1)  

for(j in 1:k){

x=rnorm(n,sample(1:10,1),4)  

X=c(X,x)

s2=c(s2,var(x));ave=c(ave,mean(x))

SS=c(SS,sum((x-mean(x))^2))

}

#noncentrality parameter phi, using the within-group (error) MS rather than the overall variance
phi=sqrt(n*sum((ave-mean(ave))^2)/(k*(sum(SS)/(k*(n-1)))))

#example 10.5

#H0:mu1=mu2=mu3

data=data.frame(source_of_variation=c("Among groups","Error"),SS=c(10.37,16.55),DF=c(2,10)) %>% mutate(MS=SS/DF)

f=data$MS[1]/data$MS[2];k=data$DF[1]+1

qf(1-0.05,data$DF[1],data$DF[2])

phi=sqrt((k-1)*(data$MS[1]-data$MS[2])/(k*data$MS[2]))

#example 10.10

#Kruskal-Wallis test p.197

data=data.frame(species=c(rep("Herbs",5),rep("Shrubs",5),rep("Trees",5)),values=c(14,12.1,9.6,8.2,10.2,8.4,5.1,5.5,6.6,6.3,6.9,7.3,5.8,4.1,5.4)) %>% mutate(order=row_number(values))

summary=data %>% group_by(species) %>% summarise(R=sum(order),n=n())

N=sum(summary$n)

H=(12/(N*(N+1)))*sum(summary$R^2/summary$n)-3*(N+1)

qchisq(1-0.05,2)
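
#check: the built-in Kruskal-Wallis test

kruskal.test(values~species,data=data)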

#example 10.11

#Kruskal-Wallis test (tied ranks)

data=data.frame(pond1=c(7.68,7.69,7.7,7.7,7.72,7.73,7.73,7.76),pond2=c(7.71,7.73,7.74,7.74,7.78,7.78,7.8,7.81),pond3=c(7.74,7.75,7.77,7.78,7.8,7.81,7.84,NA),pond4=c(7.71,7.71,7.74,7.79,7.81,7.85,7.87,7.91))

orders=rank(c(data[is.na(data)!=T]),ties.method = c("average"))

orders2=rank(c(data[is.na(data)!=T]),ties.method = c("min"))


ranks=data

sub_orders=orders

ns=c();R=c()

for(j in 1:ncol(ranks)){

values=data[,j]

values=values[is.na(values)!=T]

ns=c(ns,length(values))

ranks[1:length(values),j]=c(sub_orders[1:length(values)])

R=c(R,sum(sub_orders[1:length(values)]))

sub_orders=sub_orders[-c(1:length(values))]

}

N=sum(ns)

H=(12/(N*(N+1)))*sum(R^2/ns)-3*(N+1)

order2=data.frame(order=orders2) %>% group_by(order) %>% summarise(n=n()) %>% filter(n>1)

t=order2$n

C=1-sum(t^3-t)/(N^3-N)

H_C=H/C

v=ncol(data)-1;k=ncol(data)

qchisq(1-0.05,v)  #k-1 degrees of freedom

f=((N-k)*H_C)/((k-1)*(N-1-H_C))

qf(1-0.05,v,N-k)
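
#check: kruskal.test applies the same tie correction automatically

long=na.omit(data.frame(y=unlist(data),pond=rep(names(data),each=nrow(data))))

kruskal.test(y~pond,data=long)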

#example 10.12

#the multisample median test p.201

data=data.frame(north=c(7.1,7.2,7.4,7.6,7.6,7.7,7.7,7.9,8.1,8.4,8.5,8.8),east=c(6.9,7,7.1,7.2,7.3,7.3,7.4,7.6,7.8,8.1,8.3,8.5),south=c(7.8,7.9,8.1,8.3,8.3,8.4,8.4,8.4,8.6,8.9,9.2,9.4),west=c(6.4,6.6,6.7,7.1,7.6,7.8,8.2,8.4,8.6,8.7,8.8,8.9))

medians=apply(data,2,median)

grand_median=median(as.matrix(data))
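
#the snippet stops at the medians; the median test itself tallies observations
#above and not above the grand median into a 2 x k table and applies a
#chi-square test (a sketch of that remaining step)

above=apply(data,2,function(x)sum(x>grand_median))

not_above=apply(data,2,function(x)sum(x<=grand_median))

chisq.test(rbind(above,not_above))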



#Homogeneity of variances

#example 10.13

#H0:sigma1=sigma2=sigma3=sigma4

data=data.frame(feed1=c(60.8,57,65,58.6,61.7),feed2=c(68.7,67.7,74,66.3,69.8),feed3=c(102.6,102.1,100.2,96.5,NA),feed4=c(87.9,84.2,83.1,85.7,90.3))

K=ncol(data);SS=c();df=c();variances=c();average=c()

for(j in 1:ncol(data)){

vec=data[,j];vec=vec[is.na(vec)!=T]  

df=c(df,length(vec))

SS=c(SS,sum((vec-mean(vec))^2))

variances=c(variances,var(vec))

average=c(average,mean(vec))

}

v=df-1

s2_p=sum(SS)/sum(v)

B=2.30259*(log10(s2_p)*sum(v)-sum(v*log10(variances)))

C=1+(sum(1/v)-1/sum(v))/(3*(K-1))

B_C=B/C

qchisq(1-0.05,K-1)
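
#check: the built-in Bartlett test on long-format data

long=na.omit(data.frame(y=unlist(data),feed=rep(names(data),each=nrow(data))))

bartlett.test(y~feed,data=long)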

#example 10.14 

#testing for homogeneity of coefficients of variation, using the data of example 10.1

#H0: sigma1/mu1 = sigma2/mu2 = sigma3/mu3 = sigma4/mu4 (equal coefficients of variation)

V=sqrt(variances)/average

V_p=sum(v*V)/sum(v)

kai2=(sum(v*V^2)-sum(v*V)^2/sum(v))/((V_p^2+0.5)*V_p^2)

qchisq(1-0.05,K-1)


#example 11.1 p.210


#Tukey multiple comparison test with equal sample sizes. The data are strontium concentrations in five different bodies of water.

#H0:mu1=mu2=mu3=mu4=mu5

data=data.frame(Graysons_pond=c(28.2,33.2,36.4,34.6,29.1,31),Beaver_lake=c(39.6,40.8,37.9,37.1,43.6,42.4),Anglers_cov=c(46.3,42.1,43.5,48.8,43.7,40.1),Appletree_lake=c(41,44.1,46.4,40.2,38.6,36.3),Rock_river=c(56.3,54.1,59.4,62.7,60,57.3))


K=ncol(data);SS=c();df=c();variances=c();average=c()

for(j in 1:ncol(data)){

vec=data[,j];vec=vec[is.na(vec)!=T]  

df=c(df,length(vec))

SS=c(SS,sum((vec-mean(vec))^2))

variances=c(variances,var(vec))

average=c(average,mean(vec))

}

group_SS=sum(apply(data,2,sum)^2/nrow(data))-sum(data)^2/prod(dim(data))

group_MS=group_SS/(ncol(data)-1)

error_MS=sum(SS)/(ncol(data)*(nrow(data)-1))

SE=sqrt(error_MS/nrow(data))

f=group_MS/error_MS

qf(1-0.05,K-1,ncol(data)*(nrow(data)-1))

#example 11.1 (continued)

#H0:mu_B=mu_A

mat=array(0,dim=c(ncol(data),ncol(data)))

for(i in 1:ncol(data)){
for(j in 1:ncol(data)){  

mat[i,j]=abs(average[i]-average[j])/SE   

}
}

#compare each pairwise q to the tabled critical value

mat>4.166
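
#the table value can also be drawn from R: qtukey gives the studentized range
#quantile for K means and the error degrees of freedom

qtukey(0.95,K,ncol(data)*(nrow(data)-1))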


#example 11.2  p.213

data=data.frame(feed1=c(60.8,57,65,58.6,61.7),feed2=c(68.7,67.7,74,66.3,69.8),feed3=c(102.6,102.1,100.2,96.5,NA),feed4=c(87.9,84.2,83.1,85.7,90.3))

K=ncol(data);SS=c();df=c();variances=c();average=c()

for(j in 1:ncol(data)){

vec=data[,j];vec=vec[is.na(vec)!=T]  

df=c(df,length(vec))

SS=c(SS,sum((vec-mean(vec))^2))

variances=c(variances,var(vec))

average=c(average,mean(vec))

}

error_MS=sum(SS)/sum(df-1)

error_DF=sum(df-1)

mat=array(0,dim=c(K,K))

for(j in 1:K){
for(i in 1:K){  

SE=sqrt(error_MS*(1/df[i]+1/df[j])/2)  

mat[i,j]=abs(average[i]-average[j])/SE   

print(SE)

}
}

mat>4.076

#confidence intervals following multiple comparisons p.215

under_mat=c()

upper_mat=c()

ave_under=c()

ave_upper=c()

for(j in 1:K){

under_mat=c(under_mat,average-qt(1-0.05/2,df[j]-1)*sqrt(error_MS/df[j]))  

upper_mat=c(upper_mat,average+qt(1-0.05/2,df[j]-1)*sqrt(error_MS/df[j]))  

ave_under=c(ave_under,sum(df*average)/sum(df)-qt(1-0.05/2,df[j]-1)*sqrt(error_MS/sum(df)))

ave_upper=c(ave_upper,sum(df*average)/sum(df)+qt(1-0.05/2,df[j]-1)*sqrt(error_MS/sum(df)))

}





#the Scheffe test for the hypotheses and data of the preceding examples, p.220

#example 11.6

data=data.frame(Graysons_pond=c(28.2,33.2,36.4,34.6,29.1,31),Beaver_lake=c(39.6,40.8,37.9,37.1,43.6,42.4),Anglers_cov=c(46.3,42.1,43.5,48.8,43.7,40.1),Appletree_lake=c(41,44.1,46.4,40.2,38.6,36.3),Rock_river=c(56.3,54.1,59.4,62.7,60,57.3))


K=ncol(data);SS=c();df=c();variances=c();average=c()

for(j in 1:ncol(data)){

vec=data[,j];vec=vec[is.na(vec)!=T]  

df=c(df,length(vec))

SS=c(SS,sum((vec-mean(vec))^2))

variances=c(variances,var(vec))

average=c(average,mean(vec))

}

group_SS=sum(apply(data,2,sum)^2/nrow(data))-sum(data)^2/prod(dim(data))

group_MS=group_SS/(ncol(data)-1)

error_MS=sum(SS)/(ncol(data)*(nrow(data)-1))

#the Scheffe critical value must be defined before the loop below uses it
N=sum(df);S_a=sqrt((K-1)*qf(1-0.05,K-1,N-K))


mat=array(0,dim=c(K,K))

under=array(0,dim=c(K,K))

upper=array(0,dim=c(K,K))


for(j in 1:K){
for(i in 1:K){  

if(i!=j){  

SE=sqrt(error_MS*(1/df[i]+1/df[j]))

mat[i,j]=abs(average[i]-average[j])/SE  

under[i,j]=average[i]-average[j]-S_a*sqrt(error_MS*(1/df[i]+1/df[j]))

upper[i,j]=average[i]-average[j]+S_a*sqrt(error_MS*(1/df[i]+1/df[j]))


}

}
}


#multiple contrasts

c_vec=c(1/3,1/3,1/3,-1)

ave=c(average[2:4],average[5])

abs(sum(c_vec*ave))/sqrt(error_MS*sum(c_vec^2/df[c(2:5)]))

S_a


#95% confidence interval p.222

sum(c_vec*ave)+S_a*sqrt(error_MS*sum(c_vec^2/df[c(2:5)]))



#example 11.9 p.223

#nonparametric Tukey-type multiple comparisons, using the Nemenyi test

#example 10.10

data=data.frame(species=c(rep("Herbs",5),rep("Shrubs",5),rep("Trees",5)),values=c(14,12.1,9.6,8.2,10.2,8.4,5.1,5.5,6.6,6.3,6.9,7.3,5.8,4.1,5.4))

data=data %>% mutate(order=row_number(data$values))

summary=data %>% group_by(species) %>% summarise(R=sum(order),n=n())

n=unique(summary$n);k=nrow(summary);R=summary$R

SE=sqrt((n*n*k*(n*k+1))/12)

mat=array(0,dim=c(k,k))

for(i in 1:k){
for(j in 1:k){  

mat[i,j]=abs(R[i]-R[j])/SE 

}
}

#compare each pairwise statistic to the tabled critical value

mat>3.314


#example 11.10

#nonparametric multiple comparisons with unequal sample sizes. The data are those from example 10.11, where the Kruskal-Wallis test rejected the null hypothesis that water pH was the same in all four ponds examined.

#Kruskal-Wallis test (tied ranks)

data=data.frame(pond1=c(7.68,7.69,7.7,7.7,7.72,7.73,7.73,7.76),pond2=c(7.71,7.73,7.74,7.74,7.78,7.78,7.8,7.81),pond3=c(7.74,7.75,7.77,7.78,7.8,7.81,7.84,NA),pond4=c(7.71,7.71,7.74,7.79,7.81,7.85,7.87,7.91))

orders=rank(c(data[is.na(data)!=T]),ties.method = c("average"))

orders2=rank(c(data[is.na(data)!=T]),ties.method = c("min"))


ranks=data

sub_orders=orders

ns=c();R=c()

for(j in 1:ncol(ranks)){

values=data[,j]

values=values[is.na(values)!=T]

ns=c(ns,length(values))

ranks[1:length(values),j]=c(sub_orders[1:length(values)])

R=c(R,sum(sub_orders[1:length(values)]))

sub_orders=sub_orders[-c(1:length(values))]

}

N=sum(ns)

H=(12/(N*(N+1)))*sum(R^2/ns)-3*(N+1)

order2=data.frame(order=orders2) %>% group_by(order) %>% summarise(n=n()) %>% filter(n>1)

t=order2$n

sum_t=sum(t^3-t)

R_bar=R/ns

mat_t=array(0,dim=c(length(R_bar),length(R_bar)))

for(j in 1:length(R_bar)){
for(i in 1:length(R_bar)){

SE=sqrt(((N*(N+1))/12-sum_t/(12*(N-1)))*(1/ns[i]+1/ns[j]))  

mat_t[i,j]=abs(R_bar[i]-R_bar[j])/SE  



}
}

#Dunn's tabled critical value Q(0.05,4); compare each entry of mat_t to it
Q_0.05_4=2.639

mat_t>Q_0.05_4


#p.232 two-factor analysis of variance with equal replication

#example 12.1 hypotheses and data for a Model I, two-factor analysis of variance with equal replication

data=data.frame(treat_ment=c("No Hormone Treatment","No Hormone Treatment","Hormone Treatment","Hormone Treatment"),sex=c("female","male","female","male"))

mat=matrix(c(16.5,14.5,39.1,32,18.4,11,26.2,23.8,12.7,10.8,21.3,28.8,14,14.3,35.8,25,12.8,10,40.2,29.3),ncol=5)

Data=data.frame(data,(mat))

sum_X=apply(mat,1,sum)

ave_X=apply(mat,1,mean)

summary_treat_ment=data.frame(data,sum_X,ave_X) %>% group_by(treat_ment) %>% summarise(sum_X=sum(sum_X),ave_X=mean(ave_X))

summary_sex=data.frame(data,sum_X,ave_X) %>% group_by(sex) %>% summarise(sum_X=sum(sum_X),ave_X=mean(ave_X))

a=2;b=2;n=ncol(mat)

N=a*b*n

C=sum(mat)^2/N

total_SS=sum(mat^2)-C

total_DF=N-1

cells_SS=sum(sum_X^2)/n-C  #divide by n, the number of replicates per cell

cells_DF=a*b-1

with_in_cells_SS=total_SS-cells_SS

with_in_cells_DF=a*b*(n-1)

#each factor-A total spans b*n observations and each factor-B total a*n
#(here a = b = 2, so the numbers are unchanged)
factor_A_SS=sum(summary_treat_ment$sum_X^2)/(b*n)-C

factor_B_SS=sum(summary_sex$sum_X^2)/(a*n)-C

factor_A_DF=a-1

factor_B_DF=b-1

AB_interaction_SS=cells_SS-factor_A_SS-factor_B_SS

AB_interaction_DF=factor_A_DF*factor_B_DF

#H0: There is no effect of hormone treatment on the mean plasma calcium concentration of birds in the population sampled.

f=(factor_A_SS/factor_A_DF)/(with_in_cells_SS/with_in_cells_DF)

qf(1-0.05,a-1,with_in_cells_DF)

#H0:There is no difference in mean plasma calcium concentration between male and female birds in the population sampled.

f=(factor_B_SS/factor_B_DF)/(with_in_cells_SS/with_in_cells_DF)

qf(1-0.05,b-1,with_in_cells_DF)

#H0:There is no interaction of sex and hormone treatment affecting the mean plasma calcium concentration of birds in the population sampled.

f=(AB_interaction_SS/AB_interaction_DF)/(with_in_cells_SS/with_in_cells_DF)

qf(1-0.05,AB_interaction_DF,with_in_cells_DF)
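
#check: aov on long-format data reproduces all three F tests (c(mat) unrolls
#the 4 cells x 5 replicates column by column, so the cell labels are recycled)

long=data.frame(y=c(mat),treat=rep(data$treat_ment,times=ncol(mat)),sex=rep(data$sex,times=ncol(mat)))

summary(aov(y~treat*sex,data=long))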

#example 12.3

#we concluded that mean plasma calcium concentration is different between birds with the hormone treatment and those without

XS=summary_treat_ment$ave_X

under=XS-qt(1-0.05/2,a*b*(n-1))*sqrt((with_in_cells_SS/with_in_cells_DF)/(b*n))

upper=XS+qt(1-0.05/2,a*b*(n-1))*sqrt((with_in_cells_SS/with_in_cells_DF)/(b*n))

X_p=sum(mat)/prod(dim(mat))

under=X_p-qt(1-0.05/2,a*b*(n-1))*sqrt(with_in_cells_SS/(with_in_cells_DF*prod(dim(mat))))

upper=X_p+qt(1-0.05/2,a*b*(n-1))*sqrt(with_in_cells_SS/(with_in_cells_DF*prod(dim(mat))))


