変量効果の推定とBLUP法　佐々木義之先生　京都大学学術出版会 #RStudio


#変量効果モデルとBLUP(遺伝的要因による）

#p.56

library(dplyr)

X=matrix(c(rep(1,12),1,1,1,rep(0,9),0,0,0,1,1,1,1,1,0,0,0,0,rep(0,8),1,1,1,1),ncol=4)

Z=matrix(c(c(1,0,0,1,0,0,0,0,1,0,0,0),c(0,1,0,0,1,0,1,0,0,1,0,0),c(0,0,1,0,0,0,0,0,0,0,1,0),c(0,0,0,0,0,1,0,1,0,0,0,1)),ncol=4)

X2=t(X)%*%X

sol_X2=svd(X2)$u%*%diag(1/svd(X2)$d)%*%t(svd(X2)$v)

W=diag(rep(1,nrow(X)))-X%*%sol_X2%*%t(X)

#parameters

y=abs(rnorm(12))

mat1=rbind(cbind(t(X)%*%X,t(X)%*%Z),cbind(t(Z)%*%X,t(Z)%*%Z))

mat12=t(mat1)%*%mat1

sol_mat12=svd(mat12)$u%*%diag(1/svd(mat12)$d)%*%t(svd(mat12)$v)

mat2=t(t(c(t(X)%*%y,t(Z)%*%y)))

params=sol_mat12%*%t(mat1)%*%mat2


#変量効果の推定

data=data.frame(population=c("A","A","A","B","B","B"),bull=c("S1","S2","S3","S4","S5","S6"),H1=c(24,30,NA,30,25,NA),H2=c(22,26,21,26,NA,23))

fixed=1

y=c(t(as.matrix(cbind(data$H1,data$H2))))

y=y[is.na(y)==F]

X=matrix(c(1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,1,1,1),ncol=5)

t_X=t(X)

Z=(matrix(c(1,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1),ncol=6))

t_Z=t(Z)

V=array(0,dim=c(9,9))

V[1,]=c(3,0.225,0,0,0,0,0,0,0)
V[2,]=c(0,3,0,0,0,0,0,0,0)
V[3,]=c(0,0,3,0.225,0,0.1125,0.1125,0.1125,0)
V[4,]=c(0,0,0,3,0,0.1125,0.1125,0.1125,0)
V[5,5:9]=c(3,0,0,0,0)
V[6,6:9]=c(3,0.225,0.05625,0)
V[7,7:9]=c(3,0.05625,0)
V[8,8:9]=c(3,0)
V[9,9]=c(3)

V=V+t(V)

diag(V)=diag(V)/2

H=t(X)%*%solve(V)%*%X

#交配の入れ替えはすべて等しいから

H[lower.tri(H)]=0

beta=solve(H)%*%t(X)%*%solve(V)%*%t(t(y))

C=matrix(c(0.225,0.225,0,0,0,0,0,0,0,0,0,0.225,0.225,0,0.1125,0.1125,0.1125,0,0,0,0,0,0.225,0,0,0,0,0,0,0.1125,0.1125,0,0.225,0.225,0.05625,0,0,0,0.1125,0.1125,0,0.5625,0.5625,0.225,0,0,0,0,0,0,0,0,0,0.225),ncol=6)

t_k=array(0,dim=c(6,5))

t_k[1:3,4]=c(1,1,1)

t_k[4:6,5]=c(1,1,1)

diff=y-X%*%beta

if(fixed==1){

u=t(t(diff)%*%Z/apply(Z,2,sum))

cor(X%*%beta+Z%*%u,y)

}else{

u=t(C)%*%solve(V)%*%(y-X%*%beta)  

cor(X%*%beta+Z%*%u,y)

}

result=data.frame(y=y,predict=X%*%beta+Z%*%u)

plot(c(1:nrow(result)),result$y,ylim=c(min(result),max(result)),xlab="samples",ylab="values",col=2,type="o")

par(new=T)

plot(c(1:nrow(result)),result$predict,ylim=c(min(result),max(result)),xlab="samples",ylab="values",col=3,type="o")

g_hat=t_k%*%beta+u


#p.85 random reg model

library(dplyr)

y=t(t(c(9,10,12,6,8,7,10,8,13)))

X=(matrix(c(1,1,1,1,1,1,1,1,1,1,2,3,1,2,2,3,1,3),ncol=2))

za=t(matrix(rbind(c(0,0,0,0,0,0,0,0,0),c(1,1,1,0,0,0,0,0,0),c(0,0,0,1,1,0,0,0,0),c(0,0,0,0,0,1,1,0,0),c(0,0,0,0,0,0,0,1,1)),nrow=5))

zb=t((matrix(rbind(c(0,0,0,0,0,0,0,0,0),c(1,2,3,0,0,0,0,0,0),c(0,0,0,1,2,0,0,0,0),c(0,0,0,0,0,2,3,0,0),c(0,0,0,0,0,0,0,1,3)),nrow=5)))


inv_A=as.matrix(rbind(c(2.1667,-0.6667,0,-0.1667,-1),c(0,1.33333,0,0,0),c(0,0,1,0,0),c(0,0,0,1.8333,-1),c(0,0,0,0,2)))

sig_a=0.5;sig_b=0.2;sig_e=0.75;sig_ab=0.75

mat=rbind(cbind(t(X)%*%X,t(X)%*%za,t(X)%*%zb),cbind(t(za)%*%X,t(za)%*%za+inv_A*(sig_e/sig_a)^2,t(za)%*%zb+inv_A*(sig_e^2/sig_ab)),cbind(t(zb)%*%X,t(zb)%*%za+inv_A*(sig_e^2/sig_ab),t(zb)%*%zb+inv_A*(sig_e/sig_b)^2))

solve(mat)%*%rbind(t(X)%*%y,t(za)%*%y,t(zb)%*%y)


#p.99

library(dplyr)

y=c(2.5,3.3,3.6,3.3,3.6,2.5,2.1,3.5,3.7,4,2.8,2.6)

XH=cbind(c(1,1,1,1,1,1,0,0,0,0,0,0),c(0,0,0,0,0,0,1,1,1,1,1,1))

Z=as.matrix(cbind(c(1,0,0,0,0,0,1,0,0,0,0,0),c(0,1,0,0,0,0,0,1,1,0,0,0),c(0,0,1,rep(0,9)),c(0,0,0,1,1,0,0,0,0,1,0,0),c(0,0,0,0,0,1,0,0,0,0,1,1)))

sig_a=0.5;sig_b=0.2;sig_e=0.75;sig_ab=0.75

sig_epsi=sig_e+(3/4)*sig_a

A=matrix(c(1,0,0,0,0,0,1,0,0,0,0,0.25,1,0,0,0,0.125,0.125,1,0,0,0,0,0,1),ncol=5)

mat=rbind(cbind(t(XH)%*%XH,t(XH)%*%Z),cbind(t(Z)%*%XH,t(Z)%*%Z+solve(A)*sig_epsi/sig_s))

vec=c(t(XH)%*%y,t(Z)%*%y)

params=solve(t(mat)%*%mat)%*%t(mat)%*%vec


#p.140

library(dummies)

X=as.matrix(dummy(as.numeric(c(1,1,2,1,1,2,2,1,1,1,2,2,1,2,2,1,2,2,1,2,1,2,2,1,2,1,2,2))))

colnames(X)=c("x1","x2")

a=c(rep(1,12),rep(2,8),rep(3,8))

b=c(1,1,1,2,2,2,2,3,3,4,4,4,1,1,1,2,2,2,3,3,2,2,2,3,3,4,4,4)

Z_a=dummy(a);Z_b=dummy(b)

colnames(Z_a)=c("za1","za2","za3")

colnames(Z_b)=c("zb1","zb2","zb3","zb4")

#part1

W=cbind(X,Z_a,Z_b);W_A=cbind(X,Z_a);W_B=W

y=c(157,160,138,96,110,115,120,82,65,120,130,110,145,140,142,117,122,98,70,94,105,112,125,92,100,116,129,133)

W2=t(W)%*%W

M_X=X%*%solve(t(X)%*%X)%*%t(X)

M_A=W_A%*%(svd(t(W_A)%*%W_A)$u%*%diag(1/svd(t(W_A)%*%W_A)$d)%*%t(svd(t(W_A)%*%W_A)$v))%*%t(W_A)

M_B=W_B%*%(svd(t(W_B)%*%W_B)$u%*%diag(1/svd(t(W_B)%*%W_B)$d)%*%t(svd(t(W_B)%*%W_B)$v))%*%t(W_B)

Q_A=M_A-M_X;Q_B=M_B-M_A;M_E=diag(1,nrow(X));Q_E=M_E-M_B;Z_e=M_E

mat1=cbind(sum(diag(t(Z_a)%*%Q_A%*%Z_a)),sum(diag(t(Z_b)%*%Q_A%*%Z_b)),sum(diag(t(Z_e)%*%Q_A%*%Z_e)))

mat2=cbind(0,sum(diag(t(Z_b)%*%Q_B%*%Z_b)),sum(diag(t(Z_e)%*%Q_B%*%Z_e)))

mat3=cbind(0,0,sum(diag(t(Z_e)%*%Q_E%*%Z_e)))

mat=rbind(mat1,mat2,mat3)

sig_vec=solve(mat)%*%c(t(y)%*%Q_A%*%t(t(y)),t(y)%*%Q_B%*%t(t(y)),t(y)%*%Q_E%*%t(t(y)))


#part2

W=cbind(X,Z_b,Z_a);W_A=cbind(X,Z_b);W_B=W

y=c(157,160,138,96,110,115,120,82,65,120,130,110,145,140,142,117,122,98,70,94,105,112,125,92,100,116,129,133)

W2=t(W)%*%W

M_X=X%*%solve(t(X)%*%X)%*%t(X)

M_A=W_A%*%(svd(t(W_A)%*%W_A)$u%*%diag(1/svd(t(W_A)%*%W_A)$d)%*%t(svd(t(W_A)%*%W_A)$v))%*%t(W_A)

M_B=W_B%*%(svd(t(W_B)%*%W_B)$u%*%diag(1/svd(t(W_B)%*%W_B)$d)%*%t(svd(t(W_B)%*%W_B)$v))%*%t(W_B)

Q_A=M_A-M_X;Q_B=M_B-M_A;M_E=diag(1,nrow(X));Q_E=M_E-M_B;Z_e=M_E

mat1=cbind(sum(diag(t(Z_a)%*%Q_A%*%Z_a)),sum(diag(t(Z_b)%*%Q_A%*%Z_b)),sum(diag(t(Z_e)%*%Q_A%*%Z_e)))

mat2=cbind(0,sum(diag(t(Z_b)%*%Q_B%*%Z_b)),sum(diag(t(Z_e)%*%Q_B%*%Z_e)))

mat3=cbind(0,0,sum(diag(t(Z_e)%*%Q_E%*%Z_e)))

mat=rbind(mat1,mat2,mat3)

sig_vec=solve(mat)%*%c(t(y)%*%Q_A%*%t(t(y)),t(y)%*%Q_B%*%t(t(y)),t(y)%*%Q_E%*%t(t(y)))


#5.2 最尤法

#p.142

library(dummies)

X=as.matrix(dummy(as.numeric(c(1,1,2,1,1,2,2,1,1,1,2,2,1,2,2,1,2,2,1,2,1,2,2,1,2,1,2,2))))

y=c(157,160,138,96,110,115,120,82,65,120,130,110,145,140,142,117,122,98,70,94,105,112,125,92,100,116,129,133)

colnames(X)=c("x1","x2")

a=c(rep(1,12),rep(2,8),rep(3,8))

b=c(1,1,1,2,2,2,2,3,3,4,4,4,1,1,1,2,2,2,3,3,2,2,2,3,3,4,4,4)

Z_a=dummy(a);Z_b=dummy(b);Z_e=diag(1,nrow(X))

colnames(Z_a)=c("za1","za2","za3")

colnames(Z_b)=c("zb1","zb2","zb3","zb4")


n=nrow(X)

beta=rep(0.1,ncol(X))

sigma=rep(1,3)

ite=10000

eta=0.001

for(l in 1:ite){

V=Z_a%*%t(Z_a)*sigma[1]+Z_b%*%t(Z_b)*sigma[2]+Z_e%*%t(Z_e)*sigma[3]

sol_V=svd(V)$u%*%diag(1/svd(V)$d)%*%t(svd(V)$v)

beta=beta+eta*(t(X)%*%sol_V%*%y-t(X)%*%sol_V%*%X%*%beta)

dsig_a=-sum(diag(sol_V%*%Z_a%*%t(Z_a)))/2+t(y-X%*%beta)%*%sol_V%*%Z_a%*%t(Z_a)%*%sol_V%*%(y-X%*%beta)/2

dsig_b=-sum(diag(sol_V%*%Z_b%*%t(Z_b)))/2+t(y-X%*%beta)%*%sol_V%*%Z_b%*%t(Z_b)%*%sol_V%*%(y-X%*%beta)/2

dsig_e=-sum(diag(sol_V%*%Z_e%*%t(Z_e)))/2+t(y-X%*%beta)%*%sol_V%*%Z_e%*%t(Z_e)%*%sol_V%*%(y-X%*%beta)/2

sigma[1]=sigma[1]+eta*as.numeric(dsig_a)

sigma[2]=sigma[2]+eta*as.numeric(dsig_b)

sigma[3]=sigma[3]+eta*as.numeric(dsig_e)

log_lik=-n*log(2*pi)/2-log(det(V))/2-as.numeric(t(y-X%*%beta)%*%sol_V%*%t(t(y-X%*%beta)))/2

print(log_lik)

}


#NR method

#p.146

library(dummies)

X=as.matrix(dummy(as.numeric(c(1,1,2,1,1,2,2,1,1,1,2,2,1,2,2,1,2,2,1,2,1,2,2,1,2,1,2,2))))

y=c(157,160,138,96,110,115,120,82,65,120,130,110,145,140,142,117,122,98,70,94,105,112,125,92,100,116,129,133)

colnames(X)=c("x1","x2")

a=c(rep(1,12),rep(2,8),rep(3,8))

b=c(1,1,1,2,2,2,2,3,3,4,4,4,1,1,1,2,2,2,3,3,2,2,2,3,3,4,4,4)

Z_a=dummy(a);Z_b=dummy(b);Z_e=diag(1,nrow(X))

colnames(Z_a)=c("za1","za2","za3")

colnames(Z_b)=c("zb1","zb2","zb3","zb4")


n=nrow(X)

beta=rep(0.01,ncol(X))

sigma=rep(0.001,3)

ite=1000000

eta=10^(-8)


for(l in 1:ite){

sigma_pre=sigma

V=Z_a%*%t(Z_a)*sigma[1]+Z_b%*%t(Z_b)*sigma[2]+Z_e%*%t(Z_e)*sigma[3]

sol_V=svd(V)$u%*%diag(1/svd(V)$d)%*%t(svd(V)$v)

P=sol_V-sol_V%*%X%*%solve(t(X)%*%sol_V%*%X)%*%t(X)%*%sol_V

s=c(t(y)%*%P%*%P%*%t(t(y)),t(y)%*%P%*%Z_a%*%t(Z_a)%*%t(t(y)),t(y)%*%P%*%Z_b%*%t(Z_b)%*%t(t(y)),t(y)%*%P%*%Z_e%*%t(Z_e)%*%t(t(y)))

H=array(0,dim=c(4,4))

H[1,]=c(sum(diag(sol_V%*%sol_V)),sum(diag(sol_V%*%sol_V%*%Z_a%*%t(Z_a))),sum(diag(sol_V%*%sol_V%*%Z_b%*%t(Z_b))),sum(diag(sol_V%*%sol_V%*%Z_e%*%t(Z_e))))

H[2,]=c(sum(diag(sol_V%*%sol_V%*%Z_a%*%t(Z_a))),sum(diag(sol_V%*%Z_a%*%t(Z_a)%*%sol_V%*%Z_a%*%t(Z_a))),sum(diag(sol_V%*%Z_a%*%t(Z_a)%*%sol_V%*%Z_b%*%t(Z_b))),sum(diag(sol_V%*%Z_a%*%t(Z_a)%*%sol_V%*%Z_e%*%t(Z_e))))

H[3,]=c(sum(diag(sol_V%*%sol_V%*%Z_b%*%t(Z_b))),sum(diag(sol_V%*%Z_b%*%t(Z_b)%*%sol_V%*%Z_b%*%t(Z_b))),sum(diag(sol_V%*%Z_b%*%t(Z_b)%*%sol_V%*%Z_a%*%t(Z_a))),sum(diag(sol_V%*%Z_b%*%t(Z_b)%*%sol_V%*%Z_e%*%t(Z_e))))

H[4,]=c(sum(diag(sol_V%*%sol_V%*%Z_e%*%t(Z_e))),sum(diag(sol_V%*%Z_e%*%t(Z_e)%*%sol_V%*%Z_e%*%t(Z_e))),sum(diag(sol_V%*%Z_a%*%t(Z_a)%*%sol_V%*%Z_e%*%t(Z_e))),sum(diag(sol_V%*%Z_b%*%t(Z_b)%*%sol_V%*%Z_e%*%t(Z_e))))

sigma=ifelse(sigma-eta*c(s%*%solve(H))>0,sigma-eta*c(s%*%solve(H)),sigma)

#print(sum(abs(sigma-sigma_pre)))

beta=solve(t(X)%*%sol_V%*%X)%*%t(X)%*%sol_V%*%y

log_lik=-n*log(2*pi)/2-log(det(V))/2-as.numeric(t(y-X%*%beta)%*%sol_V%*%t(t(y-X%*%beta)))/2

print(log_lik)

}

変量効果の推定とBLUP法 佐々木 義之先生 京都大学学術出版会

変量効果の推定とBLUP法　佐々木義之先生　京都大学学術出版会