#変量効果モデルとBLUP(遺伝的要因による)
#p.56
library(dplyr)
X=matrix(c(rep(1,12),1,1,1,rep(0,9),0,0,0,1,1,1,1,1,0,0,0,0,rep(0,8),1,1,1,1),ncol=4)
Z=matrix(c(c(1,0,0,1,0,0,0,0,1,0,0,0),c(0,1,0,0,1,0,1,0,0,1,0,0),c(0,0,1,0,0,0,0,0,0,0,1,0),c(0,0,0,0,0,1,0,1,0,0,0,1)),ncol=4)
X2=t(X)%*%X
sol_X2=svd(X2)$u%*%diag(1/svd(X2)$d)%*%t(svd(X2)$v)
W=diag(rep(1,nrow(X)))-X%*%sol_X2%*%t(X)
#parameters
y=abs(rnorm(12))
mat1=rbind(cbind(t(X)%*%X,t(X)%*%Z),cbind(t(Z)%*%X,t(Z)%*%Z))
mat12=t(mat1)%*%mat1
sol_mat12=svd(mat12)$u%*%diag(1/svd(mat12)$d)%*%t(svd(mat12)$v)
mat2=t(t(c(t(X)%*%y,t(Z)%*%y)))
params=sol_mat12%*%t(mat1)%*%mat2
#変量効果の推定
data=data.frame(population=c("A","A","A","B","B","B"),bull=c("S1","S2","S3","S4","S5","S6"),H1=c(24,30,NA,30,25,NA),H2=c(22,26,21,26,NA,23))
fixed=1
y=c(t(as.matrix(cbind(data$H1,data$H2))))
y=y[is.na(y)==F]
X=matrix(c(1,1,1,1,1,1,1,1,1,1,0,1,0,0,1,0,1,0,0,1,0,1,1,0,1,0,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,1,1,1,1),ncol=5)
t_X=t(X)
Z=(matrix(c(1,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1),ncol=6))
t_Z=t(Z)
V=array(0,dim=c(9,9))
V[1,]=c(3,0.225,0,0,0,0,0,0,0)
V[2,]=c(0,3,0,0,0,0,0,0,0)
V[3,]=c(0,0,3,0.225,0,0.1125,0.1125,0.1125,0)
V[4,]=c(0,0,0,3,0,0.1125,0.1125,0.1125,0)
V[5,5:9]=c(3,0,0,0,0)
V[6,6:9]=c(3,0.225,0.05625,0)
V[7,7:9]=c(3,0.05625,0)
V[8,8:9]=c(3,0)
V[9,9]=c(3)
V=V+t(V)
diag(V)=diag(V)/2
H=t(X)%*%solve(V)%*%X
#交配の入れ替えはすべて等しいから
H[lower.tri(H)]=0
beta=solve(H)%*%t(X)%*%solve(V)%*%t(t(y))
C=matrix(c(0.225,0.225,0,0,0,0,0,0,0,0,0,0.225,0.225,0,0.1125,0.1125,0.1125,0,0,0,0,0,0.225,0,0,0,0,0,0,0.1125,0.1125,0,0.225,0.225,0.05625,0,0,0,0.1125,0.1125,0,0.5625,0.5625,0.225,0,0,0,0,0,0,0,0,0,0.225),ncol=6)
t_k=array(0,dim=c(6,5))
t_k[1:3,4]=c(1,1,1)
t_k[4:6,5]=c(1,1,1)
diff=y-X%*%beta
if(fixed==1){
u=t(t(diff)%*%Z/apply(Z,2,sum))
cor(X%*%beta+Z%*%u,y)
}else{
u=t(C)%*%solve(V)%*%(y-X%*%beta)
cor(X%*%beta+Z%*%u,y)
}
result=data.frame(y=y,predict=X%*%beta+Z%*%u)
plot(c(1:nrow(result)),result$y,ylim=c(min(result),max(result)),xlab="samples",ylab="values",col=2,type="o")
par(new=T)
plot(c(1:nrow(result)),result$predict,ylim=c(min(result),max(result)),xlab="samples",ylab="values",col=3,type="o")
g_hat=t_k%*%beta+u
#p.85 random reg model
library(dplyr)
y=t(t(c(9,10,12,6,8,7,10,8,13)))
X=(matrix(c(1,1,1,1,1,1,1,1,1,1,2,3,1,2,2,3,1,3),ncol=2))
za=t(matrix(rbind(c(0,0,0,0,0,0,0,0,0),c(1,1,1,0,0,0,0,0,0),c(0,0,0,1,1,0,0,0,0),c(0,0,0,0,0,1,1,0,0),c(0,0,0,0,0,0,0,1,1)),nrow=5))
zb=t((matrix(rbind(c(0,0,0,0,0,0,0,0,0),c(1,2,3,0,0,0,0,0,0),c(0,0,0,1,2,0,0,0,0),c(0,0,0,0,0,2,3,0,0),c(0,0,0,0,0,0,0,1,3)),nrow=5)))
inv_A=as.matrix(rbind(c(2.1667,-0.6667,0,-0.1667,-1),c(0,1.33333,0,0,0),c(0,0,1,0,0),c(0,0,0,1.8333,-1),c(0,0,0,0,2)))
sig_a=0.5;sig_b=0.2;sig_e=0.75;sig_ab=0.75
mat=rbind(cbind(t(X)%*%X,t(X)%*%za,t(X)%*%zb),cbind(t(za)%*%X,t(za)%*%za+inv_A*(sig_e/sig_a)^2,t(za)%*%zb+inv_A*(sig_e^2/sig_ab)),cbind(t(zb)%*%X,t(zb)%*%za+inv_A*(sig_e^2/sig_ab),t(zb)%*%zb+inv_A*(sig_e/sig_b)^2))
solve(mat)%*%rbind(t(X)%*%y,t(za)%*%y,t(zb)%*%y)
#p.99
library(dplyr)
y=c(2.5,3.3,3.6,3.3,3.6,2.5,2.1,3.5,3.7,4,2.8,2.6)
XH=cbind(c(1,1,1,1,1,1,0,0,0,0,0,0),c(0,0,0,0,0,0,1,1,1,1,1,1))
Z=as.matrix(cbind(c(1,0,0,0,0,0,1,0,0,0,0,0),c(0,1,0,0,0,0,0,1,1,0,0,0),c(0,0,1,rep(0,9)),c(0,0,0,1,1,0,0,0,0,1,0,0),c(0,0,0,0,0,1,0,0,0,0,1,1)))
sig_a=0.5;sig_b=0.2;sig_e=0.75;sig_ab=0.75
sig_epsi=sig_e+(3/4)*sig_a
A=matrix(c(1,0,0,0,0,0,1,0,0,0,0,0.25,1,0,0,0,0.125,0.125,1,0,0,0,0,0,1),ncol=5)
mat=rbind(cbind(t(XH)%*%XH,t(XH)%*%Z),cbind(t(Z)%*%XH,t(Z)%*%Z+solve(A)*sig_epsi/sig_s))
vec=c(t(XH)%*%y,t(Z)%*%y)
params=solve(t(mat)%*%mat)%*%t(mat)%*%vec
#p.140
library(dummies)
X=as.matrix(dummy(as.numeric(c(1,1,2,1,1,2,2,1,1,1,2,2,1,2,2,1,2,2,1,2,1,2,2,1,2,1,2,2))))
colnames(X)=c("x1","x2")
a=c(rep(1,12),rep(2,8),rep(3,8))
b=c(1,1,1,2,2,2,2,3,3,4,4,4,1,1,1,2,2,2,3,3,2,2,2,3,3,4,4,4)
Z_a=dummy(a);Z_b=dummy(b)
colnames(Z_a)=c("za1","za2","za3")
colnames(Z_b)=c("zb1","zb2","zb3","zb4")
#part1
W=cbind(X,Z_a,Z_b);W_A=cbind(X,Z_a);W_B=W
y=c(157,160,138,96,110,115,120,82,65,120,130,110,145,140,142,117,122,98,70,94,105,112,125,92,100,116,129,133)
W2=t(W)%*%W
M_X=X%*%solve(t(X)%*%X)%*%t(X)
M_A=W_A%*%(svd(t(W_A)%*%W_A)$u%*%diag(1/svd(t(W_A)%*%W_A)$d)%*%t(svd(t(W_A)%*%W_A)$v))%*%t(W_A)
M_B=W_B%*%(svd(t(W_B)%*%W_B)$u%*%diag(1/svd(t(W_B)%*%W_B)$d)%*%t(svd(t(W_B)%*%W_B)$v))%*%t(W_B)
Q_A=M_A-M_X;Q_B=M_B-M_A;M_E=diag(1,nrow(X));Q_E=M_E-M_B;Z_e=M_E
mat1=cbind(sum(diag(t(Z_a)%*%Q_A%*%Z_a)),sum(diag(t(Z_b)%*%Q_A%*%Z_b)),sum(diag(t(Z_e)%*%Q_A%*%Z_e)))
mat2=cbind(0,sum(diag(t(Z_b)%*%Q_B%*%Z_b)),sum(diag(t(Z_e)%*%Q_B%*%Z_e)))
mat3=cbind(0,0,sum(diag(t(Z_e)%*%Q_E%*%Z_e)))
mat=rbind(mat1,mat2,mat3)
sig_vec=solve(mat)%*%c(t(y)%*%Q_A%*%t(t(y)),t(y)%*%Q_B%*%t(t(y)),t(y)%*%Q_E%*%t(t(y)))
#part2
W=cbind(X,Z_b,Z_a);W_A=cbind(X,Z_b);W_B=W
y=c(157,160,138,96,110,115,120,82,65,120,130,110,145,140,142,117,122,98,70,94,105,112,125,92,100,116,129,133)
W2=t(W)%*%W
M_X=X%*%solve(t(X)%*%X)%*%t(X)
M_A=W_A%*%(svd(t(W_A)%*%W_A)$u%*%diag(1/svd(t(W_A)%*%W_A)$d)%*%t(svd(t(W_A)%*%W_A)$v))%*%t(W_A)
M_B=W_B%*%(svd(t(W_B)%*%W_B)$u%*%diag(1/svd(t(W_B)%*%W_B)$d)%*%t(svd(t(W_B)%*%W_B)$v))%*%t(W_B)
Q_A=M_A-M_X;Q_B=M_B-M_A;M_E=diag(1,nrow(X));Q_E=M_E-M_B;Z_e=M_E
mat1=cbind(sum(diag(t(Z_a)%*%Q_A%*%Z_a)),sum(diag(t(Z_b)%*%Q_A%*%Z_b)),sum(diag(t(Z_e)%*%Q_A%*%Z_e)))
mat2=cbind(0,sum(diag(t(Z_b)%*%Q_B%*%Z_b)),sum(diag(t(Z_e)%*%Q_B%*%Z_e)))
mat3=cbind(0,0,sum(diag(t(Z_e)%*%Q_E%*%Z_e)))
mat=rbind(mat1,mat2,mat3)
sig_vec=solve(mat)%*%c(t(y)%*%Q_A%*%t(t(y)),t(y)%*%Q_B%*%t(t(y)),t(y)%*%Q_E%*%t(t(y)))
#5.2 最尤法
#p.142
library(dummies)
X=as.matrix(dummy(as.numeric(c(1,1,2,1,1,2,2,1,1,1,2,2,1,2,2,1,2,2,1,2,1,2,2,1,2,1,2,2))))
y=c(157,160,138,96,110,115,120,82,65,120,130,110,145,140,142,117,122,98,70,94,105,112,125,92,100,116,129,133)
colnames(X)=c("x1","x2")
a=c(rep(1,12),rep(2,8),rep(3,8))
b=c(1,1,1,2,2,2,2,3,3,4,4,4,1,1,1,2,2,2,3,3,2,2,2,3,3,4,4,4)
Z_a=dummy(a);Z_b=dummy(b);Z_e=diag(1,nrow(X))
colnames(Z_a)=c("za1","za2","za3")
colnames(Z_b)=c("zb1","zb2","zb3","zb4")
n=nrow(X)
beta=rep(0.1,ncol(X))
sigma=rep(1,3)
ite=10000
eta=0.001
for(l in 1:ite){
V=Z_a%*%t(Z_a)*sigma[1]+Z_b%*%t(Z_b)*sigma[2]+Z_e%*%t(Z_e)*sigma[3]
sol_V=svd(V)$u%*%diag(1/svd(V)$d)%*%t(svd(V)$v)
beta=beta+eta*(t(X)%*%sol_V%*%y-t(X)%*%sol_V%*%X%*%beta)
dsig_a=-sum(diag(sol_V%*%Z_a%*%t(Z_a)))/2+t(y-X%*%beta)%*%sol_V%*%Z_a%*%t(Z_a)%*%sol_V%*%(y-X%*%beta)/2
dsig_b=-sum(diag(sol_V%*%Z_b%*%t(Z_b)))/2+t(y-X%*%beta)%*%sol_V%*%Z_b%*%t(Z_b)%*%sol_V%*%(y-X%*%beta)/2
dsig_e=-sum(diag(sol_V%*%Z_e%*%t(Z_e)))/2+t(y-X%*%beta)%*%sol_V%*%Z_e%*%t(Z_e)%*%sol_V%*%(y-X%*%beta)/2
sigma[1]=sigma[1]+eta*as.numeric(dsig_a)
sigma[2]=sigma[2]+eta*as.numeric(dsig_b)
sigma[3]=sigma[3]+eta*as.numeric(dsig_e)
log_lik=-n*log(2*pi)/2-log(det(V))/2-as.numeric(t(y-X%*%beta)%*%sol_V%*%t(t(y-X%*%beta)))/2
print(log_lik)
}
#NR method
#p.146
library(dummies)
X=as.matrix(dummy(as.numeric(c(1,1,2,1,1,2,2,1,1,1,2,2,1,2,2,1,2,2,1,2,1,2,2,1,2,1,2,2))))
y=c(157,160,138,96,110,115,120,82,65,120,130,110,145,140,142,117,122,98,70,94,105,112,125,92,100,116,129,133)
colnames(X)=c("x1","x2")
a=c(rep(1,12),rep(2,8),rep(3,8))
b=c(1,1,1,2,2,2,2,3,3,4,4,4,1,1,1,2,2,2,3,3,2,2,2,3,3,4,4,4)
Z_a=dummy(a);Z_b=dummy(b);Z_e=diag(1,nrow(X))
colnames(Z_a)=c("za1","za2","za3")
colnames(Z_b)=c("zb1","zb2","zb3","zb4")
n=nrow(X)
beta=rep(0.01,ncol(X))
sigma=rep(0.001,3)
ite=1000000
eta=10^(-8)
for(l in 1:ite){
sigma_pre=sigma
V=Z_a%*%t(Z_a)*sigma[1]+Z_b%*%t(Z_b)*sigma[2]+Z_e%*%t(Z_e)*sigma[3]
sol_V=svd(V)$u%*%diag(1/svd(V)$d)%*%t(svd(V)$v)
P=sol_V-sol_V%*%X%*%solve(t(X)%*%sol_V%*%X)%*%t(X)%*%sol_V
s=c(t(y)%*%P%*%P%*%t(t(y)),t(y)%*%P%*%Z_a%*%t(Z_a)%*%t(t(y)),t(y)%*%P%*%Z_b%*%t(Z_b)%*%t(t(y)),t(y)%*%P%*%Z_e%*%t(Z_e)%*%t(t(y)))
H=array(0,dim=c(4,4))
H[1,]=c(sum(diag(sol_V%*%sol_V)),sum(diag(sol_V%*%sol_V%*%Z_a%*%t(Z_a))),sum(diag(sol_V%*%sol_V%*%Z_b%*%t(Z_b))),sum(diag(sol_V%*%sol_V%*%Z_e%*%t(Z_e))))
H[2,]=c(sum(diag(sol_V%*%sol_V%*%Z_a%*%t(Z_a))),sum(diag(sol_V%*%Z_a%*%t(Z_a)%*%sol_V%*%Z_a%*%t(Z_a))),sum(diag(sol_V%*%Z_a%*%t(Z_a)%*%sol_V%*%Z_b%*%t(Z_b))),sum(diag(sol_V%*%Z_a%*%t(Z_a)%*%sol_V%*%Z_e%*%t(Z_e))))
H[3,]=c(sum(diag(sol_V%*%sol_V%*%Z_b%*%t(Z_b))),sum(diag(sol_V%*%Z_b%*%t(Z_b)%*%sol_V%*%Z_b%*%t(Z_b))),sum(diag(sol_V%*%Z_b%*%t(Z_b)%*%sol_V%*%Z_a%*%t(Z_a))),sum(diag(sol_V%*%Z_b%*%t(Z_b)%*%sol_V%*%Z_e%*%t(Z_e))))
H[4,]=c(sum(diag(sol_V%*%sol_V%*%Z_e%*%t(Z_e))),sum(diag(sol_V%*%Z_e%*%t(Z_e)%*%sol_V%*%Z_e%*%t(Z_e))),sum(diag(sol_V%*%Z_a%*%t(Z_a)%*%sol_V%*%Z_e%*%t(Z_e))),sum(diag(sol_V%*%Z_b%*%t(Z_b)%*%sol_V%*%Z_e%*%t(Z_e))))
sigma=ifelse(sigma-eta*c(s%*%solve(H))>0,sigma-eta*c(s%*%solve(H)),sigma)
#print(sum(abs(sigma-sigma_pre)))
beta=solve(t(X)%*%sol_V%*%X)%*%t(X)%*%sol_V%*%y
log_lik=-n*log(2*pi)/2-log(det(V))/2-as.numeric(t(y-X%*%beta)%*%sol_V%*%t(t(y-X%*%beta)))/2
print(log_lik)
}