#因子分析-その理論と方法-
#p.148
n=10;p=5
X=array(0,dim=c(n,p))
for(i in 1:n){
for(j in 1:p){
val=rpois(2,sample(10:20,1))
X[i,j]=sample(0:1,1,prob=val/max(val))
}
}
f_vec=rep(1,n);a0_vec=rep(1,p);a1_vec=rep(1,p)
log_lik=function(a0,a1,f){
val_mat=exp(-t(t(t(t(f))%*%t(a1))+c(a0)))
p=1/(1+val_mat);q=1-p
p[p<(10^(-10))]=1;q[q<10^(-10)]=1
return(sum(X*log(p)+(1-X)*log(q)))
}
ite=10000;
h=0.01;eta=0.01
log_like_pre=-2;log_like=-1
for(l in 1:ite){
if((log_like_pre-log_like)<0){
log_like_pre=log_lik(a0_vec,a1_vec,f_vec)
H=array(0,dim=c(length(a0_vec)+length(a1_vec),length(a0_vec)+length(a1_vec)))
diff=1
while(diff>(10^(-2)))({
cost_pre=log_lik(a0_vec,a1_vec,f_vec)
vec=c(a0_vec)
for(j in 1:(length(a0_vec))){
vec_sub=vec;vec_sub[j]=vec_sub[j]+h
a0_vec[j]=a0_vec[j]+eta*(log_lik(vec_sub,a1_vec,f_vec)-log_lik(vec,a1_vec,f_vec))/h
}
vec=c(a1_vec)
for(j in 1:(length(a1_vec))){
vec_sub=vec;vec_sub[j]=vec_sub[j]+h
a1_vec[j]=a1_vec[j]+eta*(log_lik(a0_vec,vec_sub,f_vec)-log_lik(a0_vec,vec,f_vec))/h
}
cost=log_lik(a0_vec,a1_vec,f_vec)
diff=abs(cost-cost_pre)
})
diff=1
while(diff>(10^(-2)))({
cost_pre=log_lik(a0_vec,a1_vec,f_vec)
vec=f_vec
for(j in 1:length(f_vec)){
vec_sub=vec;vec_sub[j]=vec_sub[j]+h
f_vec[j]=f_vec[j]+eta*(log_lik(a0_vec,a1_vec,vec_sub)-log_lik(a0_vec,a1_vec,vec))/h
}
cost=log_lik(a0_vec,a1_vec,f_vec)
diff=abs(cost-cost_pre)
})
log_like=log_lik(a0_vec,a1_vec,f_vec)
}
print(log_lik(a0_vec,a1_vec,f_vec))
}
#柳井晴夫 因子分析 p.133
#最尤推定法による母数の推定
library(dplyr)
d=3;n_vec=c()
class=5
X=array(0,dim=c(1,d+1))
for(j in 1:class){
#val=rpois(d,sample(5:10,1))
n=rpois(1,sample(10:20,1))
n_vec=c(n_vec,n)
X_mat=array(0,dim=c(n,d))
prob=sample(seq(0.1,0.9,0.1),d,replace = T)
for(i in 1:ncol(X_mat)){
X_mat[,i]=sample(0:1,n,replace=T,prob=c(prob[i],1-prob[i]))
}
X=rbind(X,cbind(rep(j,n),X_mat))
}
X=tail(X,nrow(X)-1)
X_data=data.frame(X)
class_data=X_data %>% group_by(X1,X2,X3,X4) %>% summarise(n=n())
X_datas=X_data %>% group_by(X2,X3,X4) %>% summarise(n=n())
N=sum(n_vec)
alpha=n_vec/(N^2);
mat=as.matrix(X_data[,2:ncol(X_data)])
lambda=array(0,dim=c(d,class))
for(j in 1:d){
for(k in 1:class){
X_data_sub=X_data %>% filter(X1==k)
mat_sub=as.matrix(X_data_sub[,2:ncol(X_data_sub)])
P_k_N=c()
for(i in 1:nrow(mat)){
vec=apply(abs(t(mat_sub)-c(mat[i,])),2,sum)
P_k_N=c(P_k_N,length(vec[vec==0])/nrow(X_data_sub))
}
lambda[j,k]=sum(mat[,j]*P_k_N)/sum(P_k_N)
}
}
ite=100
for(l in 1:ite){
#if(length(lambda[lambda==0])==0){
alpha_pre=alpha
P_k_x=array(0,dim=c(class,nrow(mat)))
for(i in 1:nrow(mat)){
for(k in 1:class){
lambda_vec=lambda[,k];
if(length(lambda_vec[lambda_vec==0])==0){
val1=exp(t(t(log(lambda_vec)))%*%t(mat[i,]))
val1[is.nan(val1)>0]=1
val2=exp(t(t(log(1-lambda_vec)))%*%t(1-mat[i,]))
val2[is.nan(val2)>0]=1
P_k_x[k,i]=as.numeric(alpha[k]*prod(val1*val2))
}else{
P_k_x[k,i]=0
}
}
}
loglik=sum(log(apply(P_k_x,2,sum)))
P_k_x=t(t(P_k_x)/c(apply(P_k_x,2,sum)))
alpha=apply(P_k_x,1,sum)/ncol(P_k_x)
lambda=array(0,dim=c(d,class))
for(j in 1:d){
for(k in 1:class){
if(sum(P_k_x[k,])>0){
lambda[j,k]=sum(mat[,j]*P_k_x[k,])/sum(P_k_x[k,])
}else{
lambda[j,k]=0
}
}
}
print(loglik)
#}
}
#generalized latent variable modeling p.177
#探索的因子分析
n=10;m=8;N=50
Y=array(0,dim=c(n,N))
lambda=array(1,dim=c(n,N))
sita=diag(1,n)
for(j in 1:N){
Y[,j]=rpois(n,sample(30:40,1))
}
S_yy=array(0,dim=c(n,n))
for(j in 1:N){
S_yy=S_yy+Y[,j]%*%t(Y[,j])/N
}
ite=10000
for(j in 1:ite){
sita_pre=sita
f=t(lambda)%*%solve(lambda%*%t(lambda)+sita)
gamma=diag(1,N)-f%*%lambda
lambda=S_yy%*%t(f)%*%solve(f%*%S_yy%*%t(f)+gamma)
sita=diag(diag(S_yy-lambda%*%f%*%S_yy))
print(sum(abs(sita_pre-sita)))
}
eta=array(0,dim=c(N,N))
for(j in 1:N){
eta[,j]=f%*%Y[,j]
}
predict=lambda%*%eta
#LISREL
m=10;l=12;n=6
Y=array(0,dim=c(m,n))
X=array(0,dim=c(m,n))
for(j in 1:nrow(X)){
X[j,]=rpois(n,sample(10:20,1))
Y[j,]=rpois(n,sample(30:40,1))
}
S=cbind(rbind(cov((X),(X)),(cov((X),(Y)))),rbind((cov((Y),(X))),cov((Y),(Y))))
det(S)
lambda_y=t(array(1,dim=c(m,n)))
lambda_x=t(array(1,dim=c(m,n)))
B=diag(1,m);A=solve(B)
gamma=diag(1,m)
sita_epsilon=diag(1,n);sita_delta=diag(1,n)
D=solve(B)%*%gamma
pthi=diag(1,m);tueta=diag(1,m);sigma_epsilon=diag(1,m)
sigma_yy=lambda_y%*%(solve(B)%*%(gamma%*%tueta%*%t(gamma)+pthi)%*%solve(t(B)))%*%t(lambda_y)+sita_epsilon
sigma_yx=lambda_y%*%solve(B)%*%gamma%*%tueta%*%t(lambda_x)+sita_delta
sigma_xy=lambda_x%*%tueta%*%t(gamma)%*%solve(t(B))%*%t(lambda_y)
sigma_xx=lambda_x%*%tueta%*%t(lambda_x)+sita_delta
sigma=cbind(rbind(sigma_yy,sigma_xy),rbind(sigma_yx,sigma_xx))
omega=solve(sigma)%*%(sigma-S)%*%solve(sigma)
omega_yy=omega[1:n,1:n];omega_yx=omega[1:n,(n+1):ncol(omega)]
omega_xy=omega[(n+1):nrow(omega),1:n];omega_xx=omega[(n+1):nrow(omega),(n+1):ncol(omega)]
C=D%*%tueta%*%t(D)+A%*%pthi%*%t(A)
#iteration
eta=-0.0001
ite=1000000
f=c()
f_val=10^(7);f_val_pre=10^7+1
for(j in 1:ite){
if(f_val_pre-f_val>0){
f_val_pre=f_val
lambda_y=lambda_y+eta*(omega_yy%*%lambda_y%*%C+t(omega_xy)%*%lambda_x%*%pthi%*%t(D))
lambda_x=lambda_x+eta*(omega_xy%*%lambda_y%*%D%*%pthi+omega_xx%*%lambda_x%*%pthi)
B=B-eta*(t(A)%*%t(lambda_y)%*%(omega_yy%*%lambda_y%*%C+t(omega_xy)%*%lambda_x%*%pthi%*%t(D)))
gamma=gamma+eta*(t(A)%*%t(lambda_y)%*%(omega_yy%*%lambda_y%*%D+t(omega_xy)%*%lambda_x)%*%pthi)
pthi=pthi+eta*(t(A)%*%t(lambda_y)%*%omega_yy%*%lambda_y%*%A)
tueta=tueta+eta*(t(D)%*%t(lambda_y)%*%omega_yy%*%lambda_y%*%D+t(lambda_x)%*%omega_xy%*%lambda_y%*%D+t(D)%*%t(lambda_y)%*%t(omega_xy)%*%lambda_x+t(lambda_x)%*%omega_xx%*%lambda_x)
sita_epsilon=sita_epsilon+eta*diag(diag(omega_yy))
sita_delta=sita_delta+eta*diag(diag(omega_xy))
sigma_yy=lambda_y%*%(solve(B)%*%(gamma%*%tueta%*%t(gamma)+pthi)%*%solve(t(B)))%*%t(lambda_y)+sita_epsilon
sigma_yx=lambda_y%*%solve(B)%*%gamma%*%tueta%*%t(lambda_x)+sita_delta
sigma_xy=lambda_x%*%tueta%*%t(gamma)%*%solve(t(B))%*%t(lambda_y)
sigma_xx=lambda_x%*%tueta%*%t(lambda_x)+sita_delta
sigma=cbind(rbind(sigma_yy,sigma_xy),rbind(sigma_yx,sigma_xx))
omega=solve(sigma)%*%(sigma-S)%*%solve(sigma)
omega_yy=omega[1:n,1:n];omega_yx=omega[1:n,(n+1):ncol(omega)]
omega_xy=omega[(n+1):nrow(omega),1:n];omega_xx=omega[(n+1):nrow(omega),(n+1):ncol(omega)]
A=solve(B);D=solve(B)%*%gamma;C=D%*%tueta%*%t(D)+A%*%pthi%*%t(A)
f_val=sum((S-sigma)^2)
f_val=as.numeric(f_val)
print(f_val)
f=c(f,f_val)
R_x=1-det(sita_delta)/det(sigma_xx)
R_y=1-det(sita_epsilon)/det(sigma_yy)
}else{
print("stop")
}
}