# ミクロ統計の集計解析と技法
# p.170 アレンジ
library(dplyr)
n=10;S=5;N=S*n
x_mat=array(0,dim=c(S,n))
y_mat=array(0,dim=c(S,n))
for(j in 1:S){
x=sample(1:10,n,replace = T)
y=sample(seq(0.01,1,0.01),n,replace = T)
y_mat[j,]=y
x_mat[j,]=x
}
beta=rep(1,S);pi=rep(1,n);
log_lik=function(b,p){
p=abs(p)/sum(abs(p))
val=c()
for(j in 1:S){
val=c(val,sum(log(dnorm(y_mat[j,],x_mat[j,]*b[j]))+log(p))-n*log(sum(dnorm(y_mat[j,],x_mat[j,]*b[j])*p))/N)
}
return(sum(val))
}
ite=100000
alpha=0.000001;h=0.01
G=diag(1,length(beta)+length(pi))
for(l in 1:ite){
df=c()
vec=beta
for(j in 1:length(beta)){
vec_sub=vec;vec_sub[j]=vec_sub[j]+h
df=c(df,(log_lik(vec_sub,pi)-log_lik(vec,pi))/h)
}
vec=pi
for(j in 1:length(pi)){
vec_sub=vec;vec_sub[j]=vec_sub[j]+h
df=c(df,(log_lik(beta,vec_sub)-log_lik(beta,vec))/h)
}
df_pre=df
params=c(beta,pi)+alpha*df
beta=params[1:length(beta)];pi=params[-c(1:length(beta))]
print(log_lik(beta,pi))
}
pi=abs(pi)/sum(abs(pi))
# 多値選択行動モデル p.196
library(dummies)
data(iris)
y=dummy(iris$Species)
X=as.matrix(iris[,!(colnames(iris) %in% "Species")])[,1:ncol(y)]
Z=sample(1:9,nrow(X),replace=T)
beta=0.0001
r=rep(0.0001,ncol(y))
ite=10000
eta=0.0001
for(l in 1:ite){
val=X*beta+t(t(c(Z)))%*%t(c(r))
val=exp(val)
dbeta=sum(X*(y-val/c(apply(val,1,sum))))
dr=t(y)%*%Z-t(val/apply(val,1,sum))%*%c(Z)
beta=beta+eta*dbeta
r=r+eta*dr
loglik=sum(y*(log(val/apply(val,1,sum))))
print(loglik)
}
# 多値選択行動モデル betaがベクトル
library(dummies)
data(iris)
y=dummy(iris$Species)
X=as.matrix(iris[,!(colnames(iris) %in% "Species")])[,1:ncol(y)]
Z=sample(1:9,nrow(X),replace=T)
beta=rep(0.0001,nrow(y))
r=rep(0.0001,ncol(y))
ite=10000
eta=0.0001
for(l in 1:ite){
val=X*beta+t(t(c(Z)))%*%t(c(r))
val=exp(val)
dbeta=apply(X*(y-val/c(apply(val,1,sum))),1,sum)
dr=t(y)%*%Z-t(val/apply(val,1,sum))%*%c(Z)
beta=beta+eta*dbeta
r=r+eta*dr
loglik=sum(y*(log(val/apply(val,1,sum))))
print(loglik)
}
# 切断分布モデル p.198
data=data.frame(num=1:20,y=c(0.20, 0.10, 0.49, 0.26, 0.92, 0.95, 0.18, 0.19, 0.44, 0.79, 0.61, 0.41, 0.49, 0.34, 0.62, 0.99, 0.38, 0.22,0.71,0.40),x1=c(84,87,86,85,82,87,92,94,88,84.9,78,90,88,87,82,84,86,83,85.2,82)/100)
Y=data$y;X=data$x1;N=nrow(data)
log_lik=function(beta,sigma){
return(-log(2*pi*sigma^2)*N/2-sum((Y-beta*X)^2)/(2*sigma^2)+sum(log(1-pnorm(-beta*X/sigma))))
}
ite=3000
eta=0.001
h=0.01
b=1;sig=10
for(l in 1:ite){
L=10;b_pre=0
while(L>0.01)({
L_pre=L
if(is.nan(log_lik(b,sig))!=T){
b=b+eta*(log_lik(b+h,sig)-log_lik(b,sig))/h
}
L=abs(L-L_pre)
})
diff=10;diff_pre=0
while(diff>0.01)({
diff_pre=diff
if(is.nan(log_lik(b,sig))!=T){
sig=sig+eta*(log_lik(b,sig+h)-log_lik(b,sig))/h
}
diff=abs(diff_pre-diff)
})
print(log_lik(b,sig))
}
E_u=sig*dnorm(-b*X)/(pnorm(b*X))
f_u=E_u/sig
# トービットモデル p.200
data=data.frame(num=1:20,y=c(0.20, 0.10, 0.49, 0.26, 0.92, 0.95, 0.18, 0.19, 0.44, 0.79, 0.61, 0.41, 0.49, 0.34, 0.62, 0.99, 0.38, 0.22,0.71,0.40),x1=c(84,87,86,85,82,87,92,94,88,84.9,78,90,88,87,82,84,86,83,85.2,82)/100)
Y=data$y;X=data$x1;N=nrow(data)
C=0.1
log_lik=function(beta,sigma){
value=Y-beta*X
return(-log(2*pi*sigma^2)*sum(Y>C)/2-sum((value[Y>C])^2)/(2*sigma^2)+sum(Y<=C)*sum(log(pnorm(C-beta*X/sigma))))
}
ite=30000
eta=0.001
h=0.01
b=1;sig=10
for(l in 1:ite){
L=10;b_pre=0
while(L>0.01)({
L_pre=L
if(is.nan(log_lik(b,sig))!=T){
b=b+eta*(log_lik(b+h,sig)-log_lik(b,sig))/h
}
L=abs(L-L_pre)
})
diff=10;diff_pre=0
while(diff>0.01)({
diff_pre=diff
if(is.nan(log_lik(b,sig))!=T){
sig=sig+eta*(log_lik(b,sig+h)-log_lik(b,sig))/h
}
diff=abs(diff_pre-diff)
})
print(log_lik(b,sig))
}
pre=b*X+sig*dnorm(b*X)/pnorm(b*X/sig)