0
0

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?

More than 1 year has passed since last update.

カテゴリカルデータのモデル分析 坂元 慶行先生 共立出版

Last updated at Posted at 2019-12-11

#p.78

#年齢別データ
data=data.frame(age=c(20:94),ans1=c(3,5,10,5,6,6,5,10,9,6,12,10,11,14,12,6,6,7,14,13,7,6,5,9,10,7,13,11,8,7,4,12,9,6,7,6,5,5,9,5,4,7,5,5,7,1,1,3,0,1,0,3,2,3,0,1,1,1,2,0,2,0,1,rep(0,12)),ans2=c(4,5,3,6,4,2,7,3,5,6,4,2,6,5,7,4,1,5,3,3,4,3,2,3,3,5,1,3,0,1,5,3,7,4,5,2,5,4,3,3,2,3,5,1,2,2,3,1,3,4,1,6,3,1,0,2,0,1,2,0,1,0,1,0,0,1,rep(0,8),1))

#20代、30代、として計算
nj1=c()
nj2=c()
ages=sort(unique(round(data$age/10)*10))
C=length(ages)
for(j in 1:C){
nj1=c(nj1,sum(data$ans1[data$age>=ages[j] & data$age<=(ages[j]+9)]))
nj2=c(nj2,sum(data$ans2[data$age>=ages[j] & data$age<=(ages[j]+9)]))
}
nj=nj1+nj2

D=diag(1,C)
for(j in 1:C){
for(i in j:(j+2)){  
if(i==j){D[i,j]=1}
if((i==(j+1))&(i<=C)){D[i,j]=-2} 
if((i==(j+2))&(i<=C)){D[i,j]=1}
}}

#対数尤度の最大化
#newton-raphson
qj=rep(1,C)
ite=10

for(l in 1:ite){
pj=exp(qj)/(1+exp(qj))  
H=diag(-nj*pj*(1-pj))
df=nj1-nj*pj
qj=c(qj-solve(H)%*%df)
loglik=sum(nj1*qj-nj*log(1+exp(qj)))
print(loglik)
}



#ABIC
ABIC=function(q){
v=q[1]
qvec=q[-1]
q01=qvec[-c(1:2)]
val=sum(nj1*q01-nj*log(1+exp(q01)))
for(j in 1:C){
val=val-sum(qvec[(j+2)]-2*qvec[(j+1)]+qvec[j])^2/(2*v^2)  
}
val=-2*val-C*log(2*pi)
p=exp(q01)/(1+exp(q01))
L=diag(sqrt(nj*p*(1-p)))
val=val+log(det(t(L)%*%L+t(D)%*%D/(v^2)))
return(val)
}

param=rep(1,(C+3))
ite=10000
eta=0.01
h=0.01

for(l in 1:ite){

dparam=c()
for(j in 1:length(param)){
param_sub=param
param_sub[j]=param_sub[j]+h
dparam=c(dparam,(ABIC(param_sub)-ABIC(param))/h)
}  
param=param-eta*dparam
print(ABIC(param))
}

exp(param[-1])/(1+exp(param[-1]))


#カテゴリかるデータのモデル分析 p.106

#ベイズ型一次元二値回帰モデル

library(dplyr)

data=data.frame(I0_1=c(2,3,2,1,3,3,0,2,1,2,3,1,2,1,0,2,2,2,4,0,1,1,1,0,0,1,2,1,1,1,0,1,1,0,1,1,0,2,0,0,2,1,2,0,1,1,2,2,2,1,0,0,0,0,0,1,3,0,1,2,0,0,1,1,0,0,3,1,0,1,2,0,2,0,3,2,3,1,1,3,2,1,0,2,3,1,3,1,2,1,2,2,2,2,3,3,2,0,2,1,1),I0_2=c(2,1,2,3,1,1,4,2,3,2,1,3,2,3,4,2,2,2,0,4,3,3,3,4,4,3,2,3,3,3,4,3,3,4,3,3,4,2,4,4,2,3,2,4,3,3,2,2,2,3,4,4,4,4,4,3,1,4,3,2,4,4,3,3,4,4,1,3,4,3,2,4,2,4,1,2,1,3,3,1,2,3,4,2,1,3,1,3,2,3,2,2,2,2,1,1,2,4,2,3,3))



summary1=data %>% group_by(I0_1) %>% summarise(n1=n())

summary2=data %>% group_by(I0_2) %>% summarise(n2=n())

summary=cbind(summary1,summary2)

n1_j=summary1$n1;n2_j=summary2$n2;n_j=summary1$n1+summary2$n2

class=sort(unique(c(data$I0_1,data$I0_2)))


D=array(0,dim=c(length(class),length(class)))

for(j in 1:(length(class)-2)){
  
D[j:(j+2),j]=c(1,-2,1)  
  
}

D[(length(class)-1),(length(class)-1)]=1

D[length(class),(length(class)-2):length(class)]=c(1,-2,1)

ABIC=function(q,q0,q1,v){
  
log_liklihood=sum(n1_j*q-n_j*log(1+exp(q)))  
  
val=c(length(class)*log(1/sqrt(2*pi*v^2)),-(q[1]-2*q0+q1)^2/(2*v^2),-(q[2]-2*q[1]+q0)^2/(2*v^2))

for(s in 3:length(class)){
  
val=c(val,-(q[j]-2*q[j-1]+q[j-2])^2/(2*v^2))  
  
}

p_j=exp(q)/(1+exp(q))  

L=diag(sqrt(c(n_j*p_j*(1-p_j))))

return(-2*(log_liklihood+(sum(val)))-length(class)*log(2*pi)+log(det(L%*%L+t(D)%*%D/(v^2))))

}


log_likli=function(a,b){
  
q=a+b*class

log_like=n1_j*q-n_j*log(1+exp(q))

log_like=sum(log_like)

return(log_like)
  
}


q=rep(1,length(class));

a=rep(1,length(class));b=rep(1,length(class))

ite=10000;h=0.01;eta=0.0001


for(l in 1:ite){
  
q=a+b*class  
  
p_j=exp(q)/(1+exp(q))  

vec=a

for(i in 1:length(a)){
  
vec_sub=vec;vec_sub[i]=vec_sub[i]+h  
  
a[i]=a[i]+eta*(log_likli(vec_sub,b)-log_likli(vec,b))/h

}

vec=b

for(i in 1:length(b)){
  
vec_sub=vec;vec_sub[i]=vec_sub[i]+h  
  
b[i]=b[i]+eta*(log_likli(a,vec_sub)-log_likli(a,vec))/h

}

print(log_likli(a,b))

}



ite=10000;

q0=1;q1=1;v=1;q=a+b*class  

cost=1000

for(l in 1:ite){

if(cost>0){  
  
#vec=q

#for(s in 1:length(q)){
  
#vec_sub=vec;vec_sub[s]=vec_sub[s]+h

#q[s]=q[s]+eta*(ABIC(vec_sub,q0,q1,v)-ABIC(vec,q0,q1,v))/h
  
#}
  
q0=q0-eta*(ABIC(q,q0+h,q1,v)-ABIC(q,q0,q1,v))/h

q1=q1-eta*(ABIC(q,q0,q1+h,v)-ABIC(q,q0,q1,v))/h

v=v-eta*(ABIC(q,q0,q1,v+h)-ABIC(q,q0,q1,v))/h

cost=ABIC(q,q0,q1,v)

}

print(ABIC(q,q0,q1,v))
  
}

ABIC(q,q0,q1,v)



#p.119 密度関数の推定法

library(dplyr)

C=1000

d=seq(0,1,1/C)

n=100000

val=rpois(n,sample(10:20,1));val=val/max(val)

n_vec=c()

for(j in 2:(length(d)-1)){
  
n_vec=c(n_vec,length(val[val>=d[j-1] & val<d[j]]))  
  
}

n_vec=c(n_vec,length(val[val>=d[length(d)-1] & val<=d[length(d)]]))

h_vec=rep(1,(C-1));h_vec=c(h_vec,-sum(h_vec))



ite=100000



for(l in 1:ite){
  
dl=(n_vec-n_vec[length(n_vec)])/C-n*(exp(h_vec/C)-exp(h_vec[length(h_vec)]/C))/(C*sum(exp(h_vec/C))) 

dl=dl[1:(C-1)]
  
p_j=exp(h_vec/C)/sum(exp(h_vec/C));p_j[is.nan(p_j)>0]=1
  
mat=cbind(diag(sqrt(p_j[1:(C-1)])),rep(sqrt(p_j[C]),C-1))

p_vec=c(p_j-p_j[length(p_j)])[1:(length(p_j)-1)]
  
#G0=mat%*%t(mat)-t(t(p_vec))%*%t(p_vec)

#dl2=-n*G0/(C^2);

#H=svd(dl2)$v%*%(diag(1/svd(dl2)$d))%*%t(svd(dl2)$u)

#diag(dl2)=diag(dl2)+(10^(-2))

h_vec2=h_vec[1:(C-1)]+(10^(-1))*dl

h_vec[1:(C-1)]=h_vec2;h_vec[C]=-sum(h_vec2)

log_lik=sum(n_vec*log(exp(h_vec/C)/sum(exp(h_vec/C))))

print(log_lik)

}

cor(C*p_j,n_vec)

ABIC=function(q,q0,q1,v){
  
log_liklihood=sum(n_vec*log(exp(q/C)/sum(exp(q/C))))
  
val=c((C)*log(1/sqrt(2*pi*v^2)),-(q[1]-2*q0+q1)^2/(2*v^2),-(q[2]-2*q[1]+q0)^2/(2*v^2))

for(s in 3:(C)){
  
val=c(val,-(q[j]-2*q[j-1]+q[j-2])^2/(2*v^2))  
  
}

#D=array(0,dim=c(C-1,C-1))

#for(j in 1:(C-3)){
  
#D[j:(j+2),j]=c(1,-2,1)  
  
#}

#D[(C-2),(C-2)]=1

#D[(C-1),(C-3):(C-1)]=c(1,-2,1);D=rbind(D,c(rep(1,C-3),c(0,3)))

p_j=exp(q/C)/(1+exp(q/C))  

#mat=cbind(diag(sqrt(p_j[1:(C-1)])),rep(sqrt(p_j[C]),C-1))

#G_hat=mat%*%t(mat)*(n/(C^2));H=G_hat+t(D)%*%D/(v^2)

return(-2*(log_liklihood+(sum(val))))

}


ite=10000;h=0.01;eta=0.0001

q0=1;q1=1;v=1

cost=1000;q=h_vec

for(l in 1:ite){

if(cost>0){  
  
#vec=q

#for(s in 1:length(q)){
  
#vec_sub=vec;vec_sub[s]=vec_sub[s]+h

#q[s]=q[s]+eta*(ABIC(vec_sub,q0,q1,v)-ABIC(vec,q0,q1,v))/h
  
#}
  
cost=ABIC(q,q0,q1,v)  
  
q0=q0-eta*(ABIC(q,q0+h,q1,v)-cost)/h

q1=q1-eta*(ABIC(q,q0,q1+h,v)-cost)/h

v=v-eta*(ABIC(q,q0,q1,v+h)-cost)/h

#cost=ABIC(q,q0,q1,v)

}

print(cost)
  
}




0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?