# コントラスティブダイバージェンス:CD法
N=10
d=5
X=array(0,dim=c(N,d))
prob=c()
for(i in 1:d){
val=sample(1:10,2)
prob=c(prob,val[1]/sum(val))
X[,i]=sample(0:1,N,replace=T,prob=val/sum(val))
}
# 初期パラメータ
W=array(0.01,dim=c(d,d))
diag(w)=0
b=rep(0.1,d)
eta=0.01
ite=10
for(l in 1:ite){
if(l!=1){ppre=p}
p=c()
for(j in 1:N){
p=c(p,exp(-X[j,]%*%W%*%X[j,]-sum(X[j,]*b)))
}
p=p/sum(p)
if(l==1){
db=-apply(X*c(p),2,sum)
dw=-t(X)%*%(X*c(p))
}else{
db=-apply(X*c(ppre-p),2,sum)
dw=-t(X)%*%(X*c(ppre-p))
}
diag(dw)=0
b=b+eta*db
w=w+eta*dw
print(sum(db^2)+sum(dw^2))
}
# コントラスティブダイバージェンス:CD法
# stochastic gradient method
N=10
d=5
X=array(0,dim=c(N,d))
prob=c()
for(i in 1:d){
val=sample(1:10,2)
prob=c(prob,val[1]/sum(val))
X[,i]=sample(0:1,N,replace=T,prob=val/sum(val))
}
# 初期パラメータ
W=array(0.01,dim=c(d,d))
diag(w)=0
b=rep(0.1,d)
eta=0.1
ite=10^3
df=c()
for(l in 1:ite){
if(l!=1){ppre=p}
j=sample(1:N,1)
p=c()
for(k in 1:N){
p=c(p,exp(-X[k,]%*%W%*%X[k,]-sum(X[k,]*b)))
}
p=p[j]/sum(p)
if(l==1){
db=-X[j,]*c(p)
dw=-t(t(X[j,]))%*%t(X[j,]*c(p))
}else{
db=-X[j,]*c(ppre-p)
dw=-t(t(X[j,]))%*%t(X[j,]*c(ppre-p))
}
diag(dw)=0
b=b+eta*db
w=w+eta*dw
df=c(df,sum(db^2)+sum(dw^2))
print(sum(db^2)+sum(dw^2))
}
plot(df)
# 制限ボルツマンマシンの自由エネルギーによる回帰 p.53
N=10
d=5
M=6
X=array(0,dim=c(N,d))
for(i in 1:d){
val=sample(1:10,2)
X[,i]=sample(0:1,N,replace=T,prob=val/sum(val))
}
Y=sample(1:9,N,replace=T)
bv=rep(0.1,d)
bh=rep(0.1,M)
W=array(1,dim=c(d,M))
ite=200
eta=0.01
df=c()
for(l in 1:ite){
if(l!=1){
bvpre=bv
bhpre=bh
Wpre=W
}
no=sample(1:N,1)
phx=c(1/(1+exp(-bh+t(X[no,])%*%W)))
H=c()
for(j in 1:M){
H=c(H,sample(0:1,1,replace=T,prob=c(1-phx[j],phx[j])))
}
b=bh+t(W)%*%X[no,]
f=-sum(bv*X[no,])-log(prod(1+exp(b)))
bv=bv+eta*(f-Y[no])*X[no,]
bh=bh+eta*(f-Y[no])*H
W=W+eta*(f-Y[no])*t(t(X[no,]))%*%t(H)
if(l!=1){
df=c(df,sum((bv-bvpre)^2)+sum((bh-bhpre)^2)+sum((W-Wpre)^2))
print(sum((bv-bvpre)^2)+sum((bh-bhpre)^2)+sum((W-Wpre)^2))
}
}
plot(df)
# 制限ボルツマンマシンの期待エネルギーによる回帰 p.59
N=10
d=5
M=6
X=array(0,dim=c(N,d))
for(i in 1:d){
val=sample(1:10,2)
X[,i]=sample(0:1,N,replace=T,prob=val/sum(val))
}
Y=sample(1:9,N,replace=T)
bv=rep(0.1,d)
bh=rep(0.1,M)
W=array(1,dim=c(d,M))
ite=1500
eta=0.001
df=c()
for(l in 1:ite){
if(l!=1){
bvpre=bv
bhpre=bh
Wpre=W
}
no=sample(1:N,1)
phx=m=c(1/(1+exp(-bh+t(X[no,])%*%W)))
H=c()
for(j in 1:M){
H=c(H,sample(0:1,1,replace=T,prob=c(1-phx[j],phx[j])))
}
b=bh+t(W)%*%X[no,]
E=-sum(bv*X[no,])-sum(b*H)
G=-sum(bv*X[no,])-sum(b*m)
bv=bv+eta*(G-Y[no])*X[no,]
bh=bh+eta*(G-Y[no])*((m-H)*E+H)
W=W+eta*(G-Y[no])*((t(t(c(X[no,])))%*%t(c(m-H)))*E+t(t(c(X[no,])))%*%t(c(H)))
if(l!=1){
df=c(df,sum((bv-bvpre)^2)+sum((bh-bhpre)^2)+sum((W-Wpre)^2))
print(sum((bv-bvpre)^2)+sum((bh-bhpre)^2)+sum((W-Wpre)^2))
}
}
plot(df)
# p.134 動的ボルツマンマシンにおける自己回帰模型のロジスティック回帰
N=100
m=5
d=2
L=1
X=array(0,dim=c(N,m))
prob=c()
for(i in 1:m){
val=sample(1:10,2)
prob=c(prob,val[1]/sum(val))
X[,i]=sample(0:1,N,replace=T,prob=val/sum(val))
}
alpha=array(1,dim=c(N,m*L))
for(j in 1:L){
for(i in 1:m){
seq=(j-1)
alpha[,seq*m+i]=sample(c(1),N,replace=T)
}}
b=rep(5,m)
W=array(5,dim=c(m*(d-1),m))
u=array(6,dim=c(L*m,m))
eta=0.001
ite=5000
dfvec=c()
for(l in 1:ite){
bpre=b;upre=u;Wpre=W
no=sample(c(d:N),1)
xvec=X[no,]
xpast=X[c((no-1):(no-(d-1))),]
apast=alpha[(no-1),]
mt=b
for(i in 1:(d-1)){
if((d-1)>1){
mt=mt+W[(1+m*(i-1)):(i*m),]%*%c(xpast[i,])
}else{
mt=mt+W[(1+m*(i-1)):(i*m),]%*%c(xpast)
}}
for(i in 1:L){
mt=mt+u[(1+m*(i-1)):(i*m),]%*%c(apast[(1+m*(i-1)):(i*m)])
}
EX=exp(mt)/(1+exp(mt))
b=b*(1-eta)+eta*(xvec-EX)
for(i in 1:(d-1)){
W[(1+m*(i-1)):(i*m),]=W[(1+m*(i-1)):(i*m),]*(1-eta)+eta*t(t(c(X[(no-i),])))%*%t(c(xvec-EX))
}
for(i in 1:L){
u[(1+m*(i-1)):(i*m),]=u[(1+m*(i-1)):(i*m),]*(1-eta)+eta*t(t(c(apast[(1+m*(i-1)):(i*m)])))%*%t(c(xvec-EX))
}
df=sum(abs(b-bpre))+sum(abs(upre-u))+sum(abs(Wpre-W))
dfvec=c(dfvec,df)
print(sum(abs(b-bpre))+sum(abs(upre-u))+sum(abs(Wpre-W)))
}
plot(dfvec)
# p.135 ガウス動的ボルツマンマシン
N=100
m=5
d=2
L=1
X=array(0,dim=c(N,m))
prob=c()
for(i in 1:m){
val=sample(10:20,1)
X[,i]=rpois(N,val)
}
alpha=array(1,dim=c(N,m*L))
for(j in 1:L){
for(i in 1:m){
seq=(j-1)
alpha[,seq*m+i]=sample(c(0:1),N,replace=T)
}}
b=rep(5,m)
W=array(5,dim=c(m*(d-1),m))
u=array(6,dim=c(L*m,m))
sig=rep(1,m)
eta=0.0001
ite=10000
loglikvec=c()
dfvec=c()
Evec=c()
for(l in 1:ite){
bpre=b;upre=u;Wpre=W;sigpre=sig
no=sample(c(d:N),1)
xvec=X[no,]
xpast=X[c((no-1):(no-(d-1))),]
apast=alpha[(no-1),]
mt=b
E=sum(((xvec)-c(b))^2/(2*sig^2))
for(i in 1:(d-1)){
if((d-1)>1){
E=E-X[no-i,]%*%W[(1+m*(i-1)):(i*m),]%*%t(t(c(xpast[i,])))
mt=mt+W[(1+m*(i-1)):(i*m),]%*%c(xpast[i,])
}else{
E=E-X[no-i,]%*%W[(1+m*(i-1)):(i*m),]%*%t(t(c(xpast)))
mt=mt+W[(1+m*(i-1)):(i*m),]%*%c(xpast)
}}
for(i in 1:L){
E=E-apast[(1+m*(i-1)):(i*m)]%*%u[(1+m*(i-1)):(i*m),]%*%c(xvec)
mt=mt+u[(1+m*(i-1)):(i*m),]%*%c(apast[(1+m*(i-1)):(i*m)])
}
loglik=-sum(log(2*pi*sig^2))/2-sum((xvec-mt)^2/(2*sig^2))
b=b*(1-eta)+eta*(xvec-mt)/(sig^2)
sig=sig*(1-eta)+eta*((xvec-mt)^2/(sig^3)-1/sig)
for(i in 1:(d-1)){
W[(1+m*(i-1)):(i*m),]=W[(1+m*(i-1)):(i*m),]*(1-eta)+eta*t(t(c(X[(no-i),])))%*%t(c((xvec-mt)/(sig^2)))
}
for(i in 1:L){
u[(1+m*(i-1)):(i*m),]=u[(1+m*(i-1)):(i*m),]*(1-eta)+eta*t(t(c(apast[(1+m*(i-1)):(i*m)])))%*%t(c((xvec-mt)/(sig^2)))
}
df=sum(abs(b-bpre))+sum(abs(upre-u))+sum(abs(Wpre-W))+sum(abs(sigpre-sig))
dfvec=c(dfvec,df)
loglikvec=c(loglikvec,loglik)
Evec=c(Evec,E)
}
plot(loglikvec[100:ite])