#######################################################################
TRIAL_N=10 ### Independent Trial Number
NNN=200 ### Number of training samples ### NNN=20, 100, 200, ...
TTT=5000 ### Number of testing samples
#######################################################################
Case=3
######## Regular Case
if(Case==1){
ah=0.5 ### True mixture ratio
bh=c(2,2,2)
sprintf('Regular case. Sample=%g. Theoretical Gen Err=%f',NNN,(3+1)/(2*NNN))
}
######## Singular Case
if(Case==2){
ah=0.5 ### True mixture ratio
bh=c(0,0,0)
sprintf('Singular case. Sample=%g. Theoretical Gen Err=%f',NNN,1/(2*NNN))
}
######## Delicate Case
if(Case==3){
ah=0.5 ### True mixture ratio
bh=c(0.3,0.3,0.3)
sprintf('Delicate case. Sample=%g. Theoretical Gen Err = %f ~ %f',NNN,1/(2*NNN),2*(3+1)/(2*NNN))
}
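######## Note (added for clarity): the theoretical values printed above are
######## lambda/NNN, where lambda is the learning coefficient (real log
######## canonical threshold): lambda=(3+1)/2 in the regular case (4 free
######## parameters) and lambda=1/2 in the singular case. The delicate case
######## is expected to fall between these two bounds.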
#######################################################################
randn = function(n,m){array(rnorm(n*m),dim=c(n,m))}
zeros = function(n,m){array(0,dim=c(n,m))}
mod = function(a,b){a%%b}
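### e.g. randn(3,2) is a 3x2 matrix of N(0,1) draws; zeros(1,5) is a 1x5 zero matrix.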
#######################################################################
STD=1 ### standard deviation of each normal distribution
STDB=10 ### STD of prior of (b1,b2,b3)
BURNIN=20000 ### Burn-in Number in MCMC
SIZEM=500 ### Number of MCMC parameter samples
INTER=100 ### MCMC sampling interval
######## Note: Prior of ah is the uniform distribution on [0,1]
MCMC_A=0.3*sqrt(2/NNN) ### Markov Chain Step for ah.
MCMC_B=0.5*sqrt(2/NNN) ### Markov Chain Step for (b1,b2,b3).
### Because both step sizes scale with sqrt(2/NNN), they are adjusted
### automatically when the number of training samples is changed.
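### e.g. for NNN=200: sqrt(2/NNN)=0.1, so MCMC_A=0.03 and MCMC_B=0.05.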
######## Testing Samples generated ####################################
set.seed(1)
XT=STD*randn(3,TTT)
for(i in 1:TTT){
if(runif(1)<ah){
XT[,i]=XT[,i]+bh
}
}
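######## Optional sanity check (added; not part of the original experiment):
######## by construction E[X]=ah*bh, so the empirical coordinate means of the
######## test set should be close to ah*bh for large TTT.
print(rbind(empirical=rowMeans(XT),theoretical=ah*bh))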
######## statistical model : normal mixture ###########################
##### pmodel1: x is one data point (3-vector); A (1 x SIZEM) and B (3 x SIZEM)
##### are arrays of posterior parameter samples. Original MATLAB form:
# pmodel1=@(x,y,z,a,b,c,d)(ttt*((1-a)*exp(-sss*(x^2+y^2+z^2))...
#   +a.*exp(-sss*((x-b).^2+(y-c).^2+(z-d).^2))))
pmodel1 = function(x,A,B){
z=sqrt(2*pi*STD^2)^3
((1-A)*exp(-sum(x*x)/2/STD^2)+A*exp(-colSums((x-B)*(x-B))/2/STD^2))/z
}
##### pmodel2: X is a 3 x N array of data points; a (scalar) and b (3-vector)
##### are a single parameter. Original MATLAB form:
# pmodel2=@(x,y,z,a,b,c,d)(ttt*((1-a)*exp(-sss*(x.^2+y.^2+z.^2))...
#   +a*exp(-sss*((x-b).^2+(y-c).^2+(z-d).^2))))
pmodel2 = function(X,a,b){
z=sqrt(2*pi*STD^2)^3
((1-a)*exp(-colSums(X*X)/2/STD^2)+a*exp(-colSums((X-b)*(X-b))/2/STD^2))/z
}
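##### Quick consistency check (added sketch): pmodel1 and pmodel2 should
##### agree when both are reduced to one data point and one parameter sample.
stopifnot(abs(pmodel1(XT[,1],array(ah,dim=c(1,1)),array(bh,dim=c(3,1)))
  -pmodel2(XT[,1,drop=FALSE],ah,bh))<1e-12)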
######## - log likelihood - log prior #################################
# HHH=@(a,b,c,d,X,Y,Z)(PRIOR*(b^2+c^2+d^2)-sum(log(pmodel2(X,Y,Z,a,b,c,d))))
HHH = function(a,b,X){
log(sqrt(2*pi*STDB^2))*3+sum(b*b)/2/STDB^2-sum(log(pmodel2(X,a,b)))
}
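### (the posterior density is proportional to exp(-HHH(a,b,X)); the Metropolis
### step below only needs the difference h1-h0 of this quantity)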
#######################################################################
sprintf('WAIC experiment begins: %g independent trials.',TRIAL_N)
######## Trial begins #################################################
t0 <- proc.time()
ret = t(sapply(1:TRIAL_N,function(trial){
######## training samples generated #######
set.seed(trial)
XN=STD*randn(3,NNN)
for(i in 1:NNN){
if(runif(1)<ah){
XN[,i]=XN[,i]+bh
}
}
######## MCMC process begins (Metropolis Method) ##################
######## The initial parameter is set to the true parameter. ###########
######## Note: in practical applications, such a method cannot be used.
a0=ah
b0=bh
h0=HHH(a0,b0,XN)
k=0
acceptance=0
WA=zeros(1,SIZEM)
WB=zeros(3,SIZEM)
MCMC=BURNIN+SIZEM*INTER ### total number of MCMC iterations
for(t in 1:MCMC){
a1=a0+MCMC_A*rnorm(1) ### MCMC step
b1=b0+MCMC_B*rnorm(3) ### MCMC step
### a should be in [0,1]
if(a1<0){
a1=a1+1
}
if(a1>1){
a1=a1-1
}
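### (wrapping a1 around [0,1] keeps the proposal symmetric, so the plain
### Metropolis acceptance rule below remains valid under the uniform prior)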
h1=HHH(a1,b1,XN)
delta=h1-h0
### Metropolis probability
if(exp(-delta)>runif(1)){
a0=a1
b0=b1
h0=h1
if(t>BURNIN){
acceptance=acceptance+1
}
}
### MCMC parameters sampled
if(mod(t,INTER)==0 && t>BURNIN){
k=k+1
WA[1,k]=a0
WB[,k]=b0
}
}
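### Optional diagnostic (added; assumes the 'coda' package is installed):
### effective sample size of the stored chain for ah.
if(requireNamespace('coda',quietly=TRUE)){
print(coda::effectiveSize(as.numeric(WA)))
}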
### acceptance ratio
accept=acceptance/(MCMC-BURNIN)
######## If the acceptance rate is not appropriate, the MCMC step sizes should be adjusted.
######## Recommended acceptance rate: 0.05 < acceptance rate < 0.6.
######## There seems to be no established theory about the best acceptance rate for MCMC.
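######## Optional check (added sketch): warn if the acceptance rate leaves
######## the recommended range above.
if(accept<0.05||accept>0.6){
warning(sprintf('trial %d: acceptance rate %f is outside [0.05,0.6]; adjust MCMC_A and MCMC_B',trial,accept))
}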
######## Calculation of Training Error ############################
px=zeros(1,NNN)
for(i in 1:NNN){
px[i]=mean(pmodel1(XN[,i],WA,WB))
}
qx=pmodel2(XN,ah,bh)
te=mean(log(qx/px))
######## WAIC Functional Variance
power1=zeros(1,NNN)
power2=zeros(1,NNN)
for(i in 1:NNN){
logpr=log(pmodel1(XN[,i],WA,WB))
power1[i]=mean(logpr)
power2[i]=mean(logpr*logpr)
}
vn=sum(power2-power1*power1)
######## log likelihood at the posterior mean parameter
avwa=mean(WA) ### DIC1 average parameter
avwb=rowMeans(WB) ### DIC1 average parameter
dic0=log(pmodel2(XN,avwa,avwb))
######## DIC1 effective number of parameters
eff_num=2*sum(-power1+dic0)
### vn is the effective number of parameters in WAIC.
### This is not equal to the real log canonical threshold.
### eff_num is the effective number of parameters defined in DIC1.
######## Training error + effective number of parameters / training sample number
dic1=te+eff_num/NNN
######## Training error + functional variance / training sample number
waic=te+vn/NNN
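### By Watanabe's theory, the average of waic over independent trials
### approximates the average generalization error ge computed below,
### up to higher-order terms in 1/NNN.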
sumlog=zeros(1,SIZEM)
for(k in 1:SIZEM){
sumlog[k]=sum(log(pmodel2(XN,WA[k],WB[,k])))
}
power1=mean(sumlog)
power2=mean(sumlog*sumlog)
### Training error + DIC2 effective number of parameters / training sample number
dic2=te+2*(power2-power1*power1)/NNN
######## Calculation of Generalization Error ######################
px=zeros(1,TTT)
for(i in 1:TTT){
px[i]=mean(pmodel1(XT[,i],WA,WB))
}
qx=pmodel2(XT,ah,bh)
### Kullback-Leibler: int q log(q/p) = int q (log(q/p) + p/q - 1),
### since int q (p/q - 1) = 0; the added terms make the integrand nonnegative.
ge=mean(log(qx/px)+px/qx-1)
c(TRIAL=trial,GE=ge,DIC1=dic1,DIC2=dic2,WAIC=waic,TE=te,ACCEPT=accept)
}))
proc.time() - t0
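######## Summary (added sketch): average each criterion over the independent
######## trials and compare with the theoretical generalization error above.
print(colMeans(ret[,-1,drop=FALSE]))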