#SDEM p.23
library(mvtnorm)
data(iris)
data=iris[1:150,]
X=data[,!(colnames(data) %in% "Species")]
X=matrix(as.numeric(unlist(X)),ncol=ncol(X))
#初期パラメータ
r=0.01
k=3
alpha=3
mu=matrix(sample(1:9,k*ncol(X),replace=T)/100,ncol=ncol(X))
mu_bar=mu
lam=diag(rep(1,ncol(X)))
for(j in 1:(k-1)){
lam=cbind(lam,diag(rep(1,ncol(X))))
}
lam_bar=lam
c=rep(1,k)/k
loglik_vec=c()
for(l in 1:nrow(X)){
mu_pre=mu;lam_pre=lam
x=X[l,]
sum_p=c()
val=array(0,dim=c(nrow(X),k))
for(i in 1:k){
sig=lam[,(ncol(X)*(i-1)+1):(i*ncol(X))]
sum_p=c(sum_p,dmvnorm(x,mu[i,],sig)*c[i])
val[,i]=dmvnorm(X[1:nrow(X),],mu[i,],sig)
}
loglik=log(val%*%c)
loglik_vec=c(loglik_vec,sum(loglik))
print(paste0(l,":",sum(loglik*r*(1-r)^(nrow(X)-c(1:nrow(X))))))
gam=(1-alpha*r)*sum_p/sum(sum_p)+alpha*r/k
c=(1-r)*c+r*gam
mu_bar=(1-r)*mu_bar+r*t(t(c(gam)))%*%t(as.numeric(x))
mu=mu_bar/c
lam_bar=(1-r)*lam_bar
for(i in 1:k){
lam_bar[,(ncol(X)*(i-1)+1):(i*ncol(X))]=lam_bar[,(ncol(X)*(i-1)+1):(i*ncol(X))]+r*gam[i]*t(t(as.numeric(x)))%*%t(as.numeric(x))
lam[,(ncol(X)*(i-1)+1):(i*ncol(X))]=lam_bar[,(ncol(X)*(i-1)+1):(i*ncol(X))]/c[i]-t(t(c(mu[i,])))%*%t(c(mu[i,]))
}
}
#SDAR p.52 確認中
data("AirPassengers")
values=c(AirPassengers)
k=3
x=array(0,dim=c((length(values)-k),k))
y=c()
for(j in 1:(length(values)-k)){
vec=values[j:(j+k-1)]
x[j,]=vec
y=c(y,(values[k+j]))
}
mu=1
r=0.00001
w=rep(1,k)
Cj=rep(1,k+1)
sig=1
for(l in (k+1):nrow(X)){
mu=mu*(1-r)+r*values[l]
for(j in 1:(k+1)){
Cj[j]=Cj[j]*(1-r)+r*(values[l]-mu)*(values[l-(j-1)]-mu)
}
mat=array(0,dim=c(k,k))
for(i in 1:k){
mat[i:k,i]=Cj[1:(k-(i-1))]
}
w=solve(mat,Cj[-1])
pre=(x-mu)%*%w+mu
sig=(1-r)*sig+r*sum((y-pre)^2)
loglik=-(nrow(X)-k)*log(sqrt(sig))-sum((y-pre)^2)/(2*sig)
print(loglik)
}
#混合正規分布の最尤推定 p.122
library(mvtnorm)
data(iris)
data=iris[1:150,]
X=data[,!(colnames(data) %in% "Species")]
X=matrix(as.numeric(unlist(X)),ncol=ncol(X))
X=t((t(X)-apply(X,2,mean))/apply(X,2,sd))
#初期パラメータ
k=3
mu=matrix(sample(1:9,k*ncol(X),replace=T)/100,ncol=ncol(X))
lam=diag(rep(1,ncol(X)))
for(j in 1:(k-1)){
lam=cbind(lam,diag(rep(1,ncol(X))))
}
c=rep(1,k)/k
ite=100
for(l in 1:ite){
sum_p=array(0,dim=c(nrow(X),k))
val=array(0,dim=c(l,k))
for(i in 1:k){
sig=lam[,(ncol(X)*(i-1)+1):(i*ncol(X))]
sum_p[,i]=dmvnorm(X,mu[i,],sig)
}
val=sum_p
loglik=log(ifelse(val%*%c>0,val%*%c,1))
loglik_vec=c(loglik_vec,sum(loglik))
print(paste0(l,":",sum(loglik)))
cp=(t(sum_p)*c)
gam=t(cp)/apply(cp,2,sum)
c=apply(t(cp)/apply(cp,2,sum),2,mean)
mu=t(t(X)%*%gam)/apply(gam,2,sum)
for(i in 1:k){
lam[,(ncol(X)*(i-1)+1):(i*ncol(X))]=t(X)%*%diag(c(gam[,i]/sum(gam[,i])))%*%X-t(t(c(mu[i,])))%*%t(c(mu[i,]))
}
}