#SMR model p.119
# Build a random I x J table: n_mat gets a row-wise Poisson contribution plus
# a column-wise Poisson contribution (each with a random mean in 10:100);
# d_mat holds event counts drawn strictly between 1 and n_mat[i, j] - 1.
I <- 5
J <- 9
d_mat <- array(0, dim = c(I, J))
n_mat <- array(0, dim = c(I, J))
a <- rep(1, I)
b <- rep(1, J)
# Row contribution: one length-J Poisson draw per row.
for (r_idx in seq_len(I)) {
  n_mat[r_idx, ] <- n_mat[r_idx, ] + rpois(J, sample(10:100, 1))
}
# Column contribution: one length-I Poisson draw per column.
for (c_idx in seq_len(J)) {
  n_mat[, c_idx] <- n_mat[, c_idx] + rpois(I, sample(10:100, 1))
}
# Events: 1 <= d_mat[i, j] <= n_mat[i, j] - 1 (assumes n_mat[i, j] >= 2,
# which holds with overwhelming probability for these Poisson means).
for (r_idx in seq_len(I)) {
  for (c_idx in seq_len(J)) {
    d_mat[r_idx, c_idx] <- n_mat[r_idx, c_idx] - sample(1:(n_mat[r_idx, c_idx] - 1), 1)
  }
}
# Fit the multiplicative SMR model by alternating updates: the log-rate of
# cell (i, j) is modelled as a[i] + b[j] (+ overall offset mu).
ite <- 100
mu <- 1
row_events <- apply(d_mat, 1, sum)  # d_{i+}; constant across iterations
col_events <- apply(d_mat, 2, sum)  # d_{+j}; constant across iterations
for (step in seq_len(ite)) {
  a_old <- a
  b_old <- b
  # Update row effects with the current b, then column effects with the new a.
  a <- log(row_events / n_mat %*% exp(b))
  b <- log(col_events / t(n_mat) %*% exp(a))
  # L1 change of the parameters; should shrink toward 0 as the fit converges.
  print(sum(abs(a_old - a)) + sum(abs(b_old - b)))
  pij <- d_mat / n_mat
  log_pij <- log(pij)
  # Additive fitted log-rates, then centre them with the mean residual mu.
  predict <- outer(c(a), c(b), "+")
  mu <- mean(log_pij - predict)
  predict <- predict + mu
  # Total absolute gap between observed and fitted rates.
  print(sum(abs(pij - exp(predict))))
}
#binomial logistic p.183
# Binomial logistic regression fitted by Fisher scoring (IRLS).
# Response: d_vec successes out of n_vec trials; design matrix: (1, x1, x2).
data <- data.frame(
  num = 1:20,
  y = c(0.20, 0.10, 0.49, 0.26, 0.92, 0.95, 0.18, 0.19, 0.44, 0.79,
        0.61, 0.41, 0.49, 0.34, 0.62, 0.99, 0.38, 0.22, 0.71, 0.40),
  x1 = c(84, 87, 86, 85, 82, 87, 92, 94, 88, 84.9, 78, 90, 88, 87, 82, 84, 86, 83, 85.2, 82),
  x2 = c(61, 55.5, 57, 57, 50, 50, 66.5, 65, 60.5, 49.5, 49.5, 61, 59.5, 58.4, 53.5, 54, 60, 58.8, 54, 56),
  x3 = c(24.5, 24, 23, 22.5, 22, 24, 25, 26, 25, 24, 23, 21, 26, 25.5, 24, 23, 24, 24, 24, 22)
)
# Trial counts: every row shares one Poisson mean drawn from 10:30.
n_vec <- rpois(nrow(data), sample(10:30, 1))
# Success counts: 1 <= d_vec[j] <= n_vec[j] - 1. Assumes n_vec[j] >= 2; with
# a Poisson mean of at least 10 a violation is vanishingly rare (and now
# fails fast via seq_len instead of silently sampling from c(1, 0)).
d_vec <- vapply(n_vec, function(n) sample(seq_len(n - 1), 1), integer(1))
X <- as.matrix(cbind(rep(1, nrow(data)), data[, colnames(data) %in% c("x1", "x2")]))
pis <- data$y  # observed proportions; kept for reference, not used by the fit
alpha <- rep(0.01, ncol(X))
ite <- 10
# One row of coefficient history per scoring iteration. (The original
# allocated nrow(X) = 20 rows for 10 iterations, leaving unused zero rows and
# disagreeing with the ite-by-ncol(X) layout used later in the file.)
alpha_beta_data <- array(0, dim = c(ite, ncol(X)))
for (j in 1:ite) {
  p <- 1 / (1 + exp(-(X %*% alpha)))  # fitted success probabilities
  V <- diag(c(n_vec * p * (1 - p)))   # binomial variance weights
  print(sum(V))
  # Fisher scoring step: (X'VX)^{-1} X'(d - n p).
  alpha <- alpha + solve(t(X) %*% V %*% X) %*% t(X) %*% (d_vec - n_vec * p)
  alpha_beta_data[j, ] <- alpha
}
# Bug fix: the original piped through %>% here, but no library(dplyr) call
# precedes this point in the file, so it failed with "could not find
# function %>%". Base assignment creates the same columns with no dependency.
data$d <- d_vec
data$predict <- as.numeric(n_vec / (1 + exp(-(X %*% alpha))))
#solution2 p.193
# One weighted-least-squares step on the working response z (the IRLS view of
# the binomial logistic fit), plus the Pearson chi-square statistic X2.
fitted_p <- 1 / (1 + exp(-(X %*% alpha)))  # success probabilities at alpha
q_vec <- n_vec * fitted_p                  # expected success counts
V <- diag(c(n_vec * fitted_p * (1 - fitted_p)))  # binomial variance weights
# Working response: linear predictor plus weighted raw residual.
z <- X %*% alpha + solve(V) %*% (d_vec - q_vec)
# Weighted least squares on z reproduces the coefficients: (X'VX)^{-1} X'Vz.
beta <- solve(t(X) %*% V %*% X) %*% t(X) %*% V %*% z
# Pearson chi-square: squared raw residuals over their variances.
X2 <- sum((d_vec - q_vec)^2 / diag(V))
# Stratified analysis p.204 (original header: 層別解析)
# Logistic model with stratum intercepts: logit P(y=1 | x, stratum) =
# x'beta + e'r, fitted by coordinate-wise forward-difference gradient ascent.
library(dummies)  # archived on CRAN; provides dummy() used below
data <- data.frame(
  num = 1:20,
  y = sample(0:1, 20, replace = TRUE, prob = c(0.3, 0.7)),
  x1 = c(84, 87, 86, 85, 82, 87, 92, 94, 88, 84.9, 78, 90, 88, 87, 82, 84, 86, 83, 85.2, 82),
  x2 = c(61, 55.5, 57, 57, 50, 50, 66.5, 65, 60.5, 49.5, 49.5, 61, 59.5, 58.4, 53.5, 54, 60, 58.8, 54, 56),
  x3 = c(24.5, 24, 23, 22.5, 22, 24, 25, 26, 25, 24, 23, 21, 26, 25.5, 24, 23, 24, 24, 24, 22),
  e = c(rep(1, 10), rep(2, 5), rep(3, 5))
)
E <- dummy(data$e)  # one indicator column per stratum
X <- as.matrix(data[, colnames(data) %in% c("x1", "x2", "x3")])
X <- t((t(X) - apply(X, 2, mean)) / apply(X, 2, sd))  # standardize columns
Y <- data$y
beta <- rep(1, ncol(X))
r <- rep(1, ncol(E))
ite <- 300000
eta <- 0.00001
loglik_vec <- numeric(ite)  # preallocated; growing with c() over 300k steps is O(n^2)
# Bernoulli log-likelihood at (bb, rr).
# Bug fix: the original dropped log() around 1 + exp(...) in the r-updates
# and in the per-iteration record (it summed 1 + exp(...) directly), so the
# stratum effects climbed the wrong objective; the beta-updates already used
# the correct sum(log(1 + exp(...))) form. Sharing one helper removes the
# inconsistency.
log_lik_fn <- function(bb, rr) {
  as.numeric(bb %*% t(X) %*% Y + rr %*% t(E) %*% Y) -
    sum(log(1 + exp(bb %*% t(X) + rr %*% t(E))))
}
for (l in 1:ite) {
  # Forward-difference gradient step on each beta coordinate.
  for (i in 1:length(beta)) {
    beta_sub <- beta
    beta_sub[i] <- beta_sub[i] + 0.01
    beta[i] <- beta[i] + eta * (log_lik_fn(beta_sub, r) - log_lik_fn(beta, r)) / 0.01
  }
  # Same forward-difference step on each stratum effect.
  for (i in 1:length(r)) {
    r_sub <- r
    r_sub[i] <- r_sub[i] + 0.01
    r[i] <- r[i] + eta * (log_lik_fn(beta, r_sub) - log_lik_fn(beta, r)) / 0.01
  }
  log_lik <- log_lik_fn(beta, r)
  #print(log_lik)
  loglik_vec[l] <- log_lik
}
# Maximizing the conditional likelihood (original header: 条件付尤度の最大化)
# Within each stratum the observations get softmax probabilities
# p_k = exp(x_k'beta) / sum_j exp(x_j'beta); beta is updated by damped Newton
# ascent on the summed log-probabilities.
# (The original ran library(dummies) here, but nothing in this section uses
# that archived package — 'e' is kept numeric — so the call was dropped.)
data <- data.frame(
  num = 1:20,
  x1 = c(84, 87, 86, 85, 82, 87, 92, 94, 88, 84.9, 78, 90, 88, 87, 82, 84, 86, 83, 85.2, 82),
  x2 = c(61, 55.5, 57, 57, 50, 50, 66.5, 65, 60.5, 49.5, 49.5, 61, 59.5, 58.4, 53.5, 54, 60, 58.8, 54, 56),
  x3 = c(24.5, 24, 23, 22.5, 22, 24, 25, 26, 25, 24, 23, 21, 26, 25.5, 24, 23, 24, 24, 24, 22),
  e = sample(1:3, 20, replace = TRUE)
)
E <- data$e
class <- sort(unique(E))  # stratum labels (note: shadows base::class)
X <- as.matrix(data[, colnames(data) %in% c("x1", "x2", "x3")]) / 10
beta <- rep(0.01, ncol(X))
eta <- 10^(-1)  # damping factor for the Newton step
ite <- 10
for (l in 1:ite) {
  dbeta <- rep(0, length(beta))                            # score accumulator
  ddbeta <- array(0, dim = c(length(beta), length(beta)))  # Hessian accumulator
  loglik <- 0
  prob <- c()
  for (i in class) {
    # Bug fix: drop = FALSE keeps a single-row stratum as a 1 x p matrix; the
    # original X[E == i, ] collapsed it to a vector, and apply() then crashed
    # whenever a random stratum happened to contain one observation.
    Xs <- X[E == i, , drop = FALSE]
    w <- exp(Xs %*% beta)  # unnormalized softmax weights within the stratum
    S <- sum(w)
    p <- c(w / S)          # within-stratum softmax probabilities
    values <- apply(Xs * p, 2, sum)  # probability-weighted mean covariate vector
    dbeta <- dbeta + apply(t(t(Xs) - c(values)), 2, sum)
    # NOTE(review): `values` is already normalized by S, so dividing its outer
    # product by S^2 again looks inconsistent with the standard
    # conditional-logistic Hessian (v v' - sum p x x'); preserved exactly as
    # in the textbook code — confirm against p.204 before relying on it.
    ddbeta <- ddbeta + t(t(c(values))) %*% t(c(values)) / (S^2) -
      t(Xs) %*% (Xs * c(w)) / S
    loglik <- loglik + sum(log(p))
    prob <- c(prob, p)
  }
  beta <- beta + eta * solve(-ddbeta) %*% dbeta  # damped Newton ascent step
  print(loglik)
}
# Class prediction with logistic regression (original header: logisticにおけるクラス分類)
# One-vs-rest on iris: one binary logistic model per species, fitted by
# Newton-Raphson / IRLS; fitted probabilities collected in p_value.
library(dplyr);library(dummies)
data <- data.frame(iris)
data$Species <- as.integer(data$Species)
Y <- dummy(data$Species)  # one-hot response, one column per class
X <- data[, !(colnames(data) %in% c("Species"))]
alpha_box <- array(0, dim = c(length(unique(data$Species)), ncol(X) + 1))
log_lik <- c()
X <- as.matrix(cbind(rep(1, nrow(X)), X))  # prepend intercept column
p_value <- array(0, dim = c(nrow(data), ncol(Y)))
ite <- 20
for (k in 1:length(unique(data$Species))) {
  alpha <- rep(0.01, ncol(X))
  pis <- Y[, k]  # binary target: "is class k"
  for (step in 1:ite) {
    p <- 1 / (1 + exp(-X %*% alpha))  # fitted probabilities
    V <- diag(c(p * (1 - p)))         # Bernoulli variance weights
    print(sum(V))
    # Newton step: (X'VX)^{-1} X'(y - p).
    alpha <- alpha + solve(t(X) %*% V %*% X) %*% t(X) %*% (pis - p)
  }
  p_vec <- p
  inv_p_vec <- 1 - p_vec
  # Guard exact zeros before taking logs.
  p_vec[p_vec == 0] <- (10^(-20))
  inv_p_vec[inv_p_vec == 0] <- (10^(-20))
  log_lik <- c(log_lik, as.numeric(pis * log(p_vec) + (1 - pis) * log(inv_p_vec)))
  alpha_box[k, ] <- alpha
  p_value[, k] <- p
}
colnames(p_value) <- c("p1", "p2", "p3")
data <- data.frame(data, p_value)
#p.143 MH test
# Mantel-Haenszel-type pooled analysis over four 2x2 strata.
# option == 1 uses odds-ratio-style weights; other options use alternative
# weights for the pooled estimate and the variance term.
option <- 1
pthi_up <- c()
pthi_und <- c()
a <- c(30, 40, 20, 10)  # cell (1,1) of each stratum
b <- c(40, 20, 10, 34)  # cell (1,2)
c <- c(30, 20, 10, 15)  # cell (2,1) (shadows base::c as a value; calls still resolve)
d <- c(20, 22, 34, 56)  # cell (2,2)
up <- c()
und <- c()
for (j in 1:4) {
  mat <- array(0, dim = c(2, 2))
  mat[1, 1] <- a[j]; mat[1, 2] <- b[j]; mat[2, 1] <- c[j]; mat[2, 2] <- d[j]
  N <- apply(mat, 2, sum)  # column totals
  M <- apply(mat, 1, sum)  # row totals
  # Bug fix: the stratum total must be computed BEFORE its first use. The
  # original assigned `t` only after the if/else below, so the first pass
  # read base::t (the transpose function) and aborted with a type error.
  # Renamed to n_tot so the transpose function is no longer shadowed.
  n_tot <- sum(mat)
  if (option == 1) {
    pthi_up <- c(pthi_up, (a[j] * d[j] / n_tot))
    pthi_und <- c(pthi_und, (b[j] * c[j] / n_tot))
  } else {
    pthi_up <- c(pthi_up, a[j] * M[2] / n_tot)
    pthi_und <- c(pthi_und, c[j] * M[1] / n_tot)
  }
  w <- ifelse(option == 1, 1 / (n_tot - 1), ifelse(option == 2, 1 / n_tot, 1 / N[2]))
  up <- c(up, a[j] - M[1] * N[1] / n_tot)            # observed minus expected
  und <- c(und, w * prod(N) * prod(M) / (n_tot^2))   # variance contribution
}
kai <- sum(up)^2 / sum(und)  # Mantel-Haenszel chi-square statistic
qnorm(1 - 0.01)
pthi <- sum(pthi_up) / sum(pthi_und)  # pooled Mantel-Haenszel estimate
# Test-based confidence interval (Miettinen): estimate^(1 +/- z/chi), where
# chi is the square ROOT of the chi-square statistic.
# Bug fix: the original divided by kai (= chi^2) instead of sqrt(kai).
#under
pthi^(1 - qnorm(1 - 0.01 / 2) / sqrt(kai))
#upper
pthi^(1 + qnorm(1 - 0.01 / 2) / sqrt(kai))
library(dplyr)
#bernoulli-logistic regression p.181
#multi dimension
library(dplyr)
# Binomial logistic regression with design (intercept, x1, x2), fitted by
# damped Fisher scoring; d successes out of n trials per observation.
data <- data.frame(
  num = 1:20,
  y = c(0.20, 0.10, 0.49, 0.26, 0.92, 0.95, 0.18, 0.19, 0.44, 0.79,
        0.61, 0.41, 0.49, 0.34, 0.62, 0.99, 0.38, 0.22, 0.71, 0.40),
  x1 = c(84, 87, 86, 85, 82, 87, 92, 94, 88, 84.9, 78, 90, 88, 87, 82, 84, 86, 83, 85.2, 82),
  x2 = c(61, 55.5, 57, 57, 50, 50, 66.5, 65, 60.5, 49.5, 49.5, 61, 59.5, 58.4, 53.5, 54, 60, 58.8, 54, 56),
  x3 = c(24.5, 24, 23, 22.5, 22, 24, 25, 26, 25, 24, 23, 21, 26, 25.5, 24, 23, 24, 24, 24, 22),
  sign = 0
)
# Binarize y at 0.49 (vectorized; same values as the original row-wise loop).
data$sign <- ifelse(data$y > 0.49, 1, 0)
X <- as.matrix(cbind(rep(1, nrow(data)), data[, colnames(data) %in% c("x1", "x2")]))
pis <- as.numeric(data$sign)  # kept for reference; the fit uses d and n below
n <- sample(c(10:20), nrow(data), replace = TRUE)  # trial counts per row
d <- integer(length(n))  # success counts, preallocated
for (obs in seq_along(n)) {
  d[obs] <- sample(c(1:n[obs]), 1)
}
alpha <- rep(0.01, ncol(X))
ite <- 10000
eta <- 0.01  # step-size damping for the scoring updates
alpha_beta_data <- array(0, dim = c(ite, ncol(X)))  # coefficient history
for (step in 1:ite) {
  p <- (1 / (1 + exp(-(X %*% alpha))))  # fitted success probabilities
  V <- diag(c(n * p * (1 - p)))         # binomial variance weights
  delta <- solve(t(X) %*% V %*% (X)) %*% t(X) %*% (d - n * p)
  alpha <- alpha + eta * delta
  alpha_beta_data[step, ] <- alpha
  # Binomial log-likelihood up to the constant choose(n, d) term.
  log_lik <- sum(d * log(p)) + sum((n - d) * log(1 - p))
  print(log_lik)
}
data=data %>% dplyr::mutate(predict=1/(1+exp(-(X%*%alpha))))
#Pearson kai2 static
# H0: the logistic regression model fits the data
X2 <- sum((d - n * p)^2 / (n * p * (1 - p)))
qchisq(1 - 0.01, df = nrow(X) - ncol(X) - 1)