
ロジスティック回帰分析ーSASを使った統計解析の実際 (Logistic Regression Analysis: Statistical Analysis in Practice with SAS)

Posted at 2020-04-01

# SMR model (p.119)

```r
# simulate an I x J table: n_mat holds the denominators (person-years)
# and d_mat the event counts, then fit the multiplicative model
# d_ij ~ n_ij * exp(mu + a_i + b_j) by alternating updates
I = 5; J = 9

d_mat = array(0, dim = c(I, J))
n_mat = array(0, dim = c(I, J))
a = rep(1, I); b = rep(1, J)

# build the denominators from random row and column components
for (i in 1:I) {
  n_mat[i, ] = n_mat[i, ] + rpois(J, sample(10:100, 1))
}
for (j in 1:J) {
  n_mat[, j] = n_mat[, j] + rpois(I, sample(10:100, 1))
}

# event counts are strictly smaller than the denominators
for (i in 1:I) {
  for (j in 1:J) {
    d_mat[i, j] = n_mat[i, j] - sample(1:(n_mat[i, j] - 1), 1)
  }
}

ite = 100
mu = 1

for (l in 1:ite) {
  a_pre = a; b_pre = b

  # update the row effects given b, then the column effects given a
  di_plus = apply(d_mat, 1, sum)
  a = log(di_plus / n_mat %*% exp(b))
  dj_plus = apply(d_mat, 2, sum)
  b = log(dj_plus / t(n_mat) %*% exp(a))

  # total absolute parameter change: shrinks toward zero as the fit converges
  print(sum(abs(a_pre - a)) + sum(abs(b_pre - b)))

  pij = d_mat / n_mat; log_pij = log(pij)

  predict = array(0, dim = c(I, J))
  for (i in 1:I) {
    for (j in 1:J) {
      predict[i, j] = a[i] + b[j]
    }
  }

  # absorb the overall level into the intercept mu
  mu = mean(log_pij - predict)
  predict = predict + mu

  # total absolute gap between observed and fitted rates
  print(sum(abs(pij - exp(predict))))
}
```
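In equation form, the loop above fits the multiplicative (SMR-type) model by alternating exact updates of the row and column effects:

```math
\log\frac{d_{ij}}{n_{ij}} \approx \mu + a_i + b_j,\qquad
a_i = \log\frac{d_{i+}}{\sum_j n_{ij} e^{b_j}},\qquad
b_j = \log\frac{d_{+j}}{\sum_i n_{ij} e^{a_i}}
```

Each update solves for one set of effects with the other held fixed, which is why the printed parameter change shrinks toward zero as the iterations proceed.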





# Binomial logistic regression (p.183)

```r
library(dplyr)

data = data.frame(
  num = 1:20,
  y  = c(0.20, 0.10, 0.49, 0.26, 0.92, 0.95, 0.18, 0.19, 0.44, 0.79, 0.61, 0.41, 0.49, 0.34, 0.62, 0.99, 0.38, 0.22, 0.71, 0.40),
  x1 = c(84, 87, 86, 85, 82, 87, 92, 94, 88, 84.9, 78, 90, 88, 87, 82, 84, 86, 83, 85.2, 82),
  x2 = c(61, 55.5, 57, 57, 50, 50, 66.5, 65, 60.5, 49.5, 49.5, 61, 59.5, 58.4, 53.5, 54, 60, 58.8, 54, 56),
  x3 = c(24.5, 24, 23, 22.5, 22, 24, 25, 26, 25, 24, 23, 21, 26, 25.5, 24, 23, 24, 24, 24, 22)
)

# simulate binomial denominators n_vec and numerators d_vec (with d < n)
n_vec = rpois(nrow(data), sample(10:30, 1))
d_vec = c()
for (j in 1:length(n_vec)) {
  d_vec = c(d_vec, sample(1:(n_vec[j] - 1), 1))
}

# design matrix: intercept, x1, x2
X = as.matrix(cbind(rep(1, nrow(data)), data[, colnames(data) %in% c("x1", "x2")]))

pis = data$y

alpha = rep(0.01, ncol(X))
ite = 10
alpha_beta_data = array(0, dim = c(ite, ncol(X)))  # one row of estimates per iteration

# Fisher scoring (Newton-Raphson) for the binomial logistic model
for (j in 1:ite) {
  p = 1 / (1 + exp(-(X %*% alpha)))
  V = array(0, dim = c(nrow(data), nrow(data)))
  diag(V) = n_vec * p * (1 - p)  # binomial variances on the diagonal
  print(sum(V))

  mat = solve(t(X) %*% V %*% X) %*% t(X) %*% (d_vec - n_vec * p)
  alpha = alpha + mat
  alpha_beta_data[j, ] = alpha
}

data = data %>% dplyr::mutate(d = d_vec, predict = n_vec * (1 / (1 + exp(-(X %*% alpha)))))
```
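The loop above is Fisher scoring for the binomial logistic model. With $p_i = 1/(1+e^{-x_i^\top \alpha})$, one iteration is

```math
\alpha^{(t+1)} = \alpha^{(t)} + \left(X^\top V X\right)^{-1} X^\top \left(d - n \odot p\right),\qquad
V = \mathrm{diag}\{\, n_i\, p_i (1 - p_i) \,\}
```

where $d$ and $n$ are the vectors of numerators and denominators.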


# Solution 2 (p.193)

```r
# one IRLS step written with the working response z (same update as above)
p = 1 / (1 + exp(-(X %*% alpha)))
V = array(0, dim = c(nrow(data), nrow(data)))
diag(V) = n_vec * p * (1 - p)

q_vec = n_vec * p                                    # fitted counts
z = X %*% alpha + solve(V) %*% (d_vec - q_vec)       # working response
beta = solve(t(X) %*% V %*% X) %*% t(X) %*% V %*% z  # weighted least squares

# Pearson chi-square statistic
X2 = sum((d_vec - q_vec)^2 / diag(V))
```
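Expanding $z$ shows this weighted least-squares form reproduces the Newton step from the previous section:

```math
\beta = (X^\top V X)^{-1} X^\top V z = \alpha + (X^\top V X)^{-1} X^\top (d - q)
```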


# Stratified analysis (p.204)

```r
library(dummies)

data = data.frame(
  num = 1:20,
  y = sample(0:1, 20, replace = T, prob = c(0.3, 0.7)),
  x1 = c(84, 87, 86, 85, 82, 87, 92, 94, 88, 84.9, 78, 90, 88, 87, 82, 84, 86, 83, 85.2, 82),
  x2 = c(61, 55.5, 57, 57, 50, 50, 66.5, 65, 60.5, 49.5, 49.5, 61, 59.5, 58.4, 53.5, 54, 60, 58.8, 54, 56),
  x3 = c(24.5, 24, 23, 22.5, 22, 24, 25, 26, 25, 24, 23, 21, 26, 25.5, 24, 23, 24, 24, 24, 22),
  e = c(rep(1, 10), rep(2, 5), rep(3, 5))  # stratum labels
)

E = dummy(data$e)  # stratum indicator (dummy) matrix

X = as.matrix(data[, colnames(data) %in% c("x1", "x2", "x3")])
X = t((t(X) - apply(X, 2, mean)) / apply(X, 2, sd))  # standardize the covariates

Y = data$y

beta = rep(1, ncol(X)); r = rep(1, ncol(E))  # covariate effects and stratum intercepts
ite = 300000; eta = 0.00001
loglik_vec = c()

# gradient ascent: each partial derivative of the log-likelihood is
# approximated by a forward finite difference with step 0.01
for (l in 1:ite) {
  for (i in 1:length(beta)) {
    beta_sub = beta
    beta_sub[i] = beta_sub[i] + 0.01
    loglik1 = as.numeric(beta_sub %*% t(X) %*% Y + r %*% t(E) %*% Y) - sum(log(1 + exp(beta_sub %*% t(X) + r %*% t(E))))
    loglik2 = as.numeric(beta %*% t(X) %*% Y + r %*% t(E) %*% Y) - sum(log(1 + exp(beta %*% t(X) + r %*% t(E))))
    beta[i] = beta[i] + eta * (loglik1 - loglik2) / 0.01
  }

  for (i in 1:length(r)) {
    r_sub = r
    r_sub[i] = r_sub[i] + 0.01
    loglik1 = as.numeric(beta %*% t(X) %*% Y + r_sub %*% t(E) %*% Y) - sum(log(1 + exp(beta %*% t(X) + r_sub %*% t(E))))
    loglik2 = as.numeric(beta %*% t(X) %*% Y + r %*% t(E) %*% Y) - sum(log(1 + exp(beta %*% t(X) + r %*% t(E))))
    r[i] = r[i] + eta * (loglik1 - loglik2) / 0.01
  }

  log_lik = as.numeric(beta %*% t(X) %*% Y + r %*% t(E) %*% Y) - sum(log(1 + exp(beta %*% t(X) + r %*% t(E))))
  # print(log_lik)
  loglik_vec = c(loglik_vec, log_lik)
}
```
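The objective the finite-difference gradient steps are climbing is the log-likelihood of the logistic model with stratum intercepts, where $e_k$ is the dummy vector of the stratum of observation $k$:

```math
\ell(\beta, r) = \sum_k y_k\left(x_k^\top \beta + e_k^\top r\right) - \sum_k \log\left(1 + e^{x_k^\top \beta + e_k^\top r}\right)
```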

# Maximizing the conditional likelihood

```r
library(dummies)

data = data.frame(
  num = 1:20,
  x1 = c(84, 87, 86, 85, 82, 87, 92, 94, 88, 84.9, 78, 90, 88, 87, 82, 84, 86, 83, 85.2, 82),
  x2 = c(61, 55.5, 57, 57, 50, 50, 66.5, 65, 60.5, 49.5, 49.5, 61, 59.5, 58.4, 53.5, 54, 60, 58.8, 54, 56),
  x3 = c(24.5, 24, 23, 22.5, 22, 24, 25, 26, 25, 24, 23, 21, 26, 25.5, 24, 23, 24, 24, 24, 22),
  e = sample(1:3, 20, replace = T)  # stratum labels
)

E = data$e
class = sort(unique(E))

X = as.matrix(data[, colnames(data) %in% c("x1", "x2", "x3")]) / 10

beta = rep(0.01, ncol(X))
eta = 10^(-1)  # damping factor for the Newton step
ite = 10

for (l in 1:ite) {
  dbeta = rep(0, length(beta))
  ddbeta = array(0, dim = c(length(beta), length(beta)))
  loglik = 0
  prob = c()

  for (i in class) {
    Xs = X[E == i, , drop = FALSE]
    n_s = nrow(Xs)
    w = c(exp(Xs %*% beta) / sum(exp(Xs %*% beta)))  # within-stratum probabilities
    xbar = apply(Xs * w, 2, sum)                     # weighted covariate mean

    dbeta = dbeta + apply(t(t(Xs) - xbar), 2, sum)   # gradient contribution
    # Hessian contribution: minus n_s times the weighted covariance of the covariates
    ddbeta = ddbeta + n_s * (xbar %*% t(xbar) - t(Xs) %*% (Xs * w))
    loglik = loglik + sum(log(w))
    prob = c(prob, w)
  }

  beta = beta + eta * solve(-ddbeta) %*% dbeta  # damped Newton update
  print(loglik)
}
```
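For reference, the quantities accumulated per stratum $s$ are the gradient and Hessian of the within-stratum softmax log-likelihood the loop evaluates, with $w_m$ the within-stratum probabilities and $\bar{x}_s = \sum_m w_m x_m$:

```math
\ell(\beta) = \sum_s \sum_{k \in s} \log\frac{e^{x_k^\top \beta}}{\sum_{m \in s} e^{x_m^\top \beta}},\qquad
\nabla \ell = \sum_s \sum_{k \in s} \left(x_k - \bar{x}_s\right),\qquad
\nabla^2 \ell = -\sum_s n_s \left(\sum_{m \in s} w_m x_m x_m^\top - \bar{x}_s \bar{x}_s^\top\right)
```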




# Multiclass classification with logistic regression

```r
library(dplyr); library(dummies)

data = data.frame(iris)
data$Species = as.integer(data$Species)

Y = dummy(data$Species)  # one 0/1 indicator column per species

X = data[, !(colnames(data) %in% c("Species"))]
X = as.matrix(cbind(rep(1, nrow(X)), X))  # prepend an intercept column

alpha_box = array(0, dim = c(length(unique(data$Species)), ncol(X)))
log_lik = c()
p_value = array(0, dim = c(nrow(data), ncol(Y)))
ite = 20

# one-vs-rest: fit a separate binary logistic regression for each species
for (i in 1:length(unique(data$Species))) {
  alpha = rep(0.01, ncol(X)); pis = Y[, i]

  for (j in 1:ite) {
    p = 1 / (1 + exp(-X %*% alpha))
    V = array(0, dim = c(nrow(data), nrow(data)))
    diag(V) = p * (1 - p)
    print(sum(V))

    mat = solve(t(X) %*% V %*% X) %*% t(X) %*% (pis - p)  # Newton-Raphson step
    alpha = alpha + mat
  }

  # clip fitted probabilities away from 0 and 1 before taking logs
  p_vec = p; inv_p_vec = 1 - p_vec
  p_vec[p_vec == 0] = 10^(-20); inv_p_vec[inv_p_vec == 0] = 10^(-20)

  log_lik = c(log_lik, sum(pis * log(p_vec) + (1 - pis) * log(inv_p_vec)))
  alpha_box[i, ] = alpha
  p_value[, i] = p
}

colnames(p_value) = c("p1", "p2", "p3")
data = data.frame(data, p_value)
```
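To turn the three one-vs-rest probabilities into a predicted species, each row can simply be assigned to the column with the largest fitted probability. A minimal sketch (the `class_pred` column is my addition, not part of the original script):

```r
# assign each observation to the class with the largest one-vs-rest probability
data$class_pred = apply(data[, c("p1", "p2", "p3")], 1, which.max)

# cross-tabulate observed species against predicted class
table(observed = data$Species, predicted = data$class_pred)
```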


# Mantel-Haenszel test (p.143)

```r
# option = 1: pooled odds ratio; otherwise: pooled risk ratio
option = 1

pthi_up = c(); pthi_und = c()

# cells (a, b, c, d) of the 2 x 2 table in each of the four strata
a = c(30, 40, 20, 10)
b = c(40, 20, 10, 34)
c = c(30, 20, 10, 15)
d = c(20, 22, 34, 56)

up = c(); und = c()

for (j in 1:4) {
  mat = array(0, dim = c(2, 2))
  mat[1, 1] = a[j]; mat[1, 2] = b[j]; mat[2, 1] = c[j]; mat[2, 2] = d[j]

  t = sum(mat)  # stratum total; must be computed before it is used below
  N = apply(mat, 2, sum); M = apply(mat, 1, sum)  # column and row totals

  if (option == 1) {
    # components of the Mantel-Haenszel odds ratio
    pthi_up = c(pthi_up, a[j] * d[j] / t)
    pthi_und = c(pthi_und, b[j] * c[j] / t)
  } else {
    # components of the Mantel-Haenszel risk ratio
    pthi_up = c(pthi_up, a[j] * M[2] / t)
    pthi_und = c(pthi_und, c[j] * M[1] / t)
  }

  w = ifelse(option == 1, 1 / (t - 1), ifelse(option == 2, 1 / t, 1 / N[2]))

  up = c(up, a[j] - M[1] * N[1] / t)           # observed minus expected count
  und = c(und, w * prod(N) * prod(M) / (t^2))  # variance contribution
}

kai = sum(up)^2 / sum(und)  # Mantel-Haenszel chi-square statistic

qchisq(1 - 0.01, df = 1)  # 1% critical value for the chi-square statistic

pthi = sum(pthi_up) / sum(pthi_und)  # pooled (Mantel-Haenszel) estimate

# test-based confidence interval (the exponent uses the square root of kai)

# lower limit
pthi^(1 - qnorm(1 - 0.01 / 2) / sqrt(kai))

# upper limit
pthi^(1 + qnorm(1 - 0.01 / 2) / sqrt(kai))
```
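With $t_j$ the stratum total and $M_{1j}, M_{0j}, N_{1j}, N_{0j}$ the row and column totals, the statistic and pooled odds ratio computed above (for `option = 1`) are

```math
X^2_{\mathrm{MH}} = \frac{\left\{\sum_j \left(a_j - M_{1j} N_{1j} / t_j\right)\right\}^2}{\sum_j M_{1j} M_{0j} N_{1j} N_{0j} / \left\{t_j^2 (t_j - 1)\right\}},\qquad
\hat{\psi}_{\mathrm{MH}} = \frac{\sum_j a_j d_j / t_j}{\sum_j b_j c_j / t_j}
```

and the test-based confidence limits are $\hat{\psi}^{\,1 \pm z_{\alpha/2}/\sqrt{X^2_{\mathrm{MH}}}}$.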




# Bernoulli logistic regression with multiple covariates (p.181)

```r
library(dplyr)

data = data.frame(
  num = 1:20,
  y = c(0.20, 0.10, 0.49, 0.26, 0.92, 0.95, 0.18, 0.19, 0.44, 0.79, 0.61, 0.41, 0.49, 0.34, 0.62, 0.99, 0.38, 0.22, 0.71, 0.40),
  x1 = c(84, 87, 86, 85, 82, 87, 92, 94, 88, 84.9, 78, 90, 88, 87, 82, 84, 86, 83, 85.2, 82),
  x2 = c(61, 55.5, 57, 57, 50, 50, 66.5, 65, 60.5, 49.5, 49.5, 61, 59.5, 58.4, 53.5, 54, 60, 58.8, 54, 56),
  x3 = c(24.5, 24, 23, 22.5, 22, 24, 25, 26, 25, 24, 23, 21, 26, 25.5, 24, 23, 24, 24, 24, 22),
  sign = 0
)

# dichotomize y at 0.49
data$sign = ifelse(data$y > 0.49, 1, 0)

X = as.matrix(cbind(rep(1, nrow(data)), data[, colnames(data) %in% c("x1", "x2")]))
pis = as.numeric(data$sign)

# simulate binomial denominators n and numerators d
n = sample(c(10:20), nrow(data), replace = T)
d = c()
for (j in 1:length(n)) {
  d = c(d, sample(c(1:n[j]), 1))
}

alpha = rep(0.01, ncol(X))
ite = 10000
eta = 0.01
alpha_beta_data = array(0, dim = c(ite, ncol(X)))

# damped Newton-Raphson updates; the log-likelihood printed each
# iteration should increase toward a plateau
for (j in 1:ite) {
  p = 1 / (1 + exp(-(X %*% alpha)))
  V = diag(c(n * p * (1 - p)))
  mat = solve(t(X) %*% V %*% X) %*% t(X) %*% (d - n * p)
  alpha = alpha + eta * mat
  alpha_beta_data[j, ] = alpha

  log_lik = sum(d * log(p)) + sum((n - d) * log(1 - p))
  print(log_lik)
}

data = data %>% dplyr::mutate(predict = 1 / (1 + exp(-(X %*% alpha))))

# Pearson chi-square statistic
# H0: the logistic regression model fits the data
X2 = sum((d - n * p)^2 / (n * p * (1 - p)))
qchisq(1 - 0.01, df = nrow(X) - ncol(X))  # residual df: n minus the number of parameters
```
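Under $H_0$ the statistic is compared with a chi-square distribution whose degrees of freedom are the sample size minus the number of fitted parameters:

```math
X^2 = \sum_{i=1}^{n} \frac{\left(d_i - n_i \hat{p}_i\right)^2}{n_i \hat{p}_i \left(1 - \hat{p}_i\right)}
```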




