# Statistical Analysis of Observational Data (調査観察データの統計解析)
# Chapter 3: semiparametric analysis, p.84
# Generate toy data: a two-column outcome indicator matrix Y, a binary treatment indicator Z, and a Poisson covariate matrix X
N=10;dim=4
Y=array(0,dim=c(N,2))
Z=c()
X=array(0,dim=c(N,dim))
for(j in 1:N){
val=rpois(2,sample(10:20,1))
val=val/sum(val);vec=ifelse(val==max(val),1,0)
Y[j,]=vec
val=rpois(2,sample(10:20,1))
val=val/sum(val)
Z=c(Z,ifelse(val[1]>val[2],1,0))
X[j,]=rpois(dim,sample(10:20,1))
}
Y_mat=Y
# Bernoulli logistic regression fitted by Newton-Raphson (Fisher scoring), p.105
# multidimensional covariates
alpha=rep(0.01,(ncol(X)))
ite=20
alpha_beta_data=array(0,dim=c(ite,ncol(X)))
for(j in 1:ite){
V=array(0,dim=c(nrow(X),nrow(X)))
diag(V)=((1/(1+exp(-(X%*%alpha))))*(1-1/(1+exp(-(X%*%alpha)))))
print(sum(V))
mat=solve(t(X)%*%V%*%(X))%*%t(X)%*%(Z-(1/(1+exp(-(X%*%alpha)))))
alpha=alpha+mat
alpha_beta_data[j,]=alpha
}
predict=1/(1+exp(-(X%*%alpha)))
e_vec=cbind(1-predict,predict)
e_vec[e_vec<10^(-10)]=10^(-10)
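# Optional cross-check (my addition, not from the book): the Newton-Raphson loop
# above should roughly agree with stats::glm fitted with a logit link and no
# intercept, since X carries no intercept column here.
glm_fit=glm(Z~X-1,family=binomial())
cbind(newton=c(alpha),glm=coef(glm_fit))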
# PME
pj=c()
for(j in 1:2){
pj=c(pj,sum(Z*Y_mat[,j]/e_vec[,j])/sum(Z/e_vec[,j]))
}
beta=array(0,dim=c(2,N))
B=diag(0,2)
for(j in 1:N){
b=c((Z[j]/e_vec[j,2])*(Y_mat[j,2]/pj[2]-(1-Y_mat[j,2])/(1-pj[2])),((1-Z[j])/(1-e_vec[j,2]))*(Y_mat[j,1]/pj[1]-(1-Y_mat[j,1])/(1-pj[1])))
beta[,j]=b;B=B+t(t(b))%*%t(b)
}
B=B/N
A=-diag(c(1/pj[2]+1/(1-pj[2]),1/pj[1]+1/(1-pj[1])))
V_p1_p0=t(c(1,-1))%*%solve(A)%*%B%*%solve(A)%*%t(t(c(1,-1)))
w0=(1-Z)/(N*(1-predict))
w1=Z/(N*predict)
Q=c()
for(j in 1:2){
for(i in 1:N){
Q=c(Q,Z[i]*(Y_mat[i,j]*log(pj[j])+(1-Y_mat[i,j])*log(1-pj[j]))/e_vec[i,j])
}}
# IPW estimation p.69
E_y1=sum(Z*Y[,1]/predict)/sum(Z/predict)
E_y0=sum((1-Z)*Y[,1]/(1-predict))/sum((1-Z)/(1-predict))
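# IPW estimate of the average treatment effect (my addition): the contrast of the
# two weighted means computed above.
ATE_IPW=E_y1-E_y0
ATE_IPW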
# Convergence in probability check: both weighted averages should be close to 1
mean(Z/predict)
mean((1-Z)/(1-predict))
# Gaussian-type kernel with bandwidth parameter h
kernel=function(x,y,h){
return(exp(-(x-y)^2/h))
}
# kernel matching method p.74
K=array(0,dim=c(N,N))
h2=10
for(i in 1:N){
for(j in 1:N){
K[i,j]=kernel(predict[i],predict[j],h2)
}
}
y0_hat=K%*%(Y[,1]*c(1-Z))/(K%*%(1-Z))
y1_hat=K%*%(Y[,2]*c(Z))/(K%*%Z)
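# Matching estimate of the treatment effect (my addition): average the difference
# of the kernel-smoothed potential-outcome predictions.
mean(y1_hat-y0_hat)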
# p.71 asymptotic variance
cbind((Z/predict)*(Y[,1]-E_y1),(1-Z)*(Y[,1]-E_y0)/(1-predict))
mean(Z*(Y[,1]-E_y1)^2/(predict^2)+(1-Z)*(Y[,1]-E_y0)^2/((1-predict)^2))
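# Approximate standard error (my addition): the quantity above estimates the
# asymptotic variance of the IPW effect estimate, so dividing by N and taking the
# square root gives a rough standard error.
V_hat=mean(Z*(Y[,1]-E_y1)^2/(predict^2)+(1-Z)*(Y[,1]-E_y0)^2/((1-predict)^2))
sqrt(V_hat/N)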
## p.85
Z_mat=cbind(Z,1-Z)
mu=apply(Z_mat*Y/e_vec,2,sum)/apply(Z_mat/e_vec,2,sum)
sig=array(0,dim=c(2,2))
for(j in 1:2){
sig[j,j]=sig[j,j]+sum(as.numeric(Z_mat[,j])*as.numeric((Y[,j]-mu[j])^2)/as.numeric(e_vec[,j]))/sum(as.numeric(Z_mat[,j])/as.numeric(e_vec[,j]))
}
# p.88 doubly robust (DR) estimation
library(dplyr)
N=100;
z=sample(0:1,N,replace=T,prob=c(0.3,0.7))
y0=rpois(N,5)+2
y1=rpois(N,8)+1
X=cbind(rpois(N,3),rpois(N,5),rpois(N,6))
# Propensity score (logistic regression)
# Bernoulli logistic regression, p.105: damped Newton updates using an SVD pseudo-inverse
# multidimensional covariates
X=cbind(rep(1,nrow(X)),X)
pis=z
alpha=rep(0.01,(ncol(X)))
ite=1000
eta=0.01
for(j in 1:ite){
p=(1/(1+exp(-(X%*%alpha))))
V=diag(c(p*(1-p)))
tvt=t(X)%*%V%*%(X)
sv=svd(tvt)
mat=sv$u%*%diag(1/sv$d)%*%t(sv$v)%*%t(X)%*%(pis-p)
alpha=alpha+eta*mat
log_lik=sum(z*log(p)+(1-z)*log(1-p))
print(log_lik)
}
predict1=1/(1+exp(-(X%*%alpha)))
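# Optional cross-check (my addition): X now contains an explicit intercept column,
# so the damped Newton iterations above can be compared against stats::glm with
# the formula intercept suppressed.
glm_fit2=glm(z~X-1,family=binomial())
cbind(newton=c(alpha),glm=coef(glm_fit2))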
# Regression functions E(y|x) for each potential outcome, fitted by least squares (SVD pseudo-inverse)
X2=t(X)%*%X
sv2=svd(X2)
b0=sv2$u%*%diag(1/sv2$d)%*%t(sv2$u)%*%t(X)%*%y0
b1=sv2$u%*%diag(1/sv2$d)%*%t(sv2$u)%*%t(X)%*%y1
E_DR_y0=mean((1-z)*y0/(1-predict1)+(1-(1-z)/(1-predict1))*c(X%*%b0))
E_DR_y1=mean(z*y1/predict1+(1-z/predict1)*c(X%*%b1))
# Causal effect (ATE): E_DR_y1-E_DR_y0
E_DR_y1-E_DR_y0
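# For comparison (my addition): the plain IPW estimate using the same propensity
# scores; with a well-specified propensity model the doubly robust and IPW
# estimates should be close.
E_IPW_y1=sum(z*y1/predict1)/sum(z/predict1)
E_IPW_y0=sum((1-z)*y0/(1-predict1))/sum((1-z)/(1-predict1))
E_IPW_y1-E_IPW_y0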
# p.93
N=100;
z=sample(0:1,N,replace=T,prob=c(0.4,0.6))
X=cbind(rpois(N,5),rpois(N,3),rpois(N,7))
X=as.matrix(cbind(rep(1,N),X))
y1=sample(0:1,N,replace=T,prob=c(0.3,0.7))
y0=sample(0:1,N,replace=T,prob=c(0.6,0.4))
# Propensity score (logistic regression)
alpha=rep(0.01,(ncol(X)))
ite=2000;eta=0.01
for(j in 1:ite){
V=array(0,dim=c(nrow(X),nrow(X)))
diag(V)=((1/(1+exp(-(X%*%alpha))))*(1-1/(1+exp(-(X%*%alpha)))))
mat=solve(t(X)%*%V%*%(X))%*%t(X)%*%(z-(1/(1+exp(-(X%*%alpha)))))
alpha=alpha+eta*mat
p=1/(1+exp(-(X%*%alpha)))
log_lik=sum(z*log(p)+(1-z)*log(1-p))
print(log_lik)
}
predict=1/(1+exp(-(X%*%alpha)))
# Weighted maximum likelihood under a Bernoulli outcome model
Q10=function(q){
value=mean(z*(1-predict)*(y1*log(q)+(1-y1)*log(1-q))/predict)
return(value)
}
Q01=function(q){
value=mean((1-z)*predict*(y0*log(q)+(1-y0)*log(1-q))/(1-predict))
return(value)
}
# Maximize Q10 by gradient ascent (forward-difference gradient)
q10=0.1
ite=1000
eta=0.01
h=0.01
for(l in 1:ite){
q10=q10+eta*(Q10(q10+h)-Q10(q10))/h
print(Q10(q10))
}
# Maximize Q01 by gradient ascent
q01=0.1
ite=1000
eta=0.01
h=0.01
for(l in 1:ite){
q01=q01+eta*(Q01(q01+h)-Q01(q01))/h
print(Q01(q01))
}
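# Closed-form check (my addition): a weighted Bernoulli log-likelihood is maximized
# by the weighted mean of the outcomes, so the gradient-ascent solutions above can
# be compared with these direct computations.
q10_direct=sum(z*(1-predict)*y1/predict)/sum(z*(1-predict)/predict)
q01_direct=sum((1-z)*predict*y0/(1-predict))/sum((1-z)*predict/(1-predict))
c(q10,q10_direct,q01,q01_direct)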
# p.148
library(dplyr)
library(stringr)
data(anscombe)
dat=data.frame(anscombe)
X=as.matrix(dat[,str_count(colnames(dat),"x")>0])
Y=dat$y1;Y2=Y-mean(Y);Y1=dat$y2
data=data.frame(cbind(X,Y1,Y2))
# split the sample: rows with Y2>0 are treated as the selected (observed) group
dat1=data%>%filter(Y2<=0)
dat2=data%>%filter(Y2>0)
X1=as.matrix(dat1[,str_count(colnames(dat1),"x")>0])
X2=as.matrix(dat2[,str_count(colnames(dat2),"x")>0])
Y1=dat2$Y1
loglik=function(beta1,beta2,sig){
val=sum(log(1-pnorm(X1%*%beta2)))+sum(log(pnorm(X2%*%beta2)))+sum(log(dnorm((Y1-X2%*%beta1)/sig)/sig))
return(val)
}
ite=100
eta=0.0001
h=0.01
b1=rep(0.1,ncol(X));b2=rep(0.1,ncol(X));sigma=0.1
for(l in 1:ite){
vec=b1
for(j in 1:length(vec)){
vec_sub=vec;vec_sub[j]=vec_sub[j]+h
b1[j]=b1[j]+eta*(loglik(vec_sub,b2,sigma)-loglik(vec,b2,sigma))/h
}
vec=b2
for(j in 1:length(vec)){
vec_sub=vec;vec_sub[j]=vec_sub[j]+h
b2[j]=b2[j]+eta*(loglik(b1,vec_sub,sigma)-loglik(b1,vec,sigma))/h
}
sigma=sigma+eta*(loglik(b1,b2,sigma+h)-loglik(b1,b2,sigma))/h
print(loglik(b1,b2,sigma))
}
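# Cross-check (my addition): because this log-likelihood has no correlation term,
# it factors into a probit likelihood for the selection indicator and a normal
# regression likelihood for the selected rows, so a fully converged maximization
# would match the probit fit and the least squares fit below.
d=as.numeric(data$Y2>0)
coef(glm(d~X-1,family=binomial(link="probit")))
coef(lm(Y1~X2-1))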
# Heckman probit selection model, p.150
# Generate toy data: binary indicator vectors Y1 and Y2 and Poisson covariate matrices X1 and X2
N=10;dim=4
Y1=c();Y2=c()
X1=array(0,dim=c(N,dim))
X2=array(0,dim=c(N,dim))
for(j in 1:N){
val=rpois(2,sample(10:20,1))
val=val/sum(val);vec=ifelse(val==max(val),1,0)
Y1=c(Y1,vec[1])
val=rpois(2,sample(10:20,1))
val=val/sum(val);vec=ifelse(val==max(val),1,0)
Y2=c(Y2,vec[1])
val=rpois(2,sample(10:20,1))
val=val/sum(val)
X1[j,]=rpois(dim,sample(10:20,1))
val=rpois(2,sample(10:20,1))
val=val/sum(val)
X2[j,]=rpois(dim,sample(10:20,1))
}
# Probit regression fitted by iterative scoring, p.105
# multidimensional covariates
ite=50;X=X2;Y=Y1
alpha=rep(10^(-20),(ncol(X)))
alpha_beta_data=array(0,dim=c(ite,ncol(X)))
diff=1
for(j in 1:ite){
if(diff>0.001){
p=pnorm(X%*%alpha)
V=array(0,dim=c(nrow(X),nrow(X)))
diag(V)=(p*(1-p))
XVX=t(X)%*%V%*%X
eig=eigen(XVX)
sol_XVX=eig$vectors%*%diag(1/eig$values)%*%t(eig$vectors)
mat=sol_XVX%*%t(X)%*%(Y-p)
diff=sum(abs(Y-p))
alpha=alpha+mat
alpha_beta_data[j,]=alpha
}
}
predict=p
beta2=alpha
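# Optional cross-check (my addition): compare the scoring iterations above with
# stats::glm using a probit link and no intercept (with only N=10 rows the fit may
# be unstable, so treat this purely as a sanity check).
cbind(scoring=c(alpha),glm=coef(glm(Y~X-1,family=binomial(link="probit"))))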
# Least squares step with the inverse Mills ratio correction (Heckman two-step)
lam=dnorm(X%*%beta2)/pnorm(X%*%beta2)
psigma=1;beta1=rep(1,ncol(X1))
cost=function(b1,psig){
return(sum(Y2*(Y1-X%*%b1+psig*lam)^2))
}
ite=1000000
eta=10^(-7)
h=0.01
cost_val=10;cost_val_pre=11
for(l in 1:ite){
if(cost_val_pre-cost_val<=0) break  # stop once the cost no longer decreases
cost_val_pre=cost(beta1,psigma)
vec=beta1
for(j in 1:length(beta1)){
vec_sub=vec;vec_sub[j]=vec_sub[j]+h
beta1[j]=beta1[j]-eta*(cost(vec_sub,psigma)-cost(vec,psigma))/h
}
psigma=psigma-eta*(cost(beta1,psigma+h)-cost(beta1,psigma))/h
cost_val=cost(beta1,psigma)
print(cost_val)
}
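# Closed-form check (my addition): the quadratic cost above is an ordinary least
# squares problem over the rows with Y2==1, regressing Y1 on cbind(X,-lam), so lm()
# returns the minimizing beta1 (first ncol(X) coefficients) and psigma (last
# coefficient) directly; with few selected rows the fit may be rank deficient.
sel=which(Y2==1)
W_ls=cbind(X,-lam)
coef(lm(Y1[sel]~W_ls[sel,]-1))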
# p.160
library(dplyr)
data=data.frame(num=1:20,y=c(0.20, 0.10, 0.49, 0.26, 0.92, 0.95, 0.18, 0.19, 0.44, 0.79, 0.61, 0.41, 0.49, 0.34, 0.62, 0.99, 0.38, 0.22,0.71,0.40),x1=c(84,87,86,85,82,87,92,94,88,84.9,78,90,88,87,82,84,86,83,85.2,82),x2=c(61,55.5,57,57,50,50,66.5,65,60.5,49.5,49.5,61,59.5,58.4,53.5,54,60,58.8,54,56),x3=c(24.5,24,23,22.5,22,24,25,26,25,24,23,21,26,25.5,24,23,24,24,24,22))
N=30;n=nrow(data)  # N: assumed population size, n: observed sample size
ite=10
pis=data$y;X=as.matrix(data[,colnames(data) %in% c("x1","x2","x3")])
X=X/100
alpha=rep(0.01,(ncol(X)))
alpha_beta_data=array(0,dim=c(ite,ncol(X)))
for(j in 1:ite){
V=array(0,dim=c(nrow(data),nrow(data)))
diag(V)=((1/(1+exp(-(X%*%alpha))))*(1-1/(1+exp(-(X%*%alpha)))))
print(sum(V))
mat=solve(t(X)%*%V%*%(X))%*%t(X)%*%(pis-(1/(1+exp(-(X%*%alpha)))))
alpha=alpha+mat
alpha_beta_data[j,]=alpha
}
data=data %>% dplyr::mutate(predict=1/(1+exp(-(X%*%alpha))))
# Part 1: equation (5.21)
eta=0.000001
h=0.01
ite=100000
lam1=0.0001;lam2=0.0001;W=0.01
mu=apply(X,2,mean)
L=function(lambda1,lambda2,w){
p_vec=1/(n*(1+lambda1*(apply(t(X)-mu,2,sum))+lambda2*(1/(1+exp(-X%*%alpha))-w)))
mean(log(1/(1+exp(-X%*%alpha))))+(N-n)*log(1-w)-
mean(log(1+lambda1*apply(t(X)-mu,2,sum)+lambda2*(1/(1+exp(-X%*%alpha))-w)))-
N*abs(sum(p_vec)-1)-
N*abs(sum(p_vec/(1+exp(-X%*%alpha)))-w)-
N*abs(sum(p_vec*apply(t(X)-mu,2,sum)))
}
for(l in 1:ite){
lam1=lam1+eta*(L(lam1+h,lam2,W)-L(lam1,lam2,W))/h
lam2=lam2+eta*(L(lam1,lam2+h,W)-L(lam1,lam2,W))/h
W=W+eta*(L(lam1,lam2,W+h)-L(lam1,lam2,W))/h
p=1/(n*(1+lam1*(apply(t(X)-mu,2,sum))+lam2*(1/(1+exp(-X%*%alpha))-W)))
print(L(lam1,lam2,W))
}
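# Constraint check (my addition): after the gradient steps, the implied weights p
# should approximately satisfy the constraints that the penalty terms in L enforce
# (the weights sum to one, reproduce W, and are orthogonal to the centred covariates).
c(sum(p),sum(p/(1+exp(-X%*%alpha)))-W,sum(p*apply(t(X)-mu,2,sum)))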
# p.160
# Part 2: equation (5.24)
library(dplyr)
data=data.frame(num=1:20,y=c(0.20, 0.10, 0.49, 0.26, 0.92, 0.95, 0.18, 0.19, 0.44, 0.79, 0.61, 0.41, 0.49, 0.34, 0.62, 0.99, 0.38, 0.22,0.71,0.40),x1=c(84,87,86,85,82,87,92,94,88,84.9,78,90,88,87,82,84,86,83,85.2,82),x2=c(61,55.5,57,57,50,50,66.5,65,60.5,49.5,49.5,61,59.5,58.4,53.5,54,60,58.8,54,56),x3=c(24.5,24,23,22.5,22,24,25,26,25,24,23,21,26,25.5,24,23,24,24,24,22))
N=30;n=nrow(data)
ite=10
X=as.matrix(data[,colnames(data) %in% c("x1","x2","x3")])
Z=c()
for(j in 1:n){
val=rpois(2,sample(10:20,1))
val=val/sum(val)
Z=c(Z,sample(0:1,1,prob=val))
}
pis=Z
X=X/100
alpha=rep(0.01,(ncol(X)))
alpha_beta_data=array(0,dim=c(ite,ncol(X)))
for(j in 1:ite){
V=array(0,dim=c(nrow(data),nrow(data)))
diag(V)=((1/(1+exp(-(X%*%alpha))))*(1-1/(1+exp(-(X%*%alpha)))))
print(sum(V))
mat=solve(t(X)%*%V%*%(X))%*%t(X)%*%(pis-(1/(1+exp(-(X%*%alpha)))))
alpha=alpha+mat
alpha_beta_data[j,]=alpha
}
data=data %>% dplyr::mutate(predict=1/(1+exp(-(X%*%alpha))))
eta=0.000001
h=0.01
ite=100000
lam1=0.0001;lam2=0.0001;W=0.01
beta=solve(t(X)%*%X)%*%t(X)%*%pis
mu=apply(X,2,mean)
L2=function(lambda1,lambda2,w){
p_vec=1/(n*(1+lambda1*(1-1/(1+exp(-X%*%alpha)))*X%*%beta+lambda2*(1/(1+exp(-X%*%alpha))-w)))
return(sum(log(1/(1+exp(-X%*%alpha))))+(N-n)*log(1-w)-
mean(log(1+lambda1*(1-1/(1+exp(-X%*%alpha)))*X%*%beta+lambda2*(1/(1+exp(-X%*%alpha))-w)))-
N*abs(sum(p_vec)-1)-
N*abs(sum(p_vec/(1+exp(-X%*%alpha))-p_vec*w))-
N*abs(sum(p_vec*(1-1/(1+exp(-X%*%alpha)))*X%*%beta)))
}
for(l in 1:ite){
lam1=lam1+eta*(L2(lam1+h,lam2,W)-L2(lam1,lam2,W))/h
lam2=lam2+eta*(L2(lam1,lam2+h,W)-L2(lam1,lam2,W))/h
W=W+eta*(L2(lam1,lam2,W+h)-L2(lam1,lam2,W))/h
p=1/(n*(1+lam1*(1-1/(1+exp(-X%*%alpha)))*X%*%beta+lam2*(1/(1+exp(-X%*%alpha))-W)))
print(L2(lam1,lam2,W))
}
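# Constraint check (my addition), mirroring the penalty terms of L2.
pi_hat=1/(1+exp(-X%*%alpha))
c(sum(p),sum(p*pi_hat-p*W),sum(p*(1-pi_hat)*X%*%beta))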
# p.214 A.2
N=20
Y=c()
for(j in 1:N){
val=rpois(2,sample(10:20,1))
val=val/sum(val)
Y=c(Y,ifelse(val[1]==max(val),1,0))
}
lam=10;r_val=N
sita=1;sigma=1
eta=0.000001;h=0.01
ite=1000000
cost=function(lambda,r,sit,sig){
p=(1/(1+lambda*dnorm(Y,sit,sig)))/N
H=sum(log(p))-N*lambda*sum(p*dnorm(Y,sit,sig))-(7)*r*(sum(p)-1)
return(H)
}
for(l in 1:ite){
sita=sita-eta*(cost(lam,r_val,sita+h,sigma)-cost(lam,r_val,sita,sigma))/h
sigma=sigma-eta*(cost(lam,r_val,sita,sigma+h)-cost(lam,r_val,sita,sigma))/h
lam=lam-eta*(cost(lam+h,r_val,sita,sigma)-cost(lam,r_val,sita,sigma))/h
p_val=(1/(1+lam*dnorm(Y,sita,sigma)))/N
# print(cost(lam,r_val,sita,sigma))
}
print(cost(lam,r_val,sita,sigma))
print(sum(p_val*dnorm(Y,sita,sigma)))
N*(sum(p_val)-1)
dpthi=c(sum(p_val*(dnorm(Y,sita+h,sigma)-dnorm(Y,sita,sigma)))/h,sum(p_val*(dnorm(Y,sita,sigma+h)-dnorm(Y,sita,sigma)))/h)
1/(sum(dpthi^2)/sum(p_val*dnorm(Y,sita,sigma)^2))