packageを使わないでやってみた(マルコフ生成行列の応用)

#attribution analysis(making sample_data)

library(pforeach)
library(dplyr)

initial_time=Sys.time()-as.numeric(substr(Sys.time(),12,13))*3600-as.numeric(substr(Sys.time(),15,16))*60-as.numeric(substr(Sys.time(),18,19))

N=1000;m=3;customers=100

data=data.frame(ID=sample(1:customers,N,replace=TRUE),time=initial_time+sample(1:24,N,replace=TRUE,prob=c(0,0,0,0,0,0,0.1,0.1,0.01,0.01,0,0.1,0.02,0,0,0,0.1,0.1,0.1,0.1,0.1,0.1,0.06,0))*3600+sample(0:3600,N,replace=TRUE),site=sample(1:m,N,replace=TRUE))

data=data[order(data$time),]

trans_time=c()


for(j in 1:customers){

data_sub=data %>% dplyr::filter(ID==j) %>% dplyr::mutate(num_time=as.numeric(time),sign=0)

data_sub=data_sub[order(data_sub$time),]

if(nrow(data_sub)>=2){

for(i in 2:nrow(data_sub)){

if(data_sub$site[i]==data_sub$site[i-1]){

data_sub$sign[i]=1 

}  

}

data_sub=data_sub %>% dplyr::filter(sign==0)



trans_time=c(trans_time,nrow(data_sub)-1) 

}

}

trans_time=max(trans_time)

#trans_time_data=data.frame(trans_time=1:trans_time)


trans_time_data=pforeach(j=1:customers,.c=rbind)({

data_sub=data %>% dplyr::filter(ID==j) %>% dplyr::mutate(num_time=as.numeric(time),sign=0)

data_sub=data_sub[order(data_sub$time),]

if(nrow(data_sub)>=2){

for(i in 2:nrow(data_sub)){

if(data_sub$site[i]==data_sub$site[i-1]){

data_sub$sign[i]=1 

}  

}

data_sub=data_sub %>% dplyr::filter(sign==0) %>% dplyr::select(-sign)

data_sub=data_sub %>% dplyr::mutate(before=0,after=0,diff_num_time=0)

for(i in 2:nrow(data_sub)){

data_sub$before[i]=data_sub$site[i-1];data_sub$after[i]=data_sub$site[i]

data_sub$diff_num_time[i]=data_sub$num_time[i]-data_sub$num_time[i-1]


}

data_sub=tail(data_sub,nrow(data_sub)-1) 

data_sub=data_sub %>% dplyr::select(before,after,diff_num_time)

if(nrow(data_sub)!=trans_time){


data_sub=rbind(data_sub,data.frame(before=rep(0,trans_time-nrow(data_sub)),after=rep(0,trans_time-nrow(data_sub)),diff_num_time=rep(0,trans_time-nrow(data_sub))))  

}



data_sub=data_sub %>% dplyr::mutate(trans_time=1:nrow(data_sub))

data_sub=data_sub %>% dplyr::select(trans_time,before,after,diff_num_time)

#colnames(data_sub)=c("trans_time",paste0("before",j),paste0("after",j),paste0("diff_num_time",j))

data_sub






}

})

trans_time_data=trans_time_data %>% dplyr::filter(diff_num_time!=0)

write.csv(trans_time_data,"~/attribution_box/trans_time_data.csv")


#attribution analysis(analysis)

trans_time_data=read.csv("~/attribution_box/trans_time_data.csv")

trans_time_data=trans_time_data %>% dplyr::select(trans_time,before,after,diff_num_time)

trans=trans_time_data %>% group_by(before,after) %>% summarise(n=n())

alpha=trans %>% dplyr::mutate(alpha=n/sum(trans$n))

beta=trans %>% dplyr::mutate(beta=0)

for(k in 1:nrow(beta)){

before=beta$before[k];after=beta$after[k]

sample_data=trans_time_data %>% dplyr::mutate(sign=0)

for(i in 1:nrow(sample_data)){

if(sample_data$before[i]==before){
if(sample_data$after[i]==after){  

sample_data$sign[i]=1  

}
}

}

sample_data=sample_data %>% dplyr::filter(sign==1) %>% dplyr::select(-sign,-trans_time)

t=sample_data$diff_num_time

lambda=1/mean(t)

beta$beta[k]=lambda

#exp_dis=function(t){

#z<-lambda*exp(-lambda*t)

#return(z)


#}

#exp_data=data.frame(t=seq(0,40000,1),prob=0)

#for(l in 1:nrow(exp_data)){


#  exp_data$prob[l]=0.3*integrate(exp_dis,0,exp_data$t[l])$value

#}

#plot(exp_data$t,exp_data$prob,type="o",col=3,xlab="t",ylab="prob",main=paste0("a:",0.3,",b:",lambda))

#integrate(exp_dis,0,10000)

#p_ij(t)=alpha*integrate(exp_dis,0,t)$value


}

A=array(0,dim=c(m,m))

for(i in 1:m){
for(j in 1:m){ 

if(i!=j){  

A[i,j]=alpha$alpha[alpha$before==i & alpha$after==j]*beta$beta[beta$before==i & beta$after==j]

}

}    
}

Q=A;

diag(A)=-apply(A,1,sum)

lambda=-diag(A)

for(j in 1:nrow(Q)){

Q[j,]=Q[j,]/lambda[j]  

}

diag(Q)=-diag(A)




#E[X_T=2,T<t|X_0=1]

t=40000

ave_func=function(s){

z<-s*Q[1,2]*lambda[1]*exp(-lambda[1]*s)

return(z)

}

integrate(ave_func,0,t)$value




#P(X_t=3,T<t|X_0=2)

prob=function(t,k,j,i){

z<-A[k,j]*((1/lambda[i])*(1-exp(-lambda[i]*t))+(1/(beta$beta[beta$before==k & beta$after==j]-lambda[i]))*(exp(-beta$beta[beta$before==k & beta$after==j]*t)-exp(-lambda[i]*t)))  

return(z)  

}

t=40000

Prob=function(t,i,j){

z=0

for(l in 1:m){

if(l!=i){

z=c(z,Q[i,l]*prob(t,l,j,i)) 

}

}

return(sum(z))


}

plot_data=data.frame(t=seq(0,t,1),value=0)

for(j in 1:nrow(plot_data)){

plot_data$value[j]=Prob(plot_data$t[j],2,3)  

}

plot(plot_data$t,plot_data$value,type="o",col=2,xlab="t",ylab="prob",main="P(X_t = 3,T < t | X_0 = 2)")

Sign up for free and join this conversation.
Sign Up
If you already have a Qiita account log in.