# packageを使わないでやってみた(マルコフ生成行列の応用)

```#attribution analysis(making sample_data)

library(pforeach)
library(dplyr)

initial_time=Sys.time()-as.numeric(substr(Sys.time(),12,13))*3600-as.numeric(substr(Sys.time(),15,16))*60-as.numeric(substr(Sys.time(),18,19))

N=1000;m=3;customers=100

data=data.frame(ID=sample(1:customers,N,replace=TRUE),time=initial_time+sample(1:24,N,replace=TRUE,prob=c(0,0,0,0,0,0,0.1,0.1,0.01,0.01,0,0.1,0.02,0,0,0,0.1,0.1,0.1,0.1,0.1,0.1,0.06,0))*3600+sample(0:3600,N,replace=TRUE),site=sample(1:m,N,replace=TRUE))

data=data[order(data\$time),]

trans_time=c()

for(j in 1:customers){

data_sub=data %>% dplyr::filter(ID==j) %>% dplyr::mutate(num_time=as.numeric(time),sign=0)

data_sub=data_sub[order(data_sub\$time),]

if(nrow(data_sub)>=2){

for(i in 2:nrow(data_sub)){

if(data_sub\$site[i]==data_sub\$site[i-1]){

data_sub\$sign[i]=1

}

}

data_sub=data_sub %>% dplyr::filter(sign==0)

trans_time=c(trans_time,nrow(data_sub)-1)

}

}

trans_time=max(trans_time)

#trans_time_data=data.frame(trans_time=1:trans_time)

trans_time_data=pforeach(j=1:customers,.c=rbind)({

data_sub=data %>% dplyr::filter(ID==j) %>% dplyr::mutate(num_time=as.numeric(time),sign=0)

data_sub=data_sub[order(data_sub\$time),]

if(nrow(data_sub)>=2){

for(i in 2:nrow(data_sub)){

if(data_sub\$site[i]==data_sub\$site[i-1]){

data_sub\$sign[i]=1

}

}

data_sub=data_sub %>% dplyr::filter(sign==0) %>% dplyr::select(-sign)

data_sub=data_sub %>% dplyr::mutate(before=0,after=0,diff_num_time=0)

for(i in 2:nrow(data_sub)){

data_sub\$before[i]=data_sub\$site[i-1];data_sub\$after[i]=data_sub\$site[i]

data_sub\$diff_num_time[i]=data_sub\$num_time[i]-data_sub\$num_time[i-1]

}

data_sub=tail(data_sub,nrow(data_sub)-1)

data_sub=data_sub %>% dplyr::select(before,after,diff_num_time)

if(nrow(data_sub)!=trans_time){

data_sub=rbind(data_sub,data.frame(before=rep(0,trans_time-nrow(data_sub)),after=rep(0,trans_time-nrow(data_sub)),diff_num_time=rep(0,trans_time-nrow(data_sub))))

}

data_sub=data_sub %>% dplyr::mutate(trans_time=1:nrow(data_sub))

data_sub=data_sub %>% dplyr::select(trans_time,before,after,diff_num_time)

#colnames(data_sub)=c("trans_time",paste0("before",j),paste0("after",j),paste0("diff_num_time",j))

data_sub

}

})

trans_time_data=trans_time_data %>% dplyr::filter(diff_num_time!=0)

```
```

trans_time_data=trans_time_data %>% dplyr::select(trans_time,before,after,diff_num_time)

trans=trans_time_data %>% group_by(before,after) %>% summarise(n=n())

alpha=trans %>% dplyr::mutate(alpha=n/sum(trans\$n))

beta=trans %>% dplyr::mutate(beta=0)

for(k in 1:nrow(beta)){

before=beta\$before[k];after=beta\$after[k]

sample_data=trans_time_data %>% dplyr::mutate(sign=0)

for(i in 1:nrow(sample_data)){

if(sample_data\$before[i]==before){
if(sample_data\$after[i]==after){

sample_data\$sign[i]=1

}
}

}

sample_data=sample_data %>% dplyr::filter(sign==1) %>% dplyr::select(-sign,-trans_time)

t=sample_data\$diff_num_time

lambda=1/mean(t)

beta\$beta[k]=lambda

#exp_dis=function(t){

#z<-lambda*exp(-lambda*t)

#return(z)

#}

#exp_data=data.frame(t=seq(0,40000,1),prob=0)

#for(l in 1:nrow(exp_data)){

#  exp_data\$prob[l]=0.3*integrate(exp_dis,0,exp_data\$t[l])\$value

#}

#plot(exp_data\$t,exp_data\$prob,type="o",col=3,xlab="t",ylab="prob",main=paste0("a:",0.3,",b:",lambda))

#integrate(exp_dis,0,10000)

#p_ij(t)=alpha*integrate(exp_dis,0,t)\$value

}

A=array(0,dim=c(m,m))

for(i in 1:m){
for(j in 1:m){

if(i!=j){

A[i,j]=alpha\$alpha[alpha\$before==i & alpha\$after==j]*beta\$beta[beta\$before==i & beta\$after==j]

}

}
}

Q=A;

diag(A)=-apply(A,1,sum)

lambda=-diag(A)

for(j in 1:nrow(Q)){

Q[j,]=Q[j,]/lambda[j]

}

diag(Q)=-diag(A)

```
```
#E[X_T=2,T<t|X_0=1]

t=40000

ave_func=function(s){

z<-s*Q[1,2]*lambda[1]*exp(-lambda[1]*s)

return(z)

}

integrate(ave_func,0,t)\$value

```
```
#P(X_t=3,T<t|X_0=2)

prob=function(t,k,j,i){

z<-A[k,j]*((1/lambda[i])*(1-exp(-lambda[i]*t))+(1/(beta\$beta[beta\$before==k & beta\$after==j]-lambda[i]))*(exp(-beta\$beta[beta\$before==k & beta\$after==j]*t)-exp(-lambda[i]*t)))

return(z)

}

t=40000

Prob=function(t,i,j){

z=0

for(l in 1:m){

if(l!=i){

z=c(z,Q[i,l]*prob(t,l,j,i))

}

}

return(sum(z))

}

plot_data=data.frame(t=seq(0,t,1),value=0)

for(j in 1:nrow(plot_data)){

plot_data\$value[j]=Prob(plot_data\$t[j],2,3)

}

plot(plot_data\$t,plot_data\$value,type="o",col=2,xlab="t",ylab="prob",main="P(X_t = 3,T < t | X_0 = 2)")

```