RStudio

packageを使用しないでやってみた(ある汎関数を含む確率過程の中心極限定理)

羽鳥裕久著「有限マルコフ連鎖」の第8章を加工して実装してみました。
自身は会社に入ってから出ていくまでにいくらぐらいの昇給合計収入が得られるのかをモデリングしたつもりです。よかったら一度動かしてみてください。何か気づかれましたら、ぜひ教えてください。

library(dplyr)

#総昇給給与予測モデル

#単位時間あたりにf(i)の基本給があるとする。昇給段階は4つあり、平社員、主任、係長、部長の立場があるとし、それぞれ1,2,3,4をその状態とします。昇格によって給与変動があり、単位時間当たりの給与関数fと昇格賞与行列G、昇給行列probがある場合に、一定期間でどのくらいもらえるかを予測します。t=1で1ヶ月とします。(単位1000000円あたり)

t=100;lambda=0.8

hand_made_prob=1

f=function(y){
  if(y==1){

      z<-0

    }else{

      if(y==2){

        z<-100

      }else{

        if(y==3){

          z<-1000

        }else{

          if(y==4){

            z<-2000

          }

        }

      }

    }

#月平均勤務日数を23日、1日の労働時間を8時間で計算

    return(z*23*8/10000000)

}

G=t(array(c(0,10000,100000,200000,0,0,100000,150000,0,0,0,100000,0,0,0,0),dim=c(4,4)))/10000000




#遷移確率をランダムに作成

result_sample_prob=data.frame(num=1:4)

prob=array(0,dim=c(4,4))

for(k in 1:ncol(prob)){

sample_prob=sample(1:4,10,replace=TRUE)


sample_prob_data=data.frame(num=sample_prob)

sample_prob_data=sample_prob_data %>% group_by(num) %>% summarise(freq=n()/length(sample_prob))

if(nrow(sample_prob_data)!=ncol(prob)){

  for(j in 1:ncol(prob)){

    unit_num=sort(unique(sample_prob))

    num=unit_num[j]

    if(!(j %in% sample_prob_data$num)){

      data_sub=data.frame(num=j,freq=0)

      sample_prob_data=rbind(sample_prob_data,data_sub)

  }

}
}

colnames(sample_prob_data)=c("num",paste0("freq",k))

result_sample_prob=left_join(result_sample_prob,sample_prob_data,by="num")

}

prob<-t(result_sample_prob[,2:ncol(result_sample_prob)])

if(hand_made_prob==1){

  prob=t(matrix(c(0.7,0.2,0.05,0.05,0.14,0.85,0.005,0.005,0.005,0.005,0.8,0.19,0.005,0.005,0.01,0.98),ncol=4,nrow=4))
}


#生成作用素が出来ているかどうか確認

A=-lambda*diag(1,4)+lambda*prob

if(sum(round(apply(A,1,sum))==0)==ncol(A)){

  print("生成作用素Aに従うマルコフ連鎖である")
}





#P(X(t)|X(0)=任意)のプロット

for(i in 1:4){


for(j in 1:4){

prob_data=data.frame(time=seq(1,t,by=1),prob=0,trans_prob=0)

Prob=prob

for(l in 1:nrow(prob_data)){




element_P=function(t){

 n=200

 p=array(0,dim=c(4,4))

 Prob=prob



 for(k in 1:n){

  p=p+(exp(-lambda*t)*(Prob))*prod(rep(lambda*t,k)/c(1:k))

 Prob=Prob%*%prob

 }

 return(p[i,j])

}

prob_data$prob[l]=element_P(prob_data$time[l])

prob_data$trans_prob[l]=Prob[i,j]

Prob=Prob%*%prob

write.csv(prob_data,paste0("~/markov_folder/p(",i,",",j,")の時系列データ.csv"))


}
png(paste0("~/markov_folder/p(",i,",",j,")の時系列データ.png"))
plot(prob_data$time,prob_data$prob,type="o",col=2,xlab="time",ylab="prob",main=paste0("p(",i,",",j,")の時系列グラフ;緑:遷移確率、赤:実際の遷移確率"),xlim=c(0,t),ylim=c(min(prob_data[,2:3]),max(prob_data[,2:3])))
par(new=T)
plot(prob_data$time,prob_data$trans_prob,type="o",col=3,xlab="time",ylab="prob",main=paste0("p(",i,",",j,")の時系列グラフ;緑:遷移確率、赤:実際の遷移確率"),xlim=c(0,t),ylim=c(min(prob_data[,2:3]),max(prob_data[,2:3])))
dev.off()


plot(prob_data$time,prob_data$prob,type="o",col=2,xlab="time",ylab="prob",main=paste0("p(",i,",",j,")の時系列グラフ;緑:遷移確率、赤:実際の遷移確率"),xlim=c(0,t),ylim=c(min(prob_data[,2:3]),max(prob_data[,2:3])))
par(new=T)
plot(prob_data$time,prob_data$trans_prob,type="o",col=3,xlab="time",ylab="prob",main=paste0("p(",i,",",j,")の時系列グラフ;緑:遷移確率、赤:実際の遷移確率"),xlim=c(0,t),ylim=c(min(prob_data[,2:3]),max(prob_data[,2:3])))

}

  }

print(paste0("定常分布は"))

print(Prob)

print("基の遷移確率は")

print(prob)


#ジャンプ時点の確率を決定する

random_time_p=data.frame(time=0,state.1=0,state.2=0,state.3=0,state.4=0,prob.1=0,prob.2=0,prob.3=0,prob.4=0)


time_seq=seq(0.1,t,0.1)

for(k in 1:length(time_seq)){

  P=state=array(0,dim=c(4,4))

 for(i in 1:4){

   for(j in 1:4){



element_P=function(t){

 n=200

 p=array(0,dim=c(4,4))


 Prob=prob



 for(k in 1:n){

   pois=function(x){

     z<-(exp(-lambda*x)*((lambda*x)^k))

     return(z)

   }


  p=p+(pois(t)*(Prob))/gamma(k+1)

 Prob=Prob%*%prob

 }

 return(p[i,j])

}

state[i,j]=paste0(i,",",j,",",time_seq[k])

P[i,j]=(element_P(time_seq[k]))

   }
 }

data=data.frame(time=time_seq[k],state=state,prob=P)

random_time_p=rbind(random_time_p,data)



}

random_time_p=tail(random_time_p,nrow(random_time_p)-1)


#初期状態の確認;

l=0;result_sample_data=data.frame(num=0,S=0)

#1,2,3,4のそれぞれの状態から入社した場合のサンプルパスを考える;

start_state=2



#サンプルパスを確認する(サンプル数としては20~50が目安)





state_data=data.frame(time=0,state=start_state)

jump_time=0

current_state=start_state

while(jump_time<t)({

  jump_time=jump_time+rexp(1,rate=lambda*(1-diag(prob)[current_state]))

  jump_time=floor(jump_time*10)/10



time_p=random_time_p %>% dplyr::filter( paste0("",current_state,"")==substring(random_time_p$state.1,1,1) )

time_p=time_p%>% dplyr::filter( paste0("",jump_time,"")==substring(time_p$state.1,5,) )

P=time_p[,6:9];state=(time_p[,2:5])

P=P[colnames(P)!=paste0("prob.",current_state)]

state=state[colnames(state)!=paste0("state.",current_state)]

P=(c(P$prob.1,P$prob.2,P$prob.3,P$prob.4))

state=c(state$state.1,state$state.2,state$state.3,state$state.4)

 jump_state=sample(state,1,prob=P)

 current_state=as.numeric(substring(jump_state,3,3))

 data=data.frame(time=jump_time,state=current_state)

 state_data=rbind(state_data,data)


})

state_data=state_data %>% group_by(time) %>% summarise(state=head(state,1)) %>% mutate(S=0)







S=0

for(j in 2:nrow(state_data)){

time=state_data$time[j]-state_data$time[j-1]

S=S+f(state_data$state[j-1])*time+G[state_data$state[j-1],state_data$state[j]]

state_data$S[j]=S

}

state_data=rbind(state_data,data.frame(time=t,state=0,S=S+f(state_data$state[nrow(state_data)])*(t-state_data$time[nrow(state_data)])))

l=l+1

plot(state_data$time,state_data$S,type="o",xlab="time",ylab="the amount of a raise in salary",main=paste0(l,"回目のサンプルパス"),col=2)



result_sample_data_sub=data.frame(num=l,S=state_data$S[nrow(state_data)])

result_sample_data=rbind(result_sample_data,result_sample_data_sub)



G1=G2=array(0,dim=c(4,4))

for(j in 1:ncol(G1)){

  diag(G1)[j]=f(j)

}

for(i in 1:ncol(G1)){
  for(j in 1:nrow(G1)){

    if(i!=j){
   G1[i,j]=A[i,j]*G[i,j]

   G2[i,j]=A[i,j]*G[i,j]^2 

    }
  }
}


pai=Prob[1,]

mu=pai%*%G1%*%rep(1,nrow(G1))

Z=solve(Prob-A)-Prob

sigma=pai%*%G2%*%rep(1,nrow(G2))+2*pai%*%G1%*%Z%*%G1%*%rep(1,nrow(G1))

mu_t=function(m){

  z<-mu*m*rep(1,4)+Z%*%G1%*%t(t(rep(1,4)))

  return(z)

}

sigma_t=function(m){

  z<-sqrt(sigma*m*rep(1,4))

  return(z)


}


result_sample_data=result_sample_data %>% filter(!(is.na(result_sample_data$S)>0))

result_sample_data=tail(result_sample_data,nrow(result_sample_data)-1)


plot(result_sample_data$S,xlab="num",ylab="the amount of a raise in salary",type="p",col=4,ylim=c(min(result_sample_data$S),max(result_sample_data$S)),main=paste0("初めの状態が",start_state,"のときの",t,"ヶ月後の合計昇給給与(赤;平均)"))
par(new=T)
plot(rep(mu_t(t)[start_state],nrow(result_sample_data)),xlab="num",ylab="the amount of a raise in salary",type="l",col=2,ylim=c(min(result_sample_data$S),max(result_sample_data$S)),main=paste0("初めの状態が",start_state,"のときの",t,"ヶ月後の合計昇給給与(赤;平均)"))



#定年(例えば65歳)までで稼げる金額の区間推定(中心極限定理)

alpha=0.05

time=12*40

if(pnorm(sqrt(time),mean=0,sd=sqrt(sigma))-pnorm(-sqrt(time),mean=0,sd=sqrt(sigma))>0.95){

under=mu*time+sqrt(time)*qnorm(alpha,mean=0,sd=sqrt(sigma))
upper=mu*time+sqrt(time)*qnorm(1-alpha,mean=0,sd=sqrt(sigma))

print(paste0(time/12,"年間の総昇給額の",(1-2*alpha)*100,"%区間は(",under,",",upper,")(単位は1000万)"))

}