下書き
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
# ファイルパス定義
IN_FPATH = "./input/q1_input.csv"
OUT_FPATH = "./output/q1_output.csv"
# ファイルの読み込み
df = pd.read_csv(IN_FPATH)
df.date = pd.to_datetime(df.date)
# q1-1)
# 前回フェーズ列を追加せよ
df_pre_phase = df.phase.copy()
df_diff_phase = df.phase.diff()
df_pre_phase = df_pre_phase - df_diff_phase
df_pre_phase[ df_diff_phase == 0 ] = np.nan
df_pre_phase = df_pre_phase.fillna(method='ffill')
df["pre_phase"] = df_pre_phase
# 前々回フェーズ列を追加せよ
df_pre_pre_phase = df.pre_phase.copy()
df_diff_phase = df.pre_phase.diff()
df_pre_pre_phase = df_pre_pre_phase - df_diff_phase
df_pre_pre_phase[ df_diff_phase == 0 ] = np.nan
df_pre_pre_phase = df_pre_pre_phase.fillna(method='ffill')
df["df_pre_pre_phase"] = df_pre_pre_phase
# q1-2)
# フェーズ4→5へ遷移した日を"1"にした列を追加せよ
df_phase4_5 = df.phase.copy()
df_diff_phase = df.phase.diff()
df_phase4_5[ df.phase!=5 ] = 0 #np.nan
df_phase4_5[ (df.phase==5) & (df_pre_phase==4) ] = 1
df_phase4_5[ df_diff_phase != 1 ] = 0 #np.nan
df["Phase4_5"] = df_phase4_5
# q1-3)
# フェーズ5→6へ遷移した日を"1"にした列を追加せよ
df_phase5_6 = df.phase.copy()
df_diff_phase = df.phase.diff()
df_phase5_6[ df.phase!=6 ] = 0 #np.nan
df_phase5_6[ (df.phase==6) & (df_pre_phase==5) ] = 1
df_phase5_6[ df_diff_phase != 1 ] = 0 #np.nan
df["Phase5_6"] = df_phase5_6
# q1-4)
# フェーズが5→6へ遷移する度にカウントアップする列を追加せよ
df_5_6_countup = df_phase5_6.cumsum()
df["5_6_countup"] = df_5_6_countup
# q1-5)
# フェーズが5→6へ遷移する度にカウントアップする列を追加せよ
# ただし、フェーズが4→5へ遷移したときにカウントを0クリアせよ
df_phase4_5_counter = df_5_6_countup.copy()
df_phase4_5_counter[ df_phase4_5==0 ] = np.nan
df_phase4_5_counter = df_phase4_5_counter.fillna(method='ffill')
df_5_6_counter_phase4_clear = df_5_6_countup - df_phase4_5_counter
df["5_6_countup_4clear"] = df_5_6_counter_phase4_clear
# q1-6)
# フェーズ5の累積時間の列を追加せよ
# ただし、前回フェーズが4の場合は累積から除外する
df_phase5_erapsed = df.date.copy()
df_diff_phase = df.phase.diff()
df_phase5_erapsed[ (df.phase!=5)|(df_diff_phase==0) ] = np.nan
df_phase5_erapsed = df_phase5_erapsed.fillna(method='ffill')
df_phase5_erapsed = df_phase5_erapsed.fillna(method='bfill')
df_phase5_erapsed = df.date - df_phase5_erapsed
df_phase5_erapsed = df_phase5_erapsed.dt.total_seconds()
df_phase5_erapsed[ (df_phase5_6!=1) ] = np.nan
df["phase5_erapsed"] = df_phase5_erapsed
df["phase5_erapsed_1st"] = df_phase5_erapsed[df_pre_pre_phase==4]
df_phase5_erapsed_cumsum = df_phase5_erapsed.copy()
df_phase5_erapsed_cumsum = df_phase5_erapsed_cumsum.cumsum()
df_phase5_erapsed_cumsum_st = df_phase5_erapsed_cumsum.copy()
df_phase5_erapsed_cumsum_st[ (df["Phase5_6"]!=1)|(df["df_pre_pre_phase"]!=4) ] = np.nan
df_phase5_erapsed_cumsum_st = df_phase5_erapsed_cumsum_st.fillna(method='ffill')
df_phase5_erapsed_cumsum_st = df_phase5_erapsed_cumsum_st.fillna(0)
df_phase5_erapsed_cumsum = df_phase5_erapsed_cumsum - df_phase5_erapsed_cumsum_st
df["phase5_erapsed_cumsum"] = df_phase5_erapsed_cumsum
df["phase5_erapsed_cumsum"] = df["phase5_erapsed_cumsum"].fillna(method="ffill")
df["phase5_erapsed_mean"] = df["phase5_erapsed_cumsum"]/(df["5_6_countup_4clear"]-1)
df["phase5_erapsed_mean"] = df["phase5_erapsed_mean"].fillna(method="ffill")
# q1-7)
# フェーズ6の累積時間の列を追加せよ
df_phase6_5 = df.phase.copy()
df_phase6_5[ df.phase!=5 ] = 0
df_phase6_5[ (df.phase==5) & (df_pre_phase==6) ] = 1
df_phase6_5[ df_diff_phase != -1 ] = 0 #np.nan
df_phase6_erapsed = df.date.copy()
df_diff_phase = df.phase.diff()
df_phase6_erapsed[ (df.phase!=6)|(df_diff_phase==0) ] = np.nan
df_phase6_erapsed = df_phase6_erapsed.fillna(method='ffill')
df_phase6_erapsed = df_phase6_erapsed.fillna(method='bfill')
df_phase6_erapsed = df.date - df_phase6_erapsed
df_phase6_erapsed = df_phase6_erapsed.dt.total_seconds()
df_phase6_erapsed[ (df_phase6_5!=1) ] = np.nan
df_phase6_erapsed_cumsum = df_phase6_erapsed.cumsum()
df_phase6_erapsed_cumsum_st = df_phase6_erapsed_cumsum.copy()
df_phase6_erapsed_cumsum_st = df_phase6_erapsed_cumsum_st.fillna(method='ffill')
df_phase6_erapsed_cumsum_st = df_phase6_erapsed_cumsum_st.fillna(0)
df_phase6_erapsed_cumsum_st[ (df["Phase5_6"]!=1)|(df["df_pre_pre_phase"]!=4) ] = np.nan
df_phase6_erapsed_cumsum_st = df_phase6_erapsed_cumsum_st.fillna(method='ffill')
df_phase6_erapsed_cumsum_st = df_phase6_erapsed_cumsum_st.fillna(0)
df["phase6_erapsed_cumsum"] = df_phase6_erapsed_cumsum - df_phase6_erapsed_cumsum_st
df["phase6_erapsed_cumsum"] = df["phase6_erapsed_cumsum"].fillna(method="ffill")
df["phase6_erapsed_mean"] = ( df_phase6_erapsed_cumsum - df_phase6_erapsed_cumsum_st )/(df["5_6_countup_4clear"])
df["phase6_erapsed_mean"] = df["phase6_erapsed_mean"].fillna(method="ffill")
# ファイル出力
df.to_csv(OUT_FPATH, encoding="shift-jis")