Kaggle のコンペ「COVID19 Global Forecasting (Week 5)」に投稿したスクリプトです。
covid19_starter_code.py
#! /usr/bin/python
# -*- coding: utf-8 -*-
#
# covid19_starter_code.py
#
# May/10/2020 AM 07:20
#
# copied and edited from "Starter Code"
#
# train.csv
# 104 days
# 3463 keys
#
# test.csv
# 45 days
# 3463 keys
#
# ------------------------------------------------------------------
import numpy as np
import pandas as pd
import os
import sys
# Mark the start of the run on stderr.
sys.stderr.write("*** start ***\n")

# Directory that holds train.csv, test.csv and submission.csv.
folder_src = '../input/covid19-global-forecasting-week-5'
# Alternative input directory (presumably a shortened dataset -- confirm):
# folder_src='../input_short/covid19-global-forecasting-week-5'

train_path = folder_src + '/train.csv'
test_path = folder_src + '/test.csv'
submission_path = folder_src + '/submission.csv'

df_train = pd.read_csv(train_path)
df_test = pd.read_csv(test_path)
submission = pd.read_csv(submission_path)

# Report the (rows, columns) shape of every loaded frame.
for label, frame in (
    ("df_train.shape ", df_train),
    ("df_test.shape ", df_test),
    ("submission.shape ", submission),
):
    print(label, frame.shape)

# Replace missing County / Province_State values with the placeholder "NR"
# so that no location column in the training data is left as NaN.
for column in ('County', 'Province_State'):
    df_train[column] = df_train[column].fillna("NR")

# Last expression of the cell: shown as cell output in a notebook.
df_train.head()
# %%
# ------------------------------------------------------------------
# Show the training rows whose Country_Region is 'Japan'.
# Nothing is assigned; the result is only displayed as notebook cell output.
is_japan = df_train['Country_Region'].eq('Japan')
df_train[is_japan]
# %%
# ------------------------------------------------------------------
# Build `aa`: one row per (County, Province_State, Country_Region, Target)
# holding the 5%, 50% and 95% quantiles of TargetValue over the training data.
group_keys = ['County', 'Province_State', 'Country_Region', 'Target']
target_by_group = df_train.groupby(group_keys)['TargetValue']

aa = target_by_group.quantile(q=0.05).reset_index()
bb = target_by_group.quantile(q=0.5).reset_index()
cc = target_by_group.quantile(q=0.95).reset_index()
aa.columns = group_keys + ['q0.05']
bb.columns = group_keys + ['q0.5']
cc.columns = group_keys + ['q0.95']

# All three frames come from the same grouping, so their rows line up and the
# extra quantile columns can be appended side by side.
# `axis` must be passed by keyword: pandas >= 2.0 rejects it as a positional
# argument of pd.concat.
aa = pd.concat([aa, bb['q0.5'], cc['q0.95']], axis=1)

# Clamp every quantile into the range [0, 10000].
for quantile_col in ('q0.05', 'q0.5', 'q0.95'):
    aa[quantile_col] = aa[quantile_col].clip(0, 10000)

# Last expression of the cell: shown as cell output in a notebook.
aa.head()
# %%
# ------------------------------------------------------------------
# Replace missing County / Province_State values in the test data with the
# placeholder "NR".
for column in ('County', 'Province_State'):
    df_test[column] = df_test[column].fillna("NR")

# Last expression of the cell: shown as cell output in a notebook.
df_test.head()
# %%
# ------------------------------------------------------------------
# Attach the quantile columns of `aa` to every test row.
# A left join keeps all test rows; rows whose key has no match in `aa` get
# NaN in the quantile columns.
merge_keys = ['Country_Region', 'County', 'Province_State', 'Target']
df_test = pd.merge(df_test, aa, how='left', on=merge_keys)

# Last expression of the cell: shown as cell output in a notebook.
df_test.head()
# %%
# ------------------------------------------------------------------
# Keep only the forecast id and the three quantile columns.
submission_cols = ['ForecastId', 'q0.05', 'q0.5', 'q0.95']
df_test2 = df_test.loc[:, submission_cols]

# Last expression of the cell: shown as cell output in a notebook.
df_test2.head()
# %%
# ------------------------------------------------------------------
# Reshape the wide quantile columns into long form: one row per
# (ForecastId, quantile) pair.
long_form = df_test2.melt(
    id_vars=['ForecastId'],
    value_vars=['q0.05', 'q0.5', 'q0.95'],
)

# Strip the "q" from the column label, so 'q0.05' becomes '0.05', and build
# the '<ForecastId>_<quantile>' key.
quantile_label = long_form['variable'].str.replace("q", "", regex=False)
sub = pd.DataFrame({
    'ForecastId_Quantile': long_form['ForecastId'].astype(str) + '_' + quantile_label,
    'TargetValue': long_form['value'],
})
sub = sub.reset_index(drop=True)

# Write the two-column result to the working directory without the index.
sub.to_csv("submission.csv", index=False)

# Last expression of the cell: shown as cell output in a notebook.
sub.head()
# %%
# ------------------------------------------------------------------
# Mark the end of the run on stderr.
sys.stderr.write("*** end ***\n")
# ------------------------------------------------------------------
このスクリプトを Jupyter ノートブック (.ipynb) に変換するには、次のコマンドを実行します。
ipynb-py-convert covid19_starter_code.py covid19_may10.ipynb