0
0

More than 3 years have passed since last update.

Kaggle COVID19 Global Forecasting (Week 4)

Last updated at Posted at 2020-04-13

次のコンペに投稿する方法です。

COVID19 Global Forecasting (Week 4)
covid19-global-forecasting.py を covid19_apr13.ipynb に変換してアップロードしました。

結果は次の通りでした。
covid19_apr13.png

covid19-global-forecasting.py
#! /usr/bin/python
# -*- coding: utf-8 -*-
#
#   covid19-global-forecasting.py
#
#                   Apr/13/2020 AM 09:22 
# ------------------------------------------------------------------
import numpy as np
import pandas as pd
import sys
from dateutil.parser import parse
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt

np.seterr(all=None, divide=None, over=None, under=None, invalid=None)
# ------------------------------------------------------------------
def curve_fit_proc(offset,np_train):
    sys.stderr.write("*** curve_fit_proc *** start ***\n")
    coefs = []

    XX = np.arange(offset, np_train.shape[0], 1)

    for it in range(np_train.shape[1]):
#       sys.stderr.write("%d " % it)
        if it in as_exponent:
            coefs.append(curve_fit(exp,  XX, np_train[offset:, it],p0 = (0.5, XX[0], 2, 0), maxfev=100000)[0])
#
    sys.stderr.write("*** curve_fit_proc *** end ***\n")
#
    return coefs
# ------------------------------------------------------------------
def pattern_proc():
    as_exponent = range(313)
#
    return as_exponent
# ------------------------------------------------------------------
def exp(xx, aa, bb, dd, pp):
    rvalue = 0.0
    try:
        rvalue = dd * np.exp(aa * xx - bb) + pp
    except Exception as ee:
        sys.stderr.write("*** error *** in exp ***\n")
        sys.stderr.write(str(ee) + "\n")
    return rvalue
#
# ------------------------------------------------------------------
def new_epx(x, a, b, d, p):
    return exp(x, a, b, d, p)*(exp(x, a, b, d, p)>=0)

# ------------------------------------------------------------------
def RMSLE(pred,actual):
    return np.sqrt(np.mean(np.power((np.log(pred+1)-np.log(actual+1)),2)))
#
# ------------------------------------------------------------------
def plot_exp(offset,np_train,coefs):

    XX = np.arange(offset, np_train.shape[0], 1)
    it = 148
    jt = 152
    plt.plot(np_train[offset:,it], label = "Japan")
    plt.plot(exp(XX, *coefs[it]))
    plt.plot(np_train[offset:,jt], label = "Korea")
    plt.plot(exp(XX, *coefs[jt]))
    plt.legend()
    plt.show()

# ------------------------------------------------------------------
def date_format_convert_proc(date_in):
    f_replace = lambda x: (parse(x) - parse("2020-01-01")).days
    date_aa = date_in.apply(f_replace)
    date_bb  = date_aa.astype(int)
#
    return date_bb
# ------------------------------------------------------------------
def key_define_proc(data_in):
    key = data_in['Country_Region'].astype('str') \
        + " " + data_in['Province_State'].astype('str')
#
    return key
# ------------------------------------------------------------------
def pivot_calc_proc(data_train):
#lets create pivot tables
    pivot_train = pd.pivot_table(data_train, index='Date', \
        columns = 'key', values = 'ConfirmedCases')
    pivot_train_death = pd.pivot_table(data_train, index='Date', \
        columns = 'key', values = 'Fatalities')
    np_train = pivot_train.to_numpy()
    np_train_death = pivot_train_death.to_numpy()

    sys.stderr.write("*** pivot_calc_proc ***\n")

    pivot_train.head(10)
#
    return np_train,np_train_death
# ------------------------------------------------------------------

# %%

sys.stderr.write("*** start ***\n")
offset = 50
offset = 45
offset = 48

folder_src='../input/covid19-global-forecasting-week-4'
data = pd.read_csv(folder_src + '/train.csv')
test_data = pd.read_csv(folder_src + '/test.csv')
submission = pd.read_csv(folder_src + '/submission.csv')
print(data.shape)
print(test_data.shape)
print(submission.shape)

# %%
# ------------------------------------------------------------------
datap = {"Date": ["2020-01-30","2020-01-31","2020-02-01","2020-02-02"]}
df = pd.DataFrame(datap, columns = ["Date"])
ee =  date_format_convert_proc(df["Date"])
print(ee)

# %%
# ------------------------------------------------------------------

data.loc[data['Country_Region'] == 'Japan']

# %%

data["Date"] = date_format_convert_proc(data["Date"])
test_data["Date"] = date_format_convert_proc(test_data["Date"])

data['key'] = key_define_proc(data)
test_data['key'] = key_define_proc(test_data)

data_train = data

# ------------------------------------------------------------------
# date_xx = 20200408
# date_xx = 20200326
# date_xx = 20200402
date_xx = (parse("2020-04-02") - parse("2020-01-01")).days
nn_unique = data_train['key'].nunique()
#last day in test data
test_last_day = int(test_data.shape[0]/nn_unique) \
    + int(data_train[data_train.Date< date_xx].shape[0]/nn_unique)

sys.stderr.write("nn_unique = %d\n" % nn_unique)
dtemp = data_train['key'].unique()
print(len(dtemp))
for it in range(nn_unique):
    if dtemp[it][:5] == "Japan":
        print("dtemp[%d] = %s" % (it, dtemp[it]))
    elif dtemp[it][:5] == "Korea":
        print("dtemp[%d] = %s" % (it, dtemp[it]))

print("nn_unique = ",nn_unique)
print("test_last_day = ",test_last_day)

# %%
# ------------------------------------------------------------------

#days in test data
test_days = np.arange(int(data_train[data_train.Date< date_xx].shape[0]/nn_unique), test_last_day, 1)

print(test_days)

# ------------------------------------------------------------------
sys.stderr.write("*** check *** ccc ***\n")

np_train,np_train_death = pivot_calc_proc(data_train)


data_train[['ConfirmedCases', 'Fatalities']].corr()


# ------------------------------------------------------------------
mask_deaths = np.zeros_like(np_train[0])
for i in range(1,21):
    mask_deaths += np_train_death[-i]/(np_train[-i]+0.0001)
mask_deaths = mask_deaths/20   

mask_deaths[(mask_deaths < 0.5) & (mask_deaths!=0)].mean()

mask_deaths[(mask_deaths> 0.5)|(mask_deaths<0.005)] = mask_deaths[(mask_deaths < 0.5) & (mask_deaths!=0)].mean()

sys.stderr.write("*** check *** hhh ***\n")

mask_mesh = np.meshgrid(mask_deaths, test_days)[0].T.flatten()

assert mask_mesh.shape[0] == test_data.shape[0]

# ------------------------------------------------------------------
as_exponent = pattern_proc()

# ------------------------------------------------------------------
# np_train.shape[0]

coefs = curve_fit_proc(offset,np_train)

# %%
# ------------------------------------------------------------------
# plot_exp(offset,np_train,coefs)


# %%
# ------------------------------------------------------------------

ConfirmedCases_test = np.zeros((nn_unique, test_days.shape[0]))

# ------------------------------------------------------------------

sys.stderr.write("*** check *** jjj ***\n")
sys.stderr.write("np_train.shape[1] = %d\n" % np_train.shape[1])

for it in range(np_train.shape[1]):
    if it in as_exponent:
        function = new_epx
    ConfirmedCases_test[it] = function(test_days, *coefs[it])

ConfirmedCases_test.flatten().shape[0]

sys.stderr.write("*** check *** kkk ***\n")

assert ConfirmedCases_test.flatten().shape[0] == test_data.shape[0]

# ------------------------------------------------------------------
test_data['predict'] = ConfirmedCases_test.flatten()

# test_data[test_data['Country_Region']=='Japan']

submission['ConfirmedCases'] = ConfirmedCases_test.flatten()
submission['Fatalities'] = ConfirmedCases_test.flatten()*mask_mesh
#
submission.to_csv('submission.csv', index=False)

sys.stderr.write("*** end ***\n")
# ------------------------------------------------------------------

ノートブックに変換する方法です。

ipynb-py-convert  covid19-global-forecasting.py covid19_apr13.ipynb
0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0