0
0

More than 3 years have passed since last update.

Kaggle COVID19 Global Forecasting (Week 3)

Posted at 2020-04-06

次のコンペに投稿する方法です。

COVID19 Global Forecasting (Week 3)
covid19-global-forecasting.py を covid19_apr06.ipynb に変換してアップロードしました。

covid19-global-forecasting.py
#! /usr/bin/python
# -*- coding: utf-8 -*-
#
#   covid19-global-forecasting.py
#
#                       Apr/06/2020
#                           PM 14:49
# ------------------------------------------------------------------
import numpy as np
import pandas as pd
import sys
from scipy.optimize import curve_fit

# NOTE(review): every argument is None, which (per the NumPy seterr docs)
# appears to leave the current floating-point error handling unchanged, so
# this call looks like a placeholder -- confirm before relying on it.
np.seterr(all=None, divide=None, over=None, under=None, invalid=None)
# ------------------------------------------------------------------
def curve_fit_proc(np_train):
    """Fit one growth curve per column of *np_train* and return the coefficients.

    The curve family for each column index is looked up in the module-level
    lists ``as_exponent``, ``as_linear`` and ``as_sigmoid``; the matching
    module-level model function (``exp``, ``linear`` or ``sigmoid``) is
    fitted with ``scipy.optimize.curve_fit``.

    Args:
        np_train: 2-D array. Rows from index 45 onward are used for the
            exponential and sigmoid fits, rows from index 55 onward for the
            linear fit; each column is fitted independently.

    Returns:
        A list with exactly one coefficient array per column, in column
        order, so that ``coefs[it]`` belongs to column ``it``.

    Raises:
        ValueError: if a column index is not listed in exactly one of the
            three family lists.  Without this check a missing or duplicated
            index would silently shift every later entry of the result.
    """
    sys.stderr.write("*** curve_fit_proc *** start ***\n")
    coefs = []

    # x axis for the fits: row indices starting at 45
    XX = np.arange(45, np_train.shape[0], 1)

    # sets give O(1) membership tests inside the loop
    exponent_cols = set(as_exponent)
    linear_cols = set(as_linear)
    sigmoid_cols = set(as_sigmoid)

    for it in range(np_train.shape[1]):
        sys.stderr.write("%d " % it)
        hits = (it in exponent_cols) + (it in linear_cols) + (it in sigmoid_cols)
        if hits != 1:
            raise ValueError(
                "column %d must belong to exactly one curve family (found %d)"
                % (it, hits)
            )
        if it in exponent_cols:
            fitted = curve_fit(
                exp, XX, np_train[45:, it],
                p0=(0.5, XX[0], 2, 0), maxfev=100000,
            )
        elif it in linear_cols:
            # the linear fit skips ten more rows than the other two families
            fitted = curve_fit(
                linear, XX[10:], np_train[55:, it],
                p0=(1, 0, 0), maxfev=100000,
            )
        else:
            # initial amplitude guess: half of the last observed value
            fitted = curve_fit(
                sigmoid, XX, np_train[45:, it],
                p0=(1, XX[0], np_train[-1, it] / 2, 0), maxfev=100000,
            )
        # curve_fit returns (popt, pcov); only the optimal parameters are kept
        coefs.append(fitted[0])
#
    sys.stderr.write("*** curve_fit_proc *** end ***\n")
#
    return coefs
# ------------------------------------------------------------------
def pattern_proc():
    """Return the hand-assigned curve family for every column index.

    Returns:
        A tuple ``(as_exponent, as_linear, as_sigmoid)`` of three lists of
        integer column indices.
    """
    exponent_ids = [
        0, 1, 2, 3, 13, 14, 27,
        41, 42, 44, 48, 82, 85, 92, 93, 95, 96,
        100, 102, 106, 108, 110, 112, 113, 114,
        122, 125, 130, 131, 132, 134, 137, 138, 139, 143, 145, 148,
        149, 161, 165, 166, 172, 175, 177, 179, 185, 190, 194,
        202, 204, 205, 212, 213, 216, 218,
        224, 225,
        *range(228, 236),
        237, 238, 239,
        241, 242, 243, 244, 246, 247, 250, 252, 256, 258, 260,
        261, 264, 266, 269, 272, 273, 274, 284, 285, 286,
        *range(288, 306),
    ]

    linear_ids = [
        4, 5, 7, 9, 10, 17, 18, 19, 22, 24, 25, 26, 29, 30,
        32, 33, 34, 36, 37, 38, 39, 40, 43, 45, 46, 47, 49,
        51, 53, 54, 55, 56, 57, 58,
        *range(60, 82),
        83, 86, 88, 89, 91, 94, 97, 98, 99,
        101, 103, 104, 105, 107, 109, 111, 115, 116, 117, 118,
        120, 121, 123, 127, 128, 129, 135, 136, 141, 142, 144,
        146, 147, 150, 151, 152, 153, 154, 156, 159,
        160, 162, 164, 167, 169, 170, 171, 174, 178,
        180, 182, 183, 184, 186, 187, 188, 189,
        191, 192, 195, 196, 197, 198, 200, 201, 203, 207, 208,
        209, 210, 211, 214, 217, 219,
        220, 221, 222, 226, 236, 240, 245,
        251, 254, 257, 259, 262, 263, 265,
        267, 268, 270, 271, 277, 278, 279,
        280, 281, 282, 283, 287,
    ]

    sigmoid_ids = [
        6, 8, 11, 12, 15, 16, 20, 21, 23, 28,
        31, 35, 50, 52, 59, 84, 87, 90,
        119, 124, 126, 133, 140, 155, 157, 158,
        163, 168, 173, 176, 181,
        193, 199, 206, 215, 223, 227, 248, 249, 253, 255, 276, 275,
    ]

    return exponent_ids, linear_ids, sigmoid_ids
# ------------------------------------------------------------------
def exp(x, a, b, d, p):
    """Exponential model ``d * e**(a*x - b) + p``.

    If evaluating the expression raises, the error is reported on stderr
    and ``0.0`` is returned instead of propagating the exception.
    """
    try:
        return d * np.exp(a * x - b) + p
    except Exception as err:
        sys.stderr.write("*** error *** in exp ***\n")
        sys.stderr.write(str(err) + "\n")
    # fallback value used only when the evaluation above failed
    return 0.0
#
# ------------------------------------------------------------------
def linear(xx, aa, bb, cc):
    """Linear model: slope ``aa`` applied to ``xx`` shifted by ``bb``, plus ``cc``."""
    shifted = xx - bb
    return aa * shifted + cc

# ------------------------------------------------------------------
def sigmoid(xx, aa, bb, dd, pp):
    """Logistic model ``dd / (1 + e**-(aa*xx - bb)) + pp``.

    If evaluating the logistic expression raises, the error is reported on
    stderr and the process exits with status 1.
    """
    # computed outside the try block: errors here propagate to the caller
    logit = aa * xx - bb
    try:
        return dd / (1 + np.exp(-logit)) + pp
    except Exception as err:
        sys.stderr.write("*** error *** in sigmoid ***\n")
        sys.stderr.write(str(err) + "\n")
        sys.exit(1)
# ------------------------------------------------------------------
def new_linear(x, a, b, c):
    """Linear model ``a*(x-b)+c`` with negative results replaced by zero."""
    value = a * (x - b) + c
    # multiplying by the boolean mask zeroes out negative predictions
    return value * (value >= 0)

# ------------------------------------------------------------------
def new_sigmoid(x, a, b, d, p):
    """Evaluate ``sigmoid(x, a, b, d, p)`` with negative results replaced by zero.

    The model is evaluated once and reused for both the value and the
    non-negativity mask (it was previously computed twice per call).
    """
    value = sigmoid(x, a, b, d, p)
    # multiplying by the boolean mask zeroes out negative predictions
    return value * (value >= 0)

# ------------------------------------------------------------------
def new_epx(x, a, b, d, p):
    """Evaluate ``exp(x, a, b, d, p)`` with negative results replaced by zero.

    The model is evaluated once and reused for both the value and the
    non-negativity mask.  Calling ``exp`` twice doubled the work and, when
    ``exp`` hit an error, wrote its stderr report twice.
    """
    value = exp(x, a, b, d, p)
    # multiplying by the boolean mask zeroes out negative predictions
    return value * (value >= 0)

# ------------------------------------------------------------------
def RMSLE(pred, actual):
    """Root mean squared difference between ``log(pred + 1)`` and ``log(actual + 1)``."""
    log_gap = np.log(pred + 1) - np.log(actual + 1)
    mean_square = np.mean(np.power(log_gap, 2))
    return np.sqrt(mean_square)
#
# ------------------------------------------------------------------
sys.stderr.write("*** start ***\n")

# Directory holding the three competition CSV files.
folder_src = '../input/covid19-global-forecasting-week-3'
data = pd.read_csv(folder_src + '/train.csv')
test_data = pd.read_csv(folder_src + '/test.csv')
submission = pd.read_csv(folder_src + '/submission.csv')

# Report the (rows, columns) shape of each table, in load order.
for loaded_frame in (data, test_data, submission):
    print(loaded_frame.shape)

# ------------------------------------------------------------------

# Inspection expression carried over from the notebook; its result is discarded.
data.loc[data['Country_Region'] == 'Japan']

for frame in (data, test_data):
    # "YYYY-MM-DD" strings become YYYYMMDD integers
    frame["Date"] = frame["Date"].apply(lambda x: x.replace("-", "")).astype(int)
    # one key per (country, province) pair
    frame['key'] = (
        frame['Country_Region'].astype('str')
        + " "
        + frame['Province_State'].astype('str')
    )

data_train = data

# ------------------------------------------------------------------
date_xx = 20200326
nn_unique = data_train['key'].nunique()

# number of training rows per key dated strictly before date_xx
rows_before_cutoff = int(data_train[data_train.Date < date_xx].shape[0] / nn_unique)

# last day in test data (exclusive end of the test-day index range)
test_last_day = int(test_data.shape[0] / nn_unique) + rows_before_cutoff

sys.stderr.write("nn_unique = %d\n" % nn_unique)
dtemp = data_train['key'].unique()
print(len(dtemp))

print("test_last_day = ",test_last_day)

# ------------------------------------------------------------------

# days in test data, expressed as row indices on the training time axis
test_days = np.arange(rows_before_cutoff, test_last_day, 1)

print(test_days)

# ------------------------------------------------------------------
sys.stderr.write("*** check *** ccc ***\n")

# One row per Date, one column per key, for each of the two targets.
pivot_train = data_train.pivot_table(index='Date', columns='key', values='ConfirmedCases')
pivot_train_d = data_train.pivot_table(index='Date', columns='key', values='Fatalities')
np_train, np_train_d = pivot_train.to_numpy(), pivot_train_d.to_numpy()

sys.stderr.write("*** check *** eee ***\n")

# Inspection expressions carried over from the notebook; results are discarded.
data_train[['ConfirmedCases', 'Fatalities']].corr()

pivot_train.head(10)

# ------------------------------------------------------------------

# For each candidate lag s (in rows) between confirmed cases and fatalities,
# print how much the fatalities/cases ratio changes from one row to the next
# over the last 19 rows.  The accumulator is no longer called ``sum`` so the
# builtin stays usable for the rest of the module.
shift = [0,1,2,3,4,5,6,7]
for s in shift:
    ratio_drift = 0
    for i in range(1, 20):
        # 0.0001 keeps the denominator non-zero
        ratio_now = np_train_d[-i] / (np_train[-i - s] + 0.0001)
        ratio_prev = np_train_d[-i - 1] / (np_train[-i - 1 - s] + 0.0001)
        ratio_drift += np.abs((ratio_now - ratio_prev).mean())
    print(ratio_drift, s)

sys.stderr.write("*** check *** ggg ***\n")

# ------------------------------------------------------------------
# Per-column fatalities/cases ratio, averaged over the last 20 rows.
mask_deaths = np.zeros_like(np_train[0])
for i in range(1,21):
    # 0.0001 keeps the denominator non-zero
    mask_deaths += np_train_d[-i]/(np_train[-i]+0.0001)
mask_deaths = mask_deaths/20   

# Inspection expression (mean of the non-zero ratios below 0.5); result is discarded.
mask_deaths[(mask_deaths < 0.5) & (mask_deaths!=0)].mean()

# Ratios above 0.5 or below 0.005 are overwritten with the mean of the
# non-zero ratios below 0.5.
mask_deaths[(mask_deaths> 0.5)|(mask_deaths<0.005)] = mask_deaths[(mask_deaths < 0.5) & (mask_deaths!=0)].mean()

sys.stderr.write("*** check *** hhh ***\n")

# Repeat each column's ratio once per test day and flatten column-major by
# key, giving one ratio per (key, test day) pair.
mask_mesh = np.meshgrid(mask_deaths, test_days)[0].T.flatten()

# The flattened mask must have one entry per row of the test table.
assert mask_mesh.shape[0] == test_data.shape[0]

# ------------------------------------------------------------------
as_exponent, as_linear, as_sigmoid = pattern_proc()

# ------------------------------------------------------------------

# Indices listed in more than one family; inspection expression carried over
# from the notebook, its result is discarded.
exponent_ids, linear_ids, sigmoid_ids = set(as_exponent), set(as_linear), set(as_sigmoid)
(sigmoid_ids & linear_ids) | (sigmoid_ids & exponent_ids) | (exponent_ids & linear_ids)

sys.stderr.write("*** check *** iii ***\n")

# ------------------------------------------------------------------
# Inspection expression (number of rows in the training matrix); discarded.
np_train.shape[0]

# One coefficient array per column of np_train.
coefs = curve_fit_proc(np_train)

# ------------------------------------------------------------------
# import matplotlib.pyplot as plt
# for i in as_linear:
#    plt.plot(np_train[45:,i], label = str(i))
#    plt.plot(linear(X, *coefs[i]))
#    plt.legend()
#    plt.show()

# ------------------------------------------------------------------

# Prediction matrix: one row per key, one column per test day.
ConfirmedCases_test = np.zeros((nn_unique, test_days.shape[0]))

# ------------------------------------------------------------------

sys.stderr.write("*** check *** jjj ***\n")

for it in range(np_train.shape[1]):
    # Choose the clipped model matching the family fitted for this column.
    # Checked in sigmoid, linear, exponent order so that an index listed in
    # several families resolves exactly as before (the last match used to
    # win); an unclassified index now fails loudly instead of silently
    # reusing the previous column's model.
    if it in as_sigmoid:
        function = new_sigmoid
    elif it in as_linear:
        function = new_linear
    elif it in as_exponent:
        function = new_epx
    else:
        raise ValueError("column %d is not assigned to any curve family" % it)
    ConfirmedCases_test[it] = function(test_days, *coefs[it])

sys.stderr.write("*** check *** kkk ***\n")

# The flattened predictions must have one entry per row of the test table.
assert ConfirmedCases_test.flatten().shape[0] == test_data.shape[0]

# ------------------------------------------------------------------
# Row-major flattening: all test days of the first key, then the next key, ...
flat_cases = ConfirmedCases_test.flatten()

test_data['predict'] = flat_cases

# Fatalities are the predicted cases scaled by the per-key death ratio.
submission['ConfirmedCases'] = flat_cases
submission['Fatalities'] = flat_cases * mask_mesh
submission.to_csv('submission.csv', index=False)

sys.stderr.write("*** end ***\n")
# ------------------------------------------------------------------

実行時に次の CSV ファイルが必要です。これらは、コンペのページからダウンロードできます。

$ ls ../input/covid19-global-forecasting-week-3/
submission.csv  test.csv  train.csv

ノートブックに変換する方法です。

ipynb-py-convert  covid19-global-forecasting.py covid19_apr06.ipynb
0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0