単一の時系列データ(頻度は四半期)の特徴を示す
いつも冒頭で行うライブラリのインポート。
import pandas as pd
from datetime import datetime as dt
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import scipy.optimize
import statsmodels.api as sm
データ読み込み。
# Read the forecast table; the first CSV column becomes the row index.
df = pd.read_csv('data/best_forecast.csv', index_col=0)
# Re-index with the same labels parsed as timestamps.
df = df.set_index(pd.to_datetime(df.index))
データの分布をヒストグラムで示す関数。
def prob_prot(np_array, ax, num_bins=10):
    """Draw a density histogram of the data with a normal curve overlaid.

    The mean and sample standard deviation of ``np_array`` are computed
    with the standard-library ``statistics`` module.  The histogram is
    drawn on ``ax`` with ``density=1``, and the normal probability density
    for that mean and deviation is evaluated at the returned bin edges and
    plotted as a dashed line.  The title reports both statistics rounded
    to three decimals.

    Usage: ``axes[1] = prob_prot(df_ret.dropna().values, axes[1])``

    Args:
        np_array: Sequence of numeric observations.  ``statistics.stdev``
            requires at least two values.
        ax: Axes-like object providing ``hist``, ``plot`` and ``set_title``.
        num_bins: Number of histogram bins.  Defaults to 10, the value
            that used to be hard-coded.

    Returns:
        The same ``ax`` object that was passed in.
    """
    from statistics import mean, stdev

    mu = mean(np_array)       # mean of the distribution
    sigma = stdev(np_array)   # sample standard deviation of the distribution

    # Only the bin edges are needed afterwards; counts and patches are unused.
    _, bins, _ = ax.hist(np_array, num_bins, density=1)

    # Normal PDF evaluated at the bin edges, used as the "best fit" line.
    y = ((1 / (np.sqrt(2 * np.pi) * sigma)) *
         np.exp(-0.5 * (1 / sigma * (bins - mu))**2))
    ax.plot(bins, y, '--')

    ax.set_title(r'Histogram: $\mu=$' + str(round(mu, 3))
                 + r', $\sigma=$' + str(round(sigma, 3)))
    return ax
季節性をグラフで示す関数。
def seaz_prot(df_diff, ax, color=None):
    """Plot the observations grouped by calendar month, with per-month means.

    The months to show are taken from the last four rows of ``df_diff``
    (duplicates removed, first occurrence kept).  For each of those months
    every observation falling in that month is drawn as a scatter point at
    x = month number, and the NaN-ignoring mean of the month is drawn as a
    line and annotated with its value rounded to three decimals.

    Args:
        df_diff: pandas Series whose index exposes ``.month`` (the code
            reads ``df_diff.index.month``).
        ax: Axes-like object providing ``scatter``, ``set_xticks``,
            ``set_xticklabels``, ``plot``, ``text`` and ``set_title``.
        color: Colour of the mean line.  When omitted, the module-level
            ``diff_color`` is used if it exists, otherwise ``'#800000'``.

    Returns:
        The same ``ax`` object that was passed in.
    """
    if color is None:
        # Keep honouring the script-level colour setting without requiring it.
        color = globals().get('diff_color', '#800000')

    # Ordered, de-duplicated months of the most recent (up to four) rows.
    months = list(dict.fromkeys(df_diff.tail(4).index.month.tolist()))

    point_months = []
    point_values = []
    mean_values = []
    for month in months:
        values = df_diff[df_diff.index.month == month].values
        point_values.extend(values)
        point_months.extend([month] * len(values))
        mean_values.append(np.nanmean(values))

    month_means = pd.Series(mean_values, index=months, dtype=float).sort_index()

    ax.scatter(point_months, point_values)
    ax.set_xticks(month_means.index)
    # Call the method: assigning to it would overwrite it and set no labels.
    ax.set_xticklabels([str(month) for month in month_means.index])
    ax.plot(month_means.index, month_means, color=color)
    for month, value in zip(month_means.index, month_means):
        ax.text(month, value, str(round(value, 3)))
    ax.set_title('Seasonality')
    return ax
データのさまざまな特徴を1枚の図にまとめて示す。
# Keep only rows dated up to now; slice once rather than once per column.
df_past = df[:dt.today()]

# Gap between the two columns, relative to 'BEST_OPP'.
# NOTE(review): presumably actual vs. forecast operating profit - confirm
# against the data source.
df_diff = (df_past['IS_OPER_INC'] - df_past['BEST_OPP']) / df_past['BEST_OPP']

# Blank out every value outside the open interval (-0.3, 0.3).
df_diff = df_diff.where(df_diff.abs() < 0.3)
# df_ret is the same object as df_diff, so all panels use the filtered series.
df_ret = df_diff

plt.rcParams["font.size"] = 10
fig, ax = plt.subplots(2, 3, figsize=(20, 8))
axes = ax.flatten()
diff_color = "#800000"  # read by seaz_prot for the per-month mean line

# Panel 0: the filtered series itself.
axes[0].plot(df_ret)
axes[0].set_title("Raw data")

# Panel 1: histogram with normal curve.
axes[1] = prob_prot(df_ret.dropna().values, axes[1])

# Panel 2: autocorrelation up to lag 5 (NaNs removed first).
df_clean = df_diff.dropna()
fig = sm.graphics.tsa.plot_acf(df_clean, lags=5, ax=axes[2])

# First difference of the series; not used in the code shown here.
diff1_data = df_diff - df_diff.shift()

# Panel 3: observations grouped by month.
axes[3] = seaz_prot(df_diff, axes[3])

# Panels 4 and 5: trend and residual of a period-4 (quarterly) decomposition.
# Newer statsmodels takes `period`; fall back to the old `freq` keyword
# for releases that predate it.
try:
    res = sm.tsa.seasonal_decompose(df_clean.values, period=4)
except TypeError:
    res = sm.tsa.seasonal_decompose(df_clean.values, freq=4)
df_res = pd.DataFrame({'Trend': res.trend, 'Residual': res.resid},
                      index=df_clean.index)
axes[4].plot(df_res['Trend'])
axes[4].set_title('Trend')
axes[5].plot(df_res['Residual'])
axes[5].set_title('Residual (=Raw data - Seasonality - Trend)')

# Same major grid on every panel.
for panel in axes:
    panel.grid(which='major', color='gray', linestyle='-')

plt.show()