インポート
import numpy as np
import pandas as pd
import pystan
import matplotlib.pyplot as plt
from matplotlib.figure import figaspect
from matplotlib import colors
from matplotlib.markers import MarkerStyle
%matplotlib inline
データの準備
attendance1 = pd.read_csv('./data/data-attendance-1.txt')
attendance2 = pd.read_csv('./data/data-attendance-2.txt')
attendance3 = pd.read_csv('./data/data-attendance-3.txt')
data3a = pd.read_csv('./data/data3a.csv')
data4a = pd.read_csv('./data/data4a.csv')
(1)
data = dict(
N=len(attendance1.index),
A=attendance1['A'],
Score=attendance1['Score']/200,
Y=attendance1['Y']
)
fit = pystan.stan('./stan/model5-3.stan', data=data, seed=1234)
ms = fit.extract()
noise = attendance1['Y'].values - ms['mu']
(3)
pd.crosstab(attendance3['A'], attendance3['Y'])
(4)
data = {col: attendance3[col] for col in attendance3.columns if col in ['A', 'Score', 'Y']}
data['Score'] /= 200
data['I'] = attendance3.index.size
data['WID'] = pd.Categorical(attendance3['Weather'], categories=['', 'A', 'B', 'C'], ordered=True).codes
fit = pystan.stan('./stan/chap05ex4.stan', data=data, pars=['b', 'bw2', 'bw3'], seed=1234)
(5)
data = {col: attendance2[col] for col in attendance2.columns if col in ['A', 'Score', 'M']}
data['Score'] /= 200
data['N'] = attendance2.index.size
fit = pystan.stan('./stan/model5-6b.stan', data=data, seed=1234)
ms = fit.extract()
d_qua = pd.DataFrame(np.percentile(ms['m_pred'], (10, 50, 90), axis=0).T, columns=('p10', 'p50', 'p90'))
d_qua = pd.concat([attendance2, d_qua], axis=1)
plt.figure(figsize=figaspect(1))
ax = plt.axes()
c = ['black', 'gray']
for v in d_qua['A'].unique():
d_part = d_qua.query('A == @v')
ax.errorbar('M', 'p50', data=d_part, yerr=[d_part['p50']-d_part['p10'], d_part['p90']-d_part['p50']], fmt='.', color=colors.to_rgb(c[v]), marker=MarkerStyle.filled_markers[int(v)], label='A={}'.format(v))
# ax.axline(1, 1)
lim = (10, 80)
ax.plot(lim, lim, c='k', alpha=3/5)
plt.setp(ax, xlim=lim, ylim=lim, xlabel='Obserbed', ylabel='Predicted')
plt.show()
(6)
data = {col.upper(): data3a[col] for col in data3a.columns if col in ['x', 'y']}
data['N'] = data3a.index.size
data['F'] = pd.Categorical(data3a['f']).codes
fit = pystan.stan('./stan/chap05ex6.stan', data=data, seed=1234)
(7)
data = {col.upper(): data4a[col] for col in data4a.columns if col in ['N', 'y', 'x']}
data['I'] = data4a.index.size
data['F'] = pd.Categorical(data4a['f']).codes
fit = pystan.stan('./stan/chap05ex7.stan', data=data, seed=1234)
ms = fit.extract()
probs = (10, 50, 90)
d_qua = pd.DataFrame(np.percentile(ms['y_pred'], probs, axis=0).T, columns=['p{}'.format(p) for p in probs])
plt.figure(figsize=figaspect(1))
ax = plt.axes()
np.random.seed(1234)
x = data4a['y'] + np.random.uniform(-0.2, 0.2, data4a.index.size)
ax.errorbar(x, d_qua['p50'], yerr=[d_qua['p50']-d_qua['p10'], d_qua['p90']-d_qua['p50']], fmt='o', color='k', markersize=3, elinewidth=1)
xmin, xmax = ax.get_xlim()
ymin, ymax = ax.get_ylim()
lim = (min(xmin, ymin), max(xmax, ymax))
plt.plot(lim, lim, c='k', alpha=0.6)
plt.setp(ax, xlabel='Obserbed', ylabel='Predicted', xlim=lim, ylim=lim)
plt.show()