2
2

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?

More than 5 years have passed since last update.

StanとRでベイズ統計モデリング(アヒル本)をPythonにしてみる - Chapter 8 練習問題

Last updated at Posted at 2018-08-19

実行環境

インポート

import numpy as np
import pandas as pd
import pystan
import matplotlib.pyplot as plt
from matplotlib.figure import figaspect
from matplotlib.markers import MarkerStyle
import seaborn as sns
%matplotlib inline

データ読み込み

salary2 = pd.read_csv('./data/data-salary-2.txt')
salary3 = pd.read_csv('./data/data-salary-3.txt')
attendance41 = pd.read_csv('./data/data-attendance-4-1.txt')
attendance42 = pd.read_csv('./data/data-attendance-4-2.txt')
d = pd.read_csv('./data/data7a.csv')
d1 = pd.read_csv('./data/d1.csv')

(1)

N = salary2.index.size
K = salary2['KID'].max()
data = {col: salary2[col] for col in salary2.columns}
data['N'] = N
data['K'] = K
fit1 = pystan.stan('./stan/model8-2.stan', data=data, seed=1234)
ms1 = fit1.extract()

N_mcmc = ms1['lp__'].size
X_new = np.arange(salary2['X'].max()+1)
N_X = X_new.size

d2 = pd.DataFrame([])
for k in range(K):
    loc = ms1['a'][:, k].reshape((-1, 1)) + np.outer(ms1['b'][:, k], X_new)
    scale = np.broadcast_to(ms1['s_Y'].reshape((-1, 1)), loc.shape)
    y_base_mcmc2 = np.random.normal(loc, scale)
    d2 = d2.append(pd.DataFrame(np.hstack([X_new.reshape((-1, 1)), np.percentile(y_base_mcmc2, (2.5, 50, 97.5), axis=0).T, np.full_like(X_new, k+1).reshape((-1, 1))]), columns=('X', 'p2.5', 'p50', 'p97.5', 'KID')), ignore_index=True)

_, axes = plt.subplots(2, 2, figsize=figaspect(2/2), sharex=True, sharey=True)
for i, ax in enumerate(axes.flat):
    kid = i + 1
    q = 'KID==@kid'
    ax.fill_between('X', 'p2.5', 'p97.5', data=d2.query(q), color='k', alpha=1/5)
    ax.plot('X', 'p50', data=d2.query(q), color='k', alpha=0.8, label='')
    ax.scatter('X', 'Y', data=salary2.query(q), c='k', marker=MarkerStyle.filled_markers[i], label=kid)
    ax.legend(title='KID')
    plt.setp(ax, title=kid)
plt.tight_layout()
plt.show()

chap08ex1.png

(2)

N = salary2.index.size
K = salary2['KID'].max()
data = {col: salary2[col] for col in salary2.columns}
data['N'] = N
data['K'] = K
fit2 = pystan.stan('./stan/model8-3.stan', data=data, seed=1234)
ms2 = fit2.extract()

N_mcmc = ms2['lp__'].size
X_new = np.arange(salary2['X'].max()+1)
N_X = X_new.size

d3 = pd.DataFrame([])
for k in range(K):
    loc = ms2['a'][:, k].reshape((-1, 1)) + np.outer(ms2['b'][:, k], X_new)
    scale = np.broadcast_to(ms2['s_Y'].reshape((-1, 1)), loc.shape)
    y_base_mcmc3 = np.random.normal(loc, scale)
    d3 = d3.append(pd.DataFrame(np.hstack([X_new.reshape((-1, 1)), np.percentile(y_base_mcmc3, (2.5, 50, 97.5), axis=0).T, np.full_like(X_new, k+1).reshape((-1, 1))]), columns=('X', 'p2.5', 'p50', 'p97.5', 'KID')), ignore_index=True)

_, axes = plt.subplots(2, 2, figsize=figaspect(2/2), sharex=True, sharey=True)
for i, ax in enumerate(axes.flat):
    kid = i + 1
    q = 'KID==@kid'
    ax.fill_between('X', 'p2.5', 'p97.5', data=d3.query(q), color='k', alpha=1/5)
    ax.plot('X', 'p50', data=d3.query(q), color='k', alpha=0.8, label='')
    ax.scatter('X', 'Y', data=salary2.query(q), c='k', marker=MarkerStyle.filled_markers[i], label=kid)
    ax.legend(title='KID')
    plt.setp(ax, title=kid)
plt.tight_layout()
plt.show()

chap08ex2.png

(3)

N = salary3.index.size
G = salary3['GID'].max()
K = salary3['KID'].max()
data = dict(
    N=N,
    G=G,
    K=K,
    X=salary3['X'],
    Y=salary3['Y'],
    KID=salary3['KID'],
    K2G=salary3.drop_duplicates(('KID', 'GID'))['GID']
)
fit5 = pystan.stan('./stan/model8-5.stan', data=data, seed=12345)
ms3 = fit5.extract()
N_mcmc = ms3['lp__'].size

param_names = ['mcmc'] + ['a1_{}'.format(i+1) for i in range(3)] + ['b1_{}'.format(i+1) for i in range(3)]
d_est = pd.DataFrame(np.hstack([np.arange(N_mcmc).reshape((-1, 1)), ms3['a1'], ms3['b1']]), columns=param_names)
d_qua = d_est.quantile((0.025, 0.5, 0.975)).transpose().reset_index()
d_qua.columns = ('X', 'p2.5', 'p50', 'p97.5')
d_melt = pd.melt(d_est, id_vars=('mcmc'), var_name='X')

_, axes = plt.subplots(1, 2, figsize=figaspect(5/8))
for ax, param in zip(axes, ['a', 'b']):
    q = 'X.str.startswith("{}1")'.format(param)
    ax.hlines('X', 'p2.5', 'p97.5', data=d_qua.query(q))
    sns.violinplot('value', 'X', data=d_melt.query(q), scale='width', inner=None, color='w', ax=ax)
    ax.scatter('p50', 'X', data=d_qua.query(q), c='k')
    plt.setp(ax, xlabel='value', ylabel='parameter')
plt.tight_layout()
plt.show()

fig8-ex3.png

(4)

d_person = attendance42.groupby('PersonID')['Y'].mean()
plt.figure(figsize=figaspect(3/4))
sns.distplot(d_person)
plt.show()

fig8-ex4-person.png

d_course = attendance42.groupby('CourseID')['Y'].mean()
plt.figure(figsize=figaspect(3/4))
sns.distplot(d_course)
plt.show()

fig8-ex4-course.png

(5)

d_conv = pd.Series([0, 0.2, 1], index=('A', 'B', 'C'))
data = dict(
    N=attendance41.index.size,
    C=attendance42['CourseID'].max(),
    I=attendance42.index.size,
    A=attendance41['A'],
    Score=attendance41['Score']/200,
    PID=attendance42['PersonID'],
    CID=attendance42['CourseID'],
    W=d_conv[attendance42['Weather']],
    Y=attendance42['Y']
)
fit = pystan.stan('./stan/model8ex5.stan', data=data, pars=('b', 'b_P', 'b_C', 's_P', 's_C', 'q'), seed=1234)

(6)

data = dict(
    N=d.index.size,
    Y=d['y']
)
fit = pystan.stan('./stan/model8ex6.stan', data=data, seed=1234)

(7)

F2int = pd.Series([0, 1], index=('C', 'T'))
pots = d1['pot'].unique()
N_Pot = pots.size
Pot2int = pd.Series(np.arange(N_Pot)+1, index=pots)

data = dict(
    N=d1.index.size,
    N_Pot=N_Pot,
    F=F2int[d1['f']],
    N2Pot=Pot2int[d1['pot']],
    Y=d1['y']
)
fit = pystan.stan('./stan/model8ex7.stan', data=data, seed=1234)
2
2
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
2
2

Delete article

Deleted articles cannot be recovered.

Draft of this article would be also deleted.

Are you sure you want to delete this article?