Edited at

# StanとRでベイズ統計モデリング(アヒル本)をPythonにしてみる - 8.2 複数の階層を持つ階層モデル

More than 1 year has passed since last update.

### インポート

```import pandas as pd
import statsmodels.formula.api as smf
import pystan
import matplotlib.pyplot as plt
from matplotlib.figure import figaspect
from matplotlib.gridspec import GridSpec
from matplotlib.markers import MarkerStyle
import seaborn as sns
%matplotlib inline
```

### データ読み込み

```salary3 = pd.read_csv('./data/data-salary-3.txt')
```

# 8.2 複数の階層を持つ階層モデル

## 8.2.1 解析の目的とデータの分布の確認

```fig = plt.figure(figsize=figaspect(2/4))

gs1 = GridSpec(1, 1, figure=fig)
ax1 = plt.subplot(gs1[0, 0])
for gid in salary3['GID'].unique():
ax1.scatter('X', 'Y', data=salary3.query('GID==@gid'), marker=MarkerStyle.filled_markers[gid], label=gid)
sns.regplot('X', 'Y', data=salary3, scatter=False, ci=None, line_kws={'alpha': 0.3}, ax=ax1)
xlim = ax1.get_xlim()
ylim = ax1.get_ylim()
ax1.legend(title='GID')

gs2 = GridSpec(2, 2, figure=fig)
for i, gid in enumerate(salary3['GID'].unique()):
row = i // 2
col = i % 2
ax = fig.add_subplot(gs2[row, col], sharex=ax1, sharey=ax1)
ax.scatter('X', 'Y', data=salary3.query('GID==@gid'), c=sns.color_palette()[i], marker=MarkerStyle.filled_markers[gid], alpha=0.8, label=gid)
sns.regplot('X', 'Y', data=salary3, scatter=False, ci=None, line_kws={'alpha': 0.3}, ax=ax)
ax.legend(title='GID')
if row == 1:
plt.setp(ax, xlabel='X')
else:
plt.setp(ax, xlabel='')
plt.setp(ax.get_xticklabels(), visible=False)
if col == 0:
plt.setp(ax, ylabel='Y')
else:
plt.setp(ax, ylabel='')
plt.setp(ax.get_yticklabels(), visible=False)
plt.setp(ax1, xlim=xlim, ylim=ylim)
gs1.tight_layout(fig, rect=[None, None, 0.5, None])
gs2.tight_layout(fig, rect=[0.5, None, None, None])
plt.show()
```

```KIDGID = salary3.loc[:, ('KID', 'GID')].drop_duplicates()

N = salary3.index.size
K = 30
G = 3
coefs = salary3.groupby('KID').apply(lambda d: smf.ols('Y~X', data=d).fit().params.rename({'Intercept': 'a', 'X': 'b'})).reset_index()
d_plot = pd.merge(coefs, KIDGID, on='KID')

fig = plt.figure(figsize=figaspect(3/4))
for i, gid in enumerate(salary3['GID'].unique()):
ax1 = fig.add_subplot(3, 2, i*2+1, sharex=ax1 if i > 0 else None, sharey=ax1 if i > 0 else None)
sns.distplot(d_plot.query('GID==@i+1')['a'], ax=ax1)
plt.setp(ax1, title=gid)
ax2 = fig.add_subplot(3, 2, i*2+2, sharex=ax2 if i > 0 else None, sharey=ax2 if i > 0 else None)
sns.distplot(d_plot.query('GID==@i+1')['b'], ax=ax2)
plt.setp(ax2, title=gid)
if i < salary3['GID'].unique().size - 1:
plt.setp(ax1, xlabel='')
plt.setp(ax1.get_xticklabels(), visible=False)
plt.setp(ax2, xlabel='')
plt.setp(ax2.get_xticklabels(), visible=False)

plt.tight_layout()
plt.show()
```

## 8.2.4 Stanで実装−その1

```data = dict(
N=salary3.index.size,
G=salary3['GID'].max(),
K=salary3['KID'].max(),
X=salary3['X'],
Y=salary3['Y'],
KID=salary3['KID'],
K2G=salary3.drop_duplicates(subset=['KID', 'GID'])['GID']
)
fit = pystan.stan('./stan/model8-5.stan', data=data, seed=12345)
```

## 8.2.7 Stanで実装−その2

```data = {col: salary3[col] for col in salary3.columns}
data['N'] = salary3.index.size
data['G'] = salary3['GID'].max()
data['K'] = salary3['KID'].max()
data['K2G'] = salary3.drop_duplicates(subset=['KID', 'GID'])['GID']
fit = pystan.stan('./stan/model8-6.stan', data=data, seed=123)
```