More than 5 years have passed since last update.

StanとRでベイズ統計モデリング(アヒル本)をPythonにしてみる - 8.2 複数の階層を持つ階層モデル

Last updated at 2018-08-21Posted at 2018-08-19

インポート

import pandas as pd
import statsmodels.formula.api as smf
import pystan
import matplotlib.pyplot as plt
from matplotlib.figure import figaspect
from matplotlib.gridspec import GridSpec
from matplotlib.markers import MarkerStyle
import seaborn as sns
%matplotlib inline

データ読み込み

salary3 = pd.read_csv('./data/data-salary-3.txt')

8.2 複数の階層を持つ階層モデル

8.2.1 解析の目的とデータの分布の確認

fig = plt.figure(figsize=figaspect(2/4))

gs1 = GridSpec(1, 1, figure=fig)
ax1 = plt.subplot(gs1[0, 0])
for gid in salary3['GID'].unique():
    ax1.scatter('X', 'Y', data=salary3.query('GID==@gid'), marker=MarkerStyle.filled_markers[gid], label=gid)
sns.regplot('X', 'Y', data=salary3, scatter=False, ci=None, line_kws={'alpha': 0.3}, ax=ax1)
xlim = ax1.get_xlim()
ylim = ax1.get_ylim()
ax1.legend(title='GID')

gs2 = GridSpec(2, 2, figure=fig)
for i, gid in enumerate(salary3['GID'].unique()):
    row = i // 2
    col = i % 2
    ax = fig.add_subplot(gs2[row, col], sharex=ax1, sharey=ax1)
    ax.scatter('X', 'Y', data=salary3.query('GID==@gid'), c=sns.color_palette()[i], marker=MarkerStyle.filled_markers[gid], alpha=0.8, label=gid)
    sns.regplot('X', 'Y', data=salary3, scatter=False, ci=None, line_kws={'alpha': 0.3}, ax=ax)
    ax.legend(title='GID')
    if row == 1:
        plt.setp(ax, xlabel='X')
    else:
        plt.setp(ax, xlabel='')
        plt.setp(ax.get_xticklabels(), visible=False)
    if col == 0:
        plt.setp(ax, ylabel='Y')
    else:
        plt.setp(ax, ylabel='')
        plt.setp(ax.get_yticklabels(), visible=False)
plt.setp(ax1, xlim=xlim, ylim=ylim)
gs1.tight_layout(fig, rect=[None, None, 0.5, None])
gs2.tight_layout(fig, rect=[0.5, None, None, None])
plt.show()

KIDGID = salary3.loc[:, ('KID', 'GID')].drop_duplicates()

N = salary3.index.size
K = 30
G = 3
coefs = salary3.groupby('KID').apply(lambda d: smf.ols('Y~X', data=d).fit().params.rename({'Intercept': 'a', 'X': 'b'})).reset_index()
d_plot = pd.merge(coefs, KIDGID, on='KID')

fig = plt.figure(figsize=figaspect(3/4))
for i, gid in enumerate(salary3['GID'].unique()):
    ax1 = fig.add_subplot(3, 2, i*2+1, sharex=ax1 if i > 0 else None, sharey=ax1 if i > 0 else None)
    sns.distplot(d_plot.query('GID==@i+1')['a'], ax=ax1)
    plt.setp(ax1, title=gid)
    ax2 = fig.add_subplot(3, 2, i*2+2, sharex=ax2 if i > 0 else None, sharey=ax2 if i > 0 else None)
    sns.distplot(d_plot.query('GID==@i+1')['b'], ax=ax2)
    plt.setp(ax2, title=gid)
    if i < salary3['GID'].unique().size - 1:
        plt.setp(ax1, xlabel='')
        plt.setp(ax1.get_xticklabels(), visible=False)
        plt.setp(ax2, xlabel='')
        plt.setp(ax2.get_xticklabels(), visible=False)

plt.tight_layout()
plt.show()

8.2.4 Stanで実装−その1

data = dict(
    N=salary3.index.size,
    G=salary3['GID'].max(),
    K=salary3['KID'].max(),
    X=salary3['X'],
    Y=salary3['Y'],
    KID=salary3['KID'],
    K2G=salary3.drop_duplicates(subset=['KID', 'GID'])['GID']
)
fit = pystan.stan('./stan/model8-5.stan', data=data, seed=12345)

8.2.7 Stanで実装−その2

data = {col: salary3[col] for col in salary3.columns}
data['N'] = salary3.index.size
data['G'] = salary3['GID'].max()
data['K'] = salary3['KID'].max()
data['K2G'] = salary3.drop_duplicates(subset=['KID', 'GID'])['GID']
fit = pystan.stan('./stan/model8-6.stan', data=data, seed=123)

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up