Python
Stan
PyStan

StanとRでベイズ統計モデリング(アヒル本)をPythonにしてみる - 12.8 地図を使った空間構造

実行環境

インポート

import numpy as np
from scipy import stats
import pandas as pd
from japanmap import pref_names, picture
import japanmap
import pystan
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import cm
from matplotlib.figure import figaspect
import seaborn as sns
%matplotlib inline
from bokeh.io import output_notebook
from bokeh.resources import INLINE
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, LabelSet
from bokeh.models.tickers import FixedTicker
output_notebook(resources=INLINE)

データ読み込み

map_temperature = pd.read_csv('./data/data-map-temperature.txt')
map_neighbor = pd.read_csv('./data/data-map-neighbor.txt')
map_JIS = pd.read_csv('./data/data-map-JIS.txt', header=None)
map_JIS.columns = ('prefID', 'name')

12.8 地図を使った空間構造

data = dict(
    N=map_temperature.index.size,
    Y=map_temperature['Y'],
    I=map_neighbor.index.size,
    From=map_neighbor['From'],
    To=map_neighbor['To']
)

stanmodel = pystan.StanModel('./stan/model12-14.stan')
fit = stanmodel.sampling(data=data, seed=1234, init=lambda: dict(r=map_temperature['Y'], s_r=1, s_Y=0.1))
def draw_map(ax, temperatures):
    norm = mpl.colors.Normalize(vmin=9, vmax=19)
    sm = cm.ScalarMappable(norm=norm, cmap='binary')
    colors = {}
    for prefID, temperature in zip(map_temperature['prefID'], temperatures):
        r, g, b, a = sm.to_rgba(temperature, bytes=True)
        colors[prefID] = (int(r),) * 3
    ax.imshow(picture(colors))

_, (ax1, ax2) = plt.subplots(1, 2, figsize=figaspect(3/8))

draw_map(ax1, map_temperature['Y'])

ms = fit.extract()
est = ms['r'].mean(axis=0)
draw_map(ax2, est)

plt.show()

fig12-10.png

この終盤に来てのmatplotlib日本語化は心折れたので、bokehに逃げました。東京と静岡が重なっているのも見なかったことにしました。

probs = (2.5, 50, 97.5)
d_est = pd.DataFrame(np.percentile(ms['r'], probs, axis=0).T, columns=['p{}'.format(p) for p in probs])
d_est['x'] = map_temperature['Y']
d_all = pd.concat([map_JIS, d_est], axis=1)
d_all['diff'] = d_all['x'] - d_all['p50']
top3_diff = d_all.iloc[np.argsort(-np.abs(d_all['diff']))[:3]]

source = ColumnDataSource(d_all)
p = figure(x_range=(9, 24), y_range=(9, 24))
p.scatter(x='x', y='p50', source=source)
p.multi_line([(x, x) for x in d_all['x']], [(y1, y2) for y1, y2 in zip(d_all['p2.5'], d_all['p97.5'])])
p.line((9, 24), (9, 24), line_dash='dashed')
p.add_layout(LabelSet(x='x', y='p50', text='name', source=ColumnDataSource(top3_diff)))
p.xaxis.axis_label = 'Observed'
p.yaxis.axis_label = 'Predicted'
ticks = list(range(0, 26, 5))
p.xaxis.ticker = FixedTicker(ticks=ticks)
p.yaxis.ticker = FixedTicker(ticks=ticks)
show(p)

bokeh_plot.png

d_noise = map_temperature['Y'].values.reshape((1, -1)) - ms['r']

def get_map(col):
    kernel = stats.gaussian_kde(col)
    dens_x = np.linspace(col.min(), col.max(), kernel.n)
    dens_y = kernel.pdf(dens_x)
    mode_i = np.argmax(dens_y)
    mode_x = dens_x[mode_i]
    mode_y = dens_y[mode_i]
    return pd.Series([mode_x, mode_y], index=['X', 'Y'])

d_mode = pd.DataFrame(d_noise).apply(get_map).T
d_mode.columns = ('X', 'Y')
s_MAP = get_map(ms['s_Y'])['X']
plt.figure(figsize=figaspect(1))
ax = plt.axes()
sns.distplot(d_mode['X'], color='k', hist_kws=dict(facecolor='w', edgecolor='k'), kde_kws=dict(shade=True), ax=ax)
xmin, xmax = ax.get_xlim()
xx = np.linspace(xmin, xmax, 50)
ax.plot(xx, stats.norm.pdf(xx, loc=0, scale=s_MAP), color='k', linestyle='dashed')
plt.setp(ax, xlabel='value', ylabel='density', xlim=(xmin, xmax))
plt.show()

fig12-11-right.png