More than 3 years have passed since last update.

正規分布をmatplotlibで描画してみる

Last updated at 2020-07-17Posted at 2020-07-17

概要

正規分布をもっとイメージ的に掴みたいので、matplotlibで描画しながらメモしておきます。

利用したライブラリ

Python 3.8.3
matplotlib 3.3.2
numpy 1.19.0
seaborn 0.10.1
tensorflow 2.2.0
tensorflow-probability 0.10.1

tensorflow-probabilityも試したいので、今回は利用してみます。

環境設定

%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_probability as tfp
from matplotlib.gridspec import GridSpec
from mpl_toolkits.axes_grid1 import make_axes_locatable
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d.art3d import Poly3DCollection

tfd, tfb = tfp.distributions, tfp.bijectors

rc = {
    'font.family': ['sans-serif'],
    'font.sans-serif': ['Open Sans', 'Arial Unicode MS'],
    'font.size': 12,
    'figure.figsize': (8, 6),
    'grid.linewidth': 0.5,
    'legend.fontsize': 10,
    'legend.frameon': True,
    'legend.framealpha': 0.6,
    'legend.handletextpad': 0.2,
    'lines.linewidth': 1,
    'axes.facecolor': '#fafafa',
    'axes.labelsize': 11,
    'axes.titlesize': 14,
    'axes.linewidth': 0.5,
    'xtick.labelsize': 11,
    'xtick.major.width': 0.5,
    'ytick.labelsize': 11,
    'ytick.major.width': 0.5,
    'figure.titlesize': 14,
}
sns.set('notebook', 'whitegrid', rc=rc)

一次元正規分布

$$p(x \mid \mu, \sigma^2) = \frac{1}{\sqrt{2\pi}\sigma} \exp{ \left( -\frac{(x - \mu)^2}{2\sigma^2}\right)}$$

fig, ax = plt.subplots(figsize=(8, 5))

x = np.linspace(-5, 5, 500)
for loc, std in [[-2,1.0],[2,1.2],[0,0.8],[-1,0.5]]:
    normal = tfd.Normal(loc, std)
    label = '$\mathcal{{N}}({:.1f},{:.1f})$'.format(loc, std)
    ax.plot(x, normal.prob(x), label=label)

ax.set(xlabel='$x$',ylabel='$p(x)$',xlim=(-5,5))
ax.legend()

plt.tight_layout()
plt.show()

多変量正規分布

$$p(\boldsymbol{x}; \boldsymbol{\mu}, \boldsymbol{\Sigma}) = \frac{1}{\sqrt{(2\pi)^n|\boldsymbol{\Sigma}|}} \exp\left( -\frac{1}{2}(\boldsymbol{x}-\boldsymbol{\mu})^\top \boldsymbol{\Sigma}^{-1}(\boldsymbol{x}-\boldsymbol{\mu})\right),$$

nd1 = tfd.MultivariateNormalTriL(loc=[0., 0.], scale_tril=tf.linalg.cholesky([[1.,0.],[0., 1.]]))
nd2 = tfd.MultivariateNormalTriL(loc=[0., 1.], scale_tril=tf.linalg.cholesky([[1.,-0.5],[-0.5, 1.5]]))

X1, X2 = np.meshgrid(np.linspace(-3,3,100), np.linspace(-3,3,100))
X = np.dstack((X1, X2))
data = [["Independent", nd1.prob(X)], ["Correlated", nd2.prob(X)]]

fig = plt.figure(figsize=(14, 6))

for i, (title, y) in enumerate(data):
    ax = fig.add_subplot(1, 2, i + 1, projection='3d')
    ax.plot_surface(X1, X2, y, rstride=2, cstride=2, linewidth=0.1, antialiased=True, cmap='cividis')
    ax.contourf(X1, X2, y, 15, zdir='z', offset=-0.15, cmap='cividis')
    ax.view_init(30, -21)
    ax.tick_params(color='0.8')
    ax.set(title=title,
           zlim=(-0.15, 0.2),
           xlabel='$x_1$',ylabel="$x_2$",zlabel="$p(x_1,x_2)$",
           fc='none')
    
fig.suptitle("Bivariate Normal Distributions")

plt.tight_layout()
plt.show()

サンプリング

nd = tfd.MultivariateNormalTriL(loc=[0., 0.], scale_tril=tf.linalg.cholesky([[1.0,-0.2],[-0.8,1.]]))

X1, X2 = np.meshgrid(np.linspace(-3,3,100), np.linspace(-3,3,100))
Z = nd.prob(np.dstack((X1, X2)))
Y = nd.sample(500)

fig, ax = plt.subplots(figsize=(8, 6))

ax.plot(Y[:,0], Y[:,1], 'ro',alpha=.7, markeredgecolor='k', markeredgewidth=0.5)

con = ax.contourf(X1, X2, Z, 100, cmap='YlGnBu')
ax.set(xlabel='$x_1$',ylabel='$x_2$', axisbelow=False,
       xlim=(-3,3),ylim=(-3,3),
       title='Samples From Bivariate Normal')

cbar = fig.colorbar(con)
cbar.ax.set_ylabel('$p(x_1, x_2)$')

plt.tight_layout()
plt.show()

周辺分布


\begin{bmatrix}
\mathbf{x} \\
\mathbf{y} 
\end{bmatrix}
\sim
\mathcal{N}\left(
\begin{bmatrix}
\mu_{\mathbf{x}} \\
\mu_{\mathbf{y}}
\end{bmatrix},
\begin{bmatrix}
A & C \\
C^\top & B
\end{bmatrix}
\right)
= \mathcal{N}(\mu, \Sigma)
, \qquad 
\Sigma^{-1} = \Lambda = 
\begin{bmatrix}
\tilde{A} & \tilde{C} \\
\tilde{C}^\top & \tilde{B}
\end{bmatrix} \\

p(\mathbf{x}) = \mathcal{N}(\mu_{\mathbf{x}}, A) \\

p(\mathbf{y}) = \mathcal{N}(\mu_{\mathbf{y}}, B)

データを用意

mean = np.array([0., 0.])
cov = np.array([[1.0,0.7],[0.7,1.]])
nd = tfd.MultivariateNormalTriL(loc=mean, scale_tril=tf.linalg.cholesky(cov))
nx, ny = tfd.Normal(mean[0], cov[1, 1]), tfd.Normal(mean[1], cov[0, 0])

x, y = np.linspace(-3,3,100), np.linspace(-3,3,100)
X1, X2 = np.meshgrid(x, y)
Z = nd.prob(np.dstack((X1, X2)))

描画

fig = plt.figure(figsize=(8, 8))

gs = GridSpec(2, 2, width_ratios=[8,2], height_ratios=[8,2])

ax = plt.subplot(gs[0])
con = ax.contourf(X1, X2, Z, 100, cmap='YlGnBu')
ax.set(axisbelow=False)
ax.autoscale(tight=True)

ax = plt.subplot(gs[1])
ax.plot(ny.prob(y), y, 'b--', label='$p(y)$')
ax.set(ylabel='$y$')
ax.yaxis.set_label_position('right')
ax.yaxis.set_ticks_position('right')
ax.tick_params(color="0.8")
ax.autoscale(tight=True)
ax.legend()

ax = plt.subplot(gs[2])
ax.plot(x, nx.prob(x), 'r--', label='$p(x_1)$')
ax.set(xlabel='$x$')
ax.autoscale(tight=True)
ax.legend()

ax = plt.subplot(gs[3])
ax.set_visible(False)
cax = make_axes_locatable(ax).append_axes('left', size='20%', pad=0)
cbar = fig.colorbar(con, cax=cax)
cbar.ax.set_ylabel('$p(x, xy)$')

fig.suptitle('Marginal Distributions')

plt.tight_layout(rect=[0, 0, 1, 0.97])
plt.show()

三次元で

x1, y1, z1 = np.repeat(-3, 100), y, ny.prob(y).numpy()
x2, y2, z2 = x, np.repeat(3, 100), nx.prob(x).numpy()

fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(projection='3d')

ax.plot_surface(X1, X2, Z, rstride=2, cstride=2, linewidth=0.1, antialiased=True, cmap='YlGnBu')
ax.plot(x1, y1, z1, 'b--',zorder=-1, label='$p(y)$')
ax.plot(x2, y2, z2, 'r--',zorder=-1, label='$p(x)$')
ax.legend()

ax.tick_params(color='0.8')
ax.set(xlabel='$x$',ylabel="$y$",zlabel="$p(x,y)$",fc='none')
    
fig.suptitle("Marginal Distributions")

plt.tight_layout()
plt.show()

条件付き分布

p(\mathbf{x} \mid \mathbf{y}) = \mathcal{N}(\mu_{x \mid y}, \Sigma_{x \mid y}) \\

p(\mathbf{y} \mid \mathbf{x}) = \mathcal{N}(\mu_{y \mid x}, \Sigma_{y \mid x}) \\

\begin{split}
\mu_{x \mid y} & = \mu_x + CB^{-1}(\mathbf{y}-\mu_y), \quad 
\Sigma_{x \mid y} & = A - CB^{-1}C^\top = \tilde{A}^{-1}
\end{split} \\

\begin{split}
\mu_{y \mid x} & = \mu_y + C^\top A^{-1}(\mathbf{x}-\mu_x), \quad 
\Sigma_{y \mid x} & = B - C^\top A^{-1}C = \tilde{B}^{-1}
\end{split}

データを用意

A, B, C = cov[0, 0], cov[1, 1], cov[0, 1]

x_cond, y_cond = -1, 1

mean_xy = mean[0] + (C * (1/B) * (y_cond - mean[1]))
cov_xy = A - C * (1/B) * C

mean_yx = mean[1] + (C * (1/A) * (x_cond - mean[0]))
cov_yx = B - (C * (1/A) * C)

nyx, nxy = tfd.Normal(mean_yx, cov_yx), tfd.Normal(mean_xy, cov_xy)

描画する

fig = plt.figure(figsize=(8, 8))

gs = GridSpec(2, 2, width_ratios=[8,2], height_ratios=[8,2])

ax = plt.subplot(gs[0])
con = ax.contourf(X1, X2, Z, 100, cmap='YlGnBu')
ax.plot([x_cond,x_cond], [-3,3], 'b--', lw=1.5, label='$p(y \mid x={:.1f})$'.format(x_cond))
ax.plot([-3,3], [y_cond,y_cond], 'r--', lw=1.5, label='$p(x \mid y={:.1f})$'.format(y_cond))
ax.set(axisbelow=False)
ax.legend(loc="lower right")
ax.autoscale(tight=True)

ax = plt.subplot(gs[1])
ax.plot(nyx.prob(y), y, 'b--')
ax.set(ylabel='$y$')
ax.yaxis.set_label_position('right')
ax.yaxis.set_ticks_position('right')
ax.autoscale(tight=True)

ax = plt.subplot(gs[2])
ax.plot(x, nxy.prob(x), 'r--')
ax.set(xlabel='$x$')
ax.autoscale(tight=True)

ax = plt.subplot(gs[3])
ax.set_visible(False)
cax = make_axes_locatable(ax).append_axes('left', size='20%',pad=0)
cbar = fig.colorbar(con, cax=cax)
cbar.ax.set_ylabel('$p(x, xy)$')

fig.suptitle('Conditional Distributions')

plt.tight_layout(rect=[0, 0, 1, 0.97])
plt.show()

三次元の方も

x1, y1, z1 = x, np.repeat(y_cond,100), nyx.prob(y).numpy()
x2, y2, z2 = np.repeat(x_cond,100), y, nxy.prob(x).numpy()

fig = plt.figure(figsize=(8, 6))
ax = fig.add_subplot(projection='3d')

ax.plot(x1, y1, z1, 'r--', lw=2, label='$p(y \mid y={:.1f})$'.format(y_cond),zorder=2)
ax.plot(x2, y2, z2, 'b--', lw=2, label='$p(x \mid x={:.1f})$'.format(x_cond),zorder=2)
ax.add_collection3d(Poly3DCollection([list(zip(x1, y1, z1))],color='r',alpha=0.4))
ax.add_collection3d(Poly3DCollection([list(zip(x2, y2, z2))],color='b',alpha=0.4))
ax.contourf(X1, X2, Z, 15, zdir='z', offset=0, cmap='YlGnBu')

ax.tick_params(color='0.8')
ax.set(xlabel='$x$',ylabel="$y$",zlabel="$p(x,y)$",fc='none')
ax.legend()
    
fig.suptitle("Conditional Distributions")

plt.tight_layout()
plt.show()

まとめ

すべえのコードは Colab にも置きました、興味がある方はここに参照してください

参照資料

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up