More than 1 year has passed since last update.

オープンソースで公開されているMatplotlibを使ったデータ可視化の本が最強だった件

Last updated at 2022-03-31 / Posted at 2022-03-31

初めに　

"Scientific Visualization: Python + Matplotlib"というMatplotlibを使ったデータ可視化の本がオープンソースで公開されています。内容は英語ですが、実際のコードがGitHubにまとめられており、内容が非常に充実しています。個人的にはみてるだけでも目の保養です。
今回は皆さんへの共有ということで、内容を簡単にまとめていきたいと思います！普通に眺めるだけでも楽しいので読んでみてください！

"Scientific Visualization: Python + Matplotlib"の概要

コード：https://github.com/rougier/scientific-visualization-book
書籍（PDF）：https://hal.inria.fr/hal-03427242/document

本の内容は、Matplotlibを使ったデータ可視化について基本的な内容から応用まで幅広く取り上げています。どちらかというと一歩進んだ応用の側面が強く、難易度は若干高めです。Matplotlibの基本的な使い方に慣れていない方は、先に一般的な使い方を一通り勉強しておくことをお勧めします。
また、全ての可視化に対してコードがついているのがめちゃくちゃ嬉しいです！そして、250ページとオープンソースなのにまさかの大作。（笑）

本の構成は以下の通りです。

Matplotlibの基本的な使い方
ハイレベルAPIだけでなくて普段使わないようなロウレベルAPIの使い方も詳しくサポートされています。
Figure Design
タイポグラフィとか色の使い方とかデザインの基本的な内容をMatplotlibを使って解説しています。
応用
3Dを含め応用的な可視化について解説しています。
ショーケース
ただただ目の保養。みてるだけで楽しい。

僕のお気に入りをいくつか紹介します

本で取り上げられている可視化をいくつかご紹介します。

まず、本記事の最初のカバー画像ですが、これもMatplotlibで作成されています。

コード


# Reference: https://github.com/rougier/scientific-visualization-book/blob/master/code/showcases/text-shadow.py

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.textpath import TextPath
from matplotlib.patches import PathPatch
from matplotlib.collections import PolyCollection
from matplotlib.font_manager import FontProperties

from matplotlib.path import Path

# Cover artwork: white "MATPLOTLIB" lettering casting a long diagonal shadow
# that fades into a red background.

red = np.array([233, 77, 85, 255]) / 255
darkred = np.array([130, 60, 71, 255]) / 255
prop = FontProperties(family="Source Sans Pro", weight=100)

# 14.8 x 21 centimetres, converted to inches
fig = plt.figure(figsize=(14.8 / 2.54, 21 / 2.54))

ax = fig.add_axes([0, 0, 1, 1], aspect=1, xlim=[-10, 10], ylim=[-14.2, 14.2])
ax.axis("off")

# Text path
text_path = TextPath((0, 0), "MATPLOTLIB", size=2.5, prop=prop)

# Text centering
# TextPath.vertices is a cached array that newer Matplotlib releases may mark
# read-only, so an in-place `V -= ...` can raise ValueError. Build a shifted
# plain Path instead of editing the cached array.
V = text_path.vertices
xmin, xmax = V[:, 0].min(), V[:, 0].max()
ymin, ymax = V[:, 1].min(), V[:, 1].max()
centering = np.array([(xmin + xmax) / 2 + 1, (ymin + ymax) / 2])
path = Path(V - centering, text_path.codes)


# Compute shadow by iterating over text path segments: every edge of every
# closed contour is extruded along the shadow direction into a quad.
shadow_offset = np.array([+20, -20])
polys = []
for point, code in path.iter_segments(curves=False):
    if code == path.MOVETO:
        points = [point]
    elif code == path.LINETO:
        points.append(point)
    elif code == path.CLOSEPOLY:
        # Close the contour explicitly so the last edge is extruded too
        points.append(points[0])
        for p0, p1 in zip(points[:-1], points[1:]):
            polys.append([p0, p1, p1 + shadow_offset, p0 + shadow_offset])

# Display shadow
collection = PolyCollection(
    polys, closed=True, linewidth=0.0, facecolor=darkred, zorder=-10
)
ax.add_collection(collection)

# Display text
patch = PathPatch(path, facecolor="white", edgecolor="none", zorder=10)
ax.add_artist(patch)

# Transparent gradient to fade out shadow: an opaque red background below the
# shadow, then the same red with an alpha ramp drawn above the shadow.
gradient = np.zeros((200, 1, 4)) + red
ax.imshow(gradient, extent=[-11, 11, -15, 15], zorder=-20, clip_on=False)
gradient[:, 0, 3] = np.linspace(0, 1, len(gradient))
ax.imshow(gradient, extent=[-11, 11, -15, 15], zorder=0, clip_on=False)


ax.text(
    6.5,
    -1.75,
    "a versatile scientific visualization library ",
    color="white",
    ha="right",
    va="baseline",
    size=10,
    family="Pacifico",
    zorder=30,
)

# Save and show result
plt.savefig("../../figures/showcases/text-shadow.pdf")
plt.savefig("../../figures/showcases/text-shadow.png", dpi=600)
plt.show()

Star Wars的なやつ

コード

# Reference: https://github.com/rougier/scientific-visualization-book/blob/master/code/typography/text-starwars.py

import matplotlib.pyplot as plt
from matplotlib.textpath import TextPath
from matplotlib.patches import PathPatch
from matplotlib.path import Path
import numpy as np

# Perspective "opening crawl": each line of text is squeezed horizontally and
# vertically as it moves up the figure.
fig = plt.figure(figsize=(4.25, 2))
ax = fig.add_axes([0, 0, 1, 1], aspect=1, xlim=[-40, 40], ylim=[-1, 25])
ax.axis("off")

text = [
    "Beautiful is better than ugly.",
    "Explicit is better than implicit.",
    "Simple is better than complex.",
    "Complex is better than complicated.",
    "Flat is better than nested.",
    "Sparse is better than dense.",
    "Readability counts.",
]

y = 0  # running baseline of the current line
size = 6  # font size of the undistorted text
xfactor = 1 / 50  # horizontal shrink per unit of height
yfactor = 1 / 120  # vertical shrink per unit of height

# The last sentence is laid out first, at the bottom of the crawl
for i, line in enumerate(reversed(text)):
    glyphs = TextPath((0, 0), line, size=size)
    gx = glyphs.vertices[:, 0]
    gy = glyphs.vertices[:, 1]

    # Centre horizontally and lift the line to its baseline
    Px = gx - (gx.max() + gx.min()) / 2
    Py = gy + y

    # Compress along y first, then along x using the compressed heights
    Py = Py * (1 - (Py * yfactor))
    Px = Px * (1 - (Py * xfactor))

    # Advance the baseline for the next line
    y += size * (1 - gy.min() * yfactor)

    # Rebuild a path from the distorted vertices, keeping the drawing codes
    path_new = Path(np.column_stack([Px, Py]), codes=glyphs.codes)

    # Grey level gets lighter with every line
    patch = PathPatch(
        path_new, facecolor=f"{i / 10:.2f}", linewidth=0, clip_on=False
    )
    ax.add_artist(patch)

plt.savefig("../../figures/typography/text-starwars.pdf")
plt.show()

よくわからん3Dのウネウネ

コード

# Reference: https://github.com/rougier/scientific-visualization-book/blob/master/code/showcases/waterfall-3d.py

import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from shapely.geometry import box, Polygon
from scipy.ndimage import gaussian_filter1d
from matplotlib.collections import PolyCollection
from matplotlib.transforms import Affine2D
from matplotlib.text import TextPath
from matplotlib.patches import PathPatch
from mpl_toolkits.mplot3d import Axes3D, art3d
from matplotlib.font_manager import FontProperties


def text3d(ax, xyz, s, zdir="z", size=0.1, angle=0, **kwargs):
    """Draw the string ``s`` as a flat patch embedded in a 3D axes.

    Parameters
    ----------
    ax : 3D axes the patch is added to.
    xyz : (x, y, z) anchor of the text; the text is centred horizontally
        on this point.
    s : text to render.
    zdir : "x", "y" or anything else (treated as "z"); the axis
        perpendicular to the plane the text lies in.
    size : font size passed to ``TextPath``.
    angle : in-plane rotation, passed to ``Affine2D.rotate``
        (radians per the Matplotlib documentation).
    **kwargs : forwarded to ``PathPatch`` (colours, alpha, ...).
    """
    x, y, z = xyz
    # Split the anchor into in-plane coordinates and the offset along zdir
    if zdir == "y":
        xy, z = (x, z), y
    elif zdir == "x":
        xy, z = (y, z), x
    else:
        xy, z = (x, y), z
    path = TextPath((0, 0), s, size=size, prop=FontProperties(family="Roboto"))

    # Centre on the midpoint of the horizontal extent. The shift is part of
    # the transform rather than an in-place edit of path.vertices, which newer
    # Matplotlib releases may expose as a read-only cached array.
    xs = path.vertices[:, 0]
    center = (xs.max() + xs.min()) / 2
    trans = (
        Affine2D()
        .translate(-center, 0)
        .rotate(angle)
        .translate(xy[0], xy[1])
    )
    patch = PathPatch(trans.transform_path(path), **kwargs)
    ax.add_patch(patch)
    art3d.pathpatch_2d_to_3d(patch, z=z, zdir=zdir)


# Some nice but random curves
def random_curve(n=100):
    """Return ``n`` samples of a smoothed random curve that peaks in the middle.

    Uniform noise in [0, 1) is drawn from NumPy's global random state,
    smoothed with a Gaussian filter (sigma 1) and multiplied by a Gaussian
    envelope evaluated on [-1, 1].
    """
    noise = np.random.uniform(0, 1, n)
    smooth = gaussian_filter1d(noise, 1)
    x = np.linspace(-1, 1, len(smooth))
    envelope = np.exp(-2 * (x * x))
    return smooth * envelope


def cmap_plot(Y, ymin=0, ymax=1, n=50, cmap="magma", y0=0):
    """Slice the area under a curve into horizontal colour bands.

    The curve ``Y`` is smoothed, placed over x in [0.3, 0.7] and closed down
    to y = 0. It is then cut into horizontal strips of height
    ``(ymax - ymin) / n``; each strip is coloured by its lower edge.

    Parameters
    ----------
    Y : 1D array of curve heights.
    ymin, ymax : value range mapped onto the colormap; also sets strip height.
    n : number of strips that span ``ymax - ymin``.
    cmap : colormap name (or object) understood by ``plt.get_cmap``.
    y0 : vertical offset added to every returned polygon.

    Returns
    -------
    (verts, colors) : parallel lists of (k, 2) vertex arrays and RGBA colours,
        suitable for ``PolyCollection``.
    """
    X = np.linspace(0.3, 0.7, len(Y))
    Y = gaussian_filter1d(Y, 2)

    verts = []
    colors = []
    P = Polygon([(X[0], 0), *zip(X, Y), (X[-1], 0)])

    dy = (ymax - ymin) / n
    # plt.get_cmap is the supported spelling; plt.cm.get_cmap has been
    # removed from recent Matplotlib releases.
    cmap = plt.get_cmap(cmap)
    cnorm = matplotlib.colors.Normalize(vmin=ymin, vmax=ymax)

    for y in np.arange(Y.min(), Y.max(), dy):
        band = P.intersection(box(0, y, 10, y + dy))
        color = cmap(cnorm(y))
        # The intersection may be a single geometry or a multi-part one
        for part in getattr(band, "geoms", [band]):
            # A strip that only touches the curve yields points or lines,
            # which have no exterior ring; only filled pieces are drawn.
            if part.geom_type != "Polygon" or part.is_empty:
                continue
            V = np.array(part.exterior.coords)
            V[:, 1] += y0
            verts.append(V)
            colors.append(color)

    return verts, colors


# Three stacked "waterfall" series of colour-banded random curves, drawn in
# an orthographic 3D axes on a black background.
fig = plt.figure(figsize=(10, 10))
fig.patch.set_facecolor("black")
# add_subplot replaces fig.gca(projection=...): passing keyword arguments to
# gca is no longer accepted by recent Matplotlib releases.
ax = fig.add_subplot(projection="3d", proj_type="ortho")
ax.patch.set_facecolor("black")

# The axis objects are reached through ax.xaxis / ax.yaxis / ax.zaxis; the
# older ax.w_xaxis-style aliases have been removed from recent Matplotlib.
axes3d = (ax.xaxis, ax.yaxis, ax.zaxis)

# Make panes transparent
for axis in axes3d:
    axis.pane.fill = False

# Remove grid lines
ax.grid(False)

# Remove tick labels
ax.set_xticklabels([])
ax.set_yticklabels([])
ax.set_zticklabels([])

# Transparent spines
for axis in axes3d:
    axis.line.set_color((1.0, 1.0, 1.0, 0.0))

# Transparent panes
for axis in axes3d:
    axis.set_pane_color((1.0, 1.0, 1.0, 0.0))

# No ticks
ax.set_xticks([])
ax.set_yticks([])
ax.set_zticks([])


# Fixed seed so the random curves are reproducible
np.random.seed(1)

# Optional series labels (disabled):
# text3d(ax, (1.05, 0.5, 0), "Series A", zdir="x",
#        size=0.05, facecolor="white", edgecolor="None", alpha=.25)

# text3d(ax, (1.05, 0.5, 0.6), "Series B", zdir="x",
#        size=0.05, facecolor="white", edgecolor="None", alpha=.25)

# text3d(ax, (1.05, 0.5, 1.2), "Series C", zdir="x",
#        size=0.05, facecolor="white", edgecolor="None", alpha=.25)


# One slice per depth position; at each position one curve is drawn for each
# of the three series (vertical offsets -0.2, 0.4 and 1.0, in that order).
for zs in np.linspace(0, 1, 50):
    for y0 in (-0.2, 0.4, 1.0):
        Y = 0.1 * random_curve()
        verts, colors = cmap_plot(
            Y, ymin=0, ymax=0.075, n=50, cmap="magma", y0=y0
        )
        collection = PolyCollection(
            verts, antialiased=False, edgecolors="None", facecolor=colors
        )
        ax.add_collection3d(collection, zdir="x", zs=zs)

ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
ax.set_zlim(0, 1)

ax.view_init(elev=40, azim=-40)

plt.tight_layout()
plt.savefig("../../figures/showcases/waterfall-3d.pdf")
# plt.savefig("./waterfall-3d.png", dpi=300)
plt.show()

3Dうさぎ

こんなこともできんのか。。。と驚きました。

コード

# Reference: https://github.com/rougier/scientific-visualization-book/blob/master/code/threed/bunny.py

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.collections import PolyCollection


def frustum(left, right, bottom, top, znear, zfar):
    """Return the 4x4 perspective projection matrix of a viewing frustum.

    ``left``/``right``/``bottom``/``top`` bound the near clipping plane and
    ``znear``/``zfar`` are the distances to the near and far planes. The
    matrix is laid out for column vectors (``M @ v``).
    """
    width = right - left
    height = top - bottom
    depth = zfar - znear
    return np.array(
        [
            [2.0 * znear / width, 0.0, (right + left) / width, 0.0],
            [0.0, 2.0 * znear / height, (top + bottom) / height, 0.0],
            [0.0, 0.0, -(zfar + znear) / depth, -2.0 * znear * zfar / depth],
            [0.0, 0.0, -1.0, 0.0],
        ]
    )


def perspective(fovy, aspect, znear, zfar):
    """Return a symmetric perspective projection matrix.

    ``fovy`` is the full vertical field of view in degrees, ``aspect`` the
    width/height ratio, and ``znear``/``zfar`` the clipping distances. The
    near-plane extents are derived from these and handed to ``frustum``.
    """
    # fovy / 360 * pi is half the field of view, in radians
    half_height = znear * np.tan(fovy / 360.0 * np.pi)
    half_width = aspect * half_height
    return frustum(
        -half_width, half_width, -half_height, half_height, znear, zfar
    )


def scale(x, y, z):
    """Return the 4x4 homogeneous matrix scaling by ``x``, ``y``, ``z``."""
    M = np.eye(4)
    M[0, 0], M[1, 1], M[2, 2] = x, y, z
    return M


def zoom(z):
    """Return the 4x4 matrix scaling all three axes uniformly by ``z``."""
    factor = z
    return scale(factor, factor, factor)


def translate(x, y, z):
    """Return the 4x4 homogeneous matrix translating by ``(x, y, z)``."""
    M = np.eye(4)
    # The offset occupies the last column for column-vector convention
    M[:3, 3] = (x, y, z)
    return M


def xrotate(theta):
    """Return the 4x4 rotation matrix about the x axis, ``theta`` in degrees."""
    angle = np.pi * theta / 180
    cos_a, sin_a = np.cos(angle), np.sin(angle)
    M = np.eye(4)
    M[1, 1:3] = cos_a, -sin_a
    M[2, 1:3] = sin_a, cos_a
    return M


def yrotate(theta):
    """Return the 4x4 rotation matrix about the y axis, ``theta`` in degrees."""
    angle = np.pi * theta / 180
    cos_a, sin_a = np.cos(angle), np.sin(angle)
    M = np.eye(4)
    M[0, 0], M[0, 2] = cos_a, sin_a
    M[2, 0], M[2, 2] = -sin_a, cos_a
    return M


def obj_load(filename):
    """Load vertices and triangular faces from a Wavefront OBJ file.

    Only ``v`` (vertex) and ``f`` (face) records are read; every other record
    is ignored. For each vertex the first three coordinates are kept, and for
    each face the first three vertex references. Face references may be plain
    (``f 1 2 3``) or carry texture/normal indices (``f 1/1/1 2/2/2 3/3/3``,
    ``f 1//1 2//2 3//3``); only the vertex index before the first slash is
    used.

    Returns
    -------
    (V, Vi) : array of vertex coordinates and array of zero-based vertex
        indices, one row per face.
    """
    V, Vi = [], []
    with open(filename) as f:
        # Iterate lazily instead of loading the whole file with readlines()
        for line in f:
            if line.startswith("#"):
                continue
            values = line.split()
            if not values:
                continue
            if values[0] == "v":
                V.append([float(x) for x in values[1:4]])
            elif values[0] == "f":
                # "v/vt/vn" -> keep the vertex index only
                Vi.append([int(x.split("/")[0]) for x in values[1:4]])
    # OBJ indices are one-based
    return np.array(V), np.array(Vi) - 1


# -----------------------------------------------------------------------------

# Load the mesh, centre it on the origin and normalise by its largest extent
vertices, faces = obj_load("bunny.obj")
bbox_max, bbox_min = vertices.max(axis=0), vertices.min(axis=0)
vertices = (vertices - (bbox_max + bbox_min) / 2) / max(bbox_max - bbox_min)

# Model-view-projection matrix
model = zoom(1.5) @ xrotate(20) @ yrotate(45)
view = translate(0, 0, -4.5)
proj = perspective(25, 1, 1, 100)
MVP = proj @ view @ model

# Project every vertex: homogeneous coordinates -> clip space -> division by w
homogeneous = np.column_stack([vertices, np.ones(len(vertices))])
clip = homogeneous @ MVP.T
ndc = (clip / clip[:, 3:4])[:, :3]  # normalized device coordinates

# One (3, 3) array of projected corners per face
triangles = ndc[faces]

# Backface culling: keep the triangles whose signed-area sum is negative
x0, y0 = triangles[:, 0, 0], triangles[:, 0, 1]
x1, y1 = triangles[:, 1, 0], triangles[:, 1, 1]
x2, y2 = triangles[:, 2, 0], triangles[:, 2, 1]
winding = (x1 - x0) * (y1 + y0) + (x2 - x1) * (y2 + y1) + (x0 - x2) * (y0 + y2)
triangles = triangles[winding < 0]

# 2D outlines to draw, and one depth value per triangle
segments = triangles[..., :2]
zbuffer = -triangles[..., 2].mean(axis=1)

# Colour by depth, rescaled to [0, 1]
zmin, zmax = zbuffer.min(), zbuffer.max()
zbuffer = (zbuffer - zmin) / (zmax - zmin)
colors = plt.get_cmap("magma")(zbuffer)

# Draw in order of increasing depth value so later triangles paint over
# earlier ones
order = np.argsort(zbuffer)
segments, colors = segments[order], colors[order]

# Actual rendering
fig = plt.figure(figsize=(6, 6))
ax = fig.add_axes([0, 0, 1, 1], xlim=[-1, +1], ylim=[-1, +1], aspect=1)
ax.axis("off")

# Three passes over the same triangles: thick black outline, thinner white
# outline, then the depth-coloured faces with hairline edges.
passes = [
    dict(facecolor="None", edgecolor="black", linewidth=6.0),
    dict(facecolor="None", edgecolor="white", linewidth=3.0),
    dict(facecolor=colors, edgecolor="black", linewidth=0.25),
]
for style in passes:
    ax.add_collection(PolyCollection(segments, closed=True, **style))

plt.savefig("../../figures/threed/bunny.pdf", transparent=True)
plt.show()

終わりに

まず、このクオリティの本が無料公開されていることが驚きでした。無料のリソースは積極的に活用していきたいですね〜。

PlotlyやSeabornなどが出てきたり、可視化なんてWandbに任せてしまっていたりすることもあり、最近Matplotlibそんなに使っていなかったのですが、使うモチベが湧きました。

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up