LoginSignup
0
1

More than 1 year has passed since last update.

Pythonで次元削減後の散布図に楕円を描く

Posted at

背景

分類データを扱う際、PCAやNMDSで次元削減した後、matplotlib/seabornで散布図を描くのですが、
「分類ごとに楕円を描いてほしい」と思ったが、なかなか見つからなかったので、メモです。

ドキュメント

コード

# ライブラリのインポート
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse
import matplotlib.transforms as transforms

# 楕円を描く関数
# https://github.com/matplotlib/matplotlib/blob/main/examples/statistics/confidence_ellipse.py
def confidence_ellipse(x, y, ax, n_std=3.0, facecolor='none', **kwargs):
    """
    Create a plot of the covariance confidence ellipse of *x* and *y*.

    Parameters
    ----------
    x, y : array-like, shape (n, )
        Input data.

    ax : matplotlib.axes.Axes
        The axes object to draw the ellipse into.

    n_std : float
        The number of standard deviations to determine the ellipse's radii.

    facecolor : str
        Fill color of the ellipse; 'none' draws an outline only.

    **kwargs
        Forwarded to `~matplotlib.patches.Ellipse`

    Returns
    -------
    matplotlib.patches.Ellipse

    Raises
    ------
    ValueError
        If *x* and *y* do not have the same number of elements.
    """
    # Accept any array-like (list, pandas Series, ndarray), as promised by
    # the docstring; the original crashed on plain lists at `x.size`.
    x = np.asarray(x)
    y = np.asarray(y)
    if x.size != y.size:
        raise ValueError("x and y must be the same size")

    cov = np.cov(x, y)
    # Pearson correlation coefficient from the 2x2 covariance matrix.
    pearson = cov[0, 1]/np.sqrt(cov[0, 0] * cov[1, 1])
    # Using a special case to obtain the eigenvalues of this
    # two-dimensional dataset: for a unit-variance pair, the ellipse axes
    # are sqrt(1 +/- pearson).
    ell_radius_x = np.sqrt(1 + pearson)
    ell_radius_y = np.sqrt(1 - pearson)
    ellipse = Ellipse((0, 0), width=ell_radius_x * 2, height=ell_radius_y * 2,
                      facecolor=facecolor, **kwargs)

    # Calculating the standard deviation of x from
    # the squareroot of the variance and multiplying
    # with the given number of standard deviations.
    scale_x = np.sqrt(cov[0, 0]) * n_std
    mean_x = np.mean(x)

    # Calculating the standard deviation of y likewise.
    scale_y = np.sqrt(cov[1, 1]) * n_std
    mean_y = np.mean(y)

    # Rotate the unit ellipse by 45 degrees, stretch it to the data's
    # standard deviations, then move it onto the data's mean.
    transf = transforms.Affine2D() \
        .rotate_deg(45) \
        .scale(scale_x, scale_y) \
        .translate(mean_x, mean_y)

    ellipse.set_transform(transf + ax.transData)
    return ax.add_patch(ellipse)

使用してみた

iris
from sklearn.datasets import load_iris
import seaborn as sns
import pandas as pd
from sklearn.decomposition import PCA

# Load the iris dataset into a DataFrame.
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
# Standardize every column (zero mean, unit variance) — vectorized form
# of the original per-column apply, same ddof=1 std as pandas default.
df = (df - df.mean()) / df.std()

# Project onto the first two principal components.
_n_components = 2
pca = PCA(n_components=_n_components)
# fit_transform combines the original fit + transform calls.
feature = pca.fit_transform(df)
feature_df = pd.DataFrame(feature, columns=["PC{}".format(x + 1) for x in range(_n_components)])

# Attach each sample's class as its species name; fancy indexing of
# target_names by the integer targets replaces the dict-based map.
feature_df["label"] = iris.target_names[iris.target]

# Colors per species for both the scatter and its ellipse.
colors = {"setosa": "blue",
          "virginica": "orange",
          "versicolor": "green"}

fig, ax = plt.subplots()
sns.scatterplot(data=feature_df, x="PC1", y="PC2", hue="label", palette=colors, ax=ax)
# Add one confidence ellipse per species, matching its scatter color.
for name, color in colors.items():
    subset = feature_df[feature_df["label"] == name]
    confidence_ellipse(subset["PC1"].values, subset["PC2"].values, ax, edgecolor=color)
plt.show()

image.png

参考にさせていただきました

0
1
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
1