0
0

前回作った改良型のbiplotを、目的変数の値で点を色分けして分かりやすく表示する関数にしました。

関数

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
def biplot_spv(df, y_name):
    """Draw a PCA biplot whose points are coloured by a target column.

    Every column of ``df`` except ``y_name`` is standardized with
    ``StandardScaler`` and decomposed with ``PCA``.  The scores on the first
    two principal components are drawn as a scatter plot coloured by the
    target column (``brg`` colormap), and one red arrow per feature shows
    that feature's scaled weight on those two components.  The figure is
    displayed with ``plt.show()``; nothing is returned.

    Args:
        df: pandas DataFrame holding the feature columns and the target
            column.
            NOTE(review): the features go straight into StandardScaler/PCA
            and the target into ``scatter(c=...)``, so presumably every
            column must be numeric -- confirm against the data you pass.
        y_name: Label of the target column in ``df``.

    Raises:
        ValueError: If ``df`` has fewer than two rows or fewer than two
            feature columns, because two principal components are needed
            for the two plot axes.
    """
    y = df[y_name]
    x = df.drop(y_name, axis=1)

    # PCA yields at most min(n_samples, n_features) components and the plot
    # needs two of them; fail early with a clear message instead of an
    # IndexError after the figure has already been created.
    if min(x.shape) < 2:
        raise ValueError(
            "biplot_spv needs at least 2 rows and 2 feature columns, "
            "got {} rows and {} feature columns".format(x.shape[0], x.shape[1])
        )

    sx = StandardScaler().fit_transform(x)
    model = PCA().fit(sx)
    tx = model.transform(sx)

    # Row k holds component k's per-feature weights, scaled by the square
    # root of that component's explained-variance ratio.
    fac = (np.sqrt(model.explained_variance_ratio_)[:, np.newaxis]
           * model.components_)

    _, ax = plt.subplots()
    # Two overlaid axes so scores and arrows get independent scales:
    # ax1 (twinx) shares the x-axis with ax and has its own y-axis,
    # ax2 (twiny) shares the y-axis with ax and has its own x-axis.
    ax1 = ax.twinx()
    ax2 = ax.twiny()
    ax1.scatter(tx[:, 0], tx[:, 1], c=y, cmap="brg")

    # Symmetric limits keep the arrow origin (0, 0) centred on the arrow axes.
    x_half = np.abs(fac[0]).max()
    y_half = np.abs(fac[1]).max()
    ax2.set_xlim(-x_half, x_half)
    ax2.set_ylim(-y_half, y_half)

    for name, fx, fy in zip(x.columns, fac[0], fac[1]):
        # An annotation with empty text is used purely to draw the arrow
        # from the origin to the feature's position.
        ax2.annotate("", xytext=[0, 0], xy=[fx, fy],
                     arrowprops=dict(shrink=0, width=1, headwidth=6,
                                     headlength=10, connectionstyle='arc3',
                                     facecolor='red', edgecolor='red'))
        ax2.text(fx, fy, name)
    plt.show()

使用例

import pandas as pd

# Read wine.csv and draw the biplot, colouring the points by the "Wine" column.
df = pd.read_csv("wine.csv")
biplot_spv(df, "Wine")

Untitled.png

# Read boston.csv and draw the biplot, colouring the points by the "PRICE" column.
df = pd.read_csv("boston.csv")
biplot_spv(df, "PRICE")

Untitled.png

0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0