More than 1 year has passed since last update.

凝集型クラスター分析

Python

Posted at 2022-05-30

凝集型クラスター分析手法

#ライブラリー呼び出し
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import random
random.seed(0)
import copy

#下記URL参考にしつつ実施
#https://qiita.com/g-k/items/8f0d9905d3e106caed59
import pandas as pd
import numpy as np
import scipy
from sklearn import datasets

# NOTE(review): `data` is not defined in the visible part of this file; it is
# presumably a pandas DataFrame loaded earlier - confirm.
dataset = data

# Explanatory variables: the first 40 rows, every column except the first.
X = data.iloc[:40, 1:]
# Target variable (the criterion used for grouping). Restricted to the same
# 40 rows as X so that features and target stay aligned row for row.
y = data["index"].iloc[:40]

dataset_data = X
dataset_target = y

# Name of the target variable
target_names = "index"

from scipy.cluster.hierarchy import (
    linkage,
    dendrogram,
    fcluster,
)

# Build the hierarchical-clustering linkage matrix using Ward's method on
# Euclidean distances.
Z = linkage(
    dataset_data,
    metric="euclidean",
    method="ward",
)
# Wrap the linkage matrix in a DataFrame (displayed as the cell output when
# this is the last expression of a notebook cell).
pd.DataFrame(Z)

# Plot the dendrogram for the linkage matrix Z
import matplotlib.pyplot as plt
fig2, ax2 = plt.subplots(figsize=(20, 5))
# Draw onto the axes created above explicitly, and keep `ax2` bound to that
# Axes object instead of rebinding it to dendrogram()'s return value.
dendrogram(Z, ax=ax2)
fig2.show()

# Assign a cluster number to every sample, cutting the tree into at most
# 3 flat clusters.
F = fcluster(Z, t=3, criterion="maxclust")
print(F)

# Attach the cluster labels to the data.
# Take an explicit copy of the 40-row slice: assigning a new column to a
# slice of `data` triggers pandas' chained-assignment warning
# (SettingWithCopyWarning), and the copy makes it unambiguous that the
# labelled frame is independent of `data`.
data_with_cluster_label = data.iloc[:40, :].copy()
data_with_cluster_label["cluster_label"] = F

# Visualise agglomerative clustering on a small synthetic data set
from scipy.cluster.hierarchy import dendrogram, ward
from sklearn.datasets import make_blobs

# y: target variable
# X: explanatory variables
# Configure the input data here.
# NOTE: this rebinds the module-level names X and y.
X, y = make_blobs(random_state=0, n_samples=12)
linkage_array = ward(X)

# Use a fresh figure and pass its axes explicitly so the dendrogram is not
# drawn on top of whatever axes happens to be current (for example an
# earlier plot when the file is run top to bottom).
fig, ax = plt.subplots()
dendrogram(linkage_array, ax=ax)

bounds = ax.get_xbound()

# Horizontal lines marking the heights at which the tree is cut
ax.plot(bounds, [7.25, 7.25], "--", c="k")
ax.plot(bounds, [4, 4], "-", c="k")

# Annotate each cut line at the right-hand edge of the plot
# (label spelling corrected: "culusters" -> "clusters")
ax.text(bounds[1], 7.25, "-two_clusters", va="center", fontdict={"size": 15})
ax.text(bounds[1], 4, "-three_clusters", va="center", fontdict={"size": 15})

# x-axis label (spelling corrected: "Sanple" -> "Sample")
ax.set_xlabel("Sample_index")
# y-axis label
ax.set_ylabel("Cluster_distance")

# Assign cluster numbers, cutting the tree into at most 3 flat clusters.
# NOTE(review): this cuts the linkage matrix `Z`, not the `linkage_array`
# built from the make_blobs sample earlier in the file - confirm which one
# is intended.
F = fcluster(Z, 3, criterion="maxclust")
print(F)

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do by signing up