LoginSignup
0
0

More than 1 year has passed since last update.

凝集型クラスター分析

Posted at

凝集型クラスター分析手法

#ライブラリー呼び出し
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import random
random.seed(0)
import copy



#下記URL参考にしつつ実施
#https://qiita.com/g-k/items/8f0d9905d3e106caed59
import pandas as pd
import numpy as np
import scipy
from sklearn import datasets

# `data` is not defined in this snippet; it is expected to already exist
# (presumably a pandas DataFrame loaded in an earlier cell - confirm).
dataset = data

# Name of the target column.
target_names = "index"

# Explanatory variables: the first 40 rows, every column after the first one.
X = dataset.iloc[:40, 1:]
dataset_data = X

# Target variable (the criterion the samples are grouped by).
# NOTE(review): unlike X, this is not limited to the first 40 rows - confirm
# whether that is intended.
y = dataset["index"]
dataset_target = y

from scipy.cluster.hierarchy import dendrogram, fcluster, linkage

# Build the hierarchical-clustering linkage matrix; the keyword arguments
# select Ward's method on Euclidean distances.
Z = linkage(dataset_data, metric='euclidean', method='ward')

# Wrap the linkage matrix in a DataFrame. The result is not stored; presumably
# this is the last line of a notebook cell so the table gets displayed.
pd.DataFrame(Z)

# Visualize the hierarchy stored in Z as a dendrogram.
import matplotlib.pyplot as plt
fig2, ax2 = plt.subplots(figsize=(20, 5))
# Draw explicitly onto the axes created above (ax=ax2) instead of relying on
# the implicit "current axes", and keep the returned plot-data dict under its
# own name so that ax2 stays bound to the Axes object.
dendrogram_info = dendrogram(Z, ax=ax2)
# Figure.show() does not manage a GUI event loop and only warns on non-GUI
# backends; pyplot.show() is the documented way to display the figure.
plt.show()

# Assign a cluster number to every sample for a cut of the hierarchy into
# three clusters (criterion 'maxclust' with t=3), then print the assignments.
F = fcluster(Z, criterion='maxclust', t=3)
print(F)

# Attach the cluster labels to the first 40 rows of the data.
# Take an explicit copy of the slice: assigning a new column onto a bare
# `iloc` slice raises pandas' SettingWithCopyWarning and leaves it ambiguous
# whether `data` itself is modified. With .copy() only the new frame changes.
data_with_cluster_label = data.iloc[:40, :].copy()
data_with_cluster_label['cluster_label'] = F

# Visualize agglomerative clustering on a small synthetic data set.
from scipy.cluster.hierarchy import dendrogram, ward
from sklearn.datasets import make_blobs

# y: target variable
# X: explanatory variables
# Replace this with your own data.
# NOTE: this rebinds the module-level names X and y.
X, y = make_blobs(random_state=0, n_samples=12)
linkage_array = ward(X)

# Draw on a dedicated figure/axes so the plot never lands on whatever axes
# happened to be current before this point.
fig3, ax = plt.subplots()
dendrogram(linkage_array, ax=ax)

bounds = ax.get_xbound()

# Horizontal cut lines across the full x-range; the heights are picked by hand.
ax.plot(bounds, [7.25, 7.25], "--", c="k")
ax.plot(bounds, [4, 4], "-", c="k")

# Label each cut line at the right edge of the plot
# (spelling of "clusters" corrected in the displayed text).
ax.text(bounds[1], 7.25, "-two_clusters", va="center", fontdict={"size": 15})
ax.text(bounds[1], 4, "-three_clusters", va="center", fontdict={"size": 15})

# X-axis label (spelling of "Sample" corrected in the displayed text).
ax.set_xlabel("Sample_index")
# Y-axis label.
ax.set_ylabel("Cluster_distance")

# Assign cluster numbers for a three-cluster cut and print them.
# NOTE(review): this uses Z, not the `linkage_array` built just above - confirm
# which linkage matrix is intended here.
F = fcluster(Z, criterion='maxclust', t=3)
print(F)

0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0