1. Trying out Graph2Vec
I tried out Graph2Vec in Python (via the karateclub library); feel free to use this as a reference.
1-1. Installing the libraries
$ python -V
Python 3.11.1
$ pip install karateclub
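The scripts below also import torch, torch_geometric, scikit-learn, and matplotlib. If they are not installed yet, something along these lines should cover them (exact package names may vary with your environment):
$ pip install torch torch_geometric scikit-learn matplotlib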
graph2vec.py
import os
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from torch_geometric.datasets import TUDataset
from karateclub.graph_embedding import Graph2Vec
# Load the MUTAG dataset
dataset = TUDataset(root="./", name="MUTAG")

# Convert each graph into a networkx graph for karateclub
graphs = []
for data in dataset:
    e_list = []
    edge_index = data.edge_index
    for i in range(len(edge_index[0])):
        e_list.append((int(edge_index[0][i]), int(edge_index[1][i])))
    g = nx.from_edgelist(e_list)
    # Give every node a "feature" attribute (here just its own index;
    # the node features in data.x are not used)
    nx.set_node_attributes(g, {j: str(j) for j in range(g.number_of_nodes())}, "feature")
    # karateclub requires node indices to be consecutive integers starting at 0
    node_indices = sorted([node for node in g.nodes()])
    numeric_indices = [index for index in range(g.number_of_nodes())]
    if numeric_indices == node_indices:
        graphs.append(g)
# Instantiate the model (the embedding dimension defaults to 128)
model = Graph2Vec(wl_iterations=4, epochs=150)
# Fit the model to the graphs
model.fit(graphs)
emb128d = model.get_embedding()
print("128d:", emb128d.shape)

# Reduce the embeddings to two dimensions with PCA
pca = PCA(n_components=2)
emb2d = pca.fit_transform(emb128d)
print("2d:", emb2d.shape)

# np.save does not create directories, so create the output directory first
os.makedirs('./embedded_data', exist_ok=True)
np.save('./embedded_data/graph2vec_MUTAG', emb2d)

# Plot the embedded data
plt.title("graph embeddings in 2D")
plt.scatter(emb2d[:, 0], emb2d[:, 1])
plt.show()
1-2. Running it
$ python graph2vec.py
# ("128d:", 188, 128)
# ("2d:", 188, 2)
1-3. Displaying the embedding plot
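Running graph2vec.py opens a scatter plot of the 188 MUTAG graphs projected into two dimensions. The plain scatter does not show whether the two MUTAG classes occupy different regions, so here is an optional sketch (not part of the scripts above) that colors the same plot by the ground-truth label:
import numpy as np
import matplotlib.pyplot as plt
from torch_geometric.datasets import TUDataset

# Load the saved 2D embeddings and the class label of each graph
emb2d = np.load("./embedded_data/graph2vec_MUTAG.npy")
labels = np.array([data.y.item() for data in TUDataset(root="./", name="MUTAG")])

# One scatter call per class, so each class gets its own color and legend entry
plt.title("graph embeddings in 2D (colored by class)")
for label in np.unique(labels):
    mask = labels == label
    plt.scatter(emb2d[mask, 0], emb2d[mask, 1], label=f"class {label}")
plt.legend()
plt.show()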
2. Graph classification with an SVM
svm.py
import numpy as np
from torch_geometric.datasets import TUDataset
from sklearn.model_selection import LeaveOneOut
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

DATA_PATH = './embedded_data/graph2vec_MUTAG.npy'
# Load the saved embeddings
x = np.load(DATA_PATH)

# Load the MUTAG dataset to get the ground-truth labels
dataset = TUDataset(root="./", name="MUTAG")
y = []
for data in dataset:
    y.append(data.y.item())
y = np.array(y)

# Store the predicted labels
pred_list = np.array([])

# Instantiate the SVM
model = SVC(kernel='linear')

# Leave-one-out cross-validation
loo = LeaveOneOut()
i = 1
for train_index, test_index in loo.split(x):
    x_train, x_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]
    # Train on the embeddings
    model.fit(x_train, y_train)
    # Predict the held-out graph
    pred = model.predict(x_test)
    print(f"fold {i}")
    print("predicted:", pred)
    print("actual:", y_test)
    print()
    pred_list = np.append(pred_list, pred.item())
    i = i + 1

accuracy = accuracy_score(y, pred_list)
print(f"accuracy: {round(accuracy, 3)}")
2-1. Running it
$ python svm.py
.
.
.
accuracy: 0.665
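As a side note (not part of the original script), the same leave-one-out evaluation can be written more compactly with scikit-learn's cross_val_score; the result should match the loop above:
import numpy as np
from torch_geometric.datasets import TUDataset
from sklearn.model_selection import cross_val_score, LeaveOneOut
from sklearn.svm import SVC

x = np.load('./embedded_data/graph2vec_MUTAG.npy')
y = np.array([data.y.item() for data in TUDataset(root="./", name="MUTAG")])

# Each LOO fold is scored on its single held-out sample (0 or 1),
# so the mean over folds equals the leave-one-out accuracy
scores = cross_val_score(SVC(kernel='linear'), x, y, cv=LeaveOneOut())
print(f"accuracy: {round(scores.mean(), 3)}")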
3. Graph classification with a neural network
nn.py
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sklearn.model_selection import LeaveOneOut
from torch_geometric.datasets import TUDataset
from sklearn.metrics import accuracy_score
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(2, 32)
        # MUTAG is a binary classification task, so two output logits
        self.fc2 = nn.Linear(32, 2)

    def forward(self, x):
        x = self.fc1(x)
        x = F.relu(x)
        # Return raw logits: nn.CrossEntropyLoss applies log-softmax internally,
        # so an explicit softmax here would only flatten the gradients
        x = self.fc2(x)
        return x
DATA_PATH = './embedded_data/graph2vec_MUTAG.npy'

# Load the saved embeddings (cast to float32, since nn.Linear expects float32 inputs)
x = torch.from_numpy(np.load(DATA_PATH)).float()

# Load the MUTAG dataset to get the ground-truth labels
dataset = TUDataset(root="./", name="MUTAG")
y = []
for data in dataset:
    y.append(data.y.item())
y = torch.tensor(y)

# Store the predicted labels
pred_list = np.array([])

# Leave-one-out cross-validation
loo = LeaveOneOut()
criterion = nn.CrossEntropyLoss()
i = 1
for train_index, test_index in loo.split(x):
    x_train, x_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]
    # Re-initialize the model and optimizer for every fold so the folds stay independent
    model = Net()
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    # Train
    model.train()
    for epoch in range(100):
        optimizer.zero_grad()
        out = model(x_train)
        loss = criterion(out, y_train)
        loss.backward()
        optimizer.step()
    # Predict the held-out graph
    model.eval()
    with torch.no_grad():
        _, pred = model(x_test).max(dim=1)
    print(f"fold {i}")
    print("predicted:", pred.item())
    print("actual:", y_test.item())
    print("loss:", round(loss.item(), 3))
    print()
    pred_list = np.append(pred_list, pred.item())
    i = i + 1

accuracy = accuracy_score(y, pred_list)
print(f"accuracy: {round(accuracy, 3)}")
3-1. Running it
$ python nn.py
.
.
.
accuracy: 0.846
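With the same two-dimensional embeddings, the small feed-forward network reaches a noticeably higher leave-one-out accuracy than the linear SVM (0.846 vs. 0.665).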