0
0

[Python Graph2Vec]グラフ情報を埋め込んでグラフ予測をしてみる

Posted at 2024-07-03

1. Graph2Vecを試してみる

pythonのGraph2Vecを試してみたので、よければ参考にしてください。

1-1. ライブラリのインストール

$ python -V

Python 3.11.1

$ pip install karateclub
graph2vec.py
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt

from sklearn.decomposition import PCA
from torch_geometric.datasets import TUDataset
from karateclub.graph_embedding import Graph2Vec

import os

# Load the MUTAG dataset (188 small molecular graphs with binary labels).
dataset = TUDataset(root="./", name="MUTAG")

# Convert each PyG `Data` object into a networkx graph for karateclub.
graphs = []
for data in dataset:
    # edge_index is a (2, num_edges) tensor; pair up the source/target rows.
    src, dst = data.edge_index
    edge_list = [(int(s), int(d)) for s, d in zip(src, dst)]
    g = nx.from_edgelist(edge_list)

    # Graph2Vec's WL relabeling reads a "feature" attribute on every node.
    # NOTE(review): this uses the node *index* as the feature, not the atom
    # one-hot stored in data.x — consider data.x.argmax(dim=1) if actual
    # atom labels are wanted. (The original's unused `x = data.x` removed.)
    nx.set_node_attributes(
        g, {j: str(j) for j in range(g.number_of_nodes())}, "feature"
    )

    # karateclub requires nodes numbered consecutively from 0; keep only
    # graphs that satisfy that invariant.
    if sorted(g.nodes()) == list(range(g.number_of_nodes())):
        graphs.append(g)

# Fit Graph2Vec: 4 Weisfeiler-Lehman iterations, 150 doc2vec epochs.
model = Graph2Vec(wl_iterations=4, epochs=150)
model.fit(graphs)

# One embedding per graph; default dimensionality is 128.
emb128d = model.get_embedding()
print("128d:", emb128d.shape)

# Reduce to 2D with PCA for saving and plotting.
pca = PCA(n_components=2)
emb2d = pca.fit_transform(emb128d)
print("2d:", emb2d.shape)

# np.save does not create missing directories — make sure it exists first.
os.makedirs("./embedded_data", exist_ok=True)
np.save('./embedded_data/graph2vec_MUTAG', emb2d)

# Scatter-plot the 2D embeddings.
plt.title("node embedding in 2D")
plt.scatter(emb2d[:, 0], emb2d[:, 1])
plt.show()

1-2. 実行してみる

$ python graph2vec.py

# 128d: (188, 128)
# 2d: (188, 2)

1-3. 埋め込みグラフを表示する

Figure_1.png

2. SVMを用いてグラフ予測をしてみる

svm.py
import numpy as np

# NOTE(review): torchvision is never used in this script; the import is
# kept only to avoid changing the file's dependency surface.
import torchvision.transforms as transforms

from torch_geometric.datasets import TUDataset
from sklearn.model_selection import LeaveOneOut

from sklearn.svm import SVC
from sklearn.metrics import accuracy_score


# Path of the 2D Graph2Vec embeddings saved by graph2vec.py
# (plain string — the original f-string had no placeholders).
DATA_PATH = './embedded_data/graph2vec_MUTAG.npy'

# Load the saved embeddings; expected shape (188, 2).
x = np.load(DATA_PATH)

# Reload MUTAG only to recover the per-graph class labels,
# in the same order as the embeddings.
dataset = TUDataset(root="./", name="MUTAG")

# Ground-truth labels.
y = np.array([data.y.item() for data in dataset])

# Linear-kernel SVM classifier (refit on every fold).
model = SVC(kernel='linear')

# Leave-one-out CV: train on n-1 samples, test on the single left-out one.
loo = LeaveOneOut()

# Collect one prediction per held-out sample in a Python list —
# np.append in a loop copies the whole array every iteration.
pred_list = []

for i, (train_index, test_index) in enumerate(loo.split(x), start=1):
    x_train, x_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Fit on the training fold.
    model.fit(x_train, y_train)

    # Predict the single held-out sample.
    pred = model.predict(x_test)

    print(i, "番目")
    print("予測:", pred)
    print("正解:", y_test)
    print()

    pred_list.append(pred.item())

# Accuracy over all held-out predictions.
accuracy = accuracy_score(y, np.array(pred_list))
print(f"正解率: {round(accuracy, 3)}")

2-1. 実行してみる

$ python svm.py

.
.
.

正解率: 0.665

3. ニューラルネットワークでグラフ分類をしてみる

nn.py
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from sklearn.model_selection import LeaveOneOut
from torch_geometric.datasets import TUDataset
from sklearn.metrics import accuracy_score

class Net(nn.Module):
    """Small MLP classifier over 2-D graph embeddings.

    Maps a (batch, 2) input through one hidden layer of 32 ReLU units
    to (batch, 4) class logits.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(2, 32)   # input: 2-D PCA embedding
        self.fc2 = nn.Linear(32, 4)   # output: 4 class logits

    def forward(self, x):
        """Return raw class logits of shape (batch, 4).

        BUG FIX: the original applied F.softmax here, but the training
        loop feeds this output to nn.CrossEntropyLoss, which applies
        log-softmax internally — softmax-ing twice flattens the loss
        surface and squashes gradients. Returning logits is the correct
        contract; argmax predictions are unchanged because softmax is
        monotonic within each row.
        """
        h = F.relu(self.fc1(x))
        return self.fc2(h)

# Path of the 2D Graph2Vec embeddings saved by graph2vec.py
# (plain string — the original f-string had no placeholders).
DATA_PATH = './embedded_data/graph2vec_MUTAG.npy'

# Load the saved embeddings as a tensor; expected shape (188, 2).
x = torch.from_numpy(np.load(DATA_PATH)).clone()

# Reload MUTAG only to recover the per-graph class labels,
# in the same order as the embeddings.
dataset = TUDataset(root="./", name="MUTAG")

# Ground-truth labels.
y = torch.tensor([data.y.item() for data in dataset])

# Collect one prediction per held-out sample in a Python list —
# np.append in a loop copies the whole array every iteration.
pred_list = []

# Leave-one-out CV: train on n-1 samples, test on the single left-out one.
loo = LeaveOneOut()

for i, (train_index, test_index) in enumerate(loo.split(x), start=1):

    x_train, x_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # BUG FIX: re-initialize the model and optimizer for every fold.
    # The original built them once before the loop, so each fold started
    # from weights already trained in earlier folds — folds in which the
    # current test sample was part of the training data. That leakage
    # inflates the reported accuracy.
    model = Net()
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)

    # Train on the n-1 training samples (full-batch, 100 epochs).
    model.train()
    for epoch in range(100):
        optimizer.zero_grad()
        out = model(x_train)
        loss = criterion(out, y_train)
        # BUG FIX: retain_graph=True removed — every forward pass builds
        # a fresh graph, so retaining the old one only wastes memory.
        loss.backward()
        optimizer.step()

    # Predict the held-out sample; no gradients needed at eval time.
    model.eval()
    with torch.no_grad():
        _, pred = model(x_test).max(dim=1)

    print(i, "番目")
    print("予測:", pred.item())
    print("正解:", y_test.item())
    print("loss:", round(loss.item(), 3))
    print()

    pred_list.append(pred.item())

accuracy = accuracy_score(y, np.array(pred_list))
print(f"正解率: {round(accuracy, 3)}")

3-1. 実行してみる

$ python nn.py

.
.
.

正解率: 0.846
0
0
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
0
0