自分のメモ程度にまとめたものになります。
参考になれば幸いです。
1. karateデータセットの確認
karate_dataset.py
from torch_geometric.datasets import KarateClub
# Take the first (index 0) graph of the Karate Club dataset.
karate = KarateClub()
data = karate[0]

# Print each entry produced by iterating over the graph object.
for entry in data:
    print(entry)
コンソール画面
$ python karate_dataset.py
('x', tensor([[1., 0., 0., ..., 0., 0., 0.],
[0., 1., 0., ..., 0., 0., 0.],
[0., 0., 1., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 1., 0., 0.],
[0., 0., 0., ..., 0., 1., 0.],
[0., 0., 0., ..., 0., 0., 1.]]))
('edge_index', tensor([[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3,
3, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7,
7, 7, 8, 8, 8, 8, 8, 9, 9, 10, 10, 10, 11, 12, 12, 13, 13, 13,
13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 19, 20, 20, 21,
21, 22, 22, 23, 23, 23, 23, 23, 24, 24, 24, 25, 25, 25, 26, 26, 27, 27,
27, 27, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 31, 31,
31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33,
33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33],
[ 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 17, 19, 21, 31, 0, 2,
3, 7, 13, 17, 19, 21, 30, 0, 1, 3, 7, 8, 9, 13, 27, 28, 32, 0,
1, 2, 7, 12, 13, 0, 6, 10, 0, 6, 10, 16, 0, 4, 5, 16, 0, 1,
2, 3, 0, 2, 30, 32, 33, 2, 33, 0, 4, 5, 0, 0, 3, 0, 1, 2,
3, 33, 32, 33, 32, 33, 5, 6, 0, 1, 32, 33, 0, 1, 33, 32, 33, 0,
1, 32, 33, 25, 27, 29, 32, 33, 25, 27, 31, 23, 24, 31, 29, 33, 2, 23,
24, 33, 2, 31, 33, 23, 26, 32, 33, 1, 8, 32, 33, 0, 24, 25, 28, 32,
33, 2, 8, 14, 15, 18, 20, 22, 23, 29, 30, 31, 33, 8, 9, 13, 14, 15,
18, 19, 20, 22, 23, 26, 27, 28, 29, 30, 31, 32]]))
('y', tensor([1, 1, 1, 1, 3, 3, 3, 1, 0, 1, 3, 1, 1, 1, 0, 0, 3, 1, 0, 1, 0, 1, 0, 0,
2, 2, 0, 0, 2, 0, 0, 2, 0, 0]))
('train_mask', tensor([ True, False, False, False, True, False, False, False, True, False,
False, False, False, False, False, False, False, False, False, False,
False, False, False, False, True, False, False, False, False, False,
False, False, False, False]))
2. TransformerConvのアーキテクチャ
Net/TFConv.py
import pickle
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, TransformerConv, GATConv
from torch_geometric.nn import Linear
from torch_geometric.datasets import KarateClub
# Seed PyTorch's RNG so every run starts from the same random state.
torch.manual_seed(1)

# Load the dataset once and read both sizes from the same instance
# (the original constructed KarateClub() twice for this).
_karate_club = KarateClub()
num_node_features = _karate_club.num_node_features
num_classes = _karate_club.num_classes

# Width of the hidden layer shared by all models defined in this module.
hidden_size = 64
class TFConv(torch.nn.Module):
    """Two-layer TransformerConv network that outputs per-node log-probabilities.

    Args:
        dataset: Optional dataset object exposing ``num_node_features`` and
            ``num_classes``. When omitted, the module-level
            ``num_node_features`` / ``num_classes`` values are used, which
            keeps the original no-argument construction ``TFConv()`` working
            while also accepting the ``TFConv(dataset)`` form.
    """

    def __init__(self, dataset=None):
        super().__init__()
        if dataset is None:
            in_channels, out_channels = num_node_features, num_classes
        else:
            in_channels = dataset.num_node_features
            out_channels = dataset.num_classes
        self.conv1 = TransformerConv(in_channels, hidden_size)
        self.conv2 = TransformerConv(hidden_size, out_channels)

    def forward(self, data):
        """Return log-softmax class scores for every node in ``data``.

        ``data`` must provide ``x`` and ``edge_index`` attributes.
        """
        x, edge_index = data.x, data.edge_index
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        # Dropout is only active while the module is in training mode.
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)

    @staticmethod
    def save_model(model, path):
        """Pickle ``model`` to ``path``, creating the parent directory if needed.

        Declared static so that the call form ``model.save_model(model, path)``
        works as written.
        """
        import os  # local import: only needed here

        directory = os.path.dirname(path)
        if directory:
            os.makedirs(directory, exist_ok=True)
        with open(path, 'wb') as f:
            pickle.dump(model, f)
class GCN(torch.nn.Module):
    """Two-layer GCNConv network that outputs per-node log-probabilities.

    Layer sizes come from the module-level ``num_node_features``,
    ``hidden_size`` and ``num_classes`` values.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(num_node_features, hidden_size)
        self.conv2 = GCNConv(hidden_size, num_classes)

    def forward(self, data):
        """Return log-softmax class scores for every node in ``data``."""
        edges = data.edge_index
        hidden = F.relu(self.conv1(data.x, edges))
        # Dropout is only active while the module is in training mode.
        hidden = F.dropout(hidden, training=self.training)
        logits = self.conv2(hidden, edges)
        return F.log_softmax(logits, dim=1)
class GAT(torch.nn.Module):
    """Two-layer GATConv network that outputs per-node log-probabilities.

    Layer sizes come from the module-level ``num_node_features``,
    ``hidden_size`` and ``num_classes`` values.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = GATConv(num_node_features, hidden_size)
        self.conv2 = GATConv(hidden_size, num_classes)

    def forward(self, data):
        """Return log-softmax class scores for every node in ``data``."""
        edges = data.edge_index
        hidden = F.relu(self.conv1(data.x, edges))
        # Dropout is only active while the module is in training mode.
        hidden = F.dropout(hidden, training=self.training)
        logits = self.conv2(hidden, edges)
        return F.log_softmax(logits, dim=1)
3. 学習用サンプルソース
learn.py
import time
import torch
import pickle
import torch.nn.functional as F
from sklearn.metrics import accuracy_score
from torch_geometric.datasets import KarateClub
from Net.TFConv import TFConv
# File the trained model is written to at the end of main().
PICKLE_PATH = './model/classification_model.pickle'
def main():
    """Train TFConv on the Karate Club graph, print its accuracy, and save it.

    The model is trained for 100 full-batch epochs (the loss uses every
    node), evaluated on the same graph, and then pickled to ``PICKLE_PATH``.
    """
    import os  # local import: only needed to create the output directory

    dataset = KarateClub()
    data = dataset[0]

    # The TFConv definition shown in this article takes its layer sizes from
    # module-level values, so it is constructed without arguments.
    model = TFConv()
    model.train()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    # Training loop.
    for epoch in range(100):
        optimizer.zero_grad()
        out = model(data)
        loss = F.nll_loss(out, data.y)
        loss.backward()
        optimizer.step()
        # Report every 9th epoch; 99 is a multiple of 9, so the final epoch
        # is included.
        if epoch % 9 == 0:
            print('Epoch %d | Loss: %.4f' % (epoch, loss.item()))

    # Switch to evaluation mode (disables dropout) before inference.
    model.eval()

    # Inference: no gradients are needed, so skip building the autograd graph.
    with torch.no_grad():
        _, pred = model(data).max(dim=1)

    print("結果:", pred)
    print("真値:", data.y)
    accuracy = accuracy_score(data.y, pred)
    print(f"正解率: {round(accuracy, 3)}")

    # torch.nn.Module has no save_model(); pickle the model directly, making
    # sure the target directory exists and the file handle is closed.
    directory = os.path.dirname(PICKLE_PATH)
    if directory:
        os.makedirs(directory, exist_ok=True)
    with open(PICKLE_PATH, 'wb') as f:
        pickle.dump(model, f)
if __name__ == "__main__":
    # Run main() and print the wall-clock time it took, in seconds.
    started_at = time.time()
    main()
    elapsed = time.time() - started_at
    print(round(elapsed, 3), "秒")
3-1. 実行してみる
コンソール結果画面
$ python learn.py
Epoch 0 | Loss: 1.4885
Epoch 9 | Loss: 1.3408
Epoch 18 | Loss: 1.2545
Epoch 27 | Loss: 1.0197
Epoch 36 | Loss: 0.8998
Epoch 45 | Loss: 0.6569
Epoch 54 | Loss: 0.5792
Epoch 63 | Loss: 0.5220
Epoch 72 | Loss: 0.4881
Epoch 81 | Loss: 0.3948
Epoch 90 | Loss: 0.3162
Epoch 99 | Loss: 0.4034
結果: tensor([1, 1, 1, 1, 3, 3, 3, 1, 0, 1, 3, 1, 1, 1, 0, 0, 3, 1, 0, 1, 0, 1, 0, 0,
2, 2, 0, 0, 2, 0, 0, 2, 0, 0])
真値: tensor([1, 1, 1, 1, 3, 3, 3, 1, 0, 1, 3, 1, 1, 1, 0, 0, 3, 1, 0, 1, 0, 1, 0, 0,
2, 2, 0, 0, 2, 0, 0, 2, 0, 0])
正解率: 1.0
0.203 秒
3-2. 保存したモデルを使用してみる
sample.py
import time
import pickle
from sklearn.metrics import accuracy_score
from torch_geometric.datasets import KarateClub
PICKLE_PATH = './model/classification_model.pickle'


def load_model(path = PICKLE_PATH):
    """Load and return the pickled object stored at ``path``.

    Args:
        path: Location of the pickle file; defaults to ``PICKLE_PATH``.

    Returns:
        The unpickled object.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
    """
    # NOTE(security): unpickling can execute arbitrary code. Only load files
    # that this project produced itself.
    # The context manager closes the file even if unpickling fails; the
    # original left the handle open.
    with open(path, 'rb') as f:
        return pickle.load(f)
def main():
    """Load the saved model and print its predictions and accuracy.

    Predictions are made for every node of the Karate Club graph and compared
    with the labels stored in the graph.
    """
    graph = KarateClub()[0]

    net = load_model(PICKLE_PATH)
    # Evaluation mode before running inference.
    net.eval()

    # Inference: keep only the index of the highest score per node.
    scores = net(graph)
    pred = scores.max(dim=1)[1]

    print("結果:", pred)
    print("真値:", graph.y)
    accuracy = accuracy_score(graph.y, pred)
    print(f"正解率: {round(accuracy, 3)}")
if __name__ == "__main__":
    # Run main() and print the wall-clock time it took, in seconds.
    started_at = time.time()
    main()
    elapsed = time.time() - started_at
    print(round(elapsed, 3), "秒")
3-3. 実行してみる
コンソール結果画面
$ python sample.py
結果: tensor([1, 1, 1, 1, 3, 3, 3, 1, 0, 1, 3, 1, 1, 1, 0, 0, 3, 1, 0, 1, 0, 1, 0, 0,
2, 2, 0, 0, 2, 0, 0, 2, 0, 0])
真値: tensor([1, 1, 1, 1, 3, 3, 3, 1, 0, 1, 3, 1, 1, 1, 0, 0, 3, 1, 0, 1, 0, 1, 0, 0,
2, 2, 0, 0, 2, 0, 0, 2, 0, 0])
正解率: 1.0
0.009 秒
3-4. Leave One Outを用いた検証
val.py
import time
import torch
import numpy as np
import pickle
import torch.nn.functional as F
from sklearn.metrics import accuracy_score
from torch_geometric.datasets import KarateClub
from Net.TFConv import TFConv
# Path of the pickled model file. NOTE(review): nothing in the code shown for
# this script reads or writes it — confirm whether it is still needed.
PICKLE_PATH = './model/classification_model.pickle'
# Build a training mask with one entry per node.
def get_train_mask(dataset, index):
    """Return a boolean mask that is True everywhere except at ``index``.

    The mask length is ``dataset.data.y.shape[0]``.
    """
    node_count = dataset.data.y.shape[0]
    mask = torch.ones(node_count, dtype=torch.bool)
    mask[index] = False
    return mask
# Build a test mask with one entry per node.
def get_test_mask(dataset, index):
    """Return a boolean mask that is False everywhere except at ``index``.

    The mask length is ``dataset.data.y.shape[0]``.
    """
    node_count = dataset.data.y.shape[0]
    mask = torch.zeros(node_count, dtype=torch.bool)
    mask[index] = True
    return mask
def main():
    """Run leave-one-out validation of TFConv on the Karate Club graph.

    For each node, a fresh model is trained for 100 epochs with that node's
    label excluded from the loss, and the model is then scored on that
    held-out node only. The mean of these per-node scores is printed as the
    leave-one-out accuracy.
    """
    dataset = KarateClub()
    data = dataset[0]

    # Per-node accuracy results (one entry per held-out node).
    accuracy_rate = np.array([])

    # One train/test round per node.
    for node in range(data.y.shape[0]):
        # The TFConv definition shown in this article takes its layer sizes
        # from module-level values, so it is constructed without arguments.
        model = TFConv()
        model.train()

        # Training mask: every node except the held-out one.
        train_mask = get_train_mask(dataset, node)
        # Test mask: only the held-out node.
        test_mask = get_test_mask(dataset, node)

        optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

        # Training: the loss only sees the labels selected by train_mask.
        for _ in range(100):
            optimizer.zero_grad()
            out = model(data)
            loss = F.nll_loss(out[train_mask], data.y[train_mask])
            loss.backward()
            optimizer.step()
        print('Node %d | Loss: %.4f' % (node, loss.item()))

        # Switch to evaluation mode (disables dropout) before inference.
        model.eval()

        # Inference: no gradients are needed here.
        with torch.no_grad():
            _, pred = model(data).max(dim=1)

        print("ノード:", node, "番目")
        print("結果:", pred[test_mask].item())
        print("真値:", data.y[test_mask].item())
        print()

        # Score the held-out node only. The original scored all nodes,
        # including the ones the model was just trained on, which inflates
        # the reported leave-one-out accuracy.
        accuracy = accuracy_score(data.y[test_mask], pred[test_mask])
        accuracy_rate = np.append(accuracy, accuracy_rate)

    print("Leave One Out:[", round(np.mean(accuracy_rate), 3), "]")
if __name__ == "__main__":
    # Run main() and print the wall-clock time it took, in seconds.
    started_at = time.time()
    main()
    elapsed = time.time() - started_at
    print(round(elapsed, 3), "秒")
3-5. 実行してみる
コンソール結果画面
$ python val.py
Node 0 | Loss: 0.3723
ノード: 0 番目
結果: 1
真値: 1
.
.
.
Node 33 | Loss: 0.1673
ノード: 33 番目
結果: 0
真値: 0
Leave One Out:[ 0.984 ]
5.005 秒