Introduction
I worked through the 2020 edition of the NLP 100 Exercises (言語処理100本ノック2020).
The neural networks are implemented with PyTorch. For the single-layer network, however, I avoid the torch.nn.Module class as far as possible and implement it with plain tensor operations.
The environment is Windows 10 with Python 3.8.
Dataset
In the previous chapter, the training, validation, and evaluation data were created from the dataset as follows.
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the dataset
file = './data/NewsAggregatorDataset/newsCorpora.csv'
newsCorpora = pd.read_csv(file, sep='\t', header=None)
# Set the column names
newsCorpora.columns = ['ID', 'TITLE', 'URL', 'PUBLISHER', 'CATEGORY', 'STORY', 'HOSTNAME', 'TIMESTAMP']
# 2. Keep only the selected publishers (information sources)
# 3. Shuffle the extracted examples
# sample(frac, random_state) ... frac: fraction to sample (1 = 100%), random_state: random seed
pub_list = ['Reuters', 'Huffington Post', 'Businessweek', 'Contactmusic.com', 'Daily Mail']
newsCorpora_sp = (newsCorpora[newsCorpora['PUBLISHER'].isin(pub_list)].sample(frac=1, random_state=0).reset_index(drop=True))
# 4. Split the data
# stratify: split so that the class ratio of the given column is preserved in each split
train, valid_and_test = train_test_split(newsCorpora_sp, test_size=0.2, random_state=0, stratify=newsCorpora_sp['CATEGORY'])
valid, test = train_test_split(valid_and_test, test_size=0.5, random_state=0, stratify=valid_and_test['CATEGORY'])
# Write the splits to files
path = './data/NewsAggregatorDataset/'
train.to_csv(path + 'train.csv', sep='\t', index=None)
valid.to_csv(path + 'valid.csv', sep='\t', index=None)
test.to_csv(path + 'test.csv', sep='\t', index=None)
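As a quick check (just a sketch), the three splits should come out at roughly 80% / 10% / 10% of the extracted examples:

print(len(train), len(valid), len(test))  # roughly 8 : 1 : 1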
70. Features from sums of word vectors
The training data (train.csv), validation data (valid.csv), and evaluation data (test.csv) created in the previous chapter are loaded as pandas DataFrames.
# Load the split data
path = './data/NewsAggregatorDataset/'
train = pd.read_csv(path + 'train.csv', sep='\t')
valid = pd.read_csv(path + 'valid.csv', sep='\t')
test = pd.read_csv(path + 'test.csv', sep='\t')
# Check the data
print(train.head(3))
print('Training data')
print(train['CATEGORY'].value_counts())
print('Validation data')
print(valid['CATEGORY'].value_counts())
print('Evaluation data')
print(test['CATEGORY'].value_counts())
ID TITLE \
0 374424 UPDATE 2-US airlines signal solid demand ahead...
1 314859 GLOBAL MARKETS-Subdued Ifo takes M&A shine off...
2 236379 Hugh Jackman didn't warn daughter about nude s...
URL PUBLISHER \
0 http://in.reuters.com/article/2014/07/09/usa-a... Reuters
1 http://in.reuters.com/article/2014/06/24/marke... Reuters
2 http://www.contactmusic.com/story/hugh-jackman... Contactmusic.com
CATEGORY STORY HOSTNAME TIMESTAMP
0 b dOd6Dz1t7cfuQEMif1KJCYJLKHJ8M in.reuters.com 1404966295491
1 b d02UFXK26SEszSM8Q1X2nUHj2pcOM in.reuters.com 1403700942137
2 e dOjehg3YSV_e60Mp3ra-L5UQ_-AqM www.contactmusic.com 1400764595882
Training data
b 4502
e 4223
t 1219
m 728
Name: CATEGORY, dtype: int64
Validation data
b 562
e 528
t 153
m 91
Name: CATEGORY, dtype: int64
Evaluation data
b 563
e 528
t 152
m 91
Name: CATEGORY, dtype: int64
Feature matrix
gensim is used to convert words into feature vectors with the pretrained Word2Vec model. The only word-level preprocessing applied is removing punctuation, lowercasing, and replacing digit sequences with 0.
import string
import re
import torch
from gensim.models import KeyedVectors

model = KeyedVectors.load_word2vec_format('./data/GoogleNews-vectors-negative300.bin.gz', binary=True)

# Preprocessing
def preprocessing(text):
    # translation table for deleting string.punctuation (!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~)
    table = str.maketrans('', '', string.punctuation)
    text = text.translate(table)  # remove punctuation
    # lowercase
    text = text.lower()
    # replace digit sequences with 0
    text = re.sub('[0-9]+', '0', text)
    return text
# Compute the feature vector of each word in a title, average them, and return the result as a tensor
def get_feature_vec(text):
    vec = []
    # split the string on spaces
    words = word_split(text)
    for word in words:
        # vectorize only the words that are in the model's vocabulary
        # (key_to_index is a dict, so membership checks are fast)
        if word in model.key_to_index:
            vec.append(model[word])
    return torch.tensor(sum(vec) / len(vec))

# Split a string on spaces
def word_split(text):
    # list of the words contained in the title
    words = []
    for t in re.split(' +', text):
        if t != '':
            words.append(t)
    return words
# Turn the article titles into feature matrices
train_fea_mat = [get_feature_vec(preprocessing(tr)) for tr in train['TITLE']]
valid_fea_mat = [get_feature_vec(preprocessing(va)) for va in valid['TITLE']]
test_fea_mat = [get_feature_vec(preprocessing(te)) for te in test['TITLE']]
# Convert the lists of averaged feature vectors into tensors
train_fea_mat = torch.stack(train_fea_mat)
valid_fea_mat = torch.stack(valid_fea_mat)
test_fea_mat = torch.stack(test_fea_mat)
# Save the feature matrices
path = './data/torch/'
torch.save(train_fea_mat, path+'train_fea_mat.pt')
torch.save(valid_fea_mat, path+'valid_fea_mat.pt')
torch.save(test_fea_mat, path+'test_fea_mat.pt')
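As a quick sanity check (just a sketch; the example title is made up, and the model and the functions above are assumed to be loaded), each preprocessed title should map to a single averaged 300-dimensional vector:

sample_title = 'US stocks rise 3 percent after Fed statement'  # hypothetical title
vec = get_feature_vec(preprocessing(sample_title))
print(vec.shape)  # expected: torch.Size([300]), the Word2Vec dimensionality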
Label vectors
Numbers are assigned to the categories exactly as in the example in the problem statement (b = business, t = science and technology, e = entertainment, m = health).
category_dict = {'b':0, 't':1, 'e':2, 'm':3}
train_lab_vec = torch.tensor(train['CATEGORY'].map(lambda x: category_dict[x]).values)
valid_lab_vec = torch.tensor(valid['CATEGORY'].map(lambda x: category_dict[x]).values)
test_lab_vec = torch.tensor(test['CATEGORY'].map(lambda x: category_dict[x]).values)
# Save the label vectors
path = './data/torch/'
torch.save(train_lab_vec, path+'train_lab_vec.pt')
torch.save(valid_lab_vec, path+'valid_lab_vec.pt')
torch.save(test_lab_vec, path+'test_lab_vec.pt')
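As a consistency check (just a sketch), the label counts can be compared with the category counts printed earlier:

print(torch.bincount(train_lab_vec))  # counts for labels 0-3; should match the CATEGORY counts above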
71. Prediction with a single-layer neural network
It is more common to implement neural networks with the torch.nn.Module class, but to stay close to the problem statement the weight matrix $W$ is implemented here as a plain tensor. In the following problems this matrix $W$ is also trained purely through tensor operations.
# Load the data
path = 'data/torch/train_fea_mat.pt'
train_fea_mat = torch.load(path)
print(train_fea_mat.shape)
W = torch.randn(300, 4)
softmax = torch.nn.Softmax(dim=1)
print(softmax(torch.matmul(train_fea_mat[:1], W)))
print(softmax(torch.matmul(train_fea_mat[:4], W)))
torch.Size([10672, 300])
tensor([[0.1038, 0.2496, 0.2370, 0.4096]])
tensor([[0.1038, 0.2496, 0.2370, 0.4096],
[0.2429, 0.2660, 0.1713, 0.3198],
[0.0426, 0.4262, 0.2380, 0.2932],
[0.4396, 0.0468, 0.1468, 0.3669]])
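Each row of the output is a probability distribution over the four categories, so it should sum to 1. A small check (just a sketch, reusing W and softmax from above):

probs = softmax(torch.matmul(train_fea_mat[:4], W))
print(probs.sum(dim=1))  # expected: tensor([1., 1., 1., 1.]) up to rounding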
72. Computing loss and gradients
The cross-entropy loss is computed from the product $\boldsymbol{x}W$ of the input $\boldsymbol{x}$ and the matrix $W$ together with the label vector. The softmax that was applied explicitly in problem 71 is built into PyTorch's torch.nn.CrossEntropyLoss class. This class also averages the per-example losses when it is given a set of examples.
# Load the data
path = './data/torch/'
train_fea_mat = torch.load(path+'train_fea_mat.pt')
print(train_fea_mat.shape)
train_lab_vec = torch.load(path+'train_lab_vec.pt')
print(train_lab_vec.shape)
print('---')

# Single example x1
print('Example x1')
W = torch.randn(300, 4, requires_grad=True)
# loss
loss_fn = torch.nn.CrossEntropyLoss()
print('Loss')
loss = loss_fn(torch.matmul(train_fea_mat[:1], W), train_lab_vec[:1])
print(loss)
# gradient
print('Gradient')
loss.backward()
print(W.grad)
print('---')

# Set of examples
print('Set of examples')
W = torch.randn(300, 4, requires_grad=True)
loss = loss_fn(torch.matmul(train_fea_mat[:4], W), train_lab_vec[:4])
print(loss)
# gradient
print('Gradient')
loss.backward()
print(W.grad)
torch.Size([10672, 300])
torch.Size([10672])
---
Example x1
Loss
tensor(0.5833, grad_fn=<NllLossBackward>)
Gradient
tensor([[ 1.0242e-02, -1.3424e-04, -7.7465e-03, -2.3613e-03],
[-1.1733e-02, 1.5378e-04, 8.8744e-03, 2.7051e-03],
[-7.4652e-04, 9.7840e-06, 5.6462e-04, 1.7211e-04],
...,
[ 3.9496e-03, -5.1765e-05, -2.9873e-03, -9.1059e-04],
[-6.9436e-02, 9.1005e-04, 5.2518e-02, 1.6009e-02],
[ 7.9347e-02, -1.0399e-03, -6.0014e-02, -1.8293e-02]])
---
Set of examples
tensor(1.8652, grad_fn=<NllLossBackward>)
Gradient
tensor([[ 0.0095, 0.0002, -0.0119, 0.0022],
[-0.0082, 0.0003, 0.0055, 0.0024],
[-0.0006, -0.0005, 0.0056, -0.0045],
...,
[-0.0048, 0.0001, 0.0020, 0.0026],
[-0.0519, 0.0026, 0.0352, 0.0142],
[ 0.0419, -0.0011, -0.0415, 0.0008]])
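To double-check autograd, the gradient can be compared against the analytic formula for softmax cross-entropy, $\partial L/\partial W = X^\top(\mathrm{softmax}(XW) - Y)/n$ with $Y$ the one-hot labels. This is only a sketch and not part of the solution above:

X = train_fea_mat[:4]
y = train_lab_vec[:4]
W = torch.randn(300, 4, requires_grad=True)
loss = torch.nn.CrossEntropyLoss()(torch.matmul(X, W), y)
loss.backward()
with torch.no_grad():
    probs = torch.softmax(torch.matmul(X, W), dim=1)
    y_onehot = torch.nn.functional.one_hot(y, num_classes=4).float()
    manual_grad = torch.matmul(X.T, probs - y_onehot) / len(y)
print(torch.allclose(W.grad, manual_grad, atol=1e-5))  # should typically print True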
73. Training with stochastic gradient descent
The training set is mini-batched with TensorDataset and DataLoader from torch.utils.data. Since plain stochastic gradient descent (SGD) is used here, batch_size=1 is set and the weights are updated every time a single example is fed in.
%%time
from torch.utils.data import TensorDataset, DataLoader

# Dataset
ds = TensorDataset(train_fea_mat, train_lab_vec)
# Build the DataLoader
loader = DataLoader(ds, batch_size=1, shuffle=True)
epochs = 100
learning_rate = 0.01
# Weight matrix W
W = torch.randn(300, 4, requires_grad=True)
# Loss function
loss_fn = torch.nn.CrossEntropyLoss()

for epoch in range(1, epochs + 1):
    for inputs, targets in loader:
        # loss
        loss = loss_fn(torch.matmul(inputs, W), targets)
        # gradient
        loss.backward()
        # update
        W = (W - W.grad * learning_rate).detach().requires_grad_()
    if epoch % 10 == 0:
        print(f"Epoch {epoch: 3d}: loss={loss: .4f}")
Epoch 10: loss= 1.2607
Epoch 20: loss= 0.0220
Epoch 30: loss= 0.0898
Epoch 40: loss= 0.0420
Epoch 50: loss= 0.1452
Epoch 60: loss= 0.0023
Epoch 70: loss= 0.5905
Epoch 80: loss= 0.0040
Epoch 90: loss= 0.0043
Epoch 100: loss= 0.0017
Wall time: 2min 49s
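The update line above rebuilds W as a fresh leaf tensor after every step. An equivalent and perhaps more common pattern (just a sketch, not the code used for the results above) keeps the same W, updates it in place under torch.no_grad(), and clears the gradient so it does not accumulate:

# inside the inner loop, after loss.backward()
with torch.no_grad():
    W -= learning_rate * W.grad
    W.grad.zero_()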
74. Measuring accuracy
The matrix W trained above is used to predict category labels, and the accuracy is measured on the training data and on the evaluation data.
# Return the predicted class labels for the given inputs
def pred(inputs, W):
    softmax = torch.nn.Softmax(dim=1)
    # class probabilities
    class_probs = softmax(torch.matmul(inputs, W))
    # index of the most probable class
    class_idx = torch.argmax(class_probs, dim=1)
    return class_idx

# Return the accuracy computed from predicted and true labels
def accuracy(pred, targets):
    correct = 0
    for p, t in zip(pred, targets):
        if p == t:
            correct += 1
    return correct / len(pred)

# Accuracy on the training data
with torch.no_grad():
    train_pred = pred(train_fea_mat, W)
    train_accuracy = accuracy(train_pred, train_lab_vec)
    print(f'train accuracy:{train_accuracy: .3f}')

# Evaluation data
# Load the data
path = './data/torch/'
test_fea_mat = torch.load(path+'test_fea_mat.pt')
test_lab_vec = torch.load(path+'test_lab_vec.pt')
# Accuracy on the evaluation data
with torch.no_grad():
    test_pred = pred(test_fea_mat, W)
    test_accuracy = accuracy(test_pred, test_lab_vec)
    print(f'test accuracy:{test_accuracy: .3f}')
train accuracy: 0.903
test accuracy: 0.886
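The accuracy function above loops over the examples one by one. An equivalent vectorized version (just a sketch) is:

# Vectorized accuracy, equivalent to the loop-based accuracy() above
def accuracy_vec(pred_labels, targets):
    return (pred_labels == targets).float().mean().item()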
75. Plotting loss and accuracy
The loss and accuracy at the end of each epoch are recorded and then plotted.
%%time
# Load the data
path = 'data/torch/'
valid_fea_mat = torch.load(path+'valid_fea_mat.pt')
valid_lab_vec = torch.load(path+'valid_lab_vec.pt')
# Dataset
ds = TensorDataset(train_fea_mat, train_lab_vec)
# Build the DataLoader
loader = DataLoader(ds, batch_size=1, shuffle=True)
epochs = 100
learning_rate = 0.01
# Weight matrix W
W = torch.randn(300, 4, requires_grad=True)
# Loss function
loss_fn = torch.nn.CrossEntropyLoss()
# Record loss and accuracy for plotting
train_losses = []
valid_losses = []
train_accuracies = []
valid_accuracies = []

for epoch in range(1, epochs + 1):
    for inputs, targets in loader:
        # loss
        loss = loss_fn(torch.matmul(inputs, W), targets)
        # gradient
        loss.backward()
        # update
        W = (W - W.grad * learning_rate).detach().requires_grad_()
    # loss and accuracy at the end of the epoch
    with torch.no_grad():
        # training data
        train_pred = pred(train_fea_mat, W)
        train_accuracy = accuracy(train_pred, train_lab_vec)
        train_loss = loss_fn(torch.matmul(train_fea_mat, W), train_lab_vec).item()
        # validation data
        valid_pred = pred(valid_fea_mat, W)
        valid_accuracy = accuracy(valid_pred, valid_lab_vec)
        valid_loss = loss_fn(torch.matmul(valid_fea_mat, W), valid_lab_vec).item()
    # record loss and accuracy
    train_losses.append(train_loss)
    train_accuracies.append(train_accuracy)
    valid_losses.append(valid_loss)
    valid_accuracies.append(valid_accuracy)
    if epoch % 10 == 0:
        print(f"Epoch {epoch: 3d}:")
        print(f"train [loss: {train_loss: .4f}] [accuracy: {train_accuracy: .3f}]")
        print(f"valid [loss: {valid_loss: .4f}] [accuracy: {valid_accuracy: .3f}]")
Epoch 10:
train [loss: 0.4004] [accuracy: 0.864]
valid [loss: 0.4301] [accuracy: 0.860]
Epoch 20:
train [loss: 0.3526] [accuracy: 0.880]
valid [loss: 0.3975] [accuracy: 0.871]
Epoch 30:
train [loss: 0.3296] [accuracy: 0.888]
valid [loss: 0.3837] [accuracy: 0.875]
Epoch 40:
train [loss: 0.3163] [accuracy: 0.893]
valid [loss: 0.3768] [accuracy: 0.882]
Epoch 50:
train [loss: 0.3075] [accuracy: 0.896]
valid [loss: 0.3733] [accuracy: 0.883]
Epoch 60:
train [loss: 0.3008] [accuracy: 0.899]
valid [loss: 0.3704] [accuracy: 0.885]
Epoch 70:
train [loss: 0.2958] [accuracy: 0.901]
valid [loss: 0.3690] [accuracy: 0.885]
Epoch 80:
train [loss: 0.2920] [accuracy: 0.901]
valid [loss: 0.3688] [accuracy: 0.884]
Epoch 90:
train [loss: 0.2888] [accuracy: 0.902]
valid [loss: 0.3680] [accuracy: 0.882]
Epoch 100:
train [loss: 0.2862] [accuracy: 0.903]
valid [loss: 0.3679] [accuracy: 0.882]
Wall time: 3min 5s
import matplotlib.pyplot as plt
plt.style.use('ggplot')
fig, ax = plt.subplots(1, 2, figsize=(12, 4))
x = list(range(1, epochs+1, 1))
trains = [train_losses, train_accuracies]
valids = [valid_losses, valid_accuracies]
titles = ['loss', 'accuracy']
for a, y1, y2, t in zip(ax, trains, valids, titles):
a.plot(x, y1, label='train')
a.plot(x, y2, label='valid')
a.set_title(t)
a.legend()
plt.show()
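The figure can also be written to disk next to the other artifacts (just a sketch; the file name is arbitrary):

fig.savefig('./data/torch/75_loss_accuracy.png', dpi=150)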
76. Checkpoints
The problem asks to record the "internal state of the optimization algorithm", but in the training above the optimizer has no internal state that changes. When SGD is implemented with the torch.optim.SGD class, its internal state can be obtained with the state_dict() method, so here SGD is implemented with torch.optim.SGD, partly to see how this feature works.
Each checkpoint records the epoch, the weight matrix, the internal state of the optimizer, and the loss.
%%time
import torch.optim as optim
import os

# Dataset
ds = TensorDataset(train_fea_mat, train_lab_vec)
# Build the DataLoader
loader = DataLoader(ds, batch_size=1, shuffle=True)
epochs = 100
learning_rate = 0.01
# Weight matrix W
W = torch.randn(300, 4, requires_grad=True)
# Loss function
loss_fn = torch.nn.CrossEntropyLoss()
# Optimizer
optimizer = optim.SGD([W], lr=learning_rate)
# Path for the checkpoints
path = '.\\data\\torch\\SGD\\'
# Folder for the checkpoints
os.makedirs(path, exist_ok=True)

for epoch in range(1, epochs + 1):
    for inputs, targets in loader:
        # loss
        loss = loss_fn(torch.matmul(inputs, W), targets)
        # gradient
        optimizer.zero_grad()
        loss.backward()
        # update
        optimizer.step()
    # loss and accuracy
    with torch.no_grad():
        # training data
        train_pred = pred(train_fea_mat, W)
        train_accuracy = accuracy(train_pred, train_lab_vec)
        train_loss = loss_fn(torch.matmul(train_fea_mat, W), train_lab_vec).item()
        # validation data
        valid_pred = pred(valid_fea_mat, W)
        valid_accuracy = accuracy(valid_pred, valid_lab_vec)
        valid_loss = loss_fn(torch.matmul(valid_fea_mat, W), valid_lab_vec).item()
    if epoch % 10 == 0:
        print(f"Epoch {epoch: 3d}:")
        print(f"train [loss: {train_loss: .4f}] [accuracy: {train_accuracy: .3f}]")
        print(f"valid [loss: {valid_loss: .4f}] [accuracy: {valid_accuracy: .3f}]")
    # checkpoint (file name zero-padded to three digits)
    str_epoch = f'{epoch:03d}'
    torch.save({'epoch': epoch, 'W': W, 'optimizer_state_dict': optimizer.state_dict(),
                'loss': loss,}, path+str_epoch+'.qt')
Let's check the contents of the checkpoints. Here the contents for epochs 1 and 100 are shown.
path = '.\\data\\torch\\SGD\\'
for str_epoch in ['001', '100']:
checkpoint = torch.load(path+str_epoch+'.qt')
print(str_epoch+'.qt')
print(checkpoint['W'])
print(checkpoint['optimizer_state_dict'], '\n')
001.qt
tensor([[-1.1179e+00, -5.2224e-01, -8.5316e-01, 1.0265e-03],
[-1.0001e+00, 1.4584e+00, -7.6860e-01, -5.5979e-01],
[-6.5533e-01, 1.0893e+00, 1.8742e+00, -6.3610e-02],
...,
[-1.3617e-01, -1.1282e+00, -3.3203e-01, 7.7469e-01],
[ 1.0637e+00, 3.2511e-02, -2.5581e-01, -3.2261e-01],
[-6.7052e-01, 2.2379e-01, -7.1881e-01, 7.4596e-01]],
requires_grad=True)
{'state': {0: {'momentum_buffer': None}}, 'param_groups': [{'lr': 0.01, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'params': [0]}]}
100.qt
tensor([[-1.7633, 1.1876, -1.9330, 0.0163],
[-0.9714, 0.6202, 0.3532, -0.8718],
[ 1.5965, 0.8069, -1.2901, 1.1309],
...,
[ 0.0246, -1.1899, 0.1920, 0.1514],
[-0.3103, -0.4224, 0.6214, 0.6293],
[-1.7357, -1.2247, 0.3008, 2.2404]], requires_grad=True)
{'state': {0: {'momentum_buffer': None}}, 'param_groups': [{'lr': 0.01, 'momentum': 0, 'dampening': 0, 'weight_decay': 0, 'nesterov': False, 'params': [0]}]}
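A checkpoint saved this way can also be used to resume training. A minimal sketch (the epoch number 050 is an arbitrary choice; path and learning_rate are assumed to be the values defined above):

checkpoint = torch.load(path + '050.qt')
W = checkpoint['W']
optimizer = optim.SGD([W], lr=learning_rate)
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
start_epoch = checkpoint['epoch'] + 1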
The benefit is hard to see here, but the state_dict() method should become much more useful for models with more complex architectures and optimization algorithms!
77. Mini-batching
Batch sizes of 1, 2, 4, 8, ..., 128 were tried, training for 10 epochs at each batch size.
import time

# Dataset
ds = TensorDataset(train_fea_mat, train_lab_vec)
epochs = 10
learning_rate = 0.01
# Batch sizes
batch_sizes = [2**i for i in range(8)]
# Elapsed times
times = []

for batch_size in batch_sizes:
    # Build the DataLoader
    loader = DataLoader(ds, batch_size=batch_size, shuffle=True)
    # Weight matrix W
    W = torch.randn(300, 4, requires_grad=True)
    # Loss function
    loss_fn = torch.nn.CrossEntropyLoss()
    # Optimizer
    optimizer = optim.SGD([W], lr=learning_rate)
    print('batch size: ', batch_size)
    start = time.time()
    for epoch in range(1, epochs + 1):
        for inputs, targets in loader:
            # loss
            loss = loss_fn(torch.matmul(inputs, W), targets)
            # gradient
            optimizer.zero_grad()
            loss.backward()
            # update
            optimizer.step()
        # loss and accuracy
        with torch.no_grad():
            # training data
            train_pred = pred(train_fea_mat, W)
            train_accuracy = accuracy(train_pred, train_lab_vec)
            train_loss = loss_fn(torch.matmul(train_fea_mat, W), train_lab_vec).item()
            # validation data
            valid_pred = pred(valid_fea_mat, W)
            valid_accuracy = accuracy(valid_pred, valid_lab_vec)
            valid_loss = loss_fn(torch.matmul(valid_fea_mat, W), valid_lab_vec).item()
        if epoch % 2 == 0:
            print(f"Epoch {epoch: 3d}:")
            print(f"train [loss: {train_loss: .4f}] [accuracy: {train_accuracy: .3f}]")
            print(f"valid [loss: {valid_loss: .4f}] [accuracy: {valid_accuracy: .3f}]")
    # time per epoch
    t = (time.time()-start) / epochs
    times.append(t)
    print('time / epoch: ', t, '\n')
print(times)
...(training logs omitted)...
[2.1517689704895018, 1.108371639251709, 0.6498688220977783, 0.4161367654800415, 0.28735790252685545, 0.20714108943939208, 0.18788692951202393, 0.16640586853027345]
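For readability, each measured time per epoch can be paired with its batch size (just a sketch using the lists defined above):

for bs, t in zip(batch_sizes, times):
    print(f'batch size {bs:4d}: {t:.3f} s / epoch')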
78. Training on a GPU
In principle, training on the GPU should only require moving the tensors onto the GPU. My understanding of this is still shallow, though, so the GPU may be enabled where it is unnecessary, or not enabled where it should be.
# GPU
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
# Dataset
ds = TensorDataset(train_fea_mat.to(device), train_lab_vec.to(device))
epochs = 10
learning_rate = 0.01
# Batch sizes
batch_sizes = [2**i for i in range(8)]
# Elapsed times
times = []

for batch_size in batch_sizes:
    # Build the DataLoader
    loader = DataLoader(ds, batch_size=batch_size, shuffle=True)
    # Weight matrix W (created directly on the device so that it stays a leaf tensor)
    W = torch.randn(300, 4, device=device, requires_grad=True)
    # Loss function
    loss_fn = torch.nn.CrossEntropyLoss()
    # Optimizer
    optimizer = optim.SGD([W], lr=learning_rate)
    print('batch size: ', batch_size)
    start = time.time()
    for epoch in range(1, epochs + 1):
        for inputs, targets in loader:
            # loss
            loss = loss_fn(torch.matmul(inputs, W), targets)
            # gradient
            optimizer.zero_grad()
            loss.backward()
            # update
            optimizer.step()
        # loss and accuracy
        with torch.no_grad():
            # training data
            train_pred = pred(train_fea_mat.to(device), W)
            train_accuracy = accuracy(train_pred, train_lab_vec.to(device))
            train_loss = loss_fn(torch.matmul(train_fea_mat.to(device), W), train_lab_vec.to(device)).item()
            # validation data
            valid_pred = pred(valid_fea_mat.to(device), W)
            valid_accuracy = accuracy(valid_pred, valid_lab_vec.to(device))
            valid_loss = loss_fn(torch.matmul(valid_fea_mat.to(device), W), valid_lab_vec.to(device)).item()
        if epoch % 2 == 0:
            print(f"Epoch {epoch: 3d}:")
            print(f"train [loss: {train_loss: .4f}] [accuracy: {train_accuracy: .3f}]")
            print(f"valid [loss: {valid_loss: .4f}] [accuracy: {valid_accuracy: .3f}]")
    # time per epoch
    t = (time.time()-start) / epochs
    times.append(t)
    print('time / epoch: ', t, '\n')
print(times)
...(training logs omitted)...
[2.2572943925857545, 1.1734703302383422, 0.6524808883666993, 0.43701658248901365, 0.30716125965118407, 0.251761794090271, 0.18550732135772705, 0.18426837921142578]
The computation is not faster than without the GPU. GPU computation should start to pay off with more complex architectures and larger batches!
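One way to confirm where a computation actually runs (just a sketch, assuming device is set as above) is to look at the device attribute of the tensors involved:

print(torch.cuda.is_available())
x = torch.randn(2, 300).to(device)
print(x.device)  # cuda:0 if a GPU is available, otherwise cpu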
79. Multi-layer neural network
Adding a bias term
First, consider a model that adds a bias vector $\boldsymbol{b}$ to the product $\boldsymbol{x}W$ of the input $\boldsymbol{x}$ and the matrix $W$. The bias vector $\boldsymbol{b}$ is also implemented as a tensor, and a new function pred_with_b is defined that returns the predictions of the model with the bias term.
%%time
# Return the predicted class labels for the given inputs
def pred_with_b(inputs, W, b):
    softmax = torch.nn.Softmax(dim=1)
    # class probabilities
    class_probs = softmax(torch.matmul(inputs, W) + b)
    # index of the most probable class
    class_idx = torch.argmax(class_probs, dim=1)
    return class_idx

# GPU
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
# Dataset
ds = TensorDataset(train_fea_mat.to(device), train_lab_vec.to(device))
# Build the DataLoader
loader = DataLoader(ds, batch_size=1, shuffle=True)
epochs = 100
learning_rate = 0.01
# Weight matrix W (created directly on the device so that it stays a leaf tensor)
W = torch.randn(300, 4, device=device, requires_grad=True)
# Bias term
b = torch.randn(1, 4, device=device, requires_grad=True)
# Loss function
loss_fn = torch.nn.CrossEntropyLoss()
# Optimizer
optimizer = optim.SGD([W, b], lr=learning_rate)

for epoch in range(1, epochs + 1):
    for inputs, targets in loader:
        # loss
        loss = loss_fn(torch.matmul(inputs, W)+b, targets)
        # gradient
        optimizer.zero_grad()
        loss.backward()
        # update
        optimizer.step()
    # loss and accuracy
    with torch.no_grad():
        # training data
        train_pred = pred_with_b(train_fea_mat.to(device), W, b)
        train_accuracy = accuracy(train_pred, train_lab_vec.to(device))
        train_loss = loss_fn(torch.matmul(train_fea_mat.to(device), W)+b, train_lab_vec.to(device)).item()
        # validation data
        valid_pred = pred_with_b(valid_fea_mat.to(device), W, b)
        valid_accuracy = accuracy(valid_pred, valid_lab_vec.to(device))
        valid_loss = loss_fn(torch.matmul(valid_fea_mat.to(device), W)+b, valid_lab_vec.to(device)).item()
    if epoch % 10 == 0:
        print(f"Epoch {epoch: 3d}:")
        print(f"train [loss: {train_loss: .4f}] [accuracy: {train_accuracy: .3f}]")
        print(f"valid [loss: {valid_loss: .4f}] [accuracy: {valid_accuracy: .3f}]")
Epoch 10:
train [loss: 0.3958] [accuracy: 0.862]
valid [loss: 0.4373] [accuracy: 0.850]
Epoch 20:
train [loss: 0.3491] [accuracy: 0.879]
valid [loss: 0.4000] [accuracy: 0.869]
Epoch 30:
train [loss: 0.3285] [accuracy: 0.890]
valid [loss: 0.3851] [accuracy: 0.879]
Epoch 40:
train [loss: 0.3148] [accuracy: 0.893]
valid [loss: 0.3778] [accuracy: 0.882]
Epoch 50:
train [loss: 0.3063] [accuracy: 0.896]
valid [loss: 0.3736] [accuracy: 0.883]
Epoch 60:
train [loss: 0.2997] [accuracy: 0.897]
valid [loss: 0.3712] [accuracy: 0.885]
Epoch 70:
train [loss: 0.2956] [accuracy: 0.900]
valid [loss: 0.3703] [accuracy: 0.885]
Epoch 80:
train [loss: 0.2911] [accuracy: 0.900]
valid [loss: 0.3680] [accuracy: 0.888]
Epoch 90:
train [loss: 0.2877] [accuracy: 0.901]
valid [loss: 0.3675] [accuracy: 0.887]
Epoch 100:
train [loss: 0.2850] [accuracy: 0.902]
valid [loss: 0.3671] [accuracy: 0.888]
Wall time: 4min 4s
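The pair (W, b) trained above is exactly what a single torch.nn.Linear layer encapsulates, which is what the torch.nn.Module implementation below builds on. A small sketch of the correspondence:

linear = torch.nn.Linear(300, 4)  # holds a weight of shape (4, 300) and a bias of shape (4,)
x = torch.randn(5, 300)
y1 = linear(x)  # x @ linear.weight.T + linear.bias
y2 = torch.matmul(x, linear.weight.T) + linear.bias
print(torch.allclose(y1, y2))  # should print True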
Multi-layer neural network
Here the implementation uses the torch.nn.Module class.
The network is defined as the class MLNet. It has two fully connected layers with a hidden size of 64, and the weights and biases are initialized from a normal distribution with mean 0 and standard deviation 1.
import torch.nn as nn

class MLNet(nn.Module):
    def __init__(self, input_size, output_size):
        super(MLNet, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, output_size)
        # initialize weights and biases with mean 0, standard deviation 1
        nn.init.normal_(self.fc1.weight, 0.0, 1.0)
        nn.init.normal_(self.fc2.weight, 0.0, 1.0)
        nn.init.normal_(self.fc1.bias, 0.0, 1.0)
        nn.init.normal_(self.fc2.bias, 0.0, 1.0)

    def forward(self, x):
        x = self.fc1(x)
        x = self.fc2(x)
        return x

# GPU
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')
# Dataset
ds = TensorDataset(train_fea_mat.to(device), train_lab_vec.to(device))
# Build the DataLoader
loader = DataLoader(ds, batch_size=1, shuffle=True)
# Hyperparameters
input_size = 300
output_size = 4
epochs = 100
learning_rate = 0.01
# Loss function
loss_fn = torch.nn.CrossEntropyLoss()
# Record loss and accuracy for plotting
train_losses = []
valid_losses = []
train_accuracies = []
valid_accuracies = []

model = MLNet(input_size, output_size).to(device)
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(1, epochs + 1):
    for inputs, targets in loader:
        # forward
        outputs = model(inputs)
        # loss
        loss = loss_fn(outputs, targets)
        # gradient
        optimizer.zero_grad()
        loss.backward()
        # update
        optimizer.step()
    # loss and accuracy
    with torch.no_grad():
        # training data
        train_pred = model(train_fea_mat.to(device))
        train_accuracy = accuracy(torch.argmax(train_pred, dim=1), train_lab_vec.to(device))
        train_loss = loss_fn(train_pred, train_lab_vec.to(device)).item()
        # validation data
        valid_pred = model(valid_fea_mat.to(device))
        valid_accuracy = accuracy(torch.argmax(valid_pred, dim=1), valid_lab_vec.to(device))
        valid_loss = loss_fn(valid_pred, valid_lab_vec.to(device)).item()
    # record loss and accuracy
    train_losses.append(train_loss)
    train_accuracies.append(train_accuracy)
    valid_losses.append(valid_loss)
    valid_accuracies.append(valid_accuracy)
    if epoch % 10 == 0:
        print(f"Epoch {epoch: 3d}:")
        print(f"train [loss: {train_loss: .4f}] [accuracy: {train_accuracy: .3f}]")
        print(f"valid [loss: {valid_loss: .4f}] [accuracy: {valid_accuracy: .3f}]")
Epoch 10:
train [loss: 0.3966] [accuracy: 0.857]
valid [loss: 0.4338] [accuracy: 0.847]
Epoch 20:
train [loss: 0.3485] [accuracy: 0.880]
valid [loss: 0.3949] [accuracy: 0.869]
Epoch 30:
train [loss: 0.3263] [accuracy: 0.889]
valid [loss: 0.3808] [accuracy: 0.879]
Epoch 40:
train [loss: 0.3140] [accuracy: 0.895]
valid [loss: 0.3731] [accuracy: 0.882]
Epoch 50:
train [loss: 0.3057] [accuracy: 0.898]
valid [loss: 0.3702] [accuracy: 0.882]
Epoch 60:
train [loss: 0.2989] [accuracy: 0.900]
valid [loss: 0.3670] [accuracy: 0.885]
Epoch 70:
train [loss: 0.2940] [accuracy: 0.902]
valid [loss: 0.3658] [accuracy: 0.885]
Epoch 80:
train [loss: 0.2907] [accuracy: 0.902]
valid [loss: 0.3651] [accuracy: 0.885]
Epoch 90:
train [loss: 0.2884] [accuracy: 0.902]
valid [loss: 0.3647] [accuracy: 0.887]
Epoch 100:
train [loss: 0.2852] [accuracy: 0.903]
valid [loss: 0.3646] [accuracy: 0.882]
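Finally, as in problem 74, the trained multi-layer model can be evaluated on the test data. A minimal sketch, assuming test_fea_mat and test_lab_vec are still loaded from problem 74:

with torch.no_grad():
    test_out = model(test_fea_mat.to(device))
    test_accuracy = accuracy(torch.argmax(test_out, dim=1), test_lab_vec.to(device))
print(f'test accuracy:{test_accuracy: .3f}')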