More than 3 years have passed since last update.

ＡＩシステムトレード開発　VGG16編

Last updated at 2022-02-04 / Posted at 2022-02-01

さてさて、ここまでは順調に進んできたので、今日中に完成まで行けそうな気がしてきました。

本日のお題　https://note.nkmk.me/python-pytorch-pretrained-models-image-classification/

STEP1　具材を準備

　データセットを元に、データローダを作りました。

main.py

# Number of images per mini-batch.
batch_size = 64

# Build data loaders from the images that were loaded via ImageFolder.
train_dataloader = data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = data.DataLoader(val_dataset, batch_size=batch_size, shuffle=True)

# Peek at the first batch of each loader: print the input shape and the labels.
for loader in (train_dataloader, val_dataloader):
    for x, t in loader:
        print(x.shape, t)
        break

中身は、こんな感じですね。
テンソルの形状は、（バッチサイズ、チャネル数、高さ、幅）の順ですね。
チャーハンの具材はそろったという感じですね。

torch.Size([64, 3, 106, 106]) tensor([4, 2, 4, 2, 4, 4, 4, 4, 4, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 0, 4, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 4, 0, 4, 4, 4, 4, 4,
        4, 0, 4, 2, 4, 4, 4, 4, 4, 4, 2, 4, 4, 4, 4, 4])
torch.Size([64, 3, 106, 106]) tensor([4, 4, 4, 2, 4, 3, 4, 4, 4, 4, 1, 4, 4, 4, 4, 4, 2, 4, 4, 2, 4, 4, 4, 2,
        4, 4, 4, 4, 4, 4, 4, 2, 4, 3, 4, 4, 3, 2, 4, 4, 4, 4, 4, 0, 4, 4, 4, 2,
        4, 0, 4, 0, 4, 4, 0, 4, 4, 4, 4, 4, 4, 4, 4, 2])

STEP2　コンロに点火

最初に火加減を見るみたいですね。

main.py

# Load VGG16 with pretrained weights from torchvision.
# (`transforms` is imported here but not used in this snippet.)
from torchvision import models, transforms
vgg16 = models.vgg16(pretrained=True)
# Switch the model to evaluation mode. When this is the last expression of a
# notebook cell, the returned model is also displayed as the cell output.
vgg16.eval()

なんか、すごいレシピが出てきました。
コンロというよりは、電子レンジのメニューのような気がしてきました。

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (18): ReLU(inplace=True)
    (19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (20): ReLU(inplace=True)
    (21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (22): ReLU(inplace=True)
    (23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (25): ReLU(inplace=True)
    (26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (27): ReLU(inplace=True)
    (28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (29): ReLU(inplace=True)
    (30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(7, 7))
  (classifier): Sequential(
    (0): Linear(in_features=25088, out_features=4096, bias=True)
    (1): ReLU(inplace=True)
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=4096, out_features=4096, bias=True)
    (4): ReLU(inplace=True)
    (5): Dropout(p=0.5, inplace=False)
    (6): Linear(in_features=4096, out_features=1000, bias=True)
  )
)

とりあえず、予測させてみる。

main.py

# Grab a single mini-batch of images from the training loader.
for x, t in train_dataloader:
    img_batch = x
    break

# Inference only: disable autograd so no computation graph is kept in memory.
with torch.no_grad():
    result = vgg16(img_batch)
print(type(result))
print(result.shape)

# <class 'torch.Tensor'>
# torch.Size([64, 1000])

# Index of the highest-scoring class for the first image in the batch.
idx = torch.argmax(result[0])
print(idx)

# tensor(549)

# tensor(549)

549？　お前誰やねん！

STEP3 再教育

　ここからは、転移学習を始める　→　https://axa.biopapyrus.jp/deep-learning/object-classification/pytorch-vgg-transferlearning.html

main.py


# Split the dataset into training and validation subsets at an 8:2 ratio.
train_size = int(0.8 * len(images))
val_size = len(images) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(
    images, [train_size, val_size]
)

# Number of images per mini-batch.
batch_size = 64

# Build the data loaders from the images that were loaded via ImageFolder.
# Only the training loader is shuffled: the validation metrics below are sums
# over the whole subset, so batch order does not matter there.
dataloaders = {
    'train': data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True),
    'valid': data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False),
}
dataset_sizes = {
    'train': len(train_dataset),
    'valid': len(val_dataset),
}

from torchvision import models, transforms
vgg16 = models.vgg16(pretrained=True)

# Prefer the second GPU when there is one, otherwise the first GPU, otherwise
# the CPU. Selecting 'cuda:1' unconditionally fails on single-GPU machines.
if torch.cuda.device_count() > 1:
    device = torch.device('cuda:1')
elif torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# Replace the output layer so that the network predicts 5 classes.
num_ftrs = vgg16.classifier[6].in_features
vgg16.classifier[6] = torch.nn.Linear(num_ftrs, out_features=5)

net_ft1 = vgg16.to(device)

# Loss function and optimisation settings.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net_ft1.parameters(), lr=0.001, momentum=0.9)
# Multiply the learning rate by 0.1 every 7 scheduler steps (one step per epoch).
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)


num_epochs = 20
# Per-epoch accuracy for each phase. Entries stay tensors (not Python floats)
# so that code reading this history can keep calling tensor methods on them.
acc_history_ft1 = {'train': [], 'valid': []}

for epoch in range(num_epochs):
    print('Epoch {}/{}'.format(epoch, num_epochs - 1))
    print('-' * 10)

    for phase in ['train', 'valid']:
        is_train = phase == 'train'
        # train(True) is training mode, train(False) is the same as eval().
        net_ft1.train(is_train)

        running_loss = 0.0
        running_corrects = 0

        for inputs, labels in dataloaders[phase]:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Gradients only need resetting when a backward pass will follow.
            if is_train:
                optimizer.zero_grad()

            # Track gradients only while training.
            with torch.set_grad_enabled(is_train):
                outputs = net_ft1(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)

                if is_train:
                    loss.backward()
                    optimizer.step()

            # The loss is a batch mean; weight it by the batch size so that the
            # running total is a per-sample sum.
            running_loss += loss.item() * inputs.size(0)
            running_corrects += (preds == labels).sum()
        if is_train:
            scheduler.step()

        epoch_loss = running_loss / dataset_sizes[phase]
        epoch_acc = running_corrects.double() / dataset_sizes[phase]
        acc_history_ft1[phase].append(epoch_acc)
        print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

　ひととおり、動かすところまで、出来たみたいです。

Epoch 0/19
----------
----------
train Loss: 0.3049 Acc: 0.8899
valid Loss: 0.4941 Acc: 0.8274
...（Epoch 1〜18 は省略）
Epoch 19/19
----------
train Loss: 0.3017 Acc: 0.8919
valid Loss: 0.4893 Acc: 0.8155

　学習データでの精度は約89%、検証データでは約82%。まあ最初なので、こんなものなんでしょうかねえ・・・
　よくやったよ、自分！

STEP4 学習結果の確認

どうやら、エポックは２５あたりで、良いらしい。
しかし、validationが低いのが気にはなりますなあ・・・

main.py

import matplotlib.pyplot as plt
%matplotlib inline

plt.style.use('default')

fig = plt.figure()
ax = fig.add_subplot()
ax.plot([a.cpu().numpy() for a in acc_history_ft1['train']], label='train')
ax.plot([a.cpu().numpy() for a in acc_history_ft1['valid']],label='valid')
ax.legend()
ax.set_ylim(0, 1)

反省点

　少し勉強したところ、VGG-16では入力画像のサイズ（224×224）を合わせる必要があるらしいですね。

https://yukitaka13.hatenablog.jp/entry/2019/01/21/183050#VGG16%E3%81%A8%E3%81%AF
https://aidiary.hatenablog.com/entry/20180212/1518404395

main.py

# Show the shape of the first image and keep a PIL copy of it as-is.
print(imgs[0].shape)
to_pil = transforms.ToPILImage(mode='RGB')
before = to_pil(imgs[0])

# Resize with size 260, then center-crop to 224.
preprocess = transforms.Compose([
    transforms.Resize(260),
    transforms.CenterCrop(224),
])
img = preprocess(imgs[0])
after = to_pil(img)
print(img.shape)