はじめに
ResNet, VGGなど、ポピュラーなモデルはtensorflowやpytorchといったライブラリで簡単に使えるように提供されています。
一方で中身をあまり意識しないで使ってしまうため、自分で手を動かしてモデルのclassを作ってみたいと思った次第です。
自分用メモのため、乱雑ですがご容赦ください。
動作環境
- Python 3.8.5
- pytorch 1.8.0
モデル構造
元論文はこちらです。
VGGはlayer数にバリエーションがあり、VGG16は16層からなるモデルです。
https://arxiv.org/pdf/1409.1556.pdf
13層の畳み込み層、3層の全結合層を直列に並べたシンプルな作りに見えます。
材料
- nn.Conv2d
- nn.ReLU
- nn.MaxPool2d
- nn.Linear
- nn.Dropout2d
書き出してみるとこれだけなんですね。
クッキング
import torch.nn as nn
class VGG16(nn.Module):
    """VGG16 (Simonyan & Zisserman, 2014): 13 conv layers followed by 3 FC layers.

    Args:
        n_classes: number of output classes for the final fully connected layer.
    """

    def __init__(self, n_classes):
        super(VGG16, self).__init__()
        self.feature_modules = FeatureModules()
        self.classifier_modules = ClassifierModules(n_classes)

    def forward(self, x):
        # Bug fix: original referenced self.feature_conv, an attribute that was
        # never defined (the conv stack is stored as self.feature_modules).
        x = self.feature_modules(x)
        # Bug fix: the (N, 512, 7, 7) feature maps must be flattened to
        # (N, 25088) before the classifier, whose first Linear expects
        # in_features=25088 (assumes 224x224 input — TODO confirm).
        x = x.flatten(1)
        output = self.classifier_modules(x)
        return output
class FeatureModules(nn.Module):
    """The 13-conv-layer feature extractor of VGG16.

    Five conv groups (2, 2, 3, 3, 3 convs), each followed by a 2x2 max-pool.
    All convs are 3x3, stride 1, padding 1, so only the pools shrink the
    spatial size (by 2x each): a 3x224x224 input yields 512x7x7 features.
    """

    def __init__(self):
        super(FeatureModules, self).__init__()
        # out channels 64 modules
        self.conv_1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=(3,3), stride=(1,1), padding=(1,1))
        self.relu_1 = nn.ReLU(inplace=True)
        self.conv_2 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3,3), stride=(1,1), padding=(1,1))
        self.relu_2 = nn.ReLU(inplace=True)
        self.maxpool_2 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1)
        # out channels 128 modules
        self.conv_3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(3,3), stride=(1,1), padding=(1,1))
        self.relu_3 = nn.ReLU(inplace=True)
        self.conv_4 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3,3), stride=(1,1), padding=(1,1))
        self.relu_4 = nn.ReLU(inplace=True)
        self.maxpool_4 = nn.MaxPool2d(kernel_size=(2,2), stride=2, padding=0, dilation=1)
        # out channels 256 modules
        self.conv_5 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=(3,3), stride=(1,1), padding=(1,1))
        self.relu_5 = nn.ReLU(inplace=True)
        self.conv_6 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=(3,3), stride=(1,1), padding=(1,1))
        self.relu_6 = nn.ReLU(inplace=True)
        self.conv_7 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=(3,3), stride=(1,1), padding=(1,1))
        self.relu_7 = nn.ReLU(inplace=True)
        self.maxpool_7 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1)
        # out channels 512 modules
        self.conv_8 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=(3,3), stride=(1,1), padding=(1,1))
        self.relu_8 = nn.ReLU(inplace=True)
        self.conv_9 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3,3), stride=(1,1), padding=(1,1))
        self.relu_9 = nn.ReLU(inplace=True)
        self.conv_10 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3,3), stride=(1,1), padding=(1,1))
        self.relu_10 = nn.ReLU(inplace=True)
        self.maxpool_10 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1)
        # out channels 512 modules
        self.conv_11 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3,3), stride=(1,1), padding=(1,1))
        self.relu_11 = nn.ReLU(inplace=True)
        self.conv_12 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3,3), stride=(1,1), padding=(1,1))
        self.relu_12 = nn.ReLU(inplace=True)
        self.conv_13 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3,3), stride=(1,1), padding=(1,1))
        self.relu_13 = nn.ReLU(inplace=True)
        self.maxpool_13 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1)

    def forward(self, x):
        x = self.conv_1(x)
        x = self.relu_1(x)
        x = self.conv_2(x)
        x = self.relu_2(x)
        x = self.maxpool_2(x)
        x = self.conv_3(x)
        x = self.relu_3(x)
        x = self.conv_4(x)
        x = self.relu_4(x)
        x = self.maxpool_4(x)
        x = self.conv_5(x)
        x = self.relu_5(x)
        x = self.conv_6(x)
        x = self.relu_6(x)
        x = self.conv_7(x)
        x = self.relu_7(x)
        x = self.maxpool_7(x)
        x = self.conv_8(x)
        x = self.relu_8(x)
        x = self.conv_9(x)
        x = self.relu_9(x)
        x = self.conv_10(x)
        x = self.relu_10(x)
        x = self.maxpool_10(x)
        x = self.conv_11(x)
        x = self.relu_11(x)
        # Bug fix: the original applied conv_11 twice and never used conv_13 or
        # relu_13, producing only 12 distinct conv layers; the last group is
        # now conv_11 -> conv_12 -> conv_13 as in the VGG16 architecture.
        x = self.conv_12(x)
        x = self.relu_12(x)
        x = self.conv_13(x)
        x = self.relu_13(x)
        x = self.maxpool_13(x)
        return x
class ClassifierModules(nn.Module):
    """The 3-FC-layer classifier head of VGG16.

    Expects a flattened feature vector of 25088 (= 512 * 7 * 7) elements and
    maps it through 4096 -> 4096 -> n_classes with ReLU + dropout in between.

    Args:
        n_classes: number of output classes for the final layer.
    """

    def __init__(self, n_classes):
        super(ClassifierModules, self).__init__()
        self.fc_14 = nn.Linear(in_features=25088, out_features=4096)
        self.relu_14 = nn.ReLU(inplace=True)
        # Bug fix: nn.Dropout2d zeroes whole channels of 4-D conv feature
        # maps; on flattened (N, features) input, element-wise nn.Dropout is
        # the correct layer (as used in the original VGG paper).
        self.dropout_14 = nn.Dropout(p=0.5)
        self.fc_15 = nn.Linear(in_features=4096, out_features=4096)
        self.relu_15 = nn.ReLU(inplace=True)
        self.dropout_15 = nn.Dropout(p=0.5)
        self.fc_16 = nn.Linear(in_features=4096, out_features=n_classes)

    def forward(self, x):
        x = self.fc_14(x)
        x = self.relu_14(x)
        x = self.dropout_14(x)
        x = self.fc_15(x)
        x = self.relu_15(x)
        x = self.dropout_15(x)
        x = self.fc_16(x)
        return x
# Instantiate VGG16 for a 20-class classification task.
model = VGG16(n_classes=20)