
Model Cooking: Understanding VGG16 by Building It


Introduction

Popular models such as ResNet and VGG come ready to use in libraries like TensorFlow and PyTorch.
On the other hand, that makes it easy to use them without thinking about what is inside, so I wanted to try writing the model class by hand myself.
These are notes for my own reference, so please excuse the rough presentation.

Environment

  • Python 3.8.5
  • pytorch 1.8.0

Model Architecture

The original paper is here:
https://arxiv.org/pdf/1409.1556.pdf
VGG comes in variants with different numbers of layers; VGG16 is the 16-layer version.
It looks like a simple design: 13 convolutional layers followed by 3 fully connected layers, all in series.
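
As a sanity check on those numbers (assuming the standard 224x224 ImageNet input from the paper), each of the five max-pool stages halves the spatial resolution, so the feature extractor ends at 512 channels of 7x7. That product, 25088, is where the first fully connected layer's input size comes from:

# assumption: 224x224 input, as in the original paper
size = 224
for _ in range(5):   # five max-pool stages, each halving height and width
    size //= 2
print(size)               # 7
print(512 * size * size)  # 25088 = in_features of the first fully connected layer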

Ingredients

  • nn.Conv2d
  • nn.ReLU
  • nn.MaxPool2d
  • nn.Linear
  • nn.Dropout

Written out like this, that's really all it takes.
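
As a quick check on the first ingredient: a 3x3 convolution with stride 1 and padding 1 leaves the spatial size unchanged, so in VGG only the max-pool layers shrink the feature map. A minimal sketch:

import torch
import torch.nn as nn

conv = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=(3,3), stride=(1,1), padding=(1,1))
x = torch.randn(1, 3, 224, 224)
print(conv(x).shape)  # torch.Size([1, 64, 224, 224]) -- spatial size preserved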

Cooking

import torch
import torch.nn as nn

class VGG16(nn.Module):
    def __init__(self, n_classes):
        super(VGG16, self).__init__()
        self.feature_modules = FeatureModules()
        self.classifier_modules = ClassifierModules(n_classes)
        
    def forward(self, x):
        x = self.feature_modules(x)
        x = torch.flatten(x, 1)  # flatten (N, 512, 7, 7) to (N, 25088) for the classifier
        output = self.classifier_modules(x)

        return output

class FeatureModules(nn.Module):
    def __init__(self):
        super(FeatureModules, self).__init__()
        # out channels 64 modules
        self.conv_1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=(3,3), stride=(1,1), padding=(1,1))
        self.relu_1 = nn.ReLU(inplace=True)
        self.conv_2 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3,3), stride=(1,1), padding=(1,1))
        self.relu_2 = nn.ReLU(inplace=True)
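        # each 2x2 max-pool (stride 2) halves the spatial resolution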
        self.maxpool_2 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1)
        
        # out channels 128 modules
        self.conv_3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(3,3), stride=(1,1), padding=(1,1))
        self.relu_3 = nn.ReLU(inplace=True)
        self.conv_4 = nn.Conv2d(in_channels=128, out_channels=128, kernel_size=(3,3), stride=(1,1), padding=(1,1))
        self.relu_4 = nn.ReLU(inplace=True)
        self.maxpool_4 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1)

        # out channels 256 modules
        self.conv_5 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=(3,3), stride=(1,1), padding=(1,1))
        self.relu_5 = nn.ReLU(inplace=True)
        self.conv_6 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=(3,3), stride=(1,1), padding=(1,1))
        self.relu_6 = nn.ReLU(inplace=True)
        self.conv_7 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=(3,3), stride=(1,1), padding=(1,1))
        self.relu_7 = nn.ReLU(inplace=True)
        self.maxpool_7 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1)

        # out channels 512 modules
        self.conv_8 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=(3,3), stride=(1,1), padding=(1,1))
        self.relu_8 = nn.ReLU(inplace=True)
        self.conv_9 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3,3), stride=(1,1), padding=(1,1))
        self.relu_9 = nn.ReLU(inplace=True)
        self.conv_10 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3,3), stride=(1,1), padding=(1,1))
        self.relu_10 = nn.ReLU(inplace=True)
        self.maxpool_10 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1)        

        # out channels 512 modules
        self.conv_11 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3,3), stride=(1,1), padding=(1,1))
        self.relu_11 = nn.ReLU(inplace=True)
        self.conv_12 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3,3), stride=(1,1), padding=(1,1))
        self.relu_12 = nn.ReLU(inplace=True)
        self.conv_13 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=(3,3), stride=(1,1), padding=(1,1))
        self.relu_13 = nn.ReLU(inplace=True)
        self.maxpool_13 = nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1)        


    def forward(self, x):
        x = self.conv_1(x)
        x = self.relu_1(x)
        x = self.conv_2(x)
        x = self.relu_2(x)
        x = self.maxpool_2(x)
        
        x = self.conv_3(x)
        x = self.relu_3(x)
        x = self.conv_4(x)
        x = self.relu_4(x)
        x = self.maxpool_4(x)

        x = self.conv_5(x)
        x = self.relu_5(x)
        x = self.conv_6(x)
        x = self.relu_6(x)
        x = self.conv_7(x)
        x = self.relu_7(x)
        x = self.maxpool_7(x)
                
        x = self.conv_8(x)
        x = self.relu_8(x)
        x = self.conv_9(x)
        x = self.relu_9(x)
        x = self.conv_10(x)
        x = self.relu_10(x)
        x = self.maxpool_10(x)

        x = self.conv_11(x)
        x = self.relu_11(x)
        x = self.conv_12(x)
        x = self.relu_12(x)
        x = self.conv_13(x)
        x = self.relu_13(x)
        x = self.maxpool_13(x)
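        # for a 224x224 input, x is now (N, 512, 7, 7)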

        return x

class ClassifierModules(nn.Module):
    def __init__(self, n_classes):
        super(ClassifierModules, self).__init__()
        self.fc_14 = nn.Linear(in_features=25088, out_features=4096)  # 25088 = 512 * 7 * 7
        self.relu_14 = nn.ReLU(inplace=True)
        self.dropout_14 = nn.Dropout(p=0.5)  # plain Dropout: the input here is flattened, not spatial
        self.fc_15 = nn.Linear(in_features=4096, out_features=4096)
        self.relu_15 = nn.ReLU(inplace=True)
        self.dropout_15 = nn.Dropout(p=0.5)
        self.fc_16 = nn.Linear(in_features=4096, out_features=n_classes)

    def forward(self, x):
        x = self.fc_14(x)
        x = self.relu_14(x)
        x = self.dropout_14(x)
        x = self.fc_15(x)
        x = self.relu_15(x)
        x = self.dropout_15(x)
        x = self.fc_16(x)
        return x

model = VGG16(n_classes=20)
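
As a quick smoke test (the 224x224 input size is an assumption carried over from the paper's ImageNet setting, and n_classes=20 is arbitrary), a dummy batch should come out with one score per class:

x = torch.randn(1, 3, 224, 224)  # dummy batch: (batch, channels, height, width)
output = model(x)
print(output.shape)  # torch.Size([1, 20])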