# Covers material up to p.209.
# %%
import torch
import torch.nn as nn
import torch.nn.functional as F
# %%
class AlexNet(nn.Module):
    """AlexNet for 227x227 RGB images (Krizhevsky et al., 2012).

    Five convolutional layers (max pooling after conv1, conv2 and conv5)
    followed by a three-layer fully connected classifier. The forward
    pass returns a (batch, 1000) tensor of class probabilities.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4, padding=0)
        self.pool1 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.conv2 = nn.Conv2d(in_channels=96, out_channels=256, kernel_size=5, stride=1, padding=2)
        self.pool2 = nn.MaxPool2d(kernel_size=3, stride=2)
        self.conv3 = nn.Conv2d(in_channels=256, out_channels=384, kernel_size=3, stride=1, padding=1)
        self.conv4 = nn.Conv2d(in_channels=384, out_channels=384, kernel_size=3, stride=1, padding=1)
        self.conv5 = nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1)
        self.pool3 = nn.MaxPool2d(kernel_size=3, stride=2)
        # Fully connected classifier head.
        # 9216 = 256 channels * 6 * 6 spatial after pool3 for 227x227 input.
        self.fc1 = nn.Linear(in_features=9216, out_features=4096)
        self.dropout1 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(in_features=4096, out_features=4096)
        self.dropout2 = nn.Dropout(0.5)
        self.fc3 = nn.Linear(in_features=4096, out_features=1000)

    def forward(self, image):
        """Compute class probabilities for a batch of images.

        Args:
            image: float tensor of shape (bs, 3, 227, 227).

        Returns:
            Tensor of shape (bs, 1000); each row sums to 1 (softmax).
        """
        # Original batch shape: (bs, 3, 227, 227).
        bs, c, h, w = image.size()
        x = F.relu(self.conv1(image))
        x = self.pool1(x)
        x = F.relu(self.conv2(x))
        x = self.pool2(x)
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = F.relu(self.conv5(x))
        x = self.pool3(x)
        # Flatten the feature maps to (bs, 9216) for the classifier.
        x = x.view(bs, -1)
        x = F.relu(self.fc1(x))
        x = self.dropout1(x)
        x = F.relu(self.fc2(x))
        x = self.dropout2(x)
        x = self.fc3(x)
        # Fix: torch.softmax takes `dim`, not the numpy-style `axis`
        # keyword, which is not accepted on all PyTorch versions.
        # NOTE(review): because the model outputs probabilities, a loss
        # that expects raw logits (e.g. nn.CrossEntropyLoss) must NOT be
        # applied on top of this output — confirm against the caller.
        x = torch.softmax(x, dim=1)
        return x
# %% [markdown]
# dataset.py
# %%
import torch
import numpy as np
from PIL import Image
from PIL import ImageFile
# %%
# Let PIL load partially-downloaded/truncated image files instead of raising.
ImageFile.LOAD_TRUNCATED_IMAGES = True
# %%
class ClassificationDataset:
    """Generic image-classification dataset.

    Reads an image from disk, optionally resizes it, applies optional
    albumentations-style augmentations, and returns a CHW float tensor
    together with its integer target.
    """

    def __init__(self, image_paths, targets, resize=None, augmentations=None):
        self.image_paths = image_paths
        self.targets = targets
        # resize is (height, width) or None.
        self.resize = resize
        # Callable taking image= and returning {"image": ...}, or None.
        self.augmentations = augmentations

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, item):
        img = Image.open(self.image_paths[item]).convert("RGB")
        target = self.targets[item]
        if self.resize is not None:
            # PIL expects (width, height), hence the swapped order.
            new_size = (self.resize[1], self.resize[0])
            img = img.resize(new_size, resample=Image.BILINEAR)
        img = np.array(img)
        if self.augmentations is not None:
            img = self.augmentations(image=img)["image"]
        # HWC -> CHW layout for PyTorch.
        img = np.transpose(img, (2, 0, 1)).astype(np.float32)
        return {
            "image": torch.tensor(img, dtype=torch.float),
            "targets": torch.tensor(target, dtype=torch.long),
        }
# %% [markdown]
# engine.py
# %%
import torch
import torch.nn as nn
from tqdm import tqdm
def train(data_loader, model, optimizer, device):
    """Run one epoch of training with BCE-with-logits loss.

    Args:
        data_loader: iterable yielding dicts with "image" and "targets".
        model: module producing one logit per sample (shape (bs, 1)).
        optimizer: optimizer over model.parameters().
        device: device to move each batch to.
    """
    model.train()
    # Hoisted out of the batch loop: the loss module is stateless, so
    # constructing a new instance per batch was pure overhead.
    criterion = nn.BCEWithLogitsLoss()
    for data in data_loader:
        inputs = data["image"].to(device, dtype=torch.float)
        # BCEWithLogitsLoss needs float targets shaped like the output.
        targets = data["targets"].to(device, dtype=torch.float)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets.view(-1, 1))
        loss.backward()
        optimizer.step()
def evaluate(data_loader, model, device):
    """Run inference over data_loader without gradients.

    Returns:
        (outputs, targets): two flat Python lists accumulated across
        every batch, converted via numpy ``tolist``.
    """
    model.eval()
    all_targets = []
    all_outputs = []
    with torch.no_grad():
        for batch in data_loader:
            images = batch["image"].to(device, dtype=torch.float)
            labels = batch["targets"].to(device, dtype=torch.float)
            preds = model(images)
            all_targets.extend(labels.detach().cpu().numpy().tolist())
            all_outputs.extend(preds.detach().cpu().numpy().tolist())
    return all_outputs, all_targets
# %% [markdown]
# model.py
# %%
import torch.nn as nn
import pretrainedmodels
# %%
def get_model(pretrained):
    """Build a resnet18 backbone with a custom single-logit head.

    Args:
        pretrained: if truthy, load the "imagenet" weights; otherwise
            the backbone is randomly initialized.

    Returns:
        The resnet18 model with ``last_linear`` replaced by a deeper,
        regularized head that emits one logit per sample.
    """
    weights = "imagenet" if pretrained else None
    model = pretrainedmodels.__dict__["resnet18"](pretrained=weights)
    # Swap the stock 512->1000 classifier for a binary-classification head.
    model.last_linear = nn.Sequential(
        nn.BatchNorm1d(512),
        nn.Dropout(p=0.25),
        nn.Linear(in_features=512, out_features=2048),
        nn.ReLU(),
        nn.BatchNorm1d(2048, eps=1e-05, momentum=0.1),
        nn.Dropout(p=0.5),
        nn.Linear(in_features=2048, out_features=1),
    )
    return model
# %% [markdown]
# train.py
# %%
import os
import pandas as pd
import numpy as np
import albumentations
import torch
from sklearn import metrics
from sklearn.model_selection import train_test_split
# import dataset
# import engine
# from model import get_model
# Training script: binary classifier on the train.csv / train_png data.
data_path = "../input/"
device = "cuda"
epochs = 10

# Build image paths and the target array from the CSV manifest.
df = pd.read_csv(os.path.join(data_path, "train.csv"))
images = df.ImageId.values.tolist()
images = [os.path.join(data_path, "train_png", i + ".png") for i in images]
targets = df.target.values

model = get_model(pretrained=True)
model.to(device)

# ImageNet channel statistics used for normalization.
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)
aug = albumentations.Compose(
    [albumentations.Normalize(mean, std, max_pixel_value=255.0, always_apply=True)],
)

# Stratify so both folds keep the original class balance.
train_images, valid_images, train_targets, valid_targets = train_test_split(
    images, targets, stratify=targets, random_state=42
)

train_dataset = ClassificationDataset(
    image_paths=train_images,
    targets=train_targets,
    resize=(227, 227),
    augmentations=aug,
)
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=16,
    shuffle=True,
    num_workers=4,
)

valid_dataset = ClassificationDataset(
    image_paths=valid_images,
    targets=valid_targets,
    resize=(227, 227),
    augmentations=aug,
)
valid_loader = torch.utils.data.DataLoader(
    valid_dataset,
    batch_size=16,
    shuffle=False,
    num_workers=4,
)

optimizer = torch.optim.Adam(model.parameters(), lr=5e-4)
for epoch in range(epochs):
    train(train_loader, model, optimizer, device=device)
    # Fix: collect evaluation targets under a separate name instead of
    # shadowing the `valid_targets` split created above.
    predictions, eval_targets = evaluate(valid_loader, model, device=device)
    roc_auc = metrics.roc_auc_score(eval_targets, predictions)
    print(f"Epoch: {epoch}, Valid ROC AUC: {roc_auc}")