PyTorchによるSegmentation modelの学習時にGPUが使用されない
Segmentation modelの学習時にGPUが使用されない
初心者です。下記を参考に、マルチクラスのセグメンテーションの実装を試みています。
モデルの学習を開始するところまでは実行できたのですが、GPUではなくCPUが使用されているようで、学習が一向に終了しません。GPUを使用して学習する方法を教えていただきたいです。
発生している問題
下記を実行すると膨大な時間がかかります。
# Hyperparameters for the one-cycle training run.
max_lr = 1e-3
epoch = 100
weight_decay = 1e-4

# Move the model to the target device *before* the optimizer captures its
# parameters, as the torch.optim documentation advises. fit() also calls
# model.to(device), but only after the optimizer has already been built.
model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=max_lr, weight_decay=weight_decay)

# fit() steps the scheduler once per training batch, so OneCycleLR needs the
# number of epochs and the number of batches per epoch to size its schedule.
sched = torch.optim.lr_scheduler.OneCycleLR(
    optimizer,
    max_lr=max_lr,
    epochs=epoch,
    steps_per_epoch=len(train_loader),
)

history = fit(epoch, model, train_loader, valid_loader, criterion, optimizer, sched)
問題箇所直前の処理
下記を実行してもエラーは生じませんが、ここに元凶があると考えています。
def get_lr(optimizer):
    """Return the learning rate of the optimizer's first parameter group.

    Args:
        optimizer: Any object exposing ``param_groups``, an iterable of
            dicts that each carry an ``'lr'`` entry.

    Returns:
        The ``'lr'`` value of the first group, or ``None`` when
        ``param_groups`` is empty.
    """
    # Only the first group is consulted; later groups are ignored even if
    # they use a different learning rate.
    first_group = next(iter(optimizer.param_groups), None)
    if first_group is None:
        return None
    return first_group['lr']
def _prepare_batch(data, patch):
    """Unpack one ``(images, masks)`` batch and move both tensors to ``device``.

    When ``patch`` is true the image tensor is unpacked as five axes and the
    two leading ones are merged, so every tile becomes its own batch entry.
    NOTE(review): the axes are presumably (batch, tiles, channels, height,
    width), going by the names used in the original code -- confirm against
    the dataset.
    """
    image_tiles, mask_tiles = data
    if patch:
        # Unpacking five values also rejects inputs that are not 5-D.
        _, _, c, h, w = image_tiles.size()
        image_tiles = image_tiles.view(-1, c, h, w)
        mask_tiles = mask_tiles.view(-1, h, w)
    return image_tiles.to(device), mask_tiles.to(device)


def fit(epochs, model, train_loader, val_loader, criterion, optimizer, scheduler, patch=False):
    """Train ``model`` and validate it after every epoch.

    Relies on module-level names defined outside this function: ``device``,
    ``mIoU``, ``pixel_accuracy``, ``get_lr`` and ``tqdm``.

    Args:
        epochs: Maximum number of epochs to run.
        model: Network to train; it is moved to ``device`` here.
        train_loader: Iterable of ``(images, masks)`` training batches.
        val_loader: Iterable of ``(images, masks)`` validation batches.
        criterion: Loss callable invoked as ``criterion(output, mask)``.
        optimizer: Optimizer stepped once per training batch.
        scheduler: Learning-rate scheduler, also stepped once per batch.
        patch: When true, batches are reshaped tile-wise before use
            (see ``_prepare_batch``).

    Returns:
        A dict with the keys ``train_loss``, ``val_loss``, ``train_miou``,
        ``val_miou``, ``train_acc``, ``val_acc`` (one entry per completed
        epoch each) and ``lrs`` (one entry per training batch).
    """
    torch.cuda.empty_cache()
    train_losses = []
    test_losses = []
    val_iou = []
    val_acc = []
    train_iou = []
    train_acc = []
    lrs = []
    # Plain float infinity: same value as np.inf without requiring numpy.
    min_loss = float('inf')
    decrease = 1
    not_improve = 0

    model.to(device)
    fit_time = time.time()
    for e in range(epochs):
        since = time.time()
        running_loss = 0
        iou_score = 0
        accuracy = 0

        # training loop
        model.train()
        for data in tqdm(train_loader):
            image, mask = _prepare_batch(data, patch)
            # forward
            output = model(image)
            loss = criterion(output, mask)
            # evaluation metrics
            iou_score += mIoU(output, mask)
            accuracy += pixel_accuracy(output, mask)
            # backward
            loss.backward()
            optimizer.step()  # update weight
            optimizer.zero_grad()  # reset gradient
            # step the learning rate
            lrs.append(get_lr(optimizer))
            scheduler.step()
            running_loss += loss.item()

        # validation loop (the training loop never breaks, so this always runs)
        model.eval()
        test_loss = 0
        test_accuracy = 0
        val_iou_score = 0
        with torch.no_grad():
            for data in tqdm(val_loader):
                image, mask = _prepare_batch(data, patch)
                output = model(image)
                # evaluation metrics
                val_iou_score += mIoU(output, mask)
                test_accuracy += pixel_accuracy(output, mask)
                # loss
                loss = criterion(output, mask)
                test_loss += loss.item()

        # calculate the per-batch means for this epoch
        epoch_train_loss = running_loss/len(train_loader)
        epoch_val_loss = test_loss/len(val_loader)

        # Record every metric before the early-stopping check so that all
        # history lists keep the same length even when training stops early.
        train_losses.append(epoch_train_loss)
        test_losses.append(epoch_val_loss)
        val_iou.append(val_iou_score/len(val_loader))
        train_iou.append(iou_score/len(train_loader))
        train_acc.append(accuracy/len(train_loader))
        val_acc.append(test_accuracy/len(val_loader))

        if min_loss > epoch_val_loss:
            print('Loss Decreasing.. {:.3f} >> {:.3f} '.format(min_loss, epoch_val_loss))
            min_loss = epoch_val_loss
            decrease += 1
            if decrease % 5 == 0:
                # Only the message is printed; the actual save is disabled.
                print('saving model...')
                # Enable the next line to also save the model during training:
                #torch.save(model, 'Unet-_mIoU-{:.3f}.pt'.format(val_iou_score/len(val_loader)))
        elif epoch_val_loss > min_loss:
            # min_loss is deliberately left untouched here: overwriting it
            # with a worse value would stop it from tracking the best loss.
            not_improve += 1
            print(f'Loss Not Decrease for {not_improve} time')
            if not_improve == 20:
                print('Loss not decrease for 20 times, Stop Training')
                break

        print("Epoch:{}/{}..".format(e+1, epochs),
              "Train Loss: {:.3f}..".format(epoch_train_loss),
              "Val Loss: {:.3f}..".format(epoch_val_loss),
              "Train mIoU:{:.3f}..".format(iou_score/len(train_loader)),
              "Val mIoU: {:.3f}..".format(val_iou_score/len(val_loader)),
              "Train Acc:{:.3f}..".format(accuracy/len(train_loader)),
              "Val Acc:{:.3f}..".format(test_accuracy/len(val_loader)),
              "Time: {:.2f}m".format((time.time()-since)/60))

    history = {'train_loss' : train_losses, 'val_loss': test_losses,
               'train_miou' :train_iou, 'val_miou':val_iou,
               'train_acc' :train_acc, 'val_acc':val_acc,
               'lrs': lrs}
    print('Total time: {:.2f} m' .format((time.time()- fit_time)/60))
    return history
下記は冒頭で実行済みです。
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import Dataset as BaseDataset
from torchvision import transforms as T
import torchvision
import torch.nn.functional as F
from torch.autograd import Variable
import time
import os
from tqdm.notebook import tqdm
from torch.utils.data import Dataset as BaseDataset
import segmentation_models_pytorch as smp
# Use CUDA when PyTorch reports it as available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
自分で試したこと
import torch

# Report whether PyTorch can see any CUDA device at all.
if not torch.cuda.is_available():
    print("CUDA is not available. GPU is not being used.")
else:
    print("Number of GPUs available:", torch.cuda.device_count())

# The device object that the training code passes to .to(...).
print(device)

# NOTE(review): `image` and `mask` are looked up at module level here. They
# are not the local variables of the same name inside fit(), so these two
# lines describe whatever was last assigned to those names outside fit() --
# confirm where they come from before drawing conclusions about training.
print(image.device)
print(mask.device)

# Length of the training loader (presumably the number of batches per epoch).
print(len(train_loader))
上記を実行すると以下のように表示され、image と mask のデバイスが cpu になっていることから、
image = image_tiles.to(device); mask = mask_tiles.to(device);
あたりが原因かと予想しています。
Number of GPUs available: 1
cuda
cpu
cpu
10