学習後に「Kernel Restarting The kernel for appears to have died. It will restart automatically.」というエラーが発生する
エラー「Kernel Restarting The kernel for appears to have died. It will restart automatically.」が出てしまう.
オライリー・ジャパンから出版されている「ゼロから作るDeep Learning 5 生成モデル編」のサンプルコードを用いて拡散モデルの学習を実行したところ,全10エポックの学習が終了した後に下記のエラーが発生しました.
なにかわかることがありましたら,教えていただきたいです.
使用したライブラリのバージョンは,下記の通りです.
python:3.9.19
torch:2.6.0
torchaudio:2.6.0
torchvision:0.21.0
tqdm:4.67.1
出力結果とエラーはこちらです.
100%|████████████████████████████████████████████████████████████████████████████████| 469/469 [00:31<00:00, 14.94it/s]
Epoch 0 | Loss: 0.04928520366327086
100%|████████████████████████████████████████████████████████████████████████████████| 469/469 [00:31<00:00, 14.92it/s]
Epoch 1 | Loss: 0.027706134027795495
100%|████████████████████████████████████████████████████████████████████████████████| 469/469 [00:31<00:00, 14.80it/s]
Epoch 2 | Loss: 0.024513568200949413
100%|████████████████████████████████████████████████████████████████████████████████| 469/469 [00:31<00:00, 14.94it/s]
Epoch 3 | Loss: 0.02249647936682457
100%|████████████████████████████████████████████████████████████████████████████████| 469/469 [00:31<00:00, 14.84it/s]
Epoch 4 | Loss: 0.021065952836164532
100%|████████████████████████████████████████████████████████████████████████████████| 469/469 [00:32<00:00, 14.59it/s]
Epoch 5 | Loss: 0.01998442535731457
100%|████████████████████████████████████████████████████████████████████████████████| 469/469 [00:31<00:00, 14.97it/s]
Epoch 6 | Loss: 0.019427061021359746
100%|████████████████████████████████████████████████████████████████████████████████| 469/469 [00:31<00:00, 14.88it/s]
Epoch 7 | Loss: 0.01897648024335027
100%|████████████████████████████████████████████████████████████████████████████████| 469/469 [00:32<00:00, 14.61it/s]
Epoch 8 | Loss: 0.018839439573779163
100%|████████████████████████████████████████████████████████████████████████████████| 469/469 [00:32<00:00, 14.62it/s]
Epoch 9 | Loss: 0.018507046037232442
#エラー
Kernel Restarting The kernel for deep/deeplearning/diffusion_model.ipynb appears to have died. It will restart automatically.
実装しているコードは下記になります.
import math
import torch
import torchvision
import matplotlib.pyplot as plt
from torchvision import transforms
from torch.utils.data import DataLoader
from torch.optim import Adam
import torch.nn.functional as F
from torch import nn
from tqdm import tqdm
# Hyperparameters and runtime configuration for the training script below.
img_size = 28         # image side length in pixels (not referenced elsewhere in the visible code)
batch_size = 128      # mini-batch size handed to the DataLoader
num_timesteps = 1000  # number of diffusion steps T
epochs = 10           # number of passes over the dataset
lr = 1e-3             # Adam learning rate
# Use the GPU when one is usable; otherwise fall back to the CPU so the script
# still runs (slowly) instead of failing on the first `.to(device)` call.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
def show_images(images, rows=2, cols=10):
    """Display images in a ``rows`` x ``cols`` grid of grayscale subplots.

    Args:
        images: Iterable of images accepted by ``imshow``.
        rows: Number of grid rows.
        cols: Number of grid columns.

    At most ``rows * cols`` images are drawn, filling the grid row by row.
    When fewer images are supplied the remaining cells are left empty
    rather than raising ``IndexError``.
    """
    fig = plt.figure(figsize=(cols, rows))
    # zip() stops at whichever runs out first: the grid cells or the images.
    for cell, image in zip(range(rows * cols), images):
        ax = fig.add_subplot(rows, cols, cell + 1)  # subplot indices are 1-based
        ax.imshow(image, cmap='gray')
        ax.axis('off')
    plt.show()
def _pos_encoding(time_idx, output_dim, device='cpu'):
t, D = time_idx, output_dim
v = torch.zeros(D, device=device)
i = torch.arange(0, D, device=device)
div_term = torch.exp(i / D * math.log(10000))
v[0::2] = torch.sin(t / div_term[0::2])
v[1::2] = torch.cos(t / div_term[1::2])
return v
def pos_encoding(timesteps, output_dim, device='cpu'):
    """Return sinusoidal encodings for a batch of time steps.

    For row ``n`` and slot ``i`` the divisor is ``10000 ** (i / output_dim)``;
    even slots hold ``sin(timesteps[n] / divisor)`` and odd slots hold
    ``cos(timesteps[n] / divisor)``.

    The whole batch is computed in one vectorized pass, so the divisor
    table is built once instead of once per sample.

    Args:
        timesteps: 1-D tensor of time steps, one per batch element.
        output_dim: Length of each encoding vector.
        device: Accepted for backward compatibility but not used; the
            result is always created on ``timesteps.device``.

    Returns:
        A tensor of shape ``(len(timesteps), output_dim)``.
    """
    batch_size = len(timesteps)
    device = timesteps.device

    slots = torch.arange(0, output_dim, device=device)
    div_term = torch.exp(slots / output_dim * math.log(10000))
    # (N, 1) / (D,) broadcasts to (N, D): every time step against every slot.
    angles = timesteps.unsqueeze(1).to(div_term.dtype) / div_term

    v = torch.zeros(batch_size, output_dim, device=device)
    v[:, 0::2] = torch.sin(angles[:, 0::2])
    v[:, 1::2] = torch.cos(angles[:, 1::2])
    return v
class ConvBlock(nn.Module):
    """Two 3x3 conv + BatchNorm + ReLU stages conditioned on a time embedding.

    The time embedding is projected by a small MLP to ``in_ch`` values and
    added to the input, one value per channel, before the convolutions.
    """

    def __init__(self, in_ch, out_ch, time_embed_dim):
        """Build the convolution stack and the time-embedding MLP.

        Args:
            in_ch: Number of input channels.
            out_ch: Number of output channels.
            time_embed_dim: Length of the time-embedding vector.
        """
        super().__init__()
        conv_stack = [
            nn.Conv2d(in_ch, out_ch, 3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(),
            nn.Conv2d(out_ch, out_ch, 3, padding=1),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(),
        ]
        self.convs = nn.Sequential(*conv_stack)

        time_projection = [
            nn.Linear(time_embed_dim, in_ch),
            nn.ReLU(),
            nn.Linear(in_ch, in_ch),
        ]
        self.mlp = nn.Sequential(*time_projection)

    def forward(self, x, v):
        """Add the projected time embedding to ``x`` and run the convolutions.

        Args:
            x: 4-D input tensor ``(N, C, H, W)`` with ``C == in_ch``.
            v: Time embeddings of shape ``(N, time_embed_dim)``.

        Returns:
            Tensor of shape ``(N, out_ch, H, W)``.
        """
        batch, channels, _, _ = x.shape
        # Reshape to (N, C, 1, 1) so the offset broadcasts over H and W.
        time_offset = self.mlp(v).view(batch, channels, 1, 1)
        return self.convs(x + time_offset)
class UNet(nn.Module):
    """Small time-conditioned U-Net: two down stages, a bottleneck, two up stages.

    Each stage is a ``ConvBlock`` that receives the same time embedding.
    The input is max-pooled by 2 twice and bilinearly upsampled by 2 twice,
    so height and width should be divisible by 4 for the skip connections
    to line up when concatenated.
    """

    def __init__(self, in_ch=1, time_embed_dim=100):
        """Create the network.

        Args:
            in_ch: Channels of the input and of the output.
            time_embed_dim: Length of the sinusoidal time embedding.
        """
        super().__init__()
        self.time_embed_dim = time_embed_dim

        # Contracting path and bottleneck.
        self.down1 = ConvBlock(in_ch, 64, time_embed_dim)
        self.down2 = ConvBlock(64, 128, time_embed_dim)
        self.bot1 = ConvBlock(128, 256, time_embed_dim)
        # Expanding path; input channels include the concatenated skip tensor.
        self.up2 = ConvBlock(128 + 256, 128, time_embed_dim)
        self.up1 = ConvBlock(128 + 64, 64, time_embed_dim)
        # 1x1 convolution back to the input channel count.
        self.out = nn.Conv2d(64, in_ch, 1)

        self.maxpool = nn.MaxPool2d(2)
        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear')

    def forward(self, x, timesteps):
        """Run the network on ``x`` conditioned on ``timesteps``.

        Args:
            x: Input tensor ``(N, in_ch, H, W)``.
            timesteps: 1-D tensor of N time steps.

        Returns:
            Tensor with the same shape as ``x``.
        """
        t_emb = pos_encoding(timesteps, self.time_embed_dim, x.device)

        skip1 = self.down1(x, t_emb)
        skip2 = self.down2(self.maxpool(skip1), t_emb)
        h = self.bot1(self.maxpool(skip2), t_emb)

        h = torch.cat([self.upsample(h), skip2], dim=1)
        h = self.up2(h, t_emb)
        h = torch.cat([self.upsample(h), skip1], dim=1)
        h = self.up1(h, t_emb)
        return self.out(h)
class Diffuser:
    """Forward (noising) and reverse (denoising) process of a diffusion model.

    Uses a linear beta schedule. Time steps are 1-based: ``t`` ranges over
    ``1..num_timesteps`` and index ``t - 1`` selects the schedule entry.
    """

    def __init__(self, num_timesteps=1000, beta_start=0.0001, beta_end=0.02, device='cpu'):
        """Precompute the noise schedule.

        Args:
            num_timesteps: Number of diffusion steps T.
            beta_start: First beta of the linear schedule.
            beta_end: Last beta of the linear schedule.
            device: Device holding the schedule and generated noise.
        """
        self.num_timesteps = num_timesteps
        self.device = device
        self.betas = torch.linspace(beta_start, beta_end, num_timesteps, device=device)
        self.alphas = 1 - self.betas
        self.alpha_bars = torch.cumprod(self.alphas, dim=0)

    def add_noise(self, x_0, t):
        """Sample ``x_t`` from ``x_0`` in a single step.

        Args:
            x_0: Clean batch of shape ``(N, C, H, W)``.
            t: 1-D integer tensor of N time steps in ``1..num_timesteps``.

        Returns:
            ``(x_t, noise)`` where ``noise`` is the Gaussian noise mixed in.
        """
        T = self.num_timesteps
        assert (t >= 1).all() and (t <= T).all()

        t_idx = t - 1  # alpha_bars[0] is for t=1
        alpha_bar = self.alpha_bars[t_idx]  # (N,)
        N = alpha_bar.size(0)
        alpha_bar = alpha_bar.view(N, 1, 1, 1)  # broadcast over C, H, W

        noise = torch.randn_like(x_0, device=self.device)
        x_t = torch.sqrt(alpha_bar) * x_0 + torch.sqrt(1 - alpha_bar) * noise
        return x_t, noise

    def denoise(self, model, x, t):
        """Perform one reverse step from ``x_t`` to ``x_{t-1}``.

        The model is evaluated in eval mode without gradients; afterwards
        it is put back into whatever mode (train or eval) it was in before
        the call, even if the model raises.

        Args:
            model: Noise predictor called as ``model(x, t)``.
            x: Current noisy batch of shape ``(N, C, H, W)``.
            t: 1-D integer tensor of N time steps in ``1..num_timesteps``.

        Returns:
            The denoised batch, same shape as ``x``.
        """
        T = self.num_timesteps
        assert (t >= 1).all() and (t <= T).all()

        t_idx = t - 1  # alphas[0] is for t=1
        alpha = self.alphas[t_idx]
        alpha_bar = self.alpha_bars[t_idx]
        # For t == 1 the index is -1 and wraps to the last entry. That value
        # only feeds `std`, which is multiplied by zero noise at t == 1 below.
        alpha_bar_prev = self.alpha_bars[t_idx - 1]

        N = alpha.size(0)
        alpha = alpha.view(N, 1, 1, 1)
        alpha_bar = alpha_bar.view(N, 1, 1, 1)
        alpha_bar_prev = alpha_bar_prev.view(N, 1, 1, 1)

        was_training = model.training
        model.eval()
        try:
            with torch.no_grad():
                eps = model(x, t)
        finally:
            # Restore the caller's mode instead of always forcing train mode.
            model.train(was_training)

        noise = torch.randn_like(x, device=self.device)
        noise[t == 1] = 0  # no noise at t=1

        mu = (x - ((1 - alpha) / torch.sqrt(1 - alpha_bar)) * eps) / torch.sqrt(alpha)
        std = torch.sqrt((1 - alpha) * (1 - alpha_bar_prev) / (1 - alpha_bar))
        return mu + noise * std

    def reverse_to_img(self, x):
        """Convert one image tensor to a PIL image.

        Values are scaled by 255, clamped to ``[0, 255]`` and cast to uint8
        before conversion.
        """
        x = x * 255
        x = x.clamp(0, 255)
        x = x.to(torch.uint8)
        x = x.cpu()
        to_pil = transforms.ToPILImage()
        return to_pil(x)

    def sample(self, model, x_shape=(20, 1, 28, 28)):
        """Generate images by denoising Gaussian noise from t=T down to t=1.

        Args:
            model: Noise predictor passed to ``denoise``.
            x_shape: Shape ``(N, C, H, W)`` of the batch to generate.

        Returns:
            A list of N PIL images.
        """
        batch_size = x_shape[0]
        x = torch.randn(x_shape, device=self.device)

        for step in tqdm(range(self.num_timesteps, 0, -1)):
            # Every sample in the batch is at the same time step.
            t = torch.full((batch_size,), step, device=self.device, dtype=torch.long)
            x = self.denoise(model, x, t)

        return [self.reverse_to_img(x[i]) for i in range(batch_size)]
# --- data -------------------------------------------------------------------
preprocess = transforms.ToTensor()
dataset = torchvision.datasets.MNIST(root='./data', download=True, transform=preprocess)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# --- model, diffusion process, optimizer -------------------------------------
diffuser = Diffuser(num_timesteps, device=device)
model = UNet().to(device)
optimizer = Adam(model.parameters(), lr=lr)

# --- training ----------------------------------------------------------------
losses = []  # mean loss of each epoch
for epoch in range(epochs):
    running_loss = 0.0
    num_batches = 0

    # Optional: uncomment to generate and display samples at every epoch.
    # images = diffuser.sample(model)
    # show_images(images)

    for batch, _ in tqdm(dataloader):  # labels are not used
        x = batch.to(device)
        # One random time step in 1..num_timesteps per sample.
        t = torch.randint(1, num_timesteps + 1, (x.size(0),), device=device)

        # The model is trained to predict the noise that was mixed in.
        x_noisy, noise = diffuser.add_noise(x, t)
        noise_pred = model(x_noisy, t)
        loss = F.mse_loss(noise, noise_pred)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    loss_avg = running_loss / num_batches
    losses.append(loss_avg)
    print(f'Epoch {epoch} | Loss: {loss_avg}')

# --- loss curve --------------------------------------------------------------
plt.plot(losses)
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.show()

# --- generate and display samples from the trained model ----------------------
images = diffuser.sample(model)
show_images(images)
0 likes