
Super-Resolution Image Generation AI Model "Vision Master"

Posted at 2024-01-12


In 2047, advances in cutting-edge technology had pushed the Earth into a new era. Super-resolution technology had progressed by leaps and bounds, and people could obtain ultra-high-definition images. One research team, however, developed an AI model that could generate images at a super resolution far beyond ordinary resolutions and far beyond anyone's expectations.
The AI model was called "Vision Master." Without using conventional optics or lasers, it generated images based on the principles of diffusion quantum mechanics. Vision Master went beyond human perception, drawing in information from supernatural dimensions to produce super-resolution images that exceeded reality itself.
One day, the research team succeeded in using Vision Master to analyze data from an unknown dimension and render it as images. They captured information from a mysterious dimension spreading to the far reaches of the universe and depicted beautiful landscapes and possible forms of existence never before seen on Earth.
In the images Vision Master generated, colorful lifeforms from other dimensions coexisted, and light and shadow wove intricate patterns. It truly looked like a supernatural reality beyond human understanding.

[Image: low-resolution sample image]

We attempt super-resolution image generation with deep-learning upsampling.
The goal is to sharpen the blurry image above by generating the missing pixel information.

We build and train a deep learning model that upsamples low-resolution CIFAR-10 images by a factor of 2 to produce higher-resolution images.
SRCNN (Super-Resolution Convolutional Neural Network)
Super-resolution is a technique for reconstructing a high-resolution image from a low-resolution one.

The model is a deep network with 16 residual blocks (residual connections), so it is somewhat heavy to train. For reference, a non-learned interpolation baseline is sketched just below, before the model code.
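For reference (this is not part of the original article), the same 2x upscale can be done with plain, non-learned interpolation. A minimal sketch using bicubic resizing on one CIFAR-10 test image:

import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import cifar10

# Non-learned baseline: plain bicubic 2x upscale of one CIFAR-10 test image.
(_, _), (test_images, _) = cifar10.load_data()
lr = test_images[:1].astype('float32') / 255.0                              # shape (1, 32, 32, 3)
bicubic = tf.image.resize(lr, size=(64, 64), method='bicubic').numpy().clip(0.0, 1.0)

plt.subplot(1, 2, 1); plt.imshow(lr[0]); plt.title('32x32 input')
plt.subplot(1, 2, 2); plt.imshow(bicubic[0]); plt.title('64x64 bicubic')
plt.show()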

import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, Activation, UpSampling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.datasets import cifar10
import matplotlib.pyplot as plt

# Load the CIFAR-10 dataset
(train_lr_images, _), (test_lr_images, _) = cifar10.load_data()
# Normalize the low-resolution images to [0, 1]
train_lr_images = train_lr_images.astype('float32') / 255.0
test_lr_images = test_lr_images.astype('float32') / 255.0

# Create the "high-resolution" targets by upsampling the low-resolution images 2x
train_hr_images = tf.image.resize(train_lr_images, size=(64, 64)).numpy()
test_hr_images = tf.image.resize(test_lr_images, size=(64, 64)).numpy()
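# Note (not in the original article): tf.image.resize defaults to bilinear
# interpolation, so these "high-resolution" targets are simply smoothed 64x64
# upscales of the 32x32 CIFAR-10 images; the network below is trained to
# reproduce this 2x bilinear mapping.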

# Build the super-resolution model
def build_sr_model():
    input_img = Input(shape=(32, 32, 3))  # 3 channels for a color image

    # Low-resolution feature extraction
    x = Conv2D(64, (3, 3), padding='same')(input_img)
    x = Activation('relu')(x)

    # Residual blocks
    for _ in range(16):
        x = residual_block(x)

    # High-resolution reconstruction
    x = Conv2D(64, (3, 3), padding='same')(x)
    x = BatchNormalization()(x)
    x = UpSampling2D((2, 2))(x)  # 2x upsampling
    output_img = Conv2D(3, (3, 3), padding='same', activation='sigmoid')(x)  # 3 output channels for a color image

    model = Model(inputs=input_img, outputs=output_img)
    return model

# Residual block
def residual_block(x):
    filters = 64
    res = Conv2D(filters, (3, 3), padding='same')(x)
    res = BatchNormalization()(res)
    res = Activation('relu')(res)
    res = Conv2D(filters, (3, 3), padding='same')(res)
    res = BatchNormalization()(res)
    res = tf.keras.layers.add([res, x])  # Residual connection
    return res

# Build the model
sr_model = build_sr_model()

# Compile the model
sr_model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')

# Print the model summary
sr_model.summary()

# Train the model
history = sr_model.fit(train_lr_images, train_hr_images, epochs=10, batch_size=16, validation_split=0.2)

# Plot the training progress
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Mean Squared Error Loss')
plt.legend()
plt.show()
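The output below is evaluated visually. As a quick quantitative check that is not in the original post, pixel-wise metrics such as PSNR and SSIM can be computed on a test batch; a minimal sketch, assuming the variables from the training script above are still in scope:

import tensorflow as tf

# Quantitative check (not in the original article): PSNR / SSIM against the bilinear targets.
pred_hr = sr_model.predict(test_lr_images[:100])
psnr = tf.image.psnr(test_hr_images[:100], pred_hr, max_val=1.0)
ssim = tf.image.ssim(test_hr_images[:100], pred_hr, max_val=1.0)
print('mean PSNR:', float(tf.reduce_mean(psnr)))
print('mean SSIM:', float(tf.reduce_mean(ssim)))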

The model generates the missing pixel information and upsamples the image.

import numpy as np

# Pick a random low-resolution image from the test data
idx = np.random.randint(0, len(test_lr_images))
test_lr_image = test_lr_images[idx:idx+1]

# Generate a high-resolution image from the low-resolution input
generated_hr_image = sr_model.predict(test_lr_image)

# Display the images
plt.figure(figsize=(10, 5))

# Low-resolution image
plt.subplot(1, 2, 1)
plt.imshow(test_lr_image[0])
plt.title('Low Resolution Image')

# Generated high-resolution image
plt.subplot(1, 2, 2)
plt.imshow(generated_hr_image[0])
plt.title('Generated High Resolution Image')

plt.show()

Execution results

[Result images: low-resolution inputs and the generated high-resolution outputs]

Upsampling five random test samples

import numpy as np

# Pick 5 random low-resolution images from the test data
num_samples = 5
random_indices = np.random.randint(0, len(test_lr_images), size=num_samples)
test_lr_samples = test_lr_images[random_indices]

# Generate a high-resolution image from each low-resolution input
generated_hr_samples = sr_model.predict(test_lr_samples)

# Display the images
plt.figure(figsize=(15, 5))

for i in range(num_samples):
# Low-resolution image
    plt.subplot(2, num_samples, i + 1)
    plt.imshow(test_lr_samples[i])
    plt.title(f'Low Res {i + 1}')

# Generated high-resolution image
    plt.subplot(2, num_samples, i + num_samples + 1)
    plt.imshow(generated_hr_samples[i])
    plt.title(f'Generated HR {i + 1}')

plt.show()

[Result images: five low-resolution inputs (top row) and their generated high-resolution outputs (bottom row)]

Model: "model"

Layer (type)                                 Output Shape          Param #   Connected to
input_1 (InputLayer)                         [(None, 32, 32, 3)]   0         []
conv2d (Conv2D)                              (None, 32, 32, 64)    1792      ['input_1[0][0]']
activation (Activation)                      (None, 32, 32, 64)    0         ['conv2d[0][0]']
conv2d_1 (Conv2D)                            (None, 32, 32, 64)    36928     ['activation[0][0]']
batch_normalization (BatchNormalization)     (None, 32, 32, 64)    256       ['conv2d_1[0][0]']
activation_1 (Activation)                    (None, 32, 32, 64)    0         ['batch_normalization[0][0]']
conv2d_2 (Conv2D)                            (None, 32, 32, 64)    36928     ['activation_1[0][0]']
batch_normalization_1 (BatchNormalization)   (None, 32, 32, 64)    256       ['conv2d_2[0][0]']
add (Add)                                    (None, 32, 32, 64)    0         ['batch_normalization_1[0][0]', 'activation[0][0]']
conv2d_3 (Conv2D)                            (None, 32, 32, 64)    36928     ['add[0][0]']
batch_normalization_2 (BatchNormalization)   (None, 32, 32, 64)    256       ['conv2d_3[0][0]']
activation_2 (Activation)                    (None, 32, 32, 64)    0         ['batch_normalization_2[0][0]']
conv2d_4 (Conv2D)                            (None, 32, 32, 64)    36928     ['activation_2[0][0]']
batch_normalization_3 (BatchNormalization)   (None, 32, 32, 64)    256       ['conv2d_4[0][0]']
add_1 (Add)                                  (None, 32, 32, 64)    0         ['batch_normalization_3[0][0]', 'add[0][0]']
conv2d_5 (Conv2D)                            (None, 32, 32, 64)    36928     ['add_1[0][0]']
batch_normalization_4 (BatchNormalization)   (None, 32, 32, 64)    256       ['conv2d_5[0][0]']
activation_3 (Activation)                    (None, 32, 32, 64)    0         ['batch_normalization_4[0][0]']
conv2d_6 (Conv2D)                            (None, 32, 32, 64)    36928     ['activation_3[0][0]']
batch_normalization_5 (BatchNormalization)   (None, 32, 32, 64)    256       ['conv2d_6[0][0]']
add_2 (Add)                                  (None, 32, 32, 64)    0         ['batch_normalization_5[0][0]', 'add_1[0][0]']
conv2d_7 (Conv2D)                            (None, 32, 32, 64)    36928     ['add_2[0][0]']
batch_normalization_6 (BatchNormalization)   (None, 32, 32, 64)    256       ['conv2d_7[0][0]']
activation_4 (Activation)                    (None, 32, 32, 64)    0         ['batch_normalization_6[0][0]']
conv2d_8 (Conv2D)                            (None, 32, 32, 64)    36928     ['activation_4[0][0]']
batch_normalization_7 (BatchNormalization)   (None, 32, 32, 64)    256       ['conv2d_8[0][0]']
add_3 (Add)                                  (None, 32, 32, 64)    0         ['batch_normalization_7[0][0]', 'add_2[0][0]']
conv2d_9 (Conv2D)                            (None, 32, 32, 64)    36928     ['add_3[0][0]']
batch_normalization_8 (BatchNormalization)   (None, 32, 32, 64)    256       ['conv2d_9[0][0]']
activation_5 (Activation)                    (None, 32, 32, 64)    0         ['batch_normalization_8[0][0]']
conv2d_10 (Conv2D)                           (None, 32, 32, 64)    36928     ['activation_5[0][0]']
batch_normalization_9 (BatchNormalization)   (None, 32, 32, 64)    256       ['conv2d_10[0][0]']
add_4 (Add)                                  (None, 32, 32, 64)    0         ['batch_normalization_9[0][0]', 'add_3[0][0]']
conv2d_11 (Conv2D)                           (None, 32, 32, 64)    36928     ['add_4[0][0]']
batch_normalization_10 (BatchNormalization)  (None, 32, 32, 64)    256       ['conv2d_11[0][0]']
activation_6 (Activation)                    (None, 32, 32, 64)    0         ['batch_normalization_10[0][0]']
conv2d_12 (Conv2D)                           (None, 32, 32, 64)    36928     ['activation_6[0][0]']
batch_normalization_11 (BatchNormalization)  (None, 32, 32, 64)    256       ['conv2d_12[0][0]']
add_5 (Add)                                  (None, 32, 32, 64)    0         ['batch_normalization_11[0][0]', 'add_4[0][0]']
conv2d_13 (Conv2D)                           (None, 32, 32, 64)    36928     ['add_5[0][0]']
batch_normalization_12 (BatchNormalization)  (None, 32, 32, 64)    256       ['conv2d_13[0][0]']
activation_7 (Activation)                    (None, 32, 32, 64)    0         ['batch_normalization_12[0][0]']
conv2d_14 (Conv2D)                           (None, 32, 32, 64)    36928     ['activation_7[0][0]']
batch_normalization_13 (BatchNormalization)  (None, 32, 32, 64)    256       ['conv2d_14[0][0]']
add_6 (Add)                                  (None, 32, 32, 64)    0         ['batch_normalization_13[0][0]', 'add_5[0][0]']
conv2d_15 (Conv2D)                           (None, 32, 32, 64)    36928     ['add_6[0][0]']
batch_normalization_14 (BatchNormalization)  (None, 32, 32, 64)    256       ['conv2d_15[0][0]']
activation_8 (Activation)                    (None, 32, 32, 64)    0         ['batch_normalization_14[0][0]']
conv2d_16 (Conv2D)                           (None, 32, 32, 64)    36928     ['activation_8[0][0]']
batch_normalization_15 (BatchNormalization)  (None, 32, 32, 64)    256       ['conv2d_16[0][0]']
add_7 (Add)                                  (None, 32, 32, 64)    0         ['batch_normalization_15[0][0]', 'add_6[0][0]']
conv2d_17 (Conv2D)                           (None, 32, 32, 64)    36928     ['add_7[0][0]']
batch_normalization_16 (BatchNormalization)  (None, 32, 32, 64)    256       ['conv2d_17[0][0]']
activation_9 (Activation)                    (None, 32, 32, 64)    0         ['batch_normalization_16[0][0]']
conv2d_18 (Conv2D)                           (None, 32, 32, 64)    36928     ['activation_9[0][0]']
batch_normalization_17 (BatchNormalization)  (None, 32, 32, 64)    256       ['conv2d_18[0][0]']
add_8 (Add)                                  (None, 32, 32, 64)    0         ['batch_normalization_17[0][0]', 'add_7[0][0]']
conv2d_19 (Conv2D)                           (None, 32, 32, 64)    36928     ['add_8[0][0]']
batch_normalization_18 (BatchNormalization)  (None, 32, 32, 64)    256       ['conv2d_19[0][0]']
activation_10 (Activation)                   (None, 32, 32, 64)    0         ['batch_normalization_18[0][0]']
conv2d_20 (Conv2D)                           (None, 32, 32, 64)    36928     ['activation_10[0][0]']
batch_normalization_19 (BatchNormalization)  (None, 32, 32, 64)    256       ['conv2d_20[0][0]']
add_9 (Add)                                  (None, 32, 32, 64)    0         ['batch_normalization_19[0][0]', 'add_8[0][0]']
conv2d_21 (Conv2D)                           (None, 32, 32, 64)    36928     ['add_9[0][0]']
batch_normalization_20 (BatchNormalization)  (None, 32, 32, 64)    256       ['conv2d_21[0][0]']
activation_11 (Activation)                   (None, 32, 32, 64)    0         ['batch_normalization_20[0][0]']
conv2d_22 (Conv2D)                           (None, 32, 32, 64)    36928     ['activation_11[0][0]']
batch_normalization_21 (BatchNormalization)  (None, 32, 32, 64)    256       ['conv2d_22[0][0]']
add_10 (Add)                                 (None, 32, 32, 64)    0         ['batch_normalization_21[0][0]', 'add_9[0][0]']
conv2d_23 (Conv2D)                           (None, 32, 32, 64)    36928     ['add_10[0][0]']
batch_normalization_22 (BatchNormalization)  (None, 32, 32, 64)    256       ['conv2d_23[0][0]']
activation_12 (Activation)                   (None, 32, 32, 64)    0         ['batch_normalization_22[0][0]']
conv2d_24 (Conv2D)                           (None, 32, 32, 64)    36928     ['activation_12[0][0]']
batch_normalization_23 (BatchNormalization)  (None, 32, 32, 64)    256       ['conv2d_24[0][0]']
add_11 (Add)                                 (None, 32, 32, 64)    0         ['batch_normalization_23[0][0]', 'add_10[0][0]']
conv2d_25 (Conv2D)                           (None, 32, 32, 64)    36928     ['add_11[0][0]']
batch_normalization_24 (BatchNormalization)  (None, 32, 32, 64)    256       ['conv2d_25[0][0]']
activation_13 (Activation)                   (None, 32, 32, 64)    0         ['batch_normalization_24[0][0]']
conv2d_26 (Conv2D)                           (None, 32, 32, 64)    36928     ['activation_13[0][0]']
batch_normalization_25 (BatchNormalization)  (None, 32, 32, 64)    256       ['conv2d_26[0][0]']
add_12 (Add)                                 (None, 32, 32, 64)    0         ['batch_normalization_25[0][0]', 'add_11[0][0]']
conv2d_27 (Conv2D)                           (None, 32, 32, 64)    36928     ['add_12[0][0]']
batch_normalization_26 (BatchNormalization)  (None, 32, 32, 64)    256       ['conv2d_27[0][0]']
activation_14 (Activation)                   (None, 32, 32, 64)    0         ['batch_normalization_26[0][0]']
conv2d_28 (Conv2D)                           (None, 32, 32, 64)    36928     ['activation_14[0][0]']
batch_normalization_27 (BatchNormalization)  (None, 32, 32, 64)    256       ['conv2d_28[0][0]']
add_13 (Add)                                 (None, 32, 32, 64)    0         ['batch_normalization_27[0][0]', 'add_12[0][0]']
conv2d_29 (Conv2D)                           (None, 32, 32, 64)    36928     ['add_13[0][0]']
batch_normalization_28 (BatchNormalization)  (None, 32, 32, 64)    256       ['conv2d_29[0][0]']
activation_15 (Activation)                   (None, 32, 32, 64)    0         ['batch_normalization_28[0][0]']
conv2d_30 (Conv2D)                           (None, 32, 32, 64)    36928     ['activation_15[0][0]']
batch_normalization_29 (BatchNormalization)  (None, 32, 32, 64)    256       ['conv2d_30[0][0]']
add_14 (Add)                                 (None, 32, 32, 64)    0         ['batch_normalization_29[0][0]', 'add_13[0][0]']
conv2d_31 (Conv2D)                           (None, 32, 32, 64)    36928     ['add_14[0][0]']
batch_normalization_30 (BatchNormalization)  (None, 32, 32, 64)    256       ['conv2d_31[0][0]']
activation_16 (Activation)                   (None, 32, 32, 64)    0         ['batch_normalization_30[0][0]']
conv2d_32 (Conv2D)                           (None, 32, 32, 64)    36928     ['activation_16[0][0]']
batch_normalization_31 (BatchNormalization)  (None, 32, 32, 64)    256       ['conv2d_32[0][0]']
add_15 (Add)                                 (None, 32, 32, 64)    0         ['batch_normalization_31[0][0]', 'add_14[0][0]']
conv2d_33 (Conv2D)                           (None, 32, 32, 64)    36928     ['add_15[0][0]']
batch_normalization_32 (BatchNormalization)  (None, 32, 32, 64)    256       ['conv2d_33[0][0]']
up_sampling2d (UpSampling2D)                 (None, 64, 64, 64)    0         ['batch_normalization_32[0][0]']
conv2d_34 (Conv2D)                           (None, 64, 64, 3)     1731      ['up_sampling2d[0][0]']

Total params: 1230595 (4.69 MB)
Trainable params: 1226371 (4.68 MB)
Non-trainable params: 4224 (16.50 KB)

Training the model took a full hour on a GPU!!!  A great success!!!
(Google Colab)
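Since training takes about an hour, it may be worth saving the trained model so it can be reused without retraining. A minimal sketch; the filename is a placeholder, not from the original post:

# Save the trained model to avoid repeating the one-hour training run.
sr_model.save('sr_model.keras')   # placeholder filename

# Later, reload it without retraining.
import tensorflow as tf
restored_model = tf.keras.models.load_model('sr_model.keras')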
