More than 1 year has passed since last update.

JacobianSaliencyMapAttack 実装（PyTorch, CIFAR-10）

Posted at 2023-08-05

はじめに

「JacobianSaliencyMapAttack」（Jacobian-based Saliency Map Attack、以下 JSMA）という敵対的サンプル攻撃をPyTorchで実装しました。
画像データはCIFAR10を用いました。
実装にあたり書籍「AIセキュリティから学ぶディープラーニング[技術]入門」に掲載のソースコードを参考にしました。

参考元ソースコード

実装

ライブラリのインポート

import numpy as np

import torch
import torch.nn as nn
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms

import matplotlib.pyplot as plt
from tqdm import tqdm

CIFAR10データセットを用意

# Mean and standard deviation applied to every channel by Normalize.
# The visualization step later reuses them to undo the normalization.
mu = 0.5
sigma = 0.5

# Convert images to tensors, then normalize each of the three channels
# with the same mean / std.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((mu,) * 3, (sigma,) * 3),
])

# Training split: shuffled mini-batches of 100 images.
trainset = torchvision.datasets.CIFAR10(
    './data', train=True, transform=transform, download=True
)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=100, shuffle=True, num_workers=2
)

# Test split: one image per batch, in dataset order.
testset = torchvision.datasets.CIFAR10(
    './data', train=False, transform=transform, download=True
)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=1, shuffle=False, num_workers=2
)

ToTensor で [0,1] になった画素値を、平均0.5・標準偏差0.5で正規化して [-1,1] に収めています（後述の攻撃で行うクリッピングの範囲 [-1,1] はこの正規化を前提にしています）。

モデルの用意

def Net():
    """Build a ResNet-50 classifier with a 10-way output layer.

    The backbone is created with torchvision's pretrained weights and its
    final fully connected layer is replaced by a fresh ``nn.Linear`` that
    keeps the same input width but produces 10 outputs.

    NOTE(review): newer torchvision releases deprecate ``pretrained=`` in
    favour of ``weights=`` - confirm against the installed version.

    Returns:
        The modified ResNet-50 module.
    """
    backbone = models.resnet50(pretrained=True)
    feature_dim = backbone.fc.in_features
    backbone.fc = nn.Linear(feature_dim, 10)
    return backbone

# Choose the compute device: CUDA when it is requested and available,
# otherwise fall back to the CPU.
use_cuda = True
device = torch.device("cuda" if (use_cuda and torch.cuda.is_available()) else "cpu")

# Initialize the network on the chosen device
model = Net().to(device)

# Load the trained weights from the checkpoint file.
# map_location remaps the stored tensors onto `device`, so a checkpoint that
# was saved on a GPU can still be loaded on a CPU-only machine.
path = "./exp/bestloss.pth"
model.load_state_dict(torch.load(path, map_location=device))

# Switch to evaluation mode so that layers which behave differently during
# training (e.g. the BatchNorm layers of ResNet) use their inference behaviour.
model.eval()

class to name の作成

# Maps a class index (0-9) to its human-readable label.
# The labels are listed in index order, so enumerate() assigns the keys.
class2name = dict(enumerate((
    "airplane",
    "automobile",
    "bird",
    "cat",
    "deer",
    "dog",
    "frog",
    "horse",
    "ship",
    "truck",
)))

可視化で使います。

敵対的サンプルの生成と攻撃

class JSMA:
    """Simplified, targeted Jacobian-based Saliency Map Attack.

    Every step computes the gradient of the target-class model output with
    respect to the input (the "saliency map"), picks the not-yet-updated
    sub-pixel with the largest gradient, adds ``theta`` to it and clips the
    image to ``[-1, 1]``.

    Attributes:
        model: Classifier (an ``nn.Module``); called as ``model(x)`` and
            indexed as ``[0][target_class]``.
        theta (float): Amount added to the selected sub-pixel in each step.
        target_score (float): Score threshold. Sub-pixels keep being updated
            until the predicted class is the target class and its softmax
            score exceeds this value.
        mask (ndarray): Same shape as the input image; 1 marks a sub-pixel
            that has not been updated yet, 0 one that has. Created by
            ``generate``.
    """

    def __init__(self, model, theta, target_score):
        # Store the arguments on the instance
        self.model = model
        self.theta = theta
        self.target_score = target_score

    def _model_device(self, fallback_tensor):
        """Return the device of the model's first parameter.

        Falls back to the device of ``fallback_tensor`` when the model has no
        parameters.
        """
        first_param = next(self.model.parameters(), None)
        if first_param is None:
            return fallback_tensor.device
        return first_param.device

    def generate(self, norm_input_image, target_class):
        """Generate an adversarial example for ``target_class``.

        Args:
            norm_input_image (tensor): Normalized image batch holding a single
                image (presumably ``(1, 3, 32, 32)`` for the CIFAR-10 loader
                used in this file - confirm against the caller). It is not
                modified.
            target_class (int): Index of the class the model should predict.

        Returns:
            tensor: Normalized adversarial example on the CPU, same shape as
            the input. If every sub-pixel has been updated before the target
            is reached, the last image produced is returned.
        """
        # Iteration counter
        iter_count = 0

        # Dummy class / score so that the loop body runs at least once
        adv_class = torch.inf
        adv_score = -torch.inf
        adv_name = None

        # Initialize the mask: every sub-pixel is still available
        self.mask = np.ones(norm_input_image.shape)

        # Work on a copy so the caller's image is left untouched
        norm_adv_image = norm_input_image.clone()

        device = self._model_device(norm_adv_image)

        # Keep updating sub-pixels until the target class is predicted with a
        # score above the target score
        while not (adv_class == target_class and adv_score > self.target_score):

            # Stop instead of looping forever once nothing is left to update
            if not self.mask.any():
                print('No un-updated sub-pixels remain; stopping before the target was reached.')
                break

            # Update the counter
            iter_count += 1

            # Update one sub-pixel
            norm_adv_image = self.update(norm_adv_image, target_class)

            # Inference only: no autograd graph is needed here
            with torch.no_grad():
                Y_hat_adv = self.model(norm_adv_image.to(device))

            # Classification
            adv_class = torch.argmax(Y_hat_adv, 1)
            adv_name = class2name[adv_class.item()]
            adv_score = torch.max(torch.softmax(Y_hat_adv, 1)).item()

            # Log every 5 iterations
            if iter_count % 5 == 0:
                print('Iteration = {0}: Prediction: {1} - score {2:.2f}%'.format(iter_count, adv_name, adv_score * 100))

        print('Iteration = {0}: Prediction: {1} - score {2:.2f}%'.format(iter_count, adv_name, adv_score * 100))

        return norm_adv_image

    def update(self, norm_input_image, target_class):
        """Build the saliency map and update a single sub-pixel.

        ``self.mask`` must already exist (``generate`` creates it) and should
        still contain at least one non-zero entry.

        Args:
            norm_input_image (tensor): Normalized image batch holding a single
                image. It is not modified.
            target_class (int): Index of the target class.

        Returns:
            tensor: New CPU tensor equal to the input with ``theta`` added to
            the selected sub-pixel, clipped to ``[-1, 1]``.
        """
        device = self._model_device(norm_input_image)

        # Fresh leaf tensor on the model's device so that X.grad is populated
        # and the caller's tensor is never touched
        X = norm_input_image.detach().clone().to(device).requires_grad_(True)

        # Saliency map: gradient of the target-class model output w.r.t. X
        self.model.zero_grad()
        y_hat_target = self.model(X)[0][target_class]
        y_hat_target.backward()

        # Gradient of the first (only) image as an ndarray
        saliency_map = X.grad[0].detach().cpu().numpy()

        # Exclude already-updated sub-pixels. They are set to -inf rather than
        # multiplied by 0: a 0 would win the argmax whenever all remaining
        # gradients are negative, re-selecting the same sub-pixel forever.
        masked_saliency_map = np.where(self.mask > 0, saliency_map, -np.inf)

        # argmax returns an index into the flattened array; convert it back
        # to a multi-dimensional index (a tuple)
        max_index = np.unravel_index(
            np.argmax(masked_saliency_map), masked_saliency_map.shape
        )

        # Update the selected sub-pixel on a private copy of the image
        updated_image = norm_input_image.detach().cpu().numpy().copy()
        updated_image[max_index] += self.theta

        # Mark the sub-pixel so that it is not selected again
        self.mask[max_index] = 0

        # Clip back into the normalized value range
        clipped_norm_input_image = np.clip(updated_image, -1, 1)

        return torch.tensor(clipped_norm_input_image)

元画像の用意

# Take the first batch from the test loader. The loader is built with
# batch_size=1 and shuffle=False, so this is a single image; its label is
# discarded.
norm_original_image, _ = next(iter(testloader))

パラメータの設定

# Amount of noise added to the selected sub-pixel in each step
theta = 0.4

# Score the target class has to exceed before the attack stops
target_score = 0.1

# Class the model should be fooled into predicting (0 = airplane)
target_class = 0

# Create the JSMA attacker
attack = JSMA(model=model, theta=theta, target_score=target_score)

攻撃

# Generate the adversarial example
norm_adv_image = attack.generate(norm_original_image, target_class)

# Run inference on the adversarial example.
# The input is moved to the model's device (the model may live on CUDA) and
# autograd is disabled because only the prediction is needed.
with torch.no_grad():
    Y_hat_adv = model(norm_adv_image.to(device))

# Classify the adversarial example
adv_class = torch.argmax(Y_hat_adv, 1)
adv_name = class2name[adv_class.item()]
adv_score = torch.max(torch.softmax(Y_hat_adv, 1)).item()

# Show the predicted class and its score
print('\nPrediction: {0} - score {1:.2f}%'.format(adv_name, adv_score * 100))

# 実行結果
Iteration = 5: Prediction: cat - score 99.89%
Iteration = 10: Prediction: cat - score 99.88%
Iteration = 15: Prediction: cat - score 99.88%
Iteration = 20: Prediction: cat - score 99.19%
Iteration = 25: Prediction: cat - score 92.63%
Iteration = 30: Prediction: cat - score 77.41%
Iteration = 34: Prediction: airplane - score 58.33%

Prediction: airplane - score 58.33%

34イテレーションでairplaneと誤認識させるような敵対的サンプルを生成することができました。

可視化

# Prediction for the unmodified image. The "Original" title below needs
# original_name / original_score, which were never defined before.
with torch.no_grad():
    Y_hat_original = model(norm_original_image.to(device))
original_class = torch.argmax(Y_hat_original, 1)
original_name = class2name[original_class.item()]
original_score = torch.max(torch.softmax(Y_hat_original, 1)).item()

# Undo the normalization
original_image = mu + sigma * norm_original_image
adv_image = mu + sigma * norm_adv_image

# First (only) image of each batch as an ndarray; detach / cpu make the
# conversion safe regardless of where the tensors came from
original_image = original_image[0].detach().cpu().numpy()
adv_image = adv_image[0].detach().cpu().numpy()

diff = adv_image - original_image  # the added noise
L0 = np.linalg.norm(diff.reshape(-1), ord=0)  # L0 norm: number of changed sub-pixels

# Channel-first -> channel-last, as expected by imshow
original_image = original_image.transpose(1, 2, 0)
adv_image = adv_image.transpose(1, 2, 0)
diff = diff.transpose(1, 2, 0)

plt.figure(figsize=(15, 15))

# Show the original image
plt.subplot(1, 3, 1)
plt.axis('off')
plt.title('Original\n {0} - {1:.2f}%'.format(original_name, original_score * 100))
plt.imshow(original_image)

# Show the difference
plt.subplot(1, 3, 2)
plt.gca().xaxis.set_visible(False)
plt.gca().yaxis.set_visible(False)
diff[diff == 0] = 1  # paint unchanged sub-pixels white so the noise stands out
plt.title('Diff\nL0 - {0}'.format(int(L0)))
plt.imshow(diff)

# Show the adversarial example
plt.subplot(1, 3, 3)
plt.axis('off')
plt.title('Adversarial\n {0} - {1:.2f}%'.format(adv_name, adv_score * 100))
plt.imshow(adv_image)

実行後、以下の画像が出力されます。

左が元画像、右が敵対的サンプル、真ん中が差分画像です。
ところどころノイズが目立っていますね...お世辞にも成功とは言えなさそうです。

おわりに

書籍によると今回実装したJSMAは簡易版とのこと。論文に準じたJSMAを実装したい。

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up