More than 1 year has passed since last update.

botorch入門1 モデル作成1

Last updated at 2025-05-19Posted at 2025-05-19

2年ぶりの更新となります。
ベイズ最適化を調査しており、pytorchと親和性の高そうなbotorchを利用を検討しています。
備忘録代わりに使い方をまとめていこうと思います。

冗長的な部分がありますが、あとでコピペできるようにという意図もあります。致命的なミスなどあればご指摘いただければと思います。

botorchはモデル作成だけでも多数実装されています。
何回かに分けて記事にします。まずは単目的モデルです。

1. SingleTaskGP

概要:
SingleTaskGP は BoTorch における最も基本的なガウス過程モデルであり、1つの出力をもつ連続値回帰問題に使用されます。

主な用途:

標準的なベイズ最適化の基礎モデル
比較的ノイズの少ないスカラー出力のモデリング

特徴:

デフォルトでは RBF カーネル（Radial Basis Function）を使用
outcome_transform を利用して標準化などの前処理が可能
カーネルの変更（Matern, ARDなど）や likelihood のカスタマイズも容易

次のような関数からデータ点をサンプリングしてモデルの作成を行います。

import numpy as np
import torch
import matplotlib.pyplot as plt
import japanize_matplotlib

def blackbox(x):
    return np.sin(x*2)+np.cos(x/2-2)-x*0.1+np.random.rand(len(x),1)

x = np.arange(0,10,0.1).reshape(-1,1)
y = blackbox(x)

plt.plot(x, y);
plt.xlabel('x', size=20)
plt.ylabel('y', size=20)

from botorch.fit import fit_gpytorch_mll
from botorch.models import SingleTaskGP
from botorch.models.transforms.outcome import Standardize
from botorch.optim import optimize_acqf
from botorch.sampling import SobolQMCNormalSampler
from botorch.utils.transforms import normalize, unnormalize
from gpytorch.mlls import ExactMarginalLogLikelihood
import gpytorch

train_x = torch.rand(10,1, dtype=torch.double)*10
train_y = blackbox(train_x)

bounds = torch.stack([
    torch.min(train_x, dim=0)[0],
    torch.max(train_x, dim=0)[0]
])

# 入力データを正規化
train_x_scaled = normalize(train_x, bounds=bounds)

# モデルの作成
model = SingleTaskGP(
    train_x_scaled,
    train_y,
    outcome_transform=Standardize(
        m=train_y.size(-1)
    )
)

# モデルの最尤推定を行う
mll = ExactMarginalLogLikelihood(
    model.likelihood,
    model
)
fit_gpytorch_mll(mll)

x = torch.arange(0,10,0.1).reshape(-1,1)
x_norm = normalize(x, bounds)

pred = model.posterior(x_norm)
pred_mean = pred.mean.detach().numpy().ravel()
pred_std = pred.variance.sqrt().detach().numpy().ravel()

plt.plot(x.ravel(), pred_mean, color='orange')
plt.fill_between(x.ravel(), pred_mean+pred_std, pred_mean-pred_std, color='orange', alpha=.4)
plt.scatter(train_x, train_y, color='red')
plt.xlabel('x', size=20)
plt.ylabel('y', size=20)

カーネル関数の変更

from gpytorch.kernels import RBFKernel, MaternKernel, ScaleKernel

bounds = torch.stack([
    torch.min(train_x, dim=0)[0],
    torch.max(train_x, dim=0)[0]
])

# 入力データを正規化
train_x_scaled = normalize(train_x, bounds=bounds)

# モデルの作成
model = SingleTaskGP(
    train_x_scaled,
    train_y,
    covar_module=ScaleKernel(RBFKernel()),
    outcome_transform=Standardize(
        m=train_y.size(-1)
    )
)

# モデルの最尤推定を行う
mll = ExactMarginalLogLikelihood(
    model.likelihood,
    model
)
fit_gpytorch_mll(mll)

x = torch.arange(0,10,0.1).reshape(-1,1)
x_norm = normalize(x, bounds)

pred = model.posterior(x_norm)
pred_mean = pred.mean.detach().numpy().ravel()
pred_std = pred.variance.sqrt().detach().numpy().ravel()

plt.plot(x.ravel(), pred_mean, color='orange')
plt.fill_between(x.ravel(), pred_mean+pred_std, pred_mean-pred_std, color='orange', alpha=.4)
plt.scatter(train_x, train_y, color='red')
plt.xlabel('x', size=20)
plt.ylabel('y', size=20)

bounds = torch.stack([
    torch.min(train_x, dim=0)[0],
    torch.max(train_x, dim=0)[0]
])

# 入力データを正規化
train_x_scaled = normalize(train_x, bounds=bounds)

# モデルの作成
model = SingleTaskGP(
    train_x_scaled,
    train_y,
    covar_module=ScaleKernel(
        MaternKernel(nu=2.5)
    ),
    outcome_transform=Standardize(
        m=train_y.size(-1)
    )
)

# モデルの最尤推定を行う
mll = ExactMarginalLogLikelihood(
    model.likelihood,
    model
)
fit_gpytorch_mll(mll)

x = torch.arange(0,10,0.1).reshape(-1,1)
x_norm = normalize(x, bounds)

pred = model.posterior(x_norm)
pred_mean = pred.mean.detach().numpy().ravel()
pred_std = pred.variance.sqrt().detach().numpy().ravel()

plt.plot(x.ravel(), pred_mean, color='orange')
plt.fill_between(x.ravel(), pred_mean+pred_std, pred_mean-pred_std, color='orange', alpha=.4)
plt.scatter(train_x, train_y, color='red')
plt.xlabel('x', size=20)
plt.ylabel('y', size=20)

ard_num_dims を指定すると、各入力次元に個別の長さスケールを割り当てる ARD (Automatic Relevance Determination) が有効になる（次元ごとの関連性を学習）

bounds = torch.stack([
    torch.min(train_x, dim=0)[0],
    torch.max(train_x, dim=0)[0]
])

# 入力データを正規化
train_x_scaled = normalize(train_x, bounds=bounds)

# モデルの作成
model = SingleTaskGP(
    train_x_scaled,
    train_y,
    covar_module=ScaleKernel(
        RBFKernel(
            ard_num_dims=train_x.shape[-1]
        )
    ),
    outcome_transform=Standardize(
        m=train_y.size(-1)
    )
)

# モデルの最尤推定を行う
mll = ExactMarginalLogLikelihood(
    model.likelihood,
    model
)
fit_gpytorch_mll(mll)

x = torch.arange(0,10,0.1).reshape(-1,1)
x_norm = normalize(x, bounds)

pred = model.posterior(x_norm)
pred_mean = pred.mean.detach().numpy().ravel()
pred_std = pred.variance.sqrt().detach().numpy().ravel()

plt.plot(x.ravel(), pred_mean, color='orange')
plt.fill_between(x.ravel(), pred_mean+pred_std, pred_mean-pred_std, color='orange', alpha=.4)
plt.scatter(train_x, train_y, color='red')
plt.xlabel('x', size=20)
plt.ylabel('y', size=20)

尤度関数の変更

from gpytorch.likelihoods.gaussian_likelihood import GaussianLikelihood
from gpytorch.constraints import GreaterThan
from gpytorch.priors.torch_priors import LogNormalPrior

bounds = torch.stack([
    torch.min(train_x, dim=0)[0],
    torch.max(train_x, dim=0)[0]
])

# 入力データを正規化
train_x_scaled = normalize(train_x, bounds=bounds)

noise_prior = LogNormalPrior(loc=-4.0, scale=1.0)
likelihood = GaussianLikelihood(
    noise_prior=noise_prior,
    batch_shape=train_x.shape[:-2],
    noise_constraint=GreaterThan(
        1e-1,
        transform=None,
        initial_value=noise_prior.mode,
    ),
)

# モデルの作成
model = SingleTaskGP(
    train_x_scaled,
    train_y,
    covar_module=ScaleKernel(
        RBFKernel(
            ard_num_dims=train_x.shape[-1]
        )
    ),
    likelihood=likelihood,
    outcome_transform=Standardize(
        m=train_y.size(-1)
    )
)

# モデルの最尤推定を行う
mll = ExactMarginalLogLikelihood(
    model.likelihood,
    model
)
fit_gpytorch_mll(mll)

x = torch.arange(0,10,0.1).reshape(-1,1)
x_norm = normalize(x, bounds)

pred = model.posterior(x_norm)
pred_mean = pred.mean.detach().numpy().ravel()
pred_std = pred.variance.sqrt().detach().numpy().ravel()

plt.plot(x.ravel(), pred_mean, color='orange')
plt.fill_between(x.ravel(), pred_mean+pred_std, pred_mean-pred_std, color='orange', alpha=.4)
plt.scatter(train_x, train_y, color='red')
plt.xlabel('x', size=20)
plt.ylabel('y', size=20)

2. MixedSingleTaskGP

概要:
MixedSingleTaskGP は、連続値とカテゴリカル（離散）変数の両方を入力に持つ回帰モデルです。

主な用途:

材料設計や条件設定など、カテゴリ変数を含む最適化問題

特徴:

cat_dims を指定することで、指定した次元に対してカテゴリカルカーネルを適用
内部的に AdditiveKernel と ProductKernel を組み合わせて連続・離散空間の依存性をモデリング
モデルは入力空間ごとに挙動を変化させるため、カテゴリごとの関数形が大きく異なる場合に有効

次のような関数からデータ点をサンプリングしてモデルの作成を行います。
カテゴリ値によって関数の振る舞いが変わる場合を想定します。カテゴリ値は0,1,...のようにラベルで与えられているとします。

def blackbox(x, noise=True):
    if type(x)==np.ndarray:
        return np.sin(x[:,[0]]*2)+np.cos(x[:,[0]]/2-2)+np.sign((x[:,[1]]==1).astype(int)-0.5)*x[:,[0]]*0.2 - 2*(x[:,[1]]==1).astype(int) + int(noise) * np.random.rand(len(x),1)
    else:
        return np.sin(x[:,[0]]*2)+np.cos(x[:,[0]]/2-2)+np.sign((x[:,[1]]==1).int()-0.5)*x[:,[0]]*0.2 - 2*(x[:,[1]]==1).int() + int(noise) * np.random.rand(len(x),1)

x = np.arange(0,10,0.1).reshape(-1,1)
y1=blackbox(np.concatenate([x,np.zeros_like(x)], axis=1), noise=False)
y2=blackbox(np.concatenate([x,np.ones_like(x)], axis=1), noise=False)

plt.plot(x[:,0], y1);
plt.plot(x[:,0], y2);
plt.xlabel('x', size=20)
plt.ylabel('y', size=20)

from botorch.models import MixedSingleTaskGP

train_x = torch.cat([torch.rand(15,1, dtype=torch.double)*10, torch.tensor(np.random.choice([0,1], (15,1)))], dim=1)
train_y = blackbox(train_x)

bounds = torch.stack([
    torch.min(train_x, dim=0)[0],
    torch.max(train_x, dim=0)[0]
])

# 入力データを正規化
train_x_scaled = normalize(train_x, bounds=bounds)

# モデルの作成
model = MixedSingleTaskGP(
    train_x_scaled,
    train_y,
    cat_dims=[1],
    outcome_transform=Standardize(
        m=train_y.size(-1)
    )
)

# モデルの最尤推定を行う
mll = ExactMarginalLogLikelihood(
    model.likelihood,
    model
)
fit_gpytorch_mll(mll)

x = torch.arange(0,10,0.1).reshape(-1,1)
x1 = torch.cat([x, torch.zeros_like(x)], axis=1)
x2 = torch.cat([x, torch.ones_like(x)], axis=1)
x_norm1 = normalize(x1, bounds)
x_norm2 = normalize(x2, bounds)

pred1 = model.posterior(x_norm1)
pred1_mean = pred1.mean.detach().numpy().ravel()
pred1_std = pred1.variance.sqrt().detach().numpy().ravel()

pred2 = model.posterior(x_norm2)
pred2_mean = pred2.mean.detach().numpy().ravel()
pred2_std = pred2.variance.sqrt().detach().numpy().ravel()

plt.plot(x, pred1_mean, color='orange')
plt.fill_between(x.ravel(), pred1_mean+pred1_std, pred1_mean-pred1_std, color='orange', alpha=.4)

plt.plot(x, pred2_mean, color='green')
plt.fill_between(x.ravel(), pred2_mean+pred2_std, pred2_mean-pred2_std, color='green', alpha=.4)

plt.scatter(train_x[:,0], train_y, color='red')
plt.xlabel('x', size=20)
plt.ylabel('y', size=20)

尤度関数の変更も確認します。

bounds = torch.stack([
    torch.min(train_x, dim=0)[0],
    torch.max(train_x, dim=0)[0]
])

# 入力データを正規化
train_x_scaled = normalize(train_x, bounds=bounds)

noise_prior = LogNormalPrior(loc=-4.0, scale=1.0)
likelihood = GaussianLikelihood(
    noise_prior=noise_prior,
    batch_shape=train_x.shape[:-2],
    noise_constraint=GreaterThan(
        1e-1,
        transform=None,
        initial_value=noise_prior.mode,
    ),
)

# モデルの作成
model = MixedSingleTaskGP(
    train_x_scaled,
    train_y,
    cat_dims=[1],
    likelihood=likelihood,
    outcome_transform=Standardize(
        m=train_y.size(-1)
    )
)

# モデルの最尤推定を行う
mll = ExactMarginalLogLikelihood(
    model.likelihood,
    model
)
fit_gpytorch_mll(mll)

x = torch.arange(0,10,0.1).reshape(-1,1)
x1 = torch.cat([x, torch.zeros_like(x)], axis=1)
x2 = torch.cat([x, torch.ones_like(x)], axis=1)
x_norm1 = normalize(x1, bounds)
x_norm2 = normalize(x2, bounds)

pred1 = model.posterior(x_norm1)
pred1_mean = pred1.mean.detach().numpy().ravel()
pred1_std = pred1.variance.sqrt().detach().numpy().ravel()

pred2 = model.posterior(x_norm2)
pred2_mean = pred2.mean.detach().numpy().ravel()
pred2_std = pred2.variance.sqrt().detach().numpy().ravel()

plt.plot(x, pred1_mean, color='orange')
plt.fill_between(x.ravel(), pred1_mean+pred1_std, pred1_mean-pred1_std, color='orange', alpha=.4)

plt.plot(x, pred2_mean, color='green')
plt.fill_between(x.ravel(), pred2_mean+pred2_std, pred2_mean-pred2_std, color='green', alpha=.4)

plt.scatter(train_x[:,0], train_y, color='red')
plt.xlabel('x', size=20)
plt.ylabel('y', size=20)

3. RobustRelevancePursuitSingleTaskGP

概要:
RobustRelevancePursuitSingleTaskGP は、入力の次元に対する選択性（relevance）と外れ値の影響を同時に学習可能なロバストな GP モデルです。

主な用途:

外れ値の混入が予想される実験データ
重要な変数の選択を含むモデリング

特徴:

自動的に入力変数の中で重要なものを選択（Relevance Pursuit）
外れ値（outlier）候補の数を指定して同時にモデル化
通常の GP と比較して過学習の抑制や予測の安定性が向上
prior_mean_of_support は、変数選択に対してスパース性の事前情報を与える役割を果たす（値が大きいほど強くスパース化を促す）

次のような関数からデータ点をサンプリングしてモデルの作成を行います。
サンプリングしたデータに明示的に外れ値となる値を追加します。

from botorch.models.relevance_pursuit import forward_relevance_pursuit
from botorch.models.robust_relevance_pursuit_model import RobustRelevancePursuitSingleTaskGP
from gpytorch.means import ZeroMean
from botorch.utils.constraints import NonTransformedInterval
from botorch.models.transforms import Normalize

def blackbox(x):
    return np.sin(x*2)+np.cos(x/2-2)-x*0.1+np.random.rand(len(x),1)

x = np.arange(0,10,0.1).reshape(-1,1)
y = blackbox(x)

plt.plot(x, y);
plt.xlabel('x', size=20)
plt.ylabel('y', size=20)

train_x = torch.rand(30,1, dtype=torch.double)*10
train_y = blackbox(train_x)

train_x = torch.cat([train_x, torch.tensor([[2],[5]])])
train_y = torch.cat([train_y, torch.tensor([[-2],[3]])])

bounds = torch.stack([
    torch.min(train_x, dim=0)[0],
    torch.max(train_x, dim=0)[0]
])

# 入力データを正規化
train_x_scaled = normalize(train_x, bounds=bounds)

# モデルの作成
model = SingleTaskGP(
    train_x_scaled,
    train_y,
    outcome_transform=Standardize(
        m=train_y.size(-1)
    )
)

# モデルの最尤推定を行う
mll = ExactMarginalLogLikelihood(
    model.likelihood,
    model
)
fit_gpytorch_mll(mll)

model_robust = RobustRelevancePursuitSingleTaskGP(
    train_x_scaled,
    train_y,
    prior_mean_of_support=16.0,
)

# モデルの最尤推定を行う
mll = ExactMarginalLogLikelihood(
    model_robust.likelihood,
    model_robust
)
fit_gpytorch_mll(
    mll=mll, 
    numbers_of_outliers=[0, 1, 2, 3, 4],
    reset_parameters=False,
    relevance_pursuit_optimizer=forward_relevance_pursuit,
);

min_lengthscale = 0.05
lengthscale_constraint = NonTransformedInterval(
    min_lengthscale, torch.inf, initial_value=0.2
)

covar_module = ScaleKernel(
    RBFKernel(ard_num_dims=train_x_scaled.shape[-1], lengthscale_constraint=lengthscale_constraint),
    outputscale_constraint=NonTransformedInterval(0.01, 10.0, initial_value=0.1),
)

model_robust2 = RobustRelevancePursuitSingleTaskGP(
    train_x_scaled,
    train_y,
    input_transform=Normalize(d=train_x_scaled.shape[-1]),
    cache_model_trace=True,  # necessary to plot the model trace after optimization
    mean_module=ZeroMean(),
    covar_module=covar_module,
)

# モデルの最尤推定を行う
mll2 = ExactMarginalLogLikelihood(
    model_robust2.likelihood,
    model_robust2
)
fit_gpytorch_mll(
    mll=mll2, 
    numbers_of_outliers=[0, 1, 2, 3, 4],
    reset_parameters=False,
    relevance_pursuit_optimizer=forward_relevance_pursuit,
);

x = torch.arange(0,10,0.1).reshape(-1,1)
x_norm = normalize(x, bounds)

pred1 = model.posterior(x_norm)
pred1_mean = pred1.mean.detach().numpy().ravel()
pred1_std = pred1.variance.sqrt().detach().numpy().ravel()

pred2 = model_robust.posterior(x_norm)
pred2_mean = pred2.mean.detach().numpy().ravel()
pred2_std = pred2.variance.sqrt().detach().numpy().ravel()

pred3 = model_robust2.posterior(x_norm)
pred3_mean = pred3.mean.detach().numpy().ravel()
pred3_std = pred3.variance.sqrt().detach().numpy().ravel()

plt.plot(x.ravel(), pred1_mean, color='orange')
plt.fill_between(x.ravel(), pred1_mean+pred1_std, pred1_mean-pred1_std, color='orange', alpha=.4)

plt.plot(x.ravel(), pred2_mean, color='green')
plt.fill_between(x.ravel(), pred2_mean+pred2_std, pred2_mean-pred2_std, color='green', alpha=.4)

plt.plot(x.ravel(), pred3_mean, color='blue')
plt.fill_between(x.ravel(), pred3_mean+pred3_std, pred3_mean-pred3_std, color='blue', alpha=.4)

plt.scatter(train_x, train_y, color='red')
plt.xlabel('x', size=20)
plt.ylabel('y', size=20)

4. SaasFullyBayesianSingleTaskGP

概要:
SaasFullyBayesianSingleTaskGP は、MCMC（NUTSアルゴリズム）による事後分布推論を用いた完全ベイズモデルです。"SaaS"は "Sparse Axis-Aligned Subspace" を意味し、関連性の低い入力変数に対して自然に抑制をかける機構を持ちます。

主な用途:

高次元空間でのベイズ最適化
モデル不確実性をより厳密に考慮したい場合

特徴:

NUTS によるサンプルベースの推論（fit_fully_bayesian_model_nuts）
高次元問題での変数選択と精度の両立
実行時間は長くなるが不確実性の扱いに優れる

次のような関数からデータ点をサンプリングしてモデルの作成を行います。
20次元のデータを用意しますが、目的変数は4つの説明変数のみに依存しているものを想定します。

from botorch.models import SaasFullyBayesianSingleTaskGP
from botorch.fit import fit_fully_bayesian_model_nuts

f_hd = lambda y: 3*torch.sin(y[:,[4]]*6) + 2*torch.sin(y[:,[9]]*2+torch.pi/6) - torch.sin(y[:,[14]]*2+torch.pi/1.5) + 2.5*torch.sin(y[:,[19]]*4) + 0.2*torch.randn(len(y),1)

train_X = torch.rand(20,20, dtype=torch.double)
train_Y = f_hd(train_X)

model = SaasFullyBayesianSingleTaskGP(
    train_X,
    train_Y,
    outcome_transform=Standardize(m=train_Y.size(-1))
)

fit_fully_bayesian_model_nuts(
    model,
    warmup_steps=128,
    num_samples=64,
    thinning=16,
    disable_progbar=False
)

Sample: 100%|█████████████████████████████████████████| 192/192 [01:53,  1.69it/s, step size=4.20e-01, acc. prob=0.897]

重要度として各説明変数に対するパラメータを確認します。

def plot_lengthscale_importance(model, feature_names=None):
    """
    SaasFullyBayesianSingleTaskGP の lengthscale を解析・可視化する。

    Args:
        model: 学習済みの SaasFullyBayesianSingleTaskGP モデル
        feature_names: 特徴量名のリスト（Noneならインデックス表示）

    Returns:
        None（プロット表示）
    """
    # MCMCサンプルからlengthscaleを取得
    lengthscale_samples = model.covar_module.base_kernel.lengthscale.detach().cpu().numpy()
    # shape: (num_samples, 1, input_dim) → squeezeして (num_samples, input_dim)
    lengthscale_samples = lengthscale_samples.squeeze(1)

    # 各特徴量ごとの統計量
    mean_lengthscales = np.mean(lengthscale_samples, axis=0)
    perc_5 = np.percentile(lengthscale_samples, 5, axis=0)
    perc_95 = np.percentile(lengthscale_samples, 95, axis=0)

    # 重要度順に並べ替え（lengthscaleが短い方が重要）
    sorted_idx = np.argsort(mean_lengthscales)
    sorted_means = mean_lengthscales[sorted_idx]
    sorted_err_low = sorted_means - perc_5[sorted_idx]
    sorted_err_high = perc_95[sorted_idx] - sorted_means

    # 特徴量名
    if feature_names is None:
        feature_names = [f"X{i}" for i in range(lengthscale_samples.shape[1])]
    sorted_feature_names = np.array(feature_names)[sorted_idx]

    # プロット
    plt.figure(figsize=(10, 6))
    plt.errorbar(
        range(len(sorted_feature_names)),
        sorted_means,
        yerr=[sorted_err_low, sorted_err_high],
        fmt='o', ecolor='gray', capsize=5, markersize=5
    )
    plt.xticks(range(len(sorted_feature_names)), sorted_feature_names, rotation=45, ha='right')
    plt.xlabel("Features")
    plt.ylabel("Mean Lengthscale (lower = more important)")
    plt.title("Feature Importance based on Lengthscale (SaasFullyBayesianSingleTaskGP)")
    plt.grid(True)
    plt.tight_layout()
    plt.show()

plot_lengthscale_importance(model)

今回は以上となります。
次回は多目的モデルについて説明したいと思います。

ご意見あればコメントお願いします

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up