More than 3 years have passed since last update.

Introduction to Heuristic Contestで焼きなましを学ぶ

Last updated at 2020-06-30Posted at 2020-06-30

こんにちは、ganariyaです＞＜
主に競技プログラミングを嗜んでいます。

先日、改善型コンテスト（マラソン）がAtCoderに定期開催されるとのことで、その導入コンテストとして「Introduction to Heuristics Contest」が開催されました。

今回は、その参加記ならびに焼きなましに対する自分の理解やマラソンでの個人的な方針をまとめようと思います。

コンテスト中の方針

問題を読むと分かりますが、問題B、CをやってからAをやるといいと書いてあります。
僕はマラソン系初心者なので、Bから解き始めました。

Bは、Aでのスコア計算プログラムを書こう！というものでした。
これはそんなに難しくなく、300点ぐらいの内容でした。

公式解説はRustのため、C++で書く場合はどうなるか載せておきます。

LL get_score(VLL &c, VVLL &S, VLL &t) {
    LL score = 0;
    VLL prev(types, -1);
    for (int i = 0; i < t.size(); i++) {
        score += S[i][t[i]];
        prev[t[i]] = i;
        for (int j = 0; j < types; j++) score -= c[j] * (i - prev[j]);
    }
    return score;
}

あとはすぐにA問題に取り組みました。

自分の方針として、まあ直接的に最適解は求まらないので（それはそう）、愚直にGreedy解を求めることにしました。

Greedy解を作った後は、焼きなまししながら一点改善を続ければ改善できそうだったので、焼きなましをブログで学びながらひいひい実装したところ、107523489点をえることが出来ました。

結局この点数を改善できず、237/1165位で終わりました。

マラソン初心者にしては頑張れた方だと思います。＞＜

公式解を読み解いていく

Editorialがとても丁寧なため、これを読んでいて感じた点や、「焼きなまし」の個人的理解について触れていこうと思います。

スコア計算

まず、時間に余裕があるならば「最初はスコア計算プログラムを作る」を意識する必要があります。
スコア計算を行う中で、問題の特性や内部構造を理解できる可能性があります。
スコア計算は先程のプログラムにあんります。

貪欲法

競プロAlgo部門でもよく出てきますが、Greedyな解を最初マラソンでも出すことが多いです。マトロイドの構造とかではないので、最適性は当然保証されませんが、なんだかんだ良い解になることがあるらしいです。

今回の問題では、ある問題を採用するかしないかを26通り全部試しながら探索すればいいです。

これは



LL evaluate(VLL &c, VVLL &S, VLL &t, int k, int D) {
    LL score = 0;
    VLL prev(types, -1);
    for (int i = 0; i < t.size(); i++) {
        score += S[i][t[i]];
        prev[t[i]] = i;
        for (int j = 0; j < types; j++) score -= c[j] * (i - prev[j]);
    }

    for (LL d = t.size(); d < min<int>(D, t.size() + k); d++) {
        for (int j = 0; j < types; j++) {
            score -= (d - prev[j]) * c[j];
        }
    }
    return score;
}


VLL solve(int D, VLL &c, VVLL &S, int k) {
    VLL out;

    for (int i = 0; i < D; i++) {
        LL max_score = LLONG_MIN;
        LL type = -1;

        for (int j = 0; j < types; j++) {
            out.push_back(j);
            LL score = evaluate(c, S, out, k, D);
            if (score > max_score) {
                max_score = score;
                type = j;
            }
            out.pop_back();
        }
        out.push_back(type);
    }

    return out;
}

VLL ans;
    LL ans_score = LLONG_MIN;
    for (int k = 0; k < types; k++) {
        auto out = solve(D, c, S, k);
        LL score = get_score(c, S, out);
        if (score > ans_score) {
            ans_score = score;
            ans = out;
        }
    }

のように、先程のget_scoreを使いながらどうすればコストが最大になるかを選んでいけばいいです。

貪欲法の改善として、前後13日を考える、というのがあります。
これは、平均13日ごとにコンテストが開催されるため、これから13日間コンテストが開催されないのでは･･･？というのを考えることで、さらにそれらしい値を得ることができます。
これはevaluateで実装しています。

局所探索

Greedy解を構築した後は、解を壊さないように少しずつ修正していく局所探索をします。
いわゆる山登り探索です。
目的空間における最適値を探すために、少しずつ山を登っていきます。
勾配微分は山登りですね＞＜

局所探索では、今の解を変化させて、変化させた解がよりスコアが大きいならそちらに移動します。
ここで、変化させる量が少ないとすぐに局所解にはまります。
一方変化させる量が大きいと解として制約を満たさない、また計算量が大きくなるなどの問題が発生するため、問題の性質を見極める必要があります。

今回の問題では、Editorialで詳しく解説していますが

コンテストの開催日に関する一点更新
開催日の近いコンテストをswap

することで、探索空間を保ちながら広く探索できます。

これはC++であれば


    // search
    while (true) {
        auto now = chrono::system_clock::now();
        auto elapsed = chrono::duration_cast<chrono::milliseconds>(now - start).count();
        if (elapsed >= 1940) break;
 
        uniform_int_distribution<int> rand_day(0, D - 1);
        uniform_int_distribution<int> rand_type(0, types - 1);
        uniform_real_distribution<double> rand_01(0, 1);
 
        if (rand_01(mt) < 0.5) {
            auto day = rand_day(mt);
            auto type = rand_type(mt);
            auto prev_val = ans[day];
            ans[day] = type;
            LL score = get_score(c, S, ans);
            if (score > ans_score) {
                ans_score = score;
            } else {
                ans[day] = prev_val;
            }
        } else {
            auto day1 = max(0, rand_day(mt) - 1);
            uniform_int_distribution<int> rand_day2(day1 + 1, min(D - 1, day1 + 16));
            auto day2 = rand_day2(mt);
            swap(ans[day1], ans[day2]);
            LL score = get_score(c, S, ans);
            if (score > ans_score) {
                ans_score = score;
            } else {
                swap(ans[day1], ans[day2]);
            }
        }
 
    }

のように実装できます。
Rust版のプログラムと比較しながら読んでみてください。

焼きなまし

焼きなましは、スコアを最大化する方向にのみ山登りをすると、局所解にはまり更新がされなくなってしまいます。

そこで、鍛冶の焼きなましに着想を得た焼きなましを実装します。

焼きなましは、探索の最初ほど悪い方向への更新を許可し、探索の後半ほど良い方向への更新のみ許可するヒューリスティックです。
局所解にはまってしまう原因として、大域的な視点を持っていないことがあります。
そのため、最初は活発的に悪い方向にも移動することで、局所解ではなく大域的に良い解を探そうとします。
そして、後半では見つかった大域解を細かく動いて絞っていきもっとも良い局所解を発見します。

焼きなましの初期温度を$T_0$、終点温度を$T_1$とします。

現在の温度を$T$とし、更新を認めるとそこからスコアが$\Delta$だけ改善するとき、その更新を認める確率は

スコアの更新が正、つまり増えるのであれば必ず更新を認め、

更新が負である、つまり悪化するのであれば$e^{\Delta/T}$で採用します。

この、$e^{\Delta/T}$は、増える値が負であるとき、Tが大きいほどよりその採用する確率が大きくなります。
また、Tが小さいとき（探索後半　温度が低い）ときは、採用確率が小さくなり、よくなる方向にしか移動しません。
これは、ネイピア数の関数の形を考えると分かります。

また、温度の遷移は、今の経過時間を$t \in [0, 1]$とすると

$T_t = T_0^{1-t} \times T_1^t$と遷移します。

これだとよくわからないのでPythonで軽く形を見てみました。

# -*- coding: utf-8 -*-

import matplotlib.pyplot as plt
import numpy as np
import time

T0 = 2000
T1 = 600
TL = 5

T = T0

x = []
y = []
cnt = 0

start = time.perf_counter()
while True:
  cnt += 1
  if cnt % 500 == 0:
      now = time.perf_counter()
      elapsed = now - start
      t = elapsed / TL
      if t > TL :
        break
      T = (T0 ** (1-t)) * (T1 ** t)
      x.append(cnt)
      y.append(T)

plt.plot(x, y)

上記のような形になります。
つまり、初期段階ではやめに温度が冷えていき、後半ほど温度が変わらなくなります。

これは、最初ほどランダムに移動しますが、そもそもこのランダム移動は短時間しか行わず、後半の多い時間ではより局所に攻めるような移動になっています。

他にも線形にプログラムすることもあります。

実際に焼きなましをすると以下のようになります。


   start = chrono::system_clock::now();
    // search
    while (true) {
        auto now = chrono::system_clock::now();
        auto elapsed = chrono::duration_cast<chrono::milliseconds>(now - start).count();
        if (elapsed >= 1940) break;

        uniform_int_distribution<int> rand_day(0, D - 1);
        uniform_int_distribution<int> rand_type(0, types - 1);
        uniform_real_distribution<double> rand_01(0, 1);

        cnt++;

        if (cnt % 50 == 0) {
            auto t = elapsed / TIME_LIMIT;
            T = pow(T0, 1 - t) * pow(T1, t);
        }

        if (rand_01(mt) < 0.5) {
            auto day = rand_day(mt);
            auto type = rand_type(mt);
            auto prev_val = ans[day];
            ans[day] = type;
            LL score = get_score(c, S, ans);
            auto p = exp((score - ans_score) / T);
            auto r = rand_01(mt);
            if (score > ans_score or r < p) {
                ans_score = score;
            } else {
                ans[day] = prev_val;
            }
        } else {
            auto day1 = max(0, rand_day(mt) - 1);
            uniform_int_distribution<int> rand_day2(day1 + 1, min(D - 1, day1 + 16));
            auto day2 = rand_day2(mt);
            swap(ans[day1], ans[day2]);
            LL score = get_score(c, S, ans);
            auto p = exp((score - ans_score) / T);
            auto r = rand_01(mt);
            if (score > ans_score or r < p) {
                ans_score = score;
            } else {
                swap(ans[day1], ans[day2]);
            }
        }

    }

T = pow(T0, 1-t) * pow(T1, t)は、今の温度を時間によって遷移させています。

p = exp((score - ans_score) / T)は、温度でスコアを割ってこの値を指数関数になげています。
もし更新が負であればこの値は0~1の値を取り、それがrより大きいかどうかで、改悪の更新を行うかを許可します。

ここまでやることで119809269を得ることができました。

最後に、自分のプログラム全体を載せておきます。


constexpr int types = 26;
std::mt19937 mt{std::random_device{}()};

LL get_score(VLL &c, VVLL &S, VLL &t) {
    LL score = 0;
    VLL prev(types, -1);
    for (int i = 0; i < t.size(); i++) {
        score += S[i][t[i]];
        prev[t[i]] = i;
        for (int j = 0; j < types; j++) score -= c[j] * (i - prev[j]);
    }
    return score;
}

LL evaluate(VLL &c, VVLL &S, VLL &t, int k, int D) {
    LL score = 0;
    VLL prev(types, -1);
    for (int i = 0; i < t.size(); i++) {
        score += S[i][t[i]];
        prev[t[i]] = i;
        for (int j = 0; j < types; j++) score -= c[j] * (i - prev[j]);
    }

    for (LL d = t.size(); d < min<int>(D, t.size() + k); d++) {
        for (int j = 0; j < types; j++) {
            score -= (d - prev[j]) * c[j];
        }
    }
    return score;
}

VLL solve(int D, VLL &c, VVLL &S, int k) {
    VLL out;

    for (int i = 0; i < D; i++) {
        LL max_score = LLONG_MIN;
        LL type = -1;

        for (int j = 0; j < types; j++) {
            out.push_back(j);
            LL score = evaluate(c, S, out, k, D);
            if (score > max_score) {
                max_score = score;
                type = j;
            }
            out.pop_back();
        }
        out.push_back(type);
    }

    return out;
}

constexpr double TIME_LIMIT = 2000;

int main() {

    auto start = chrono::system_clock::now();

    int D;
    cin >> D;

    VLL c(types);
    cin >> c;

    VVLL S(D, VLL(types));
    cin >> S;

    VLL ans;
    LL ans_score = LLONG_MIN;
    for (int k = 0; k < types; k++) {
        auto out = solve(D, c, S, k);
        LL score = get_score(c, S, out);
        if (score > ans_score) {
            ans_score = score;
            ans = out;
        }
    }

    const double T0 = 2000;
    const double T1 = 600;
    double T = T0;

    int cnt = 0;


   start = chrono::system_clock::now();
    // search
    while (true) {
        auto now = chrono::system_clock::now();
        auto elapsed = chrono::duration_cast<chrono::milliseconds>(now - start).count();
        if (elapsed >= 1940) break;

        uniform_int_distribution<int> rand_day(0, D - 1);
        uniform_int_distribution<int> rand_type(0, types - 1);
        uniform_real_distribution<double> rand_01(0, 1);

        cnt++;

        if (cnt % 50 == 0) {
            auto t = elapsed / TIME_LIMIT;
            T = pow(T0, 1 - t) * pow(T1, t);
        }

        if (rand_01(mt) < 0.5) {
            auto day = rand_day(mt);
            auto type = rand_type(mt);
            auto prev_val = ans[day];
            ans[day] = type;
            LL score = get_score(c, S, ans);
            auto p = exp((score - ans_score) / T);
            auto r = rand_01(mt);
            if (score > ans_score or r < p) {
                ans_score = score;
            } else {
                ans[day] = prev_val;
            }
        } else {
            auto day1 = max(0, rand_day(mt) - 1);
            uniform_int_distribution<int> rand_day2(day1 + 1, min(D - 1, day1 + 16));
            auto day2 = rand_day2(mt);
            swap(ans[day1], ans[day2]);
            LL score = get_score(c, S, ans);
            auto p = exp((score - ans_score) / T);
            auto r = rand_01(mt);
            if (score > ans_score or r < p) {
                ans_score = score;
            } else {
                swap(ans[day1], ans[day2]);
            }
        }

    }


    for (auto x: ans) cout << x + 1 << endl;

}

参考リンク

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up