強凸関数だがNewton法が収束しない例

Last updated at 2025-10-01Posted at 2025-09-29

Doikov, N. (2021). New second-order and tensor methods in convex optimization (Doctoral dissertation, Ph. D. thesis, Université catholique de Louvain) の Example 1.4.3.が面白かったので、走り書き程度のメモを残しておきます。

強凸な目的関数の例

目的関数、およびその導関数は以下の通りです。

\begin{align*}
f(x) &= \log(1 + e^x) - \frac{x}{2} + \frac{\mu x^2}{2}\\
f'(x) &= \frac{e^x}{1+e^x} - \frac{1}{2} + \mu x\\
f''(x) &= \frac{e^x}{(1+e^x)^2} + \mu\\
f'''(x) &= \frac{e^x(1 - e^x)}{(1+e^x)^3}\\
f''''(x) &= \frac{e^x(1 - 4e^x + e^{2x})}{(1+e^x)^4}
\end{align*}

この目的関数は、

$\mu$-強凸
$\max_x | f''(x) | = \frac{1}{4} + \mu$ ($e^x=1$ のとき) であるので、$\nabla f$ が $L$-smooth ($L=\frac{1}{4} + \mu$)
$\max_x | f'''(x) | = \frac{1}{6\sqrt{3}}$ ($e^x=2-\sqrt{3}$ のとき) であるので、$\nabla^2 f$ が $M$-Lipschitz連続 ($M=\frac{1}{6\sqrt{3}}$)

という各種の良い性質を満たしますが、$\mu$ に対して初期点 $x_0$ が十分大きいとき、Newton法は収束しません。

(初期点 $x_0=-4, \mu=0.1$ の場合、収束する)

(初期点 $x_0=-4, \mu=0.01$ の場合、振動する)

強凸でない目的関数の例

ちなみに、次のような強凸でない目的関数の場合、よりシンプルな例でNewton法は発散します。

\begin{align*}
f(x) &= \sqrt{1 + x^2}\\
f'(x) &= \frac{x}{\sqrt{1 + x^2}}\\
f''(x) &= \frac{1}{(1 + x^2)^{3/2}}
\end{align*}

この関数は強凸ではありません($f''(x)$ は $x \to \infty$ で $0$ に近づく)。

このような関数に対してNewton法を適用すると、初期点の絶対値が1より真に大きいと発散します。

(初期点 $x_0=1.1$ の場合、発散する)

実験コード

from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np

def f(x):
    return np.log(1 + np.exp(x)) - x / 2 + (mu / 2) * x**2

def df(x):
    return np.exp(x) / (1 + np.exp(x)) - 0.5 + mu * x

def d2f(x):
    return np.exp(x) / (1 + np.exp(x)) ** 2 + mu

def vis(mu, x0):
    xs = [x0]
    for _ in range(30):
        x_next = xs[-1] - df(xs[-1]) / d2f(xs[-1])
        xs.append(x_next)

    f_points = f(np.array(xs))
    abs_max = max(abs(np.array(xs)))
    x = np.linspace(-abs_max * 1.2, abs_max * 1.2, 400)
    fx = f(x)

    plt.figure(figsize=(8, 5))
    plt.plot(x, fx, label=r"$f(x)=\log(1+e^x)-x/2 + \mu x^2/2$")
    plt.title(rf"Graph of $f(x)$ with $\mu$={mu} and $x_0$={x0}")
    plt.xlabel("x")
    plt.ylabel("f(x)")
    plt.grid(True)

    plt.scatter(xs, f_points, color="red", label="Newton steps")
    for i in range(len(xs) - 1):
        plt.annotate(
            "",
            xy=(xs[i + 1], f_points[i + 1]),
            xytext=(xs[i], f_points[i]),
            arrowprops=dict(arrowstyle="->", color="red", lw=1.5, mutation_scale=30),
        )

    plt.legend(loc="upper center")
    plt.savefig(
        Path(__file__).parent / f"strongly_convex_function_{mu}_{x0}.png", dpi=300
    )
    plt.close()

if __name__ == "__main__":
    for mu, x0 in [(0.1, -4), (0.01, -4)]:
        vis(mu, x0)

from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np

def f(x):
    return np.sqrt(1 + x**2)

def df(x):
    return x / np.sqrt(1 + x**2)

def d2f(x):
    return 1 / (1 + x**2) ** (3 / 2)

def vis(x0):
    xs = [x0]
    for _ in range(3):
        x_next = xs[-1] - df(xs[-1]) / d2f(xs[-1])
        xs.append(x_next)

    f_points = f(np.array(xs))
    abs_max = max(abs(np.array(xs)))
    x = np.linspace(-abs_max * 1.2, abs_max * 1.2, 400)
    fx = f(x)

    plt.figure(figsize=(8, 5))
    plt.plot(x, fx, label=r"$f(x)=\sqrt{1+x^2}$")
    plt.title(rf"Graph of $f(x)$ with $x_0$={x0}")
    plt.xlabel("x")
    plt.ylabel("f(x)")
    plt.grid(True)

    plt.scatter(xs, f_points, color="red", label="Newton steps")
    for i in range(len(xs) - 1):
        plt.annotate(
            "",
            xy=(xs[i + 1], f_points[i + 1]),
            xytext=(xs[i], f_points[i]),
            arrowprops=dict(arrowstyle="->", color="red", lw=1.5, mutation_scale=30),
        )

    plt.legend(loc="upper center")
    plt.savefig(Path(__file__).parent / f"sqrt_function_{x0}.png", dpi=300)
    plt.close()

if __name__ == "__main__":
    for x0 in [1.1]:
        vis(x0)

終わりに

書いた後に気付いたのですが、過去に読んでいたこちらのpdfと全く同じ論文からの引用をしていました。

図があると自分の理解の助けになるので、記事として残しておくことにします。

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up