More than 3 years have passed since last update.

乱数の固定方法(random/numpy/tensorflow/gym)

Posted at 2022-06-27

強化学習をしていると乱数の固定が出来ているようないないような…
挙動が分かりづらかったので検証してみました。

TL;DR

# Single seed value reused for every library below.
seed = 0

# random: seed Python's built-in random module.
import random
random.seed(seed)

# numpy: seed the module-level np.random generator.
import numpy as np
np.random.seed(seed)

# tensorflow: set TensorFlow's global random seed.
import tensorflow as tf
tf.random.set_seed(seed)

# gym: seeded separately from the libraries above. The action space, the
# observation space, and the environment itself (via reset) each receive
# the seed explicitly.
import gym
env = gym.make("Pendulum-v1")
env.action_space.seed(seed)
env.observation_space.seed(seed)
env.reset(seed=seed)

random

それぞれ2回実施し、同じ値になるか見てみます。

def std_random():
    """Show that reseeding the stdlib ``random`` module repeats its output.

    Runs two passes. Each pass seeds ``random`` with 0 and prints two
    consecutive ``random.random()`` draws, so both passes are expected to
    print the same pair of values.
    """
    import random

    passes = 2
    draws_per_pass = 2

    for _pass in range(passes):
        # Reseed at the start of every pass so the sequence restarts.
        random.seed(0)
        for _draw in range(draws_per_pass):
            print(random.random())

std_random()

0.8444218515250481
0.7579544029403025
0.8444218515250481
0.7579544029403025

同じ値になっています。

numpy

def np_random():
    """Show that reseeding ``np.random`` repeats its output.

    Runs two passes. Each pass calls ``np.random.seed(0)`` and prints two
    consecutive ``np.random.random()`` draws, so both passes are expected
    to print the same pair of values.
    """
    import numpy as np

    passes = 2
    draws_per_pass = 2

    for _pass in range(passes):
        # Reseed at the start of every pass so the sequence restarts.
        np.random.seed(0)
        for _draw in range(draws_per_pass):
            print(np.random.random())

np_random()

0.5488135039273248
0.7151893663724195
0.5488135039273248
0.7151893663724195

tensorflow

実際にモデルを作成してみてモデルの出力を見てみます。
学習なしとありで見てみました。

def tf_random():
    """Show that ``tf.random.set_seed`` makes Keras model outputs repeatable.

    Two scenarios are printed in order, each under its own heading:

    * ``no train`` - build a small dense model and print its output for a
      fixed input without any training.
    * ``train`` - build the same model, fit it for 10 epochs on a tiny
      dataset, then print its output for the same fixed input.

    In each scenario the seed is set to 0, two models are built and
    evaluated, the seed is set to 0 again, and two more models are built
    and evaluated. The first and third printed values, and the second and
    fourth, are expected to match.
    """
    import numpy as np
    import tensorflow as tf
    import tensorflow.keras as keras
    from tensorflow.keras import layers as kl

    def _build_model():
        # Input(2) -> Dense(64, relu) -> Dense(1); shared by both scenarios.
        inputs = kl.Input(shape=(2,))
        hidden = kl.Dense(64, activation="relu")(inputs)
        outputs = kl.Dense(1)(hidden)
        return keras.Model(inputs, outputs)

    def _no_train_model():
        # Output depends only on the randomly initialized weights.
        model = _build_model()
        return model(np.array([[1, 2]])).numpy()[0]

    def _train_model():
        # Output depends on the initial weights and on the fit() run.
        model = _build_model()
        model.compile(optimizer="adam", loss="mse")
        x = np.array([[1, 2], [2, 3]])
        y = np.array([[1], [2]])
        model.fit(x, y, epochs=10, verbose=0)
        return model(np.array([[1, 2]])).numpy()[0]

    scenarios = (
        ("no train", _no_train_model),
        ("train", _train_model),
    )
    for heading, predict in scenarios:
        print(heading)
        for _pass in range(2):
            # Reset the seed, then create and evaluate two models in a row.
            tf.random.set_seed(0)
            print(predict())
            print(predict())

tf_random()

no train
[-0.17752141]
[-0.44846547]
[-0.17752141]
[-0.44846547]
train
[0.13368675]
[-0.12454459]
[0.13368675]
[-0.12454459]

gym

gymは上記のライブラリ(random/numpy/tensorflow)とは独立してseedを管理しているようで、これに引っかかっていました…。

resetの引数で指定するようです。
また、action_spaceとobservation_spaceは独立に管理されており、そちらも別途指定する必要があります。

def gym_random():
    """Show how seeding affects a gym environment rollout.

    Creates one ``Pendulum-v1`` environment and runs four rollouts on it,
    alternating between a seeded run (seed 0) and an unseeded run. Each
    rollout takes 10 steps with actions sampled from the action space and
    prints the observation returned by the last step.
    """
    import gym

    env = gym.make("Pendulum-v1")

    def _run(seed):
        """Reset the env (seeded when ``seed`` is not None) and roll out 10 steps."""
        if seed is not None:
            # The spaces keep their own random state, so they are seeded
            # separately from the environment itself.
            env.action_space.seed(seed)
            env.observation_space.seed(seed)
            env.reset(seed=seed)
        else:
            env.reset()

        for _ in range(10):
            action = env.action_space.sample()
            # step() returns a 4-tuple on older gym releases and a 5-tuple
            # from gym 0.26 on; the observation is the first element in
            # both, so index it instead of unpacking a fixed arity.
            state = env.step(action)[0]
        print(state)

    try:
        _run(0)
        _run(None)
        _run(0)
        _run(None)
    finally:
        # Release the environment's resources even if a rollout fails.
        env.close()

gym_random()

結果は10step後の状態を表示しています。
seedを指定した1回目と3回目が同じ値になっています。seedを指定していない2回目と4回目も同じ値ですが、これは直前にseedを指定した実行の乱数列の続きが使われているためと思われます。

[-0.65035427  0.7596311   6.390267  ]
[-0.9302957   0.36681047 -0.80060995]
[-0.65035427  0.7596311   6.390267  ]
[-0.9302957   0.36681047 -0.80060995]

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up