OpenAI GymのCartPole-v0をKeras-RLのサンプルDQN[^1]で解こうとしてみました.
コード
DQN版
import numpy as np
import gym
from gym import wrappers
from keras.layers import Flatten, Dense, Input
from keras.models import Model
from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
def build_model(input_dim, output_dim):
    """Build a small fully-connected Q-network.

    Args:
        input_dim: size of the flat observation vector.
        output_dim: number of discrete actions (one Q-value per action).

    Returns:
        A Keras ``Model`` mapping a ``(1, input_dim)`` observation window
        to ``output_dim`` linear Q-value outputs.
    """
    # Keras-RL feeds observations with a leading window axis (window_length=1),
    # hence the (1, input_dim) input shape that is flattened right away.
    inputs = Input(shape=(1, input_dim))
    net = Flatten()(inputs)
    for _ in range(3):
        net = Dense(16, activation="relu")(net)
    q_values = Dense(output_dim, activation="linear")(net)
    return Model(inputs=inputs, outputs=q_values)
def run():
    """Train a vanilla DQN agent on CartPole-v0, recording to /tmp."""
    env = gym.make("CartPole-v0")
    # Monitor records episode statistics; force=True overwrites prior runs.
    env = wrappers.Monitor(env, '/tmp/cartpole-v0-dqn', force=True)

    n_actions = env.action_space.n
    agent = DQNAgent(
        model=build_model(env.observation_space.shape[0], n_actions),
        nb_actions=n_actions,
        memory=SequentialMemory(limit=50000, window_length=1),
        policy=BoltzmannQPolicy(),
    )
    agent.compile("adam", metrics=["mae"])
    agent.fit(env, nb_steps=50000, visualize=False, verbose=2)


if __name__ == "__main__":
    run()
Duel-DQN版
import numpy as np
import gym
from gym import wrappers
from keras.layers import Flatten, Dense, Input
from keras.models import Model
from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
def build_model(input_dim, output_dim):
    """Construct the Q-network for the dueling-DQN experiment.

    Args:
        input_dim: size of the flat observation vector.
        output_dim: number of discrete actions (one Q-value per action).

    Returns:
        A Keras ``Model`` from a ``(1, input_dim)`` observation window to
        ``output_dim`` linear outputs; Keras-RL grafts the dueling head on.
    """
    # window_length=1 in the replay memory yields a (1, input_dim) input.
    obs_in = Input(shape=(1, input_dim))
    hidden = Flatten()(obs_in)
    hidden = Dense(16, activation="relu")(hidden)
    hidden = Dense(16, activation="relu")(hidden)
    hidden = Dense(16, activation="relu")(hidden)
    out = Dense(output_dim, activation="linear")(hidden)
    return Model(inputs=obs_in, outputs=out)
def run():
    """Train a dueling-network DQN agent on CartPole-v0, recording to /tmp."""
    env = gym.make("CartPole-v0")
    # Monitor records episode statistics; force=True overwrites prior runs.
    env = wrappers.Monitor(env, '/tmp/cartpole-v0-duel-dqn', force=True)

    nb_actions = env.action_space.n
    model = build_model(env.observation_space.shape[0], nb_actions)
    memory = SequentialMemory(limit=50000, window_length=1)

    agent = DQNAgent(
        model=model,
        nb_actions=nb_actions,
        memory=memory,
        policy=BoltzmannQPolicy(),
        # Dueling head: Q(s,a) = V(s) + A(s,a) - mean_a A(s,a) ("avg" aggregation).
        enable_dueling_network=True,
        dueling_type="avg",
    )
    agent.compile("adam", metrics=["mae"])
    agent.fit(env, nb_steps=50000, visualize=False, verbose=2)


if __name__ == "__main__":
    run()
スコア
- 50,000ステップほど試してみましたが,両方とも解けずじまいでした.
- コンストラクタ時のパラメータを設定すれば解けるかもしれないが,
Keras==2.0.6
だと,まともに動かなくなっている模様.
DQN: 32.98 ± 2.91
Duel-DQN: 42.46 ± 3.83
References
- Mnih et al., "Playing Atari with Deep Reinforcement Learning", 2013.
- Wang et al., "Dueling Network Architectures for Deep Reinforcement Learning", 2016.