import gym
import numpy as np
env = gym.make('Breakout-v0')
print('observation space:', env.observation_space)
print('action space:', env.action_space)
obs = env.reset()
# env.render()
print('initial observation:', obs)
action = env.action_space.sample()
obs, r, done, info = env.step(action)
print('next observation:', obs)
print('reward:', r)
print('done:', done)
print('info:', info)
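Before sampling actions blindly, it helps to see what each discrete action means. A minimal sketch, assuming the classic atari-py based Breakout env, whose unwrapped ALE interface exposes get_action_meanings():

# Inspect the discrete action set (assumes an Atari env built on atari-py / ALE)
print('number of actions:', env.action_space.n)
print('action meanings:', env.unwrapped.get_action_meanings())
# For Breakout this is ['NOOP', 'FIRE', 'RIGHT', 'LEFT']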
# Start a virtual display so env.render(mode='rgb_array') works on a headless machine
from pyvirtualdisplay import Display
display = Display(visible=0, size=(1024, 768))
display.start()

frames = []
for i in range(3):
    obs = env.reset()
    done = False
    R = 0  # episode return
    t = 0  # timestep counter
    while not done and t < 200:
        frames.append(env.render(mode='rgb_array'))
        obs, r, done, _ = env.step(env.action_space.sample())
        R += r
        t += 1
    print('test episode:', i, 'R:', R)
env.render()
import matplotlib.pyplot as plt
import matplotlib.animation
from IPython.display import HTML

plt.figure(figsize=(frames[0].shape[1] / 72.0, frames[0].shape[0] / 72.0), dpi=72)
patch = plt.imshow(frames[0])
plt.axis('off')
animate = lambda i: patch.set_data(frames[i])
ani = matplotlib.animation.FuncAnimation(plt.gcf(), animate, frames=len(frames), interval=50)
HTML(ani.to_jshtml())
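The inline player above only lives in the notebook. To write the same clip to disk, FuncAnimation can save directly; a minimal sketch, assuming ffmpeg is on the PATH for mp4 output (the file names breakout.mp4 / breakout.gif are placeholders):

# Save the collected frames as a standalone video file
ani.save('breakout.mp4', writer='ffmpeg', fps=20)
# Without ffmpeg, the Pillow writer can produce a GIF instead:
# ani.save('breakout.gif', writer='pillow', fps=20)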
import gym
from IPython import display
import matplotlib.pyplot as plt
%matplotlib inline
env = gym.make('CartPole-v0')
env.reset()
for _ in range(50):
    plt.imshow(env.render(mode='rgb_array'))
    display.clear_output(wait=True)  # redraw in place instead of stacking figures
    display.display(plt.gcf())
    env.step(env.action_space.sample())
plt.close()
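Redrawing the figure every step is slow. An alternative sketch (assuming Pillow is installed; cartpole.gif is a hypothetical file name) collects the frames first and writes them out once at the end:

from PIL import Image

imgs = []
env.reset()
for _ in range(50):
    imgs.append(Image.fromarray(env.render(mode='rgb_array')))
    env.step(env.action_space.sample())
# duration is milliseconds per frame; loop=0 means repeat forever
imgs[0].save('cartpole.gif', save_all=True, append_images=imgs[1:], duration=50, loop=0)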
Saving the video: PID-controlled CartPole
import numpy as np
import gym

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

env = gym.make('CartPole-v1')
desired_state = np.array([0, 0, 0, 0])
desired_mask = np.array([0, 0, 1, 0])  # control on the pole angle only
P, I, D = 0.1, 0.01, 0.5

# Record every episode as mp4 into video/
env = gym.wrappers.Monitor(env, 'video/', video_callable=lambda episode_id: True, force=True)

for i_episode in range(20):
    state = env.reset()
    integral = 0
    derivative = 0
    prev_error = 0
    for t in range(500):
        env.render()
        error = state - desired_state
        integral += error
        derivative = error - prev_error
        prev_error = error
        pid = np.dot(P * error + I * integral + D * derivative, desired_mask)
        action = sigmoid(pid)  # squash the PID output to (0, 1)
        action = np.round(action).astype(np.int32)  # threshold to a discrete action (0 or 1)
        state, reward, done, info = env.step(action)
        if done:
            print("Episode finished after {} timesteps".format(t + 1))
            break
env.close()
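Monitor writes mp4 files (plus json metadata) into video/. To check a recording inline in the notebook, one sketch is to base64-embed an mp4 in an HTML video tag; the actual file names are whatever Monitor generated:

import base64
import glob
from IPython.display import HTML

# Pick the most recent recording Monitor produced
mp4 = sorted(glob.glob('video/*.mp4'))[-1]
video = base64.b64encode(open(mp4, 'rb').read()).decode('ascii')
HTML('<video autoplay controls>'
     '<source src="data:video/mp4;base64,{}" type="video/mp4">'
     '</video>'.format(video))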
Saving the video: Atari BankHeist
# https://github.com/TTitcombe/docker_openai_gym
import gym

env_to_wrap = gym.make("BankHeist-ram-v0")
env = gym.wrappers.Monitor(env_to_wrap, "someDir2")

# Run one random-policy episode; Monitor records it to someDir2
frame = env.reset()
is_done = False
while not is_done:
    action = env.action_space.sample()
    _, _, is_done, _ = env.step(action)
env.close()
env_to_wrap.close()
References:
https://gist.github.com/HenryJia/23db12d61546054aa43f8dc587d9dc2c
http://korechipostit.hatenablog.com/entry/2018/11/05/190000
https://colab.research.google.com/drive/1GLlB53gvZaUyqMYv8GmZQJmshRUzV_tg#scrollTo=ZAcCvXYJlJ8I