参考リンクまとめ
Pythonコード
# 不要なbox2dを除いてインストール(軽量)
!pip install -q stable-baselines3[extra] gymnasium[classic_control] imageio
# 必要なライブラリのインポート
import gymnasium as gym
from stable_baselines3 import PPO
import imageio
from IPython.display import Video, HTML
import os
# --- Settings ---
env_id = "CartPole-v1"
video_path = "/content/cartpole_simulation.mp4"
total_steps = 20_000  # raise this for a better-trained policy
video_steps = 500  # number of video frames (reduced from 1000 to 500 to keep the video light)
# --- Environment and model setup ---
# render_mode='rgb_array' makes env.render() return image frames, which are
# collected later to build the video.
env = gym.make(env_id, render_mode='rgb_array')
# verbose=1 prints the per-iteration training table (rollout/, time/, train/)
# shown in the results section of this note; verbose=0 prints nothing.
model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=total_steps)
# --- Run the trained policy and collect rendered frames ---
frames = []
try:
    obs, _ = env.reset()
    for _ in range(video_steps):
        # Ask the trained policy for its deterministic action on this observation.
        action, _ = model.predict(obs, deterministic=True)
        obs, reward, terminated, truncated, _ = env.step(action)
        frames.append(env.render())
        if terminated or truncated:
            # The episode ended; start a new one so the recording keeps
            # going until video_steps frames have been collected.
            obs, _ = env.reset()
finally:
    # Always release the environment, even if predicting, stepping or
    # rendering raised part-way through the recording.
    env.close()
# --- Save the frames as an MP4 video ---
imageio.mimsave(video_path, frames, fps=30)
# --- Show the video inline in Colab ---
# embed=True inlines the video bytes into the cell output as a data URI.
# Without it the <video> tag only references the path on the runtime's
# filesystem, which the browser cannot fetch, so the player stays blank.
display(Video(video_path, embed=True))
# --- Show a download link for the video ---
def download_link(path):
    """Return an HTML anchor that downloads the file at *path* when clicked.

    The file's bytes are embedded in the link as a base64 ``data:`` URI.
    A plain ``href`` to the local path does not work in a hosted notebook,
    because the browser cannot read the runtime's filesystem. Base64 grows
    the payload to about 4/3 of the file size, so this is intended for
    small files such as a short clip.

    Args:
        path: Path of the file to offer for download.

    Returns:
        An ``IPython.display.HTML`` object wrapping the ``<a>`` element.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    import base64
    import html
    import mimetypes

    filename = os.path.basename(path)
    # Fall back to a generic binary type when the extension is unknown.
    mime_type = mimetypes.guess_type(filename)[0] or "application/octet-stream"
    with open(path, "rb") as f:
        payload = base64.b64encode(f.read()).decode("ascii")
    # Escape the name so quotes or angle brackets in it cannot break the markup.
    safe_name = html.escape(filename, quote=True)
    # No target="_blank": a data: URI has no page to open in a new tab;
    # the download attribute alone triggers the save.
    return HTML(
        f'<a href="data:{mime_type};base64,{payload}" download="{safe_name}">'
        f'📥 Download MP4: {safe_name}</a>'
    )


download_link(video_path)
結果
| rollout/ | |
| ep_len_mean | 22.7 |
| ep_rew_mean | 22.7 |
| time/ | |
| fps | 1098 |
| iterations | 1 |
| time_elapsed | 1 |
| total_timesteps | 2048 |
| rollout/ | |
| ep_len_mean | 26.1 |
| ep_rew_mean | 26.1 |
| time/ | |
| fps | 823 |
| iterations | 2 |
| time_elapsed | 4 |
| total_timesteps | 4096 |
| train/ | |
| approx_kl | 0.008552462 |
| clip_fraction | 0.112 |
| clip_range | 0.2 |
| entropy_loss | -0.686 |
| explained_variance | -0.000214 |
| learning_rate | 0.0003 |
| loss | 5.99 |
| n_updates | 10 |
| policy_gradient_loss | -0.0183 |
| value_loss | 51.7 |
| rollout/ | |
| ep_len_mean | 33.4 |
| ep_rew_mean | 33.4 |
| time/ | |
| fps | 801 |
| iterations | 3 |
| time_elapsed | 7 |
| total_timesteps | 6144 |
| train/ | |
| approx_kl | 0.009235643 |
| clip_fraction | 0.0684 |
| clip_range | 0.2 |
| entropy_loss | -0.664 |
| explained_variance | 0.0877 |
| learning_rate | 0.0003 |
| loss | 9.59 |
| n_updates | 20 |
| policy_gradient_loss | -0.0189 |
| value_loss | 37.2 |
| rollout/ | |
| ep_len_mean | 43.2 |
| ep_rew_mean | 43.2 |
| time/ | |
| fps | 782 |
| iterations | 4 |
| time_elapsed | 10 |
| total_timesteps | 8192 |
| train/ | |
| approx_kl | 0.00852371 |
| clip_fraction | 0.096 |
| clip_range | 0.2 |
| entropy_loss | -0.634 |
| explained_variance | 0.234 |
| learning_rate | 0.0003 |
| loss | 17.6 |
| n_updates | 30 |
| policy_gradient_loss | -0.0211 |
| value_loss | 49.3 |
| rollout/ | |
| ep_len_mean | 57.2 |
| ep_rew_mean | 57.2 |
| time/ | |
| fps | 777 |
| iterations | 5 |
| time_elapsed | 13 |
| total_timesteps | 10240 |
| train/ | |
| approx_kl | 0.010492191 |
| clip_fraction | 0.0974 |
| clip_range | 0.2 |
| entropy_loss | -0.604 |
| explained_variance | 0.279 |
| learning_rate | 0.0003 |
| loss | 20.8 |
| n_updates | 40 |
| policy_gradient_loss | -0.0227 |
| value_loss | 59.3 |