ぷよぷよプログラミングAI学習システムは、中高生の自己調整学習に向けて開発されました。
どうやって、Chromebookで、10分で、素敵な人工知能を作れるのでしょうか?
秘密は、3つ。
1 JupyterLiteで、WebクライアントがPythonコードを実行
2 超軽量な機械学習フレームワークDeZeroを採用
3 ぷよぷよのstageとactionがコンパクト
Github&X
ぷよぷよプログラミングAI学習用まとめ
チュートリアルまとめ
Step7 first DQN
7 まずは、DQNをやってみよう
いよいよ、機械学習をします。boardのデータをそのまま使うのではなくて、特殊な計算をして、特徴量を使ってやることにします。全体を見て、動くのを確認してから、細かいところを説明していきます。
まずは、基本的な部分を作っていきます。
Library
import os
import numpy as np
import random
import dezero_emb as dezero
from puyopuyo import *
from collections import deque
dezero_embは、機械学習のためのフレームワークです。
CFG_ML
fallblock.pyでも同じように設定を作っています。ここでは、機械学習に関する設定のみを入れます。
saved_pathのディレクトリが存在しないとモデルの保存ができないので、saved_pathのディレクトリを作ります。
class CFG_ML:
    """Hyper-parameters and paths for the DQN training run."""

    # --- Optimisation ---------------------------------------------------
    lr = 1e-3          # learning rate handed to the Adam optimizer
    gamma = 0.99       # discount factor applied to the next-state value
    batch_size = 512   # number of transitions sampled per training step

    # --- Replay memory --------------------------------------------------
    buffer_size = 30000  # maximum number of stored transitions

    # --- Exploration schedule (linear decay over the first epochs) -------
    initial_epsilon = 1
    final_epsilon = 1e-3
    num_decay_epochs = 200  # epochs needed to go from initial to final epsilon

    # --- Training schedule ----------------------------------------------
    num_epochs = 300     # total number of episodes played
    sync_interval = 10   # epochs between target-network synchronisations
    save_interval = 100  # epochs between numbered weight checkpoints

    # --- Output ---------------------------------------------------------
    saved_path = "trained_models"  # directory that receives the weight files
# Make sure the checkpoint directory exists before any weights are saved.
# exist_ok=True avoids the check-then-create race of a separate isdir() test.
os.makedirs(CFG_ML.saved_path, exist_ok=True)
deep_q_network
class DQNet(dezero.Models.Model):
    """Three-layer fully connected network producing a single value.

    Two Linear(128) layers, each followed by ReLU, feed a final Linear(1)
    layer whose output is returned without an activation.
    """

    def __init__(self):
        super().__init__()
        # Attribute names are kept as l1/l2/l3; saved weight files may be
        # keyed on them (assumption about dezero_emb - confirm before renaming).
        self.l1 = dezero.L.Linear(128)
        self.l2 = dezero.L.Linear(128)
        self.l3 = dezero.L.Linear(1)

    def forward(self, x):
        """Run ``x`` through both hidden layers and the linear output head."""
        hidden = x
        for layer in (self.l1, self.l2):
            hidden = dezero.F.relu(layer(hidden))
        return self.l3(hidden)
Replay buffer
class ReplayBuffer:
    """Bounded store of transitions with uniform random minibatch sampling."""

    def __init__(self, buffer_size, batch_size):
        """Create an empty buffer.

        Args:
            buffer_size: Maximum number of transitions kept; once reached,
                the deque drops the oldest entry on every append.
            batch_size: Number of transitions returned by ``get_batch``.
        """
        self.batch_size = batch_size
        self.buffer = deque(maxlen=buffer_size)

    def __len__(self):
        """Return the number of transitions currently stored."""
        return len(self.buffer)

    def add(self, state, action, reward, next_state, done):
        """Append one ``(state, action, reward, next_state, done)`` transition."""
        self.buffer.append((state, action, reward, next_state, done))

    def get_batch(self):
        """Sample ``batch_size`` transitions without replacement.

        Returns:
            A tuple ``(state, action, reward, next_state, done)`` of NumPy
            arrays whose first axis is the batch; ``done`` is cast to int32.

        Raises:
            ValueError: If fewer than ``batch_size`` transitions are stored
                (raised by ``random.sample``).
        """
        batch = random.sample(self.buffer, self.batch_size)

        def column(index):
            # Collect the index-th field of every sampled transition.
            return [transition[index] for transition in batch]

        states = np.stack(column(0))
        actions = np.array(column(1))
        rewards = np.array(column(2))
        next_states = np.stack(column(3))
        dones = np.array(column(4)).astype(np.int32)
        return states, actions, rewards, next_states, dones
DQN agent
import copy
class DQNAgent:
    """Agent that ranks candidate moves by reward plus predicted board value.

    Two ``DQNet`` instances are held: ``qnet`` is trained by ``update`` and
    ``qnet_target`` is used for move evaluation and for the training target.
    ``sync_qnet`` copies the trained network into the target network.
    """

    def __init__(self):
        """Create the networks, the Adam optimizer and the replay buffer."""
        self.epsilon = CFG_ML.initial_epsilon
        self.action_size = 2
        self.replay_buffer = ReplayBuffer(CFG_ML.buffer_size, CFG_ML.batch_size)
        self.qnet = DQNet()
        self.qnet_target = DQNet()
        self.optimizer = dezero.optimizers.Adam(CFG_ML.lr)
        # Only the online network is registered, so only it gets updated.
        self.optimizer.setup(self.qnet)

    def __call__(self, board, puyo):
        """Pick the move with the highest ``reward + predicted value``.

        Args:
            board: Current board, passed to ``utils.create_action_list`` and
                ``utils.next_board``.
            puyo: Current piece, passed to ``utils.next_board``.

        Returns:
            The chosen element of the action list. If the action list is
            empty the hard-coded fallback ``(2, 1)`` is returned. If every
            move is reported as ``done`` the first legal move is returned.
        """
        action_list = utils.create_action_list(board)
        if not len(action_list):
            return (2, 1)

        # Keep the surviving actions in a list parallel to next_boards so the
        # argmax below maps back to the right action even when some moves
        # were filtered out.
        candidates = []
        next_boards = []
        next_rewards = []
        for candidate in action_list:
            next_board, reward, done = utils.next_board(board, puyo, candidate)
            if not done:
                candidates.append(candidate)
                next_boards.append(next_board)
                next_rewards.append(reward)

        if not candidates:
            # Every move ends the episode; nothing to rank (np.stack would
            # raise on an empty list), so just play a legal move.
            return action_list[0]

        predictions = self.eval2(np.stack(next_boards))
        rewards = np.array(next_rewards)[:, np.newaxis]
        predictions += dezero.Variable(rewards)
        best = int(predictions.data.argmax())
        return candidates[best]

    def boardtostate(self, board):
        """Encode a 2-D board as a 1-D int32 feature vector.

        For each cell value 1, 2, 3 and 4 a binary plane marks the cells
        holding that value. Every row of every plane is then packed into one
        integer, with column ``c`` contributing ``2 ** c``. The four planes
        are concatenated, giving ``4 * height`` features.

        Args:
            board: 2-D array of cell values; width is taken from its shape.

        Returns:
            int32 array of shape ``(4 * height,)``.
        """
        board = np.asarray(board)
        column_weights = 2 ** np.arange(board.shape[1], dtype=np.int32)
        planes = np.concatenate([board == value for value in (1, 2, 3, 4)])
        return planes.astype(np.int32).dot(column_weights)

    def eval(self, board):
        """Return the target network's output for a single board."""
        return self.qnet_target(self.boardtostate(board))

    def eval2(self, boards):
        """Return the target network's output for a stack of boards.

        Args:
            boards: Array whose first axis enumerates boards.
        """
        states = np.stack([self.boardtostate(single) for single in boards])
        return self.qnet_target(states)

    def update(self, board, action, reward, next_board, done):
        """Store one transition and, at episode end, run one training step.

        The transition is always added to the replay buffer. A gradient step
        is performed only when ``done`` is true and the buffer already holds
        at least ``CFG_ML.batch_size`` transitions.
        """
        state = self.boardtostate(board)
        next_state = self.boardtostate(next_board)
        self.replay_buffer.add(state, action, reward, next_state, done)

        if not done:
            return
        if len(self.replay_buffer) < CFG_ML.batch_size:
            return

        state, action, reward, next_state, done = self.replay_buffer.get_batch()
        qs = self.qnet(state)
        next_qs = self.qnet_target(next_state)

        # Add a trailing axis so the per-sample scalars broadcast against
        # the network outputs.
        reward = reward[:, np.newaxis]
        done = done[:, np.newaxis]
        # Terminal transitions (done == 1) contribute the reward only.
        target = reward + (1 - done) * CFG_ML.gamma * next_qs

        self.qnet.cleargrads()
        loss = dezero.F.mean_squared_error(qs, target)
        loss.backward()
        self.optimizer.update()

    def sync_qnet(self):
        """Replace the target network with a deep copy of the online network."""
        self.qnet_target = copy.deepcopy(self.qnet)

    def save_model(self, filename):
        """Write the online network's weights to ``filename``."""
        self.qnet.save_weights(filename)

    def load_model(self, filename):
        """Load weights from ``filename`` into both networks."""
        self.qnet.load_weights(filename)
        self.qnet_target.load_weights(filename)
Train
# Seed both generators: NumPy's and the stdlib one, which drives the
# epsilon-greedy draws below and the replay-buffer sampling.
np.random.seed(seed=123)
random.seed(123)

env = EnvPuyopuyo()
agent = DQNAgent()

for epochs in range(CFG_ML.num_epochs):
    # Linear decay from initial_epsilon to final_epsilon over the first
    # num_decay_epochs epochs, then constant at final_epsilon.
    epsilon = CFG_ML.final_epsilon + (max(CFG_ML.num_decay_epochs - epochs, 0) * (
        CFG_ML.initial_epsilon - CFG_ML.final_epsilon) / CFG_ML.num_decay_epochs)

    board, puyo = env.reset()
    done = False
    final_score = 0
    final_pieces = 0

    while not done:
        explore = random.random() <= epsilon
        action_list = utils.create_action_list(board) if explore else []
        if len(action_list):
            # Exploration: uniformly random legal move.
            action = action_list[random.randint(0, len(action_list) - 1)]
        else:
            # Exploitation, or no legal move to explore: ask the agent.
            # The network is evaluated only when its answer is used.
            action = agent(board, puyo)

        next_board, puyo, reward, done = env.step(action)
        agent.update(board, action, reward, next_board, done)
        board = next_board

        final_score += reward
        final_pieces += 2

    if epochs % CFG_ML.sync_interval == 0:
        agent.sync_qnet()

    print("Epoch: {}/{}, Score: {}, pieces {}".format(
        epochs,
        CFG_ML.num_epochs,
        final_score,
        final_pieces))

    # Numbered checkpoint every save_interval epochs (epoch 0 is skipped).
    if epochs > 0 and epochs % CFG_ML.save_interval == 0:
        agent.save_model("{}/puyopuyo_{}".format(CFG_ML.saved_path, epochs))

# Final weights after the last epoch.
agent.save_model("{}/puyopuyo".format(CFG_ML.saved_path))
Epoch: 0/300, Score: 3420, pieces 108
Epoch: 1/300, Score: 2300, pieces 78
Epoch: 2/300, Score: 5510, pieces 134
Epoch: 3/300, Score: 1120, pieces 48
Epoch: 4/300, Score: 1500, pieces 96
Epoch: 5/300, Score: 3510, pieces 106
Epoch: 6/300, Score: 6310, pieces 140
Epoch: 7/300, Score: 3270, pieces 86
Epoch: 8/300, Score: 4340, pieces 86
Epoch: 9/300, Score: 5040, pieces 110
Epoch: 10/300, Score: 2960, pieces 112
Epoch: 11/300, Score: 6290, pieces 124
Epoch: 12/300, Score: 26370, pieces 242
Epoch: 13/300, Score: 3670, pieces 124
Epoch: 14/300, Score: 9250, pieces 132
Epoch: 15/300, Score: 4240, pieces 122
Epoch: 16/300, Score: 24840, pieces 218
Epoch: 17/300, Score: 3030, pieces 106
Epoch: 18/300, Score: 7990, pieces 124
Epoch: 19/300, Score: 8930, pieces 114
Epoch: 20/300, Score: 38280, pieces 294
Epoch: 21/300, Score: 1750, pieces 62
Epoch: 22/300, Score: 2400, pieces 74
Epoch: 23/300, Score: 7900, pieces 154
Epoch: 24/300, Score: 10720, pieces 158
Epoch: 25/300, Score: 28100, pieces 164
Epoch: 26/300, Score: 5900, pieces 130
Epoch: 27/300, Score: 1200, pieces 62
Epoch: 28/300, Score: 12780, pieces 152
Epoch: 29/300, Score: 6630, pieces 120
Epoch: 30/300, Score: 12020, pieces 136
Epoch: 31/300, Score: 4960, pieces 120
Epoch: 32/300, Score: 3520, pieces 118
Epoch: 33/300, Score: 7670, pieces 110
Epoch: 34/300, Score: 23150, pieces 230
Epoch: 35/300, Score: 4640, pieces 110
Epoch: 36/300, Score: 1750, pieces 102
Epoch: 37/300, Score: 11860, pieces 114
Epoch: 38/300, Score: 10310, pieces 158
Epoch: 39/300, Score: 3820, pieces 100
Epoch: 40/300, Score: 32930, pieces 274
Epoch: 41/300, Score: 8610, pieces 142
Epoch: 42/300, Score: 1750, pieces 70
Epoch: 43/300, Score: 11830, pieces 146
Epoch: 44/300, Score: 9470, pieces 180
Epoch: 45/300, Score: 11980, pieces 122
Epoch: 46/300, Score: 3470, pieces 62
Epoch: 47/300, Score: 2700, pieces 100
Epoch: 48/300, Score: 18270, pieces 202
Epoch: 49/300, Score: 7700, pieces 144
Epoch: 50/300, Score: 30770, pieces 330
Epoch: 51/300, Score: 11270, pieces 160
Epoch: 52/300, Score: 5020, pieces 128
Epoch: 53/300, Score: 50680, pieces 276
Epoch: 54/300, Score: 5250, pieces 132
Epoch: 55/300, Score: 16550, pieces 194
Epoch: 56/300, Score: 15920, pieces 220
Epoch: 57/300, Score: 22050, pieces 240
Epoch: 58/300, Score: 36430, pieces 292
Epoch: 59/300, Score: 8150, pieces 106
Epoch: 60/300, Score: 3590, pieces 84
Epoch: 61/300, Score: 5320, pieces 108
Epoch: 62/300, Score: 6240, pieces 138
Epoch: 63/300, Score: 23830, pieces 292
Epoch: 64/300, Score: 16380, pieces 166
Epoch: 65/300, Score: 13830, pieces 168
Epoch: 66/300, Score: 17190, pieces 200
Epoch: 67/300, Score: 5520, pieces 122
Epoch: 68/300, Score: 26560, pieces 276
Epoch: 69/300, Score: 18580, pieces 228
Epoch: 70/300, Score: 21220, pieces 180
Epoch: 71/300, Score: 8620, pieces 156
Epoch: 72/300, Score: 4320, pieces 118
Epoch: 73/300, Score: 21520, pieces 252
Epoch: 74/300, Score: 12120, pieces 144
Epoch: 75/300, Score: 4620, pieces 112
Epoch: 76/300, Score: 50040, pieces 362
Epoch: 77/300, Score: 40010, pieces 400
Epoch: 78/300, Score: 90440, pieces 746
Epoch: 79/300, Score: 24140, pieces 270
Epoch: 80/300, Score: 30840, pieces 278
Epoch: 81/300, Score: 26770, pieces 256
Epoch: 82/300, Score: 16190, pieces 210
Epoch: 83/300, Score: 16880, pieces 202
Epoch: 84/300, Score: 27300, pieces 276
Epoch: 85/300, Score: 13300, pieces 174
Epoch: 86/300, Score: 42760, pieces 450
Epoch: 87/300, Score: 55680, pieces 510
Epoch: 88/300, Score: 45240, pieces 430
Epoch: 89/300, Score: 81940, pieces 688
Epoch: 90/300, Score: 50100, pieces 386
Epoch: 91/300, Score: 41480, pieces 382
Epoch: 92/300, Score: 28590, pieces 316
Epoch: 93/300, Score: 32860, pieces 334
Epoch: 94/300, Score: 21080, pieces 172
Epoch: 95/300, Score: 44210, pieces 398
Epoch: 96/300, Score: 72290, pieces 624
Epoch: 97/300, Score: 32260, pieces 352
Epoch: 98/300, Score: 2400, pieces 90
Epoch: 99/300, Score: 43370, pieces 418
Epoch: 100/300, Score: 9490, pieces 118
Epoch: 101/300, Score: 105030, pieces 806
Epoch: 102/300, Score: 134260, pieces 1014
Epoch: 103/300, Score: 9560, pieces 152
Epoch: 104/300, Score: 56250, pieces 494
Epoch: 105/300, Score: 69180, pieces 564
Epoch: 106/300, Score: 72350, pieces 610
Epoch: 107/300, Score: 40750, pieces 318
Epoch: 108/300, Score: 18490, pieces 224
Epoch: 109/300, Score: 39190, pieces 394
Epoch: 110/300, Score: 16420, pieces 186
Epoch: 111/300, Score: 24760, pieces 232
Epoch: 112/300, Score: 14380, pieces 202
Epoch: 113/300, Score: 282620, pieces 1904
Epoch: 114/300, Score: 35060, pieces 376
Epoch: 115/300, Score: 75350, pieces 602
Epoch: 116/300, Score: 5920, pieces 112
Epoch: 117/300, Score: 80610, pieces 652
Epoch: 118/300, Score: 94650, pieces 716
Epoch: 119/300, Score: 10260, pieces 110
Epoch: 120/300, Score: 48270, pieces 430
Epoch: 121/300, Score: 3420, pieces 106
Epoch: 122/300, Score: 111150, pieces 908
Epoch: 123/300, Score: 91410, pieces 628
Epoch: 124/300, Score: 171640, pieces 1268
Epoch: 125/300, Score: 46110, pieces 444
Epoch: 126/300, Score: 87480, pieces 720
Epoch: 127/300, Score: 42640, pieces 350
Epoch: 128/300, Score: 34580, pieces 338
Epoch: 129/300, Score: 96040, pieces 724
Epoch: 130/300, Score: 18500, pieces 200
Epoch: 131/300, Score: 23330, pieces 180
Epoch: 132/300, Score: 129190, pieces 1038
Epoch: 133/300, Score: 66060, pieces 604
Epoch: 134/300, Score: 148980, pieces 1092
Epoch: 135/300, Score: 62730, pieces 444
Epoch: 136/300, Score: 34900, pieces 340
Epoch: 137/300, Score: 128190, pieces 976
Epoch: 138/300, Score: 168600, pieces 1176
Epoch: 139/300, Score: 63080, pieces 512
Epoch: 140/300, Score: 258290, pieces 1870
Epoch: 141/300, Score: 83100, pieces 606
Epoch: 142/300, Score: 140850, pieces 958
Epoch: 143/300, Score: 29940, pieces 264
Epoch: 144/300, Score: 526550, pieces 3500
Epoch: 145/300, Score: 28960, pieces 324
Epoch: 146/300, Score: 55610, pieces 524
Epoch: 147/300, Score: 113990, pieces 686
Epoch: 148/300, Score: 114620, pieces 912
Epoch: 149/300, Score: 17730, pieces 190
Epoch: 150/300, Score: 390300, pieces 2784
Epoch: 151/300, Score: 34380, pieces 352
Epoch: 152/300, Score: 32720, pieces 332
Epoch: 153/300, Score: 50020, pieces 436
Epoch: 154/300, Score: 56950, pieces 490
Epoch: 155/300, Score: 104560, pieces 754
Epoch: 156/300, Score: 677580, pieces 4528
Epoch: 157/300, Score: 173530, pieces 1124
Epoch: 158/300, Score: 719770, pieces 4898
Epoch: 159/300, Score: 629340, pieces 4400
Epoch: 160/300, Score: 972880, pieces 6402
Epoch: 161/300, Score: 248100, pieces 1680
Epoch: 162/300, Score: 491060, pieces 3436
Epoch: 163/300, Score: 85100, pieces 508
Epoch: 164/300, Score: 315670, pieces 2158
Epoch: 165/300, Score: 724140, pieces 4700
Epoch: 166/300, Score: 2278070, pieces 14860
Epoch: 167/300, Score: 552410, pieces 3544
Epoch: 168/300, Score: 360970, pieces 2420
Epoch: 169/300, Score: 146240, pieces 1118
Epoch: 170/300, Score: 467350, pieces 3146
Epoch: 171/300, Score: 1261620, pieces 8026
Epoch: 172/300, Score: 763500, pieces 4984
Epoch: 173/300, Score: 80300, pieces 636
Epoch: 174/300, Score: 732560, pieces 4650
Epoch: 175/300, Score: 192990, pieces 1324
Epoch: 176/300, Score: 296210, pieces 1978
Epoch: 177/300, Score: 345510, pieces 2306
Epoch: 178/300, Score: 315190, pieces 2128
Epoch: 179/300, Score: 103330, pieces 734
Epoch: 180/300, Score: 276740, pieces 1982
Epoch: 181/300, Score: 623200, pieces 4260
Epoch: 182/300, Score: 273230, pieces 1780
Epoch: 183/300, Score: 488120, pieces 3038
Epoch: 184/300, Score: 1050130, pieces 6596
Epoch: 185/300, Score: 229110, pieces 1642
Epoch: 186/300, Score: 107020, pieces 768
Epoch: 187/300, Score: 125600, pieces 858
Epoch: 188/300, Score: 929930, pieces 6030
Epoch: 189/300, Score: 107190, pieces 718
Epoch: 190/300, Score: 399160, pieces 2456
Epoch: 191/300, Score: 1419880, pieces 8618
Epoch: 192/300, Score: 58520, pieces 438
Epoch: 193/300, Score: 201770, pieces 1146
Epoch: 194/300, Score: 1940800, pieces 11590
Epoch: 195/300, Score: 1241420, pieces 7676
Epoch: 196/300, Score: 1055930, pieces 6718
Epoch: 197/300, Score: 5330220, pieces 32358
Epoch: 198/300, Score: 52000, pieces 352
Epoch: 199/300, Score: 1536740, pieces 9438
Epoch: 200/300, Score: 926380, pieces 6002
Epoch: 201/300, Score: 2238460, pieces 12932
Epoch: 202/300, Score: 184900, pieces 1168
Epoch: 203/300, Score: 2829720, pieces 16920
Epoch: 204/300, Score: 676450, pieces 4186
Epoch: 205/300, Score: 1051020, pieces 6074
Epoch: 206/300, Score: 1208130, pieces 6842
Epoch: 207/300, Score: 732550, pieces 4406
Epoch: 208/300, Score: 213670, pieces 1428
Epoch: 209/300, Score: 478460, pieces 2920
Epoch: 210/300, Score: 224460, pieces 1354
Epoch: 211/300, Score: 959980, pieces 5734
Epoch: 212/300, Score: 60710, pieces 440
Epoch: 213/300, Score: 1126890, pieces 6410
Epoch: 214/300, Score: 1527600, pieces 8298
Epoch: 215/300, Score: 556710, pieces 3266
Epoch: 216/300, Score: 656470, pieces 3828
Epoch: 217/300, Score: 905340, pieces 5262
Epoch: 218/300, Score: 1335870, pieces 8110
Epoch: 219/300, Score: 1547040, pieces 8554
Epoch: 220/300, Score: 172560, pieces 1070
Epoch: 221/300, Score: 928650, pieces 5420
Epoch: 222/300, Score: 596690, pieces 3524
Epoch: 223/300, Score: 919580, pieces 5260
Epoch: 224/300, Score: 718890, pieces 4218
Epoch: 225/300, Score: 239440, pieces 1278
Epoch: 226/300, Score: 870560, pieces 4900
Epoch: 227/300, Score: 767240, pieces 4446
Epoch: 228/300, Score: 854830, pieces 4370
Epoch: 229/300, Score: 514250, pieces 2830
Epoch: 230/300, Score: 287820, pieces 1680
Epoch: 231/300, Score: 495190, pieces 2600
Epoch: 232/300, Score: 92720, pieces 556
Epoch: 233/300, Score: 1696760, pieces 9158
Epoch: 234/300, Score: 951870, pieces 5358
Epoch: 235/300, Score: 85660, pieces 572
Epoch: 236/300, Score: 2732450, pieces 14910
Epoch: 237/300, Score: 344900, pieces 2086
Epoch: 238/300, Score: 465730, pieces 2830
Epoch: 239/300, Score: 298000, pieces 1918
Epoch: 240/300, Score: 301210, pieces 1800
Epoch: 241/300, Score: 74890, pieces 572
Epoch: 242/300, Score: 62970, pieces 482
Epoch: 243/300, Score: 565630, pieces 2818
Epoch: 244/300, Score: 68530, pieces 390
Epoch: 245/300, Score: 432940, pieces 2200
Epoch: 246/300, Score: 319140, pieces 1644
Epoch: 247/300, Score: 543820, pieces 2680
Epoch: 248/300, Score: 71640, pieces 422
Epoch: 249/300, Score: 553180, pieces 2852
Epoch: 250/300, Score: 1661210, pieces 8888
Epoch: 251/300, Score: 106700, pieces 702
Epoch: 252/300, Score: 489220, pieces 2922
Epoch: 253/300, Score: 553010, pieces 3168
Epoch: 254/300, Score: 280870, pieces 1510
Epoch: 255/300, Score: 569370, pieces 3212
Epoch: 256/300, Score: 251170, pieces 1428
Epoch: 257/300, Score: 49700, pieces 390
Epoch: 258/300, Score: 1918270, pieces 10366
Epoch: 259/300, Score: 149560, pieces 932
Epoch: 260/300, Score: 1567890, pieces 8586
Epoch: 261/300, Score: 2554640, pieces 12650
Epoch: 262/300, Score: 217530, pieces 1258
Epoch: 263/300, Score: 1106240, pieces 5840
Epoch: 264/300, Score: 985590, pieces 5320
Epoch: 265/300, Score: 277360, pieces 1622
Epoch: 266/300, Score: 1668940, pieces 8926
Epoch: 267/300, Score: 146830, pieces 834
Epoch: 268/300, Score: 585810, pieces 3298
Epoch: 269/300, Score: 300430, pieces 1700
Epoch: 270/300, Score: 403310, pieces 2096
Epoch: 271/300, Score: 1388920, pieces 7380
Epoch: 272/300, Score: 737750, pieces 3862
Epoch: 273/300, Score: 291620, pieces 1712
Epoch: 274/300, Score: 1063670, pieces 5798
Epoch: 275/300, Score: 28950, pieces 272
Epoch: 276/300, Score: 60200, pieces 298
Epoch: 277/300, Score: 100720, pieces 638
Epoch: 278/300, Score: 1593280, pieces 7740
Epoch: 279/300, Score: 230530, pieces 1216
Epoch: 280/300, Score: 263740, pieces 1322
Epoch: 281/300, Score: 124760, pieces 730
Epoch: 282/300, Score: 218650, pieces 1148
Epoch: 283/300, Score: 42300, pieces 360
Epoch: 284/300, Score: 137540, pieces 812
Epoch: 285/300, Score: 108600, pieces 684
Epoch: 286/300, Score: 45830, pieces 376
Epoch: 287/300, Score: 301890, pieces 1736
Epoch: 288/300, Score: 1257410, pieces 6910
Epoch: 289/300, Score: 59480, pieces 406
Epoch: 290/300, Score: 57810, pieces 372
Epoch: 291/300, Score: 211890, pieces 1040
Epoch: 292/300, Score: 477100, pieces 2442
Epoch: 293/300, Score: 1004790, pieces 5446
Epoch: 294/300, Score: 337180, pieces 1736
Epoch: 295/300, Score: 188010, pieces 978
Epoch: 296/300, Score: 245140, pieces 1332
Epoch: 297/300, Score: 192470, pieces 1162
Epoch: 298/300, Score: 135410, pieces 788
Epoch: 299/300, Score: 666720, pieces 3348
学習にかかった時間は19分でした。