# 【Shogi AI】Reading "将棋AIで学ぶディープラーニング" (Deep Learning through Shogi AI)♪ ~ Trying out network models

### What I did

(0) The original 13-layer model
(1) Tried a 10-block ResNet
(2) Changed the 23-layer network into a policy/value multi-task model
(3) Increased the ResNet Block to four conv layers

### (0) The original 13-layer model

First, the strongest model so far is the original 13-layer model below.

```
from chainer import Chain
import chainer.functions as F
import chainer.links as L  # required for L.Convolution2D / L.Linear / L.Bias below

from pydlshogi.common import *

ch = 192
fcl = 256

class PolicyValueNetwork(Chain):
    def __init__(self):
        super(PolicyValueNetwork, self).__init__()
        with self.init_scope():
            self.l1=L.Convolution2D(in_channels = 104, out_channels = ch, ksize = 3, pad = 1)
            self.l2=L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1)
            self.l3=L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1)
            self.l4=L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1)
            self.l5=L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1)
            self.l6=L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1)
            self.l7=L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1)
            self.l8=L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1)
            self.l9=L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1)
            self.l10=L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1)
            self.l11=L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1)
            self.l12=L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1)
            # policy network
            self.l13=L.Convolution2D(in_channels = ch, out_channels = MOVE_DIRECTION_LABEL_NUM, ksize = 1, nobias = True)
            self.l13_bias=L.Bias(shape=(9*9*MOVE_DIRECTION_LABEL_NUM))
            # value network
            self.l13_v=L.Convolution2D(in_channels = ch, out_channels = MOVE_DIRECTION_LABEL_NUM, ksize = 1)
            self.l14_v=L.Linear(9*9*MOVE_DIRECTION_LABEL_NUM, fcl)
            self.l15_v=L.Linear(fcl, 1)

    def __call__(self, x):
        h1 = F.relu(self.l1(x))
        h2 = F.relu(self.l2(h1))
        h3 = F.relu(self.l3(h2))
        h4 = F.relu(self.l4(h3))
        h5 = F.relu(self.l5(h4))
        h6 = F.relu(self.l6(h5))
        h7 = F.relu(self.l7(h6))
        h8 = F.relu(self.l8(h7))
        h9 = F.relu(self.l9(h8))
        h10 = F.relu(self.l10(h9))
        h11 = F.relu(self.l11(h10))
        h12 = F.relu(self.l12(h11))
        # policy network
        h13 = self.l13(h12)
        policy = self.l13_bias(F.reshape(h13, (-1, 9*9*MOVE_DIRECTION_LABEL_NUM)))
        # value network
        h13_v = F.relu(self.l13_v(h12))
        h14_v = F.relu(self.l14_v(h13_v))
        value = self.l15_v(h14_v)
        return policy, value
```

Its convergence behavior was as follows.

For all of the models below, the training data is the same: Floodgate game records from 2016 and 2017.
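
As a quick sanity check (a minimal sketch, assuming the PolicyValueNetwork class above is in scope; MOVE_DIRECTION_LABEL_NUM is 27 in pydlshogi.common, so the policy head has 9*9*27 = 2187 outputs), the network can be run on a dummy batch to confirm the head shapes:

```
import numpy as np

model = PolicyValueNetwork()
# Dummy mini-batch: two positions, 104 input feature planes on the 9x9 board.
x = np.zeros((2, 104, 9, 9), dtype=np.float32)
policy, value = model(x)
print(policy.shape)  # (2, 2187): one logit per 9*9*27 move label
print(value.shape)   # (2, 1): one win-probability logit per position
```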

### (1) Trying a 10-block ResNet

```
from chainer import Chain
import chainer.functions as F
import chainer.links as L  # required for L.Convolution2D / L.Linear / L.Bias below

from pydlshogi.common import *

ch = 192
fcl = 256

class Block(Chain):
    def __init__(self):
        super(Block, self).__init__()
        with self.init_scope():
            self.conv1 = L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1, nobias=True)
            self.bn1 = L.BatchNormalization(ch)
            self.conv2 = L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1, nobias=True)
            self.bn2 = L.BatchNormalization(ch)

    def __call__(self, x):
        h1 = F.relu(self.bn1(self.conv1(x)))
        h2 = self.bn2(self.conv2(h1))
        return F.relu(x + h2)  # identity shortcut around the two conv layers

class PolicyValueResnet(Chain):
    def __init__(self, blocks = 5):
        super(PolicyValueResnet, self).__init__()
        self.blocks = blocks
        with self.init_scope():
            self.l1=L.Convolution2D(in_channels = 104, out_channels = ch, ksize = 3, pad = 1)
            # register the residual blocks b1 .. b(blocks-1) looked up in __call__
            for i in range(1, blocks):
                setattr(self, 'b{}'.format(i), Block())
            # policy network
            self.policy=L.Convolution2D(in_channels = ch, out_channels = MOVE_DIRECTION_LABEL_NUM, ksize = 1, nobias = True)
            self.policy_bias=L.Bias(shape=(9*9*MOVE_DIRECTION_LABEL_NUM))
            # value network
            self.value1=L.Convolution2D(in_channels = ch, out_channels = MOVE_DIRECTION_LABEL_NUM, ksize = 1)
            self.value1_bn = L.BatchNormalization(MOVE_DIRECTION_LABEL_NUM)
            self.value2=L.Linear(9*9*MOVE_DIRECTION_LABEL_NUM, fcl)
            self.value3=L.Linear(fcl, 1)

    def __call__(self, x):
        h = F.relu(self.l1(x))
        for i in range(1, self.blocks):
            h = self['b{}'.format(i)](h)
        # policy network
        h_policy = self.policy(h)
        u_policy = self.policy_bias(F.reshape(h_policy, (-1, 9*9*MOVE_DIRECTION_LABEL_NUM)))
        # value network
        h_value = F.relu(self.value1_bn(self.value1(h)))
        h_value = F.relu(self.value2(h_value))
        u_value = self.value3(h_value)
        return u_policy, u_value
```
Passing blocks = 10 to

```
def __init__(self, blocks = 5):
```

trains the 10-block model, and any other number likewise gives a ResNet of that depth (strictly speaking, the loop for i in range(1, blocks) registers blocks - 1 Block instances).
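
For example, the deeper model can be constructed and sized up like this (a minimal sketch, assuming the PolicyValueResnet class above is in scope):

```
# Build the 10-block variant and count its learnable parameters.
model = PolicyValueResnet(blocks=10)
n_params = sum(p.data.size for p in model.params())
print('{:,} parameters'.format(n_params))
```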

With this model, the convergence came out as follows.

### (2) Changing the 23-layer network into a policy/value multi-task model

The 23-layer multi-task model is shown below; because the original model is easy to follow, extending it like this is straightforward.

```
from chainer import Chain
import chainer.functions as F
import chainer.links as L  # required for L.Convolution2D / L.Linear / L.Bias below

from pydlshogi.common import *

ch = 192
fcl = 256

class PolicyValueNetwork23(Chain):
    def __init__(self):
        super(PolicyValueNetwork23, self).__init__()
        with self.init_scope():
            self.l1=L.Convolution2D(in_channels = 104, out_channels = ch, ksize = 3, pad = 1)
            self.l2=L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1)
            self.l3=L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1)
            self.l4=L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1)
            self.l5=L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1)
            self.l6=L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1)
            self.l7=L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1)
            self.l8=L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1)
            self.l9=L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1)
            self.l10=L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1)
            self.l11=L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1)
            self.l12=L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1)
            self.l13=L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1)
            self.l14=L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1)
            self.l15=L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1)
            self.l16=L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1)
            self.l17=L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1)
            self.l18=L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1)
            self.l19=L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1)
            self.l20=L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1)
            self.l21=L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1)
            self.l22=L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1)
            # policy network
            self.l23=L.Convolution2D(in_channels = ch, out_channels = MOVE_DIRECTION_LABEL_NUM, ksize = 1, nobias = True)
            self.l23_bias=L.Bias(shape=(9*9*MOVE_DIRECTION_LABEL_NUM))
            # value network
            self.l23_v=L.Convolution2D(in_channels = ch, out_channels = MOVE_DIRECTION_LABEL_NUM, ksize = 1)
            self.l24_v=L.Linear(9*9*MOVE_DIRECTION_LABEL_NUM, fcl)
            self.l25_v=L.Linear(fcl, 1)

    def __call__(self, x):
        h1 = F.relu(self.l1(x))
        h2 = F.relu(self.l2(h1))
        h3 = F.relu(self.l3(h2))
        h4 = F.relu(self.l4(h3))
        h5 = F.relu(self.l5(h4))
        h6 = F.relu(self.l6(h5))
        h7 = F.relu(self.l7(h6))
        h8 = F.relu(self.l8(h7))
        h9 = F.relu(self.l9(h8))
        h10 = F.relu(self.l10(h9))
        h11 = F.relu(self.l11(h10))
        h12 = F.relu(self.l12(h11))
        h13 = F.relu(self.l13(h12))
        h14 = F.relu(self.l14(h13))
        h15 = F.relu(self.l15(h14))
        h16 = F.relu(self.l16(h15))
        h17 = F.relu(self.l17(h16))
        h18 = F.relu(self.l18(h17))
        h19 = F.relu(self.l19(h18))
        h20 = F.relu(self.l20(h19))
        h21 = F.relu(self.l21(h20))
        h22 = F.relu(self.l22(h21))
        # policy network
        h23 = self.l23(h22)
        policy = self.l23_bias(F.reshape(h23, (-1, 9*9*MOVE_DIRECTION_LABEL_NUM)))
        # value network
        h23_v = F.relu(self.l23_v(h22))
        h24_v = F.relu(self.l24_v(h23_v))
        value = self.l25_v(h24_v)
        return policy, value
```
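
For reference, a policy/value multi-task model like this is trained by summing the losses of the two heads. The sketch below shows the usual pattern (the function and variable names are my own illustration, not necessarily the book's train script):

```
import chainer.functions as F

def multitask_loss(model, x, t_move, t_win):
    # t_move: int32 move labels, shape (N,)
    # t_win:  int32 win/loss labels, shape (N, 1), matching the value head
    y_policy, y_value = model(x)
    loss_policy = F.softmax_cross_entropy(y_policy, t_move)  # move classification
    loss_value = F.sigmoid_cross_entropy(y_value, t_win)     # game-result prediction
    return loss_policy + loss_value
```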

The remaining question is whether it converges... If it does, this one may get stronger too, since the 13-layer model did.

```
2018/09/09 02:53:23     INFO    epoch = 29, iteration = 2112000, loss = 2.1264465, accuracy_pol = 0.43945312,accuracy_val = 0.67578125
2018/09/09 02:54:04     INFO    epoch = 29, iteration = 2113000, loss = 2.1251316, accuracy_pol = 0.4296875,accuracy_val = 0.68359375
2018/09/09 02:54:45     INFO    epoch = 29, iteration = 2114000, loss = 2.1300569, accuracy_pol = 0.40625,accuracy_val = 0.7285156
2018/09/09 02:55:26     INFO    epoch = 29, iteration = 2115000, loss = nan, accuracy_pol = 0.0,accuracy_val = 0.5214844
2018/09/09 02:56:08     INFO    epoch = 29, iteration = 2116000, loss = nan, accuracy_pol = 0.0,accuracy_val = 0.5097656
2018/09/09 02:56:49     INFO    epoch = 29, iteration = 2117000, loss = nan, accuracy_pol = 0.0,accuracy_val = 0.51171875
2018/09/09 02:57:30     INFO    epoch = 29, iteration = 2118000, loss = nan, accuracy_pol = 0.0,accuracy_val = 0.49414062
2018/09/09 02:58:11     INFO    epoch = 29, iteration = 2119000, loss = nan, accuracy_pol = 0.0,accuracy_val = 0.5
2018/09/09 02:58:52     INFO    epoch = 29, iteration = 2120000, loss = nan, accuracy_pol = 0.0,accuracy_val = 0.5058594
2018/09/09 02:58:55     INFO    validate test data
2018/09/09 03:01:18     INFO    epoch = 29, iteration = 2120061, train loss avr = nan, test accuracy_pol = 0.00011980524,test accuracy_val = 0.5018642
```

In other words, once the loss went to NaN the network was choosing essentially random moves (policy accuracy 0.0, value accuracy around 0.5); most of the parameters had apparently turned into nonsense values.
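
One standard guard against this kind of blow-up is to clip the gradient norm with an optimizer hook. This is a suggestion on my part, not something the training script above does:

```
import chainer
from chainer import optimizers

# Assumes `model` is the PolicyValueNetwork23 defined above.
optimizer = optimizers.SGD(lr=0.01)
optimizer.setup(model)
# Rescales gradients whose global L2 norm exceeds the threshold,
# so one oversized update is less likely to blow the weights up.
optimizer.add_hook(chainer.optimizer_hooks.GradientClipping(threshold=5.0))
```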

### (3) Increasing the ResNet Block to four conv layers

```
from chainer import Chain
import chainer.functions as F
import chainer.links as L  # required for L.Convolution2D / L.Linear / L.Bias below

from pydlshogi.common import *

ch = 192
fcl = 256

class Block(Chain):

    def __init__(self):
        super(Block, self).__init__()
        with self.init_scope():
            self.conv1 = L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1, nobias=True)
            self.bn1 = L.BatchNormalization(ch)
            self.conv2 = L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1, nobias=True)
            self.bn2 = L.BatchNormalization(ch)
            self.conv3 = L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1, nobias=True)
            self.bn3 = L.BatchNormalization(ch)
            self.conv4 = L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1, nobias=True)
            self.bn4 = L.BatchNormalization(ch)

    def __call__(self, x):
        # chain the four conv layers sequentially, then add the shortcut
        h1 = F.relu(self.bn1(self.conv1(x)))
        h2 = F.relu(self.bn2(self.conv2(h1)))
        h3 = F.relu(self.bn3(self.conv3(h2)))
        h4 = self.bn4(self.conv4(h3))
        return F.relu(x + h4)

class PolicyValueResnet(Chain):
    def __init__(self, blocks = 5):
        super(PolicyValueResnet, self).__init__()
        self.blocks = blocks
        with self.init_scope():
            self.l1=L.Convolution2D(in_channels = 104, out_channels = ch, ksize = 3, pad = 1)
            # register the residual blocks b1 .. b(blocks-1) looked up in __call__
            for i in range(1, blocks):
                setattr(self, 'b{}'.format(i), Block())
            # policy network
            self.policy=L.Convolution2D(in_channels = ch, out_channels = MOVE_DIRECTION_LABEL_NUM, ksize = 1, nobias = True)
            self.policy_bias=L.Bias(shape=(9*9*MOVE_DIRECTION_LABEL_NUM))
            # value network
            self.value1=L.Convolution2D(in_channels = ch, out_channels = MOVE_DIRECTION_LABEL_NUM, ksize = 1)
            self.value1_bn = L.BatchNormalization(MOVE_DIRECTION_LABEL_NUM)
            self.value2=L.Linear(9*9*MOVE_DIRECTION_LABEL_NUM, fcl)
            self.value3=L.Linear(fcl, 1)

    def __call__(self, x):
        h = F.relu(self.l1(x))
        for i in range(1, self.blocks):
            h = self['b{}'.format(i)](h)
        # policy network
        h_policy = self.policy(h)
        u_policy = self.policy_bias(F.reshape(h_policy, (-1, 9*9*MOVE_DIRECTION_LABEL_NUM)))
        # value network
        h_value = F.relu(self.value1_bn(self.value1(h)))
        h_value = F.relu(self.value2(h_value))
        u_value = self.value3(h_value)
        return u_policy, u_value
```

I find this exercise quite instructive. Whether it really converges remains to be seen... Of course, like the 23-layer model above it is 22 layers deep, the shortcut connections should help it converge, and being a ResNet the accuracy should improve too, so there is reason for optimism.
It is still partway through training on the 2016 data, but the convergence so far is as follows.

### (4) Toward further ResNet improvements

A further avenue: the code above uses the original ResNet structure, but modifying that structure apparently promises additional improvement.

[References]
Residual Network(ResNet)の理解とチューニングのベストプラクティス
AutoEncoder / wide_resnet_AE.py

In short, both argue that the structure shown on the right of their comparison figures is optimal; as I read them, that is the "full pre-activation" ordering, which applies BN and ReLU before each convolution.

So next time I plan to try that structure in Chainer as well.
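
As a preview, here is a minimal sketch of such a pre-activation block in Chainer. It reflects my reading of the referenced articles (BN and ReLU moved before each convolution, nothing after the addition), not code from the book:

```
from chainer import Chain
import chainer.functions as F
import chainer.links as L

ch = 192

class PreActBlock(Chain):
    """Full pre-activation residual block: x + conv(relu(bn(conv(relu(bn(x))))))."""
    def __init__(self):
        super(PreActBlock, self).__init__()
        with self.init_scope():
            self.bn1 = L.BatchNormalization(ch)
            self.conv1 = L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1, nobias=True)
            self.bn2 = L.BatchNormalization(ch)
            self.conv2 = L.Convolution2D(in_channels = ch, out_channels = ch, ksize = 3, pad = 1, nobias=True)

    def __call__(self, x):
        h = self.conv1(F.relu(self.bn1(x)))
        h = self.conv2(F.relu(self.bn2(h)))
        return x + h  # identity shortcut; no ReLU after the addition
```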

Either way, merely checking convergence takes about a full day and night per run, which is fairly exhausting, but I intend to keep at it with a bit more focus until I reach a result I am satisfied with.

### Summary

・Although this is still work in progress, I applied network models that have succeeded on images and examined how they converge.

・The strength of a shogi AI presumably depends on the policy and value accuracies, but the two do not move in sync here, and what actually determines strength is still unclear.