かなり前に書いたGA(遺伝的アルゴリズム)のOneMax問題の実装を応用してオセロAIを作り、とりあえずAIと対戦できるようになったので投稿する。
GAonemaxの記事 : https://qiita.com/AokiMasataka/items/4fe9b2623282238ba6f3
GAオセロのgithub : https://github.com/AokiMasataka/GA_othello
実装
オセロのマス目に点数をつけて、そこに自分の石があればその点数分プラス、相手の石があればマイナスになるようにした。
今回はGAでこの点数とその配置を最適化していく。点数は-1〜1の間の値をとるfloat型である。私はオセロにあまり詳しくないが、序盤と終盤では盤面の価値が変わってくる(序盤は石を取らないほうがいい、終盤は取ったほうがいい、など)。そこで、序盤用と終盤用の二つの点数表を作り、ある一定の手数に達したら序盤の点数表から終盤の点数表に切り替えるようにした。なお、その切り替えの手数も私自身が決めるのではなく、GAで最適な値を見つけてもらうことにした。
genetic.jl
module GA
include("funcs.jl")
using .Tool
"""
    DNA

Population state of the genetic algorithm.

# Fields
- `genom`: one row per individual; `initialGenerat` creates 21 genes per row.
- `population`: number of individuals (rows of `genom`).
- `mutationRate`: probability compared against `rand()` in `mutation`.
"""
mutable struct DNA
    genom::Matrix{Float32}
    population::Int32
    mutationRate::Float64
end
"""
    initialGenerat(mutationRate::Float64)::DNA

Create the initial population of 100 individuals with 21 genes each.

Genes are drawn uniformly from `[-1, 1)`. The previous implementation used
`randn`, which is unbounded: besides contradicting the intended score range,
it let `abs(60 * gene21)` (the table-switch threshold used by `Tool.selfPlay`)
exceed the 60 moves of a game.

# Arguments
- `mutationRate`: per-gene mutation probability stored in the returned `DNA`.

# Returns
A `DNA` whose `genom` has one row per individual.
"""
function initialGenerat(mutationRate::Float64)::DNA
    popul::Int32 = 100
    genes = 20 + 1
    # rand(Float32, ...) is uniform on [0, 1); rescale to [-1, 1).
    genom = 2 .* rand(Float32, popul, genes) .- 1
    return DNA(genom, popul, mutationRate)
end
"""
    choice(self::DNA)::Array{Array{Float32, 1}, 1}

Select two parents from the population.

Every ordered pair `(X, Y)` with `X != Y` plays one game through
`Tool.selfPlay` with `X` moving first, and the result (`1`, `0` or `-1`) is
added to the tally of `X`. The individual with the highest tally becomes the
first parent; the second parent is a different individual chosen uniformly at
random.

# Returns
A two-element vector `[best, other]` of gene vectors (copies of rows of
`self.genom`).

# Throws
- `ArgumentError` if the population has fewer than two individuals (the
  original code looped forever in that case).
"""
function choice(self::DNA)::Array{Array{Float32, 1}, 1}
    n = Int(self.population)
    n >= 2 || throw(ArgumentError("choice needs a population of at least 2, got $n"))

    # Int tallies: an Int8 counter wraps silently once a population exceeds 127.
    wins = zeros(Int, n)
    for X in 1:n, Y in 1:n
        X == Y && continue
        wins[X] += Tool.selfPlay(self.genom[X, :], self.genom[Y, :])
    end

    best = argmax(wins)
    # Draw from the n - 1 remaining individuals and skip over `best`.
    other = rand(1:(n - 1))
    if other >= best
        other += 1
    end
    return [self.genom[best, :], self.genom[other, :]]
end
"""
    crossover(self::DNA, dna::Array{Array{Float32, 1}, 1})::Array{Float32, 1}

Build the next generation as one flat gene vector.

The two parents `dna[1]` and `dna[2]` are copied unchanged to the front,
followed by `self.population - 2` children. Each child takes genes `1:cut`
from the first parent and `cut+1:21` from the second, with `cut` drawn
uniformly from `2:20`.

# Returns
A vector of `self.population * 21` genes, 21 consecutive entries per individual.
"""
function crossover(self::DNA, dna::Array{Array{Float32, 1}, 1})::Array{Float32, 1}
    pool = Float32[]
    append!(pool, dna[1])
    append!(pool, dna[2])
    children = self.population - 2
    for _ in 1:children
        cut = rand(2:20)
        head = dna[1][1:cut]
        tail = dna[2][(cut + 1):21]
        append!(pool, head)
        append!(pool, tail)
    end
    # Length check: throws if the pool does not hold population * 21 genes.
    return reshape(pool, self.population * 21)
end
"""
    mutation(self::DNA, genom::Array{Float32, 1})::Array{Float32, 2}

Mutate the flat gene vector `genom` in place and install it as the new
population matrix of `self`.

Each gene is replaced, with probability `self.mutationRate`, by a value drawn
uniformly from `[-1, 1)`. The previous implementation looped over
`1:self.population`, so only the first `population` entries of the flat vector
(out of `population * 21`) could ever mutate, and it drew replacement values
from `[0, 1)`.

# Arguments
- `genom`: flat vector of `21 * self.population` genes, 21 consecutive entries
  per individual (the layout produced by `crossover`). It is modified.

# Returns
The new `self.genom`, a `population × 21` matrix with one row per individual.
"""
function mutation(self::DNA, genom::Array{Float32, 1})::Array{Float32, 2}
    for i in eachindex(genom)
        if rand() < self.mutationRate
            genom[i] = 2 * rand(Float32) - 1
        end
    end
    # Columns of the 21 × population reshape are individuals; permute to rows.
    self.genom = permutedims(reshape(genom, (21, self.population)))
    return self.genom
end
end
個体のDNAは、序盤用の点数10個、終盤用の点数10個、評価盤面を切り替える手数を決める値1個の合計21個の値からなる。この長さ21の配列を100個体分生成する。
choice関数では総当たりで対戦させ、最も勝利数の多い個体を1体選択する。もう1体はランダムに決定される。交配には一点交差を採用し、突然変異ではmutationRate(0~1)の確率で-1~1のランダムな値が代入される。
funcs.jl
module Tool
include("game.jl")
using .Game
"""
    deploy(dna::Array{Float32, 1})::Array{Float32, 2}

Expand 10 genes into an 8×8 score table.

The genes fill the top-left 4×4 quadrant symmetrically about its diagonal:
genes 1–4 on the diagonal, 5–7 one step off it, 8–9 two steps off, and gene 10
in the two far corners of the quadrant. The quadrant is then mirrored
horizontally, vertically and diagonally onto the other three quadrants.
"""
function deploy(dna::Array{Float32, 1})::Array{Float32, 2}
    board = zeros(Float32, 8, 8)

    # bandStart[d + 1] is the index just before the first gene of the band
    # lying d steps off the quadrant diagonal.
    bandStart = (0, 4, 7, 9)
    for i in 1:4, d in 0:(4 - i)
        weight = dna[bandStart[d + 1] + i]
        board[i + d, i] = weight
        board[i, i + d] = weight
    end

    for r in 1:4, c in 1:4
        weight = board[r, c]
        board[r, 9 - c] = weight
        board[9 - r, c] = weight
        board[9 - r, 9 - c] = weight
    end
    return board
end
"""
    getPoint(state::Array{Int8, 2}, dna::Array{Float32, 2}, player::Int8)::Float32

Score an 8×8 board from the point of view of `player`.

Each square holding `player` adds the matching entry of the score table `dna`;
each square holding `-player` subtracts it; every other square is ignored.
"""
function getPoint(state::Array{Int8, 2}, dna::Array{Float32, 2}, player::Int8)::Float32
    opponent = 0 - player
    total = zero(Float32)
    for i in 1:8, j in 1:8
        stone = state[i, j]
        if stone == player
            total += dna[i, j]
        elseif stone == opponent
            total -= dna[i, j]
        end
    end
    return total
end
"""
    selfPlay(first::Array{Float32, 1}, behind::Array{Float32, 1})::Int8

Play one complete 8×8 game between two genomes and report the outcome.

`first` controls player `1` and `behind` controls player `-1`. Genes `1:10`
and `11:20` of each genome are expanded into two score tables with
`Tool.deploy`, and `abs(60 * gene21)` is that genome's switch threshold. The
table built from genes `11:20` is used until the count of moves played exceeds
the threshold; after that the table built from genes `1:10` is used.

On each turn every legal move is tried on the board, scored with
`Tool.getPoint` for the side to move, and undone; the first move with the
highest score is then played.

# Returns
`1` if the final board sums to a positive value (player `1` has more stones),
`-1` if it sums to a negative value, and `0` otherwise.
"""
function selfPlay(first::Array{Float32, 1}, behind::Array{Float32, 1})::Int8
    boardSize::Int8 = 8
    game = Game.init(boardSize)

    switchFirst, switchBehind = abs(60 * first[21]), abs(60 * behind[21])
    tablesFirst = [Tool.deploy(first[1:10]), Tool.deploy(first[11:20])]
    tablesBehind = [Tool.deploy(behind[1:10]), Tool.deploy(behind[11:20])]

    moves::Int8 = 0
    # Game.isDone also hands the turn over when only the side to move is stuck.
    while !Game.isDone(game)
        legal = Game.getLegalAction(game)
        if isempty(legal)
            game.player = 0 - game.player
            continue
        end

        # The side to move does not change while candidates are being tried,
        # so its score table can be picked once per turn.
        if game.player == 1
            table = switchFirst < moves ? tablesFirst[1] : tablesFirst[2]
        else
            table = switchBehind < moves ? tablesBehind[1] : tablesBehind[2]
        end

        snapshot = copy(game.state)
        scores = Float32[]
        for candidate in legal
            Game.action(game, candidate)
            push!(scores, Tool.getPoint(game.state, table, game.player))
            game.state = copy(snapshot)   # undo the trial move
        end

        Game.action(game, legal[argmax(scores)])
        game.player = 0 - game.player
        moves += 1
    end

    return sign(sum(game.state))
end
"""
    save(genom::Array{Float32, 2}, path::AbstractString="genom.bin")

Write `genom` to `path` as two `Int64` dimensions followed by the raw
`Float32` data in column-major order (host byte order).

The original function was an empty stub; `path` is a new optional argument, so
existing `save(genom)` calls keep working and write to `"genom.bin"` in the
current directory.
"""
function save(genom::Array{Float32, 2}, path::AbstractString="genom.bin")
    open(path, "w") do io
        write(io, Int64(size(genom, 1)), Int64(size(genom, 2)))
        write(io, genom)
    end
    return nothing
end

"""
    load(path::AbstractString="genom.bin")::Array{Float32, 2}

Read back a matrix written by `save`.

The original stub returned an unassigned local and therefore always threw
`UndefVarError`.

# Throws
- `SystemError` if `path` cannot be opened.
- `EOFError` if the file is shorter than its header announces.
"""
function load(path::AbstractString="genom.bin")::Array{Float32, 2}
    return open(path, "r") do io
        rows = read(io, Int64)
        cols = read(io, Int64)
        read!(io, Array{Float32, 2}(undef, rows, cols))
    end
end
end
funcs.jlにはgenetic.jlのchoice関数で使う関数が記述されている。
特に解説することはないので、次に移る。
game.jl
module Game
"""
    Var

Mutable game state.

# Fields
- `state`: `size × size` board; `1` and `-1` are the two players' stones
  (printed as `B` and `W` by `show`), `0` is an empty square.
- `player`: the side to move, `1` or `-1`.
- `size`: edge length of the board.
"""
mutable struct Var
    state::Matrix{Int8}
    player::Int8
    size::Int8
end
"""
    init(size::Int8)::Var

Create a new game on a `size × size` board with player `1` to move.

The four centre squares hold the starting stones: `-1` on the main diagonal of
the centre 2×2 block and `1` on its anti-diagonal.
"""
function init(size::Int8)::Var
    board = zeros(Int8, (size, size))
    half = fld(size, 2)
    board[half + 1, half + 1] = -1
    board[half, half] = -1
    board[half, half + 1] = 1
    board[half + 1, half] = 1
    return Var(board, 1, size)
end
"""
    isDone(self::Var)::Bool

Return `true` when neither side has a legal move.

Side effect: when the side to move has no legal move but the opponent does,
`self.player` is left switched to the opponent (the turn is passed) and `false`
is returned. When the game is over, `self.player` is restored to its value on
entry.
"""
function isDone(self::Var)::Bool
    isempty(getLegalAction(self)) || return false

    # The side to move is stuck; see whether the opponent can still play.
    self.player = 0 - self.player
    isempty(getLegalAction(self)) || return false

    self.player = 0 - self.player
    return true
end
"""
    action(self::Var, action::Int8)

Place a stone of `self.player` on the square encoded by `action` and flip the
opposing stones it brackets.

`action` uses the encoding produced by `getLegalAction`:
`(x - 1) + (y - 1) * self.size` for the square `self.state[x, y]`. The move is
not checked for legality, and `self.player` is not changed.

In each of the eight directions the scan stops at the first stone of the
mover: stones passed on the way are flipped, and nothing beyond is touched.
The original code kept scanning past that stone, so a later own stone further
along the line flipped opposing stones that were not bracketed by the new
stone (for example new-P, P, O, P flipped the O), which disagreed with
`legalAction`.
"""
function action(self::Var, action::Int8)
    player::Int8 = self.player
    x::Int8 = (action % self.size) + 1
    y::Int8 = Int8(floor(action / self.size) + 1)
    self.state[x, y] = player

    for dx in -1:1, dy in -1:1
        dx == 0 && dy == 0 && continue
        for i in 1:self.size
            posX, posY = x + i * dx, y + i * dy
            (1 <= posX <= self.size && 1 <= posY <= self.size) || break
            stone = self.state[posX, posY]
            if stone == player
                # Flip the opposing stones strictly between the new stone and
                # this one (none when i == 1), then stop scanning this line.
                for n in 1:(i - 1)
                    self.state[x + n * dx, y + n * dy] = player
                end
                break
            elseif stone != 0 - player
                break   # empty square: nothing is bracketed in this direction
            end
        end
    end
    return nothing
end
"""
    getLegalAction(self::Var)::Array{Int8, 1}

List the legal moves of `self.player`.

A move on the empty square `self.state[x, y]` is encoded as
`(x - 1) + (y - 1) * self.size`. Squares are visited with `x` as the outer
loop and `y` as the inner loop, and the moves are returned in that order.
"""
function getLegalAction(self::Var)::Array{Int8, 1}
    n = self.size
    moves = Int8[]
    for x in Int8(1):n, y in Int8(1):n
        if self.state[x, y] == 0 && legalAction(self, x, y)
            push!(moves, (x - 1) + (y - 1) * n)
        end
    end
    return moves
end
"""
    legalAction(self::Var, x::Int8, y::Int8)::Bool

Return `true` if a stone of `self.player` placed at `(x, y)` would bracket at
least one opposing stone in some direction.

Each of the eight directions is scanned outward from `(x, y)`: the scan passes
over opposing stones and stops at the board edge, at an empty square, or at
the first stone of `self.player`. The move is legal when that stone is reached
after at least one opposing stone. Whether `(x, y)` itself is empty is not
checked here.
"""
function legalAction(self::Var, x::Int8, y::Int8)::Bool
    me::Int8 = self.player
    edge = self.size
    for dx in -1:1, dy in -1:1
        (dx == 0 && dy == 0) && continue
        for step in 1:edge
            px = Int8(step * dx + x)
            py = Int8(step * dy + y)
            (1 <= px <= edge && 1 <= py <= edge) || break
            stone = self.state[px, py]
            if stone == me
                step > 1 && return true
                break
            elseif stone != 0 - me
                break
            end
        end
    end
    return false
end
"""
    show(self::Var)

Print the board to standard output.

The first line lists the column numbers. Each following line prints one row of
`self.state` (`B` for `1`, `W` for `-1`, `*` for anything else) followed by
its row number. A blank line ends the output.
"""
function show(self::Var)
    n = self.size
    println(join(string(col, " ") for col in 1:n))
    for row in 1:n
        for col in 1:n
            stone = self.state[row, col]
            print(stone == 1 ? "B " : (stone == -1 ? "W " : "* "))
        end
        println(row)
    end
    println()
end
end
main.jl
include("funcs.jl")
include("genetic.jl")
include("game.jl")
using .Tool
using .Game
using .GA
"""
    train(step::Int64)::Array{Float32, 1}

Run the genetic algorithm for `step` generations and return a genome.

Each generation selects two parents with `GA.choice`, builds the next
generation with `GA.crossover` and applies `GA.mutation` (mutation rate 0.1).
After the last generation one more `GA.choice` tournament is run and its first
parent, the individual with the highest tally, is returned.
"""
function train(step::Int64)::Array{Float32, 1}
    population::GA.DNA = GA.initialGenerat(0.1)
    for generation in 1:step
        println("trainnig step : ", generation)
        parents = GA.choice(population)
        offspring = GA.crossover(population, parents)
        population.genom = GA.mutation(population, offspring)
    end
    champion = GA.choice(population)[1]
    return champion
end
"""
    play(genom::Array{Float32, 1})

Play an interactive 8×8 game on the terminal against the AI described by
`genom`.

The human is player `1` (`B`) and moves first; the AI is player `-1` (`W`).
The human enters `x` (the column number printed above the board) and `y` (the
row number printed at the end of each row). Input is re-requested until it
names a legal square; the original code crashed on non-numeric input and
accepted illegal squares.

The AI tries every legal move, scores the resulting board with
`Tool.getPoint` and plays the first move with the highest score. The board is
restored after every trial move; the original restored it only once after all
trials, so each candidate was scored on a board still containing the earlier
trial moves. As in `Tool.selfPlay`, the table built from genes `11:20` is used
until the move count exceeds `abs(60 * genom[21])`, then the table built from
genes `1:10`.

# Throws
- `EOFError` if standard input ends while a move is being requested.
"""
function play(genom::Array{Float32, 1})
    boardSize::Int8 = 8
    switchTurn::Float32 = abs(60 * genom[21])
    tables = [Tool.deploy(genom[1:10]), Tool.deploy(genom[11:20])]
    game = Game.init(boardSize)
    turn = 0

    while true
        Game.show(game)
        # Game.isDone also passes the turn when only the side to move is stuck.
        Game.isDone(game) && break
        legal = Game.getLegalAction(game)
        if isempty(legal)
            game.player = 0 - game.player
            continue
        end

        if game.player == 1
            act = Int8(-1)
            while true
                xText = Base.prompt("x")
                yText = Base.prompt("y")
                # Base.prompt returns nothing at end of input; stop instead of
                # asking forever.
                (xText === nothing || yText === nothing) && throw(EOFError())
                x = tryparse(Int, strip(xText))
                y = tryparse(Int, strip(yText))
                if x !== nothing && y !== nothing &&
                   1 <= x <= boardSize && 1 <= y <= boardSize
                    # Row y, column x in the encoding used by Game.action.
                    candidate = (y - 1) + (x - 1) * boardSize
                    if candidate in legal
                        act = Int8(candidate)
                        break
                    end
                end
                println("illegal move, enter the x and y of a legal square")
            end
            println(act)
            Game.action(game, act)
        else
            snapshot = copy(game.state)
            table = switchTurn < turn ? tables[1] : tables[2]
            scores = Float32[]
            for candidate in legal
                Game.action(game, candidate)
                push!(scores, Tool.getPoint(game.state, table, game.player))
                game.state = copy(snapshot)   # undo the trial move
            end
            Game.action(game, legal[argmax(scores)])
        end

        game.player = 0 - game.player
        turn += 1
    end
    return nothing
end
# Script entry point: run 10 training generations, then play interactively
# against the genome returned by `train`.
dna = train(10)
play(dna)
main.jlにはtrain関数とplay関数が入っている。train関数は引数に学習ステップ数を受け取り、戻り値として最も成績の良かった個体のDNAを返す。play関数では、個体のDNAを渡すことでその個体と対戦できる。今回はtrainを10ステップ繰り返した個体と対戦している。