More than 5 years have passed since last update.

juliaと遺伝的アルゴリズムでオセロ実装した

Posted at 2020-05-20

かなり前に書いたGAonemax問題を応用して、とりあえずAIとプレイできるようになったので投稿。
GAonemaxの記事 : https://qiita.com/AokiMasataka/items/4fe9b2623282238ba6f3
GAオセロのgithub : https://github.com/AokiMasataka/GA_othello

実装

オセロのマス目に点数をつけて、そこに自分の石があればその点数分プラス、相手の石があればマイナスになるようにした。
今回はGAでこの点数とその配置を最適化していく。点数は-1から1の間の値をとるfloat型である。私はオセロにあまり詳しくないが、序盤と終盤では盤面の価値が変わってくる(序盤は石を取らないほうがいいとか、終盤はとったほうがいいとか)。そこで、序盤用と終盤用の二つの点数表を作り、ある一定の手数に達したら序盤の点数表から終盤の点数表に切り替えるようにした。なお、その切り替えの手数も私自身が決めるのではなく、GAで最適な値を見つけてもらうことにした。

genetic.jl

module GA
include("funcs.jl")
using .Tool

# Population state shared by the genetic-algorithm steps in this module.
mutable struct DNA
    # Gene matrix; `choice` reads one individual per row via genom[X, :].
    genom::Array{Float32, 2}
    # Number of individuals (rows of `genom`).
    population::Int32
    # Threshold compared against rand() in `mutation`.
    mutationRate::Float64
end

"""
    initialGenerat(mutationRate::Float64)

Create the first generation: 100 individuals with 20 + 1 genes each, drawn
with `randn(Float32, ...)` and stored one individual per row.
"""
function initialGenerat(mutationRate::Float64)::DNA
    individuals = Int32(100)
    # Drawn gene-major and transposed so that each row is one individual.
    genes = randn(Float32, 20 + 1, individuals)
    return DNA(genes', individuals, mutationRate)
end

"""
    choice(self::DNA)

Pick two parents from the current population and return their gene vectors.

Every ordered pair of distinct individuals plays one game through
`Tool.selfPlay`, with the row individual passed as the first argument. The
result of each game (1, 0 or -1) is added to the row individual's score. The
first parent is the individual with the highest score (first one on ties);
the second parent is drawn uniformly from the remaining individuals.

Throws `ArgumentError` when the population has fewer than two individuals.
"""
function choice(self::DNA)::Array{Array{Float32, 1}, 1}
    n = Int(self.population)
    n >= 2 || throw(ArgumentError("population must be at least 2, got $n"))

    # Int tally: an Int8 counter would wrap silently once a population is
    # large enough for a score to leave the -128..127 range.
    score = zeros(Int, n)
    for a in 1:n, b in 1:n
        a == b && continue
        score[a] += Tool.selfPlay(self.genom[a, :], self.genom[b, :])
    end

    best = argmax(score)
    # Draw from the n - 1 other indices without a retry loop: sample
    # 1:(n - 1) and skip over `best`.
    other = rand(1:(n - 1))
    if other >= best
        other += 1
    end
    return [self.genom[best, :], self.genom[other, :]]
end

"""
    crossover(self::DNA, dna)

Build the next generation as one flat gene vector of length
`self.population * 21`.

The two parents in `dna` are copied unchanged as the first two individuals.
Each of the remaining `self.population - 2` children is a one-point crossover:
genes `1:cut` come from the first parent and genes `cut+1:21` from the second,
with `cut` drawn from `2:20`.
"""
function crossover(self::DNA, dna::Array{Array{Float32, 1}, 1})::Array{Float32, 1}
    mother, father = dna[1], dna[2]
    pool = vcat(mother, father)
    for _ in 1:(self.population - 2)
        cut = rand(2:20)
        append!(pool, @view(mother[1:cut]))
        append!(pool, @view(father[(cut + 1):21]))
    end
    return reshape(pool, self.population * 21)
end

"""
    mutation(self::DNA, genom)

Mutate the flat gene vector `genom` in place, store it in `self.genom` as a
`population x 21` matrix (one individual per row) and return that matrix.

Each gene is independently replaced, with probability `self.mutationRate`, by
a fresh value drawn uniformly from [-1, 1).
"""
function mutation(self::DNA, genom::Array{Float32, 1})::Array{Float32, 2}
    # Visit every gene of every individual. Looping over 1:population would
    # only ever touch the first `population` entries of the flat vector.
    for i in eachindex(genom)
        if rand() < self.mutationRate
            # rand(Float32) is in [0, 1); rescale to [-1, 1).
            genom[i] = 2.0f0 * rand(Float32) - 1.0f0
        end
    end
    # The flat vector is individual-major (21 consecutive genes each), so
    # reshape to 21 x population and transpose into one row per individual.
    self.genom = permutedims(reshape(genom, 21, Int(self.population)))
    return self.genom
end

end

個体のDNAの長さは序盤10,終盤10,評価盤面の切り替えに1で一個体につき合計21の長さの配列を100個体分生成。
choice関数では総当たりで戦わせ、最も勝利数の多い個体を1体選択し、もう1体はランダムに決定する。交配は一点交差を採用し、突然変異ではmutationRate(0~1)の確率で-1から1の間のランダムな値が代入される。

funcs.jl

module Tool
include("game.jl")
using .Game

"""
    deploy(dna::Array{Float32, 1})

Expand 10 genes into an 8x8 table of square values.

The genes fill the upper-left 4x4 quadrant symmetrically about its diagonal:
genes 1-4 on the diagonal, 5-7 one step off it, 8-9 two steps off and gene 10
in the two far corners of the quadrant. The quadrant is then mirrored into
the other three quadrants, so the table is symmetric both left-right and
top-bottom.
"""
function deploy(dna::Array{Float32, 1})::Array{Float32, 2}
    board = zeros(Float32, (8, 8))

    # offsets[d + 1] is added to the position along the d-th off-diagonal to
    # get the gene index.
    offsets = (0, 4, 7, 9)
    for d in 0:3, i in 1:(4 - d)
        gene = dna[i + offsets[d + 1]]
        board[i + d, i] = gene
        board[i, i + d] = gene
    end

    # Mirror the filled quadrant horizontally, vertically and diagonally.
    quadrant = board[1:4, 1:4]
    board[1:4, 8:-1:5] = quadrant
    board[8:-1:5, 1:4] = quadrant
    board[8:-1:5, 8:-1:5] = quadrant
    return board
end

"""
    getPoint(state, dna, player)

Score an 8x8 board for `player`: add `dna[i, j]` for every square holding a
`player` stone and subtract it for every square holding a `-player` stone.
"""
function getPoint(state::Array{Int8, 2}, dna::Array{Float32, 2}, player::Int8)::Float32
    total = 0.0f0
    rival = 0 - player
    for row in 1:8, col in 1:8
        stone = state[row, col]
        if stone == player
            total += dna[row, col]
        elseif stone == rival
            total -= dna[row, col]
        end
    end
    return total
end

"""
    selfPlay(first, behind)

Play one 8x8 game between two 21-gene individuals and return the result from
the point of view of `first`, which plays as player 1: `1` if the final board
sum is positive, `-1` if it is negative and `0` if it is zero.

Genes 1-10 and 11-20 of each individual are expanded by `Tool.deploy` into two
value tables; `abs(60 * gene 21)` is the move count compared against the
number of moves played to choose between them. On every move the side to play
tries each legal action, scores the resulting board with `Tool.getPoint` and
plays the first action with the highest score.
"""
function selfPlay(first::Array{Float32, 1}, behind::Array{Float32, 1})::Int8
    board = Game.init(Int8(8))
    switchFirst, switchBehind = abs(60 * first[21]), abs(60 * behind[21])
    tablesFirst = [Tool.deploy(first[1:10]), Tool.deploy(first[11:20])]
    tablesBehind = [Tool.deploy(behind[1:10]), Tool.deploy(behind[11:20])]
    moves = 0

    while !Game.isDone(board)
        candidates = Game.getLegalAction(board)
        if isempty(candidates)
            # No move available: hand the turn to the other side.
            board.player = 0 - board.player
            continue
        end

        snapshot = copy(board.state)
        scores = Float32[]
        for candidate in candidates
            Game.action(board, candidate)
            if board.player == 1
                table = switchFirst < moves ? tablesFirst[1] : tablesFirst[2]
            else
                table = switchBehind < moves ? tablesBehind[1] : tablesBehind[2]
            end
            push!(scores, Tool.getPoint(board.state, table, board.player))
            # Undo the trial move before evaluating the next candidate.
            board.state = copy(snapshot)
        end

        Game.action(board, candidates[argmax(scores)])
        board.player = 0 - board.player
        moves += 1
    end

    margin = sum(board.state)
    if margin > 0
        return 1
    elseif margin < 0
        return -1
    end
    return 0
end

"""
    save(genom::Array{Float32, 2}; path="genom.bin")

Write `genom` to `path` as raw binary: the two dimensions as `Int64` values
followed by the elements in column-major order, all in native byte order.
Returns `nothing`.
"""
function save(genom::Array{Float32, 2}; path::AbstractString="genom.bin")
    open(path, "w") do io
        write(io, Int64(size(genom, 1)), Int64(size(genom, 2)))
        write(io, genom)
    end
    return nothing
end

"""
    load(; path="genom.bin")

Read back a matrix written by `save` from `path`.
"""
function load(; path::AbstractString="genom.bin")::Array{Float32, 2}
    return open(path, "r") do io
        rows = read(io, Int64)
        cols = read(io, Int64)
        genom = Array{Float32, 2}(undef, rows, cols)
        # read! fills the whole array or throws EOFError on a truncated file.
        read!(io, genom)
        genom
    end
end

end

funcs.jlにはgenetic.jlのchoice関数で使う関数が記述されている。
特に解説することはないので、次に移る。

game.jl

module Game

# Mutable state of one game.
mutable struct Var
    # size x size grid: 1 and -1 are the two players' stones, 0 is empty.
    state::Array{Int8, 2}
    # Side to move: 1 or -1.
    player::Int8
    # Edge length of the board.
    size::Int8
end

# The eight (dx, dy) steps around a square, dx varying slowest.
const DIRECTIONS = [(dx, dy) for dx in -1:1 for dy in -1:1 if !(dx == 0 && dy == 0)]

"""
    init(size::Int8)

Create an empty `size x size` board with the four starting stones in the
centre and player 1 to move.
"""
function init(size::Int8)::Var
    n = Int(size)
    self = Var(zeros(Int8, n, n), 1, size)
    half = n ÷ 2
    self.state[half, half] = -1
    self.state[half + 1, half + 1] = -1
    self.state[half + 1, half] = 1
    self.state[half, half + 1] = 1
    return self
end

# Number of opposing stones that a stone of `self.player` at (x, y) brackets
# along direction (dx, dy); 0 when the run is not closed by an own stone.
function captured(self::Var, x::Integer, y::Integer, dx::Integer, dy::Integer)::Int
    n = Int(self.size)
    own = self.player
    for i in 1:n
        px, py = x + i * dx, y + i * dy
        (1 <= px <= n && 1 <= py <= n) || return 0
        stone = self.state[px, py]
        if stone == own
            # The run ends at the first own stone; everything before it flips.
            return i - 1
        elseif stone != 0 - own
            return 0
        end
    end
    return 0
end

"""
    isDone(self::Var)

Return `true` when neither side has a legal action.

Side effect: when only the side to move is stuck, `self.player` is switched to
the other side (the turn is passed) and `false` is returned.
"""
function isDone(self::Var)::Bool
    isempty(getLegalAction(self)) || return false
    self.player = 0 - self.player
    isempty(getLegalAction(self)) || return false
    self.player = 0 - self.player
    return true
end

"""
    action(self::Var, action::Int8)

Place a stone of `self.player` on the square encoded by `action`
(`(x - 1) + (y - 1) * size`, the encoding produced by `getLegalAction`) and
flip every opposing run that the new stone brackets. `self.player` is not
changed. Legality is not checked.
"""
function action(self::Var, action::Int8)
    n = Int(self.size)
    x = action % n + 1
    y = fld(action, n) + 1
    self.state[x, y] = self.player
    for (dx, dy) in DIRECTIONS
        # Flip only up to the first own stone in this direction, matching
        # the bracketing rule used by `legalAction`.
        for i in 1:captured(self, x, y, dx, dy)
            self.state[x + i * dx, y + i * dy] = self.player
        end
    end
    return nothing
end

"""
    getLegalAction(self::Var)

Return the encoded actions (`(x - 1) + (y - 1) * size`) of every empty square
on which `self.player` may play, scanning x in the outer loop and y in the
inner loop.
"""
function getLegalAction(self::Var)::Array{Int8, 1}
    n = Int(self.size)
    actions = Int8[]
    for x in 1:n, y in 1:n
        self.state[x, y] == 0 || continue
        if legalAction(self, x, y)
            push!(actions, (x - 1) + (y - 1) * n)
        end
    end
    return actions
end

"""
    legalAction(self::Var, x, y)

Return `true` when a stone of `self.player` at (x, y) would bracket at least
one opposing stone in some direction. Whether the square is empty is not
checked here.
"""
function legalAction(self::Var, x::Integer, y::Integer)::Bool
    return any(d -> captured(self, x, y, d[1], d[2]) > 0, DIRECTIONS)
end

"""
    show(self::Var, io::IO=stdout)

Print the board to `io`: a header line of column numbers, then one line per
row with `B` for 1, `W` for -1 and `*` for empty, followed by the row number.
"""
function show(self::Var, io::IO=stdout)
    n = Int(self.size)
    for i in 1:n
        print(io, i, " ")
    end
    println(io)
    for i in 1:n
        for j in 1:n
            stone = self.state[i, j]
            if stone == 1
                print(io, "B ")
            elseif stone == -1
                print(io, "W ")
            else
                print(io, "* ")
            end
        end
        println(io, i)
    end
    println(io)
    return nothing
end

end

main.jl

include("funcs.jl")
include("genetic.jl")
include("game.jl")
using .Tool
using .Game
using .GA

"""
    train(step::Int64)

Run `step` generations of the genetic algorithm, starting from
`GA.initialGenerat(0.1)`, and return the gene vector of the first parent
chosen by a final `GA.choice` round.

Each generation selects two parents, builds the next population by crossover
and applies mutation. Progress is printed once per generation.
"""
function train(step::Int64)::Array{Float32, 1}
    population = GA.initialGenerat(0.1)
    for generation in 1:step
        println("trainnig step : ", generation)
        parents = GA.choice(population)
        offspring = GA.crossover(population, parents)
        population.genom = GA.mutation(population, offspring)
    end
    # One more tournament on the final population picks the individual to return.
    return GA.choice(population)[1]
end


# Ask for x and y until they name one of the legal `actions`; returns the
# encoded action, or `nothing` when the input stream has ended.
function readMove(actions::Array{Int8, 1}, boardSize::Integer)::Union{Int8, Nothing}
    while true
        xText = Base.prompt("x")
        xText === nothing && return nothing
        yText = Base.prompt("y")
        yText === nothing && return nothing

        x, y = tryparse(Int, xText), tryparse(Int, yText)
        if x !== nothing && y !== nothing &&
           1 <= x <= boardSize && 1 <= y <= boardSize
            act = (y - 1) + (x - 1) * boardSize
            act in actions && return Int8(act)
        end
        println("illegal move, try again")
    end
end

"""
    play(genom::Array{Float32, 1})

Play an interactive 8x8 game on the terminal against the individual `genom`.

The human is player 1 and enters x and y at the prompts; input is re-requested
until it names a legal move. The AI is player -1: it tries every legal action,
scores the resulting board with `Tool.getPoint` and plays the first action
with the highest score. Genes 1-10 and 11-20 are expanded into two value
tables and `abs(60 * genom[21])` is the move count compared against the number
of moves played to choose between them. The board is printed before every
turn. The game stops when neither side can move or the input stream ends.
"""
function play(genom::Array{Float32, 1})
    turn = 0
    switchTurn = abs(60 * genom[21])
    tables = [Tool.deploy(genom[1:10]), Tool.deploy(genom[11:20])]
    self = Game.init(Int8(8))

    while true
        Game.show(self)
        Game.isDone(self) && break

        actions = Game.getLegalAction(self)
        if isempty(actions)
            self.player = 0 - self.player
            continue
        end

        if self.player == 1
            act = readMove(actions, self.size)
            act === nothing && return nothing
            println(act)
            Game.action(self, act)
        else
            table = switchTurn < turn ? tables[1] : tables[2]
            snapshot = copy(self.state)
            points = Float32[]
            for candidate in actions
                Game.action(self, candidate)
                push!(points, Tool.getPoint(self.state, table, self.player))
                # Undo the trial move so that every candidate is scored from
                # the same position, as `Tool.selfPlay` does.
                self.state = copy(snapshot)
            end
            Game.action(self, actions[argmax(points)])
        end
        self.player = 0 - self.player
        turn += 1
    end
    return nothing
end


# Script entry point: train for 10 generations, then play an interactive
# game against the returned individual.
dna = train(10)
play(dna)

main.jlにはtrain関数とplay関数が入っている。train関数は引数に学習ステップ数を受け取り、戻り値として最も優れた個体のDNAを返す。play関数では、個体のDNAを入力することでその個体と戦うことができる。今回はtrainを10ステップ繰り返した個体と対戦している。

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up