LoginSignup
2
2

More than 5 years have passed since last update.

マルコフ連鎖を使った文章生成サンプル

Posted at

Go 言語タグのついた投稿がなかったので、今なら一番乗りできると思ってやった。動けばなんでも良かった。

和布蕪の(デフォルトの)出力をパースして単純マルコフ連鎖を作成し、適当な文章を印字するプログラム。遷移確率表が隣接行列なので空間効率は悪いです。

使い方

$mecab aozora/弓町より.txt | go run markov.go
 食うべきである。珍味ないしはそれらの意味でなければならぬものである。

コード

markov.go
package main

import (
    "bufio"
    "errors"
    "fmt"
    "io"
    "log"
    "math/rand"
    "os"
    "strings"
    "time"
)

type Morpheme struct{ surface, pos string }

var (
    BOS = Morpheme{surface: "BOS", pos: ""}
    EOS = Morpheme{surface: "EOS", pos: ""}
)

func MakeMorphemeFromLine(line string) (morph Morpheme, err error) {
    fields := strings.Split(line, "\t")
    if len(fields) != 2 {
        err = errors.New("Invalid format: \"" + line + "\"")
        return
    }

    surf, rest := fields[0], fields[1]
    feats := strings.Split(rest, ",")
    morph = Morpheme{surface: surf, pos: feats[0]}
    return
}

type MarkovState int

const INITIAL_STATE MarkovState = 0

type MarkovChain struct {
    currentState MarkovState
    morphemes    []Morpheme
    chain        [][]float64
    states       map[Morpheme]MarkovState
}

func MakeMarkovChain(morphs []Morpheme) *MarkovChain {
    states := map[Morpheme]MarkovState{}
    nextState := INITIAL_STATE
    uniqueMorphs := make([]Morpheme, 0)
    for _, morph := range morphs {
        if _, ok := states[morph]; !ok {
            states[morph] = nextState
            uniqueMorphs = append(uniqueMorphs, morph)
            nextState++
        }
    } // Now |nextState| holds the number of kinds of morphemes.

    // Initialize transition count/probability tables.
    numMorphKinds := int(nextState)
    transitionCount := make([][]int, numMorphKinds)
    chain := make([][]float64, numMorphKinds)
    for i := 0; i < numMorphKinds; i++ {
        transitionCount[i] = make([]int, numMorphKinds)
        chain[i] = make([]float64, numMorphKinds)
    }

    prevMorph := morphs[0]
    for i := 1; i < len(morphs); i++ {
        currMorph := morphs[i]
        transitionCount[states[prevMorph]][states[currMorph]]++

        if currMorph == EOS {
            i++
            if i >= len(morphs) {
                break
            }
            prevMorph = morphs[i] // |morphs[i]| must point BOS.
            continue
        }

        prevMorph = currMorph
    }

    for i := 0; i < numMorphKinds; i++ {
        totalCount := 0
        for j := 0; j < numMorphKinds; j++ {
            totalCount += transitionCount[i][j]
        }

        if totalCount == 0 {
            continue
        }
        for j := 0; j < numMorphKinds; j++ {
            chain[i][j] = float64(transitionCount[i][j]) / float64(totalCount)
        }
    }

    return &MarkovChain{
        currentState: INITIAL_STATE,
        morphemes:    uniqueMorphs,
        chain:        chain,
        states:       states,
    }
}

func (chain MarkovChain) nextState(state MarkovState) (nextState MarkovState, err error) {
    if int(state) < 0 || int(state) >= len(chain.morphemes) {
        err = errors.New("Invalid state.")
        return
    }

    if state == chain.states[EOS] {
        err = errors.New("No next state.")
        return
    }

    r := rand.Float64()
    for i, p := range chain.chain[state] {
        if r < p {
            nextState = MarkovState(i)
            return
        } else {
            r -= p
        }
    }

    panic("Cannot reach here.")
}

func (chain MarkovChain) morphemeFor(state MarkovState) (morph Morpheme, err error) {
    if int(state) < 0 || int(state) >= len(chain.morphemes) {
        err = errors.New("Invalid state.")
        return
    }

    morph = chain.morphemes[state]
    return
}

func (chain *MarkovChain) NextMorpheme() (morph Morpheme) {
    chain.currentState, _ = chain.nextState(chain.currentState)
    morph, _ = chain.morphemeFor(chain.currentState)
    return
}

func (chain *MarkovChain) GenerateSentence() (sent string) {
    chain.currentState = INITIAL_STATE
    for morph := chain.NextMorpheme(); morph != EOS; morph = chain.NextMorpheme() {
        sent += morph.surface
    }
    return
}

func main() {
    in := bufio.NewReader(os.Stdin)
    morphs := make([]Morpheme, 0)
    for {
        lineBytes, _, err := in.ReadLine()
        if err == io.EOF {
            break
        }
        if len(lineBytes) == 0 {
            continue
        }

        morphs = append(morphs, BOS)
        for {
            if err != nil {
                log.Fatal(err)
                os.Exit(-1)
            }

            line := string(lineBytes)
            if line == "EOS" {
                break
            }

            if morph, err := MakeMorphemeFromLine(line); err == nil {
                morphs = append(morphs, morph)
            } else {
                log.Fatal(err)
                os.Exit(-1)
            }

            lineBytes, _, err = in.ReadLine()
        }
        morphs = append(morphs, EOS)
    }

    chain := MakeMarkovChain(morphs)
    rand.Seed(time.Now().Unix())
    fmt.Println(chain.GenerateSentence())
}
2
2
0

Register as a new user and use Qiita more conveniently

  1. You get articles that match your needs
  2. You can efficiently read back useful information
  3. You can use dark theme
What you can do with signing up
2
2