More than 5 years have passed since last update.

【機械学習】Goでパーセプトロン実装

Last updated at 2017-08-11 / Posted at 2017-08-05

Goで機械学習フルスクラッチシリーズ

この記事は、自身のブログ、Data Science Struggleでも掲載予定。許可なき掲載とかではない。

概略

Goでパーセプトロンを書いてみる。最近Goの勉強を始めたので機械学習界のHello world、パーセプトロンを書いていく。
GoにもPythonのnumpyのような行列計算ライブラリはあるが、今回はそれは使わずに標準提供されているTypeのみを用いて作成する。
機械学習というと、ここ最近はRかPythonで行うのが一般的で、sklearnなどのライブラリを使用していないコードも大抵はPythonでの例なのでお試しがてら作ってみる。

パーセプトロンとは？

機械学習アルゴリズムの一種。基本的な説明はライブラリに頼らないパーセプトロン実装のほうで書いたのでそちらを参照。
仕組みがシンプルであり、機械学習コーディングの練習の第一歩にはちょうど良い。

コード

コードは以下の通り。使用しているデータはirisだが、今回はHello worldが目的なので、irisデータのラベルを勝手に二つにして使用する。（本来は三つのラベルがある。）


package main

import (
	"os"
	"encoding/csv"
	"io"
	"math/rand"
	"strconv"
	"fmt"
)

// main loads iris.csv, turns every record into a four-value feature vector
// plus a binary label, and trains a Perceptron on the result.
func main() {
	// The data set is opened relative to the current working directory.
	file, err := os.Open("iris.csv")
	if err != nil {
		panic(err)
	}
	defer file.Close()

	// Read every CSV record into memory before converting anything.
	reader := csv.NewReader(file)
	reader.Comma = ','
	reader.LazyQuotes = true
	var rows [][]string
	for {
		row, readErr := reader.Read()
		if readErr == io.EOF {
			break
		}
		if readErr != nil {
			panic(readErr)
		}
		rows = append(rows, row)
	}

	// Split each record into explanatory variables (first four columns)
	// and the response variable (fifth column).
	samples := make([][]float64, 0, len(rows))
	labels := make([]float64, 0, len(rows))
	for _, row := range rows {
		// Convert the string fields into float64 values.
		features := make([]float64, 0, 4)
		for _, field := range row[:4] {
			value, parseErr := strconv.ParseFloat(field, 64)
			if parseErr != nil {
				panic(parseErr)
			}
			features = append(features, value)
		}
		samples = append(samples, features)

		// Collapse the species column into two classes:
		// "Iris-setosa" becomes -1, everything else becomes +1.
		label := 1.0
		if row[4] == "Iris-setosa" {
			label = -1.0
		}
		labels = append(labels, label)
	}

	// Train with a learning rate of 0.01 for 100 passes over the data.
	model := Perceptron{eta: 0.01, weights: []float64{}, iterNum: 100}
	model.fit(samples, labels)
}

// Perceptron holds the hyper-parameters and learned weights of a
// single-layer perceptron classifier.
type Perceptron struct {
	// eta is the learning rate that scales every weight update.
	eta     float64
	// weights holds the bias term at index 0 followed by one weight per feature.
	weights []float64
	// iterNum is the number of passes fit makes over the training data.
	iterNum int
}

// activate is the step function of the perceptron: it maps a linear
// combination to the class label 1.0 when the value is strictly positive
// and to -1.0 otherwise (the threshold is 0).
func activate(linearCombination float64) float64 {
	if linearCombination > 0 {
		return 1.0
	}
	return -1.0
}

// predict classifies a single sample x and returns 1.0 or -1.0.
//
// It computes the linear combination of the features and their weights,
// sum(x[i] * p.weights[i+1]) + p.weights[0], where p.weights[0] is the bias
// term, and passes the result to activate. p.weights must hold at least
// len(x)+1 values.
func (p *Perceptron) predict(x []float64) float64 {
	var linearCombination float64

	// Each feature is scaled by its own weight; adding the two instead of
	// multiplying would make the result independent of the weight/feature
	// pairing and is not a linear combination.
	for i, feature := range x {
		linearCombination += feature * p.weights[i+1]
	}
	linearCombination += p.weights[0]
	return activate(linearCombination)
}

// fit trains the perceptron on the samples in X with the target labels in Y.
//
// The weights are re-initialised on every call: p.weights[0] (the bias)
// starts at 1.0 and one weight per feature of X[0] is drawn from
// rand.NormFloat64. The data is then swept p.iterNum times. After each sample
// the bias moves by p.eta * (Y[i] - prediction) and every feature weight by
// that amount times the feature value. After each sweep the fraction of
// samples that caused a non-zero update is printed to standard output.
//
// An empty X leaves the model with no weights and returns without training.
// Y is indexed in parallel with X, so it must hold at least len(X) labels.
func (p *Perceptron) fit(X [][]float64, Y []float64) {
	// Without samples there is no feature count to size the weights from.
	if len(X) == 0 {
		p.weights = []float64{}
		return
	}

	// Initialise the weights: bias first, then one random weight per feature.
	p.weights = make([]float64, 0, len(X[0])+1)
	p.weights = append(p.weights, 1.0)
	for range X[0] {
		p.weights = append(p.weights, rand.NormFloat64())
	}

	// Update the weights sample by sample.
	for iter := 0; iter < p.iterNum; iter++ {
		misclassified := 0
		for i, sample := range X {
			// update is zero when the prediction already matches the label.
			update := p.eta * (Y[i] - p.predict(sample))
			p.weights[0] += update
			for j, feature := range sample {
				p.weights[j+1] += update * feature
			}
			if update != 0 {
				misclassified++
			}
		}
		fmt.Println(float64(misclassified) / float64(len(Y)))
	}
}

パーツごとに見ていく

// Perceptron holds the hyper-parameters and learned weights of a
// single-layer perceptron classifier.
type Perceptron struct {
	// eta is the learning rate that scales every weight update.
	eta     float64
	// weights holds the bias term at index 0 followed by one weight per feature.
	weights []float64
	// iterNum is the number of passes fit makes over the training data.
	iterNum int
}

構造体で設定しているのはそれぞれ、

eta : 学習係数
weights : 重み
iterNum : 学習のためのデータの読み込ませ回数

となる。

// activate is the step function of the perceptron: it maps a linear
// combination to the class label 1.0 when the value is strictly positive
// and to -1.0 otherwise (the threshold is 0).
func activate(linearCombination float64) float64 {
	if linearCombination > 0 {
		return 1.0
	}
	return -1.0
}

入力データと重みとの線形結合値を入力として受付け、1か-1を返す。閾値は0。

// predict classifies a single sample x and returns 1.0 or -1.0.
//
// It computes the linear combination of the features and their weights,
// sum(x[i] * p.weights[i+1]) + p.weights[0], where p.weights[0] is the bias
// term, and passes the result to activate. p.weights must hold at least
// len(x)+1 values.
func (p *Perceptron) predict(x []float64) float64 {
	var linearCombination float64

	// Each feature is scaled by its own weight; adding the two instead of
	// multiplying would make the result independent of the weight/feature
	// pairing and is not a linear combination.
	for i, feature := range x {
		linearCombination += feature * p.weights[i+1]
	}
	linearCombination += p.weights[0]
	return activate(linearCombination)
}

予測のためのメソッド。データと重みとの線形結合値を計算しactivate()に投げてデータに対する予測値を返す。ここで、p.weights[0]はバイアス項。


// fit trains the perceptron on the samples in X with the target labels in Y.
//
// The weights are re-initialised on every call: p.weights[0] (the bias)
// starts at 1.0 and one weight per feature of X[0] is drawn from
// rand.NormFloat64. The data is then swept p.iterNum times. After each sample
// the bias moves by p.eta * (Y[i] - prediction) and every feature weight by
// that amount times the feature value. After each sweep the fraction of
// samples that caused a non-zero update is printed to standard output.
//
// An empty X leaves the model with no weights and returns without training.
// Y is indexed in parallel with X, so it must hold at least len(X) labels.
func (p *Perceptron) fit(X [][]float64, Y []float64) {
	// Without samples there is no feature count to size the weights from.
	if len(X) == 0 {
		p.weights = []float64{}
		return
	}

	// Initialise the weights: bias first, then one random weight per feature.
	p.weights = make([]float64, 0, len(X[0])+1)
	p.weights = append(p.weights, 1.0)
	for range X[0] {
		p.weights = append(p.weights, rand.NormFloat64())
	}

	// Update the weights sample by sample.
	for iter := 0; iter < p.iterNum; iter++ {
		misclassified := 0
		for i, sample := range X {
			// update is zero when the prediction already matches the label.
			update := p.eta * (Y[i] - p.predict(sample))
			p.weights[0] += update
			for j, feature := range sample {
				p.weights[j+1] += update * feature
			}
			if update != 0 {
				misclassified++
			}
		}
		fmt.Println(float64(misclassified) / float64(len(Y)))
	}
}

学習部分。データを読み込んで重みを更新していく。

// main loads iris.csv, turns every record into a four-value feature vector
// plus a binary label, and trains a Perceptron on the result.
func main() {
	// The data set is opened relative to the current working directory.
	file, err := os.Open("iris.csv")
	if err != nil {
		panic(err)
	}
	defer file.Close()

	// Read every CSV record into memory before converting anything.
	reader := csv.NewReader(file)
	reader.Comma = ','
	reader.LazyQuotes = true
	var rows [][]string
	for {
		row, readErr := reader.Read()
		if readErr == io.EOF {
			break
		}
		if readErr != nil {
			panic(readErr)
		}
		rows = append(rows, row)
	}

	// Split each record into explanatory variables (first four columns)
	// and the response variable (fifth column).
	samples := make([][]float64, 0, len(rows))
	labels := make([]float64, 0, len(rows))
	for _, row := range rows {
		// Convert the string fields into float64 values.
		features := make([]float64, 0, 4)
		for _, field := range row[:4] {
			value, parseErr := strconv.ParseFloat(field, 64)
			if parseErr != nil {
				panic(parseErr)
			}
			features = append(features, value)
		}
		samples = append(samples, features)

		// Collapse the species column into two classes:
		// "Iris-setosa" becomes -1, everything else becomes +1.
		label := 1.0
		if row[4] == "Iris-setosa" {
			label = -1.0
		}
		labels = append(labels, label)
	}

	// Train with a learning rate of 0.01 for 100 passes over the data.
	model := Perceptron{eta: 0.01, weights: []float64{}, iterNum: 100}
	model.fit(samples, labels)
}

実行部。データの読み込みと予測を行う前の処理を行なっている。本来Irisデータは三つの分類先を持つが、今回は『Hello world』なので、簡便のために分類先を二つに変更している。

感想

Goでの機械学習コーディングの練習だったが、書いていて、『これだとかなり無駄が多いんだろなー』といった感じ。Goでのデータ操作のノウハウを覚えていきたい。

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up