More than 5 years have passed since last update.

Rで誤差逆伝播勾配降下法バッチ訓練によるニューラルネットワーク回帰

R
PRML

Last updated at 2013-02-28Posted at 2013-02-09

PRML図5.3と同様に、与えられたサンプルを近似するような出力を行うニューラルネットワークを構成して、ネットワーク訓練によって重みを学習します。訓練に伴い、隠れユニットの出力、重みの値、サンプルとの二乗和誤差、出力ユニットの出力が変化する様子を図示します。

ネットワークは1入力、3隠れユニット、1出力ユニットを持ちますが、図5.1のように、隠れユニットと出力ユニットに対してバイアス1を入力するようにしています。隠れユニットの活性化関数はz=tanh(a)、出力ユニットの活性化関数はz=aです。重みwの初期値はランダムに選んでいます。誤差関数の微分は誤差逆伝播によって求め、式5.57のようにバッチ訓練により全サンプルからの誤差をまとめて評価しています。誤差関数の極小化は式5.41のように、（恣意的に選んだ）一定の学習率パラメータによる素朴な勾配降下法によっているため、収束が遅く、また一時的に誤差関数が増大する様子も見られます。

frame()
set.seed(0)
par(mfcol=c(2, 3))
par(mar=c(3, 3, 1, 0.1))
par(mgp=c(2, 1, 0))
xrange <- c(-1.5, 1.5)
yrange <- c(-2, 2)
x <- seq(-1, 1, 1/25)
Zi <- rbind(x, 1) # bias input
# t <- x^2
t <- sin(x * pi)
# t <- abs(x)
# t <- sign(x)
ITERATION <- 1500
I <- 2
J <- 4
K <- 1
Wji <- matrix(runif((J-1) * I), nrow=J - 1, ncol=I)
Wkj <- matrix(1, nrow=K, ncol=J)
activateJ <- function(a) {
    tanh(a)
}
dActivateJ <- function(a) {
    1 - tanh(a)^2
}
activateK <- function(a) {
    a
}
hidden <- function(x,j) {
    Aj <- Wji %*% rbind(x, 1) # append bias input
    Zj <- activateJ(Aj)
    Zj[j,]
}
estimate <- function(x){
    Aj <- Wji %*% rbind(x, 1) # append bias input
    Zj <- activateJ(Aj)
    Zj <- rbind(Zj, 1) # append bias input
    Ak <- Wkj %*% Zj
    Zk <- activateK(Ak)
    Zk
}
weights <- c()

for (iteration in 1:ITERATION) {
    col <- rainbow(ITERATION)[ITERATION / 2 + iteration / 2]

    par(mfg=c(1, 1))
    par(new=T)
    curve(hidden(x, 1), type="l", xlim=xrange, ylim=yrange, col=col)

    par(mfg=c(1, 2))
    par(new=T)
    curve(hidden(x, 2), type="l", xlim=xrange, ylim=yrange, col=col)

    par(mfg=c(1, 3))
    par(new=T)
    curve(hidden(x, 3), type="l", xlim=xrange, ylim=yrange, col=col)

    par(mfg=c(2, 3))
    par(new=T)
    curve(estimate, type="l", xlim=xrange, ylim=yrange, col=col)
    par(new=T)
    plot(x, t, xlim=xrange, ylim=yrange, xlab="", ylab="")

    par(mfg=c(2, 2))
    par(new=T)
    error <- log(sum((estimate(x) - t)^2 / 2))
    plot(iteration, error, type="p", cex=0.1, xlim=c(1, ITERATION), ylim=c(-5, 5), ylab="log(E(w))", col=col)

    # forward propagation
    Aj <- Wji %*% rbind(x, 1) # append bias input
    Zj <- activateJ(Aj)
    Zj <- rbind(Zj, 1) # append bias input
    Ak <- Wkj %*% Zj
    Zk <- activateK(Ak)

    # backward propagation
    deltaK <- Zk - t
    dEdWkj <- c()
    for (j in 1:nrow(Zj)) {
        for (k in 1:nrow(deltaK)) {
            dEdWkj <- rbind(dEdWkj, Zj[j,] * deltaK[k,])
        }
    }
    deltaJ <- dActivateJ(Aj[-J,]) * Wkj[,-J] %*% deltaK # no need to compute for a bias hidden unit
    dEdWji <- c()
    for (i in 1:nrow(Zi)) {
        for (j in 1:nrow(deltaJ)) {
            dEdWji <- rbind(dEdWji, Zi[i,] * deltaJ[j,])
        }
    }
    dEdWkjSum <- rowSums(dEdWkj)
    dEdWjiSum <- rowSums(dEdWji)
    dEdWjiMat <- matrix(dEdWjiSum, ncol=I, byrow=F)

    # gradient descent
    Wkj <- Wkj - 0.01 * dEdWkjSum
    Wji <- Wji - 0.01 * dEdWjiMat

    weights <- rbind(weights, c(as.vector(Wji), as.vector(Wkj)))
    par(mfg=c(2, 1))
    matplot(1:iteration, weights, type="l", lty=1, xlim=c(1, ITERATION), ylim=c(-10, 10), ylab="w")
}
tail(weights)

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up