More than 5 years have passed since last update.

Lua版ゼロから作るDeep Learning その１３［ニューラルネットワークに関する勾配］

Last updated at 2017-07-12Posted at 2017-07-12

過去記事まとめ

はじめに

　今回は原書4章のニューラルネットワークに関する勾配の部分を実装します。具体的にいうと損失関数の重みに対する勾配を求めます。これを求めることによって最適な重みを求めていくことができ、これがニューラルネットワークを最適化する第一歩となります。
　スクリプトは以下の通りです。

gradient_simplenet.lua

--Copyright (C) 2017  Kazuki Nakamae
--Released under MIT License
--license available in LICENSE file

common = require './common'


--- simpleNet class (table-based pseudo object).
-- Builds a minimal neural network that owns a single 2x3 weight tensor.
-- @param isConstW when truthy, use the fixed weights taken from the book;
--        otherwise initialise them with torch.randn(2,3) {Type:Bool}
simpleNet={}
simpleNet.new = function(isConstW)
        -- Methods (stored on every instance, invoked as net:predict / net:loss)

        -- Forward pass: x multiplied with the instance weights via common.mulTensor.
        local function predict(self, x)
            return common.mulTensor(x, self.W)
        end

        -- Loss: prediction -> softmax -> cross entropy against the labels t.
        local function loss(self, x, t)
            local scores = self:predict(x)
            local probs = common.softmax(scores)
            local value = common.cross_entropy_error(probs, t)

            return value
        end

        -- Member variable: the weight tensor
        local weights
        if not isConstW then
            weights = torch.randn(2,3)
        else
            -- weights introduced in the book
            weights = torch.Tensor({{0.47355232, 0.9977393, 0.84668094}, {0.85557411, 0.03563661, 0.69422093}})
        end
        print("重みパラメータ : ")
        print(weights:double())

        return { W = weights, predict = predict, loss = loss }
end

-- Input vector
local x = torch.Tensor({0.6, 0.9})
-- Correct label (one-hot)
local t = torch.Tensor({0, 0, 1})

-- Build the network (using the fixed weights from the book)
local net = simpleNet.new(true)

-- Check the prediction and the loss computation
local p = net:predict(x)
print("推定値 : ")
print(p)
print("推定値の最大値とそのインデックス : ")
print(torch.max(p, 1))
local lossValue = net:loss(x, t)
print("損失関数の値 : "..lossValue.."\n")

-- Gradient of the loss with respect to the weights.
-- f ignores its argument w and always evaluates the loss through net, so it
-- only reflects changes that are made to net.W in place.
-- NOTE(review): this relies on common.numerical_gradient perturbing the tensor
-- it is given rather than a copy — confirm against the common module.
local function f(w)
    return net:loss(x, t)
end
local dW = common.numerical_gradient(f, net.W)

print("入力値での損失関数に対する重みの勾配")
print(dW)

　以前に実装した関数などはモジュール（common）として統合しました。内容については補足をご覧ください。
　今回からニューラルネットをクラスとして実装します。しかしLuaはクラスを正式な言語仕様として含んでいないので、テーブル内にコンストラクタ（new関数）とメンバ変数、関数を定義することで擬似的に実装します。
　
　今回の処理では、ある入力値と正解ラベルでの損失関数を、重み(W)に関する無名関数fとして定義し、推定と損失関数計算の確認を行ったうえで、入力値での損失関数に対する重みの勾配 $\partial f / \partial w_{ij}$ を算出しています。
　
　では実行結果をみてみます。

実行結果

$ th gradient_simplenet.lua
重みパラメータ : 	
 0.4736  0.9977  0.8467
 0.8556  0.0356  0.6942
[torch.DoubleTensor of size 2x3]

推定値 : 	
 1.0541
 0.6307
 1.1328
[torch.DoubleTensor of size 3]

推定値の最大値とそのインデックス : 	
 1.1328
[torch.DoubleTensor of size 1]

 3
[torch.LongTensor of size 1]

損失関数の値 : 0.92806828578641
	
入力値での損失関数に対する重みの勾配	
 0.2192  0.1436 -0.3628
 0.3289  0.2153 -0.5442
[torch.DoubleTensor of size 2x3]

　推定値が本書と若干ずれるのが気になっていますが、勾配そのものは原書と全く同じように実装できました。

おわりに

　今回は以上です。

　次回からは今回の内容を踏まえて学習の枠組みを作成していきます。まず２層のニューラルネットワークを実装していきましょう。
　
　ありがとうございました。

補足：commonモジュール

　以下のような構成です。
init.lua
exTorch.lua
function.lua
gradient.lua

　init.lua内で他の3つのファイルの内容をhelpも含めて一つのモジュール（テーブル）として統合するようになっています。

init.lua

--Copyright (C) 2017  Kazuki Nakamae
--Released under MIT License
--license available in LICENSE file

require './function.lua'
require './gradient.lua'
require './exTorch.lua'

-- Module table; the wrappers defined below are attached to it and it is
-- returned at the end of this file.
local common = {}

-- One-line usage strings keyed by function name. Each wrapper passes its own
-- entry to xlua.error when a required argument is missing.
local help = {
softmax = [[softmax(x) -- Normalize input Tensor]],
cross_entropy_error = [[cross_entropy_error(y, t) -- Calculate the cross entropy between y and t]],
numerical_gradient = [[numerical_gradient(f, X) -- Calculate gradient of a given function f(X)]],
mulTensor = [[mulTensor(A, B) -- Calculate multiple of tensor A and tensor B]],
tensor2scalar = [[tensor2scalar(tensor) -- Convert tensor to scalar]],
makeIterTensor = [[makeIterTensor(vector,iter) -- Generate tensor whose rows are repeated]],
getRandIndex = [[getRandIndex(datasize,getsize,seed) -- Get random index of tensor which have elements of datasize]],
getElement = [[getElement(readTensor,...) -- Get value of readTensor[...]. When #{...}>=2, Access value of readTensor according to each value of elements in {...}]]
}

--- Argument-checked entry point for softmax.
-- When x is nil or false, xlua.error is called with the message, the
-- function name and the help text; afterwards the global softmax is invoked.
-- @param x value forwarded to softmax
-- @return result of softmax(x)
common.softmax = function(x)
  local problem = (not x) and 'x must be supplied' or nil
  if problem then
    xlua.error(problem, 'common.softmax', help.softmax)
  end
  return softmax(x)
end

--- Argument-checked entry point for cross_entropy_error.
-- The first missing argument (y is checked before t) is reported through
-- xlua.error; afterwards the global cross_entropy_error is invoked.
-- @param y first value forwarded to cross_entropy_error
-- @param t second value forwarded to cross_entropy_error
-- @return result of cross_entropy_error(y, t)
common.cross_entropy_error = function(y, t)
  local problem
  if not y then
    problem = 'y must be supplied'
  elseif not t then
    problem = 't must be supplied'
  end
  if problem then
    xlua.error(problem, 'common.cross_entropy_error', help.cross_entropy_error)
  end
  return cross_entropy_error(y, t)
end

--- Argument-checked entry point for numerical_gradient.
-- The first missing argument (f is checked before X) is reported through
-- xlua.error; afterwards the global numerical_gradient is invoked.
-- @param f function forwarded to numerical_gradient
-- @param X value forwarded to numerical_gradient
-- @return result of numerical_gradient(f, X)
common.numerical_gradient = function(f, X)
  local problem
  if not f then
    problem = 'f must be supplied'
  elseif not X then
    problem = 'X must be supplied'
  end
  if problem then
    xlua.error(problem, 'common.numerical_gradient', help.numerical_gradient)
  end
  return numerical_gradient(f, X)
end

--- Argument-checked entry point for mulTensor.
-- The first missing argument (A is checked before B) is reported through
-- xlua.error; afterwards the global mulTensor is invoked.
-- @param A first value forwarded to mulTensor
-- @param B second value forwarded to mulTensor
-- @return result of mulTensor(A, B)
common.mulTensor = function(A, B)
  local problem
  if not A then
    problem = 'A must be supplied'
  elseif not B then
    problem = 'B must be supplied'
  end
  if problem then
    xlua.error(problem, 'common.mulTensor', help.mulTensor)
  end
  return mulTensor(A, B)
end

--- Argument-checked entry point for tensor2scalar.
-- When tensor is nil or false, xlua.error is called with the message, the
-- function name and the help text; afterwards the global tensor2scalar is
-- invoked.
-- @param tensor value forwarded to tensor2scalar
-- @return result of tensor2scalar(tensor)
common.tensor2scalar = function(tensor)
  -- The guard must test the parameter itself. It used to test `A`, which is
  -- not defined in this function, so the error was reported on every call.
  if not tensor then
    xlua.error('tensor must be supplied', 
        'common.tensor2scalar', 
        help.tensor2scalar)
  end
  return tensor2scalar(tensor)
end

--- Argument-checked entry point for makeIterTensor.
-- The first missing argument (vector is checked before iter) is reported
-- through xlua.error; afterwards the global makeIterTensor is invoked.
-- @param vector first value forwarded to makeIterTensor
-- @param iter second value forwarded to makeIterTensor
-- @return result of makeIterTensor(vector, iter)
common.makeIterTensor = function(vector,iter)
  local problem
  if not vector then
    problem = 'vector must be supplied'
  elseif not iter then
    problem = 'iter must be supplied'
  end
  if problem then
    xlua.error(problem, 'common.makeIterTensor', help.makeIterTensor)
  end
  return makeIterTensor(vector,iter)
end

--- Argument-checked entry point for getRandIndex.
-- The first missing argument (checked in the order datasize, getsize, seed)
-- is reported through xlua.error; afterwards the global getRandIndex is
-- invoked.
-- @param datasize first value forwarded to getRandIndex
-- @param getsize second value forwarded to getRandIndex
-- @param seed third value forwarded to getRandIndex
-- @return result of getRandIndex(datasize, getsize, seed)
common.getRandIndex = function(datasize,getsize,seed)
  local problem
  if not datasize then
    problem = 'datasize must be supplied'
  elseif not getsize then
    problem = 'getsize must be supplied'
  elseif not seed then
    problem = 'seed must be supplied'
  end
  if problem then
    xlua.error(problem, 'common.getRandIndex', help.getRandIndex)
  end
  return getRandIndex(datasize,getsize,seed)
end

--- Argument-checked entry point for getElement.
-- When readTensor is nil or false, xlua.error is called with the message,
-- the function name and the help text; afterwards the global getElement is
-- invoked with readTensor and all extra arguments.
-- @param readTensor first value forwarded to getElement
-- @param ... further values forwarded unchanged
-- @return result of getElement(readTensor, ...)
common.getElement = function(readTensor,...)
  local problem = (not readTensor) and 'readTensor must be supplied' or nil
  if problem then
    xlua.error(problem, 'common.getElement', help.getElement)
  end
  return getElement(readTensor,...)
end

return common

　
　exTorch.lua にはtorchを使いやすくするために独自に定義した関数がはいっています。これがないと別ファイルのいくつかの関数が動かないので注意してください。
　

exTorch.lua

--- Product of two tensors.
-- Both operands are converted with :double() first. When A is 1-D and B is
-- not, the result is torch.mv(B:t(), A) (vector times matrix); every other
-- combination is delegated to the `*` operator.
-- @param A left operand (Type: Tensor)
-- @param B right operand (Type: Tensor)
-- @return the product AB
function mulTensor(A, B)
    local lhs = A:double()
    local rhs = B:double()

    -- others: anything that is not "1-D vector times non-1-D tensor"
    if lhs:dim() ~= 1 or rhs:dim() == 1 then
        local product = lhs * rhs
        return product
    end

    -- 1-D vector . matrix
    local product = torch.mv(rhs:t(), lhs)
    return product
end

--- Converts a single-element tensor to a plain Lua number.
-- Indexing with [1] strips only the first dimension, so for a 1x1 (2-D)
-- tensor a single indexing step still leaves a tensor instead of a number.
-- The first element is therefore taken repeatedly until the value is no
-- longer a tensor. A 1-D tensor needs exactly one step, as before.
-- @param tensor tensor holding one element, e.g. of size 1 or 1x1 (Type: Tensor)
-- @return the first element (Type: number)
function tensor2scalar(tensor)
    local value = tensor[1]
    while torch.isTensor(value) do
        value = value[1]
    end
    return value
end

--- Builds a matrix by cycling through the elements of a vector.
-- A torch.DoubleTensor of size (length of vector) x iter is allocated and
-- every element is filled through apply, walking vector[1], vector[2], ...
-- and wrapping back to vector[1] after the last element.
-- NOTE(review): the original description says the vector is "repeated for N
-- rows", but the tensor is allocated as (length x iter), not (iter x length)
-- — confirm which shape the callers expect.
-- NOTE(review): assumes apply visits the elements in storage order — TODO confirm.
-- @param vector 1-D vector (Type: Tensor)
-- @param iter number of repetitions (Type: number)
-- @return the filled tensor (Type: torch.DoubleTensor)
function makeIterTensor(vector,iter)
    local length = vector:size()[1]
    local filled = torch.DoubleTensor(length,iter)
    local cursor = 0
    -- apply assigns the returned value to each element in turn
    filled:apply(function()
        -- advance by one, wrapping to 1 once the end of the vector is passed
        cursor = (cursor % length) + 1
        return vector[cursor]
    end)

    return filled
end

--- Picks random indices.
-- Seeds the Torch random generator with seed via torch.manualSeed, draws
-- torch.randperm(datasize) and keeps its first getsize entries.
-- @param datasize size of the data (Type: number)
-- @param getsize how many indices to return (Type: number)
-- @param seed seed for the random generator (Type: number)
-- @return list of indices (Type: long Tensor)
function getRandIndex(datasize,getsize,seed)
    torch.manualSeed(seed)
    -- cut the first getsize entries out of a random permutation of datasize integers
    local permutation = torch.randperm(datasize)
    local head = permutation:sub(1,getsize)
    return head:long()
end

--- Reads arbitrary elements of a tensor.
-- The extra arguments are index tensors, one per dimension of readTensor.
-- For every position `order` (1 .. size of the first index tensor) the
-- order-th entry of each index tensor is collected into an index table and
-- readTensor is read at that index.
-- @param readTensor tensor to read from (Type: tensor)
-- @param ... index tensors, one per dimension of readTensor
-- @return the collected elements, wrapped with torch.Tensor (Type: Tensor)
function getElement(readTensor,...)
    local selectors = {...}
    local picked = {}
    for order = 1, selectors[1]:size()[1] do
        local position = {}
        for dim = 1, readTensor:dim() do
            position[#position + 1] = selectors[dim][order]
        end
        picked[#picked + 1] = readTensor[position]
    end
    return torch.Tensor(picked)
end

　
　function.lua にはソフトマックス関数と交差エントロピー誤差算出関数が定義されています。
　

function.lua

--- Softmax function.
-- Subtracts torch.max(x) from the input before exponentiating, then divides
-- by the sum of the exponentials. Maximum and sum are taken with the
-- single-argument torch.max / torch.sum calls.
-- NOTE(review): presumably this normalises over all elements of x at once,
-- not per row — confirm before passing batched input.
-- @param x input (Type: torch.DoubleTensor)
-- @return exp(x - max) / sum(exp(x - max))
function softmax(x)
    local shifted = x - torch.max(x)
    local numerators = torch.exp(shifted)
    local normalised = numerators / torch.sum(numerators)

    return normalised
end

--- Cross entropy error.
-- Computes -sum_i t_i * log(y_i + 1e-7) after converting both arguments
-- with :double().
-- @param y input 1, here the probabilities produced by the network {Type:Tensor}
-- @param t input 2, here the correct labels {Type:ByteTensor}
-- @return cross entropy error {Type:number}
function cross_entropy_error(y, t)
    local eps = 1e-7
    local labels = t:double()
    local log_probs = (y:double() + eps):log()
    local weighted = torch.cmul(labels, log_probs)
    return -weighted:sum()
end

　gradient.lua には勾配を求めるための関数が定義されています。
　

gradient.lua

---勾配算出関数.
-- Numerically estimates the gradient of a multivariable function at x with
-- central differences: each element is moved by +h and -h in turn, f is
-- evaluated both times, and the element is put back afterwards.
-- x is modified in place while the loop runs.
-- @param f multivariable function; it is called with x (Type: function)
-- @param x point at which the gradient is taken (Type: Tensor, 1-D)
-- @return gradient with the same size as x (Type: Tensor)
function _numerical_gradient_no_batch(f, x)
    local h = 1e-4 -- 0.0001
    local grad = x:clone():zero()

    for idx = 1, x:size()[1] do
        -- Read the element directly. Going through x:float() converted the
        -- whole tensor on every iteration and rounded the value to single
        -- precision, so the final write-back did not restore the original
        -- value exactly.
        local tmp_val = x[idx]
        x[idx] = tmp_val + h -- move only this element
        local fxh1 = f(x) -- f(x+h)

        x[idx] = tmp_val - h
        local fxh2 = f(x) -- f(x-h)
        grad[idx] = (fxh1 - fxh2) / (2*h)

        x[idx] = tmp_val -- restore the original value
    end

    return grad
end

--- Numerical gradient for tensors of rank 1 or higher.
-- A 1-D tensor is handed to _numerical_gradient_no_batch directly. Any
-- higher rank is split along its first dimension and each slice X[idx] is
-- processed recursively, so 2-D input is still handled one row at a time and
-- tensors of rank 3 and above are supported as well.
-- NOTE(review): the perturbations only reach X if X[idx] refers to the same
-- data as X (not a copy) — confirm for the tensor type in use.
-- @param f multivariable function (Type: function)
-- @param X point at which the gradient is taken (Type: Tensor)
-- @return gradient with the same size as X (Type: Tensor)
function numerical_gradient(f, X)
    if X:dim() == 1 then
        return _numerical_gradient_no_batch(f, X)
    end

    local grad = X:clone():zero()
    for idx = 1, X:size()[1] do
        -- recurse instead of assuming the slice is already 1-D
        grad[idx] = numerical_gradient(f, X[idx])
    end

    return grad
end

　以上です。

You get articles that match your needs
You can efficiently read back useful information
You can use dark theme

What you can do with signing up

Lua版 ゼロから作るDeep Learning その１３［ニューラルネットワークに関する勾配］

過去記事まとめ

はじめに

おわりに

補足：commonモジュール

Lua版ゼロから作るDeep Learning その１３［ニューラルネットワークに関する勾配］