import optimizer.*;
import network.*;
import layer.*;
import nodes.activationFunction.*;
import costFunction.*;
import matrix.*;
// Demo program: runs GradientDescent on a small network and prints the loss before,
// during and after training, then evaluates on a second data set.
// NOTE(review): this listing appears to have lost its closing braces/parentheses (and
// possibly other lines) during extraction; the code is reproduced as-is.
public class test {
public static void main(String[] str){
// Network built from Input(4, AF.RELU) and Output(1, AF.LINER).
Network net = new Network(
new Input(4, AF.RELU),
new Output(1, AF.LINER)
// Optimizer constructed with a MeanSquaredError cost function.
GradientDescent GD = new GradientDescent(
new MeanSquaredError()
// 10x2 input matrix and 10x1 answer matrix.
Matrix X = new Matrix(new double[10][2]);
Matrix T = new Matrix(new double[10][1]);
// Row i holds (0.1*i, 0.2*i); the answer is the sum of the two inputs.
for (int i = 0; i < X.row; i++){
X.matrix[i][0] = i * 0.1;
X.matrix[i][1] = i * 0.2;
T.matrix[i][0] = X.matrix[i][0] + X.matrix[i][1];
// Separate cost-function instance used only for printing the loss.
MeanSquaredError f = new MeanSquaredError();
// Loss before any training step.
Matrix Y = GD.forward(X);
System.out.println(f.calcurate(Y, T));
// 30 rounds of back propagation followed by a fresh forward pass.
for (int i = 0; i < 30; i++){
GD.back(X, Y, T);
Y = GD.forward(X);
System.out.println(f.calcurate(Y, T));
// Network output next to the expected answers.
System.out.println(Matrix.hstack(Y, T));
// Overwrite X and T in place with a second data set: row i holds (0.15*i, 0.12*i).
for (int i = 0; i < X.row; i++){
X.matrix[i][0] = i * 0.15;
X.matrix[i][1] = i * 0.12;
T.matrix[i][0] = X.matrix[i][0] + X.matrix[i][1];
// Forward pass only (no further training) on the second data set.
Y = GD.forward(X);
System.out.println("score: ");
System.out.println(f.calcurate(Y, T));
System.out.println(Matrix.hstack(Y, T));
[[2.0757 ]]
[[0.0124 ]]
[[0.0010 ]]
[[0.0001 ]]
[[0.0000 ]]
[[0.0000 ]]
[[0.0037 0.0000 ]
[0.3029 0.3000 ]
[0.6021 0.6000 ]
[0.9012 0.9000 ]
[1.2004 1.2000 ]
[1.4996 1.5000 ]
[1.7988 1.8000 ]
[2.0979 2.1000 ]
[2.3971 2.4000 ]
[2.6963 2.7000 ]]
[[0.0010 ]]
[[0.0037 0.0000 ]
[0.2673 0.2700 ]
[0.5309 0.5400 ]
[0.7945 0.8100 ]
[1.0581 1.0800 ]
[1.3217 1.3500 ]
[1.5853 1.6200 ]
[1.8488 1.8900 ]
[2.1124 2.1600 ]
[2.3760 2.4300 ]]
* Stack matrices vertical.
* @param matrices Matrices to stack.
* These should not have more than two rows.
* @return New Matrix instance stacked.
public static Matrix vstack(Matrix ... matrices){
Matrix rtn = new Matrix(new double[matrices.length][matrices[0].col]);
for (int i = 0; i < rtn.row; i++){
for (int j = 0; j < rtn.col; j++){
rtn.matrix[i][j] = matrices[i].matrix[0][j];
return rtn;
* Split a matrix vertically.
* @param in Matrix to be split.
* @param num Number of split.
* @return Array of Matrix instance.
public static Matrix[] vsplit(Matrix in, int num){
Matrix[] rtn = new Matrix[num];
int size = in.row / num;
// 過不足なく分けきれることを確認
if (size * num != in.row){
System.out.println("vsplit error");
for (int i = 0; i < num; i++){
rtn[i] = new Matrix(new double[size][in.col]);
for (int j = 0; j < size; j++){
for (int k = 0; k < in.col; k++){
rtn[i].matrix[j][k] = in.matrix[i*size+j][k];
return rtn;
* Sort a matrix vertically.
* @param in Matrix to be sort.
* @param order Order of sort.
* @return Matrix instance.
public static Matrix vsort(Matrix in, int[] order){
Matrix rtn = new Matrix(new double[order.length][in.col]);
// order.lengthがin.rowより大きくても問題ない。
// 下のvsortメソッドも同様。
// 詳しくは後述。
for (int i = 0; i < order.length; i++){
for (int j = 0; j < in.col; j++){
rtn.matrix[i][j] = in.matrix[order[i]][j];
return rtn;
* Sort a matrix vertically.
* @param in Matrix to be sort.
* @param order Order of sort.
* @return Matrix instance.
public static Matrix vsort(Matrix in, ArrayList<Integer> order){
Matrix rtn = new Matrix(new double[order.size()][in.col]);
for (int i = 0; i < order.size(); i++){
for (int j = 0; j < in.col; j++){
rtn.matrix[i][j] = in.matrix[order.get(i)][j];
return rtn;
* Calcrate sum.
* @return Result of sum.
public double sum(){
double sum = 0.;
for (int i = 0; i < this.row; i++){
for (int j = 0; j < this.col; j++){
sum += this.matrix[i][j];
return sum;
* Calcrate sum.
* @param in matrix to investigate.
* @return Result of sum.
public static double sum(Matrix in){
double sum = 0.;
for (int i = 0; i < in.row; i++){
for (int j = 0; j < in.col; j++){
sum += in.matrix[i][j];
return sum;
* Calcurate output of a layer.
* @param nodes Nodes in the layer.
* @return Output of the layer.
public Matrix calA(Node[] nodes){
Matrix rtn = new Matrix(new double[nodes[0].a.row][nodes.length]);
for (int i = 0; i < rtn.row; i++){
for (int j = 0; j < rtn.col; j++){
rtn.matrix[i][j] = nodes[j].a.matrix[i][0];
return Matrix.appendCol(rtn, 1.0);
* Get a matrix of weights related to the output of a node.
* @param nodes Nodes of next layer.
* @param num Number of the node.
* @return Matrix instance.
public Matrix calW(Node[] nodes, int num){
Matrix rtn = new Matrix(new double[nodes.length][1]);
for (int i = 0; i < nodes.length; i++){
rtn.matrix[i][0] = nodes[i].w.matrix[num][0];
return rtn;
* Doing forward propagation.
* @param in input matrix.
* @return Matrix instance of output.
public Matrix forward(Matrix in){
return this.net.forward(in);
/**
 * Make data for mini-batch learning.
 * @param x Input data.
 * @param t Answer.
 * @param batchSize Batch size.
 * @param rand Random instance.
 * @return Split input data (index 0) and split answers (index 1).
 */
// Reorders the rows of x and t with one shared index list and splits both into batches.
// NOTE(review): the bodies of the three loops below and all closing braces appear to be
// missing from this listing; the code is reproduced as-is.
public Matrix[][] makeMiniBatch(Matrix x, Matrix t, int batchSize, Random rand){
// Number of batches to split into
// NOTE(review): because of the "+ 1", an extra batch is produced even when x.row is an
// exact multiple of batchSize.
int rtnSize = (int)(x.row / batchSize) + 1;
int num, i;
ArrayList<Integer> order = new ArrayList<Integer>(rtnSize);
ArrayList<Integer> check = new ArrayList<Integer>(rtnSize);
for (i = 0; i < x.row; i++){
// Indices for the reordering
for (i = 0; i < x.row; i++){
num = rand.nextInt(x.row - order.size());
// If the data does not divide evenly by the batch size, add more indices even though
// this duplicates data. The loop continues from the current value of i.
for (; i < rtnSize*batchSize; i++){
// Reorder according to the index list (the same list is used for x and t)
Matrix x_ = Matrix.vsort(x, order);
Matrix t_ = Matrix.vsort(t, order);
// Split into batches of the batch size; index 0 holds the inputs, index 1 the answers
Matrix[][] rtn = {Matrix.vsplit(x_, rtnSize), Matrix.vsplit(t_, rtnSize)};
return rtn;
import optimizer.*;
import network.*;
import layer.*;
import nodes.activationFunction.*;
import costFunction.*;
import matrix.*;
import java.util.*;
// Demo program: builds a small data set and prints the mini batches produced by
// GD.makeMiniBatch with batch size 3.
// NOTE(review): closing braces/parentheses and the print statements appear to be missing
// from this listing; the code is reproduced as-is.
public class test {
public static void main(String[] str){
Network net = new Network(
new Input(4, AF.RELU),
// new Dense(10, AF.RELU),
// new Dense(5, AF.RELU),
new Output(1, AF.LINER)
GradientDescent GD = new GradientDescent(
new MeanSquaredError()
// 10x2 input matrix and 10x1 answer matrix.
Matrix X = new Matrix(new double[10][2]);
Matrix T = new Matrix(new double[10][1]);
// Row i holds (0.1*i, 0.2*i); the answer is the sum of the two inputs.
for (int i = 0; i < X.row; i++){
X.matrix[i][0] = i * 0.1;
X.matrix[i][1] = i * 0.2;
T.matrix[i][0] = X.matrix[i][0] + X.matrix[i][1];
// Input data (unprocessed)
// Answer data (unprocessed)
// The method created above
Matrix a[][] = GD.makeMiniBatch(X, T, 3, new Random());
// Print the split input data and answer data
for (int i = 0; i < a[0].length; i++){
[[0.0000 0.0000 ]
[0.1000 0.2000 ]
[0.2000 0.4000 ]
[0.3000 0.6000 ]
[0.4000 0.8000 ]
[0.5000 1.0000 ]
[0.6000 1.2000 ]
[0.7000 1.4000 ]
[0.8000 1.6000 ]
[0.9000 1.8000 ]]
[[0.0000 ]
[0.3000 ]
[0.6000 ]
[0.9000 ]
[1.2000 ]
[1.5000 ]
[1.8000 ]
[2.1000 ]
[2.4000 ]
[2.7000 ]]
[[0.7000 1.4000 ]
[0.0000 0.0000 ]
[0.2000 0.4000 ]]
[[2.1000 ]
[0.0000 ]
[0.6000 ]]
[[0.4000 0.8000 ]
[0.8000 1.6000 ]
[0.9000 1.8000 ]]
[[1.2000 ]
[2.4000 ]
[2.7000 ]]
[[0.1000 0.2000 ]
[0.6000 1.2000 ]
[0.5000 1.0000 ]]
[[0.3000 ]
[1.8000 ]
[1.5000 ]]
[[0.3000 0.6000 ]
[0.4000 0.8000 ]
[0.7000 1.4000 ]]
[[0.9000 ]
[1.2000 ]
[2.1000 ]]
w_{new} = w_{old} - \eta \frac{\partial E}{\partial w_{old}}
* Run learning.
* @param x Input layer.
* @param t Answer.
* @param nEpoch Number of epoch.
* @return Output of this network.
public Matrix fit(Matrix x, Matrix t, int nEpoch){
Matrix y = this.forward(x);
for (int i = 0; i < nEpoch; i++){
System.out.printf("Epoch %d/%d\n", i+1, nEpoch);
this.back(x, y, t);
y = this.forward(x);
System.out.printf("loss: %.4f\n", this.cFunc.calcurate(y, t).matrix[0][0]);
return y;
* Run learning.
* @param x Input layer.
* @param t Answer.
* @param nEpoch Number of epoch.
* @param valX Input layer for validation.
* @param valT Answer for validation.
* @return Output of this network.
// Runs learning for nEpoch epochs and reports training and validation loss per epoch.
// NOTE(review): closing braces and the call that consumes the format string below
// appear to be missing from this listing; the code is reproduced as-is.
public Matrix fit(Matrix x, Matrix t, int nEpoch, Matrix valX, Matrix valT){
// Initial output, needed as the second argument of the first back() call.
Matrix y = this.forward(x);
Matrix valY;
for (int i = 0; i < nEpoch; i++){
System.out.printf("Epoch %d/%d\n", i+1, nEpoch);
this.back(x, y, t);
// Both forward passes run after back(), so both losses reflect the same state.
valY = this.forward(valX);
y = this.forward(x);
// Format string and arguments for the loss report.
"loss: %.4f - valLoss: %.4f\n",
this.cFunc.calcurate(y, t).matrix[0][0],
this.cFunc.calcurate(valY, valT).matrix[0][0]
return y;
import optimizer.*;
import network.*;
import layer.*;
import nodes.activationFunction.*;
import costFunction.*;
import matrix.*;
// Demo program: trains with validation data; here the answer is the product of the
// two inputs (the earlier demos used the sum).
// NOTE(review): closing braces/parentheses appear to be missing from this listing; the
// code is reproduced as-is.
public class test {
public static void main(String[] str){
Network net = new Network(
new Input(4, AF.RELU),
new Output(1, AF.LINER)
GradientDescent Opt = new GradientDescent(
new MeanSquaredError()
// Training data: row i holds (0.1*i, 0.2*i) and the answer is their product.
Matrix X = new Matrix(new double[10][2]);
Matrix T = new Matrix(new double[10][1]);
for (int i = 0; i < X.row; i++){
X.matrix[i][0] = i * 0.1;
X.matrix[i][1] = i * 0.2;
T.matrix[i][0] = X.matrix[i][0] * X.matrix[i][1];
// Validation data: row i holds (0.15*i, 0.1*i) and the answer is their product.
Matrix valX = new Matrix(new double[10][2]);
Matrix valT = new Matrix(new double[10][1]);
for (int i = 0; i < valX.row; i++){
valX.matrix[i][0] = i * 0.15;
valX.matrix[i][1] = i * 0.1;
valT.matrix[i][0] = valX.matrix[i][0] * valX.matrix[i][1];
// Train for 5 epochs, reporting loss on both data sets.
Opt.fit(X, T, 5, valX, valT);
Epoch 1/5
loss: 0.0408 - valLoss: 0.0374
Epoch 2/5
loss: 0.0302 - valLoss: 0.0299
Epoch 3/5
loss: 0.0307 - valLoss: 0.0304
Epoch 4/5
loss: 0.0308 - valLoss: 0.0304
Epoch 5/5
loss: 0.0308 - valLoss: 0.0304
w_{new} = w_{old} - \eta \frac{\partial E}{\partial w_{old}}
package optimizer;
import java.util.Random;
import network.*;
import costFunction.*;
import matrix.*;
import layer.*;
import nodes.*;
* Class for Stochastic Gradient Descent.
public class SGD extends Optimizer{
Random rand;
* Constructor for this class.
// No-argument constructor.
// NOTE(review): no body statements are visible in this listing, so rand is not seen
// being initialised here; confirm against the full source.
public SGD(){
* Constructor for this class.
* @param net Network to which optimization is applied.
* @param f Cost function in this net.
// Stores the network and cost function; the random generator uses the fixed seed 0.
public SGD(Network net, CostFunction f){
this.net = net;
this.cFunc = f;
rand = new Random(0);
* Constructor for this class.
* @param net Network to which optimization is applied.
* @param f Cost function in this net.
* @param eta Learning rate.
// Stores the network, cost function and learning rate; the random generator uses the
// fixed seed 0.
public SGD(Network net, CostFunction f, double eta){
this.net = net;
this.cFunc = f;
this.eta = eta;
rand = new Random(0);
* Constructor for this class.
* @param net Network to which optimization is applied.
* @param f Cost function in this net.
* @param seed Seed of random.
// Stores the network and cost function and seeds the random generator with the given seed.
// Note: an int third argument selects this overload (seed), not the double learning-rate
// overload.
public SGD(Network net, CostFunction f, int seed){
this.net = net;
this.cFunc = f;
rand = new Random(seed);
* Constructor for this class.
* @param net Network to which optimization is applied.
* @param f Cost function in this net.
* @param eta Learning rate.
* @param seed Seed of random.
// Stores the network, cost function and learning rate and seeds the random generator
// with the given seed.
public SGD(Network net, CostFunction f, double eta, int seed){
this.net = net;
this.cFunc = f;
this.eta = eta;
rand = new Random(seed);
* Run learning.
* @param x Input layer.
* @param t Answer.
* @param nEpoch Number of epoch.
* @param batchSize Size of batch.
* @return Output of this network.
public Matrix fit(Matrix x, Matrix t, int nEpoch, int batchSize){
Matrix[][] xt = this.makeMiniBatch(x, t, batchSize, rand);
Matrix[] xs = xt[0];
Matrix[] ts = xt[1];
Matrix y = ts[0].clone();
int backNum = (int)(x.row / batchSize) + 1;
for (int i = 0; i < nEpoch; i++){
System.out.printf("Epoch %d/%d\n", i+1, nEpoch);
for (int j = 0; j < backNum; j++){
y = this.forward(xs[j]);
this.back(xs[j], y, ts[j]);
System.out.printf("\rloss: %.4f", this.cFunc.calcurate(y, t).matrix[0][0]);
return y;
* Run learning.
* @param x Input layer.
* @param t Answer.
* @param nEpoch Number of epoch.
* @param batchSize Size of batch.
* @param valX Input layer for validation.
* @param valT Answer for validation.
* @return Output of this network.
// Runs mini-batch learning and reports training and validation loss per batch.
// NOTE(review): closing braces and the call that consumes the format string below
// appear to be missing from this listing; the code is reproduced as-is.
public Matrix fit(Matrix x, Matrix t, int nEpoch, int batchSize,
Matrix valX, Matrix valT){
// Training data split into batches (index 0: inputs, index 1: answers).
Matrix[][] xt = this.makeMiniBatch(x, t, batchSize, rand);
Matrix[] xs = xt[0];
Matrix[] ts = xt[1];
// Validation data split the same way, with the same batch size.
Matrix[][] valxt = this.makeMiniBatch(valX, valT, batchSize, rand);
Matrix[] valxs = valxt[0];
Matrix[] valts = valxt[1];
// Placeholder so that y is defined even when no epoch runs.
Matrix y = ts[0].clone();
Matrix valY;
// Number of batches, derived from the training data only.
// NOTE(review): valxs/valts are indexed with the same j below; if valX has fewer rows
// than x they may hold fewer batches than backNum. Confirm callers pass equal sizes.
int backNum = (int)(x.row / batchSize) + 1;
for (int i = 0; i < nEpoch; i++){
System.out.printf("Epoch %d/%d\n", i+1, nEpoch);
for (int j = 0; j < backNum; j++){
// Both forward passes run before back(), i.e. before this batch's update.
valY = this.forward(valxs[j]);
y = this.forward(xs[j]);
this.back(xs[j], y, ts[j]);
// Format string and arguments for the per-batch loss report.
"\rloss: %.4f - valLoss: %.4f",
this.cFunc.calcurate(y, ts[j]).matrix[0][0],
this.cFunc.calcurate(valY, valts[j]).matrix[0][0]
return y;
* Doing back propagation.
* @param x Input layer.
* @param y Result of forward propagation.
* @param t Answer.
// Back propagation: sets delta on every node, from the last layer back to layer 0.
// NOTE(review): this listing appears to have lost lines (closing braces, the else header
// before "input layer", and the statements that apply `cal` to the weights); the code
// is reproduced as-is. The parameter y is not referenced in the visible lines.
public void back(Matrix x, Matrix y, Matrix t){
// last layer
Layer nowLayer = this.net.layers[this.net.layers_num-1];
Layer preLayer = this.net.layers[this.net.layers_num-2];
for (int i = 0; i < nowLayer.nodes.length; i++){
Node nowNode = nowLayer.nodes[i];
Matrix cal;
// Cost-function differential for this node's output against column i of the answers.
cal = this.cFunc.differential(nowNode.a, t.getCol(i));
// Combined with the activation-function differential; element [0][0] becomes delta.
cal = Matrix.dot(cal.T(), nowNode.aFunc.differential(nowNode.x));
nowNode.delta = cal.matrix[0][0];
// Previous layer's output (from calA) scaled by delta; not used in the visible lines.
cal = Matrix.mult(this.calA(preLayer.nodes), nowNode.delta);
// middle layer and input layer
for (int i = this.net.layers_num-2; i >= 0; i--){
Node[] nextNodes = this.net.layers[i+1].nodes;
Node[] nowNodes = this.net.layers[i].nodes;
Node[] preNodes;
// Row vector that receives the deltas of the next layer's nodes.
Matrix deltas = new Matrix(new double[1][nextNodes.length]);
Matrix preA;
if (i != 0){
// middle layer
preNodes = this.net.layers[i-1].nodes;
preA = this.calA(preNodes);
// input layer
preA = Matrix.appendCol(x, 1.0);
for (int j = 0; j < nextNodes.length; j++){
deltas.matrix[0][j] = nextNodes[j].delta;
for (int j = 0; j < nowNodes.length; j++){
Node nowNode = nowNodes[j];
Matrix cal;
// delta = (next-layer deltas . weights attached to node j) * activation differential
// evaluated at nowNode.x.meanCol().
nowNode.delta = Matrix.dot(deltas, this.calW(nextNodes, j)).matrix[0][0]
* nowNode.aFunc.differential(nowNode.x.meanCol()).matrix[0][0];
// Step for this node: preA.meanCol() scaled by -eta * delta.
cal = Matrix.mult(preA.meanCol(), -this.eta*nowNode.delta);
Epoch 1/5
loss: 0.0424 - valLoss: 0.0216
Epoch 2/5
loss: 0.0000 - valLoss: 0.0005
Epoch 3/5
loss: 0.0038 - valLoss: 0.0045
Epoch 4/5
loss: 0.0054 - valLoss: 0.0065
Epoch 5/5
loss: 0.0054 - valLoss: 0.0073
w_{new} = w_{old} - \eta \frac{\partial E}{\partial w_{old}}+\alpha\Delta w_{old}
package optimizer;
import java.util.Random;
import java.util.ArrayList;
import network.*;
import costFunction.*;
import matrix.*;
import layer.*;
import nodes.*;
/** Class for Stochastic Gradient Descent with momentum. */
// Optimizer that extends Optimizer with a momentum value (alpha) and a per-layer,
// per-node store of weight changes (dw).
// NOTE(review): this listing appears to have lost lines during extraction (closing
// braces, comment delimiters, the body of setDw's inner loop, the else header in back()
// and the statements that use `cal` and `dw`); the code is reproduced as-is.
public class MomentumSGD extends Optimizer{
Random rand;
/** Value of momentum */
double alpha = 0.9;
/** Amount of change in weight */
ArrayList<ArrayList<Matrix>> dw;
* Constructor for this class.
// NOTE(review): no body statements are visible for this constructor.
public MomentumSGD(){
* Constructor for this class.
* @param net Network to which optimization is applied.
* @param f Cost function in this net.
// Random generator uses the fixed seed 0; alpha keeps its default of 0.9.
// NOTE(review): no call to setDw() is visible in any constructor, while back() reads
// this.dw; confirm where dw is initialised.
public MomentumSGD(Network net, CostFunction f){
this.net = net;
this.cFunc = f;
rand = new Random(0);
* Constructor for this class.
* @param net Network to which optimization is applied.
* @param f Cost function in this net.
* @param eta Learning rate.
* @param alpha Value of momentum.
// Random generator uses the fixed seed 0.
public MomentumSGD(Network net, CostFunction f, double eta, double alpha){
this.net = net;
this.cFunc = f;
this.eta = eta;
this.alpha = alpha;
rand = new Random(0);
* Constructor for this class.
* @param net Network to which optimization is applied.
* @param f Cost function in this net.
* @param seed Seed of random.
public MomentumSGD(Network net, CostFunction f, int seed){
this.net = net;
this.cFunc = f;
rand = new Random(seed);
* Constructor for this class.
* @param net Network to which optimization is applied.
* @param f Cost function in this net.
* @param eta Learning rate.
* @param alpha Value of momentum.
* @param seed Seed of random.
public MomentumSGD(Network net, CostFunction f,
double eta, double alpha, int seed){
this.net = net;
this.cFunc = f;
this.eta = eta;
this.alpha = alpha;
rand = new Random(seed);
* Set dw field.
// Creates one inner list per layer of the network.
private void setDw(){
this.dw = new ArrayList<ArrayList<Matrix>>();
for (int i = 0; i < this.net.layers_num; i++){
this.dw.add(new ArrayList<Matrix>());
// Iterates over the nodes of layer i; the loop body is not visible in this listing.
for (int j = 0; j < this.net.layers[i].nodes_num; j++){
* Doing back propagation.
* @param x Input layer.
* @param y Result of forward propagation.
* @param t Answer.
// Sets delta on every node, from the last layer back to layer 0.
// The parameter y is not referenced in the visible lines.
public void back(Matrix x, Matrix y, Matrix t){
// last layer
Layer nowLayer = this.net.layers[this.net.layers_num-1];
Layer preLayer = this.net.layers[this.net.layers_num-2];
// Stored weight changes for the last layer (local name shadows the field).
ArrayList<Matrix> dw = this.dw.get(this.net.layers_num-1);
for (int i = 0; i < nowLayer.nodes.length; i++){
Node nowNode = nowLayer.nodes[i];
Matrix cal;
// Cost-function differential for this node's output against column i of the answers.
cal = this.cFunc.differential(nowNode.a, t.getCol(i));
// Combined with the activation-function differential; element [0][0] becomes delta.
cal = Matrix.dot(cal.T(), nowNode.aFunc.differential(nowNode.x));
nowNode.delta = cal.matrix[0][0];
// Previous layer's output (from calA) scaled by delta; not used in the visible lines.
cal = Matrix.mult(this.calA(preLayer.nodes), nowNode.delta);
// middle layer and input layer
for (int i = this.net.layers_num-2; i >= 0; i--){
Node[] nextNodes = this.net.layers[i+1].nodes;
Node[] nowNodes = this.net.layers[i].nodes;
Node[] preNodes;
// Row vector that receives the deltas of the next layer's nodes.
Matrix deltas = new Matrix(new double[1][nextNodes.length]);
Matrix preA;
if (i != 0){
// middle layer
preNodes = this.net.layers[i-1].nodes;
preA = this.calA(preNodes);
// input layer
preA = Matrix.appendCol(x, 1.0);
for (int j = 0; j < nextNodes.length; j++){
deltas.matrix[0][j] = nextNodes[j].delta;
// Stored weight changes for layer i.
dw = this.dw.get(i);
for (int j = 0; j < nowNodes.length; j++){
Node nowNode = nowNodes[j];
Matrix cal;
// delta = (next-layer deltas . weights attached to node j) * activation differential
// evaluated at nowNode.x.meanCol().
nowNode.delta = Matrix.dot(deltas, this.calW(nextNodes, j)).matrix[0][0]
* nowNode.aFunc.differential(nowNode.x.meanCol()).matrix[0][0];
// Gradient step for this node: preA.meanCol() scaled by -eta * delta.
cal = Matrix.mult(preA.meanCol(), -this.eta*nowNode.delta);
Epoch 1/5
loss: 0.2173 - valLoss: 0.1467
Epoch 2/5
loss: 0.0106 - valLoss: 0.0004
Epoch 3/5
loss: 0.0000 - valLoss: 0.0092
Epoch 4/5
loss: 0.0181 - valLoss: 0.0426
Epoch 5/5
loss: 0.0105 - valLoss: 0.0021
h_{new}=h_{old}+\left(\frac{\partial E}{\partial w_{old}}\right)^2
w_{new} = w_{old} - \frac{\eta}{\sqrt{h_{new}}} \frac{\partial E}{\partial w_{old}}
* Doing back propagation.
* @param x Input layer.
* @param y Result of forward propagation.
* @param t Answer.
// Back propagation that also accumulates a single running total h from the values
// produced by Matrix.pow for every node (presumably squared gradients; confirm).
// NOTE(review): this listing appears to have lost lines (closing braces, the else header
// before "input layer", and the statements that apply the step to the weights); the
// code is reproduced as-is. The parameter y is not referenced in the visible lines.
public void back(Matrix x, Matrix y, Matrix t){
// Total of Matrix.sum(Matrix.pow(cal)) over all nodes handled in this call.
double sum = 0.;
// Learning rate scaled by 1/sqrt(h), using h as it was before this call.
// NOTE(review): this local is not referenced in the visible lines, and if this.h is 0
// the division yields a non-finite value; confirm h's initial value.
double eta = this.eta / Math.sqrt(this.h);
// last layer
Layer nowLayer = this.net.layers[this.net.layers_num-1];
Layer preLayer = this.net.layers[this.net.layers_num-2];
for (int i = 0; i < nowLayer.nodes.length; i++){
Node nowNode = nowLayer.nodes[i];
Matrix cal;
// Cost-function differential for this node's output against column i of the answers.
cal = this.cFunc.differential(nowNode.a, t.getCol(i));
// Combined with the activation-function differential; element [0][0] becomes delta.
cal = Matrix.dot(cal.T(), nowNode.aFunc.differential(nowNode.x));
nowNode.delta = cal.matrix[0][0];
// Previous layer's output scaled by delta, then reduced with meanCol().
cal = Matrix.mult(this.calA(preLayer.nodes), nowNode.delta);
cal = cal.meanCol();
sum += Matrix.sum(Matrix.pow(cal));
// middle layer and input layer
for (int i = this.net.layers_num-2; i >= 0; i--){
Node[] nextNodes = this.net.layers[i+1].nodes;
Node[] nowNodes = this.net.layers[i].nodes;
Node[] preNodes;
// Row vector that receives the deltas of the next layer's nodes.
Matrix deltas = new Matrix(new double[1][nextNodes.length]);
Matrix preA;
if (i != 0){
// middle layer
preNodes = this.net.layers[i-1].nodes;
preA = this.calA(preNodes);
// input layer
preA = Matrix.appendCol(x, 1.0);
for (int j = 0; j < nextNodes.length; j++){
deltas.matrix[0][j] = nextNodes[j].delta;
for (int j = 0; j < nowNodes.length; j++){
Node nowNode = nowNodes[j];
Matrix cal;
// delta = (next-layer deltas . weights attached to node j) * activation differential
// evaluated at nowNode.x.meanCol().
nowNode.delta = Matrix.dot(deltas, this.calW(nextNodes, j)).matrix[0][0]
* nowNode.aFunc.differential(nowNode.x.meanCol()).matrix[0][0];
cal = Matrix.mult(preA.meanCol(), nowNode.delta);
sum += Matrix.sum(Matrix.pow(cal));
// h grows by this call's total; it is one scalar shared by all weights.
this.h += sum;
Epoch 1/5
loss: 2473214.3959 - valLoss: 1357515.9110
Epoch 2/5
loss: 2471777.0569 - valLoss: 1356725.6043
Epoch 3/5
loss: 2470778.9335 - valLoss: 1356176.7903
Epoch 4/5
loss: 2469965.5666 - valLoss: 1355729.5627
Epoch 5/5
loss: 2469261.4202 - valLoss: 1355342.3890
Epoch 1/5
loss: 0.0016 - valLoss: 0.0001
Epoch 2/5
loss: 0.0014 - valLoss: 0.0002
Epoch 3/5
loss: 0.0012 - valLoss: 0.0002
Epoch 4/5
loss: 0.0011 - valLoss: 0.0003
Epoch 5/5
loss: 0.0010 - valLoss: 0.0003
h_{new}=\alpha h_{old}+(1-\alpha)\left(\frac{\partial E}{\partial w_{old}}\right)^2
w_{new} = w_{old} - \frac{\eta}{\sqrt{h_{new}}} \frac{\partial E}{\partial w_{old}}
* Doing back propagation.
* @param x Input layer.
* @param y Result of forward propagation.
* @param t Answer.
// Back propagation that also maintains h as an exponentially weighted average of the
// per-call totals produced via Matrix.pow (presumably squared gradients; confirm).
// NOTE(review): this listing appears to have lost lines (closing braces, the else header
// before "input layer", and the statements that apply the step to the weights); the
// code is reproduced as-is. The parameter y is not referenced in the visible lines.
public void back(Matrix x, Matrix y, Matrix t){
// Total of Matrix.sum(Matrix.pow(cal)) over all nodes handled in this call.
double sum = 0.;
// Learning rate scaled by 1/sqrt(h), using h as it was before this call.
// NOTE(review): this local is not referenced in the visible lines, and if this.h is 0
// the division yields a non-finite value; confirm h's initial value.
double eta = this.eta / Math.sqrt(this.h);
// last layer
Layer nowLayer = this.net.layers[this.net.layers_num-1];
Layer preLayer = this.net.layers[this.net.layers_num-2];
for (int i = 0; i < nowLayer.nodes.length; i++){
Node nowNode = nowLayer.nodes[i];
Matrix cal;
// Cost-function differential for this node's output against column i of the answers.
cal = this.cFunc.differential(nowNode.a, t.getCol(i));
// Combined with the activation-function differential; element [0][0] becomes delta.
cal = Matrix.dot(cal.T(), nowNode.aFunc.differential(nowNode.x));
nowNode.delta = cal.matrix[0][0];
// Previous layer's output scaled by delta, then reduced with meanCol().
cal = Matrix.mult(this.calA(preLayer.nodes), nowNode.delta);
cal = cal.meanCol();
sum += Matrix.sum(Matrix.pow(cal));
// middle layer and input layer
for (int i = this.net.layers_num-2; i >= 0; i--){
Node[] nextNodes = this.net.layers[i+1].nodes;
Node[] nowNodes = this.net.layers[i].nodes;
Node[] preNodes;
// Row vector that receives the deltas of the next layer's nodes.
Matrix deltas = new Matrix(new double[1][nextNodes.length]);
Matrix preA;
if (i != 0){
// middle layer
preNodes = this.net.layers[i-1].nodes;
preA = this.calA(preNodes);
// input layer
preA = Matrix.appendCol(x, 1.0);
for (int j = 0; j < nextNodes.length; j++){
deltas.matrix[0][j] = nextNodes[j].delta;
for (int j = 0; j < nowNodes.length; j++){
Node nowNode = nowNodes[j];
Matrix cal;
// delta = (next-layer deltas . weights attached to node j) * activation differential
// evaluated at nowNode.x.meanCol().
nowNode.delta = Matrix.dot(deltas, this.calW(nextNodes, j)).matrix[0][0]
* nowNode.aFunc.differential(nowNode.x.meanCol()).matrix[0][0];
cal = Matrix.mult(preA.meanCol(), nowNode.delta);
sum += Matrix.sum(Matrix.pow(cal));
// h becomes alpha * old h + (1 - alpha) * this call's total; one scalar for all weights.
this.h = this.alpha * this.h + (1 - this.alpha) * sum;
Epoch 1/5
loss: 9264281289.1895 - valLoss: 5405078184.33163
Epoch 2/5
loss: 9261064856.0862 - valLoss: 5403223274.55897
Epoch 3/5
loss: 9258833049.5323 - valLoss: 5401935750.16145
Epoch 4/5
loss: 9256995415.2350 - valLoss: 5400875516.32297
Epoch 5/5
loss: 9255384580.6089 - valLoss: 5399946089.83136
Epoch 1/5
loss: 0.1589 - valLoss: 0.0898
Epoch 2/5
loss: 0.0586 - valLoss: 0.0326
Epoch 3/5
loss: 0.0072 - valLoss: 0.0039
Epoch 4/5
loss: 0.0041 - valLoss: 0.0032
Epoch 5/5
loss: 0.0016 - valLoss: 0.0031
m_{new}=\beta_1m_{old}+(1-\beta_1)\frac{\partial E}{\partial w_{old}}
v_{new}=\beta_2v_{old}+(1-\beta_2)\left(\frac{\partial E}{\partial w_{old}}\right)^2
* Doing back propagation.
* @param x Input layer.
* @param y Result of forward propagation.
* @param t Answer.
// Back propagation that keeps a per-node matrix m and a single scalar v, and updates
// each node's weights from m scaled by a factor derived from v.
// NOTE(review): this listing appears to have lost lines (closing braces and the else
// header before "input layer"); the code is reproduced as-is. The parameter y is not
// referenced in the visible lines.
public void back(Matrix x, Matrix y, Matrix t){
// Total of Matrix.sum(Matrix.pow(cal)) over all nodes handled in this call.
double sum = 0.;
// Scale factor 1/sqrt(v / (1 - beta2)), using this.v as it was before this call.
// NOTE(review): if this.v is 0 the division yields a non-finite value; confirm its
// initial value.
double v = 1 / Math.sqrt(this.v / (1-this.beta2));
// last layer
Layer nowLayer = this.net.layers[this.net.layers_num-1];
Layer preLayer = this.net.layers[this.net.layers_num-2];
// Stored m matrices for the nodes of the last layer (local name shadows the field).
ArrayList<Matrix> m = this.m.get(this.net.layers_num-1);
for (int i = 0; i < nowLayer.nodes.length; i++){
Node nowNode = nowLayer.nodes[i];
Matrix cal;
// Cost-function differential for this node's output against column i of the answers.
cal = this.cFunc.differential(nowNode.a, t.getCol(i));
// Combined with the activation-function differential; element [0][0] becomes delta.
cal = Matrix.dot(cal.T(), nowNode.aFunc.differential(nowNode.x));
nowNode.delta = cal.matrix[0][0];
// Previous layer's output scaled by delta, then reduced with meanCol().
cal = Matrix.mult(this.calA(preLayer.nodes), nowNode.delta);
cal = cal.meanCol();
sum += Matrix.sum(Matrix.pow(cal));
// Adds (1 - beta1) * cal^T to the stored m for this node.
// NOTE(review): no scaling of the previous m by beta1 is visible here; confirm against
// the update formula for m.
m.get(i).add(Matrix.mult(cal.T(), (1-this.beta1)));
// Weight update: m scaled by -eta * v / (1 - beta1). The return value of add() is
// discarded, so add() presumably modifies the receiver in place (verify).
nowNode.w.add(Matrix.mult(m.get(i), -this.eta*v/(1-this.beta1)));
// middle layer and input layer
for (int i = this.net.layers_num-2; i >= 0; i--){
Node[] nextNodes = this.net.layers[i+1].nodes;
Node[] nowNodes = this.net.layers[i].nodes;
Node[] preNodes;
// Row vector that receives the deltas of the next layer's nodes.
Matrix deltas = new Matrix(new double[1][nextNodes.length]);
Matrix preA;
if (i != 0){
// middle layer
preNodes = this.net.layers[i-1].nodes;
preA = this.calA(preNodes);
// input layer
preA = Matrix.appendCol(x, 1.0);
for (int j = 0; j < nextNodes.length; j++){
deltas.matrix[0][j] = nextNodes[j].delta;
// Stored m matrices for the nodes of layer i.
m = this.m.get(i);
for (int j = 0; j < nowNodes.length; j++){
Node nowNode = nowNodes[j];
Matrix cal;
// delta = (next-layer deltas . weights attached to node j) * activation differential
// evaluated at nowNode.x.meanCol().
nowNode.delta = Matrix.dot(deltas, this.calW(nextNodes, j)).matrix[0][0]
* nowNode.aFunc.differential(nowNode.x.meanCol()).matrix[0][0];
cal = Matrix.mult(preA.meanCol(), nowNode.delta);
sum += Matrix.sum(Matrix.pow(cal));
// Same m accumulation and weight update as in the last layer, for node j of layer i.
m.get(j).add(Matrix.mult(cal.T(), (1-this.beta1)));
nowNode.w.add(Matrix.mult(m.get(j), -this.eta*v/(1-this.beta1)));
// v becomes beta2 * old v + (1 - beta2) * this call's total; one scalar for all weights.
this.v = this.beta2 * this.v + (1 - this.beta2) * sum;
Epoch 1/5
loss: 0.8741 - valLoss: 0.5682
Epoch 2/5
loss: 0.8119 - valLoss: 0.5318
Epoch 3/5
loss: 0.7491 - valLoss: 0.4948
Epoch 4/5
loss: 0.6919 - valLoss: 0.4610
Epoch 5/5
loss: 0.6414 - valLoss: 0.4310
Epoch 1/5
loss: 0.0079 - valLoss: 0.0008
Epoch 2/5
loss: 0.0045 - valLoss: 0.0002
Epoch 3/5
loss: 0.0020 - valLoss: 0.0001
Epoch 4/5
loss: 0.0006 - valLoss: 0.0005
Epoch 5/5
loss: 0.0000 - valLoss: 0.0012