# ニューラルネットワークにおける計算式のメモ

More than 1 year has passed since last update.

# 概要

• ニューラルネットワークにおける計算式のメモ

# Softmax node with Cross Entropy

\begin{align}
y_{tk}&=\frac{\exp x_{tk}}{\sum_{k'} \exp x_{tk'}},\qquad L=-\sum_{t,k} t_{tk}\log y_{tk}\\
\frac{\partial L}{\partial X}&=-\sum_{t,k} \frac{t_{tk}}{y_{tk}}\frac{\partial y_{tk}}{\partial X}\\
&=-\sum_{t,k} \frac{t_{tk}}{y_{tk}}\left(\frac{\exp x_{tk}}{\sum_{k'} \exp x_{tk'}}\frac{\partial x_{tk}}{\partial X}
-\frac{\exp x_{tk}}{(\sum_{k'} \exp x_{tk'})^2}\sum_{k'} \exp x_{tk'}\frac{\partial x_{tk'}}{\partial X}\right)\\
&=-\sum_{t,k}\frac{t_{tk}}{y_{tk}}\left(y_{tk}\frac{\partial x_{tk}}{\partial X}
-y_{tk}\sum_{k'} y_{tk'}\frac{\partial x_{tk'}}{\partial X}\right)\\
&=-\sum_{t,k}t_{tk}\frac{\partial x_{tk}}{\partial X}
+\sum_{t,k}t_{tk}\sum_{k'} y_{tk'}\frac{\partial x_{tk'}}{\partial X}\\
&=-\sum_{t,k}t_{tk}\frac{\partial x_{tk}}{\partial X}
+\sum_{t}\sum_{k'} y_{tk'}\frac{\partial x_{tk'}}{\partial X}
\qquad\left(\because \sum_{k}t_{tk}=1\right)\\
&=\sum_{t,k}(y_{tk}-t_{tk})\frac{\partial x_{tk}}{\partial X}\\
\frac{\partial L}{\partial x_{tk}}&=y_{tk}-t_{tk}
\end{align}


# Batch Normalization

\begin{align}
\hat{x}_{ti}&=\frac{x_{ti}-\mu_{bi}}{\sqrt{\sigma_{bi}^2+\epsilon}}\\
\sigma_{bi}^2&=\frac{1}{m}\sum_{t=1}^m (x_{ti}-\mu_{bi})^2\\
\mu_{bi}&=\frac{1}{m}\sum_{t=1}^m x_{ti}\\
y_{ti}&=\gamma_i\hat{x}_{ti}+\beta_i\\
\frac{\partial L}{\partial \gamma_i}&=\sum_{t=1}^m\frac{\partial L}{\partial y_{ti}}\hat{x}_{ti}\\
\frac{\partial L}{\partial x_{ti}}&=\sum_{u=1}^m\frac{\partial L}{\partial y_{ui}}\gamma_i\left[
\frac{\delta_{ut}-\frac{1}{m}}{\sqrt{\sigma_{bi}^2+\epsilon}}
-\frac{1}{2}\frac{\hat{x}_{ui}}{(\sigma_{bi}^2+\epsilon)}
\frac{2}{m}\sum_{s=1}^m (x_{si}-\mu_{bi})\left(\delta_{st}-\frac{1}{m}\right)\right]\\
&=\sum_{u=1}^m\frac{\partial L}{\partial y_{ui}}\frac{\gamma_i}{\sqrt{\sigma_{bi}^2+\epsilon}}\left[
\delta_{ut}-\frac{1}{m}-\frac{\hat{x}_{ui}}{m}\sum_{s=1}^m\hat{x}_{si}\left(\delta_{st}-\frac{1}{m}\right)\right]\\
&=\frac{\gamma_i}{\sqrt{\sigma_{bi}^2+\epsilon}}\sum_{u=1}^m\frac{\partial L}{\partial y_{ui}}\left[
\delta_{ut}-\frac{1}{m}-\frac{\hat{x}_{ui}\hat{x}_{ti}}{m}\right]\\
&=\frac{\gamma_i}{\sqrt{\sigma_{bi}^2+\epsilon}}\left[
\frac{\partial L}{\partial y_{ti}}
-\frac{1}{m}\sum_{u=1}^m\frac{\partial L}{\partial y_{ui}}
-\frac{\hat{x}_{ti}}{m}\sum_{u=1}^m\frac{\partial L}{\partial y_{ui}}\hat{x}_{ui}\right]
\end{align}


\begin{align}