def sigmoid(x):
    """Element-wise logistic sigmoid, written to avoid overflow in ``exp``.

    The naive form ``1 / (1 + np.exp(-x))`` overflows for large negative x.
    This version uses the two equivalent branches

        x >= 0:  sigmoid(x) = 1 / (1 + exp(-x))
        x <  0:  sigmoid(x) = exp(x) / (1 + exp(x))

    folded into a single expression: the numerator is exp(min(x, 0)) and the
    denominator is 1 + exp(-|x|), so ``exp`` only sees non-positive arguments.
    """
    numerator = np.exp(np.minimum(x, 0))
    denominator = 1 + np.exp(-np.abs(x))
    return numerator / denominator
# OR dataset: the four binary input pairs and their logical-OR labels
x_train_or = np.array([[0, 1], [1, 0], [0, 0], [1, 1]])
y_train_or = np.array([[1], [1], [0], [1]])
# The validation and test splits are the very same array objects as the
# training split (aliases, not copies).
x_valid_or = x_test_or = x_train_or
y_valid_or = y_test_or = y_train_or

# Model parameters (input dimension: 2, output dimension: 1)
W_or = np.random.uniform(-0.08, 0.08, (2, 1)).astype('float32')
b_or = np.zeros(1, dtype='float32')
def np_log(x):
    """Natural log with the argument clipped to [1e-10, 1e+10].

    The clipping keeps the argument of ``np.log`` away from 0.
    """
    lower_bound, upper_bound = 1e-10, 1e+10
    clipped = np.clip(x, lower_bound, upper_bound)
    return np.log(clipped)
def train_or(x, y, eps=1.0):
    """
    Run one gradient-descent step of the OR model on the given batch.

    Updates the module-level parameters ``W_or`` and ``b_or`` in place.

    :param x: np.ndarray, input data, shape=(batch_size, input_dim)
    :param y: np.ndarray, target labels, shape=(batch_size, output_dim)
    :param eps: float, learning rate
    :return: mean binary cross-entropy of the batch, computed with the
        parameters as they were before this update
    """
    global W_or, b_or

    batch_size = x.shape[0]

    # Prediction
    y_hat = sigmoid(np.matmul(x, W_or) + b_or)  # shape: (batch_size, output_dim)

    # Objective (binary cross-entropy) and its error term
    cost = (- y * np_log(y_hat) - (1 - y) * np_log(1 - y_hat)).mean()
    delta = y_hat - y  # shape: (batch_size, output_dim)

    # Parameter update: gradients are averaged over the batch
    dW = np.matmul(x.T, delta) / batch_size  # shape: (input_dim, output_dim)
    db = delta.sum(axis=0) / batch_size  # shape: (output_dim,)
    W_or -= eps * dW
    b_or -= eps * db

    return cost

def valid_or(x, y):
    """Evaluate the current OR model without changing its parameters.

    Reads the module-level ``W_or`` and ``b_or``.

    :param x: input data (presumably the same layout that train_or expects)
    :param y: target labels (presumably the same layout that train_or expects)
    :return: tuple ``(cost, y_hat)`` -- the mean binary cross-entropy and the
        sigmoid outputs for ``x``
    """
    pre_activation = np.matmul(x, W_or) + b_or
    y_hat = sigmoid(pre_activation)

    positive_term = y * np_log(y_hat)
    negative_term = (1 - y) * np_log(1 - y_hat)
    cost = (-positive_term - negative_term).mean()

    return cost, y_hat
# Train the OR model for 1000 epochs, one update on the whole training set per epoch.
for epoch in range(1000):
    # Reorder the training samples. `shuffle` is defined outside this section
    # (presumably sklearn.utils.shuffle -- confirm against the file's imports).
    x_train_or, y_train_or = shuffle(x_train_or, y_train_or)
    # One parameter update; the returned training cost is stored in `cost` ...
    cost = train_or(x_train_or, y_train_or)
    # ... and immediately overwritten here by the validation cost.
    cost, y_pred = valid_or(x_valid_or, y_valid_or)



def softmax(x, axis=1):
    """Numerically stable softmax along ``axis``.

    The maximum along ``axis`` is subtracted before exponentiating to prevent
    overflow/underflow in ``exp``; this does not change the result. The
    subtraction is done out-of-place so the caller's array is left untouched.

    :param x: array-like of scores; must have the dimension named by ``axis``
    :param axis: int, axis along which the outputs sum to 1 (default 1, i.e.
        per row of a 2-D batch)
    :return: np.ndarray of the same shape as ``x``
    """
    x = np.asarray(x)
    # `x - ...` allocates a new array; `x -= ...` would overwrite the input.
    shifted = x - x.max(axis=axis, keepdims=True)
    x_exp = np.exp(shifted)
    return x_exp / np.sum(x_exp, axis=axis, keepdims=True)

