Perceptron in Regression
"""
x: features
y: labels
w: weights
THETA: threshold
Iris Dataset: https://en.wikipedia.org/wiki/Iris_flower_data_set
** In this script, we explicitly select two features and discard the rest.
"""
import numpy as np
from sklearn import datasets
iris = datasets.load_iris()
binary_index = [label != 2 for label in iris.target]  # we drop the label "2" to make this a binary classification task
x = iris.data[binary_index, :2] # we only take the first two features.
y = iris.target[binary_index]
W = np.random.uniform(low=-1., high=1., size=x.shape[-1])
THETA = 0.5
def step_func(h):
return 1 if h > THETA else 0
def perceptron(x, w):
"""
Perceptron Algorithm
How it works:
- h = x0 * w0 + x1 * w1
- y_hat = step_func(h)
Reference:
    - Jp => https://qiita.com/nishiy-k/items/1e795f92a99422d4ba7b
    - En => https://towardsdatascience.com/perceptron-learning-algorithm-d5db0deab975
"""
h = x[0] * w[0] + x[1] * w[1]
y_hat = step_func(h)
return y_hat
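# For example, with the first Iris sample x[0] = [5.1, 3.5] (sepal length and width),
# perceptron(x[0, :], W) returns 1 when 5.1 * W[0] + 3.5 * W[1] > THETA and 0 otherwise.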
# Before Training
y_hats = [perceptron(x[i, :], W) for i in range(x.shape[0])]
print("Acc before training: ", sum(y == y_hats)/x.shape[0]) # check the accuracy
print("Weights: ", W)
"""
Write the training code HERE
Mainly it is expected to update the weights accordingly to improve the accuracy above.
"""
# After Training
y_hats = [perceptron(x[i, :], W) for i in range(x.shape[0])]
print("Acc After training: ", sum(y == y_hats)/x.shape[0]) # check the accuracy
print("Weights: ", W)
Sample Answer
"""
x: features
y: labels
w: weights
THETA: threshold
Iris Dataset: https://en.wikipedia.org/wiki/Iris_flower_data_set
** In this script, we explicitly select two features and discard the rest.
"""
import numpy as np
from sklearn import datasets
# === Preparation ===
iris = datasets.load_iris()
binary_index = [label != 2 for label in iris.target]
x = iris.data[binary_index, :2]
y = iris.target[binary_index]
random_indices = np.random.permutation(x.shape[0])  # shuffle the samples so the two classes are interleaved
x, y = x[random_indices], y[random_indices]
W = np.random.uniform(low=-1., high=1., size=x.shape[-1])
THETA = 0.5
eta = 0.001
# === Def Funcs ===
# this can be substituted with Sigmoid
def _func(h):
    """ Step Function (thresholding the sigmoid of h) """
    h = sigmoid(h)
    return 1 if h > THETA else 0
def sigmoid(h):
""" Sigmoid Function """
return 1/(1+np.exp(-h))
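# Note: sigmoid(0) = 0.5, so with the initial THETA = 0.5 the test "sigmoid(h) > THETA"
# is equivalent to "h > 0", i.e. the classic perceptron decision rule (until THETA is updated).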
def perceptron(W, x):
"""
Perceptron Algorithm
How it works:
- h = x0 * w0 + x1 * w1
- y_hat = _func(h)
Reference:
    - Jp => https://qiita.com/nishiy-k/items/1e795f92a99422d4ba7b
    - En => https://towardsdatascience.com/perceptron-learning-algorithm-d5db0deab975
"""
h = np.dot(W, x)
y_hat = _func(h)
return y_hat
def training(W, THETA, x, label):
    """ Training method
    :param W: current weights
    :param THETA: current threshold
    :param x: a single feature vector
    :param label: target label (0 or 1)
    :return: updated weights, updated threshold, and whether the prediction was correct
    """
    y_hat = perceptron(W=W, x=x)  # prediction with the current parameters
    W = W + eta * (label - y_hat) * x  # update the weights
    THETA = THETA - eta * (label - y_hat)  # update the threshold (lowering it makes predicting 1 easier)
    return W, THETA, y_hat == label
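# Example usage (a single online update on the first sample):
# W, THETA, correct = training(W=W, THETA=THETA, x=x[0, :], label=y[0])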
# === Do Training ===
y_hats = [perceptron(W=W, x=x[i, :]) for i in range(x.shape[0])]
print("Acc before training: ", sum(y == y_hats) / x.shape[0])
# outer loop for the training
for ep in range(100):
# === Training phase ===
    # the weights are updated one sample at a time (online / stochastic updates)
for i in range(x.shape[0]):
data = x[i, :]
W, THETA, result = training(W=W, THETA=THETA, x=data, label=y[i])
# === Evaluation phase ===
    y_hats = [perceptron(W=W, x=x[i, :]) for i in range(x.shape[0])]  # check how accurate the model is
print("Epoch: {}, W: {}, Result: {}".format(ep, W, sum(y == y_hats) / x.shape[0])) # log method
y_hats = [perceptron(W=W, x=x[i, :]) for i in range(x.shape[0])]
print("Acc After training: ", sum(y == y_hats) / x.shape[0])
print("Weights: ", W)
Perceptron in Classification
Random Values
import numpy as np
num_data = 10
num_feat = 3
num_class = 3
num_epochs = 100
original_y = np.random.randint(low=0, high=num_class, size=num_data).reshape(-1)
y = np.eye(num_class)[original_y]
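# np.eye(num_class)[original_y] is a compact one-hot encoding:
# with num_class = 3, a label of 2 becomes [0., 0., 1.] and a label of 0 becomes [1., 0., 0.]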
x = np.random.randn(num_data, num_feat)
W = np.random.uniform(low=-1., high=1., size=(x.shape[-1], num_class))
eta = 0.001
# print(x.shape, y.shape)
# print(x, y)
def softmax(h):
"""
- Softmax formula
np.exp(h)/np.sum(np.exp(h))
:param h:
:return:
"""
return np.exp(h)/np.sum(np.exp(h))
def stable_softmax(h):
"""
- Stable Softmax formula
- reference: https://stackoverflow.com/questions/42599498/numercially-stable-softmax/42606665
:param h:
:return:
"""
z = h - np.max(h)
numerator = np.exp(z)
denominator = np.sum(numerator)
result = numerator/denominator
return result
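# Illustration of why the stable version matters (example values):
# softmax(np.array([1000., 1001.])) overflows in np.exp and yields nan,
# whereas stable_softmax(np.array([1000., 1001.])) is approximately [0.269, 0.731].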
def perceptron(x, W):
h = np.dot(x, W)
y_hat = softmax(h)
return y_hat
def stable_perceptron(x, w):
    h = np.dot(x, w)  # same as perceptron, but using the numerically stable softmax
    y_hat = stable_softmax(h)
    return y_hat
def MSE(pred, target):
    """
    Mean Squared Error
    - Formula
      1/n * sum((y[i] - y_hat[i])**2)
    """
    return np.mean(np.square(pred - target))
def Cross_Entropy(pred, target):
    """
    Cross Entropy
    - Formula
      -sum(p(target) * log p(pred))
    """
    return -np.sum(target * np.log(pred))
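# Quick sanity check with illustrative values:
# pred = np.array([0.7, 0.2, 0.1]), target = np.array([1., 0., 0.])
# MSE(pred, target)           -> (0.3**2 + 0.2**2 + 0.1**2) / 3 ≈ 0.0467
# Cross_Entropy(pred, target) -> -log(0.7) ≈ 0.357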
def SGD(W, x, label):
    """ Stochastic Gradient Descent """
    for i in range(num_data):
        y_hat = perceptron(x[i, :], W)
        delta_E_W = y_hat - label[i, :]  # gradient of the cross-entropy loss w.r.t. the logits
        W = W - eta * np.outer(x[i, :], delta_E_W)  # descend the gradient of the error
    return W
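# Why delta_E_W = y_hat - label: for softmax outputs trained with cross entropy,
# the gradient of the loss with respect to the logits is exactly (y_hat - label);
# the outer product with the input x then gives the gradient with respect to W.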
for ep in range(num_epochs):
y_hats = [np.argmax(perceptron(x[i, :], W)) for i in range(x.shape[0])]
print("Acc before training: ", sum(original_y == y_hats) / x.shape[0])
# === Training ===
# print("Before update: ", W)
W = SGD(W, x, label=y)
# print("After updated: ", W)
y_hats = [np.argmax(perceptron(x[i, :], W)) for i in range(x.shape[0])]
print("Acc after training: ", sum(original_y == y_hats) / x.shape[0])
"""
=== Manual Loop ===
for i in range(num_data):
pred = perceptron(x[i, :], W)
print("Without One-Hot: ", np.argmax(pred), original_y[i])
print("Without One-Hot: ", np.argmax(pred) - original_y[i])
print("With One-Hot: ", pred, y[i, :])
print("With One-Hot: ", pred - y[i, :])
print("MSE: ", MSE(pred, y[i, :]))
print("Cross Entropy Error: ", Cross_Entropy(pred, y[i, :]))
print("With One-Hot: ", y[i, :] - pred)
"""
Iris Dataset
import numpy as np
from sklearn import datasets
iris = datasets.load_iris()
num_data = iris.data.shape[0]
num_class = len(set(iris.target))
num_epochs = 100
x = iris.data
original_y = iris.target
y = np.eye(num_class)[original_y]
W = np.random.uniform(low=-1., high=1., size=(x.shape[-1], num_class))
eta = 0.001
print(W.shape, x.shape, y.shape)
# print(x, y)
def softmax(h):
"""
- Softmax formula
np.exp(h)/np.sum(np.exp(h))
:param h:
:return:
"""
return np.exp(h)/np.sum(np.exp(h))
def stable_softmax(h):
"""
- Stable Softmax formula
- reference: https://stackoverflow.com/questions/42599498/numercially-stable-softmax/42606665
:param h:
:return:
"""
z = h - np.max(h)
numerator = np.exp(z)
denominator = np.sum(numerator)
result = numerator/denominator
return result
def perceptron(x, W):
h = np.dot(x, W)
y_hat = softmax(h)
return y_hat
def stable_perceptron(x, w):
    h = np.dot(x, w)  # same as perceptron, but using the numerically stable softmax
    y_hat = stable_softmax(h)
    return y_hat
def MSE(pred, target):
    """
    Mean Squared Error
    - Formula
      1/n * sum((y[i] - y_hat[i])**2)
    """
    return np.mean(np.square(pred - target))
def Cross_Entropy(pred, target):
    """
    Cross Entropy
    - Formula
      -sum(p(target) * log p(pred))
    """
    return -np.sum(target * np.log(pred))
def SGD(W, x, label):
    """
    Stochastic Gradient Descent
    - Reference
      https://deepnotes.io/softmax-crossentropy
    """
    for i in range(num_data):
        y_hat = perceptron(x[i, :], W)
        delta_E_W = y_hat - label[i, :]  # gradient of the cross-entropy loss w.r.t. the logits
        W = W - eta * np.outer(x[i, :], delta_E_W)  # descend the gradient of the error
    return W
for ep in range(num_epochs):
y_hats = [np.argmax(perceptron(x[i, :], W)) for i in range(x.shape[0])]
print("Acc before training: ", sum(original_y == y_hats) / x.shape[0])
# === Training ===
# print("Before update: ", W)
W = SGD(W, x, label=y)
# print("After updated: ", W)
y_hats = [np.argmax(perceptron(x[i, :], W)) for i in range(x.shape[0])]
print("Acc after training: ", sum(original_y == y_hats) / x.shape[0])
MLP for MNIST
import numpy as np
import tensorflow as tf
def sigmoid(x):
    """ Sigmoid activation: 1 / (1 + exp(-x)) """
    return 1.0 / (1.0 + np.exp(-x))
def softmax(x):
    """Compute the softmax of x along its last axis (works for a single vector or a minibatch)."""
    exps = np.exp(x - np.max(x, axis=-1, keepdims=True))
    return exps / np.sum(exps, axis=-1, keepdims=True)
def derivative_sigmoid(o):
    """ Derivative of the sigmoid, written in terms of its output o = sigmoid(x) """
    return o * (1.0 - o)
class NeuralNetwork:
def __init__(self, num_input, num_hidden, num_output, lr):
# num of neurons in layers
self.num_input = num_input
self.num_hidden = num_hidden
self.num_output = num_output
# learning rate
self.lr = lr
# initialise
self.w_ih = np.random.normal(0.0, 1.0, (self.num_hidden, self.num_input))
self.w_ho = np.random.normal(0.0, 1.0, (self.num_output, self.num_hidden))
# activation
self.af = sigmoid
self.daf = derivative_sigmoid
def backprop(self, _input, label):
o_o, o_h = self.feedforward(_input)
# calculate error
e_o = label - o_o
e_h = np.dot(e_o, self.w_ho)
        # output layer -> hidden layer
        # With softmax outputs and a cross-entropy-style error, the gradient of the loss
        # w.r.t. the output pre-activations is (o_o - label) = -e_o, so the descent step is
        # w_ho <- w_ho + lr * e_o.T @ o_h
        self.w_ho += self.lr * np.dot(e_o.T, o_h)
# hidden layer -> input layer
self.w_ih += self.lr * np.dot((e_h * self.daf(o_h)).T, _input)
def feedforward(self, _input):
# hidden layer
x_h = np.dot(_input, self.w_ih.T)
o_h = self.af(x_h)
# output layer
x_o = np.dot(o_h, self.w_ho.T)
o_o = softmax(x_o)
return o_o, o_h
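# Shapes for a minibatch of size B (with the hyperparameters used below):
#   _input: (B, 784) -> x_h, o_h: (B, 100) -> x_o, o_o: (B, 10)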
if __name__ == '__main__':
# hyperparameters
num_input = 784
num_hidden = 100
num_output = num_class = 10
num_minibatch = 100
num_epoch = 10
lr = 0.3
# data preprocessing
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train = x_train.reshape(x_train.shape[0], x_train.shape[1] ** 2) # flatten the array
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1] ** 2) # flatten the array
y_train = np.eye(num_class)[y_train] # manual one hot encoding
x_train = x_train / 255.0 # normalise the images
x_test = x_test / 255.0 # normalise the images
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)
# instantiate the NN
nn = NeuralNetwork(num_input, num_hidden, num_output, lr)
# Learning phase
for e in range(num_epoch):
minibatch_index = np.random.randint(low=0, high=x_train.shape[0], size=num_minibatch)
nn.backprop(x_train[minibatch_index, :], y_train[minibatch_index, :])
# Evaluation phase
predict, _ = nn.feedforward(x_test)
predict = np.argmax(predict, axis=-1)
score = np.average(predict == y_test)
print("Performance: {}".format(score))