"""
Perceptron in Regression

x: features
y: labels
w: weights
THETA: threshold
Iris Dataset: https://en.wikipedia.org/wiki/Iris_flower_data_set
    **In this script, we explicitly select the first two features and discard the others.
"""

import numpy as np
from sklearn import datasets

# Load Iris and keep only the samples of classes 0 and 1, which makes the task binary.
iris = datasets.load_iris()
binary_index = list(iris.target != 2)  # boolean mask that drops label "2"
x = iris.data[binary_index, :2]  # first two features only
y = iris.target[binary_index]

# Fixed firing threshold and random initial weights (one weight per kept feature).
THETA = 0.5
W = np.random.uniform(low=-1., high=1., size=x.shape[-1])

def step_func(h):
    """Step activation: return 1 when ``h`` is strictly greater than ``THETA``, else 0."""
    if h > THETA:
        return 1
    return 0

def perceptron(x, w):
    """Perceptron Algorithm

    How it works:
        - h = x0 * w0 + x1 * w1
        - y_hat = step_func(h)

    References:
        - Jp => https://qiita.com/nishiy-k/items/1e795f92a99422d4ba7b
        - En => https://towardsdatascience.com/perceptron-learning-algorithm-d5db0deab975

    :param x: feature vector of one sample (only entries 0 and 1 are read)
    :param w: weight vector (only entries 0 and 1 are read)
    :return: the value returned by ``step_func`` for the weighted sum
    """
    h = x[0] * w[0] + x[1] * w[1]
    y_hat = step_func(h)
    return y_hat

# Before Training: accuracy obtained with the randomly initialised weights
y_hats = [perceptron(sample, W) for sample in x]
print("Acc before training: ", np.sum(y == y_hats) / len(x))  # fraction of correct predictions
print("Weights: ", W)

# TODO: Write the training code HERE.
# It is expected to update the weights W so that the accuracy printed above improves.

# After Training: accuracy obtained with the current weights
y_hats = [perceptron(sample, W) for sample in x]
print("Acc After training: ", np.sum(y == y_hats) / len(x))  # fraction of correct predictions
print("Weights: ", W)

"""
Sample Answer

x: features
y: labels
w: weights
THETA: threshold
Iris Dataset: https://en.wikipedia.org/wiki/Iris_flower_data_set
    **In this script, we explicitly select the first two features and discard the others.
"""

import numpy as np
from sklearn import datasets

# === Preparation ===
iris = datasets.load_iris()
binary_index = [label != 2 for label in iris.target]  # drop label "2" -> binary task

x = iris.data[binary_index, :2]  # keep only the first two features
y = iris.target[binary_index]
# Shuffle the samples. A permutation contains every index exactly once, whereas
# np.random.randint draws with replacement and would duplicate some samples
# while silently dropping others.
random_indices = np.random.permutation(x.shape[0])
x, y = x[random_indices], y[random_indices]
W = np.random.uniform(low=-1., high=1., size=x.shape[-1])  # random initial weights
THETA = 0.5  # threshold
eta = 0.001  # learning rate

# === Def Funcs ===
# this can be substituted with Sigmoid
def _func(h):
    """Pass ``h`` through ``sigmoid`` and binarise: 1 if the result exceeds ``THETA``, else 0."""
    activation = sigmoid(h)
    if activation > THETA:
        return 1
    return 0

def sigmoid(h):
    """Logistic function ``1 / (1 + exp(-h))``, evaluated with NumPy."""
    denominator = 1 + np.exp(-h)
    return 1 / denominator

def perceptron(W, x):
    """Perceptron Algorithm

    How it works:
        - h = x0 * w0 + x1 * w1
        - y_hat = _func(h)

    References:
        - Jp => https://qiita.com/nishiy-k/items/1e795f92a99422d4ba7b
        - En => https://towardsdatascience.com/perceptron-learning-algorithm-d5db0deab975

    :param W: weight vector
    :param x: feature vector of one sample
    :return: the value returned by ``_func`` for the dot product of ``W`` and ``x``
    """
    h = np.dot(W, x)
    y_hat = _func(h)
    return y_hat

def training(W, THETA, x, label):
    """ Training method

    Performs one perceptron-rule update from a single sample.

    :param W: current weights
    :param THETA: current threshold value to be updated
    :param x: feature vector of one sample
    :param label: target label of the sample
    :return: updated weights, updated threshold, binary value indicating if the prediction was correct
    """
    y_hat = perceptron(W=W, x=x)  # prediction
    error = label - y_hat
    W = W + eta * error * x  # update the weights
    # A threshold behaves like a negated bias: when the model under-predicts
    # (error > 0) the threshold has to go DOWN so that the unit fires more
    # easily, hence the minus sign.
    THETA = THETA - eta * error
    return W, THETA, y_hat == label

# === Do Training ===
# perceptron is declared as perceptron(W, x); keyword arguments keep the call
# consistent with that signature instead of relying on np.dot being symmetric
# for two 1-D vectors.
y_hats = [perceptron(W=W, x=x[i, :]) for i in range(x.shape[0])]
print("Acc before training: ", sum(y == y_hats) / x.shape[0])

# outer loop for the training
for ep in range(100):
    # === Training phase ===
    # Online (per-sample) updates: the weights change after every single
    # sample, i.e. this is stochastic rather than batch gradient descent.
    for i in range(x.shape[0]):
        data = x[i, :]
        W, THETA, result = training(W=W, THETA=THETA, x=data, label=y[i])

    # === Evaluation phase ===
    y_hats = [perceptron(W=W, x=x[i, :]) for i in range(x.shape[0])]  # check how accurate the model is
    print("Epoch: {}, W: {}, Result: {}".format(ep, W, sum(y == y_hats) / x.shape[0]))  # log method

y_hats = [perceptron(W=W, x=x[i, :]) for i in range(x.shape[0])]
print("Acc After training: ", sum(y == y_hats) / x.shape[0])
print("Weights: ", W)

# Perceptron in Classification
#
# Random Values

import numpy as np

# Problem size and number of training epochs
num_data, num_feat, num_class = 10, 3, 3
num_epochs = 100

# Random integer class ids (already 1-D) and their one-hot encoding
original_y = np.random.randint(low=0, high=num_class, size=num_data)
y = np.eye(num_class)[original_y]
# Random standard-normal features, one sample per row
x = np.random.randn(num_data, num_feat)
# Random initial weights, one column per class
W = np.random.uniform(low=-1., high=1., size=(num_feat, num_class))
eta = 0.001  # learning rate
# print(x.shape, y.shape)
# print(x, y)

def softmax(h):
    """Softmax formula

    softmax(h)[k] = exp(h[k]) / sum_j exp(h[j])

    The exponentials are taken of the raw values, so very large entries of
    ``h`` can overflow.

    :param h: vector of raw scores
    :return: array with the same shape as ``h`` whose entries sum to 1
    """
    exps = np.exp(h)  # evaluated once and reused for numerator and denominator
    return exps / np.sum(exps)

def stable_softmax(h):
    """Stable Softmax formula

    Subtracts the maximum of ``h`` before exponentiating, so the largest
    exponent is exp(0) = 1 and the exponentials cannot overflow.

        - reference: https://stackoverflow.com/questions/42599498/numercially-stable-softmax/42606665

    :param h: vector of raw scores
    :return: array with the same shape as ``h`` whose entries sum to 1
    """
    z = h - np.max(h)
    numerator = np.exp(z)
    denominator = np.sum(numerator)
    result = numerator / denominator
    return result

def perceptron(x, W):
    """Return ``softmax`` of the linear scores ``np.dot(x, W)``."""
    return softmax(np.dot(x, W))

def stable_perceptron(x, w):
    """Return ``stable_softmax`` of the linear scores ``np.dot(x, w)``.

    :param x: feature vector of one sample, length num_feat
    :param w: weight matrix laid out as (num_feat, num_class)
    :return: the output of ``stable_softmax`` for the scores
    """
    # x comes first: with w laid out as (num_feat, num_class) the product
    # x . w yields one score per class, matching perceptron(); np.dot(w, x)
    # would multiply by the transpose instead.
    h = np.dot(x, w)
    y_hat = stable_softmax(h)
    return y_hat

def MSE(pred, target):
    """Mean Squared Error

    - Formula
        1/n * sum((y[i] - y_hat[i])**2)

    :param pred: predicted values
    :param target: target values, same shape as ``pred``
    :return: the squared differences averaged over all elements
    """
    # Average the squared errors directly; taking np.mean of an already
    # summed scalar would return the un-normalised total instead of the mean.
    return np.mean(np.square(pred - target))

def Cross_Entropy(pred, target):
    """Cross Entropy

    - Formula
        - -sum(p(target) * log p(pred))

    :param pred: predicted probabilities
    :param target: target distribution (e.g. a one-hot vector), same shape as ``pred``
    :return: the cross entropy, a non-negative scalar for probabilities in [0, 1]
    """
    # Keep the argument of log strictly positive: log(0) is -inf and
    # 0 * -inf would turn the whole result into NaN.
    safe_pred = np.clip(pred, 1e-12, 1.0)
    # The leading minus sign is part of the definition; without it the value
    # would be negative and grow as predictions improve.
    return -np.sum(target * np.log(safe_pred))

def SGD(W, x, label):
    """ Stochastic Gradient Descent

    Runs one pass over the rows of ``x``, updating the weights after every sample.

    :param W: weight matrix laid out as (num_feat, num_class)
    :param x: feature matrix, one sample per row
    :param label: one-hot targets, one row per sample
    :return: the updated weight matrix
    """
    for i in range(x.shape[0]):
        y_hat = perceptron(x[i, :], W)
        # Gradient of the cross-entropy loss w.r.t. the pre-softmax scores.
        delta_E_W = y_hat - label[i, :]
        # dE/dW[j, k] = x[j] * delta[k], i.e. an outer product; step AGAINST
        # the gradient so that the loss decreases.
        W = W - eta * np.outer(x[i, :], delta_E_W)
    return W

for ep in range(num_epochs):
    # accuracy with the weights as they stand at the start of this epoch
    y_hats = [np.argmax(perceptron(sample, W)) for sample in x]
    print("Acc before training: ", np.sum(original_y == y_hats) / len(x))

    # === Training ===
    # one SGD call per epoch
    W = SGD(W, x, label=y)

    # accuracy with the weights returned by SGD
    y_hats = [np.argmax(perceptron(sample, W)) for sample in x]
    print("Acc after training: ", np.sum(original_y == y_hats) / len(x))

# === Manual Loop ===

# Print, for every sample, the prediction next to its target in both the
# integer-label and the one-hot representation, plus the two error measures.
for i in range(num_data):
    pred = perceptron(x[i], W)
    print("Without One-Hot: ", pred.argmax(), original_y[i])
    print("Without One-Hot: ", pred.argmax() - original_y[i])
    print("With One-Hot: ", pred, y[i])
    print("With One-Hot: ", pred - y[i])
    print("MSE: ", MSE(pred, y[i]))
    print("Cross Entropy Error: ", Cross_Entropy(pred, y[i]))
    print("With One-Hot: ", y[i] - pred)

# Iris Dataset

import numpy as np
from sklearn import datasets

iris = datasets.load_iris()

# Features, integer targets and the sizes derived from them
x = iris.data
original_y = iris.target
num_data = x.shape[0]
num_class = len(set(original_y))
num_epochs = 100

# One-hot targets and random initial weights (one column per class)
y = np.eye(num_class)[original_y]
W = np.random.uniform(low=-1., high=1., size=(x.shape[-1], num_class))
eta = 0.001  # learning rate
print(W.shape, x.shape, y.shape)
# print(x, y)

def softmax(h):
    """Softmax formula

    softmax(h)[k] = exp(h[k]) / sum_j exp(h[j])

    The exponentials are taken of the raw values, so very large entries of
    ``h`` can overflow.

    :param h: vector of raw scores
    :return: array with the same shape as ``h`` whose entries sum to 1
    """
    exps = np.exp(h)  # evaluated once and reused for numerator and denominator
    return exps / np.sum(exps)

def stable_softmax(h):
    """Stable Softmax formula

    Subtracts the maximum of ``h`` before exponentiating, so the largest
    exponent is exp(0) = 1 and the exponentials cannot overflow.

        - reference: https://stackoverflow.com/questions/42599498/numercially-stable-softmax/42606665

    :param h: vector of raw scores
    :return: array with the same shape as ``h`` whose entries sum to 1
    """
    z = h - np.max(h)
    numerator = np.exp(z)
    denominator = np.sum(numerator)
    result = numerator / denominator
    return result

def perceptron(x, W):
    """Return ``softmax`` of the linear scores ``np.dot(x, W)``."""
    return softmax(np.dot(x, W))

def stable_perceptron(x, w):
    """Return ``stable_softmax`` of the linear scores ``np.dot(x, w)``.

    :param x: feature vector of one sample, length num_feat
    :param w: weight matrix laid out as (num_feat, num_class)
    :return: the output of ``stable_softmax`` for the scores
    """
    # x comes first: with w laid out as (num_feat, num_class) the product
    # x . w yields one score per class, matching perceptron(). np.dot(w, x)
    # raises a shape error whenever num_feat != num_class.
    h = np.dot(x, w)
    y_hat = stable_softmax(h)
    return y_hat

def MSE(pred, target):
    """Mean Squared Error

    - Formula
        1/n * sum((y[i] - y_hat[i])**2)

    :param pred: predicted values
    :param target: target values, same shape as ``pred``
    :return: the squared differences averaged over all elements
    """
    # Average the squared errors directly; taking np.mean of an already
    # summed scalar would return the un-normalised total instead of the mean.
    return np.mean(np.square(pred - target))

def Cross_Entropy(pred, target):
    """Cross Entropy

    - Formula
        - -sum(p(target) * log p(pred))

    :param pred: predicted probabilities
    :param target: target distribution (e.g. a one-hot vector), same shape as ``pred``
    :return: the cross entropy, a non-negative scalar for probabilities in [0, 1]
    """
    # Keep the argument of log strictly positive: log(0) is -inf and
    # 0 * -inf would turn the whole result into NaN.
    safe_pred = np.clip(pred, 1e-12, 1.0)
    # np.sum already reduces to a scalar, so no additional np.mean is needed.
    return -np.sum(target * np.log(safe_pred))

def SGD(W, x, label):
    """Stochastic Gradient Descent

    Runs one pass over the rows of ``x``, updating the weights after every sample.

    :param W: weight matrix laid out as (num_feat, num_class)
    :param x: feature matrix, one sample per row
    :param label: one-hot targets, one row per sample
    :return: the updated weight matrix
    """
    for i in range(x.shape[0]):
        y_hat = perceptron(x[i, :], W)
        # Gradient of the cross-entropy loss w.r.t. the pre-softmax scores.
        delta_E_W = y_hat - label[i, :]
        # dE/dW[j, k] = x[j] * delta[k]: the input must enter the update as an
        # outer product, and the step goes AGAINST the gradient so that the
        # loss decreases.
        W = W - eta * np.outer(x[i, :], delta_E_W)
    return W

for ep in range(num_epochs):
    # accuracy with the weights as they stand at the start of this epoch
    y_hats = [np.argmax(perceptron(sample, W)) for sample in x]
    print("Acc before training: ", np.sum(original_y == y_hats) / len(x))

    # === Training ===
    # one SGD call per epoch
    W = SGD(W, x, label=y)

    # accuracy with the weights returned by SGD
    y_hats = [np.argmax(perceptron(sample, W)) for sample in x]
    print("Acc after training: ", np.sum(original_y == y_hats) / len(x))


import numpy as np
import tensorflow as tf

def sigmoid(x):
    """Logistic sigmoid, applied element-wise: ``1 / (1 + exp(-x))``.

    Unlike softmax, the sigmoid is NOT invariant to shifting its input, so
    the maximum must not be subtracted; the exponent also needs the minus
    sign, otherwise the function decreases instead of increasing.

    :param x: scalar or array of pre-activations
    :return: values in (0, 1) with the same shape as ``x``
    """
    return 1.0 / (1.0 + np.exp(-x))

def softmax(x):
    """Compute the softmax of ``x`` along its last axis.

    A 1-D vector is normalised as a whole; for a 2-D batch every row is
    normalised independently, so each row sums to 1. The per-row maximum is
    subtracted before exponentiating to avoid overflow.

    :param x: array of scores with at least one dimension
    :return: array with the same shape as ``x``
    """
    x = np.asarray(x)
    shifted = x - np.max(x, axis=-1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=-1, keepdims=True)

def derivative_sigmoid(o):
    """Return ``o * (1 - o)``, the sigmoid derivative written in terms of its output ``o``."""
    complement = 1.0 - o
    return o * complement

class NeuralNetwork:
    """Network with one hidden layer and a softmax output layer.

    Both weight matrices are stored as (fan_out, fan_in); there are no bias terms.
    """

    def __init__(self, num_input, num_hidden, num_output, lr):
        """Store the layer sizes and learning rate and draw random initial weights."""
        # num of neurons in layers
        self.num_input, self.num_hidden, self.num_output = num_input, num_hidden, num_output

        # learning rate
        self.lr = lr

        # initialise: standard-normal weights, input->hidden first, then hidden->output
        self.w_ih = np.random.normal(0.0, 1.0, (self.num_hidden, self.num_input))
        self.w_ho = np.random.normal(0.0, 1.0, (self.num_output, self.num_hidden))

        # activation of the hidden layer and its derivative
        self.af = sigmoid
        self.daf = derivative_sigmoid

    def backprop(self, _input, label):
        """Run a forward pass and apply one in-place update to both weight matrices.

        :param _input: input batch, one sample per row
        :param label: targets matching the network output, one row per sample
        """
        out_act, hid_act = self.feedforward(_input)

        # calculate error: at the output, and propagated back through w_ho
        # (using w_ho as it is BEFORE this step's update)
        out_err = label - out_act
        hid_err = np.dot(out_err, self.w_ho)

        # Note (from the original author): the derivative of softmax is taken
        # to be just the output error, so no extra factor is applied there.
        step_ho = np.dot(out_err.T, hid_act)
        step_ih = np.dot((hid_err * self.daf(hid_act)).T, _input)

        # output -> hidden layer
        self.w_ho += self.lr * step_ho

        # hidden layer -> input layer
        self.w_ih += self.lr * step_ih

    def feedforward(self, _input):
        """Propagate ``_input`` through the network.

        :param _input: input batch, one sample per row
        :return: tuple (output-layer activations, hidden-layer activations)
        """
        # hidden layer
        hid_act = self.af(np.dot(_input, self.w_ih.T))

        # output layer
        out_act = softmax(np.dot(hid_act, self.w_ho.T))
        return out_act, hid_act

if __name__ == '__main__':
    # hyperparameters
    num_input, num_hidden = 784, 100
    num_output = num_class = 10
    num_minibatch, num_epoch = 100, 10
    lr = 0.3

    # data preprocessing: flatten every image into one row and divide the pixels by 255
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
    x_train = x_train.reshape(x_train.shape[0], x_train.shape[1] ** 2) / 255.0
    x_test = x_test.reshape(x_test.shape[0], x_test.shape[1] ** 2) / 255.0
    # manual one hot encoding of the training labels; y_test is left as loaded
    # and compared against argmax predictions below
    y_train = np.eye(num_class)[y_train]
    print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

    # instantiate the NN
    nn = NeuralNetwork(num_input, num_hidden, num_output, lr)

    # Learning phase: every iteration draws ONE random minibatch (indices are
    # sampled with np.random.randint) and performs a single backprop step on it
    for e in range(num_epoch):
        minibatch_index = np.random.randint(low=0, high=x_train.shape[0], size=num_minibatch)
        nn.backprop(x_train[minibatch_index, :], y_train[minibatch_index, :])

    # Evaluation phase: fraction of test samples whose argmax prediction equals the label
    predict, _ = nn.feedforward(x_test)
    predict = np.argmax(predict, axis=-1)
    score = np.average(predict == y_test)
    print("Performance: {}".format(score))


