
MLP-from-scratch

This is my first attempt at creating a neural network (a multi-layer perceptron) from scratch, using only NumPy.

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
## Read in the data
df = pd.read_csv('https://raw.githubusercontent.com/CasKirch/MLP-from-scratch/main/churn.csv')
del df['customerID']
del df['MultipleLines'] 
df = df[df.TotalCharges != ' ']
df['TotalCharges'] = df['TotalCharges'].astype(float)
df.shape
(7032, 19)
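The blank-string rows in TotalCharges are filtered out above before the cast to float. An equivalent cleanup (a sketch using pd.to_numeric with errors='coerce'; df_clean is a made-up name, this is not what the notebook actually ran) would be:

df_clean = df.copy()
df_clean['TotalCharges'] = pd.to_numeric(df_clean['TotalCharges'], errors='coerce')  ## blanks become NaN
df_clean = df_clean.dropna(subset=['TotalCharges'])                                  ## then drop them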

Preprocessing

df.head()
ohe_columns = ['gender', 'Partner', 'Dependents', 'PhoneService', 'InternetService', 'OnlineSecurity', 'OnlineBackup',
              'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling',
              'PaymentMethod']

## One-hot encoding the dependent variable
'''A bit clumsy, but the target has to be coded as 0/1 for backprop to work; for the independent variables the exact encoding matters less'''

df = pd.get_dummies(df, columns=['Churn'], drop_first=True)
y = df.Churn_Yes
X = df
del X['Churn_Yes']
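## Quick sanity check (a sketch, not in the original notebook): the target should now be a plain 0/1 vector
print(y.value_counts())  ## expected: two classes, 0 = no churn, 1 = churn
print(y.unique())        ## expected: only 0 and 1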
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)
ohe = OneHotEncoder(sparse=False)
ohe.fit(X_train[ohe_columns])
OneHotEncoder(sparse=False)
feature_array = ohe.transform(X_train[ohe_columns])
feature_labels = ohe.categories_

ohe_data = pd.DataFrame(feature_array, columns=np.concatenate(feature_labels).ravel())
## Deleting string columns etc
for i in X_train.columns:
    if i in ohe_columns:
        del X_train[i]
## Replacing with ohe variables
ohe_data.reset_index(drop=True, inplace=True)
X_train.reset_index(drop=True, inplace=True)

X_train = pd.concat([X_train, ohe_data], axis=1)
## Now same for X_test

feature_array = ohe.transform(X_test[ohe_columns])
ohe_data = pd.DataFrame(feature_array, columns=np.concatenate(feature_labels).ravel())

for i in X_test.columns:
    if i in ohe_columns:
        del X_test[i]
## Replacing with ohe variables

ohe_data.reset_index(drop=True, inplace=True)
X_test.reset_index(drop=True, inplace=True)

X_test = pd.concat([X_test, ohe_data], axis=1, join = 'inner')
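The delete-and-concat steps above work, but the same preprocessing can be written more compactly with sklearn's ColumnTransformer. This is only a sketch of an alternative applied to the raw train/test splits (before the manual deletions above); preprocess, X_train_enc and X_test_enc are names made up for the example:

from sklearn.compose import ColumnTransformer

## One-hot encode the categorical columns, pass the numeric columns through unchanged
preprocess = ColumnTransformer(
    [('ohe', OneHotEncoder(sparse=False), ohe_columns)],
    remainder='passthrough'
)
X_train_enc = preprocess.fit_transform(X_train)  ## fit on the training split only
X_test_enc = preprocess.transform(X_test)        ## reuse the fitted encoder on the test split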

Creating the model

## We need to work with numpy arrays rather than pandas dataframe
X_train = np.array(X_train)
y_train = np.array(y_train)
## Save the dimensions of the data
m, n = X_train.shape
## Transpose input data
X_train = X_train.T
y_train = y_train.T
## Initialize parameters  --> W1, b1, W2, b2
'''I suspect the network may need to be deeper (more hidden layers), since our dataset does not have high dimensionality'''
def init_params():
    W1 = np.random.rand(10, 42)       ## 10 hidden units x 42 input features, random values in [0, 1)
    b1 = np.random.rand(10, 1) - 0.5  ## 10 x 1, shifted into [-0.5, 0.5)
    W2 = np.random.rand(1, 10)        ## 1 x 10: one sigmoid output unit reading 10 hidden activations
    b2 = np.random.rand(1, 1) - 0.5   ## 1 x 1
    
    return W1, b1, W2, b2
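## An alternative initialization (a sketch, not what is used above): centre the weights around zero as well,
## which tends to keep the early iterations better behaved. The 0.5 shift is an arbitrary choice here.
def init_params_centred():
    W1 = np.random.rand(10, 42) - 0.5
    b1 = np.random.rand(10, 1) - 0.5
    W2 = np.random.rand(1, 10) - 0.5
    b2 = np.random.rand(1, 1) - 0.5
    return W1, b1, W2, b2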
## Forward prop
def ReLU(Z):
    '''define a ReLU function using numpy'''
    return np.maximum(Z, 0)

def Sigmoid(Z):
    return 1 / (1 + np.exp(-Z))
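## Note: np.exp(-Z) can overflow for very negative Z and raise runtime warnings.
## A clipped variant (a sketch; the bound of 500 is an arbitrary choice) avoids that:
def Sigmoid_stable(Z):
    return 1 / (1 + np.exp(-np.clip(Z, -500, 500)))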

def forward_prop(W1, b1, W2, b2, X):  ## X is the (features x samples) input matrix
    '''Forward pass: hidden layer with ReLU, output layer with sigmoid'''
    Z1 = W1.dot(X) + b1
    A1 = ReLU(Z1)
    Z2 = W2.dot(A1) + b2
    A2 = Sigmoid(Z2)
    
    return Z1, A1, Z2, A2 
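## Quick shape check (a sketch, not part of the original run; the underscore names are throwaways):
## with X_train of shape (42, m), A1 should come out (10, m) and A2 (1, m).
_W1, _b1, _W2, _b2 = init_params()
_Z1, _A1, _Z2, _A2 = forward_prop(_W1, _b1, _W2, _b2, X_train)
print(X_train.shape, _A1.shape, _A2.shape)  ## expected: (42, m) (10, m) (1, m)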
## Back prop
def deriv_ReLU(Z):
    '''The derivative of ReLU is either zero or one (if x > 0, ReLU is just the identity)'''
    return Z > 0  ## True is interpreted as 1, False as 0

def back_prop(Z1, A1, Z2, A2, W2, Y, X):
    ## Output layer: for a sigmoid output with binary cross-entropy loss, dZ2 = A2 - Y
    dZ2 = A2 - Y
    dW2 = (1 / m) * dZ2.dot(A1.T)
    db2 = (1 / m) * np.sum(dZ2, axis=1, keepdims=True)
    
    ## Hidden layer
    dZ1 = W2.T.dot(dZ2) * deriv_ReLU(Z1)
    dW1 = (1 / m) * dZ1.dot(X.T)
    db1 = (1 / m) * np.sum(dZ1, axis=1, keepdims=True)  ## keepdims so each hidden unit keeps its own bias gradient
    
    return dW1, db1, dW2, db2
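## The dZ2 = A2 - Y step is the gradient of the binary cross-entropy loss through the sigmoid output.
## A finite-difference check on one entry of W2 (a sketch, not part of the original notebook;
## the loss() helper and the epsilon value are assumptions made just for this check):
def loss(W1, b1, W2, b2, X, Y):
    _, _, _, A2 = forward_prop(W1, b1, W2, b2, X)
    A2 = np.clip(A2, 1e-12, 1 - 1e-12)  ## avoid log(0)
    return -np.mean(Y * np.log(A2) + (1 - Y) * np.log(1 - A2))

def grad_check_W2(X, Y, eps=1e-5):
    W1, b1, W2, b2 = init_params()
    Z1, A1, Z2, A2 = forward_prop(W1, b1, W2, b2, X)
    dW1, db1, dW2, db2 = back_prop(Z1, A1, Z2, A2, W2, Y, X)
    ## Nudge a single weight up and down and compare the analytic gradient to the numerical one
    W2_plus, W2_minus = W2.copy(), W2.copy()
    W2_plus[0, 0] += eps
    W2_minus[0, 0] -= eps
    numerical = (loss(W1, b1, W2_plus, b2, X, Y) - loss(W1, b1, W2_minus, b2, X, Y)) / (2 * eps)
    print(numerical, dW2[0, 0])  ## the two values should agree to several decimal places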
    
## Update params
def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, learning_rate):
    W1 = W1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1
    
    W2 = W2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2
    
    return W1, b1, W2, b2
## Gradient_descent 
accuracy = list()
def get_predictions(A2):
    '''Threshold the sigmoid outputs at 0.5 to get 0/1 class predictions'''
    return (A2 > 0.5).astype(int).ravel()

def get_accuracy(predictions, y):
    return np.sum(predictions == y) / len(y)

def gradient_descent(X, y, iterations, learning_rate):
    # 1. Initialize parameters
    W1, b1, W2, b2 = init_params()
    
    # 2. For iterations:
    for i in range(iterations):
        # a. Forward prop
        Z1, A1, Z2, A2 = forward_prop(W1, b1, W2, b2, X)
        # b. Back prop
        dW1, db1, dW2, db2 = back_prop(Z1, A1, Z2, A2, W2, y, X)
        # c. Update params
        W1, b1, W2, b2 = update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, learning_rate)
        
        accuracy.append(get_accuracy(get_predictions(A2), y))
        # 3. Show accuracy every 1000 iterations
        if i % 1000 == 0:
            print("On iteration {w}".format(w=i))
            print("Your accuracy is {w}".format(w=get_accuracy(get_predictions(A2), y)))
    return accuracy
    
iterations = 10000
acc = gradient_descent(X_train, y_train, iterations, 0.0001)
plt.plot(acc)
On iteration 0
Your accuracy is 0.26801517067003794
On iteration 1000
Your accuracy is 0.7447850821744627
On iteration 2000
Your accuracy is 0.7493678887484198
On iteration 3000
Your accuracy is 0.7586915297092288
On iteration 4000
Your accuracy is 0.7710176991150443
On iteration 5000
Your accuracy is 0.7738621997471555
On iteration 6000
Your accuracy is 0.7779709228824273
On iteration 7000
Your accuracy is 0.7752844500632111
On iteration 8000
Your accuracy is 0.7749683944374209
On iteration 9000
Your accuracy is 0.775126422250316
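The accuracies above are all on the training split. A natural next step is to score the held-out test set; the sketch below assumes gradient_descent is extended to also return the trained W1, b1, W2, b2 (right now it only returns the accuracy list), and gives X_test / y_test the same numpy-and-transpose treatment as the training data:

## Held-out evaluation sketch, assuming the trained parameters are available as W1, b1, W2, b2
X_test_np = np.array(X_test, dtype=float).T  ## same layout as X_train: features x samples
y_test_np = np.array(y_test)

_, _, _, A2_test = forward_prop(W1, b1, W2, b2, X_test_np)
print('Test accuracy:', get_accuracy(get_predictions(A2_test), y_test_np))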
