Monday, May 23, 2022

Create a 3-Layer Back-Propagation Neural Network with Sigmoid and ReLU Activation Functions and SGD

The demo program shows how to create a 3-layer back-propagation neural network that uses the sigmoid and rectified linear unit (ReLU) activation functions and stochastic gradient descent (SGD). The dataset is the same one used in this post (Predictive Model: Customer Subscription (Continue/Discontinue) with TensorFlow) and can be downloaded from Kaggle. I used 50 records from the dataset.
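A minimal sketch of that loading step is below, assuming the Kaggle CSV is saved as churn1.csv (the file name used in the listing) and that the 50-record subset is simply the first 50 rows; both assumptions are mine, not stated above.

import pandas as pd

# read the churn CSV and keep only the first 50 records (assumed subset)
df = pd.read_csv('churn1.csv').head(50)
print(df.shape)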

The program is derived from this article.

The original post, which uses TensorFlow, reports 79% accuracy, while this program reaches 86%.

The output:


The code:

import numpy as np
import pandas as pd

# load the churn dataset (CSV downloaded from Kaggle)
df = pd.read_csv('churn1.csv')

# inspect the raw values of each column before encoding
for column in df:
    print("\n" + column + ': ')
    print(df[column].unique())
	
# encode the categorical columns as small integers
df['gender']  = df['gender'].apply(lambda x: 1 if x=='Female' else 0)
df['Partner'] = df['Partner'].apply(lambda x: 1 if x=='Yes' else 0)
df['Dependents'] = df['Dependents'].apply(lambda x: 1 if x=='Yes' else 0)
df['PhoneService'] = df['PhoneService'].apply(lambda x: 1 if x=='Yes' else 0)
df['MultipleLines'] = df['MultipleLines'].apply(lambda x : 1 if x=='Yes' else (0 if x=='No' else 2))
df['InternetService'] = df['InternetService'].apply(lambda x : 1 if x=='DSL' else (0 if x=='Fiber optic' else 2))
df['OnlineSecurity'] = df['OnlineSecurity'].apply(lambda x : 1 if x=='Yes' else (0 if x=='No' else 2))
df['DeviceProtection'] = df['DeviceProtection'].apply(lambda x : 1 if x=='Yes' else (0 if x=='No' else 2))
df['TechSupport'] = df['TechSupport'].apply(lambda x : 1 if x=='Yes' else (0 if x=='No' else 2))
df['OnlineBackup'] = df['OnlineBackup'].apply(lambda x : 1 if x=='Yes' else (0 if x=='No' else 2))
df['StreamingTV'] = df['StreamingTV'].apply(lambda x : 1 if x=='Yes' else (0 if x=='No' else 2))
df['StreamingMovies'] = df['StreamingMovies'].apply(lambda x : 1 if x=='Yes' else (0 if x=='No' else 2))
df['Contract'] = df['Contract'].apply(lambda x : 1 if x=='Month-to-month' else (0 if x=='One year' else 2))
df['PaperlessBilling'] = df['PaperlessBilling'].apply(lambda x: 1 if x=='Yes' else 0)
df['PaymentMethod'] = df['PaymentMethod'].apply(lambda x : 1 if x=='Electronic check' else (2 if x=='Mailed check' else (3 if x=='Bank transfer (automatic)' else 0)))
# blank TotalCharges values (empty or whitespace-only strings) become '0' before casting to float
df['TotalCharges'] = df['TotalCharges'].replace(r'^\s*$', '0', regex=True)
df['TotalCharges'] = df['TotalCharges'].astype(float)

# features: every column except the label and the customer ID
Xx = df.drop(['Churn', 'customerID'], axis=1)

# label: 1 = churned, 0 = retained
Yy = df['Churn'].apply(lambda x: 1 if x=='Yes' else 0)
Xxx = Xx.to_numpy()
print(Xxx.shape)
Yyy = Yy.to_numpy().reshape(-1, 1)
print(Xx.head(10))

alphas = [0.001]   # learning rate(s) to try
hiddenSize = 80    # units in each hidden layer

batches = 128      # defined but unused; the loop below updates on the full batch every iteration
# compute sigmoid nonlinearity
def sigmoid(x):
    # clip the input so np.exp does not overflow for large magnitudes
    x = np.clip(x, -20.0, 20.0)
    return 1.0 / (1.0 + np.exp(-x))


# convert output of sigmoid function to its derivative
def sigmoid_output_to_derivative(output):
    return output*(1-output)
    
# rectified linear function (element-wise)
def relu(x):
    return np.maximum(0.0, x)
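
# Note: relu is defined above but never called in the training loop below, which
# applies sigmoid at every layer. If the hidden layers used relu instead, e.g.
# layer_1 = relu(np.dot(layer_0, synapse_0)), the backward pass would need relu's
# derivative (1 where the output is positive, 0 otherwise) in place of
# sigmoid_output_to_derivative.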
    
for alpha in alphas:
    print("\nTraining With Alpha:" + str(alpha))
    np.random.seed(1)

    # randomly initialize our weights with mean 0
    synapse_0 = 2*np.random.random((19,hiddenSize)) - 1
    synapse_1 = 2*np.random.random((hiddenSize,hiddenSize)) - 1
    synapse_2 = 2*np.random.random((hiddenSize,1)) - 1
    
    for j in range(400000):

        # Feed forward through layers 0, 1, 2, and 3
        layer_0 = Xxx
      
        layer_1 = sigmoid(np.dot(layer_0,synapse_0))
        layer_2 = sigmoid(np.dot(layer_1,synapse_1))
        layer_3 = sigmoid(np.dot(layer_2,synapse_2))
        
        # how much did we miss the target value?
        layer_3_error = layer_3 - Yyy
        
        if (j % 10000) == 0:
            print( "Error after "+str(j)+" iterations:" + str(np.mean(np.abs(layer_3_error))))

        # in what direction is the target value?
        # were we really sure? if so, don't change too much.
        layer_3_delta = layer_3_error*sigmoid_output_to_derivative(layer_3)
       
        # how much did each layer_2 value contribute to the layer_3 error (according to the weights)?
        layer_2_error = layer_3_delta.dot(synapse_2.T)

        # in what direction is the target layer_2?
        # were we really sure? if so, don't change too much.
        layer_2_delta = layer_2_error * sigmoid_output_to_derivative(layer_2)
        
        layer_1_error = layer_2_delta.dot(synapse_1.T)
        layer_1_delta = layer_1_error * sigmoid_output_to_derivative(layer_1)
        # gradient-descent weight updates over the full batch: each weight matrix is
        # adjusted by the activations feeding into it times the delta of the layer it produces
        synapse_2 -= alpha * (layer_2.T.dot(layer_3_delta))
        synapse_1 -= alpha * (layer_1.T.dot(layer_2_delta))
        synapse_0 -= alpha * (layer_0.T.dot(layer_1_delta))
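
The listing ends with training and does not show how the 86% accuracy figure was measured. Below is a minimal evaluation sketch, assuming the final sigmoid output is thresholded at 0.5 and compared against the labels; it reuses the variables defined above and is an assumption about how accuracy could be computed, not part of the original program.

# final forward pass over the records
layer_1 = sigmoid(np.dot(Xxx, synapse_0))
layer_2 = sigmoid(np.dot(layer_1, synapse_1))
layer_3 = sigmoid(np.dot(layer_2, synapse_2))

# threshold the sigmoid output at 0.5 to get hard 0/1 predictions
predictions = (layer_3 >= 0.5).astype(int)
accuracy = np.mean(predictions == Yyy)
print("Accuracy: " + str(accuracy))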

