The Titanic dataset, found on Kaggle, is a good way to exercise the Python program I posted earlier, which you can read here. The basic problem is to predict which passengers died and which survived. Surprisingly, I got 100% accuracy.
The dataset link: Titanic Kaggle Dataset
The output:
The code:
import numpy as np
import pandas as pd


def sigmoid(x):
    """Element-wise logistic sigmoid, 1 / (1 + e^-x).

    The original version tried to clamp extreme inputs with
    ``x.any() < 0.0`` / ``x.any() > 20.0`` — both compare a bool against a
    float and are always False, so the guards were dead code and large
    negative inputs could overflow ``np.exp``.  Clipping the argument gives
    the same values to within float precision, without the overflow warning.
    """
    return 1.0 / (1.0 + np.exp(-np.clip(x, -60.0, 60.0)))


def sigmoid_output_to_derivative(output):
    """Sigmoid derivative expressed in terms of the sigmoid's output."""
    return output * (1.0 - output)


def relu(x):
    """Element-wise rectified linear unit.

    BUG FIX: the original returned ``max(0.0, x.all())``, which collapses
    the whole array to a single bool.  (The function is unused by the
    training loop but kept for the interface.)
    """
    return np.maximum(0.0, x)


def load_data(path="tested.csv"):
    """Read the Titanic CSV and return (X, y) as numpy arrays.

    Encoding matches the original script:
    - 'Sex': female -> 1, male -> 0.
    - 'Embarked': Q -> 1, S -> 0, anything else (C or NaN) -> 2.
    - Missing Age/Fare filled with 0.

    Returns X with shape (n, 7) and y with shape (n, 1).
    """
    df = pd.read_csv(path)
    df["Sex"] = df["Sex"].apply(lambda v: 1 if v == "female" else 0)
    df["Embarked"] = df["Embarked"].apply(
        lambda v: 1 if v == "Q" else (0 if v == "S" else 2)
    )
    df["Age"] = df["Age"].fillna(0)
    df["Fare"] = df["Fare"].fillna(0)
    # NOTE(review): the label column is named 's' here; Kaggle's Titanic
    # files call it 'Survived' — confirm against the actual CSV header.
    X = df.drop(["PassengerId", "s", "Name", "Ticket", "Cabin"], axis=1)
    y = df["s"].apply(lambda v: 1 if v == 1 else 0)
    return X.to_numpy(dtype=float), y.to_numpy().reshape(-1, 1)


def train(X, y, alpha=0.001, hidden_size=80, iterations=10000, seed=1):
    """Train a 3-hidden-weight fully-connected sigmoid network.

    Full-batch gradient descent, weights initialised uniformly in [-1, 1)
    with the original fixed seed.  Returns the final output-layer
    activations, shape (n, 1).
    """
    np.random.seed(seed)
    synapse_0 = 2 * np.random.random((X.shape[1], hidden_size)) - 1
    synapse_1 = 2 * np.random.random((hidden_size, hidden_size)) - 1
    synapse_2 = 2 * np.random.random((hidden_size, 1)) - 1

    layer_3 = None
    for j in range(iterations):
        # Forward pass through layers 0, 1, 2, 3.
        layer_0 = X
        layer_1 = sigmoid(layer_0.dot(synapse_0))
        layer_2 = sigmoid(layer_1.dot(synapse_1))
        layer_3 = sigmoid(layer_2.dot(synapse_2))

        # How much did we miss the target value?
        layer_3_error = layer_3 - y
        if (j % 1000) == 0:
            print("Error after " + str(j) + " iterations:"
                  + str(np.mean(np.abs(layer_3_error))))

        # Backward pass: scale each error by the sigmoid slope so confident
        # units change less, then propagate through the transposed weights.
        layer_3_delta = layer_3_error * sigmoid_output_to_derivative(layer_3)
        layer_2_error = layer_3_delta.dot(synapse_2.T)
        layer_2_delta = layer_2_error * sigmoid_output_to_derivative(layer_2)
        layer_1_error = layer_2_delta.dot(synapse_1.T)
        layer_1_delta = layer_1_error * sigmoid_output_to_derivative(layer_1)

        # BUG FIX: each weight matrix must be updated with the activations of
        # the layer it feeds FROM.  The original paired synapse_2 with
        # layer_1 and synapse_1 with layer_2, applying the wrong gradients;
        # the shapes happened to match only because both hidden layers share
        # the same width.
        synapse_2 -= alpha * layer_2.T.dot(layer_3_delta)
        synapse_1 -= alpha * layer_1.T.dot(layer_2_delta)
        synapse_0 -= alpha * layer_0.T.dot(layer_1_delta)

    return layer_3


def main():
    # sklearn / matplotlib are only needed when running as a script; keeping
    # the imports here leaves the module importable without them installed.
    from matplotlib import style
    from sklearn.metrics import accuracy_score, confusion_matrix

    style.use("classic")

    X, y = load_data("tested.csv")
    for alpha in [0.001]:
        print("\nTraining With Alpha:" + str(alpha))
        layer_3 = train(X, y, alpha=alpha)
        # Threshold the output-layer activations at 0.5 to get class labels.
        y_hat = [0 if val < 0.5 else 1 for val in layer_3]
        print(accuracy_score(y, y_hat))
        cm = confusion_matrix(pd.DataFrame(y), pd.DataFrame(y_hat))
        print(cm)


if __name__ == "__main__":
    main()
No comments:
Post a Comment