#############################################################################################
# ANN with numpy
#############################################################################################
# written by Yang, Soyoung
# 2017.07.30 v1.0
# 2017.08.08 v1.1
# ANN calculation to see how backpropagation and gradient descent really work.
# - Data generation with numpy
# - Feed-forward using dot products and the sigmoid function
# - Backpropagation with gradient descent, directly in numpy
# - Hyper-parameter optimization based on the cost
#
# reference :
# https://github.com/stephencwelch/Neural-Networks-Demystified/blob/master/Part%204%20Backpropagation.ipynb
# https://www.youtube.com/watch?v=ttE0F7fghfk
#############################################################################################
############
# 1. import
############
# import the required libraries
from __future__ import print_function
import numpy as np
import matplotlib.pyplot as plt
###############
# 2. ANN model
###############
class Neural_Network(object):
    def __init__(self):
        # Define hyper-parameters
        self.inputLayerSize = 2
        self.outputLayerSize = 1
        self.hiddenLayerSize = 128
        self.init_weights()

    def init_weights(self):
        # Weights (parameters); re-drawn whenever the layer sizes change
        self.W1 = np.random.randn(self.inputLayerSize, self.hiddenLayerSize)
        self.W2 = np.random.randn(self.hiddenLayerSize, self.hiddenLayerSize)
        self.W3 = np.random.randn(self.hiddenLayerSize, self.outputLayerSize)
        self.b1 = np.random.randn(1, self.hiddenLayerSize)
        self.b2 = np.random.randn(1, self.hiddenLayerSize)
        self.b3 = np.random.randn(1, self.outputLayerSize)
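
    # Optional refinement (an assumption, not in the original): dividing each
    # np.random.randn matrix by np.sqrt(fan_in), e.g.
    # np.random.randn(m, n) / np.sqrt(m), keeps the wide sigmoid layers from
    # starting out saturated (Xavier-style initialization).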

    def forward(self, X):
        # Propagate inputs through the network
        self.z2 = np.dot(X, self.W1) + self.b1
        self.a2 = self.sigmoid(self.z2)
        self.z3 = np.dot(self.a2, self.W2) + self.b2
        self.a3 = self.sigmoid(self.z3)
        self.z4 = np.dot(self.a3, self.W3) + self.b3
        yHat = self.z4  # linear output layer: no activation on the final scores
        return yHat
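
    # Shape walk-through for the default sizes, with N input samples:
    #   X  (N, 2)   · W1 (2, 128)   -> z2, a2 (N, 128)
    #   a2 (N, 128) · W2 (128, 128) -> z3, a3 (N, 128)
    #   a3 (N, 128) · W3 (128, 1)   -> z4 = yHat (N, 1)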

    def sigmoid(self, z):
        # Apply the sigmoid activation function to a scalar, vector, or matrix
        return 1 / (1 + np.exp(-z))

    def sigmoidPrime(self, z):
        # Gradient of the sigmoid
        return np.exp(-z) / ((1 + np.exp(-z)) ** 2)
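        # Note: with s = sigmoid(z) this equals s * (1 - s), which avoids
        # evaluating the exponential twice.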

    def costFunction(self, X, y):
        # Compute the cost for given X, y, using the weights already stored in the class
        self.yHat = self.forward(X)
        J = 0.5 * np.sum((y - self.yHat) ** 2)
        return J

    def costFunctionPrime(self, X, y):
        # Compute the derivative of the cost with respect to each weight and bias
        # for a given X and y
        self.yHat = self.forward(X)
        # No sigmoidPrime at the output layer: it has no activation function,
        # so the delta is just the negative residual.
        delta4 = -(y - self.yHat)
        dJdW3 = np.dot(self.a3.T, delta4)
        # sum (not mean) over the batch, to match the dot-product weight gradients
        dJdb3 = np.sum(delta4, axis=0, keepdims=True)
        # From here on sigmoidPrime is needed, because of the sigmoid activations
        delta3 = np.dot(delta4, self.W3.T) * self.sigmoidPrime(self.z3)
        dJdW2 = np.dot(self.a2.T, delta3)
        dJdb2 = np.sum(delta3, axis=0, keepdims=True)
        delta2 = np.dot(delta3, self.W2.T) * self.sigmoidPrime(self.z2)
        dJdW1 = np.dot(X.T, delta2)
        dJdb1 = np.sum(delta2, axis=0, keepdims=True)
        return dJdW1, dJdW2, dJdW3, dJdb1, dJdb2, dJdb3

    def gradient_descent(self, lr, dJdW1, dJdW2, dJdW3, dJdb1, dJdb2, dJdb3):
        # Vanilla gradient descent: step each parameter against its gradient
        self.W1 = self.W1 - lr * dJdW1
        self.W2 = self.W2 - lr * dJdW2
        self.W3 = self.W3 - lr * dJdW3
        self.b1 = self.b1 - lr * dJdb1
        self.b2 = self.b2 - lr * dJdb2
        self.b3 = self.b3 - lr * dJdb3

    # not necessary, but worth revisiting to see what a hyper-parameter really is
    def opt_hyper_params(self, X, y):
        # The input layer size is dictated by the data, so only the hidden layer
        # size is searched; the weights must be re-initialized for each candidate,
        # otherwise the cost never changes between iterations.
        self.inputLayerSize = X.shape[1]
        best_cost = np.inf
        best_hidden = self.hiddenLayerSize
        for dim_hid in range(128, 527, 40):
            self.hiddenLayerSize = dim_hid
            self.init_weights()
            cost = self.costFunction(X, y)
            if cost < best_cost:
                best_hidden = dim_hid
                best_cost = cost
        self.hiddenLayerSize = best_hidden
        self.init_weights()
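
# A gradient sanity check (a sketch, not part of the original script; the helper
# name check_W1_gradient is ours): compare the analytic dJdW1 from
# costFunctionPrime against a central finite difference of the cost. The relative
# error should be tiny (around 1e-8 to 1e-6) if the backpropagation is correct.
# Usage: print(check_W1_gradient(Neural_Network(), X, y))
def check_W1_gradient(NN, X, y, eps=1e-5):
    dJdW1 = NN.costFunctionPrime(X, y)[0]
    numeric = np.zeros_like(NN.W1)
    for i in range(NN.W1.shape[0]):
        for j in range(NN.W1.shape[1]):
            NN.W1[i, j] += eps
            cost_plus = NN.costFunction(X, y)
            NN.W1[i, j] -= 2 * eps
            cost_minus = NN.costFunction(X, y)
            NN.W1[i, j] += eps  # restore the original weight
            numeric[i, j] = (cost_plus - cost_minus) / (2 * eps)
    return np.linalg.norm(dJdW1 - numeric) / np.linalg.norm(dJdW1 + numeric)
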
#################
# 3. train model
#################
def train(epochs, lr, X, y):
    NN = Neural_Network()
    print('Neural network is formed.')

    # not necessary
    NN.opt_hyper_params(X, y)
    print('\nHyper-parameter optimization is done.')
    print(' Input layer size: {} \t Hidden layer size: {}'.format(NN.inputLayerSize, NN.hiddenLayerSize))

    print('\nTraining ANN...')
    costs = []  # list of the cost at each epoch
    # start training
    for epoch in range(epochs + 1):
        # calculate gradients
        dJdW1, dJdW2, dJdW3, dJdb1, dJdb2, dJdb3 = NN.costFunctionPrime(X, y)
        # update the weights with the gradients
        NN.gradient_descent(lr, dJdW1, dJdW2, dJdW3, dJdb1, dJdb2, dJdb3)
        cost = NN.costFunction(X, y)
        costs.append(cost)
        print("epoch: {}, cost: {}".format(epoch, cost))

    # plot the training process, showing the cost at each epoch
    x_axis = np.arange(0, epochs + 1)
    plt.plot(x_axis, costs)
    plt.xlabel('epoch')
    plt.ylabel('cost')
    plt.grid(True)
    plt.show()
    return NN  # return the trained network so it can be used for predictions
###################
# 4. generate data
###################
# X = (hours sleeping, hours studying), y = score on a test
X = np.array(([3, 5], [5, 1], [10, 2]), dtype=float)
# scale per-feature to [0, 1], as in the referenced notebook
X = X / np.amax(X, axis=0)
y = np.array(([75], [82], [93]), dtype=float) / 100  # scale scores to [0, 1]
#################
# 5. training ANN
#################
model = train(50, 0.001, X, y)
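
# A minimal prediction sketch, assuming train() returns the trained network as
# above: score a new (sleep, study) pair, scaled by the same maxima (10 hours
# sleep, 5 hours study) that normalized the training data.
x_new = np.array([[4, 3]], dtype=float) / np.array([10, 5], dtype=float)
print('Predicted score:', model.forward(x_new) * 100)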