import numpy as np
import activations


class Layer:
    def __init__(self, activation, input_shape, output_shape):
        # not every layer has an activation, meaning it can be None
        self.activation = activation
        self.input_shape = None
        self.output_shape = None
        self.set_input_shape(input_shape)
        self.set_output_shape(output_shape)
        # False signifies that the layer has no values that can be trained, like weights and bias
        self.has_parameters = False
        self.init_activation()

    def forward(self, inputs):
        raise NotImplementedError()

    def forward_test_time(self, inputs):
        raise NotImplementedError()

    def backward(self, error, layer_outputs, previous_layer_outputs, mult_error_and_actv):
        raise NotImplementedError()

    def calculate_error(self, propagated_error):
        raise NotImplementedError()

    def apply_gradients(self, weight_gradients, bias_gradients):
        return

    def init_activation(self):
        if self.activation:
            if self.activation == "relu":
                self.activation = activations.Relu(self.activation)
            elif self.activation == "leaky_relu":
                self.activation = activations.Leaky_Relu(self.activation)
            elif self.activation == "sigmoid":
                self.activation = activations.Sigmoid(self.activation)
            elif self.activation == "softmax":
                self.activation = activations.Softmax(self.activation)
            elif self.activation == "linear":
                self.activation = activations.Linear(self.activation)
            else:
                # unrecognized activation names fall back to a linear activation
                self.activation = activations.Linear(self.activation)

    def init_parameters(self):
        if self.has_parameters:
            raise NotImplementedError()
        print("Layer has no parameters, therefore cannot initialize them")

    def set_input_shape(self, input_shape):
        if input_shape and not isinstance(input_shape, tuple):
            raise TypeError("Input shape has to be a tuple")
        self.input_shape = input_shape

    def set_output_shape(self, output_shape):
        if output_shape and not isinstance(output_shape, tuple):
            raise TypeError("Output shape has to be a tuple")
        self.output_shape = output_shape

    def set_parameters(self, weights, bias):
        """
        This function does not deepcopy the given parameters, it simply assigns them.
        """
        return

    def get_parameters(self):
        return None, None

    def mult_error_with_derivative_of_actv_func(self, error, output):
        actv_derivative = self.activation.derivative(output)
        # In case the derivative of the activation function is a Jacobian matrix and not a vector,
        # the dimensions of the error need to be expanded for the matrix multiplication.
        # After the matrix multiplication the resulting redundant dimension needs to be removed again.
        if error.shape == actv_derivative.shape:
            return error * actv_derivative
        error = error[:, np.newaxis, :]
        return np.squeeze(np.matmul(error, actv_derivative), axis=1)
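

# Illustrative note (not part of the original implementation): for an element-wise
# activation such as ReLU, `derivative(output)` is assumed to return the same
# (batch, n) shape as the error, so the two are multiplied entry by entry. For softmax
# the derivative is assumed to be a per-sample Jacobian of shape (batch, n, n); an
# error of shape (batch, n) is then expanded to (batch, 1, n), matrix-multiplied with
# the Jacobian to give (batch, 1, n), and squeezed back to (batch, n).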


class FullyConnected(Layer):
    def __init__(self, neurons, activation="relu", use_bias=False, input_shape=None):
        # 1 is the batch size, modify later when at that step (make it variable or similar for vectorization)
        super().__init__(activation, input_shape, (1, neurons))
        self.neurons = neurons
        self.use_bias = use_bias
        self.weights = None
        self.bias = None
        self.has_parameters = True

    def forward(self, inputs):
        res = np.matmul(inputs, self.weights)
        if self.use_bias:
            res += self.bias
        res = self.activation.forward(res)
        return res

    def forward_test_time(self, inputs):
        return self.forward(inputs)

    def backward(self, error, layer_outputs, previous_layer_outputs, mult_error_and_actv):
        weight_gradients, bias_grads = self.calculate_weight_gradients(error, layer_outputs,
                                                                       previous_layer_outputs, mult_error_and_actv)
        return weight_gradients, bias_grads

    def calculate_error(self, propagated_error):
        error = np.matmul(propagated_error, np.transpose(self.weights))
        return error

    def calculate_weight_gradients(self, propagated_error, layer_outputs, previous_layer_outputs, mult_error_and_actv):
        """
        :param mult_error_and_actv: Boolean that signifies whether the propagated error already contains the
        derivative of the activation function, i.e. whether the two have already been multiplied / combined.
        If True, the error is used as-is; otherwise it is multiplied with the activation derivative here.
        This is necessary because, for example, in the case of softmax + CE loss their derivatives are usually
        combined into one expression.
        """
        full_error = propagated_error if mult_error_and_actv else self.mult_error_with_derivative_of_actv_func(propagated_error, layer_outputs)
        weight_gradients = np.matmul(np.transpose(previous_layer_outputs), full_error)
        # full_error (summed over the batch) is also the bias gradient; both gradients are divided by the batch size
        batch_size = full_error.shape[0]
        return weight_gradients / batch_size, np.sum(full_error, axis=0) / batch_size
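
    # Shape sketch (illustrative, not from the original file): with previous_layer_outputs of
    # shape (batch, n_in) and full_error of shape (batch, n_out), the weight gradients come out
    # as (n_in, n_out), matching self.weights, and the bias gradients as (n_out,).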

    def apply_gradients(self, weight_gradients, bias_gradients):
        # plus and not minus, because the sign is handled in the optimizers
        self.weights += weight_gradients
        if self.use_bias:
            self.bias += bias_gradients

    def init_parameters(self):
        self.init_weights()
        if self.use_bias:
            self.init_bias()

    # call when input and output shapes are known
    def init_weights(self):
        # weights are drawn from a normal distribution with standard deviation 1 / sqrt(fan_in)
        self.weights = np.random.normal(0.0, pow(self.input_shape[-1], -0.5),
                                        (self.input_shape[-1], self.output_shape[-1]))

    def init_bias(self):
        self.bias = np.atleast_2d(np.zeros(self.output_shape[-1]))

    def set_parameters(self, weights, bias):
        if weights is not None:
            self.weights = weights
        if bias is not None and self.use_bias:
            self.bias = bias

    def get_parameters(self):
        return self.weights, self.bias
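

# Convention note (illustrative, not from the original file): FullyConnected expects inputs of
# shape (batch, n_in); self.weights has shape (n_in, n_out) and self.bias shape (1, n_out),
# so forward() produces outputs of shape (batch, n_out).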


class InvertedDropout(Layer):
    # good stackoverflow explanation: https://stackoverflow.com/questions/54109617/implementing-dropout-from-scratch
    # dropout is the percentage of activations that is kept!
    # input_shape parameter in case you want it as the first layer; it needs to match the input in that case
    def __init__(self, dropout=1.0, input_shape=None):
        super().__init__(None, input_shape, input_shape)
        self.dropout = dropout
        self.dropout_mask = None
        if self.dropout > 1.0 or self.dropout <= 0.0:
            raise ValueError("Dropout (keep probability) has to be in the range (0.0, 1.0]")

    def forward(self, outputs):
        # a neuron is kept with probability q = 1 - p, where p is the fraction that is dropped; here self.dropout is q
        output_mask = np.random.rand(*outputs.shape) < self.dropout
        self.dropout_mask = output_mask
        masked_outputs = outputs * output_mask
        # this scaling step is what makes it inverted dropout,
        # making it so nothing needs to be done at inference time
        masked_outputs /= self.dropout
        return masked_outputs

    def forward_test_time(self, inputs):
        # do nothing, because with inverted dropout the scaling already happened at training time
        return inputs

    def backward(self, error, layer_outputs, previous_layer_outputs, mult_error_and_actv):
        # no parameters, so nothing has to be set / returned
        return None, None

    def calculate_error(self, propagated_error):
        # the error is masked again (the outputs that didn't contribute / were set to 0 are set to 0 again)
        # and the result is scaled to make up for the masking
        return (propagated_error * self.dropout_mask) / self.dropout

    def set_input_shape(self, input_shape):
        # input and output shape are always identical for dropout
        super(InvertedDropout, self).set_input_shape(input_shape)
        super(InvertedDropout, self).set_output_shape(input_shape)

    def set_output_shape(self, output_shape):
        super(InvertedDropout, self).set_input_shape(output_shape)
        super(InvertedDropout, self).set_output_shape(output_shape)
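

# Illustrative note (not part of the original implementation): dividing by the keep probability
# preserves the expected activation. With self.dropout = 0.8, an activation of 1.0 survives with
# probability 0.8 and is scaled to 1.0 / 0.8 = 1.25, so its expected value stays 0.8 * 1.25 = 1.0,
# which is why forward_test_time can simply pass the inputs through unchanged.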


# not correctly implemented yet, because for flatten to be useful other layers besides fully connected are necessary :)
class Flatten(Layer):
    # input_shape parameter in case you want it as the first layer; it needs to match the input in that case
    # make sure to NOT flatten the batch dimension!
    def __init__(self, input_shape=None):
        # not sure about shapes here, check later
        super().__init__(None, None, None)
        if input_shape:
            self.set_input_shape(input_shape)

    def forward(self, inputs):
        # flatten everything except the batch dimension (axis 0)
        return inputs.reshape(inputs.shape[0], -1)

    def forward_test_time(self, inputs):
        return self.forward(inputs)

    def backward(self, error, layer_outputs, previous_layer_outputs, mult_error_and_actv):
        raise NotImplementedError()

    def set_input_shape(self, input_shape):
        if not input_shape:
            return
        super(Flatten, self).set_input_shape(input_shape)
        elems = 1
        for i in input_shape:
            elems *= i
        # (1, elems) so the flattened feature count sits in the last dimension,
        # matching the (1, neurons) convention used by FullyConnected
        super(Flatten, self).set_output_shape((1, elems))
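

# Shape sketch (illustrative, not from the original file): a batch of inputs of shape
# (batch, 28, 28) would be flattened to (batch, 784), which can then feed a FullyConnected
# layer whose weights have shape (784, neurons).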


class BatchNormalization(Layer):
    def __init__(self, activation, input_shape, output_shape):
        super().__init__(None, None, None)
        raise NotImplementedError()
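

# Minimal usage sketch (not part of the original module): builds a FullyConnected layer followed
# by InvertedDropout and runs one forward pass. It assumes the companion `activations` module
# imported above provides Relu with a `forward` method, as the rest of this file already relies on.
# Shapes and values are made up purely for illustration.
if __name__ == "__main__":
    np.random.seed(0)

    fc = FullyConnected(neurons=3, activation="relu", use_bias=True, input_shape=(1, 4))
    fc.init_parameters()  # weights: (4, 3), bias: (1, 3)

    drop = InvertedDropout(dropout=0.8, input_shape=(1, 3))

    x = np.random.rand(2, 4)  # batch of 2 samples with 4 features each
    h = fc.forward(x)         # -> (2, 3)
    y = drop.forward(h)       # masked and rescaled at training time
    print(h.shape, y.shape)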