So far, we have only implemented the feed-forward functionality for our network and its layers. First, let's update our FullyConnectedLayer class, adding methods for backpropagation and optimization:
class FullyConnectedLayer(object):
"""A fully connected (dense) neural-network layer.

Previously feed-forward only; now extended with the attributes
needed for back-propagation and optimization.
"""
# [...] (code unchanged)
def __init__(self, num_inputs, layer_size, activation_fn, d_activation_fn):
"""
Args:
    num_inputs: Size of the input vector this layer expects.
    layer_size: Number of neurons, i.e. the output size.
    activation_fn: Activation function f applied to the pre-activation.
    d_activation_fn: Derivative f' of the activation function,
        used during back-propagation.
"""
# [...] (code unchanged)
self.d_activation_fn = d_activation_fn # Deriv. activation function
# Storage for back-propagation: x, y cache the last forward pass's
# input/output; dL_dW, dL_db will hold the loss gradients w.r.t.
# the layer's weights and bias.
self.x, self.y, self.dL_dW, self.dL_db = 0, 0, 0, 0 # Storage attr.
def forward(self, x):
    """Propagate the input through the layer and return its activation.

    Args:
        x: Input array for this layer.
    Returns:
        The layer's output, f(x . W + b).
    """
    self.x = x  # cached for the backward pass
    pre_activation = np.dot(x, self.W) + self.b
    self.y = self.activation_fn(pre_activation)
    return self.y
def backward(self, dL_dy):
"""Back-propagate the loss.

Args:
    dL_dy: Gradient of the loss w.r.t. this layer's output y,
        as received from the following layer.
"""
# NOTE(review): the derivative is evaluated on self.y (the activation
# output, cached by forward), so d_activation_fn is presumably written
# in terms of f's output rather than z — confirm against its definition.
dy_dz = self.d_activation_fn(self.y) # = f'
# Chain rule: combine the upstream gradient with the local derivative.
dL_dz = (dL_dy * dy_dz) # dL/dz = dL/dy * dy/dz = l'_{k+1} * f'
dz_dw...