"""Implementation of backpropagation from scratch to understand the process:

- Forward pass computation
- Loss calculation
- Backward pass implementation
- Parameter updates
"""
import numpy as np
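
# Notation used throughout (one layer of the chain rule, with m = batch size):
#   z[l] = a[l-1] @ W[l] + b[l]                        (pre-activation)
#   a[l] = sigmoid(z[l])                               (activation)
#   delta[l] = dL/dz[l]                                (layer error)
#   dL/dW[l] = a[l-1].T @ delta[l] / m
#   delta[l-1] = (delta[l] @ W[l].T) * sigmoid'(z[l-1])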
class NeuralNetworkWithBackprop:
    def __init__(self, layer_sizes):
        self.weights = []
        self.biases = []
        # Initialize weights and biases. Xavier-style scaling (1 / sqrt(fan_in))
        # keeps sigmoid pre-activations in their sensitive range, so gradients
        # do not start out vanishingly small.
        for i in range(len(layer_sizes) - 1):
            self.weights.append(
                np.random.randn(layer_sizes[i], layer_sizes[i + 1])
                / np.sqrt(layer_sizes[i])
            )
            self.biases.append(np.zeros((1, layer_sizes[i + 1])))

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        # Uses the identity sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)).
        s = self.sigmoid(x)
        return s * (1 - s)
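
    # Note: np.exp(-x) overflows for large negative x (NumPy emits a runtime
    # warning, although the result still saturates correctly to 0). If SciPy
    # is available, scipy.special.expit is a numerically stable alternative;
    # a sketch, assuming the SciPy dependency is acceptable:
    #
    #   from scipy.special import expit
    #   def sigmoid(self, x):
    #       return expit(x)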
    def forward_propagation(self, X):
        self.activations = [X]
        self.z_values = []
        # Forward pass
        activation = X
        for W, b in zip(self.weights, self.biases):
            z = np.dot(activation, W) + b
            self.z_values.append(z)
            activation = self.sigmoid(z)
            self.activations.append(activation)
        return activation
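
    # The cached self.activations and self.z_values above are what make the
    # backward pass a single reverse sweep: each sigmoid'(z[l]) term reuses a
    # stored pre-activation instead of recomputing the forward pass.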
    def backward_propagation(self, X, y, learning_rate=0.1):
        m = X.shape[0]
        # Output layer error dL/dz for the MSE loss used in train():
        # delta = (a - y) * sigmoid'(z). The constant factor from the
        # squared-error derivative is absorbed into the learning rate.
        delta = (self.activations[-1] - y) * self.sigmoid_derivative(self.z_values[-1])
        # Initialize gradients
        dW = []
        db = []
        # Backward pass
        for l in reversed(range(len(self.weights))):
            # Calculate gradients for current layer
            dW_l = np.dot(self.activations[l].T, delta) / m
            db_l = np.sum(delta, axis=0, keepdims=True) / m
            # Store gradients
            dW.insert(0, dW_l)
            db.insert(0, db_l)
            # Propagate the error to the previous layer (if not the input layer)
            if l > 0:
                delta = np.dot(delta, self.weights[l].T) * self.sigmoid_derivative(self.z_values[l - 1])
        # Update weights and biases
        for l in range(len(self.weights)):
            self.weights[l] -= learning_rate * dW[l]
            self.biases[l] -= learning_rate * db[l]
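
    # The update above is plain full-batch gradient descent. Momentum, RMSProp,
    # or Adam are common drop-in replacements for that loop; a sketch of the
    # momentum variant (self.velocities is a hypothetical attribute, assumed
    # initialized to zeros with the same shapes as self.weights):
    #
    #   self.velocities[l] = 0.9 * self.velocities[l] - learning_rate * dW[l]
    #   self.weights[l] += self.velocities[l]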
    def train(self, X, y, epochs=1000, learning_rate=0.1):
        for epoch in range(epochs):
            # Forward propagation
            output = self.forward_propagation(X)
            # Compute loss (MSE)
            loss = np.mean(np.square(output - y))
            # Backward propagation (also applies the parameter updates)
            self.backward_propagation(X, y, learning_rate)
            if epoch % 100 == 0:
                print(f"Epoch {epoch}, Loss: {loss:.4f}")
# Example usage
if __name__ == "__main__":
    # Create a simple XOR problem
    X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    y = np.array([[0], [1], [1], [0]])
    # Create and train network
    nn = NeuralNetworkWithBackprop([2, 4, 1])
    # nn.train(X, y)  # Uncomment to train
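
    # If training is enabled, XOR typically needs more iterations and a larger
    # step than the defaults; the values below are illustrative, not tuned:
    # nn.train(X, y, epochs=5000, learning_rate=1.0)
    # print("Predictions:", nn.forward_propagation(X).round(3))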