Neural Network Architecture Design
How would you design a neural network architecture for a given problem? What factors would you consider?
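Key factors include the task's input and output structure, the amount of training data, model capacity versus overfitting risk, and compute and latency budgets. As a small illustration of the first factor, the task type alone already pins down the output layer and loss. This is a sketch only; suggest_head is a hypothetical helper, not a standard API:

def suggest_head(task, num_classes=None):
    # Map a task type to (output units, output activation, loss).
    # Illustrative only; real designs also weigh data size, latency, etc.
    if task == "binary_classification":
        return 1, "sigmoid", "binary_cross_entropy"
    if task == "multiclass_classification":
        return num_classes, "softmax", "categorical_cross_entropy"
    if task == "regression":
        return 1, "linear", "mean_squared_error"
    raise ValueError(f"unknown task: {task}")

print(suggest_head("multiclass_classification", num_classes=10))
# (10, 'softmax', 'categorical_cross_entropy')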
Explain the relationship between network depth, width, and capacity
How do depth and width affect a neural network's learning capabilities?
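Depth (number of layers) and width (units per layer) both raise capacity, but differently: each weight matrix has fan_in * fan_out entries, so width enters multiplicatively, while extra layers add further matrices to the sum; depth also stacks nonlinear compositions, which lets the network build hierarchical features. A quick parameter-count sketch (mlp_param_count is a hypothetical helper; the 784-input, 10-class sizes assume an MNIST-like task):

def mlp_param_count(layer_sizes):
    # Total weights plus biases for a fully connected network.
    return sum(n_in * n_out + n_out
               for n_in, n_out in zip(layer_sizes[:-1], layer_sizes[1:]))

print(mlp_param_count([784, 128, 10]))       # 101770
print(mlp_param_count([784, 256, 10]))       # 203530 (wider)
print(mlp_param_count([784, 128, 128, 10]))  # 118282 (deeper)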
Implement a Simple Feedforward Neural Network (NumPy)
Write a basic neural network from scratch using NumPy, including forward and backward propagation.
import numpy as np

class SimpleNN:
    def __init__(self, input_size, hidden_size, output_size):
        # Random init breaks symmetry between hidden units;
        # biases can safely start at zero.
        self.W1 = np.random.randn(input_size, hidden_size) * 0.5
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size) * 0.5
        self.b2 = np.zeros((1, output_size))
    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        s = self.sigmoid(x)
        return s * (1 - s)

    def forward(self, X):
        # Cache pre-activations (z) and activations (a) for the backward pass.
        self.z1 = np.dot(X, self.W1) + self.b1
        self.a1 = self.sigmoid(self.z1)
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = self.sigmoid(self.z2)
        return self.a2

    def backward(self, X, y, learning_rate=0.1):
        m = X.shape[0]
        # For a sigmoid output trained with binary cross-entropy,
        # the gradient with respect to z2 simplifies to (a2 - y).
        dz2 = self.a2 - y
        dW2 = np.dot(self.a1.T, dz2) / m
        db2 = np.sum(dz2, axis=0, keepdims=True) / m
        # Propagate the error back through the hidden layer.
        da1 = np.dot(dz2, self.W2.T)
        dz1 = da1 * self.sigmoid_derivative(self.z1)
        dW1 = np.dot(X.T, dz1) / m
        db1 = np.sum(dz1, axis=0, keepdims=True) / m
        # Vanilla gradient-descent update.
        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2
        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1
# Example usage: learn XOR
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])
nn = SimpleNN(2, 4, 1)
for epoch in range(10000):
    output = nn.forward(X)
    nn.backward(X, y, learning_rate=0.5)
    if epoch % 1000 == 0:
        # MSE here is only a monitoring metric; the update itself
        # follows the cross-entropy gradient computed in backward().
        loss = np.mean((output - y) ** 2)
        print(f"Epoch {epoch}, Loss: {loss:.4f}")
Implement a Simple LSTM Cell (NumPy)
Write a single LSTM cell forward pass using NumPy.
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def tanh(x):
    return np.tanh(x)

def lstm_cell_forward(x_t, h_prev, c_prev, params):
    # params interleaves the weights and biases for the four gates.
    Wf, bf, Wi, bi, Wo, bo, Wc, bc = params
    # Work on the concatenation [h_prev, x_t] so each gate sees both.
    concat = np.concatenate((h_prev, x_t), axis=1)
    f_t = sigmoid(np.dot(concat, Wf) + bf)   # forget gate
    i_t = sigmoid(np.dot(concat, Wi) + bi)   # input gate
    o_t = sigmoid(np.dot(concat, Wo) + bo)   # output gate
    c_tilde = tanh(np.dot(concat, Wc) + bc)  # candidate cell state
    c_next = f_t * c_prev + i_t * c_tilde    # keep some old state, add some new
    h_next = o_t * tanh(c_next)              # expose a gated view of the cell
    return h_next, c_next
# Example usage
np.random.seed(0)
x_t = np.random.randn(1, 3)     # input at time t: batch of 1, 3 features
h_prev = np.random.randn(1, 5)  # previous hidden state: 5 units
c_prev = np.random.randn(1, 5)  # previous cell state
# params must interleave weights and biases as [Wf, bf, Wi, bi, Wo, bo, Wc, bc];
# each W maps the concatenated (hidden + input) vector of width 8 to 5 units.
params = []
for _ in range(4):
    params.append(np.random.randn(8, 5))  # gate weight matrix
    params.append(np.random.randn(1, 5))  # gate bias row vector
h_next, c_next = lstm_cell_forward(x_t, h_prev, c_prev, params)
print(h_next)
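To process a whole sequence, the same cell is applied step by step, feeding each step's hidden and cell state into the next. A minimal sketch (lstm_forward is a hypothetical wrapper around the cell above, reusing params):

def lstm_forward(xs, h0, c0, params):
    # Run the cell over a list of time steps; collect hidden states.
    h, c = h0, c0
    hs = []
    for x_t in xs:
        h, c = lstm_cell_forward(x_t, h, c, params)
        hs.append(h)
    return hs, c

xs = [np.random.randn(1, 3) for _ in range(4)]  # a toy 4-step sequence
hs, c_final = lstm_forward(xs, np.zeros((1, 5)), np.zeros((1, 5)), params)
print(len(hs), hs[-1].shape)  # 4 (1, 5)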