This section provides Python implementations of common activation functions using NumPy and shows how to visualize them and their derivatives with Matplotlib. Understanding their shapes and gradients helps explain common training behaviors such as saturation and vanishing gradients.
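For reference, the closed-form derivatives plotted below are: sigmoid'(x) = sigmoid(x)(1 − sigmoid(x)); tanh'(x) = 1 − tanh(x)^2; ReLU'(x) = 1 for x > 0 and 0 otherwise; Leaky ReLU'(x) = 1 for x > 0 and α otherwise; and ELU'(x) = 1 for x > 0 and α·exp(x) otherwise.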
import numpy as np
import matplotlib.pyplot as plt
# Note: PyTorch is not used in this snippet; the commented-out imports below are kept
# only for the optional cross-check at the end.
# import torch
# import torch.nn as nn
# import torch.nn.functional as F
# --- Define activation functions ---
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def tanh(x):
    return np.tanh(x)

def relu(x):
    return np.maximum(0, x)

def leaky_relu(x, alpha=0.01):
    return np.where(x > 0, x, alpha * x)

def elu(x, alpha=1.0):
    # Ensure x is a NumPy array for np.exp to work element-wise
    x_arr = np.asarray(x)
    return np.where(x_arr > 0, x_arr, alpha * (np.exp(x_arr) - 1))

def softmax(x):
    # Ensure x is a NumPy array
    x_arr = np.asarray(x)
    # Subtract max for numerical stability, crucial for avoiding overflow with exp
    exp_x = np.exp(x_arr - np.max(x_arr, axis=-1, keepdims=True))
    return exp_x / np.sum(exp_x, axis=-1, keepdims=True)
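# --- Optional sanity checks (a minimal sketch with illustrative values, not required for the plots) ---
# Applies each activation to a small sample vector, confirms that softmax outputs sum to 1,
# and checks the closed-form sigmoid derivative against a central finite difference.
def sanity_check_activations():
    sample = np.array([-2.0, -0.5, 0.0, 0.5, 2.0])
    print("sigmoid:   ", sigmoid(sample))
    print("tanh:      ", tanh(sample))
    print("relu:      ", relu(sample))
    print("leaky_relu:", leaky_relu(sample, alpha=0.01))
    print("elu:       ", elu(sample, alpha=1.0))
    probs = softmax(np.array([1.0, 2.0, 0.5]))
    print("softmax:   ", probs, "sum =", np.sum(probs))  # probabilities should sum to 1

    # Central finite difference vs. analytic derivative sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x))
    h = 1e-5
    numeric = (sigmoid(sample + h) - sigmoid(sample - h)) / (2 * h)
    analytic = sigmoid(sample) * (1 - sigmoid(sample))
    assert np.allclose(numeric, analytic, atol=1e-6), "Sigmoid derivative mismatch"

# sanity_check_activations()  # uncomment to run the checks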
# --- Plot activation functions and their derivatives ---
def plot_activation_functions():
    x = np.linspace(-5, 5, 200)  # Reduced points for brevity in this example
    plt.figure(figsize=(12, 8))  # Adjusted figure size

    # Sigmoid
    plt.subplot(2, 3, 1)
    plt.plot(x, sigmoid(x), label='sigmoid(x)')
    plt.plot(x, sigmoid(x) * (1 - sigmoid(x)), label="sigmoid'(x)", linestyle='--')
    plt.title('Sigmoid')
    plt.xlabel('x')
    plt.ylabel('y')
    plt.grid(True)
    plt.legend()
    # Tanh
    plt.subplot(2, 3, 2)
    plt.plot(x, tanh(x), label='tanh(x)')
    plt.plot(x, 1 - tanh(x)**2, label="tanh'(x)", linestyle='--')
    plt.title('Tanh')
    plt.xlabel('x')
    plt.ylabel('y')
    plt.grid(True)
    plt.legend()
    # ReLU
    plt.subplot(2, 3, 3)
    plt.plot(x, relu(x), label='ReLU(x)')
    plt.plot(x, np.where(x > 0, 1, 0), label="ReLU'(x)", linestyle='--')  # Derivative is 0 for x < 0, 1 for x > 0
    plt.title('ReLU')
    plt.xlabel('x')
    plt.ylabel('y')
    plt.grid(True)
    plt.legend()
    # Leaky ReLU
    plt.subplot(2, 3, 4)
    alpha_leaky = 0.1  # Using a more visible alpha for plotting
    plt.plot(x, leaky_relu(x, alpha=alpha_leaky), label=f'Leaky ReLU(x, α={alpha_leaky})')
    plt.plot(x, np.where(x > 0, 1, alpha_leaky), label=f"Leaky ReLU'(x, α={alpha_leaky})", linestyle='--')
    plt.title('Leaky ReLU')
    plt.xlabel('x')
    plt.ylabel('y')
    plt.grid(True)
    plt.legend()
    # ELU
    plt.subplot(2, 3, 5)
    alpha_elu = 1.0
    plt.plot(x, elu(x, alpha=alpha_elu), label=f'ELU(x, α={alpha_elu})')
    plt.plot(x, np.where(x > 0, 1, alpha_elu * np.exp(x)), label=f"ELU'(x, α={alpha_elu})", linestyle='--')
    plt.title('ELU')
    plt.xlabel('x')
    plt.ylabel('y')
    plt.grid(True)
    plt.legend()
    # Softmax operates on a vector rather than element-wise, so it is not plotted
    # as a 1D curve like the others; instead, we show its effect on a small example vector.
    softmax_example_input = np.array([1.0, 2.0, 0.5])
    softmax_output = softmax(softmax_example_input)
    # print(f"Softmax example: input {softmax_example_input}, output {softmax_output}, sum {np.sum(softmax_output)}")
    plt.subplot(2, 3, 6)
    labels = ['x1', 'x2', 'x3']
    x_indices = np.arange(len(softmax_example_input))
    plt.bar(x_indices - 0.2, softmax_example_input, width=0.4, label='Input values')
    plt.bar(x_indices + 0.2, softmax_output, width=0.4, label='Softmax output')
    plt.xticks(x_indices, labels)
    plt.title('Softmax Example')
    plt.ylabel('Value / Probability')
    plt.legend()
    plt.grid(True)

    plt.suptitle("Activation Functions and Their Derivatives", fontsize=16)
    plt.tight_layout(rect=[0, 0, 1, 0.96])  # Adjust layout to make room for the suptitle
    # plt.show() blocks execution, so it is usually omitted when generating content in a backend.
    # To save the plot to a file instead of showing it:
    # plt.savefig("activation_functions_plot.png")
    # plt.close()  # Close the figure to free memory
# To generate and potentially save the plot (uncomment if needed):
# plot_activation_functions()
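As an optional cross-check (a minimal sketch that assumes PyTorch is installed, which is why the commented-out torch imports above are mentioned), the NumPy implementations can be compared against PyTorch's built-in activations; the two should agree to floating-point tolerance. The helper name compare_with_pytorch is illustrative.

def compare_with_pytorch():
    # Optional: verify the NumPy activations against PyTorch's built-ins (requires torch).
    import torch
    import torch.nn.functional as F
    x_np = np.linspace(-5, 5, 11)
    x_t = torch.tensor(x_np)  # float64 tensor, matching NumPy's default precision
    assert np.allclose(sigmoid(x_np), torch.sigmoid(x_t).numpy())
    assert np.allclose(tanh(x_np), torch.tanh(x_t).numpy())
    assert np.allclose(relu(x_np), F.relu(x_t).numpy())
    assert np.allclose(leaky_relu(x_np, alpha=0.01), F.leaky_relu(x_t, negative_slope=0.01).numpy())
    assert np.allclose(elu(x_np, alpha=1.0), F.elu(x_t, alpha=1.0).numpy())
    assert np.allclose(softmax(x_np), F.softmax(x_t, dim=-1).numpy())
    print("NumPy activations match PyTorch within floating-point tolerance.")

# compare_with_pytorch()  # uncomment to run if PyTorch is available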