import numpy as np
import pandas as pd
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import matplotlib.pyplot as plt
import seaborn as sns
def kmeans_example():
# Generate synthetic data
n_samples = 300
n_features = 2
n_clusters = 4
X, y_true = make_blobs(
n_samples=n_samples,
n_features=n_features,
centers=n_clusters,
cluster_std=0.60,
random_state=42
)
# Initialize and fit KMeans
kmeans = KMeans(
n_clusters=n_clusters,
init='k-means++',
n_init=10,
random_state=42
)
kmeans.fit(X)
# Get cluster assignments
y_pred = kmeans.predict(X)
# Calculate silhouette score
silhouette_avg = silhouette_score(X, y_pred)
print(f"Silhouette Score: {silhouette_avg:.3f}")
# Visualize results
plt.figure(figsize=(12, 5))
# Plot original data
plt.subplot(1, 2, 1)
plt.scatter(X[:, 0], X[:, 1], c=y_true, cmap='viridis', alpha=0.7)
plt.title('Original Data')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
# Plot clustered data
plt.subplot(1, 2, 2)
plt.scatter(X[:, 0], X[:, 1], c=y_pred, cmap='viridis', alpha=0.7)
centers = kmeans.cluster_centers_
plt.scatter(centers[:, 0], centers[:, 1], c='red', marker='x', s=200, linewidths=3, label='Centroids')
plt.title('K-Means Clustering')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.legend()
plt.tight_layout()
# plt.show()
# Elbow method for optimal k
inertias = []
silhouette_scores = []
k_range = range(2, 11)
for k in k_range:
kmeans = KMeans(n_clusters=k, random_state=42)
kmeans.fit(X)
inertias.append(kmeans.inertia_)
silhouette_scores.append(silhouette_score(X, kmeans.labels_))
plt.figure(figsize=(12, 5))
# Plot elbow curve
plt.subplot(1, 2, 1)
plt.plot(k_range, inertias, 'bo-')
plt.xlabel('Number of Clusters (k)')
plt.ylabel('Inertia')
plt.title('Elbow Method')
# Plot silhouette scores
plt.subplot(1, 2, 2)
plt.plot(k_range, silhouette_scores, 'ro-')
plt.xlabel('Number of Clusters (k)')
plt.ylabel('Silhouette Score')
plt.title('Silhouette Analysis')
plt.tight_layout()
# plt.show()