import lightgbm as lgb
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split, GridSearchCV
def lightgbm_example():
# Generate synthetic dataset
X, y = make_classification(
n_samples=1000,
n_features=20,
n_informative=15,
n_redundant=5,
n_classes=3,
random_state=42
)
# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Create LightGBM datasets
train_data = lgb.Dataset(X_train, label=y_train)
test_data = lgb.Dataset(X_test, label=y_test, reference=train_data)
# Set LightGBM parameters
params = {
'objective': 'multiclass', # multiclass classification
'num_class': 3, # number of classes
'boosting_type': 'gbdt', # traditional Gradient Boosting Decision Tree
'num_leaves': 31, # max number of leaves in one tree
'max_depth': 6, # maximum tree depth
'learning_rate': 0.1, # learning rate
'feature_fraction': 0.8, # fraction of features to be used in each iteration
'bagging_fraction': 0.8, # fraction of data to be used in each iteration
'bagging_freq': 5, # perform bagging every k iterations
'verbose': -1, # suppress printing
'metric': ['multi_logloss', 'multi_error'] # evaluation metrics
}
# Train LightGBM model with early stopping.
# Recent LightGBM releases (>= 4.0) configure early stopping and logging via
# callbacks instead of the removed early_stopping_rounds / verbose_eval
# keyword arguments; lgb.record_evaluation captures the per-iteration metrics
# used for the learning-curve plots below.
num_round = 100
evals_result = {}
lgb_model = lgb.train(
params,
train_data,
num_boost_round=num_round,
valid_sets=[train_data, test_data],
valid_names=['train', 'test'],
callbacks=[
lgb.early_stopping(stopping_rounds=10, verbose=False),
lgb.log_evaluation(period=0),
lgb.record_evaluation(evals_result)
]
)
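# The early-stopping callback records the iteration with the best validation
# score on the booster; printing it is an optional check of where training stopped.
print("Best iteration:", lgb_model.best_iteration)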
# Make predictions using the iteration selected by early stopping;
# predict() returns one probability column per class for a multiclass objective
y_pred = lgb_model.predict(X_test, num_iteration=lgb_model.best_iteration)
y_pred_class = np.argmax(y_pred, axis=1)
# Print performance metrics
print("Classification Report:")
print(classification_report(y_test, y_pred_class))
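# As a complementary, optional view to the report above, a confusion matrix
# shows which classes get confused with each other; the import is kept local
# so this sketch stays self-contained.
from sklearn.metrics import confusion_matrix
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_class))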
# Feature importance plot; plot_importance creates its own figure, so the size
# and title are passed directly rather than via separate plt.figure()/plt.title() calls
lgb.plot_importance(lgb_model, max_num_features=10, figsize=(10, 6), title='LightGBM Feature Importance')
# plt.show()
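# The same information is available numerically from the booster; 'gain' sums
# each feature's contribution to loss reduction, whereas the plot above uses
# split counts by default. Optional sketch:
importance = lgb_model.feature_importance(importance_type='gain')
top_features = np.argsort(importance)[::-1][:5]
print("Top 5 features by gain:", top_features)
print("Gain values:", np.round(importance[top_features], 1))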
# Learning curves
results = evals_result  # evaluation history collected by lgb.record_evaluation
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(results['train']['multi_logloss'], label='Train')
plt.plot(results['test']['multi_logloss'], label='Test')
plt.legend()
plt.title('LightGBM Log Loss')
plt.xlabel('Iterations')
plt.ylabel('Log Loss')
plt.subplot(1, 2, 2)
plt.plot(results['train']['multi_error'], label='Train')
plt.plot(results['test']['multi_error'], label='Test')
plt.legend()
plt.title('LightGBM Classification Error')
plt.xlabel('Iterations')
plt.ylabel('Classification Error')
plt.tight_layout()
# plt.show()
# Hyperparameter tuning example using the scikit-learn estimator API;
# LGBMClassifier infers the number of classes from y, so num_class is not set explicitly
lgb_clf = lgb.LGBMClassifier(
objective='multiclass',
random_state=42,
verbose=-1
)
param_grid = {
'num_leaves': [15, 31, 63],
'max_depth': [3, 6, 9],
'learning_rate': [0.01, 0.1, 0.3],
'n_estimators': [100, 200],
'colsample_bytree': [0.8, 1.0],  # scikit-learn name for feature_fraction
'subsample': [0.8, 1.0],  # scikit-learn name for bagging_fraction
'subsample_freq': [1]  # bagging only takes effect when subsample_freq > 0
}
grid_search = GridSearchCV(
estimator=lgb_clf,
param_grid=param_grid,
cv=5,
n_jobs=-1,
scoring='accuracy'
)
grid_search.fit(X_train, y_train)
print("\nBest parameters:", grid_search.best_params_)
print("Best cross-validation score:", grid_search.best_score_)