```python
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso, Ridge
from sklearn.metrics import mean_squared_error, r2_score
from ipywidgets import interact, FloatLogSlider

# Generate data
np.random.seed(42)
n_samples = 200

X = np.zeros((n_samples, 6))
X[:, 0] = np.random.normal(0, 1, n_samples)               # X1 - important feature
X[:, 1] = np.random.normal(0, 1, n_samples)               # X2 - important feature
X[:, 2] = X[:, 0] + np.random.normal(0, 0.1, n_samples)   # X1_corr - correlated with X1
X[:, 3] = X[:, 1] + np.random.normal(0, 0.1, n_samples)   # X2_corr - correlated with X2
X[:, 4] = np.random.normal(0, 0.1, n_samples)             # Noise1 - pure noise
X[:, 5] = np.random.normal(0, 0.1, n_samples)             # Noise2 - pure noise

# Response depends mainly on X1 and X2, with a small contribution from the correlated feature
y = 3 * X[:, 0] + 2 * X[:, 1] + 0.5 * X[:, 2] + np.random.normal(0, 0.1, n_samples)
```
Introduction
In this week's discussion section, we will create some interactive plots to better understand how lasso and ridge regression work. To do so, we will use synthetic data designed to highlight how ridge and lasso regression behave differently depending on the relationships among your predictors. Keep in mind that your results with real data may look very different: unlike this notebook, real-world data is not generated specifically to illustrate how regression models behave.
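Before diving in, it helps to recall how the two penalties differ. In scikit-learn's parameterization (up to the exact scaling of the squared-error term), ridge adds a squared \(\ell_2\) penalty on the coefficients, while lasso adds an \(\ell_1\) penalty that can shrink coefficients exactly to zero:

\[
\hat{\beta}_{\text{ridge}} = \arg\min_{\beta}\ \lVert y - X\beta \rVert_2^2 + \alpha \lVert \beta \rVert_2^2,
\qquad
\hat{\beta}_{\text{lasso}} = \arg\min_{\beta}\ \tfrac{1}{2n} \lVert y - X\beta \rVert_2^2 + \alpha \lVert \beta \rVert_1 .
\]

Larger values of \(\alpha\) mean stronger shrinkage; the difference in the penalty term is what lets lasso drop correlated or uninformative features entirely, while ridge only shrinks them toward zero.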
Data Loading
Copy the code at the top of this notebook to load the necessary libraries and generate the data we will use. Read the comments on each feature to get an idea of the relationships between the variables.
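If you want to verify those relationships yourself, a quick optional check (not part of the original exercise) is to look at the correlation matrix of the generated features. This sketch assumes the data-generation cell above has already been run:

```python
# Optional sanity check: correlations between the six generated features
# (assumes X from the data-generation cell above is already defined)
feature_names = ['X1', 'X2', 'X1_corr', 'X2_corr', 'Noise1', 'Noise2']
corr = pd.DataFrame(X, columns=feature_names).corr()
print(corr.round(2))  # X1/X1_corr and X2/X2_corr should be very highly correlated
```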
Regression
Now that you have your data, do the following:
- Split your data into training and testing.
```python
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
```
- Create and fit a ridge regression model.
```python
# Create and fit Ridge regression model
ridge_model = Ridge()
ridge_model.fit(X_train, y_train)
ridge_predictions = ridge_model.predict(X_test)
```
- Calculate the RMSE and \(R^2\) for your ridge regression.
```python
# Calculate RMSE and R^2 for Ridge regression
ridge_rmse = np.sqrt(mean_squared_error(y_test, ridge_predictions))
ridge_r2 = r2_score(y_test, ridge_predictions)

print("Ridge Regression RMSE:", ridge_rmse)
print("Ridge Regression R²:", ridge_r2)
```
```
Ridge Regression RMSE: 0.14410020171824725
Ridge Regression R²: 0.9984722762470866
```
- Create and fit a lasso model.
```python
# Create and fit Lasso regression model
lasso_model = Lasso()
lasso_model.fit(X_train, y_train)
lasso_predictions = lasso_model.predict(X_test)
```
- Calculate the RMSE and \(R^2\) for your lasso model.
```python
# Calculate RMSE and R^2 for Lasso regression
lasso_rmse = np.sqrt(mean_squared_error(y_test, lasso_predictions))
lasso_r2 = r2_score(y_test, lasso_predictions)

print("Lasso Regression RMSE:", lasso_rmse)
print("Lasso Regression R²:", lasso_r2)
```
```
Lasso Regression RMSE: 1.2984978990079017
Lasso Regression R²: 0.8759496036905758
```
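One way to see why lasso does worse here at the default setting (alpha = 1 for both models) is to compare the fitted coefficients side by side. A small sketch, assuming the `ridge_model` and `lasso_model` objects fit above are still in memory:

```python
# Compare fitted coefficients at the default alpha for both models
feature_names = ['X1', 'X2', 'X1_corr', 'X2_corr', 'Noise1', 'Noise2']
coef_compare = pd.DataFrame({'Ridge': ridge_model.coef_,
                             'Lasso': lasso_model.coef_},
                            index=feature_names)
print(coef_compare.round(3))  # lasso may have shrunk some coefficients all the way to zero
```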
Visualizing Ridge vs. Lasso Regression
- Create a plot of alpha against the RMSE for both lasso and ridge regression.
```python
# Visualize alphas against RMSE for lasso and ridge

# Initialize lists to append RMSE values into
rmse_lasso = []
rmse_ridge = []

# Define alpha values to iterate over
alphas = [0.1, 1, 10]

# Create and fit a lasso and ridge model for each predefined alpha
for alpha in alphas:
    lasso = Lasso(alpha=alpha)
    ridge = Ridge(alpha=alpha)

    lasso.fit(X_train, y_train)
    ridge.fit(X_train, y_train)

    # Calculate RMSE for both models
    rmse_lasso.append(np.sqrt(mean_squared_error(y_test, lasso.predict(X_test))))
    rmse_ridge.append(np.sqrt(mean_squared_error(y_test, ridge.predict(X_test))))

# Create plot of RMSE against alpha values
plt.figure(figsize=(10, 5))
plt.plot(alphas, rmse_lasso, label='Lasso RMSE')
plt.plot(alphas, rmse_ridge, label='Ridge RMSE')
plt.xscale('log')
plt.xlabel('Alpha')
plt.ylabel('Root Mean Squared Error')
plt.title('RMSE vs. Alpha for Lasso and Ridge Regression')
plt.legend()
plt.show()
```
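The three alphas above give only a coarse picture of how the error responds to regularization. If you want a smoother curve, one option (not required for the exercise) is to sweep a denser grid of alphas with `np.logspace`; a sketch reusing the training and test splits from above:

```python
# A denser alpha grid gives a smoother RMSE curve
alphas_dense = np.logspace(-3, 3, 50)  # 50 values from 0.001 to 1000
rmse_lasso_dense = []
rmse_ridge_dense = []

for alpha in alphas_dense:
    # larger max_iter helps lasso converge at small alphas
    lasso = Lasso(alpha=alpha, max_iter=10_000).fit(X_train, y_train)
    ridge = Ridge(alpha=alpha).fit(X_train, y_train)
    rmse_lasso_dense.append(np.sqrt(mean_squared_error(y_test, lasso.predict(X_test))))
    rmse_ridge_dense.append(np.sqrt(mean_squared_error(y_test, ridge.predict(X_test))))

plt.figure(figsize=(10, 5))
plt.plot(alphas_dense, rmse_lasso_dense, label='Lasso RMSE')
plt.plot(alphas_dense, rmse_ridge_dense, label='Ridge RMSE')
plt.xscale('log')
plt.xlabel('Alpha')
plt.ylabel('Root Mean Squared Error')
plt.legend()
plt.show()
```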
- Create an interactive plot (for both lasso and ridge) that allows you to adjust alpha and see how the actual vs. predicted values change.
```python
# Create function to run a model and create the plot
def update_alphas(alpha, model_type):
    # Condition to allow the user to select different models
    if model_type == 'Lasso':
        model = Lasso(alpha=alpha)
    else:
        model = Ridge(alpha=alpha)

    # Fit the model and predict on the test set
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Calculate model metrics
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)

    # Create plot of predicted values against actual values
    plt.figure(figsize=(10, 5))
    # Add predicted and actual values
    plt.scatter(y_test, y_pred, color='blue', alpha=0.5, label=f'Predictions (alpha={alpha})')
    # Add 1:1 reference line (perfect predictions fall on this line)
    plt.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'k--', lw=2)
    plt.title(f'{model_type} Regression: Predictions vs Actual (alpha={alpha})')
    plt.xlabel('Actual Values')
    plt.ylabel('Predicted Values')
    plt.legend()
    # Adjust the position and aesthetics of the metric box
    plt.figtext(0.5, -0.05, f'RMSE: {rmse:.2f}, R²: {r2:.2f}', ha="center", fontsize=12,
                bbox={"facecolor": "orange", "alpha": 0.5, "pad": 5})
    plt.show()

# Create interactive widgets
# Create alpha slider for choosing the alpha value (starts at alpha = 1)
alpha_slider = FloatLogSlider(value=1.0, base=10, min=-3, max=3, step=0.1, description='Pick an Alpha!')

# Create model selector for picking which model the user wants to look at
model_selector = {'Lasso': 'Lasso', 'Ridge': 'Ridge'}

# Combine the two widgets with the model/plot output
interact(update_alphas, alpha=alpha_slider, model_type=model_selector)
```
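A quick note on the widgets: passing a dictionary like `{'Lasso': 'Lasso', 'Ridge': 'Ridge'}` to `interact` creates a dropdown whose labels are the keys and whose selected value is passed to `model_type`, and `FloatLogSlider` moves on a logarithmic scale, which makes it easier to explore alphas that span several orders of magnitude.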
- Create three different bar plots with the following guidelines:
  - Each plot should represent a different alpha value: alpha = 0.1, alpha = 1, alpha = 10.
  - Each plot should show how both the ridge and lasso models performed.
  - The x axis should represent the six different variables: X1, X2, X1_corr, X2_corr, Noise1, Noise2.
  - The y axis should represent the coefficients.
```python
# Define alpha values to iterate over
alphas = [0.1, 1.0, 10.0]
data = []

# Create and fit ridge and lasso models and store coefficients in a new dataframe
for alpha in alphas:
    ridge = Ridge(alpha=alpha).fit(X_train, y_train)
    lasso = Lasso(alpha=alpha).fit(X_train, y_train)

    data.append(pd.DataFrame({'Ridge': ridge.coef_,   # coef_ has one entry per feature
                              'Lasso': lasso.coef_},
                             index=['X1', 'X2', 'X1_corr', 'X2_corr', 'Noise1', 'Noise2']))  # feature names as the index

# Create bar plots to visualize how coefficients change across alpha values and models
fig, axes = plt.subplots(1, 3, figsize=(12, 4), sharey=True)
for i, df in enumerate(data):
    df.plot.bar(ax=axes[i], width=0.8)
    axes[i].set_title(f'Alpha = {alphas[i]}')
    axes[i].set_xticklabels(df.index, rotation=45)

plt.show()
```
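To tie the bar plots back to the feature-selection story, you can also print which coefficients lasso sets exactly to zero at each alpha. A small sketch using the `alphas` and `data` objects built above:

```python
# Report which features lasso drops (coefficient exactly zero) at each alpha
for alpha, df in zip(alphas, data):
    dropped = df.index[df['Lasso'] == 0].tolist()
    print(f"alpha = {alpha}: lasso zeroed out {dropped if dropped else 'no features'}")
```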