Reputation: 11
import os
import pandas as pd
from sklearn.linear_model import ElasticNet
from sklearn.model_selection import train_test_split, GridSearchCV, cross_validate
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np
# Fit an ElasticNet regression on the CSV data, tune its hyper-parameters with
# cross-validated grid search, and report R-squared / RMSE on a held-out split.

# Set the working directory to the desktop so the CSV is found by its bare name
desktop_path = os.path.expanduser("~/Desktop")
os.chdir(desktop_path)

# Load data from CSV file
data = pd.read_csv('stan_func_conn.csv')

# Split data into input features (X) and target variable (y)
# NOTE(review): the penalty treats all coefficients alike, so the features
# should be on a comparable scale. The file name suggests the data are already
# standardized -- confirm before relying on the results.
X = data.iloc[:, 1:]  # Independent variables (every column except the first)
y = data.iloc[:, 0]  # Dependent variable (the first column)

# Split data into training (80%) and testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Define the parameter grid for tuning.
# The grid is a superset of the original values ([0.1, 0.5, 1.0] and
# [0.2, 0.5, 0.8]). When the selected alpha lands on the edge of the grid, the
# true optimum may lie outside it, so weaker and stronger penalties are
# searched as well.
param_grid = {
    'alpha': [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 5.0, 10.0],
    'l1_ratio': [0.1, 0.2, 0.5, 0.8, 0.9],
}

# Create an instance of the ElasticNet model
elastic_net = ElasticNet(max_iter=10000)

# Perform grid search for hyperparameter tuning and model evaluation
grid_search = GridSearchCV(
    estimator=elastic_net,
    param_grid=param_grid,
    cv=5,
    scoring='neg_mean_squared_error',
)
grid_search.fit(X_train, y_train)

# Get the best lambda (alpha) and l1_ratio parameters
best_alpha = grid_search.best_params_['alpha']
best_l1_ratio = grid_search.best_params_['l1_ratio']
print("Best lambda (alpha):", best_alpha)
print("Best l1_ratio:", best_l1_ratio)

# GridSearchCV refits the best parameter combination on the whole training set
# by default (refit=True), so the tuned model is already available and does
# not need to be built and fitted a second time.
elastic_net_tuned = grid_search.best_estimator_

# Make predictions on the testing data
y_pred = elastic_net_tuned.predict(X_test)

# Compute the R-squared on the testing data.
# R-squared is negative whenever the model predicts the test set worse than a
# constant equal to the test-set mean; a value near zero means the features
# carry essentially no usable signal under the selected penalty.
r2 = r2_score(y_test, y_pred)
print('R-squared on testing data:', r2)

# Compute the RMSE on the testing data
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
print('RMSE on testing data:', rmse)

# Compare RMSE to the scale of the target variable.
# Dividing by the population standard deviation of the test target makes the
# thresholds unit-free: the ratio equals sqrt(1 - R^2), so a value >= 1 means
# "no better than predicting the mean". Fall back to the raw RMSE when the
# test target is constant, to avoid dividing by zero.
target_std = float(np.std(y_test))
relative_rmse = rmse / target_std if target_std > 0 else rmse

if relative_rmse < 0.5:
    print('The model has a very good fit.')
elif relative_rmse < 1:
    print('The model has a good fit.')
else:
    print('The model has a moderate to poor fit.')
I got a negative R^2, which I was not expecting, so I suspect something is wrong with my code. The model evaluation results look like this: Best lambda (alpha): 1.0; Best l1_ratio: 0.2; R-squared on testing data: -0.00499349856926945; RMSE on testing data: 0.8576623398551885.
I would very much appreciate any suggestions to improve the model and goodness of fit.
Upvotes: 1
Views: 109