Reputation: 1
# RF
# Model that will be fit on the full training set and scored on the test set below.
rf_optimal = RandomForestRegressor(**best_params, random_state=42)

# Leave-one-out cross-validation: one fold per training sample.
loo = LeaveOneOut()
r2_train_scores = []
rmse_train_scores = []
y_test_true = []
y_test_pred = []

# Work on plain arrays so fold indices can be applied directly.
X_arr = X_train.values
y_arr = y_train.values
for fit_idx, hold_idx in loo.split(X_train):
    # Fresh regressor for every fold, same hyper-parameters and seed.
    fold_model = RandomForestRegressor(**best_params, random_state=42)
    fold_model.fit(X_arr[fit_idx], y_arr[fit_idx])

    fit_pred = fold_model.predict(X_arr[fit_idx])
    hold_pred = fold_model.predict(X_arr[hold_idx])

    # Calibration metrics: performance on the samples the fold was fit on.
    r2_train_scores.append(r2_score(y_arr[fit_idx], fit_pred))
    rmse_train_scores.append(np.sqrt(mean_squared_error(y_arr[fit_idx], fit_pred)))

    # Each fold holds out exactly one sample; collect its truth and prediction.
    y_test_true.append(y_arr[hold_idx][0])
    y_test_pred.append(hold_pred[0])

# Calibration scores: mean over the per-fold training metrics.
r2_cal = np.mean(r2_train_scores)
rmse_cal = np.mean(rmse_train_scores)
# Cross-validation scores: computed over all held-out predictions at once.
r2_cv = r2_score(y_test_true, y_test_pred)
rmse_cv = np.sqrt(mean_squared_error(y_test_true, y_test_pred))
print(f"Rc²: {r2_cal}, RMSEc: {rmse_cal}")
print(f"Rcv²: {r2_cv}, RMSEcv: {rmse_cv}")
# train on the whole train-dataset and test on the test-dataset
rf_optimal.fit(X_train.values, y_train.values)
y_test_pred = rf_optimal.predict(X_test.values)
I'm trying to evaluate the RF model using LeaveOneOut(). My confusion is this: should I define a new RandomForestRegressor inside each loo.split iteration, and then fit the regressor defined outside the loo.split on the whole train dataset and use it to predict the test dataset? I want to compute Rc², Rcv², and Rp².
Upvotes: 0
Views: 30