RS-MJH

Reputation: 1

Leave-one-out cross-validation for model evaluation

    # assumes X_train, y_train, X_test (pandas objects) and best_params are already defined
    import numpy as np
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.model_selection import LeaveOneOut
    from sklearn.metrics import r2_score, mean_squared_error

    # RF
    rf_optimal = RandomForestRegressor(**best_params, random_state=42)

    # leave-one-out cross-validation on the training set
    loo = LeaveOneOut()
    r2_train_scores = []
    rmse_train_scores = []
    y_loo_true = []
    y_loo_pred = []

    for train_index, test_index in loo.split(X_train):
        X_train_fold, X_test_fold = X_train.values[train_index], X_train.values[test_index]
        y_train_fold, y_test_fold = y_train.values[train_index], y_train.values[test_index]

        # fit a fresh regressor on each fold's training data
        rf_fold = RandomForestRegressor(**best_params, random_state=42)
        rf_fold.fit(X_train_fold, y_train_fold)
        y_train_pred_fold = rf_fold.predict(X_train_fold)
        y_test_pred_fold = rf_fold.predict(X_test_fold)

        # calibration metrics: fit quality on the fold's own training data
        r2_train_scores.append(r2_score(y_train_fold, y_train_pred_fold))
        rmse_train_scores.append(np.sqrt(mean_squared_error(y_train_fold, y_train_pred_fold)))

        # collect the single held-out observation and its prediction
        y_loo_true.append(y_test_fold[0])
        y_loo_pred.append(y_test_pred_fold[0])

    # Rc²/RMSEc averaged over folds; Rcv²/RMSEcv over all held-out predictions
    r2_cal = np.mean(r2_train_scores)
    rmse_cal = np.mean(rmse_train_scores)
    r2_cv = r2_score(y_loo_true, y_loo_pred)
    rmse_cv = np.sqrt(mean_squared_error(y_loo_true, y_loo_pred))

    print(f"Rc²: {r2_cal}, RMSEc: {rmse_cal}")
    print(f"Rcv²: {r2_cv}, RMSEcv: {rmse_cv}")

    # train on the whole train dataset and predict the test dataset
    rf_optimal.fit(X_train.values, y_train.values)
    y_test_pred = rf_optimal.predict(X_test.values)

I'm trying to evaluate the RF model using LeaveOneOut(). My confusion is whether I should define a new RandomForestRegressor inside each loo.split fold (as above), and then fit the regressor defined outside the loo.split on the whole train dataset to predict the test dataset. I want to get Rc², Rcv², and Rp².
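For Rp², one minimal sketch of what could follow the final fit above — this assumes `y_test` holds the test-set targets matching `y_test_pred` from `rf_optimal.predict(X_test.values)`, and the names `r2_pred`/`rmse_pred` are just illustrative:

    import numpy as np
    from sklearn.metrics import r2_score, mean_squared_error

    # Rp²/RMSEp: performance of the full-train model on the external test set
    # (y_test is assumed to be the held-out test targets)
    r2_pred = r2_score(y_test.values, y_test_pred)
    rmse_pred = np.sqrt(mean_squared_error(y_test.values, y_test_pred))
    print(f"Rp²: {r2_pred}, RMSEp: {rmse_pred}")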

Upvotes: 0

Views: 30

Answers (0)
