wondim
wondim

Reputation: 727

tidyr unnest with purrr::map2 shows error Error: object 'model' not found

I got the error Error: object 'model' not found when running the code below. Is there any way of fixing the issue using another library or using the code differently?

The code below is runable until the error is shown. It is aimed at running k-fold validation.

K_folds_regression_function <- function(inputData, col_pos_Y, col_pos_Xn){
  # ------------------------------- User Inputs -------------------------------------------------------------------------------------------------------
  # inputFile <- 'Input_Data-MLR.csv'   #   <-- provide file name .csv
  # col_pos_Y <-  11  # position of the Y variable
  # col_pos_Xn <- 14  # position of LAST Xs

  # ------------------------------- libraries --------------------------------------------------------------------------------------------- 
  library(modelr) ; library(dplyr) ; library(purrr) ; library(ggplot2)
  library(tidyr) ; library(broom) ; library(tibble)
  library(modelr)
  library(broom)

  my_y_x_data <- inputData[   , c(1,col_pos_Y:col_pos_Xn)]; head(my_y_x_data)
  no_of_vars <- dim (my_y_x_data)[2] - 1  # including x and y and excluding the 1st ID column
  no_of_xs <- no_of_vars - 1  # only Xs

  set.seed(6)

  colnames(my_y_x_data)[2] <- 'yy'   # making sure that 2nd column which is a depedent variable is named 'yy'
  k_folds_regr <- crossv_kfold(data = my_y_x_data, k = 10)    # k is the no of folds, in this case 10 folds. around 7-11 folds will have minimum error, uually 10 is used

  colnames(my_y_x_data)[2] <- 'yy'   # making sure that 2nd column which is a dependent variable is named 'yy'
  head(my_y_x_data)
  library(tidyr)
  # fitting models
  k_folds_regr <- k_folds_regr %>% mutate(                                                          
    model = map(     train, ~ lm( yy ~ . , data = my_y_x_data[, -1])     ))
  # k_fold_regr %>% mutate --> adds a new column       # map (train, ) -> applys a function to each rows of train data

  interim_regression_result_table <- k_folds_regr  %>% unnest(
    map2(model, test, ~augment(.x, newdata = .y) )   # Error happens here 
  )
}

Example Data:

Sites,SPM,Total_Rrs
WMS04,36,1.428096339
WMS05,36,1.468391179
WMS06,35,1.480664998
WMS07,30,1.563063147
WMS09,28.25,1.575149555
WMS13,29.5,1.527970637
WMS14,36.75,1.697142346
WMS15,54.5,1.701675753
WMS16,61.75,1.647421816
WMS17,46.5,1.734592117
WMS18,43.5,1.658748318
WMS19,44,1.56844909
WMS20,46.25,1.523165904
WMS22,46.5,1.727354533
WMS23,45.75,1.583000425
WMS24,43.5,1.679314139
WMS25,40.5,1.583000425
WMS26,16,1.722078046
WMS27,68.5,1.356190243
WMS29,44.25,1.65871572
WMS30,21.5,1.55141119
WMS32,49,1.549374723
WMS33,49.75,1.623284831
WMS34,47.5,1.716614557
WMS35,55.25,1.693683697

Usage:

inputFile = 'path/to/csv.csv'
inputData <- read.csv(inputFile, header=TRUE)
K_folds_regression_function(inputData, 2, 3)

Upvotes: 0

Views: 765

Answers (1)

Allan Cameron
Allan Cameron

Reputation: 173803

It seems that map2 didn't like the fact that it was being passed a list of lists. Base R's mapply is less fussy.

Note that it's not good practice to use library inside a function - it can lead to unintended side effects for users of your function, since packages are loaded that the user may not have wanted and thus some functions may have been overwritten. It is better to use the namespace qualifier package::function

You also had multiple calls to library for the same package, so I have tidied the function up to use namespace qualifiers instead of library calls:

K_folds_regression_function <- function(inputData, col_pos_Y, col_pos_Xn) 
{
  `%>%`       <- dplyr::`%>%`
  my_y_x_data <- inputData[ , c(1,col_pos_Y:col_pos_Xn)]
  no_of_vars  <- dim(my_y_x_data)[2] - 1  
  no_of_xs    <- no_of_vars - 1

  set.seed(6)

  k_folds_regr <- modelr::crossv_kfold(data = my_y_x_data, k = 10)  
  colnames(my_y_x_data)[2] <- 'yy'  

  k_folds_regr <- k_folds_regr %>% 
    dplyr::mutate(model = purrr::map(train, ~ lm(yy~. ,data = my_y_x_data[, -1])))

  mapply(function(.x, .y) broom::augment(.x, newdata = .y),
         k_folds_regr$model, k_folds_regr$test, SIMPLIFY = FALSE)
}

Now when we run the function we get:

K_folds_regression_function(inputData, 2, 3)
#> $`1`
#> # A tibble: 3 x 5
#>   Sites    yy Total_Rrs .fitted .se.fit
#>   <fct> <dbl>     <dbl>   <dbl>   <dbl>
#> 1 WMS04  36        1.43    42.1    4.86
#> 2 WMS18  43.5      1.66    42.3    2.81
#> 3 WMS23  45.8      1.58    42.2    2.46
#> 
#> $`2`
#> # A tibble: 3 x 5
#>   Sites    yy Total_Rrs .fitted .se.fit
#>   <fct> <dbl>     <dbl>   <dbl>   <dbl>
#> 1 WMS14  36.8      1.70    42.3    3.38
#> 2 WMS17  46.5      1.73    42.3    4.07
#> 3 WMS29  44.2      1.66    42.3    2.81
#> 
#> $`3`
#> # A tibble: 3 x 5
#>   Sites    yy Total_Rrs .fitted .se.fit
#>   <fct> <dbl>     <dbl>   <dbl>   <dbl>
#> 1 WMS06  35        1.48    42.1    3.80
#> 2 WMS13  29.5      1.53    42.2    3.01
#> 3 WMS34  47.5      1.72    42.3    3.72
#> 
#> $`4`
#> # A tibble: 3 x 5
#>   Sites    yy Total_Rrs .fitted .se.fit
#>   <fct> <dbl>     <dbl>   <dbl>   <dbl>
#> 1 WMS07  30        1.56    42.2    2.59
#> 2 WMS09  28.2      1.58    42.2    2.51
#> 3 WMS32  49        1.55    42.2    2.73
#> 
#> $`5`
#> # A tibble: 3 x 5
#>   Sites    yy Total_Rrs .fitted .se.fit
#>   <fct> <dbl>     <dbl>   <dbl>   <dbl>
#> 1 WMS20  46.2      1.52    42.2    3.08
#> 2 WMS25  40.5      1.58    42.2    2.46
#> 3 WMS35  55.2      1.69    42.3    3.32
#> 
#> $`6`
#> # A tibble: 2 x 5
#>   Sites    yy Total_Rrs .fitted .se.fit
#>   <fct> <dbl>     <dbl>   <dbl>   <dbl>
#> 1 WMS24  43.5      1.68    42.3    3.09
#> 2 WMS33  49.8      1.62    42.3    2.49
#> 
#> $`7`
#> # A tibble: 2 x 5
#>   Sites    yy Total_Rrs .fitted .se.fit
#>   <fct> <dbl>     <dbl>   <dbl>   <dbl>
#> 1 WMS26  16        1.72    42.3    3.83
#> 2 WMS27  68.5      1.36    42.0    6.44
#> 
#> $`8`
#> # A tibble: 2 x 5
#>   Sites    yy Total_Rrs .fitted .se.fit
#>   <fct> <dbl>     <dbl>   <dbl>   <dbl>
#> 1 WMS16  61.8      1.65    42.3    2.68
#> 2 WMS22  46.5      1.73    42.3    3.93
#> 
#> $`9`
#> # A tibble: 2 x 5
#>   Sites    yy Total_Rrs .fitted .se.fit
#>   <fct> <dbl>     <dbl>   <dbl>   <dbl>
#> 1 WMS15  54.5      1.70    42.3    3.46
#> 2 WMS30  21.5      1.55    42.2    2.71
#> 
#> $`10`
#> # A tibble: 2 x 5
#>   Sites    yy Total_Rrs .fitted .se.fit
#>   <fct> <dbl>     <dbl>   <dbl>   <dbl>
#> 1 WMS05    36      1.47    42.1    4.04
#> 2 WMS19    44      1.57    42.2    2.55

Upvotes: 1

Related Questions