user113156
user113156

Reputation: 7107

adding if statements to a function returning an error

This is a follow-up to a question I posted earlier. The code is the following:

# Two-class toy data set: drop the setosa rows, recode Species as a 0/1
# indicator (1 = virginica) and keep a random sample of 10 rows.
# No seed is set, so the sampled rows differ from run to run.
data(iris)
non_setosa <- filter(iris, Species != "setosa")
df <- non_setosa %>%
  mutate(Species = as.integer(Species == "virginica")) %>%
  sample_n(10)

##########################################
# Every ordered pair of distinct predictor columns (the first four columns of
# df). stringsAsFactors = FALSE keeps Var1/Var2 as plain character vectors:
# expand.grid() returns factors by default, but the values are used further
# down as column names (in select() and set_names()), where strings are the
# appropriate type.
var_combos <- expand.grid(
  Var1 = colnames(df)[1:4],
  Var2 = colnames(df)[1:4],
  stringsAsFactors = FALSE
) %>%
  filter(Var1 != Var2)

# For every ordered predictor pair, build a prediction grid covering the
# observed range of both variables: `grid_size` evenly spaced values per axis,
# crossed into grid_size^2 rows. The two columns are named after the
# predictors so each grid can be handed to predict() as `newdata`.
# The result is an unnamed list with one tibble per row of var_combos.
boundary_lists <- map2(
  .x = var_combos$Var1,
  .y = var_combos$Var2,
  .f = function(x_var, y_var, grid_size = 200) {
    # as.character() so the lookup works whether var_combos stores the
    # column names as factors or as strings.
    x_var <- as.character(x_var)
    y_var <- as.character(y_var)

    x_axis <- seq(
      min(df[[x_var]], na.rm = TRUE),
      max(df[[x_var]], na.rm = TRUE),
      length.out = grid_size
    )
    y_axis <- seq(
      min(df[[y_var]], na.rm = TRUE),
      max(df[[y_var]], na.rm = TRUE),
      length.out = grid_size
    )

    # x varies slowest and y fastest, so each (x, y) combination occurs once.
    tibble(
      xx = rep(x_axis, each = grid_size),
      yy = rep(y_axis, times = grid_size)
    ) %>%
      set_names(c(x_var, y_var))
  }
)


# xgboost(
#   objective='binary:logistic',
#   eval_metric = 'auc',
#   data = as.matrix(df[, 1:2]),
#   label = as.matrix(df[, 5]), # binary variable
#   nrounds = 10
#   )

# Fit four classifiers (logistic regression, linear SVM, random forest,
# XGBoost) on every ordered pair of predictors.
# Each element of models_list ends up as a flat list whose `models` component
# holds the four fitted objects in the order above; the `grp` and `data`
# components are left over from the nesting step.
models_list <- map2(
  var_combos$Var1,
  var_combos$Var2,
  ~df %>%
    select(Species, .x, .y) %>%
    # Constant dummy group: nest() then folds the whole table into a single
    # row whose `data` list-column contains the training data.
    group_by(grp = 'grp') %>% 
    nest() %>%
    mutate(
      models = map(
        data, 
        # Inside this inner lambda `.x` is the nested training data
        # (Species plus the two predictors), not the outer column name.
        ~{
          list(
            glm(Species ~ ., data = .x, family = binomial(link='logit')),
            e1071::svm(Species ~ ., data = .x,  type = 'C-classification', kernel = 'linear'),
            randomForest::randomForest(formula = as.factor(Species) ~ ., data = .x),
            xgboost::xgboost(
              objective='binary:logistic',
              eval_metric = 'auc',
              # Columns 2:3 are the two predictors; Species was selected first.
              data = as.matrix(.x[, 2:3]),
              label = as.matrix(.x$Species), # binary variable
              nrounds = 10
            )
          )
        }
      )
    )
) %>% 
  # Flatten each one-row tibble by one level so the models can be reached
  # with pluck(., "models").
  map(
    ., ~unlist(., recursive = FALSE)
  )

This works, but I am trying to extend it to handle more models, some of which need changes to the predict call.

I would like to have a separate predict call and tibble for each model I train, so that I can modify the data inside each of the ~if statements. The code I have is:

    # Intent: for each (fitted models, prediction grid) pair, predict every
    # model on the grid and row-bind the results into one tibble with the
    # columns `modelname` and `prediction`.
    # NOTE(review): this snippet is the version that fails to parse. The four
    # `~if` blocks are written back to back as separate formulas, with no
    # `else` or separator between them -- presumably the cause of the errors
    # reported below; confirm.
    map2_dfr(models_list, boundary_lists, ~{
  mods <- pluck(.x, "models")
  # Keep the grid under its own name: `.x` is reused by the inner lambdas.
  dat <- .y
  map_dfr(mods,

          ~if(attr(.x, "class")[1] == "glm"){              # predict the logistic model
            tibble(
              modelname = attr(.x, "class")[1],
              prediction = predict(.x, newdata = dat)
              )
          }

          ~if(attr(.x, "class")[1] == "svm.formula"){      # predict the SVM model
            tibble(
              modelname = attr(.x, "class")[1],
              prediction = predict(.x, newdata = dat)
              )
          }

          ~if(attr(.x, "class")[1] == "randomForest.formula"){   # predict the RF model
            tibble(
              modelname = attr(.x, "class")[1],
              prediction = predict(.x, newdata = dat)
              )
          }

          ~if(attr(.x, "class")[1] == "xgb.Booster"){       # predict the XGBoost model
            tibble(
              modelname = attr(.x, "class")[1], 
              prediction = predict(.x, newdata = as.matrix(dat), type = 'prob')
              ) 
          }
      #else {
      #  tibble(modelname = attr(.x, "class")[1],
      #         prediction = predict(.x, newdata = dat))}   )   

  )
  }
  )

Which is giving me the error:

Error: unexpected ')' in "  )"
>   }
Error: unexpected '}' in "  }"
>   )
Error: unexpected ')' in "  )"

I cannot seem to find where the unexpected part of the code is. I assume there is a problem with how I am passing the ~if statements. How can I pass multiple if statements in this manner?

Upvotes: 0

Views: 48

Answers (1)

Ronak Shah
Ronak Shah

Reputation: 388907

Here is an approach that works:

library(tidyverse)

# For each (fitted models, prediction grid) pair, predict every model on the
# grid and row-bind everything into one tibble with the columns `modelname`
# (first class of the fitted object) and `prediction`.
# A model whose class is not handled, or whose predict() call fails,
# contributes no rows instead of aborting the whole run.
output <- map2_df(models_list, boundary_lists, ~{
  mods <- purrr::pluck(.x, "models")
  dat <- .y
  # A named-argument function is used for the inner map so that `x` (one
  # fitted model) cannot be confused with the outer `.x` / `.y`.
  map_df(mods, function(x) {
    model_class <- class(x)[1]
    tryCatch(
      {
        prediction <- switch(
          model_class,
          # predict the logistic model
          "glm" = predict(x, newdata = dat),
          # SVM and RF return factors: convert the labels back to numbers
          "svm.formula" = ,
          "randomForest.formula" =
            as.numeric(as.character(predict(x, newdata = dat))),
          # predict the XGBoost model (needs a matrix, not a tibble)
          "xgb.Booster" = predict(x, newdata = as.matrix(dat), type = 'prob'),
          NULL
        )
        if (is.null(prediction)) {
          NULL
        } else {
          tibble(modelname = model_class, prediction = prediction)
        }
      },
      error = function(e) {
        # Report which model failed and why, then return NULL so the
        # row-binding step simply skips it. Returning the printed string, as
        # before, would itself be passed on to the row-binding step.
        message("skipping ", model_class, ": ", conditionMessage(e))
        NULL
      }
    )
  })
})

The changes I made are:

  • Used ~ only once, at the beginning of the function, and not before every if.
  • Since this is a nested map, using .x can create confusion about which level it refers to. Hence, I used an anonymous function in the inner map_df block, so the arguments of map2_df are referred to as .x and .y and the argument of map_df is referred to as x.
  • Some models return the predicted variable as a factor, hence the as.numeric(as.character()) wrapped around those predictions.
  • Added tryCatch for better handling of exceptions.

Upvotes: 1

Related Questions