Reputation: 7107
This is a follow-up to a question I posted earlier. The code is the following:
# Build a small two-class sample from iris: drop the "setosa" rows, recode
# Species as a 0/1 indicator for "virginica", then draw 10 rows at random.
data(iris)
df <- iris %>%
filter(Species != "setosa") %>%
# Unary `+` converts the logical comparison into 0/1.
mutate(Species = +(Species == "virginica")) %>%
sample_n(10)
##########################################
# All ordered pairs of the first four column names of `df`, excluding pairs
# where both names are the same.
var_combos <- expand.grid(colnames(df[,1:4]), colnames(df[,1:4])) %>%
filter(!Var1 == Var2)
# For every pair in `var_combos`, build a 200 x 200 grid of points that spans
# the observed range of the two selected columns. The result is a list with
# one two-column tibble per pair.
boundary_lists <- map2(
.x = var_combos$Var1,
.y = var_combos$Var2,
# Step 1: keep the two columns of the pair and record their min and max.
~select(df, .x, .y) %>%
summarise(
minX = min(.[[1]], na.rm = TRUE),
maxX = max(.[[1]], na.rm = TRUE),
minY = min(.[[2]], na.rm = TRUE),
maxY = max(.[[2]], na.rm = TRUE)
)
) %>%
# Step 2: 200 evenly spaced values between min and max for each column.
map(.,
~tibble(
x = seq(.x$minX, .x$maxX, length.out = 200),
y = seq(.x$minY, .x$maxY, length.out = 200),
)
) %>%
# Step 3: cross the two sequences (each x repeated 200 times, the y sequence
# repeated 200 times) to get every x/y combination.
map(.,
~tibble(
xx = rep(.x$x, each = 200),
yy = rep(.x$y, time = 200)
)
) %>%
# Step 4: rename the grid columns using the matching row of `var_combos`
# (asplit(..., 1) splits `var_combos` by row).
map2(.,
asplit(var_combos, 1), ~ .x %>%
set_names(.y))
# xgboost(
# objective='binary:logistic',
# eval_metric = 'auc',
# data = as.matrix(df[, 1:2]),
# label = as.matrix(df[, 5]), # binary variable
# nrounds = 10
# )
# For every pair of columns in `var_combos`, fit four classifiers of Species
# on those two columns. The result is a list with one element per pair; each
# element holds the nested data and the fitted models.
models_list <- map2(
var_combos$Var1,
var_combos$Var2,
# Here `.x` / `.y` are the two column names supplied by map2().
~df %>%
select(Species, .x, .y) %>%
# A constant grouping value puts all rows into a single group, so nest()
# yields one row whose `data` list-column holds the selected columns.
group_by(grp = 'grp') %>%
nest() %>%
mutate(
models = map(
data,
# Inside this inner lambda `.x` (and `.`) refer to the nested data frame,
# not to the column name from the outer map2().
~{
list(
glm(Species ~ ., data = .x, family = binomial(link='logit')),
e1071::svm(Species ~ ., data = .x, type = 'C-classification', kernel = 'linear'),
randomForest::randomForest(formula = as.factor(Species) ~ ., data = .),
# xgboost takes a numeric matrix: columns 2:3 are the two predictors,
# Species is passed separately as the label.
xgboost(
objective='binary:logistic',
eval_metric = 'auc',
data = as.matrix(.x[, 2:3]),
label = as.matrix(.x$Species), # binary variable
nrounds = 10
)
)
}
)
)
) %>%
# Flatten each result by one level.
map(
., ~unlist(., recursive = FALSE)
)
This works, but I am trying to expand it to take on more models, with some changes to the `predict` function.
I would like to create a new `predict` function and `tibble` for each model I train, where I can modify the data inside each of the `~if` statements. The code I have is:
# NOTE(review): this snippet is the failing version and is kept exactly as
# posted. It pairs each element of `models_list` with the matching grid in
# `boundary_lists` and tries to predict each model on that grid, branching on
# the model's first class. The four branches are written as four separate
# `~if` expressions inside a single map_dfr() call rather than as one
# `if` / `else if` chain, which appears to be what the parser rejects (TODO
# confirm).
map2_dfr(models_list, boundary_lists, ~{
mods <- pluck(.x, "models")
dat <- .y
map_dfr(mods,
~if(attr(.x, "class")[1] == "glm"){ # predict the logistic model
tibble(
modelname = attr(.x, "class")[1],
prediction = predict(.x, newdata = dat)
)
}
~if(attr(.x, "class")[1] == "svm.formula"){ # predict the SVM model
tibble(
modelname = attr(.x, "class")[1],
prediction = predict(.x, newdata = dat)
)
}
~if(attr(.x, "class")[1] == "randomForest.formula"){ # predict the RF model
tibble(
modelname = attr(.x, "class")[1],
prediction = predict(.x, newdata = dat)
)
}
~if(attr(.x, "class")[1] == "xgb.Booster"){ # predict the XGBoost model
tibble(
modelname = attr(.x, "class")[1],
prediction = predict(.x, newdata = as.matrix(dat), type = 'prob')
)
}
#else {
# tibble(modelname = attr(.x, "class")[1],
# prediction = predict(.x, newdata = dat))} )
)
}
)
Which is giving me the error:
Error: unexpected ')' in " )"
> }
Error: unexpected '}' in " }"
> )
Error: unexpected ')' in " )"
I cannot seem to find where the unexpected part of the code is. I assume there is a problem with how I am passing the `~if` statements. How can I pass multiple `if` statements in this manner?
Upvotes: 0
Views: 48
Reputation: 388907
Here is the method which works :
library(tidyverse)

# Predict every fitted model on its matching boundary grid and row-bind the
# results into one tibble with the columns `modelname` and `prediction`.
#
# `models_list` and `boundary_lists` are walked in parallel, so element i of
# one must belong to the same variable pair as element i of the other.
output <- map2_df(models_list, boundary_lists, ~{
  mods <- purrr::pluck(.x, "models")
  dat <- .y
  # The inner loop uses a named argument `x` so that `.x` / `.y` keep
  # referring to the arguments of the outer map2_df().
  map_df(mods, function(x) {
    modelname <- class(x)[1]
    tryCatch(
      {
        # Dispatch on the model's first class. A class that is not listed
        # makes switch() return NULL, and the model is then left out.
        prediction <- switch(
          modelname,
          # Logistic model: predict.glm() defaults to the link scale.
          "glm" = predict(x, newdata = dat),
          # SVM and random forest return factors; convert the labels to
          # numbers so all models share one numeric `prediction` column.
          "svm.formula" = ,
          "randomForest.formula" =
            as.numeric(as.character(predict(x, newdata = dat))),
          # XGBoost needs a matrix rather than a data frame.
          "xgb.Booster" = predict(x, newdata = as.matrix(dat), type = 'prob')
        )
        if (is.null(prediction)) {
          NULL
        } else {
          tibble(modelname = modelname, prediction = prediction)
        }
      },
      error = function(e) {
        # Report which model failed and why, then return NULL so that
        # map_df() drops this model instead of trying to row-bind the
        # handler's return value.
        message("skipping ", modelname, ": ", conditionMessage(e))
        NULL
      }
    )
  })
})
The changes I made are:
- Use `~` only once, at the beginning of the function, and not before every `if`.
- With nested `map` calls, using `.x` can create confusion about what it refers to. Hence, I used an anonymous function in the inner `map_df` block, so that the arguments of `map2_df` are referred to with `.x` and `.y` and the argument of `map_df` is referred to with `x`.
- The SVM and random forest predictions are factors, hence wrapping `as.numeric(as.character())` around them.
- Added `tryCatch` for better handling of exceptions.
Upvotes: 1