Reputation: 911
I have a data.table object that contains 3 columns (spec_fun, param_1, param_2). I wanted to use mutate to create a new column computed from these 3 columns, but I got the error "Error in mutate() caused by error in do.call() ! 'what' must be a function or character string". I tried writing a separate function, but that didn't work either. This is my data.table object:
library(mixtools)
library(dplyr)
library(data.table)
# Example input: one function name and two numeric parameters per row.
my_data <- data.table(
  spec_fun = c("weibull", "rgamma", "rgamma"),
  param_1  = c(0.7767294, 0.7362431, 0.8118086),
  param_2  = c(6.8048834, 0.1088231, 0.1357522)
)
Here is the code that generates the error:
# Pipeline from the question that raises the reported do.call() error.
# NOTE(review): mutate() evaluates spec_fun as the whole column here, so
# do.call() presumably receives a length-3 character vector instead of one
# function name per row -- confirm against the do.call() documentation.
final <- my_data %>%
mutate(my_med = median(do.call(spec_fun,
args = lapply(list(1000, param_1, param_2),
function(x) if(!is.na(x)) x))))
Thanks in advance.
Upvotes: 6
Views: 119
Reputation: 102529
You can run
# Build one "median(<fun>(1000, <p1>, <p2>))" call string per row and evaluate
# it; map_dbl() collects the resulting medians into a numeric column.
library(purrr)    # provides map_dbl()
library(stringr)  # provides str_glue()
set.seed(0)       # fix the RNG so the simulated medians are reproducible
my_data %>%
  mutate(my_med = map_dbl(
    str_glue("median({spec_fun}(1000, {param_1}, {param_2}))"),
    # NOTE(review): eval() runs whatever text spec_fun holds -- only use this
    # with trusted function names.
    \(call_text) eval(str2lang(call_text))
  ))
or
# Call each row's function by name via rlang::exec() and keep the median of
# its 1000 draws; list(1000) is recycled by mapply() so every call gets it as
# the first positional argument.
library(rlang)
set.seed(0)
my_data %>%
  mutate(
    my_med = mapply(
      function(fun_name, ...) {
        draws <- exec(fun_name, ...)
        median(draws)
      },
      spec_fun, list(1000), param_1, param_2
    )
  )
which gives
spec_fun param_1 param_2 my_med
<char> <num> <num> <num>
1: rweibull 0.7767294 6.8048834 4.492247
2: rgamma 0.7362431 0.1088231 4.047691
3: rgamma 0.8118086 0.1357522 3.734777
Upvotes: 1
Reputation: 7858
Since you're using two different frameworks (tidyverse and data.table), I'll suggest one working approach for each.
library(purrr) # new library to use pmap_dbl !
library(data.table)
# Input table; the first function name is spelled "rweibull" here.
my_data <- data.table(
  spec_fun = c("rweibull", "rgamma", "rgamma"),
  param_1  = c(0.7767294, 0.7362431, 0.8118086),
  param_2  = c(6.8048834, 0.1088231, 0.1357522)
)
Tidyverse
# purrr approach: visit the table one row at a time and store one simulated
# median per row in a new my_med column.
set.seed(1)
my_data$my_med <- pmap_dbl(
  my_data,
  \(...) {
    # Positional view of the current row: [[1]] is the function name,
    # [[2]] and [[3]] are its two parameters.
    row <- list(...)
    draws <- do.call(row[[1]], args = list(1000, row[[2]], row[[3]]))
    median(draws)
  }
)
data.table
# data.table approach: grouping with by = .I evaluates the expression per row,
# so do.call() gets a single function name and a single pair of parameters.
set.seed(1)
my_data[, my_med := {
  draws <- do.call(spec_fun, args = list(1000, param_1, param_2))
  median(draws)
}, by = .I][]  # trailing [] prints the updated table
Output for both
#> spec_fun param_1 param_2 my_med
#> <char> <num> <num> <num>
#> 1: rweibull 0.7767294 6.8048834 4.515552
#> 2: rgamma 0.7362431 0.1088231 4.047691
#> 3: rgamma 0.8118086 0.1357522 3.734777
pmap allows you to work row-wise; the _dbl suffix converts the output from a list to a numeric vector.
by = .I allows the data.table to work by row.
Notes:
I added r to weibull, giving rweibull.
I removed library(mixtools) because it served no purpose here.
I removed lapply(..., function(x) if(!is.na(x)) x))) because it made no difference.
Upvotes: 3
Reputation: 7979
Base R, similar but different.
# Base R: simulate 1000 draws per row and take the median of each row's draws.
# median() sits inside the mapply() callback so that m holds one median per
# row; piping the whole mapply() result into median() would pool all draws
# into a single value that transform() then recycles to every row.
my_data |>
  transform(m = mapply(\(f, a, b) median(getFunction(f)(n = 1e3, a, b)),
                       spec_fun, param_1, param_2))
Or transpose and operate column-wise (length 3 again).
# Transpose the two parameter columns so that each input row becomes one
# column named after its function. Expects my_data to be a data.frame, where
# [-1] drops the first column.
my_data <- setNames(as.data.frame(t(my_data[-1])), my_data$spec_fun)
# Pair every column with its name positionally: looking columns up by name
# would return the first "rgamma" column for both rgamma entries.
mapply(\(f, p) median(getFunction(f)(n = 1e3, p[1], p[2])),
       names(my_data), my_data)
Typo-corrected Data
# Plain data.frame version of the input, with the function name corrected.
my_data <- data.frame(
  spec_fun = c("rweibull", "rgamma", "rgamma"), # corrected
  param_1  = c(0.7767294, 0.7362431, 0.8118086),
  param_2  = c(6.8048834, 0.1088231, 0.1357522)
)
Upvotes: 3
Reputation: 132969
You should not use mutate
with a data.table. Also, it is unclear why you are simulating the median instead of calculating it.
library(data.table)
# Input table with one random-generation function name and two parameters
# per row.
my_data <- data.table(
  spec_fun = c("rweibull", "rgamma", "rgamma"),
  param_1  = c(0.7767294, 0.7362431, 0.8118086),
  param_2  = c(6.8048834, 0.1088231, 0.1357522)
)

#simulate medians
set.seed(42)
my_data[, sim_med := mapply(
  \(fun_name, par_a, par_b) {
    sampler <- getFunction(fun_name)
    median(sampler(n = 1e3, par_a, par_b))
  },
  spec_fun, param_1, param_2
)]

#use quantile function
# Swap the leading "r" for "q" to get the matching quantile function
# (rgamma -> qgamma) and evaluate it at p = 0.5, once per distinct function.
my_data[, q50 := {
  quantile_name <- sub("^r", "q", spec_fun)
  getFunction(quantile_name)(p = 0.5, param_1, param_2)
}, by = spec_fun]
# spec_fun param_1 param_2 sim_med q50
# <char> <num> <num> <num> <num>
#1: rweibull 0.7767294 6.8048834 4.564007 4.245135
#2: rgamma 0.7362431 0.1088231 4.214413 4.054822
#3: rgamma 0.8118086 0.1357522 3.805266 3.775631
Upvotes: 4
Reputation: 73562
You might use mapply
to avoid row-wise operations.
# Console transcript: the leading ">" and "+" are R prompt markers, not code.
# mapply() calls the callback once per row with that row's function name and
# two parameters; the inner lapply() turns any NA argument into NULL before
# do.call() invokes the function, and median() summarises the draws.
> set.seed(42)
> my_data |>
+ transform(
+ my_med=mapply(\(f, x, y) {
+ do.call(f,
+ lapply(list(1000, x, y), \(x) {
+ if (!is.na(x)) x
+ })) |> median()
+ }, spec_fun, param_1, param_2))
spec_fun param_1 param_2 my_med
1 rweibull 0.7767294 6.8048834 4.564007
2 rgamma 0.7362431 0.1088231 4.214413
3 rgamma 0.8118086 0.1357522 3.805266
Upvotes: 2
Reputation: 19339
Add a rowwise()
# rowwise() makes mutate() evaluate the expression once per row, so do.call()
# receives a single function name and scalar parameters.
my_data %>%
  rowwise() %>%
  mutate(
    my_med = median(
      do.call(
        spec_fun,
        args = lapply(
          list(1000, param_1, param_2),
          # NA arguments are replaced by NULL; everything else passes through.
          \(arg) if (is.na(arg)) NULL else arg
        )
      )
    )
  )
# A tibble: 3 × 4
# Rowwise:
spec_fun param_1 param_2 my_med
<chr> <dbl> <dbl> <dbl>
1 rweibull 0.777 6.80 4.13
2 rgamma 0.736 0.109 3.86
3 rgamma 0.812 0.136 3.77
Note the typo in the question's data: "weibull" should be "rweibull":
# Input table with the function name spelled "rweibull".
my_data <- data.table(
  spec_fun = c("rweibull", "rgamma", "rgamma"),
  param_1  = c(0.7767294, 0.7362431, 0.8118086),
  param_2  = c(6.8048834, 0.1088231, 0.1357522)
)
Upvotes: 4