Reputation: 4879
I am wondering if there is a way to use `dplyr::across()` with a function that requires multiple arguments (i.e. more than one column as input), and, if not, how the following can be done in `dplyr`/`tidyverse`.
library(dplyr)
# Example data: point estimates for x1-x3 and their standard errors
# (four rows each), stored as a tibble.
df <- structure(
  list(
    x1_estimate = c(
      0.185050288587259, 0.151839113724119,
      0.134106347795535, 0.16816621423223
    ),
    x2_estimate = c(
      0.210983518279099, 0.337090844267208,
      0.324663150698154, 0.254871197876221
    ),
    x3_estimate = c(
      0.122881208643618, 0.0707293652735489,
      0.0981291893590288, -0.0214831044826657
    ),
    x1_se = c(
      0.00986950954467025, 0.00625871919316588,
      0.0445182168165812, 0.0244314083271791
    ),
    x2_se = c(
      0.00954593822897476, 0.00669845532512913,
      0.0478789857255503, 0.0237263111649421
    ),
    x3_se = c(
      0.017952784431167, 0.0122226237123911,
      0.0836135673502282, 0.041558861509543
    )
  ),
  row.names = c(NA, -4L),
  class = c("tbl_df", "tbl", "data.frame")
)
For example, let's say we just want to compute the variance, which requires only a single argument (the standard error):
# Square every standard-error column and store the result in a new
# "<column>_var" column next to the existing ones.
df %>%
  mutate(
    across(
      contains("_se"),
      function(se) se^2,
      .names = "{.col}_var"
    )
  )
#> # A tibble: 4 x 9
#> x1_estimate x2_estimate x3_estimate x1_se x2_se x3_se x1_se_var x2_se_var
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 0.185 0.211 0.123 0.00987 0.00955 0.0180 0.0000974 0.0000911
#> 2 0.152 0.337 0.0707 0.00626 0.00670 0.0122 0.0000392 0.0000449
#> 3 0.134 0.325 0.0981 0.0445 0.0479 0.0836 0.00198 0.00229
#> 4 0.168 0.255 -0.0215 0.0244 0.0237 0.0416 0.000597 0.000563
#> # … with 1 more variable: x3_se_var <dbl>
Now let's say we want to compute the lower bound of the 95% confidence interval, which requires both the estimate and the standard error for each variable:
# Desired result, written out by hand (illustrative, not runnable on its own):
# each new column is the estimate minus 1.96 times the matching standard error.
x1_conf.low = x1_estimate - 1.96 * x1_se
x2_conf.low = x2_estimate - 1.96 * x2_se
x3_conf.low = x3_estimate - 1.96 * x3_se
I know that this won't work, but it's just for illustrative purposes:
# Deliberately non-working sketch of the intent. The lambda calls the
# selection helper contains() as if it returned column values; the error
# printed below rejects this because contains() "must be used within a
# *selecting* function".
df %>%
mutate(
across(matches("_se|_estimate"),
~ (contains("_estimate") - 1.96 * contains("_se")),
.names = "{.col}_conf.low"
)
)
#> Error: Problem with `mutate()` input `..1`.
#> x `contains()` must be used within a *selecting* function.
#> ℹ See <https://tidyselect.r-lib.org/reference/faq-selection-context.html>.
#> ℹ Input `..1` is `(function (.cols = everything(), .fns = NULL, ..., .names = NULL) ...`.
If not, I would also be happy to see another solution with `dplyr`/`tidyverse` to achieve the same.
Upvotes: 5
Views: 1192
Reputation: 887108
We could use a single `across()` call as well:
library(dplyr)
# For each "*_estimate" column, subtract 1.96 times the standard-error column
# that shares its prefix. cur_column() gives the name of the column currently
# being processed; swapping its "_estimate" suffix for "_se" yields the name of
# the partner column, which get() then looks up.
# Base sub() does the renaming, so nothing beyond dplyr has to be attached
# (str_replace() belongs to stringr, which this snippet never loads).
df %>%
  mutate(
    across(
      ends_with("_estimate"),
      ~ .x - 1.96 * get(sub("_estimate$", "_se", cur_column())),
      .names = "{.col}_conf.low"
    )
  )
# A tibble: 4 x 9
# x1_estimate x2_estimate x3_estimate x1_se x2_se x3_se x1_estimate_conf.low x2_estimate_conf.low x3_estimate_conf.low
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1 0.185 0.211 0.123 0.00987 0.00955 0.0180 0.166 0.192 0.0877
#2 0.152 0.337 0.0707 0.00626 0.00670 0.0122 0.140 0.324 0.0468
#3 0.134 0.325 0.0981 0.0445 0.0479 0.0836 0.0469 0.231 -0.0658
#4 0.168 0.255 -0.0215 0.0244 0.0237 0.0416 0.120 0.208 -0.103
Upvotes: 3
Reputation: 388982
You can use `across()` as follows:
library(dplyr)
# Subtract 1.96 times the "_se" columns from the "_estimate" columns in one
# expression; `.names` on the first across() labels the results
# "<estimate column>_conf.low".
# NOTE(review): the two column sets appear to be paired by position, so the
# estimate and se columns must be in the same order - confirm for other data.
df %>%
mutate(across(contains("_estimate"), .names = "{.col}_conf.low") -
1.96 * across(contains("_se")))
In base R, you can do:
# Anchor both patterns to the end of the column name. An unanchored "se" or
# "estimate" would also match unrelated names, and on a second run would pick
# up the "*_estimate_conf.low" columns created below.
estimate_cols <- grep("_estimate$", names(df), value = TRUE)
se_cols <- grep("_se$", names(df), value = TRUE)

# The subtraction pairs columns by position, so make sure every estimate
# column lines up with the standard-error column of the same prefix.
stopifnot(identical(
  sub("_estimate$", "", estimate_cols),
  sub("_se$", "", se_cols)
))

# Lower bound of the interval: estimate - 1.96 * standard error.
df[paste0(estimate_cols, "_conf.low")] <-
  df[estimate_cols] - 1.96 * df[se_cols]
Upvotes: 3