Reputation: 15
I want to mutate one column of a data frame dynamically with dplyr by passing column names with a variable. For example, I have the following data frame:
# Example data: four integer columns (A-D) plus a logical flag column E
# that starts out TRUE in every one of the ten rows.
DF <- data.frame(
  A = 1:10,
  B = 11:20,
  C = c(23:30, 21:22),
  D = c(39:40, 31:38),
  E = rep(TRUE, 10)
)
DF
A B C D E
1 1 11 23 39 TRUE
2 2 12 24 40 TRUE
3 3 13 25 31 TRUE
4 4 14 26 32 TRUE
5 5 15 27 33 TRUE
6 6 16 28 34 TRUE
7 7 17 29 35 TRUE
8 8 18 30 36 TRUE
9 9 19 21 37 TRUE
10 10 20 22 38 TRUE
Now I want to change the value of column E to FALSE for those rows where the value in column B, C, or D is not strictly between that column's 10% and 90% quantiles.
So the resulting data frame should look like this:
A B C D E
1 1 11 23 39 FALSE
2 2 12 24 40 FALSE
3 3 13 25 31 FALSE
4 4 14 26 32 TRUE
5 5 15 27 33 TRUE
6 6 16 28 34 TRUE
7 7 17 29 35 TRUE
8 8 18 30 36 FALSE
9 9 19 21 37 FALSE
10 10 20 22 38 FALSE
I wrote a script that mutates the data frame based on just one column (B), and it works well:
# Clear E for rows whose B value is not strictly between the 10% and 90%
# quantiles of B. The two assignments inside mutate() are applied in order,
# so the second one sees the E produced by the first.
DF <- DF %>%
  dplyr::mutate(
    E = if_else(B < quantile(B, 0.9), E, FALSE),
    E = if_else(B > quantile(B, 0.1), E, FALSE)
  )
DF
A B C D E
1 1 11 23 39 FALSE
2 2 12 24 40 TRUE
3 3 13 25 31 TRUE
4 4 14 26 32 TRUE
5 5 15 27 33 TRUE
6 6 16 28 34 TRUE
7 7 17 29 35 TRUE
8 8 18 30 36 TRUE
9 9 19 21 37 TRUE
10 10 20 22 38 FALSE
However, when I try to make this dynamic, it doesn't work:
# Attempt to repeat the single-column logic for every entry of `cols`
# (`cols` is not defined in this snippet).
# NOTE(review): `col` is the loop variable itself, so if `cols` holds column
# names as strings, quantile() receives a string rather than the column's
# values -- consistent with the "non-numeric argument" error reported below.
for (col in cols) {
DF <- DF %>%
dplyr::mutate_(E = if_else(col < quantile(col, 0.9), E, FALSE)) %>%
dplyr::mutate_(E = if_else(col > quantile(col, 0.1), E, FALSE))
}
Error in (1 - h) * qs[i] : non-numeric argument to binary operator
How can I solve this?
Upvotes: 1
Views: 1420
Reputation: 71
Using get() base R function -
# For each column name in `cols`, fetch the column by name with get() and
# set E to FALSE wherever the value is not strictly between that column's
# 10% and 90% quantiles. Assignments inside mutate() run in order.
for (col in cols) {
  DF <- DF %>%
    dplyr::mutate(
      E = if_else(get(col) < quantile(get(col), 0.9), E, FALSE),
      E = if_else(get(col) > quantile(get(col), 0.1), E, FALSE)
    )
}
Upvotes: 0
Reputation: 887811
We can use `interp` from the lazyeval package
library(dplyr)
library(lazyeval)
# For every column name in `cols`, build each condition from a formula
# template: `Col` is a placeholder, and interp() is given the column's
# symbol (as.name(col)) to put in its place before mutate_() receives it.
# E keeps its value only where the column is below its 90% quantile and
# above its 10% quantile; everywhere else it becomes FALSE.
for (col in cols) {
DF <- DF %>%
mutate_(E = interp(~if_else(Col<quantile(Col, 0.9), E, FALSE),
Col=as.name(col))) %>%
mutate_(E = interp(~if_else(Col>quantile(Col, 0.1), E, FALSE),
Col = as.name(col)))
}
DF
# A B C D E
#1 1 11 23 39 FALSE
#2 2 12 24 40 FALSE
#3 3 13 25 31 FALSE
#4 4 14 26 32 TRUE
#5 5 15 27 33 TRUE
#6 6 16 28 34 TRUE
#7 7 17 29 35 TRUE
#8 8 18 30 36 FALSE
#9 9 19 21 37 FALSE
#10 10 20 22 38 FALSE
where
# Names of the 2nd to 4th columns of DF: the columns the loops iterate over.
cols <- names(DF[2:4])
If we also need to pass the 'E' column
# Same filter as before, but the flag column is also passed by name: it is
# taken to be the 5th column of DF and is used both as the value kept by
# if_else() and as the name of the column being written.
for (col in cols) {
  flag_name <- names(DF)[5]
  fill <- list(Col = as.name(col), Col2 = as.name(flag_name))

  below_upper <- interp(
    ~if_else(Col < quantile(Col, 0.9), Col2, FALSE),
    .values = fill
  )
  above_lower <- interp(
    ~if_else(Col > quantile(Col, 0.1), Col2, FALSE),
    .values = fill
  )

  DF <- DF %>%
    mutate_(.dots = setNames(list(below_upper), flag_name)) %>%
    mutate_(.dots = setNames(list(above_lower), flag_name))
}
DF
# A B C D E
#1 1 11 23 39 FALSE
#2 2 12 24 40 FALSE
#3 3 13 25 31 FALSE
#4 4 14 26 32 TRUE
#5 5 15 27 33 TRUE
#6 6 16 28 34 TRUE
#7 7 17 29 35 TRUE
#8 8 18 30 36 FALSE
#9 9 19 21 37 FALSE
#10 10 20 22 38 FALSE
With the devel version of dplyr (soon to be released as 0.6.0), we can also pass the variables as quosures and evaluate them by unquoting inside mutate
# Quosure for the flag column that is read and rewritten.
varN <- quo(E)
# One quosure per column, built by parsing the names of the 2nd to 4th
# columns of DF joined with ";". This reassigns `cols`.
# NOTE(review): parse_quosures() is the rlang name in use when this was
# written -- confirm it exists in the installed rlang version.
cols <- rlang::parse_quosures(paste(names(DF)[2:4], collapse=";"))
# String form of the flag column's name, used on the left-hand side of `:=`.
varN1 <- quo_name(varN)
# For each column quosure, unquote it (!!) into both comparisons; the flag
# keeps its value only where the column is below its 90% quantile and above
# its 10% quantile, and is set to FALSE otherwise.
for(i in seq_along(cols)) {
DF <- DF %>%
mutate(!!varN1 := if_else((!!cols[[i]]) < quantile((!!cols[[i]]), 0.9),
(!!varN), FALSE),
!!varN1 := if_else((!!cols[[i]]) > quantile((!!cols[[i]]), 0.1),
(!!varN), FALSE))
}
DF
# A B C D E
#1 1 11 23 39 FALSE
#2 2 12 24 40 FALSE
#3 3 13 25 31 FALSE
#4 4 14 26 32 TRUE
#5 5 15 27 33 TRUE
#6 6 16 28 34 TRUE
#7 7 17 29 35 TRUE
#8 8 18 30 36 FALSE
#9 9 19 21 37 FALSE
#10 10 20 22 38 FALSE
Or another option is data.table
library(data.table)
# Convert DF to a data.table by reference and update E in place.
# Each of columns 2-4 (.SD) yields a logical vector that is TRUE where the
# value lies strictly between that column's 10% and 90% quantiles.
# Reduce() ANDs those vectors together, starting from the current E, so a
# row that is already FALSE stays FALSE instead of being reset to TRUE.
setDT(DF)[
  ,
  E := Reduce(
    `&`,
    lapply(.SD, function(x) x < quantile(x, 0.9) & x > quantile(x, .1)),
    E
  ),
  .SDcols = 2:4
]
DF
# A B C D E
#1: 1 11 23 39 FALSE
#2: 2 12 24 40 FALSE
#3: 3 13 25 31 FALSE
#4: 4 14 26 32 TRUE
#5: 5 15 27 33 TRUE
#6: 6 16 28 34 TRUE
#7: 7 17 29 35 TRUE
#8: 8 18 30 36 FALSE
#9: 9 19 21 37 FALSE
#10:10 20 22 38 FALSE
Or with only base R
functions
# For each of columns 2-4, compute a logical vector that is TRUE where the
# value lies strictly between that column's 10% and 90% quantiles, then AND
# the vectors together. The current DF$E is passed as Reduce()'s `init`, so
# rows already flagged FALSE stay FALSE rather than being overwritten.
DF$E <- Reduce(
  `&`,
  lapply(DF[2:4], function(x) x < quantile(x, 0.9) & x > quantile(x, .1)),
  DF$E
)
DF
# A B C D E
#1 1 11 23 39 FALSE
#2 2 12 24 40 FALSE
#3 3 13 25 31 FALSE
#4 4 14 26 32 TRUE
#5 5 15 27 33 TRUE
#6 6 16 28 34 TRUE
#7 7 17 29 35 TRUE
#8 8 18 30 36 FALSE
#9 9 19 21 37 FALSE
#10 10 20 22 38 FALSE
Note: No external packages used
Note2: All the options return the same output
Upvotes: 1
Reputation: 43354
You can iterate directly within mutate
:
# Build one logical column per input column (TRUE where the value lies
# strictly between its 10% and 90% quantiles), then collapse each row of
# the resulting matrix with all().
DF %>%
  mutate(
    E = apply(
      sapply(
        list(B, C, D),
        function(v) {
          v < quantile(v, .9) & v > quantile(v, .1)
        }
      ),
      MARGIN = 1,
      FUN = all
    )
  )
## A B C D E
## 1 1 11 23 39 FALSE
## 2 2 12 24 40 FALSE
## 3 3 13 25 31 FALSE
## 4 4 14 26 32 TRUE
## 5 5 15 27 33 TRUE
## 6 6 16 28 34 TRUE
## 7 7 17 29 35 TRUE
## 8 8 18 30 36 FALSE
## 9 9 19 21 37 FALSE
## 10 10 20 22 38 FALSE
or with purrr,
library(tidyverse)
# map() produces one logical vector per column (TRUE where the value lies
# strictly between its 10% and 90% quantiles); pmap_lgl() then walks the
# vectors in parallel and applies all() to each row's values.
DF %>%
  mutate(
    E = pmap_lgl(
      map(
        list(B, C, D),
        function(v) v < quantile(v, .9) & v > quantile(v, .1)
      ),
      all
    )
  )
or go all in on matrices:
# Bind B, C and D into a matrix, test every column against its own 10% and
# 90% quantiles (inner apply over columns), then reduce each row with all()
# (outer apply over rows).
DF %>%
  mutate(
    E = apply(
      apply(
        cbind(B, C, D),
        2,
        function(v) {
          v < quantile(v, .9) & v > quantile(v, .1)
        }
      ),
      1,
      all
    )
  )
All return the same thing.
If you like, substitute between
for the inequalities, e.g. between(x, quantile(x, .1), quantile(x, .9))
, though because it's defined as x >= left & x <= right
it may differ when boundaries matter.
Upvotes: 0