Reputation: 4230
I have a data.frame that I want to filter based on whether the range from low to high contains zero. Here's an example:
head(toy)
# A tibble: 6 x 3
difference low high
<dbl> <dbl> <dbl>
1 0.0161 -0.143 0.119
2 0.330 0.0678 0.656
3 0.205 -0.103 0.596
4 0.521 0.230 0.977
5 0.328 0.177 0.391
6 -0.0808 -0.367 0.200
I could swear I have used dplyr::between() to do this kind of filtering operation a million times (even with columns of class datetime, where it warns about S3 objects). But I can't find what's wrong with this one.
# Does not find anything
toy %>%
filter(!dplyr::between(0, low, high))
# Maybe it's because it needs `x` to be a vector, using mutate
# Does not find anything
toy %>%
mutate(zero = 0) %>%
filter(!dplyr::between(zero, low, high))
# if we check the logic, all "keep" go to FALSE
toy %>%
mutate(zero = 0,
keep = !dplyr::between(zero, low, high))
# data.table::between works
toy %>%
filter(!data.table::between(0, low, high))
# regular logic works
toy %>%
filter(low > 0 | high < 0)
The data below:
> dput(toy)
structure(list(difference = c(0.0161058505175378, 0.329976207353122,
0.20517072042705, 0.520837282826481, 0.328289597476641, -0.0807728725339096,
0.660320444135006, 0.310679750033675, -0.743294517440579, -0.00665462977775899,
0.0890903981794149, 0.0643321993757249, 0.157453334405998, 0.107320325893175,
-0.253664041938671, -0.104025850079389, -0.284835573264143, -0.330557762091307,
-0.0300387610595219, 0.081297046765014), low = c(-0.143002432870633,
0.0677907794288728, -0.103344717845837, 0.229753302951895, 0.176601773133456,
-0.366899428200429, 0.403702557199546, 0.0216878391530755, -1.01129163487875,
-0.222395625167488, -0.135193611295608, -0.116654715121314, -0.168581379777843,
-0.281919444558125, -0.605918194917671, -0.364539852350809, -0.500147478407119,
-0.505906196974183, -0.233810558283787, -0.193048952382206),
high = c(0.118860787421672, 0.655558974886329, 0.595905673925067,
0.97748896372657, 0.391043536410999, 0.199727242557477, 0.914173497837859,
0.633804982827898, -0.549942089679123, 0.19745782761473,
0.340823604797603, 0.317956343103116, 0.501279107093568,
0.442497779066522, 0.0721480109893818, 0.280593530192991,
-0.0434862536882377, -0.229723776097642, 0.22550243301984,
0.252686968655449)), row.names = c(NA, -20L), class = c("tbl_df",
"tbl", "data.frame"))
Just in case somebody finds it useful
> "between" %in% conflicts()
[1] FALSE
> packageVersion("dplyr")
[1] ‘1.0.2’
Upvotes: 1
Views: 38
Reputation: 887511
We could use map2_lgl() from purrr to call between() once per row, passing each row's low and high as the bounds:
library(dplyr)
library(purrr)
toy %>%
filter(!map2_lgl(low, high, ~ between(0, .x, .y)))
-output
# A tibble: 8 x 3
difference low high
<dbl> <dbl> <dbl>
1 0.330 0.0678 0.656
2 0.521 0.230 0.977
3 0.328 0.177 0.391
4 0.660 0.404 0.914
5 0.311 0.0217 0.634
6 -0.743 -1.01 -0.550
7 -0.285 -0.500 -0.0435
8 -0.331 -0.506 -0.230
Upvotes: 1
Reputation: 40131
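dplyr::between() is vectorized over x, but in the dplyr version used in the question (1.0.2) it is not vectorized over its left and right bounds: they are treated as scalars. Passing the whole low and high columns therefore compares 0 against only their first values (-0.143 and 0.119), which is TRUE for every row, so the negated filter keeps nothing. One thing you could do is evaluate it row by row: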
df %>%
rowwise() %>%
filter(!dplyr::between(0, low, high))
difference low high
<dbl> <dbl> <dbl>
1 0.330 0.0678 0.656
2 0.521 0.230 0.977
3 0.328 0.177 0.391
4 0.660 0.404 0.914
5 0.311 0.0217 0.634
6 -0.743 -1.01 -0.550
7 -0.285 -0.500 -0.0435
8 -0.331 -0.506 -0.230
data.table::between() is vectorized over its lower and upper bounds as well: that's the reason why it works.
Upvotes: 2