Petr
Petr

Reputation: 1817

Filter columns based on the list of columns and list of values

Is there a way how to filter in R (using tidyverse or data.table) when having lists of columns and equal values?

One list contains columns to be filtered, the second list contains which values of columns on the first list shall be selected respectively.


library(tidyverse)

df <- tibble(mtcars)

col1 <- c('mpg', 'am')
val1 <- c(21, 1)

# Pseudo idea of what I need
df %>%
  filter(col1 == val1)

# This is the results that shall be obtained by using col1 and val1

df %>% 
  filter(mpg == 21,
         am == 1)


col2 <- c('mpg', 'am', 'carb', 'vs')
val2 <- c(21, 1, 4, 0)

df %>%
  filter(col2 == val2)

However, it is very important that this should be generalizable and only filter condition will always be ==.

ALSO, it needs to be automated so it work for lists with only one element e.g.:

col4 <- c('vs')
val4 <- c(0)

Upvotes: 1

Views: 1342

Answers (1)

akrun
akrun

Reputation: 887291

In the newer version of dplyr, we could also use if_all (equivalent to all i.e. it will return the rows where all the compound logical expressions are collapsed by & logical operator). The one for | is if_any). In this case, we can have a named vector` of 'val's

library(dplyr)
names(val1) <- col1
df %>%
    filter(if_all(all_of(col1), ~ . == val1[cur_column()]))
# A tibble: 2 x 11
#   mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
#  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1    21     6   160   110   3.9  2.62  16.5     0     1     4     4
#2    21     6   160   110   3.9  2.88  17.0     0     1     4     4

For the second case

names(val2) <- col2
df %>% 
    filter(if_all(all_of(col2), ~ . == val2[cur_column()]))
# A tibble: 2 x 11
#    mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
#  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1    21     6   160   110   3.9  2.62  16.5     0     1     4     4
#2    21     6   160   110   3.9  2.88  17.0     0     1     4     4 

Or for the one element vector

names(val4) <- col4
df %>% 
   filter(if_all(all_of(col4), ~ . == val4[cur_column()]))
# A tibble: 18 x 11
#     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
#   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1  21       6  160    110  3.9   2.62  16.5     0     1     4     4
# 2  21       6  160    110  3.9   2.88  17.0     0     1     4     4
# 3  18.7     8  360    175  3.15  3.44  17.0     0     0     3     2
# 4  14.3     8  360    245  3.21  3.57  15.8     0     0     3     4
# 5  16.4     8  276.   180  3.07  4.07  17.4     0     0     3     3
# 6  17.3     8  276.   180  3.07  3.73  17.6     0     0     3     3
# 7  15.2     8  276.   180  3.07  3.78  18       0     0     3     3
# 8  10.4     8  472    205  2.93  5.25  18.0     0     0     3     4
# 9  10.4     8  460    215  3     5.42  17.8     0     0     3     4
#10  14.7     8  440    230  3.23  5.34  17.4     0     0     3     4
#11  15.5     8  318    150  2.76  3.52  16.9     0     0     3     2
#12  15.2     8  304    150  3.15  3.44  17.3     0     0     3     2
#13  13.3     8  350    245  3.73  3.84  15.4     0     0     3     4
#14  19.2     8  400    175  3.08  3.84  17.0     0     0     3     2
#15  26       4  120.    91  4.43  2.14  16.7     0     1     5     2
#16  15.8     8  351    264  4.22  3.17  14.5     0     1     5     4
#17  19.7     6  145    175  3.62  2.77  15.5     0     1     5     6
#18  15       8  301    335  3.54  3.57  14.6     0     1     5     8

Or another option is to create a single expression and parse it with parse_expr

library(stringr)
df %>%
   filter(!! rlang::parse_expr(str_c(col1, val1, sep = '==', collapse="&")))

Same approach for the second case as well

df %>%
   filter(!! rlang::parse_expr(str_c(col2, val2, sep = '==', collapse="&")))

# A tibble: 2 x 11
#    mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
#  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#1    21     6   160   110   3.9  2.62  16.5     0     1     4     4
#2    21     6   160   110   3.9  2.88  17.0     0     1     4     4

Or with the updated case

df %>%
  filter(!! rlang::parse_expr(str_c(col4, val4, sep = '==', collapse="&")))

# A tibble: 18 x 11
#     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
#   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1  21       6  160    110  3.9   2.62  16.5     0     1     4     4
# 2  21       6  160    110  3.9   2.88  17.0     0     1     4     4
# 3  18.7     8  360    175  3.15  3.44  17.0     0     0     3     2
# 4  14.3     8  360    245  3.21  3.57  15.8     0     0     3     4
# 5  16.4     8  276.   180  3.07  4.07  17.4     0     0     3     3
# 6  17.3     8  276.   180  3.07  3.73  17.6     0     0     3     3
# 7  15.2     8  276.   180  3.07  3.78  18       0     0     3     3
# 8  10.4     8  472    205  2.93  5.25  18.0     0     0     3     4
# 9  10.4     8  460    215  3     5.42  17.8     0     0     3     4
#10  14.7     8  440    230  3.23  5.34  17.4     0     0     3     4
#11  15.5     8  318    150  2.76  3.52  16.9     0     0     3     2
#12  15.2     8  304    150  3.15  3.44  17.3     0     0     3     2
#13  13.3     8  350    245  3.73  3.84  15.4     0     0     3     4
#14  19.2     8  400    175  3.08  3.84  17.0     0     0     3     2
#15  26       4  120.    91  4.43  2.14  16.7     0     1     5     2
#16  15.8     8  351    264  4.22  3.17  14.5     0     1     5     4
#17  19.7     6  145    175  3.62  2.77  15.5     0     1     5     6
#18  15       8  301    335  3.54  3.57  14.6     0     1     5     8

Or can use map2/reduce. Within the filter, loop over the 'col2', 'val2' values, subset the data based on the 'col2' check if it is equal to the corresponding 'val2' value, reduce the list of logical vectors into a single logical vector with & to filter the rows

library(purrr)
df %>%
  filter(map2(col2, val2, ~  cur_data()[[.x]] == .y) %>% reduce(`&`))

Or another option is to create the logical expression with rowSums after selecting the columns of interest from the dataset

df %>%
   filter(!rowSums(select(., col2) != val2[col(select(., col2))]))

Or using base R

subset(df, Reduce(`&`, Map(`==`, df[col2], val2)))

Can create a function to do this

f1 <- function(dat, colnms, vals) {

  dat %>%
     filter(map2(colnms, vals, ~  cur_data()[[.x]] == .y) %>%
      reduce(`&`))


 }

-testing

f1(df, col1, val1)
# A tibble: 2 x 11
    mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1    21     6   160   110   3.9  2.62  16.5     0     1     4     4
2    21     6   160   110   3.9  2.88  17.0     0     1     4     4
f1(df, col2, val2)
# A tibble: 2 x 11
    mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
  <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1    21     6   160   110   3.9  2.62  16.5     0     1     4     4
2    21     6   160   110   3.9  2.88  17.0     0     1     4     4
f1(df, col4, val4)
# A tibble: 18 x 11
     mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1  21       6  160    110  3.9   2.62  16.5     0     1     4     4
 2  21       6  160    110  3.9   2.88  17.0     0     1     4     4
 3  18.7     8  360    175  3.15  3.44  17.0     0     0     3     2
 4  14.3     8  360    245  3.21  3.57  15.8     0     0     3     4
 5  16.4     8  276.   180  3.07  4.07  17.4     0     0     3     3
 6  17.3     8  276.   180  3.07  3.73  17.6     0     0     3     3
 7  15.2     8  276.   180  3.07  3.78  18       0     0     3     3
 8  10.4     8  472    205  2.93  5.25  18.0     0     0     3     4
 9  10.4     8  460    215  3     5.42  17.8     0     0     3     4
10  14.7     8  440    230  3.23  5.34  17.4     0     0     3     4
11  15.5     8  318    150  2.76  3.52  16.9     0     0     3     2
12  15.2     8  304    150  3.15  3.44  17.3     0     0     3     2
13  13.3     8  350    245  3.73  3.84  15.4     0     0     3     4
14  19.2     8  400    175  3.08  3.84  17.0     0     0     3     2
15  26       4  120.    91  4.43  2.14  16.7     0     1     5     2
16  15.8     8  351    264  4.22  3.17  14.5     0     1     5     4
17  19.7     6  145    175  3.62  2.77  15.5     0     1     5     6
18  15       8  301    335  3.54  3.57  14.6     0     1     5     8

Upvotes: 3

Related Questions