Asw
Asw

Reputation: 3

Filter by group and multiple conditions, dplyr

I am trying to filter Countries, across the years 2000 to 2016, where indic.no must equal 10 across ALL years.

I have tried using different filtering using multiple conditions, but never seem to get the expected results.

#subset of df
structure(list(ISO3 = c("ABW", "ABW", "ABW", "ABW", "ABW", "ABW", 
                        "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", 
                        "ABW", "ABW", "ABW", "ABW", "ARE", "ARE", "ARE", "ARE", "ARE", 
                        "ARE", "ARE", "ARE", "ARE", "ARE", "ARE", "ARE", "ARE", "ARE", 
                        "ARE", "ARE", "ARE", "ARE", "ARE", "AUS", "AUS", "AUS", "AUS", 
                        "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", 
                        "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "BRB", "BRB", "BRB", 
                        "BRB", "BRB", "BRB", "BRB", "BRB", "BRB", "BRB", "BRB", "BRB", 
                        "BRB", "BRB", "BRB", "BRB", "BRB", "COL", "COL", "COL", "COL", 
                        "COL", "COL", "COL", "COL", "COL", "COL", "COL", "COL", "COL", 
                        "COL", "COL", "COL", "COL", "COL", "COL", "FJI", "FJI", "FJI", 
                        "FJI", "FJI", "FJI", "FJI", "FJI", "FJI", "FJI", "FJI", "FJI", 
                        "FJI", "FJI", "FJI", "FJI", "FJI", "FJI", "FJI", "KIR", "KIR", 
                        "KIR", "KIR", "KIR", "KIR", "KIR", "KIR", "KIR", "KIR", "KIR", 
                        "KIR", "KIR", "KIR", "KIR", "KIR", "KIR", "KIR", "PNG", "PNG", 
                        "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", 
                        "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", "VCT", 
                        "VCT", "VCT", "VCT", "VCT", "VCT", "VCT", "VCT", "VCT", "VCT", 
                        "VCT", "VCT", "VCT", "VCT", "VCT", "VCT", "VCT"), 
                        NAME_0 = c("Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", 
                        "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", 
                        "Aruba", "Aruba", "Aruba", "Aruba", "United Arab Emirates", "United Arab Emirates", 
                        "United Arab Emirates", "United Arab Emirates", "United Arab Emirates", 
                        "United Arab Emirates", "United Arab Emirates", "United Arab Emirates", 
                        "United Arab Emirates", "United Arab Emirates", "United Arab Emirates", 
                        "United Arab Emirates", "United Arab Emirates", "United Arab Emirates", 
                        "United Arab Emirates", "United Arab Emirates", "United Arab Emirates", 
                        "United Arab Emirates", "United Arab Emirates", "Australia", 
                        "Australia", "Australia", "Australia", "Australia", "Australia", 
                        "Australia", "Australia", "Australia", "Australia", "Australia", 
                        "Australia", "Australia", "Australia", "Australia", "Australia", 
                        "Australia", "Australia", "Australia", "Barbados", "Barbados", 
                        "Barbados", "Barbados", "Barbados", "Barbados", "Barbados", "Barbados", 
                        "Barbados", "Barbados", "Barbados", "Barbados", "Barbados", "Barbados", 
                        "Barbados", "Barbados", "Barbados", "Colombia", "Colombia", "Colombia", 
                        "Colombia", "Colombia", "Colombia", "Colombia", "Colombia", "Colombia", 
                        "Colombia", "Colombia", "Colombia", "Colombia", "Colombia", "Colombia", 
                        "Colombia", "Colombia", "Colombia", "Colombia", "Fiji", "Fiji", 
                        "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", 
                        "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", 
                        "Fiji", "Kiribati", "Kiribati", "Kiribati", "Kiribati", "Kiribati", 
                        "Kiribati", "Kiribati", "Kiribati", "Kiribati", "Kiribati", "Kiribati", 
                        "Kiribati", "Kiribati", "Kiribati", "Kiribati", "Kiribati", "Kiribati", 
                        "Kiribati", "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", 
                        "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", 
                        "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", 
                        "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", 
                        "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", 
                        "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                        "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                        "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                        "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                        "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                        "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                        "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                        "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                        "Saint Vincent and the Grenadines"), Year = c(2000, 2001, 2002, 
                                                                      2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 
                        2014, 2015, 2016, 2017, 2018, 2000, 2001, 2002, 2003, 2004, 2005, 
                        2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 
                        2017, 2018, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 
                        2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2000, 
                        2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 
                        2012, 2013, 2014, 2015, 2016, 2000, 2001, 2002, 2003, 2004, 2005, 
                        2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 
                        2017, 2018, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 
                        2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2000, 
                        2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 
                        2012, 2013, 2014, 2015, 2016, 2017, 2000, 2001, 2002, 2003, 2004, 
                        2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 
                        2016, 2017, 2018, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 
                        2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016), indic.no = c(10, 
                        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 
                        7, 7, 10, 10, 10, 10, 10, 10, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 
                        10, 7, 7, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 
                        10, 10, 10, 10, 7, 7, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 
                        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 
                        10, 10, 10, 10, 10, 10, 10, 10, 7, 7, 10, 10, 10, 10, 10, 10, 
                        9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 7, 7, 10, 10, 9, 9, 
                        9, 10, 9, 9, 8, 8, 8, 8, 8, 8, 9, 9, 9, 7, 8, 8, 8, 9, 9, 9, 
                        9, 9, 10, 9, 10, 10, 9, 10, 10, 9, 10, 7, 7, 8, 8, 8, 8, 8, 8, 
                        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8)), row.names = c(NA, -166L), class = c("tbl_df", 
                                                                                    

Plot of countries and indic.no across years

#Want countries, that have indic.no = 10 across ALL years between 2000 to 2016
#Here I thought I am selecting all years between 2000 to 2016 AND where indic.no equals 10

df2 <-df %>%
  group_by(ISO3, NAME_0)%>%
  filter(all(Year %in%(2000:2016)) & indic.no == 10)

However, it only seems to give me "BRB", where I would expect to also have "ABW", "AUS"& "COL".

Filtered data plot

I have spent so much time on this one filtering issue, and cannot see where I am going wrong.

Upvotes: 0

Views: 153

Answers (3)

akrun
akrun

Reputation: 887711

Here is the most easiest way. After grouping by 'ISO3' and 'NAME_0', use a single filter by subsetting the 'indic.no' that are within the 'Year' range and check whether the all the values are 10

library(dplyr)
df2 <-  df %>%
   group_by(ISO3, NAME_0)%>%
   filter(all(indic.no[Year %in% 2000:2016] == 10))

unique(df2$ISO3)
#[1] "ABW" "AUS" "BRB" "COL"

Upvotes: 0

GenesRus
GenesRus

Reputation: 1057

The easiest way to get "countries that have indic.no = 10 across ALL years between 2000 to 2016" (assuming you don't need to keep the other data for any reason) is to separate the filtering steps and use the all around indic.no:

library(dplyr)
df %>% 
  group_by(ISO3, NAME_0) %>%
  filter(between(Year, 2000, 2016)) %>% 
  filter(all(indic.no == 10)) %>% 
  ungroup %>% 
  pull(ISO3) %>% unique()

[1] "ABW" "AUS" "BRB" "COL"

Upvotes: 0

luismf
luismf

Reputation: 371

My approach would be:

df <- tibble::tibble(ISO3 = c("ABW", "ABW", "ABW", "ABW", "ABW", "ABW", 
                        "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", "ABW", 
                        "ABW", "ABW", "ABW", "ABW", "ARE", "ARE", "ARE", "ARE", "ARE", 
                        "ARE", "ARE", "ARE", "ARE", "ARE", "ARE", "ARE", "ARE", "ARE", 
                        "ARE", "ARE", "ARE", "ARE", "ARE", "AUS", "AUS", "AUS", "AUS", 
                        "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", 
                        "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "BRB", "BRB", "BRB", 
                        "BRB", "BRB", "BRB", "BRB", "BRB", "BRB", "BRB", "BRB", "BRB", 
                        "BRB", "BRB", "BRB", "BRB", "BRB", "COL", "COL", "COL", "COL", 
                        "COL", "COL", "COL", "COL", "COL", "COL", "COL", "COL", "COL", 
                        "COL", "COL", "COL", "COL", "COL", "COL", "FJI", "FJI", "FJI", 
                        "FJI", "FJI", "FJI", "FJI", "FJI", "FJI", "FJI", "FJI", "FJI", 
                        "FJI", "FJI", "FJI", "FJI", "FJI", "FJI", "FJI", "KIR", "KIR", 
                        "KIR", "KIR", "KIR", "KIR", "KIR", "KIR", "KIR", "KIR", "KIR", 
                        "KIR", "KIR", "KIR", "KIR", "KIR", "KIR", "KIR", "PNG", "PNG", 
                        "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", 
                        "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", "PNG", "VCT", 
                        "VCT", "VCT", "VCT", "VCT", "VCT", "VCT", "VCT", "VCT", "VCT", 
                        "VCT", "VCT", "VCT", "VCT", "VCT", "VCT", "VCT"), 
               NAME_0 = c("Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", 
                          "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", "Aruba", 
                          "Aruba", "Aruba", "Aruba", "Aruba", "United Arab Emirates", "United Arab Emirates", 
                          "United Arab Emirates", "United Arab Emirates", "United Arab Emirates", 
                          "United Arab Emirates", "United Arab Emirates", "United Arab Emirates", 
                          "United Arab Emirates", "United Arab Emirates", "United Arab Emirates", 
                          "United Arab Emirates", "United Arab Emirates", "United Arab Emirates", 
                          "United Arab Emirates", "United Arab Emirates", "United Arab Emirates", 
                          "United Arab Emirates", "United Arab Emirates", "Australia", 
                          "Australia", "Australia", "Australia", "Australia", "Australia", 
                          "Australia", "Australia", "Australia", "Australia", "Australia", 
                          "Australia", "Australia", "Australia", "Australia", "Australia", 
                          "Australia", "Australia", "Australia", "Barbados", "Barbados", 
                          "Barbados", "Barbados", "Barbados", "Barbados", "Barbados", "Barbados", 
                          "Barbados", "Barbados", "Barbados", "Barbados", "Barbados", "Barbados", 
                          "Barbados", "Barbados", "Barbados", "Colombia", "Colombia", "Colombia", 
                          "Colombia", "Colombia", "Colombia", "Colombia", "Colombia", "Colombia", 
                          "Colombia", "Colombia", "Colombia", "Colombia", "Colombia", "Colombia", 
                          "Colombia", "Colombia", "Colombia", "Colombia", "Fiji", "Fiji", 
                          "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", 
                          "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", "Fiji", 
                          "Fiji", "Kiribati", "Kiribati", "Kiribati", "Kiribati", "Kiribati", 
                          "Kiribati", "Kiribati", "Kiribati", "Kiribati", "Kiribati", "Kiribati", 
                          "Kiribati", "Kiribati", "Kiribati", "Kiribati", "Kiribati", "Kiribati", 
                          "Kiribati", "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", 
                          "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", 
                          "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", 
                          "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", 
                          "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", "Papua New Guinea", 
                          "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                          "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                          "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                          "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                          "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                          "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                          "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                          "Saint Vincent and the Grenadines", "Saint Vincent and the Grenadines", 
                          "Saint Vincent and the Grenadines"),
               Year = c(2000, 2001, 2002, 
                        2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 
                        2014, 2015, 2016, 2017, 2018, 2000, 2001, 2002, 2003, 2004, 2005, 
                        2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 
                        2017, 2018, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 
                        2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2000, 
                        2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 
                        2012, 2013, 2014, 2015, 2016, 2000, 2001, 2002, 2003, 2004, 2005, 
                        2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 
                        2017, 2018, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 
                        2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2000, 
                        2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 
                        2012, 2013, 2014, 2015, 2016, 2017, 2000, 2001, 2002, 2003, 2004, 
                        2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 
                        2016, 2017, 2018, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 
                        2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016),
               indic.no = c(10, 
                            10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 
                            7, 7, 10, 10, 10, 10, 10, 10, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 
                            10, 7, 7, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 
                            10, 10, 10, 10, 7, 7, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 
                            10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 
                            10, 10, 10, 10, 10, 10, 10, 10, 7, 7, 10, 10, 10, 10, 10, 10, 
                            9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 7, 7, 10, 10, 9, 9, 
                            9, 10, 9, 9, 8, 8, 8, 8, 8, 8, 9, 9, 9, 7, 8, 8, 8, 9, 9, 9, 
                            9, 9, 10, 9, 10, 10, 9, 10, 10, 9, 10, 7, 7, 8, 8, 8, 8, 8, 8, 
                            8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8)
           )

library(dplyr)
df %>%
  filter(between(Year, 2000, 2016), indic.no==10) %>% 
  select(ISO3, NAME_0) %>% 
  distinct()

Upvotes: 0

Related Questions