EML
EML

Reputation: 671

Performing two actions based on two conditions with dplyr

In the below dataset, for each id, I have flagged (m_flag for column m and f_flag for column w) the first occurrence of a 1 or 2 following a 3 in columns m OR w.

I am trying to:

1) set m_flag to 1 in the row preceding a 3 in m if m is missing but var 1 is not

Then, convert the previous 1 in m_flag to 0

2) set f_flag to 1 in the row preceding a 3 in w if w is missing but var 2 is not (e.g., row 7)

Then, convert the previous 1 in f_flag to 0 (e.g., row 6)

df <- data.frame(id=c(1,1,1, 2,2, 3,3,3, 4,4,4),
                  m=c(2,NA,NA,  2,3, 2,2,3, 2,2,3),
                  w=c(2,NA,3, 2,NA, 2,NA,3, 2,NA,3),
               var1=c(5,NA,NA, 6,6,7,7,7, 8,8,8),
               var2=c(3,3,3, 4,NA, 5,5,5, 6,NA,6), 
               m_flag=c(1,0,NA, 1,NA, 0,1,NA, 0,1,NA),
               f_flag=c(1,0,NA, 1,NA, 1,0,NA, 1,0,NA))
        > df
       id  m  w var1 var2 m_flag f_flag
    1   1  2  2    5    3      1      1
    2   1 NA NA   NA    3      0      0
    3   1 NA  3   NA    3     NA     NA
    4   2  2  2    6    4      1      1
    5   2  3 NA    6   NA     NA     NA
    6   3  2  2    7    5      0      1
    7   3  2 NA    7    5      1      0
    8   3  3  3    7    5     NA     NA
    9   4  2  2    8    6      0      1
    10  4  2 NA    8   NA      1      0
    11  4  3  3    8    6     NA     NA

Output (note: only 1 in row 7 would change from 0 to 1 and 0 in row 6 from 1 to 0)

 output <- data.frame(id=c(1,1,1, 2,2, 3,3,3, 4,4,4),
                  m=c(2,NA,NA,  2,3, 2,2,3, 2,2,3),
                  w=c(2,NA,3, 2,NA, 2,NA,3, 2,NA,3),
               var1=c(5,NA,NA, 6,6,7,7,7, 8,8,8),
               var2=c(3,3,3, 4,NA, 5,5,5, 6,NA,6), 
               m_flag=c(1,0,NA, 1,NA, 0,1,NA, 0,1,NA),
               f_flag=c(1,0,NA, 1,NA, 0,1,NA, 1,0,NA))

> output
   id  m  w var1 var2 m_flag f_flag
1   1  2  2    5    3      1      1
2   1 NA NA   NA    3      0      0
3   1 NA  3   NA    3     NA     NA
4   2  2  2    6    4      1      1
5   2  3 NA    6   NA     NA     NA
6   3  2  2    7    5      0    **0**
7   3  2 NA    7    5      1    **1**
8   3  3  3    7    5     NA     NA
9   4  2  2    8    6      0      1
10  4  2 NA    8   NA      1      0
11  4  3  3    8    6     NA     NA

Thank you

Upvotes: 0

Views: 51

Answers (1)

smingerson
smingerson

Reputation: 1438

First, create columns corresponding to the condition in step(s) 1. We'll call these meet_condition_f and meet_condition_m. Then, we'll use lead() to look at the value of the condition in the next row. If it's true, we'll reset the corresponding flag to 0. Then, for rows where the condition is true, we'll set the flag to 1 (this is the second piece of step 1).

If you needed to do it by group, add group_by(id), for example, prior to the mutate. Don't forget to ungroup afterwards.

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
df <- data.frame(id=c(1,1,1, 2,2, 3,3,3, 4,4,4),
                 m=c(2,NA,NA,  2,3, 2,2,3, 2,2,3),
                 w=c(2,NA,3, 2,NA, 2,NA,3, 2,NA,3),
                 var1=c(5,NA,NA, 6,6,7,7,7, 8,8,8),
                 var2=c(3,3,3, 4,NA, 5,5,5, 6,NA,6), 
                 m_flag=c(1,0,NA, 1,NA, 0,1,NA, 0,1,NA),
                 f_flag=c(1,0,NA, 1,NA, 1,0,NA, 1,0,NA))

df %>% mutate(
  # Create an indicator column for the condition specified.
  # `lead` looks at the "m" value for the next row.
  # `if_else` takes a logical condition and returns the result
  # from true/false/missing depending which criteria each one meets.
  meet_condition_m = if_else(
    is.na(m) &
      lead(m) == 3 &
      !is.na(var1),
    true = TRUE,
    false = FALSE,
    missing = NA),
  meet_condition_f = if_else(
    is.na(w) &
      lead(w) == 3 &
      !is.na(var2),
    true = TRUE, 
    false = FALSE,
    missing = NA
  ),
  # First, perform step to to convert the previous 1 to 0
  m_flag = if_else(lead(meet_condition_m) & m_flag == 1, 0, m_flag, m_flag),
  # Then execute the first step
  m_flag = if_else(meet_condition_m, 1, m_flag, m_flag),
  # Repeat for f
  f_flag = if_else(lead(meet_condition_f) & f_flag == 1, 0, f_flag, f_flag),
  f_flag = if_else(meet_condition_f, 1, f_flag, f_flag)) %>% 
  # Drop intermediate columns.
  select(-meet_condition_m, -meet_condition_f)
#>    id  m  w var1 var2 m_flag f_flag
#> 1   1  2  2    5    3      1      0
#> 2   1 NA NA   NA    3      0      1
#> 3   1 NA  3   NA    3     NA     NA
#> 4   2  2  2    6    4      1      1
#> 5   2  3 NA    6   NA     NA     NA
#> 6   3  2  2    7    5      0      0
#> 7   3  2 NA    7    5      1      1
#> 8   3  3  3    7    5     NA     NA
#> 9   4  2  2    8    6      0      1
#> 10  4  2 NA    8   NA      1      0
#> 11  4  3  3    8    6     NA     NA
Created on 2019-11-20 by the reprex package (v0.3.0)

Upvotes: 1

Related Questions