Ph.D.Student
Ph.D.Student

Reputation: 726

Use group_by and mutate... to create multiple conditions per group

I have data "datFL" composed of several rows per identifier "id":

# Example data: 3 ids, each observed at 4 time points, with four "Y"/"N"
# indicator columns x1..x4.
datFL <- data.frame(
  id = rep(1:3, each = 4),
  # `times` is spelled out in full instead of relying on partial matching.
  time = rep(1:4, times = 3),
  x1 = c("N", "N", "N", "Y", "N", "N", "N", "N", "N", "N", "N", "N"),
  x2 = c("Y", "Y", "N", "Y", "Y", "N", "Y", "N", "Y", "Y", "N", "N"),
  x3 = c("Y", "Y", "N", "N", "N", "Y", "N", "Y", "N", "N", "N", "N"),
  x4 = c("N", "N", "N", "N", "Y", "N", "Y", "N", "N", "N", "Y", "Y")
)

I want to create a "CND" Yes / No vector with the following conditions:

  1. At least 3 of the 4 columns x1, ..., x4 == "Y" at any single time; or
  2. At least 2 of the 4 columns x1, ..., x4 == "Y" for 2 consecutive times.

Here is my code. It creates a vector d1 for the first condition and a vector d2 for the second condition; CND is then the union of both, i.e. "Y" wherever d1 or d2 is "Y".

# Condition 1.
# For each id, count how many of the indicator columns (column 3 onward)
# hold a "Y" in at least one of that id's rows; the id is flagged "Y" when
# 3 or more columns qualify. Note that the count is pooled over all rows of
# the id, not taken within a single row.
#   d1  - one "Y"/"N" per row of datFL (every row of an id gets the id's flag)
#   d1. - one "Y"/"N" per unique id, in order of first appearance
ids <- unique(datFL$id)
d1 <- character(nrow(datFL))
d1. <- character(length(ids))
for (s in seq_along(ids)) {
  rows <- which(datFL$id %in% ids[s])
  # TRUE for each indicator column with a "Y" somewhere in this id's rows.
  # na.rm = TRUE keeps a missing value from turning the count into NA.
  has_y <- vapply(
    datFL[rows, 3:ncol(datFL), drop = FALSE],
    function(col) any(col == "Y", na.rm = TRUE),
    logical(1)
  )
  # Plain if/else: the test is a scalar, so ifelse() is not needed.
  flag <- if (sum(has_y) >= 3) "Y" else "N"
  d1[rows] <- flag
  d1.[s] <- flag
}
> d1
[1] "Y" "Y" "Y" "Y" "Y" "Y" "Y" "Y" "N" "N" "N" "N"

# Condition 2.
# For each id, count how many indicator columns (column 3 onward) have "Y"
# in two adjacent rows of that id; the id is flagged "Y" when 2 or more
# columns qualify. Adjacency follows row order within the id, so the rows
# are assumed to be sorted by time - TODO confirm.
#   d2  - one "Y"/"N" per row of datFL (every row of an id gets the id's flag)
#   d2. - one "Y"/"N" per unique id, in order of first appearance
ids <- unique(datFL$id)
d2 <- character(nrow(datFL))
d2. <- character(length(ids))
for (s in seq_along(ids)) {
  rows <- which(datFL$id %in% ids[s])
  # which() returns ascending positions, so a difference of exactly 1 means
  # two "Y" values sit in neighbouring rows.
  has_run <- vapply(
    datFL[rows, 3:ncol(datFL), drop = FALSE],
    function(col) any(diff(which(col == "Y")) == 1),
    logical(1)
  )
  # Plain if/else: the test is a scalar, so ifelse() is not needed.
  flag <- if (sum(has_run) >= 2) "Y" else "N"
  d2[rows] <- flag
  d2.[s] <- flag
}

# Combine both conditions: "Y" wherever condition 1 or condition 2 holds.
# CND is per row (built from d1 and d2); CND. is per id (built from d1. and
# d2.). The two pairs have different lengths, so each pair is combined on
# its own instead of sharing one loop over the row index.
CND <- ifelse(d1 == "Y" | d2 == "Y", "Y", "N")
CND. <- ifelse(d1. == "Y" | d2. == "Y", "Y", "N")

The result:

> CND
 [1] "Y" "Y" "Y" "Y" "Y" "Y" "Y" "Y" "Y" "Y" "Y" "Y"
# Yes / No by identifier
> CND.
[1] "Y" "Y" "Y"

I want to implement these conditions with the tidyverse packages (tidyr, dplyr). Here is my attempt at the first condition, but it does not give the result I want:

> s <- datFL %>% 
+   select(c(1,3:6)) %>% 
+   group_by(id) %>%
+   mutate(
+    ss = across(c(1:4), ~ifelse(length(. == "Y")>=3, "Y", "N") 
+     )
+   )
> s
# A tibble: 12 x 6
# Groups:   id [3]
      id x1    x2    x3    x4    ss$x1 $x2   $x3   $x4  
   <int> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
 1     1 N     Y     Y     N     Y     Y     Y     Y    
 2     1 N     Y     Y     N     Y     Y     Y     Y    
 3     1 N     N     N     N     Y     Y     Y     Y    
 4     1 Y     Y     N     N     Y     Y     Y     Y    
 5     2 N     Y     N     Y     Y     Y     Y     Y    
 6     2 N     N     Y     N     Y     Y     Y     Y    
 7     2 N     Y     N     Y     Y     Y     Y     Y    
 8     2 N     N     Y     N     Y     Y     Y     Y    
 9     3 N     Y     N     N     Y     Y     Y     Y    
10     3 N     Y     N     N     Y     Y     Y     Y    
11     3 N     N     N     Y     Y     Y     Y     Y    
12     3 N     N     N     Y     Y     Y     Y     Y  

Could you help me implement these conditions?

Thank you.

Upvotes: 2

Views: 54

Answers (1)

langtang
langtang

Reputation: 24722

You can create two simple helper functions that describe your conditions, and then apply them by group:

# f1: TRUE when at least one element of x equals "Y".
# Missing values are ignored, so the result is always TRUE or FALSE.
f1 <- function(x) any(x == "Y", na.rm = TRUE)

# f2: TRUE when two neighbouring elements of x both equal "Y".
# Elements are compared pairwise rather than pasted into one string, so
# multi-character values (e.g. "NY" next to "YN") cannot produce a false
# match. Vectors of length 0 or 1 return FALSE.
f2 <- function(x) {
  is_y <- !is.na(x) & x == "Y"
  any(is_y[-1] & is_y[-length(is_y)])
}

# One row per id. CND is TRUE when either condition holds:
#   - at least 2 of the columns x1..x4 satisfy f2, or
#   - at least 3 of the columns x1..x4 satisfy f1.
datFL %>%
  group_by(id) %>%
  summarize(
    CND = (sum(across(x1:x4, f2)) >= 2) | (sum(across(x1:x4, f1)) >= 3)
  )

Output:

     id CND  
  <int> <lgl>
1     1 TRUE 
2     2 TRUE 
3     3 TRUE 

Upvotes: 1

Related Questions