Reputation: 726
I have data "datFL" composed of several rows per identifier "id":
# Example data: 3 identifiers, each observed at 4 time points, with four
# "Y"/"N" indicator columns x1..x4.
datFL <- data.frame(
  id = rep(1:3, each = 4),
  # `times` is spelled out in full rather than relying on partial matching.
  time = rep(1:4, times = 3),
  x1 = c("N", "N", "N", "Y", "N", "N", "N", "N", "N", "N", "N", "N"),
  x2 = c("Y", "Y", "N", "Y", "Y", "N", "Y", "N", "Y", "Y", "N", "N"),
  x3 = c("Y", "Y", "N", "N", "N", "Y", "N", "Y", "N", "N", "N", "N"),
  x4 = c("N", "N", "N", "N", "Y", "N", "Y", "N", "N", "N", "Y", "Y")
)
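I want to create a "CND" Yes / No vector that is "Y" for an identifier when at least one of the following conditions holds: (1) at least three of the variables x1 to x4 contain a "Y" at any time; (2) at least two of the variables x1 to x4 contain "Y" at two consecutive times.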
Here is my code to create a vector d1 for the first condition, d2 for the second condition and CND = union of both vectors d1 and d2.
# Condition 1.
# An id meets condition 1 when at least 3 distinct indicator columns (every
# column after the first two, i.e. after "id" and "time") hold a "Y" in at
# least one of that id's rows.
ids <- unique(datFL$id)
x_cols <- colnames(datFL)[-(1:2)]

# d1.: one "Y"/"N" flag per id, in order of first appearance.
d1. <- vapply(ids, function(one_id) {
  rows <- datFL$id %in% one_id
  # Number of indicator columns with at least one "Y" for this id.
  # `%in%` never yields NA, so a missing value cannot break the test below.
  n_cols <- sum(vapply(
    x_cols,
    function(col) "Y" %in% datFL[[col]][rows],
    logical(1)
  ))
  if (n_cols >= 3) "Y" else "N"
}, character(1), USE.NAMES = FALSE)

# d1: the per-id flag repeated on every row belonging to that id.
d1 <- d1.[match(datFL$id, ids)]
> d1
[1] "Y" "Y" "Y" "Y" "Y" "Y" "Y" "Y" "N" "N" "N" "N"
# Condition 2.
# An id meets condition 2 when at least 2 distinct indicator columns (every
# column after the first two, i.e. after "id" and "time") hold "Y" on two
# adjacent rows of that id. Adjacency is by row position within the id, so
# this assumes each id's rows are ordered by time -- TODO confirm for real data.
ids <- unique(datFL$id)
x_cols <- colnames(datFL)[-(1:2)]

# d2.: one "Y"/"N" flag per id, in order of first appearance.
d2. <- vapply(ids, function(one_id) {
  rows <- datFL$id %in% one_id
  # Number of indicator columns with a run of at least two "Y" for this id.
  n_cols <- sum(vapply(
    x_cols,
    function(col) {
      # Positions of "Y" within the id; a gap of 1 means two in a row.
      y_pos <- which(datFL[[col]][rows] == "Y")
      any(diff(y_pos) == 1L)
    },
    logical(1)
  ))
  if (n_cols >= 2) "Y" else "N"
}, character(1), USE.NAMES = FALSE)

# d2: the per-id flag repeated on every row belonging to that id.
d2 <- d2.[match(datFL$id, ids)]
# Union of both conditions.
# CND: row-level flag, "Y" when condition 1 or condition 2 holds for the row.
CND <- ifelse(d1 == "Y" | d2 == "Y", "Y", "N")
# CND.: the same flag with one element per id. Computed separately because
# d1. / d2. are shorter than d1 / d2 and must not share a row-length index.
CND. <- ifelse(d1. == "Y" | d2. == "Y", "Y", "N")
The result:
> CND
[1] "Y" "Y" "Y" "Y" "Y" "Y" "Y" "Y" "Y" "Y" "Y" "Y"
# Yes / No by identifier
> CND.
[1] "Y" "Y" "Y"
I want to code these conditions using the packages: tidyr, dplyr, tidyverse. Here is my attempt to do the first condition, but I don't get what I want:
> s <- datFL %>%
+ select(c(1,3:6)) %>%
+ group_by(id) %>%
+ mutate(
+ ss = across(c(1:4), ~ifelse(length(. == "Y")>=3, "Y", "N")
+ )
+ )
> s
# A tibble: 12 x 6
# Groups: id [3]
id x1 x2 x3 x4 ss$x1 $x2 $x3 $x4
<int> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 1 N Y Y N Y Y Y Y
2 1 N Y Y N Y Y Y Y
3 1 N N N N Y Y Y Y
4 1 Y Y N N Y Y Y Y
5 2 N Y N Y Y Y Y Y
6 2 N N Y N Y Y Y Y
7 2 N Y N Y Y Y Y Y
8 2 N N Y N Y Y Y Y
9 3 N Y N N Y Y Y Y
10 3 N Y N N Y Y Y Y
11 3 N N N Y Y Y Y Y
12 3 N N N Y Y Y Y Y
Could you help me implement these conditions with these packages?
Thank you.
Upvotes: 2
Views: 54
Reputation: 24722
You can create two simple helper functions that describe your conditions, and then apply them by group:
# f1: TRUE when at least one element of x equals "Y".
f1 <- function(x) {
  is_yes <- x == "Y"
  any(is_yes)
}
# f2: TRUE when x contains two adjacent "Y" values. The elements are glued
# into a single string and searched for the pattern "YY".
f2 <- function(x) {
  joined <- paste(x, collapse = "")
  grepl("YY", joined)
}
# One row per id: CND is TRUE when f1 holds for at least 3 of the columns
# x1..x4, or f2 holds for at least 2 of them.
datFL %>%
  group_by(id) %>%
  summarize(
    CND = sum(across(x1:x4, f1)) >= 3 |
      sum(across(x1:x4, f2)) >= 2
  )
Output:
id CND
<int> <lgl>
1 1 TRUE
2 2 TRUE
3 3 TRUE
Upvotes: 1