Reputation: 83
I have the following data:
# Input data: 48 rows for three IDs (17, 14 and 17 rows respectively).
# A is a 0/1 indicator and Day counts up from 1 within each ID.
structure(list(ID = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L), A = c(0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L,
0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L,
1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L), Day = c(1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L)), row.names = c(NA, 48L), class = "data.frame")
I want to find the IDs that have more than 3 consecutive observations with A equal to 1, and create the following data:
# Desired result: the same ID, A and Day columns plus two new columns.
# Censor is 0 for every row of ID 1, 1 from Day 5 onwards for ID 2, and
# 1 from Day 14 onwards for ID 3.
# Day_2 is constant within each ID: NA for ID 1, 5 for ID 2, 14 for ID 3.
structure(list(ID = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L), A = c(0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L,
0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L,
1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L), Day = c(1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L), Censor = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 1L, 1L, 1L, 1L), Day_2 = c(NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 14L, 14L, 14L, 14L, 14L, 14L, 14L,
14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L)), row.names = c(NA,
48L), class = "data.frame")
Here, Censor is 0 until the first Day on which an ID begins a run of more than 3 consecutive observations with A equal to 1, and 1 from that Day onwards; Day_2 is the Day on which that run begins (NA if the ID never has such a run).
Upvotes: 2
Views: 64
Reputation: 30494
Another approach could use `rollapply` from the `zoo` package. After grouping by `ID`, use `rollapply` over a window of 4 values (`width = 4`) and set an indicator of 1 where all of those values of `A` are 1. Next, `cumany` makes the rest of the `Censor` column 1 following the first 1 value. Last, `which` provides the index at which `Censor` is first equal to 1.
library(tidyverse)
library(zoo)
# Per ID, flag the start of the first run of 4 consecutive A == 1 values,
# keep the flag set from there on, and record the Day on which it starts.
df %>%
  group_by(ID) %>%
  mutate(
    # 1 where this row and the next three rows all have A == 1; windows that
    # would run past the end of the group are filled with 0.
    Censor = rollapply(A == 1, width = 4, FUN = all, fill = 0, align = "left"),
    # Once censored, stay censored; unary + turns the logical into 0/1 integers.
    Censor = +cumany(Censor == 1),
    # Look up the Day of the first censored row rather than its position in
    # the group, so the result is still right when Day does not equal the row
    # index. Gives NA when the ID is never censored.
    Day_2 = Day[which(Censor == 1)[1]]
  ) %>%
  # Drop the grouping so later verbs do not silently operate per ID.
  ungroup()
Output
ID A Day Censor Day_2
<int> <int> <int> <int> <int>
1 1 0 1 0 NA
2 1 0 2 0 NA
3 1 0 3 0 NA
4 1 0 4 0 NA
5 1 0 5 0 NA
6 1 1 6 0 NA
7 1 1 7 0 NA
8 1 0 8 0 NA
9 1 0 9 0 NA
10 1 0 10 0 NA
11 1 0 11 0 NA
12 1 1 12 0 NA
13 1 1 13 0 NA
14 1 1 14 0 NA
15 1 0 15 0 NA
16 1 0 16 0 NA
17 1 0 17 0 NA
18 2 0 1 0 5
19 2 0 2 0 5
20 2 0 3 0 5
21 2 0 4 0 5
22 2 1 5 1 5
23 2 1 6 1 5
24 2 1 7 1 5
25 2 1 8 1 5
26 2 1 9 1 5
27 2 1 10 1 5
28 2 1 11 1 5
29 2 1 12 1 5
30 2 1 13 1 5
31 2 1 14 1 5
32 3 0 1 0 14
33 3 0 2 0 14
34 3 0 3 0 14
35 3 0 4 0 14
36 3 0 5 0 14
37 3 1 6 0 14
38 3 1 7 0 14
39 3 1 8 0 14
40 3 0 9 0 14
41 3 1 10 0 14
42 3 0 11 0 14
43 3 0 12 0 14
44 3 0 13 0 14
45 3 1 14 1 14
46 3 1 15 1 14
47 3 1 16 1 14
48 3 1 17 1 14
Upvotes: 2
Reputation: 73592
Wrapping an `rle` approach in a function `f` that identifies runs of consecutive `1`s with length > 3, and then using it in `ave`.
# Flag the elements of `x` that belong to a long run of consecutive 1s.
#
# x:       atomic vector of indicators (e.g. 0/1).
# min_run: a run of 1s is flagged only when its length is strictly greater
#          than this value; the default of 3 reproduces the original
#          hard-coded "> 3" rule.
# Returns a numeric vector of the same length as `x`: 1 for elements inside a
# qualifying run, 0 everywhere else.
f <- \(x, min_run = 3) {
  runs <- rle(x)
  # Positions (in run space) of the runs of 1s that are long enough.
  long_ones <- which(runs$values == 1 & runs$lengths > min_run)
  run_flag <- replace(numeric(length(runs$values)), long_ones, 1)
  # Expand the per-run flags back to one value per element of `x`.
  rep.int(run_flag, runs$lengths)
}
# Add the censoring columns to `dat`, computed separately for each ID.
res <- within(dat, {
  # f() marks the rows inside qualifying runs of A == 1; cummax() then keeps
  # the flag at 1 for every later row of the same ID, so censoring starts at
  # the first qualifying run and does not switch off when that run ends.
  censor <- ave(A, ID, FUN = \(x) cummax(f(x)))
  # Day of the first censored row per ID (NA when the ID is never censored).
  # Uncensored rows are blanked out first so the first non-NA value is the
  # wanted Day; this uses the Day value itself, not the row position.
  Day_2 <- ave(replace(Day, censor != 1, NA), ID, FUN = \(d) d[!is.na(d)][1L])
})
res
# ID A Day Day_2 censor
# 1 1 0 1 NA 0
# 2 1 0 2 NA 0
# 3 1 0 3 NA 0
# 4 1 0 4 NA 0
# 5 1 0 5 NA 0
# 6 1 1 6 NA 0
# 7 1 1 7 NA 0
# 8 1 0 8 NA 0
# 9 1 0 9 NA 0
# 10 1 0 10 NA 0
# 11 1 0 11 NA 0
# 12 1 1 12 NA 0
# 13 1 1 13 NA 0
# 14 1 1 14 NA 0
# 15 1 0 15 NA 0
# 16 1 0 16 NA 0
# 17 1 0 17 NA 0
# 18 2 0 1 5 0
# 19 2 0 2 5 0
# 20 2 0 3 5 0
# 21 2 0 4 5 0
# 22 2 1 5 5 1
# 23 2 1 6 5 1
# 24 2 1 7 5 1
# 25 2 1 8 5 1
# 26 2 1 9 5 1
# 27 2 1 10 5 1
# 28 2 1 11 5 1
# 29 2 1 12 5 1
# 30 2 1 13 5 1
# 31 2 1 14 5 1
# 32 3 0 1 14 0
# 33 3 0 2 14 0
# 34 3 0 3 14 0
# 35 3 0 4 14 0
# 36 3 0 5 14 0
# 37 3 1 6 14 0
# 38 3 1 7 14 0
# 39 3 1 8 14 0
# 40 3 0 9 14 0
# 41 3 1 10 14 0
# 42 3 0 11 14 0
# 43 3 0 12 14 0
# 44 3 0 13 14 0
# 45 3 1 14 14 1
# 46 3 1 15 14 1
# 47 3 1 16 14 1
# 48 3 1 17 14 1
Data:
# Example data: 48 rows for three IDs (17, 14 and 17 rows respectively).
# A is a 0/1 indicator and Day counts up from 1 within each ID.
dat <- structure(list(ID = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L), A = c(0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L,
0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 0L,
1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L), Day = c(1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L)), row.names = c(NA, 48L), class = "data.frame")
Upvotes: 2