Reputation: 305
I need to find and count the ID's that appear with a 1 after 3 or more consecutive zeros.
This is a example of what I have:
# ID Jan Feb Mar Apr May Jun Jul Aug Sept Oct
# 1 0 0 0 1 0 0 1 1 1 0
# 2 0 0 0 0 0 0 1 0 0 0
# 3 0 0 0 0 0 0 0 0 0 1
# 4 1 0 0 1 0 1 0 1 0 1
# 5 0 0 1 0 0 1 1 0 0 1
c1<- c("ID","Jan","Feb", "Mar","Apr", "May","Jun", "Jul", "Aug", "Sept", "Oct")
c2<- c(1,0,0,0,1,0,0,1,1,1,0)
c3<- c(2,0,0,0,0,0,0,1,0,0,0)
c4<- c(3,0,0,0,0,0,0,0,0,0,1)
c5<- c(4,1,0,0,1,0,1,0,1,0,1)
c6<- c(5,0,0,1,0,0,1,1,0,0,1)
BD<-data.frame(rbind(c2,c3,c4,c5,c6))
colnames(BD)<-c1
The result of what I expect is something like this:
# ID Jan Feb Mar Apr May Jun Jul Aug Sept Oct
# 1 0 0 0 1 0 0 1 1 1 0
# 2 0 0 0 0 0 0 1 0 0 1
# 3 0 0 0 0 0 0 0 0 0 1
Anyone know how to do it? Thanks!
Upvotes: 3
Views: 222
Reputation: 25225
An option using data.table
to melt and filter for rows that match conditions.
library(data.table)
setDT(BD)[ID %in%
melt(BD, id.vars="ID")[,
mth := .GRP, variable][
value==1L, ID[mth[1L]>3L | any(diff(mth) > 3L)], ID]$V1
]
It should be faster for large datasets with sparse data.
Upvotes: 1
Reputation: 73134
You could collapse to string and use grep()
to search for pattern.
k <- 3
grep(sprintf(paste0("%0", k + 1, "d"), 1), apply(d[-1], 1, paste, collapse=""))
# [1] 2 4 5 6 8
If no following 1 is needed you could use the rle()
.
d
# id Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
# c1 1 1 1 0 1 1 0 0 1 1 1 0 0
# c2 2 0 0 0 1 1 1 0 1 1 0 1 0
# c3 3 1 0 0 1 1 0 1 1 1 0 1 0
# c4 4 0 0 0 0 0 1 1 0 0 1 1 0
# c5 5 0 0 0 1 1 1 1 0 0 1 0 1
# c6 6 1 0 0 0 1 0 1 0 0 0 0 1
# c7 7 0 1 0 0 1 0 1 1 1 0 0 1
# c8 8 0 1 1 1 1 1 1 1 0 0 0 1
# c9 9 0 1 0 0 1 1 0 0 1 1 1 0
# c10 10 1 1 0 1 0 1 1 0 0 1 0 1
k <- 3
d$id[sapply(as.data.frame(t(d[-1])), function(x) any(rle(x)$lengths[rle(x)$values == 0] >= k))]
# [1] 2 4 5 6 8
Data:
set.seed(0)
d <- data.frame(id=1:10,
`dimnames<-`(matrix(sample(0:1, 120, r=1), 10),
list(paste0("c", 1:10), month.abb)))
Upvotes: 2
Reputation: 101916
Here is a base R solution that can make it
BDout <- subset(BD,apply(BD[-1], 1, function(x) head(which(x==1),1))>3)
such that
> BDout
ID Jan Feb Mar Apr May Jun Jul Aug Sept Oct
1 1 0 0 0 1 0 0 1 1 1 0
2 2 0 0 0 0 0 0 1 0 0 0
3 3 0 0 0 0 0 0 0 0 0 1
Upvotes: 1
Reputation: 3388
You can consolidate the rows into strings and use a regular expression to match '0001':
library(tidyverse)
rows = BD %>%
purrr::pmap(function(...) paste0(list(...)[-1], collapse='')) %>%
stringr::str_detect('0001')
BD[rows,]
Upvotes: 1
Reputation: 28695
If you take the rowid(rleid(x))
of a vector x
you get the number of steps into each "run" each element is*. You can check that this is >= 3 and the element is 0. If that is true for the previous element (for the shifted output) and the element is 1, return TRUE. Then check if this is TRUE for any
of the elements in the row.
library(data.table)
rows <-
apply(BD, 1, function(r) any(shift(rowid(rleid(r)) >= 3 & r == 0) & r == 1))
BD[rows,]
# ID Jan Feb Mar Apr May Jun Jul Aug Sept Oct
# c2 1 0 0 0 1 0 0 1 1 1 0
# c3 2 0 0 0 0 0 0 1 0 0 0
# c4 3 0 0 0 0 0 0 0 0 0 1
* Here's an example for a particular row (the first)
rbind(
rowid_rleid = rowid(rleid(unlist(BD[1,]))),
original = unlist(BD[1,]))
# ID Jan Feb Mar Apr May Jun Jul Aug Sept Oct
# rowid_rleid 1 1 2 3 1 1 2 1 2 3 1
# original 1 0 0 0 1 0 0 1 1 1 0
Upvotes: 2