Reputation: 4645
I need to detect from data the first element of the first sequence of length 5 of consecutively decreasing numbers. There is a similar post here but when I applied to my data it failed.
set.seed(201)
az <- c(sort(runif(10,0,0.9),decreasing = T),sort(runif(3,-0.3,0),decreasing = T),sort(runif(3,-0.3,0),decreasing = F),sort(runif(4,-0.3,0),decreasing = T),sort(runif(4,-0.3,0),decreasing = F),sort(runif(6,-0.3,0),decreasing = T))
tz <- seq(1,length(az))
df <- data.frame(tz,az=round(az,2))
In the figure above it would be somewhere around tz = 25.
The post says that this function need to improve and so far I cannot get my desired result!
getFirstBefore<-function(x,len){
r<-rle(sign(diff(x)))
n<-which(r$lengths>=len & r$values<0)
if(length(n)==0)
return(-1)
1+sum(r$lengths[seq_len(n[1]-1)])
}
df1 <- df%>%
mutate(cns_tz=getFirstBefore(az,5))
tz az cns_tz
#1 1 0.56 4
#2 2 0.55 4
#3 3 0.33 4
#4 4 0.33 4
#5 5 0.26 4
#6 6 0.15 4
#7 7 0.12 4
#8 8 0.09 4
#9 9 0.04 4
#10 10 0.04 4
#11 11 -0.10 4
#12 12 -0.12 4
#13 13 -0.16 4
#14 14 -0.16 4
#15 15 -0.14 4
#16 16 -0.14 4
#17 17 -0.13 4
#18 18 -0.15 4
#19 19 -0.22 4
#20 20 -0.30 4
#21 21 -0.12 4
#22 22 -0.12 4
#23 23 -0.11 4
#24 24 -0.07 4
#25 25 -0.05 4
#26 26 -0.09 4
#27 27 -0.10 4
#28 28 -0.15 4
#29 29 -0.17 4
#30 30 -0.22 4
Upvotes: 8
Views: 608
Reputation: 3964
My naive pure-dplyr approach would be to compute a rolling sum of signs for differences and identify rows where the next five diffs have a negative sign. I say "naive" because this solution does not use rle
for detecting streaks.
library(dplyr)
diff_details <- df %>%
mutate(diff = c(0, diff(az)),
diff_sign = sign(diff),
rolling_signs = cumsum(diff_sign),
next_five = lead(rolling_signs, 5) - rolling_signs)
diff_details
#> tz az diff diff_sign rolling_signs next_five
#> 1 1 0.56 0.00 0 0 -4
#> 2 2 0.55 -0.01 -1 -1 -4
#> 3 3 0.33 -0.22 -1 -2 -4
#> 4 4 0.33 0.00 0 -2 -5
#> 5 5 0.26 -0.07 -1 -3 -4
#> 6 6 0.15 -0.11 -1 -4 -4
#> 7 7 0.12 -0.03 -1 -5 -4
#> 8 8 0.09 -0.03 -1 -6 -4
#> 9 9 0.04 -0.05 -1 -7 -3
#> 10 10 0.04 0.00 0 -7 -2
#> 11 11 -0.10 -0.14 -1 -8 -1
#> 12 12 -0.12 -0.02 -1 -9 1
#> 13 13 -0.16 -0.04 -1 -10 1
#> 14 14 -0.16 0.00 0 -10 0
#> 15 15 -0.14 0.02 1 -9 -2
#> 16 16 -0.14 0.00 0 -9 -1
#> 17 17 -0.13 0.01 1 -8 -2
#> 18 18 -0.15 -0.02 -1 -9 0
#> 19 19 -0.22 -0.07 -1 -10 2
#> 20 20 -0.30 -0.08 -1 -11 4
#> 21 21 -0.12 0.18 1 -10 2
#> 22 22 -0.12 0.00 0 -10 1
#> 23 23 -0.11 0.01 1 -9 -1
#> 24 24 -0.07 0.04 1 -8 -3
#> 25 25 -0.05 0.02 1 -7 -5
#> 26 26 -0.09 -0.04 -1 -8 NA
#> 27 27 -0.10 -0.01 -1 -9 NA
#> 28 28 -0.15 -0.05 -1 -10 NA
#> 29 29 -0.17 -0.02 -1 -11 NA
#> 30 30 -0.22 -0.05 -1 -12 NA
Instead of identifying streaks in a sequence, we look at a cumulative sum of the signs of the differences in rolling_signs
. next_five
computes the difference in rolling_signs
over the next five rows. When next_five
is -5, then the next five rows have decreasing changes.
(diff_details$next_five %in% -5) %>% which %>% max
#> [1] 25
Each of the steps/columns could be abstracted into a function, like:
cum_diff_signs <- function(xs, window) {
rolling_signs <- cumsum(sign(c(0, diff(xs))))
next_diffs <- dplyr::lead(rolling_signs, window) - rolling_signs
next_diffs
}
cum_diff_signs(df$az, 5)
#> [1] -4 -4 -4 -5 -4 -4 -4 -4 -3 -2 -1 1 1 0 -2 -1 -2 0 2 4 2 1 -1
#> [24] -3 -5 NA NA NA NA NA
(cum_diff_signs(df$az, 5) %in% -5) %>% which %>% max
#> [1] 25
Upvotes: 3
Reputation: 887511
We can use rleid
from data.table
library(data.table)
n <- 5
v1 <- setDT(df)[sign(az)<0, .I[which(.N==n)] , rleid(c(1, sign(diff(az))))]$V1[1L]
v1
#[1] 26
df[, cnz_tz := v1]
Or another option is shift
with Reduce
setDT(df)[, cnz_tz := .I[Reduce(`&`, shift((az - shift(az, fill=az[1])) < 0,
0:4, type = "lead", fill=FALSE)) & sign(az) < 0][1]]
We can also use rleid
in dplyr
library(dplyr)
v1 <- df %>%
group_by(rl= rleid(c(1, sign(diff(az))))) %>%
mutate(rn = sign(az) < 0 & n()==5) %>%
.$rn %>%
which() %>%
head(., 1)
v1
#[1] 26
df %>%
mutate(cnz_tz = v1)
Upvotes: 4
Reputation: 6020
I would sort every 5 consecutive values, and see if that matches with the unsorted data. Then find the first occurance of such a match:
set.seed(123)
test <- rnorm(100)
decr <- sapply(seq_along(test),function(x){all(sort(test[x:(x+5)],decreasing = T) == test[x:(x+5)])})
firstdecr <- min(which(decr)):(min(which(decr))+5)
plot(test)
lines(firstdecr, test[firstdecr], col="red")
Only flaw I can see if there are equal values in a 5 value epoch, but you could also test for that.
Upvotes: 7