Reputation: 65
# Example input, laid out one run of identical values per line.
b <- c(
  "true", "true", "true", "true", "true",
  "false", "false",
  "true", "true", "true",
  "false", "false", "false",
  "true", "true",
  "false",
  "true",
  "false",
  "true",
  "false"
)
I'm trying to write a function that takes the above vector as an input and finds the indices of the first and last occurrences of the desired string (e.g. "true") in every 'zone' (zone being defined as a subvector where there are two or more consecutive identical elements). The desired output for the above would be a data-frame such as:
x | y
----|----
1 | 5
8 | 10
14 | 15
I have successfully written a function (below) that does this, but it takes far too long for my Shiny app. It would be great if there were a cleaner and faster way of doing this.
#' Locate zones of consecutive matching values in a data frame column
#'
#' A zone is a run of two or more consecutive elements equal to
#' `match_string`. Runs are found with `rle()`, so the work is a single
#' vectorised pass rather than nested loops that grow a data frame.
#'
#' @param dataframe A data frame holding the column to scan.
#' @param zone_source_col_index Index (or name) of the column to scan.
#' @param match_string Value that marks membership of a zone.
#' @return A data frame with one row per zone: `x` is the index of the
#'   zone's first element and `y` the index of its last. It has zero rows
#'   when no zone exists.
zone_identifier <- function(dataframe, zone_source_col_index, match_string) {
  zone_source_vector <- dataframe[[zone_source_col_index]]
  # rle() only accepts plain atomic vectors, so unwrap factors first.
  if (is.factor(zone_source_vector)) {
    zone_source_vector <- as.character(zone_source_vector)
  }

  runs <- rle(zone_source_vector)
  run_end <- cumsum(runs$lengths)
  run_start <- run_end - runs$lengths + 1L

  # %in% never yields NA, so runs of missing values are simply not zones.
  is_zone <- runs$values %in% match_string & runs$lengths >= 2L
  data.frame(x = run_start[is_zone], y = run_end[is_zone])
}
Upvotes: 2
Views: 99
Reputation: 1981
Solution using dplyr
library(dplyr)
# Run-length encode b once; `run` holds the length of each run.
b_rle <- rle(b)
run <- b_rle$lengths
run_end <- cumsum(run)
# Each run starts one past the end of the previous run; the first starts at 1.
run_start <- c(1, head(run_end, -1) + 1)

# One row per run, then keep only the "true" runs and drop the label column.
data.frame(type = b_rle$values, x = run_start, y = run_end) %>%
  filter(type == "true") %>%
  select(-type)
Upvotes: 2
Reputation: 887183
Here is an option using data.table
library(data.table)
# Group rows by run id: rleid(b) gives every run of identical values its own id.
# Inside each group, .I holds the group's row numbers and .N its row count.
v1 <- data.table(b)[, {
# Keep the first and last row number of the run, but only when the run is "true".
i1 <- .I[b=="true" & seq_len(.N) %in% c(1,.N)]
# A one-row run yields a single index; repeat it so each run contributes a start/end pair.
if(.N==1) rep(i1, 2) else i1} , by = rleid(b)]$V1
# v1 alternates start, end, start, end, ...; the recycled logical masks
# pick the odd positions as x and the even positions as y.
data.table(x= v1[c(TRUE, FALSE)], y = v1[c(FALSE, TRUE)])
# x y
#1: 1 5
#2: 8 10
#3: 14 15
#4: 17 17
#5: 19 19
Upvotes: 2
Reputation: 4474
You can use `rle` to find the solution:
# Use rle to find runs of identical values in b.
rle_res <- rle(b)
# End position of every run (true and false alike). Taking the cumulative
# sum directly gives the final run a real end index; padding with NA
# would leave y missing whenever b ends in a "true" run.
end_vec <- cumsum(rle_res$lengths)
# Start position of every run: its end minus its length, plus one.
start_vec <- end_vec - rle_res$lengths + 1L
# Keep only the runs whose value is "true".
is_true <- rle_res$values == "true"
data.frame(x = start_vec[is_true],
           y = end_vec[is_true])
# x y
#1 1 5
#2 8 10
#3 14 15
#4 17 17
#5 19 19
Upvotes: 4