Isuru
Isuru

Reputation: 101

Selecting the elements in a column that fall within a given time period

For each node, I want to keep the activities that occur within 30 minutes of another activity on the same node, and remove the activities that do not fall within such a 30-minute range.

# Sample data: one row per activity, with the node it was raised on and its
# timestamp (kept as character strings here).
node <- c(
  "ABC", "ABC",
  "ABCC", "ABCC", "ABCC", "ABCC", "ABCC",
  "ABCC", "ABCC", "ABCC", "ABCC", "ABCC"
)

activity <- c(
  "LOSS_OF_MULTIPLEX_SECTION-OMS_A",
  "LOSS_OF_MULTIPLEX_SECTION-OMS_A",
  "NODE_ISOLATION",
  "NE_NOT_REACH_VIA_PRIMARY_MNG_INTERF",
  "NODE_ISOLATION",
  "LOSS_OF_MULTIPLEX_SECTION-OMS_A",
  "NODE_ISOLATION",
  "NE_NOT_REACH_VIA_PRIMARY_MNG_INTERF",
  "NE_NOT_REACH_VIA_PRIMARY_MNG_INTERF",
  "UNDERLYING_RESOURCE_UNAVAILABLE-OMS_A",
  "UNDERLYING_RESOURCE_UNAVAILABLE-OMS_A",
  "UNDERLYING_RESOURCE_UNAVAILABLE-OMS_A"
)

e <- c(
  "2020-05-09 04:50:42", "2020-05-09 06:16:54", "2020-05-08 16:11:58",
  "2020-05-08 16:11:58", "2020-05-08 16:30:07", "2020-05-09 03:00:08",
  "2020-05-09 03:08:08", "2020-05-09 03:28:08", "2020-05-09 13:08:08",
  "2020-05-09 13:10:08", "2020-05-09 13:28:08", "2020-05-09 14:28:08"
)

# Build the data frame once (columns: node, activity, e) and print it.
df <- data.frame(node, activity, e)
df

I have tried this code, but it's not correct:

# Attempted solution (does not run as written): sort the rows, group them by
# node, then try to keep the entries whose time difference is at most 30.
# NOTE(review): the last line has one more ")" than "(", so the snippet does
# not parse. Beyond that, select() chooses columns rather than rows, and
# difftime() expects two time arguments (time1, time2), not a single vector.
df %>% 
  arrange(node,activity,e)%>%
  group_by(node)%>%
  select(which(difftime(e)<=30)))

Desired output:

# Desired output: the rows that are expected to remain after filtering.
# Every remaining row belongs to node "ABCC".
node <- rep("ABCC", times = 9)

activity <- c(
  "NODE_ISOLATION",
  "NE_NOT_REACH_VIA_PRIMARY_MNG_INTERF",
  "NODE_ISOLATION",
  "LOSS_OF_MULTIPLEX_SECTION-OMS_A",
  "NODE_ISOLATION",
  "NE_NOT_REACH_VIA_PRIMARY_MNG_INTERF",
  "NE_NOT_REACH_VIA_PRIMARY_MNG_INTERF",
  "UNDERLYING_RESOURCE_UNAVAILABLE-OMS_A",
  "UNDERLYING_RESOURCE_UNAVAILABLE-OMS_A"
)

e <- c(
  "2020-05-08 16:11:58", "2020-05-08 16:11:58", "2020-05-08 16:30:07",
  "2020-05-09 03:00:08", "2020-05-09 03:08:08", "2020-05-09 03:28:08",
  "2020-05-09 13:08:08", "2020-05-09 13:10:08", "2020-05-09 13:28:08"
)

# Assemble the expected result and print it.
df1 <- data.frame(node = node, activity = activity, e = e)
df1

Upvotes: 0

Views: 54

Answers (1)

ekoam
ekoam

Reputation: 8844

Is this what you want?

library(dplyr)

# Maximum gap allowed between an activity and its nearest neighbour on the
# same node. Change this single value to use a different window.
window <- as.difftime(30, units = "mins")

# Offset in seconds applied for the first/last row of a group, which has no
# previous/next timestamp: one second wider than the window, so the missing
# neighbour can never satisfy the `<= window` test.
pad_secs <- as.numeric(window, units = "secs") + 1

# Next and previous timestamp of `x` when the values are taken in
# chronological order.
tlead <- function(x) lead(x, order_by = x, default = max(x) + pad_secs)
tlag <- function(x) lag(x, order_by = x, default = min(x) - pad_secs)

# The result is still grouped by node; add ungroup() if later steps should
# not operate per node.
df %>%
  # Parse the character timestamps as date-times in the local time zone.
  mutate(e = as.POSIXct(e, tz = "")) %>%
  # Compare timestamps only with other rows of the same node.
  group_by(node) %>%
  # Keep a row when the previous or the next activity is at most `window` away.
  filter(e - tlag(e) <= window | tlead(e) - e <= window)

Output

# A tibble: 9 x 3
# Groups:   node [1]
  node  activity                              e                  
  <chr> <chr>                                 <dttm>             
1 ABCC  NODE_ISOLATION                        2020-05-08 16:11:58
2 ABCC  NE_NOT_REACH_VIA_PRIMARY_MNG_INTERF   2020-05-08 16:11:58
3 ABCC  NODE_ISOLATION                        2020-05-08 16:30:07
4 ABCC  LOSS_OF_MULTIPLEX_SECTION-OMS_A       2020-05-09 03:00:08
5 ABCC  NODE_ISOLATION                        2020-05-09 03:08:08
6 ABCC  NE_NOT_REACH_VIA_PRIMARY_MNG_INTERF   2020-05-09 03:28:08
7 ABCC  NE_NOT_REACH_VIA_PRIMARY_MNG_INTERF   2020-05-09 13:08:08
8 ABCC  UNDERLYING_RESOURCE_UNAVAILABLE-OMS_A 2020-05-09 13:10:08
9 ABCC  UNDERLYING_RESOURCE_UNAVAILABLE-OMS_A 2020-05-09 13:28:08

Upvotes: 1

Related Questions