Reputation: 1
How to do below task in R?
df <- tribble(
~ID, ~StartTime, ~EndTime
, 01, "2018-05-14 09:30:00", "2018-05-14 12:10:00"
, 02, "2018-05-14 21:30:00", "2018-05-15 02:00:00"
, 03, "2018-05-15 07:00:00", "2018-05-16 22:30:00"
, 04, "2018-05-16 23:00:00", "2018-05-16 23:40:00"
, 05, "2018-05-17 01:00:00", "2018-05-19 15:00:00"
)
df$StartDate <- as.POSIXlt(df$StartDate, tryFormats = "%Y-%m-%d %H:%M:%S")
df$EndDate <- as.POSIXlt(df$EndDate, tryFormats = "%Y-%m-%d %H:%M:%S")
Note: Multiple duplicate rows needs to be created from single row,
For example Original Single row:
01, "2018-05-14 09:30:00", "2018-05-14 12:10:00"
Post processing Multiple row:
01, "2018-05-14 09:30:00", "2018-05-14 10:00:00"
01, "2018-05-14 10:00:00", "2018-05-14 11:00:00"
01, "2018-05-14 11:00:00", "2018-05-14 12:00:00"
01, "2018-05-14 12:00:00", "2018-05-14 12:10:00"
Hoping my request is clear.
Upvotes: 0
Views: 267
Reputation: 388807
We can write a function which generates an hourly sequence between two timestamps. Using map2
we call that function for every pair of StartTime
and EndTime
and expand the dataframe.
library(dplyr)
library(lubridate)
generate_hourly_time <- function(x, y) {
EndTime <- ceiling_date(x, 'hour')
EndTime2 <- seq(EndTime, floor_date(y, 'hour'), 'hour')
tibble(StartTime = c(x, EndTime2), EndTime = c(EndTime2, y))
}
df %>%
mutate(across(-1, ymd_hms)) %>%
#For dplyr < 1.0.0
#mutate_at(-1, ymd_hms) %>%
mutate(time = purrr::map2(StartTime, EndTime, generate_hourly_time)) %>%
select(ID, time) %>%
tidyr::unnest(time)
# A tibble: 117 x 3
# ID StartTime EndTime
# <dbl> <dttm> <dttm>
# 1 1 2018-05-14 09:30:00 2018-05-14 10:00:00
# 2 1 2018-05-14 10:00:00 2018-05-14 11:00:00
# 3 1 2018-05-14 11:00:00 2018-05-14 12:00:00
# 4 1 2018-05-14 12:00:00 2018-05-14 12:10:00
# 5 2 2018-05-14 21:30:00 2018-05-14 22:00:00
# 6 2 2018-05-14 22:00:00 2018-05-14 23:00:00
# 7 2 2018-05-14 23:00:00 2018-05-15 00:00:00
# 8 2 2018-05-15 00:00:00 2018-05-15 01:00:00
# 9 2 2018-05-15 01:00:00 2018-05-15 02:00:00
#10 2 2018-05-15 02:00:00 2018-05-15 02:00:00
# … with 107 more rows
Upvotes: 1
Reputation: 79188
You could also do:
library(tidyverse)
df %>%
pivot_longer(-ID)%>%
group_by(ID)%>%
mutate(start = list(unique(c(value[1],seq(strptime(value[1],"%F %H"),
value[2],"1 hour")[-1],value[2]))),
name = NULL, value = NULL)%>%
slice(1)%>%
unnest(start)%>%
mutate(end = lead(start,1,last(start)))
# A tibble: 117 x 3
# Groups: ID [5]
ID start end
<dbl> <dttm> <dttm>
1 1 2018-05-14 09:30:00 2018-05-14 10:00:00
2 1 2018-05-14 10:00:00 2018-05-14 11:00:00
3 1 2018-05-14 11:00:00 2018-05-14 12:00:00
4 1 2018-05-14 12:00:00 2018-05-14 12:10:00
5 1 2018-05-14 12:10:00 2018-05-14 12:10:00
6 2 2018-05-14 21:30:00 2018-05-14 22:00:00
7 2 2018-05-14 22:00:00 2018-05-14 23:00:00
8 2 2018-05-14 23:00:00 2018-05-15 00:00:00
9 2 2018-05-15 00:00:00 2018-05-15 01:00:00
10 2 2018-05-15 01:00:00 2018-05-15 02:00:00
# ... with 107 more rows
Upvotes: 0
Reputation: 21
I hope it's useful:
df <- tribble(
~ID, ~StartTime, ~EndTime
, 01, "2018-05-14 09:30:00", "2018-05-14 12:10:00"
, 01, "2018-05-14 09:30:00", "2018-05-14 12:10:00"
, 01, "2018-05-14 09:30:00", "2018-05-14 12:10:00"
, 01, "2018-05-14 09:30:00", "2018-05-14 12:10:00"
, 01, "2018-05-14 09:30:00", "2018-05-14 12:10:00"
, 02, "2018-05-14 21:30:00", "2018-05-15 02:00:00"
, 03, "2018-05-15 07:00:00", "2018-05-16 22:30:00"
, 04, "2018-05-16 23:00:00", "2018-05-16 23:40:00"
, 05, "2018-05-17 01:00:00", "2018-05-19 15:00:00"
)
nrow(df)
id.unique <- unique(df[,'ID'])
id.unique.numeric <- as.numeric(unlist(id.unique))
id.i <- id.unique.numeric
for (i in id.i) {
out.pre <- subset(df, ID==i)
name.out <- paste('df', i, '<-out.pre', sep = '')
eval(parse(text=name.out))
}
df1
FM
Upvotes: 0