Saaz
Saaz

Reputation: 55

Time data populating in r

I have a dataframe which records at what time cabs are doing trips. I need to transform this data into 24 hour duration when they are doing trips and idle. Below I'm sharing a sample data and the required dataframe

df <- data.frame(cab_id = c("c1","c1","c2*","c3","c3","c3","c4","c4"),
                 trip_id = c("101","102","103","104","105","106","107","108"),
                 trip_start = c("15:00", "21:27", " 23:11", "   09:33", "   17:41", "22:11", "21:31", "23:47"),
                 trip_end = c("18:30", "23:33", "02:30", "12:11", "20:18", "01:15", "22:45", "02:12"))



reqd_df <- data.frame(time = c("0","0.25","0.5","0.75","1","1.25","1.5","1.75","2","2.25","2.5","2.75","3","3.25","3.5","3.75","4","4.25","4.5","4.75","5","5.25","5.5","5.75","6","6.25","6.5","6.75","7","7.25","7.5","7.75","8","8.25","8.5","8.75","9","9.25","9.5","9.75","10","10.25","10.5","10.75","11","11.25","11.5","11.75","12","12.25","12.5","12.75","13","13.25","13.5","13.75","14","14.25","14.5","14.75","15","15.25","15.5","15.75","16","16.25","16.5","16.75","17","17.25","17.5","17.75","18","18.25","18.5","18.75","19","19.25","19.5","19.75","20","20.25","20.5","20.75","21","21.25","21.5","21.75","22","22.25","22.5","22.75","23","23.25","23.5","23.75"),
                      c1 = c("0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","1","1","1","1","1","1","1","1","1","1","1","1","1","1","1","0","0","0","0","0","0","0","0","0","0","0","1","1","1","1","1","1","1","1","1","0"),
                      c2 = c("1","1","1","1","1","1","1","1","1","1","1","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","1","1","1"),
                      c3 = c("1","1","1","0","1","1","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","1","1","1","1","1","1","1","1","1","1","1","1","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","1","1","1","1","1","1","1","1","1","1","1","0","0","0","0","0","0","0","1","1","1","1","1","1","1"),
                      c4 = c("1","1","1","1","1","1","1","1","1","1","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","0","1"))

reqd_df_2 <- melt(reqd_df,id = 1)


ggplot(reqd_df_2, aes(x = time, y = value,  label = value, fill= variable ))+
  geom_bar(stat = "identity")+
  facet_grid(rows = vars(variable))



Q1: how to transform dataframe df to dataframe reqd_df, where time is rounded to nearby quarter of the hour , intervals like (1:15, 1:30, 1:45, 2:00). If cab is doing trip in a particular quarter of hour populate it by 1 and if cab is idle populate it by 0.

Note1: 0, 0.25, 0:50, 0:75, 1:00 represent 0:00, 0:15, 0:30, 0:45, 1:00 respectively and so on.

Note2: some trips start at end hour of the days like 22:11 hours and ends around 02:15 hour, from 22:15 to 23:45 and 00:00 to 02:15 data will be populated by 1

Thanks in advance

Upvotes: 0

Views: 75

Answers (1)

chinsoon12
chinsoon12

Reputation: 25225

Using data.table::as.ITime and nearest rolling join (i.e. roll="nearest"):

library(data.table)
setDT(df)

#convert into ITime
cols <- c("trip_start", "trip_end")
df[, (cols) := lapply(.SD, function(x) as.ITime(x)), .SDcols=cols]

#sequence of times with 15 mins interval
sec15min <- 15 * 60
alltimes <- as.ITime(seq(as.ITime("00:00"), as.ITime("23:59"), sec15min))
intl <- data.table(INTL=alltimes)[, VAL:=INTL]

#find nearest 15min interval
df[, near_start := intl[df, on=.(INTL=trip_start), roll="nearest", VAL]]    
df[, near_end := intl[df, on=.(INTL=trip_end), roll="nearest", VAL]]

#get straight to reqd_df_2 
df[, {
        if (near_start <= near_end)
            biz <- seq(near_start, near_end, sec15min)
        else
            biz <- c(seq(as.ITime("00:00"), near_end, sec15min),
                seq(near_start, as.ITime("23:59"), sec15min))

        .(time=as.ITime(alltimes),
            busy=replace(logical(length(alltimes)), alltimes %in% biz, TRUE))
    },
    by=.(cab_id, trip_id)][,
        .(busy=+any(busy)), by=.(time, cab_id)]

output:

         time cab_id busy
  1: 00:00:00     c1    0
  2: 00:15:00     c1    0
  3: 00:30:00     c1    0
  4: 00:45:00     c1    0
  5: 01:00:00     c1    0
 ---                     
380: 22:45:00     c4    1
381: 23:00:00     c4    0
382: 23:15:00     c4    0
383: 23:30:00     c4    0
384: 23:45:00     c4    1

p.s.: I think OP is missing trip 107 in the desired output.

Upvotes: 1

Related Questions