T.O.
T.O.

Reputation: 47

Subsetting a data frame according to different time ranges specified in other data frame

I have two data frames here.

SCORE_df <- data.frame("Participant" = rep(1:2, times=1, each=7), "Score" = as.integer(runif(14, 0, 100)), "Time" = c('17:00:00', '17:00:01', '17:00:02', '17:00:03', '17:00:04', '17:00:05', '17:00:06', '19:50:30', '19:50:31', '19:50:32', '19:50:33', '19:50:34', '19:50:35', '19:50:36'))
              
TIME_df <- data.frame("Participant" = c(1,2), "Start" = c('17:00:02', '19:50:31'), "End" = c('17:00:05', '19:50:33'))

> SCORE_df
   Participant Score     Time
1            1    56 17:00:00
2            1    77 17:00:01
3            1    27 17:00:02
4            1    78 17:00:03
5            1    46 17:00:04
6            1    22 17:00:05
7            1    35 17:00:06
8            2    26 19:50:30
9            2    64 19:50:31
10           2    29 19:50:32
11           2    29 19:50:33
12           2    90 19:50:34
13           2     0 19:50:35
14           2    51 19:50:36

> TIME_df
  Participant    Start      End
1           1 17:00:02 17:00:05
2           2 19:50:31 19:50:33

I want to use the Start and End information in TIME_df to subset SCORE_df data -- i.e., only retain the Score data "within" the Start to End time (inclusive) for "each" participant.

The lines below does wrong subsetting.

for(p in 1:nrow(SCORE_df)){
  ppt <- SCORE_df$Participant[p]
  SCORE_df_trimmed <- with(SCORE_df[which(SCORE_df$Participant==ppt),], SCORE_df[strptime(Time, "%H:%M:%S") >= strptime(TIME_df$Start[TIME_df$Participant==ppt], "%H:%M:%S") & strptime(Time, "%H:%M:%S") <= strptime(TIME_df$End[TIME_df$Participant==ppt], "%H:%M:%S"),])
}

> SCORE_df_trimmed
   Participant Score     Time
2            1    77 17:00:01
3            1    27 17:00:02
4            1    78 17:00:03
9            2    64 19:50:31
10           2    29 19:50:32
11           2    29 19:50:33

I will truly appreciate it if anyone could pinpoint the error in the lines mentioned above.

Upvotes: 0

Views: 38

Answers (3)

Roman
Roman

Reputation: 17668

using functions from the tidyverse you can try

library(tidyverse)
TIME_df %>% 
mutate_at(vars(Start, End), lubridate::hms) %>% 
   pmap(., ~SCORE_df %>% 
       mutate(Time = lubridate::hms(Time)) %>% 
       filter(Time >= ..2 & Time <= ..3)) %>% 
  bind_rows()
Participant Score        Time
1           1    21   17H 0M 2S
2           1    19   17H 0M 3S
3           1    83   17H 0M 4S
4           1    92   17H 0M 5S
5           2    23 19H 50M 31S
6           2    65 19H 50M 32S
7           2    70 19H 50M 33S

Without any transformation you can try

SCORE_df %>% 
  left_join(pivot_longer(TIME_df, c(Start, End)), by=c("Participant", "Time" = "value"))  %>% 
  group_by(Participant) %>% 
  slice(which(name=="Start"):which(name=="End"))
# A tibble: 7 x 4
# Groups:   Participant [2]
Participant Score Time     name 
<dbl> <int> <chr>    <chr>
1           1    21 17:00:02 Start
2           1    19 17:00:03 NA   
3           1    83 17:00:04 NA   
4           1    92 17:00:05 End  
5           2    23 19:50:31 Start
6           2    65 19:50:32 NA   
7           2    70 19:50:33 End 

Upvotes: 0

s_baldur
s_baldur

Reputation: 33613

An option with data.table:

library(data.table)
setDT(SCORE_df)
setDT(TIME_df)

# POSIXct works with a date reference, so I add in just any date
SCORE_df[, Time := as.POSIXct(paste0("2000-01-01 ", Time), tz = "UTC")]
TIME_df[,  Start := as.POSIXct(paste0("2000-01-01 ", Start), tz = "UTC")]
TIME_df[,  End := as.POSIXct(paste0("2000-01-01 ", End), tz = "UTC")]


SCORE_df[TIME_df, 
         on = .(Participant = Participant, Time >= Start, Time <= End), 
         .(Participant, Score, Time = x.Time)]


#    Participant Score                Time
# 1:           1    49 2000-01-01 17:00:02
# 2:           1    75 2000-01-01 17:00:03
# 3:           1     8 2000-01-01 17:00:04
# 4:           1     7 2000-01-01 17:00:05
# 5:           2    49 2000-01-01 19:50:31
# 6:           2    59 2000-01-01 19:50:32
# 7:           2    13 2000-01-01 19:50:33

Upvotes: 1

kmacierzanka
kmacierzanka

Reputation: 825

You can use this:

library(lubridate)

SCORE_df$Time <- hms(SCORE_df$Time)
TIME_df$Start <- hms(TIME_df$Start)
TIME_df$End <- hms(TIME_df$End)

library(dplyr)

SCORE_df1 <- SCORE_df %>%
        filter(Participant == 1 & Time >= TIME_df$Start[1] & Time <= TIME_df$End[1])

SCORE_df2 <- SCORE_df %>%
        filter(Participant == 2 & Time >= TIME_df$Start[2] & Time <= TIME_df$End[2])

rbind(SCORE_df1, SCORE_df2)
  Participant Score        Time
1           1    82   17H 0M 2S
2           1    35   17H 0M 3S
3           1    46   17H 0M 4S
4           1    42   17H 0M 5S
5           2    44 19H 50M 31S
6           2    61 19H 50M 32S
7           2    69 19H 50M 33S

Upvotes: 0

Related Questions