Reputation: 47
I have two data frames here.
SCORE_df <- data.frame("Participant" = rep(1:2, times=1, each=7), "Score" = as.integer(runif(14, 0, 100)), "Time" = c('17:00:00', '17:00:01', '17:00:02', '17:00:03', '17:00:04', '17:00:05', '17:00:06', '19:50:30', '19:50:31', '19:50:32', '19:50:33', '19:50:34', '19:50:35', '19:50:36'))
TIME_df <- data.frame("Participant" = c(1,2), "Start" = c('17:00:02', '19:50:31'), "End" = c('17:00:05', '19:50:33'))
> SCORE_df
Participant Score Time
1 1 56 17:00:00
2 1 77 17:00:01
3 1 27 17:00:02
4 1 78 17:00:03
5 1 46 17:00:04
6 1 22 17:00:05
7 1 35 17:00:06
8 2 26 19:50:30
9 2 64 19:50:31
10 2 29 19:50:32
11 2 29 19:50:33
12 2 90 19:50:34
13 2 0 19:50:35
14 2 51 19:50:36
> TIME_df
Participant Start End
1 1 17:00:02 17:00:05
2 2 19:50:31 19:50:33
I want to use the Start and End information in TIME_df to subset SCORE_df data -- i.e., only retain the Score data "within" the Start to End time (inclusive) for "each" participant.
The lines below does wrong subsetting.
for(p in 1:nrow(SCORE_df)){
ppt <- SCORE_df$Participant[p]
SCORE_df_trimmed <- with(SCORE_df[which(SCORE_df$Participant==ppt),], SCORE_df[strptime(Time, "%H:%M:%S") >= strptime(TIME_df$Start[TIME_df$Participant==ppt], "%H:%M:%S") & strptime(Time, "%H:%M:%S") <= strptime(TIME_df$End[TIME_df$Participant==ppt], "%H:%M:%S"),])
}
> SCORE_df_trimmed
Participant Score Time
2 1 77 17:00:01
3 1 27 17:00:02
4 1 78 17:00:03
9 2 64 19:50:31
10 2 29 19:50:32
11 2 29 19:50:33
I will truly appreciate it if anyone could pinpoint the error in the lines mentioned above.
Upvotes: 0
Views: 38
Reputation: 17668
using functions from the tidyverse
you can try
library(tidyverse)
TIME_df %>%
mutate_at(vars(Start, End), lubridate::hms) %>%
pmap(., ~SCORE_df %>%
mutate(Time = lubridate::hms(Time)) %>%
filter(Time >= ..2 & Time <= ..3)) %>%
bind_rows()
Participant Score Time
1 1 21 17H 0M 2S
2 1 19 17H 0M 3S
3 1 83 17H 0M 4S
4 1 92 17H 0M 5S
5 2 23 19H 50M 31S
6 2 65 19H 50M 32S
7 2 70 19H 50M 33S
Without any transformation you can try
SCORE_df %>%
left_join(pivot_longer(TIME_df, c(Start, End)), by=c("Participant", "Time" = "value")) %>%
group_by(Participant) %>%
slice(which(name=="Start"):which(name=="End"))
# A tibble: 7 x 4
# Groups: Participant [2]
Participant Score Time name
<dbl> <int> <chr> <chr>
1 1 21 17:00:02 Start
2 1 19 17:00:03 NA
3 1 83 17:00:04 NA
4 1 92 17:00:05 End
5 2 23 19:50:31 Start
6 2 65 19:50:32 NA
7 2 70 19:50:33 End
Upvotes: 0
Reputation: 33613
An option with data.table
:
library(data.table)
setDT(SCORE_df)
setDT(TIME_df)
# POSIXct works with a date reference, so I add in just any date
SCORE_df[, Time := as.POSIXct(paste0("2000-01-01 ", Time), tz = "UTC")]
TIME_df[, Start := as.POSIXct(paste0("2000-01-01 ", Start), tz = "UTC")]
TIME_df[, End := as.POSIXct(paste0("2000-01-01 ", End), tz = "UTC")]
SCORE_df[TIME_df,
on = .(Participant = Participant, Time >= Start, Time <= End),
.(Participant, Score, Time = x.Time)]
# Participant Score Time
# 1: 1 49 2000-01-01 17:00:02
# 2: 1 75 2000-01-01 17:00:03
# 3: 1 8 2000-01-01 17:00:04
# 4: 1 7 2000-01-01 17:00:05
# 5: 2 49 2000-01-01 19:50:31
# 6: 2 59 2000-01-01 19:50:32
# 7: 2 13 2000-01-01 19:50:33
Upvotes: 1
Reputation: 825
You can use this:
library(lubridate)
SCORE_df$Time <- hms(SCORE_df$Time)
TIME_df$Start <- hms(TIME_df$Start)
TIME_df$End <- hms(TIME_df$End)
library(dplyr)
SCORE_df1 <- SCORE_df %>%
filter(Participant == 1 & Time >= TIME_df$Start[1] & Time <= TIME_df$End[1])
SCORE_df2 <- SCORE_df %>%
filter(Participant == 2 & Time >= TIME_df$Start[2] & Time <= TIME_df$End[2])
rbind(SCORE_df1, SCORE_df2)
Participant Score Time
1 1 82 17H 0M 2S
2 1 35 17H 0M 3S
3 1 46 17H 0M 4S
4 1 42 17H 0M 5S
5 2 44 19H 50M 31S
6 2 61 19H 50M 32S
7 2 69 19H 50M 33S
Upvotes: 0