Reputation: 423
I need help trying to figure out how to calculate the average of a variable every ___ hours. I want to calculate the average every 1/2 hour, and then every 1, 2, 4, and 6 hours.
Here is my dataset:
dput(head(R3L12, 10))
structure(list(Date = c("2015-05-23", "2015-05-23", "2015-05-23",
"2015-05-23", "2015-05-23", "2015-05-23", "2015-05-23", "2015-05-23",
"2015-05-23", "2015-05-23"), Time = c("07:25:00", "07:40:00",
"07:45:00", "09:10:00", "11:45:00", "11:55:00", "12:05:00", "12:35:00",
"12:45:00", "13:30:00"), Turtle = structure(c(3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L), .Label = c("R3L1", "R3L11", "R3L12",
"R3L2", "R3L4", "R3L8", "R3L9", "R4L8", "R8L1", "R8L4", "R8NAT123"
), class = "factor"), Tex = c(11.891, 12.008, 12.055, 13.219,
18.727, 18.992, 19.477, 20.367, 20.641, 28.305), m.Tb = c(12.477,
12.54, 12.54, 12.978, 16.362, 16.612, 17.238, 19.617, 19.993,
24.371), m.HR = c(7.56457, 6.66759, 17.51107, 9.72277, 19.44553,
13.07674, 28.115, 14.99467, 17.16947, 40.40479), season = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("beginning",
"end", "middle"), class = "factor"), year = c(2015L, 2015L, 2015L,
2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L), Mass = c(360L,
360L, 360L, 360L, 360L, 360L, 360L, 360L, 360L, 360L)), row.names = c(NA,
10L), class = "data.frame")
I want to be able to calculate the average m.Tb for each time period for each date. For example, for 2015-05-23, I want to have the average m.Tb for every 30 minutes, 1 hr, 2 hr, 4 hr, and 6 hr. Then I want to repeat that with the next day. Sometimes there are "missing" rows in the Time column and that's because the NA rows were already taken out.
Let me know if you need clarification or have questions as I'm still new to r.
Upvotes: 4
Views: 763
Reputation: 21938
I hope this is what you are looking for. Due to the fact that the resulting data frames have different row numbers I had to store them in a list. For this purpose I first created a character vector of all the time spans you would like to calculate the average for and then I used map
function from purrr
package to iterate over them replacing breaks
argument in cut
to create your desired time spans.
library(dplyr)
library(lubridate)
library(purrr)
breaks <- c("15 min", "30 min", "1 hour", "2 hour", "4 hour", "6 hour")
breaks %>%
map(~ df %>%
unite("Date-Time", c("Date", "Time"), sep = " ", remove = FALSE) %>%
mutate(`Date-Time` = ymd_hms(`Date-Time`)) %>%
mutate(DS = cut(`Date-Time`, breaks = .x)) %>%
group_by(ymd(Date), DS) %>%
summarise(avg = mean(m.Tb))) %>%
set_names(breaks)
$`15 min`
# A tibble: 8 x 3
# Groups: ymd(Date) [1]
`ymd(Date)` DS avg
<date> <fct> <dbl>
1 2015-05-23 2015-05-23 07:25:00 12.5
2 2015-05-23 2015-05-23 07:40:00 12.5
3 2015-05-23 2015-05-23 09:10:00 13.0
4 2015-05-23 2015-05-23 11:40:00 16.4
5 2015-05-23 2015-05-23 11:55:00 16.9
6 2015-05-23 2015-05-23 12:25:00 19.6
7 2015-05-23 2015-05-23 12:40:00 20.0
8 2015-05-23 2015-05-23 13:25:00 24.4
$`30 min`
# A tibble: 6 x 3
# Groups: ymd(Date) [1]
`ymd(Date)` DS avg
<date> <fct> <dbl>
1 2015-05-23 2015-05-23 07:25:00 12.5
2 2015-05-23 2015-05-23 08:55:00 13.0
3 2015-05-23 2015-05-23 11:25:00 16.4
4 2015-05-23 2015-05-23 11:55:00 16.9
5 2015-05-23 2015-05-23 12:25:00 19.8
6 2015-05-23 2015-05-23 13:25:00 24.4
$`1 hour`
# A tibble: 5 x 3
# Groups: ymd(Date) [1]
`ymd(Date)` DS avg
<date> <fct> <dbl>
1 2015-05-23 2015-05-23 07:00:00 12.5
2 2015-05-23 2015-05-23 09:00:00 13.0
3 2015-05-23 2015-05-23 11:00:00 16.5
4 2015-05-23 2015-05-23 12:00:00 18.9
5 2015-05-23 2015-05-23 13:00:00 24.4
$`2 hour`
# A tibble: 4 x 3
# Groups: ymd(Date) [1]
`ymd(Date)` DS avg
<date> <fct> <dbl>
1 2015-05-23 2015-05-23 07:00:00 12.5
2 2015-05-23 2015-05-23 09:00:00 13.0
3 2015-05-23 2015-05-23 11:00:00 18.0
4 2015-05-23 2015-05-23 13:00:00 24.4
$`4 hour`
# A tibble: 2 x 3
# Groups: ymd(Date) [1]
`ymd(Date)` DS avg
<date> <fct> <dbl>
1 2015-05-23 2015-05-23 07:00:00 12.6
2 2015-05-23 2015-05-23 11:00:00 19.0
$`6 hour`
# A tibble: 2 x 3
# Groups: ymd(Date) [1]
`ymd(Date)` DS avg
<date> <fct> <dbl>
1 2015-05-23 2015-05-23 07:00:00 15.6
2 2015-05-23 2015-05-23 13:00:00 24.4
Upvotes: 2
Reputation: 4150
This is how I would do it, you have a lot of missing periods so it is not the best of outputs for half hourly aggregation
data_example <- structure(list(Date = c("2015-05-23", "2015-05-23", "2015-05-23",
"2015-05-23", "2015-05-23", "2015-05-23", "2015-05-23", "2015-05-23",
"2015-05-23", "2015-05-23"), Time = c("07:25:00", "07:40:00",
"07:45:00", "09:10:00", "11:45:00", "11:55:00", "12:05:00", "12:35:00",
"12:45:00", "13:30:00"), Turtle = structure(c(3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L), .Label = c("R3L1", "R3L11", "R3L12",
"R3L2", "R3L4", "R3L8", "R3L9", "R4L8", "R8L1", "R8L4", "R8NAT123"
), class = "factor"), Tex = c(11.891, 12.008, 12.055, 13.219,
18.727, 18.992, 19.477, 20.367, 20.641, 28.305), m.Tb = c(12.477,
12.54, 12.54, 12.978, 16.362, 16.612, 17.238, 19.617, 19.993,
24.371), m.HR = c(7.56457, 6.66759, 17.51107, 9.72277, 19.44553,
13.07674, 28.115, 14.99467, 17.16947, 40.40479), season = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("beginning",
"end", "middle"), class = "factor"), year = c(2015L, 2015L, 2015L,
2015L, 2015L, 2015L, 2015L, 2015L, 2015L, 2015L), Mass = c(360L,
360L, 360L, 360L, 360L, 360L, 360L, 360L, 360L, 360L)), row.names = c(NA,
10L), class = "data.frame")
library(tidyverse)
floor_30 <- function(x) clock::date_floor(x = x,precision = "minute",n = 30)
mean_at_inteval <- function(data,date_col,interval_func) {
data |>
group_by(interval = {{date_col}} |> interval_func()) |>
summarise(sum_interval = sum(m.Tb)) |>
summarise(mean_interval = mean(sum_interval))
}
nest_example_data <- data_example %>%
mutate(date_timer = str_c(Date,Time) %>% clock::date_time_parse(zone = "UTC")) |>
nest_by(Date)
final_data <- nest_example_data |> mutate(floor_30 = data |> mean_at_inteval(date_col = date_timer,interval_func = floor_30))
final_data
#> # A tibble: 1 x 3
#> # Rowwise: Date
#> Date data floor_30$mean_interval
#> <chr> <list<tibble[,9]>> <dbl>
#> 1 2015-05-23 [10 x 9] 23.5
Created on 2021-05-30 by the reprex package (v2.0.0)
Upvotes: 1
Reputation: 887741
We could use ceiling_date
from lubridate
library(lubridate)
library(dplyr)
library(stringr)
R3L12 %>%
group_by(DS = ceiling_date(as.POSIXct(str_c(Date, Time, sep=" ")),
unit = '30 min' )) %>%
summarise(avg_30 = mean(m.Tb)) %>%
mutate(date = as.Date(DS))
-output
# A tibble: 7 x 3
# DS avg_30 date
# <dttm> <dbl> <date>
#1 2015-05-23 07:30:00 12.5 2015-05-23
#2 2015-05-23 08:00:00 12.5 2015-05-23
#3 2015-05-23 09:30:00 13.0 2015-05-23
#4 2015-05-23 12:00:00 16.5 2015-05-23
#5 2015-05-23 12:30:00 17.2 2015-05-23
#6 2015-05-23 13:00:00 19.8 2015-05-23
#7 2015-05-23 13:30:00 24.4 2015-05-23
Upvotes: 3