Reputation: 332
I have a long-term hourly time series (22 years) and want to group the data by month and average each hour of the day, to get a diurnal temperature profile for every month. Below is a sample of my data set.
Datetime.LST ruc.197.118 narr.225.118 KLAF VAIv212_08748740771
1 12/31/1996 18:00 NA -0.7843 NA NA
2 12/31/1996 19:00 NA -1.2432 NA NA
3 12/31/1996 20:00 NA -1.7022 NA NA
4 12/31/1996 21:00 NA -2.1611 NA NA
5 12/31/1996 22:00 NA -2.1026 NA NA
6 12/31/1996 23:00 NA -2.0440 NA NA
7 1/1/1997 0:00 NA -1.9854 NA 0.4120480
8 1/1/1997 1:00 NA -1.7506 NA 0.4400940
9 1/1/1997 2:00 NA -1.5157 NA 0.4594420
10 1/1/1997 3:00 NA -1.2808 NA 0.3232730
11 1/1/1997 4:00 NA -0.9326 NA 0.0754700
12 1/1/1997 5:00 NA -0.5844 NA -0.0730896
13 1/1/1997 6:00 NA -0.2363 NA -0.1574400
14 1/1/1997 7:00 NA 0.3422 NA -0.2461240
15 1/1/1997 8:00 NA 0.9207 NA -0.0277405
# Read the hourly temperature table; Datetime.LST is read in as character.
temperature_data <- read.csv(temp_hourly, stringsAsFactors = FALSE)

# Mean of every temperature series for each (calendar month, hour of day)
# pair, pooling all years together.
monthly_hourly <- temperature_data %>%
  # Parse the timestamp column in place. The strings are month/day/year
  # ("12/31/1996 18:00"), so mdy_hm() is the matching parser; dmy_hm() would
  # fail on them. The parser must be applied to the column inside mutate(),
  # not receive the whole data frame from the pipe.
  mutate(Datetime.LST = mdy_hm(Datetime.LST, tz = "GMT")) %>%
  # Keys are zero-padded strings ("01".."12", "00".."23"). Formatting with
  # "%H" already discards the minutes, so no cut() by hour is needed.
  dplyr::group_by(
    month = format(Datetime.LST, "%m"),
    hour = format(Datetime.LST, "%H")
  ) %>%
  # Average each measured column; na.rm = TRUE skips missing observations.
  summarise(
    ruc.197.118 = mean(ruc.197.118, na.rm = TRUE),
    narr.225.118 = mean(narr.225.118, na.rm = TRUE),
    KLAF = mean(KLAF, na.rm = TRUE),
    VAIv212_08748740771 = mean(VAIv212_08748740771, na.rm = TRUE)
  ) %>%
  # summarise() leaves the result grouped by month; return a plain table.
  ungroup()
Example dataset using dput
> dput(head(temperature_data[, c(1,2,3,4,5)]))
structure(list(Datetime.LST = c("12/31/1996 18:00", "12/31/1996 19:00",
"12/31/1996 20:00", "12/31/1996 21:00", "12/31/1996 22:00", "12/31/1996 23:00"
), ruc.197.118 = c(NA_real_, NA_real_, NA_real_, NA_real_, NA_real_,
NA_real_), narr.225.118 = c(-0.7843, -1.2432, -1.7022, -2.1611,
-2.1026, -2.044), KLAF = c(NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_), VAIv212_08748740771 = c(NA_real_,
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_)), .Names = c("Datetime.LST",
"ruc.197.118", "narr.225.118", "KLAF", "VAIv212_08748740771"), row.names = c(NA,
6L), class = "data.frame")
Datetime.LST is read in as a character (chr) column. I have had some issues getting date packages (e.g. lubridate) to work together with dplyr's group_by function.
Attached is an example plot output that I would like to see.
Using Andrew's comments below, here is my updated code and its current output:
# Read the hourly temperature table; Datetime.LST is read in as character.
temperature_data <- read.csv(temp_hourly, stringsAsFactors = FALSE)

# Mean of every temperature series for each (calendar month, hour of day)
# pair, pooling all years together.
monthly_hourly <- temperature_data %>%
  mutate(
    # Timestamps look like "12/31/1996 18:00" (month/day/year).
    Datetime.LST = mdy_hm(Datetime.LST, tz = "GMT"),
    month = month(Datetime.LST),
    hour = hour(Datetime.LST)
  ) %>%
  group_by(month, hour) %>%
  # Average each measured column; na.rm = TRUE skips missing observations.
  summarise(
    ruc.197.118 = mean(ruc.197.118, na.rm = TRUE),
    narr.225.118 = mean(narr.225.118, na.rm = TRUE),
    KLAF = mean(KLAF, na.rm = TRUE),
    VAIv212_08748740771 = mean(VAIv212_08748740771, na.rm = TRUE)
  ) %>%
  # summarise() leaves the result grouped by month; drop that grouping so the
  # next step works on the whole table.
  ungroup() %>%
  # Abbreviated month name (an ordered factor) used to label the facets.
  mutate(month_name = month(month, label = TRUE))

# One panel per month: hour of day on x, mean temperature of each series on y.
ggplot(data = monthly_hourly, aes(x = hour)) +
  geom_line(color = "red", aes(y = ruc.197.118)) +
  geom_line(color = "blue", aes(y = narr.225.118)) +
  geom_line(color = "black", aes(y = KLAF)) +
  geom_line(color = "orange", aes(y = VAIv212_08748740771)) +
  # The x aesthetic is `hour`, so it carries the hour label and the y axis
  # carries the temperature label.
  xlab("Hour of Day") +
  ylab("Temperature degC") +
  facet_wrap(~ month_name)
Upvotes: 0
Views: 722
Reputation: 7626
Following on from your code above, you can extract the day, month and hour as new variables, then group, summarise and plot:
# Mean of every temperature series for each (calendar month, hour of day)
# pair. `df` is the data frame holding the hourly observations, with
# Datetime.LST still stored as character.
# No placeholder rows are appended: a synthetic timestamp whose measurements
# are all NA can create a (month, hour) group whose means are NaN.
monthly_hourly <- df %>%
  mutate(
    # Timestamps look like "12/31/1996 18:00" (month/day/year).
    Datetime.LST = mdy_hm(Datetime.LST, tz = "GMT"),
    # `day` is not a grouping key, so summarise() drops it from the result.
    day = day(Datetime.LST),
    # label = TRUE gives abbreviated month names as an ordered factor, so the
    # facets appear in calendar order.
    month = month(Datetime.LST, label = TRUE),
    hour = hour(Datetime.LST)
  ) %>%
  group_by(month, hour) %>%
  # Average each measured column; na.rm = TRUE skips missing observations.
  summarise(
    ruc.197.118 = mean(ruc.197.118, na.rm = TRUE),
    narr.225.118 = mean(narr.225.118, na.rm = TRUE),
    KLAF = mean(KLAF, na.rm = TRUE),
    VAIv212_08748740771 = mean(VAIv212_08748740771, na.rm = TRUE)
  ) %>%
  # summarise() leaves the result grouped by month; return a plain table.
  ungroup()

# One panel per month, each series drawn in its own fixed colour.
monthly_hourly %>%
  ggplot(aes(hour)) +
  geom_line(color = "red", aes(y = ruc.197.118)) +
  geom_line(color = "blue", aes(y = narr.225.118)) +
  geom_line(color = "black", aes(y = KLAF)) +
  geom_line(color = "orange", aes(y = VAIv212_08748740771)) +
  # scales = "free" gives every month its own x and y axis ranges.
  facet_wrap(~ month, scales = "free")
Upvotes: 0