Kim Falinski
Kim Falinski

Reputation: 21

Building legends with time series data, in ggplot

Aloha all,

I've struggled to build a legend for a mix/match of time series data I'm making. Here is some code:

My understanding is that I need to somehow clean my data and put it all in the same data frame, but the time series don't line up very well: some are recorded every 15 minutes, others every hour. Is there any way to force a legend for these datasets? I don't know what else to post here, since the 5 datasets are quite large.

Plot I'm working on:

enter image description here

# Overlay several logger series on one time axis, restricted to the window
# (startd, endd). Each layer brings its own data frame and a fixed colour.
# NOTE: colours given as constants outside aes() are not mapped to a scale,
# so ggplot2 has nothing to build a legend from.
q <- ggplot(subset(cr200_Auwai1, timedate > startd & timedate < endd), aes(timedate, Turb_SS)) +
  geom_point(data = subset(dsloi_wl, timedate > startd & timedate < endd), aes(timedate, level), color = "blue") +
  #geom_point(data=subset(flow_data, mdate>startd & mdate<endd), aes(as.POSIXct(mdate), flow_cfs*1000), color="red")+
  # Lvl_m is multiplied by 1000 to share the millimetre axis with `level`
  geom_point(data = subset(cr300_Wai1, timedate > startd & timedate < endd), aes(timedate, Lvl_m * 1000), color = "forestgreen", size = 1) + #aquamarine3
  geom_point(data = subset(cr300_Wai1, timedate > startd & timedate < endd), aes(timedate, Turb_SS), color = "orange") +
  #geom_point(data=subset(hihimanu_wl, timedate>startd & timedate<endd), aes(timedate, level), color="azure4", size=0.1)+
  #geom_point(data=subset(rain_data, timedate>startd & timedate<endd), aes(timedate, rainmm), color="red",size=5)+
  geom_point(data = subset(haptuk_ysi, datetime > startd & datetime < endd), aes(datetime, Turb), color = "pink") +
  #scale_x_date(breaks=date_breaks("month"), labels = date_format("%b-%y"))+
  ylab("Turbidity (NTU) and Water Level (mm)") +
  coord_cartesian(ylim = c(0, 1500)) +
  # The legend settings are theme elements and must be passed to theme();
  # left bare after the final `+` they do not parse.
  theme(
    legend.justification = c(1, 1),
    legend.position = c(1, 1)
  )

Here is a sample of two of the datasets: Note that the times don't line up at all... since I'm mixing sources.


structure(list(ReceptionTime = c(1533895414.1134, 1533895414.1733, 1533895414.19397, 1533895414.20708, 1533895414.22283, 1533895414.23634, 1533895414.25135, 1533895414.26387, 1533895414.27653, 1533895414.29126, 1533896013.68755, 1533896013.7638, 1533896013.79232, 1533896013.80917, 1533896013.82312, 1533896013.83648, 1533896013.84988, 1533896013.8648, 1533896013.87724, 1533896013.8894), d2w = c(776.7, 789.7, 790.2, 777.1, 777.2, 777.7, 778.4, 793.4, 779.6, 794.1, 819.9, 780.7, 794.1, 806.9, 781.9, 781.9, 782.7, 782.8, 783.1, 783.4), timedate = structure(c(1533895414.1134, 1533895414.1733, 1533895414.19397, 1533895414.20708, 1533895414.22283, 1533895414.23634, 1533895414.25135, 1533895414.26387, 1533895414.27653, 1533895414.29126, 1533896013.68755, 1533896013.7638, 1533896013.79232, 1533896013.80917, 1533896013.82312, 1533896013.83648, 1533896013.84988, 1533896013.8648, 1533896013.87724, 1533896013.8894), class = c("POSIXct", "POSIXt"), tzone = ""), level = c(723.3, 710.3, 709.8, 722.9, 722.8, 722.3, 721.6, 706.6, 720.4, 705.9, 680.1, 719.3, 705.9, 693.1, 718.1, 718.1, 717.3, 717.2, 716.9, 716.6)), .Names = c("ReceptionTime", "d2w", "timedate", "level"), row.names = c(NA, 20L), class = "data.frame")


structure(list(RECORD = 73027:73046, Temp_C = c(24.62861, 24.62332, 24.61533, 24.60857, 24.60189, 24.59733, 24.59068, 24.58404, 24.57869, 24.57327, 24.56781, 24.5606, 24.55551, 24.55218, 24.54648, 24.5416, 24.5358, 24.5319, 24.52781, 24.52294), Turb_BS = c(94.50522, 88.65939, 109.354, 57.71527, 134.1903, 46.37191, 78.17719, 52.22319, 58.07111, 96.95719, 51.47488, 44.65616, 70.43825, 99.58217, 93.68374, 87.4787, 175.5395, 167.6757, 110.8119, 132.5971), Turb_SS = c(36.63349, 34.31228, 37.02223, 32.97258, 36.68553, 33.82083, 37.43391, 33.43639, 31.17306, 33.6327, 34.69954, 30.99891, 34.69988, 33.64369, 32.54948, 32.1177, 32.86558, 48.97706, 30.65004, 33.71646), Temp_C_2 = c(24.9014, 24.89474, 24.88837, 24.88279, 24.87574, 24.86852, 24.86357, 24.85751, 24.85236, 24.84759, 24.84091, 24.83577, 24.83192, 24.82713, 24.8229, 24.81832, 24.81237, 24.80821, 24.8051, 24.80015), WD_OBS = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Lvl_m = c(0.6907353, 0.6905226, 0.6896195, 0.6890779, 0.6881586, 0.6878724, 0.6862501, 0.6848835, 0.6844589, 0.6837503, 0.6836612, 0.6831629, 0.6821692, 0.6812283, 0.6799452, 0.6791196, 0.6782504, 0.6772775, 0.6763596, 0.6755115), timedate = structure(c(1533895500, 1533895800, 1533896100, 1533896400, 1533896700, 1533897000, 1533897300, 1533897600, 1533897900, 1533898200, 1533898500, 1533898800, 1533899100, 1533899400, 1533899700, 1533900000, 1533900300, 1533900600, 1533900900, 1533901200), class = c("POSIXct", "POSIXt"), tzone = "")), .Names = c("RECORD", "Temp_C", "Turb_BS", "Turb_SS", "Temp_C_2", "WD_OBS", "Lvl_m", "timedate"), row.names = c(NA, 20L), class = "data.frame")

Upvotes: 0

Views: 610

Answers (1)


Reputation: 3948

Here is a solution using mock data (next time, please provide a sample of your data):

#> Attachement du package : 'lubridate'
#> The following object is masked from 'package:base':
#>     date

# Mock data: three series covering the same day, sampled at different
# time steps (15, 30 and 60 minutes), so their timestamps only partly overlap.
time_15m <- seq(
  from = as.POSIXct("2018-08-30 00:00:00"),
  to = as.POSIXct("2018-08-31 00:00:00"),
  by = "15 min"
)
time_30m <- seq(
  from = as.POSIXct("2018-08-30 00:00:00"),
  to = as.POSIXct("2018-08-31 00:00:00"),
  by = "30 min"
)
time_60m <- seq(
  from = as.POSIXct("2018-08-30 00:00:00"),
  to = as.POSIXct("2018-08-31 00:00:00"),
  by = "60 min"
)

# One data frame per series: a `time` column plus a single value column
# computed from the hour and minute of each timestamp.
data_1 <- data.frame(time = time_15m)
data_1$var_1 <- cos(hour(data_1$time) + minute(data_1$time))

data_2 <- data.frame(time = time_30m)
data_2$var_2 <- sin(hour(data_2$time) + minute(data_2$time))

data_3 <- data.frame(time = time_60m)
data_3$var_3 <- cos(1 - hour(data_3$time) + minute(data_3$time))

# The kind of plot you have: one layer per data frame, each with a fixed
# colour. Because the colours are constants set outside aes(), nothing is
# mapped to a colour scale and no legend is drawn (prefer the 2nd version).
# The chain must end without a trailing `+`, otherwise R keeps reading and
# tries to add the next statement to this plot.
ggplot(data_1, aes(x = time, y = var_1)) +
  geom_point(color = "red") +
  geom_point(data = data_2, aes(x = time, y = var_2), color = "green") +
  geom_point(data = data_3, aes(x = time, y = var_3), color = "blue")

# Second version: bring all series into one long-format data frame so that
# colour can be mapped to the variable name, which makes ggplot2 build the
# legend automatically.
data_1 %>%
  # data_1 has the finest time step, so it is the left table; timestamps
  # absent from data_2 (30 min) or data_3 (60 min) yield NA in their columns
  left_join(data_2) %>%
  left_join(data_3) %>%
  # stack var_1, var_2 and var_3 into one name column and one value column
  gather(key = variable_name, value = variable_value, var_1:var_3) %>%
  ggplot(mapping = aes(x = time, y = variable_value, color = variable_name)) +
  geom_point(size = 2) +
  theme_bw()
#> Joining, by = "time"
#> Joining, by = "time"
#> Warning: Removed 120 rows containing missing values (geom_point).

Created on 2018-08-22 by the reprex package (v0.2.0).

EDIT 1 (include provided datasets)

# Same approach applied to the two sample datasets from the question.
dsloi_wl %>%
  # full_join keeps the rows of both loggers; their timestamps do not
  # coincide, so each row has NA in the other logger's columns
  full_join(cr300_Wai1) %>%
  # Convert Lvl_m to millimetres with the same factor the question uses
  # (Lvl_m * 1000) so it shares the axis with `level`
  mutate(Lvl_m = 1000 * Lvl_m) %>%
  # long format: one name column and one value column for the plotted series
  gather(variable_name, variable_value, level, Lvl_m, Turb_SS) %>%
  ggplot(., aes(x = timedate, y = variable_value, color = variable_name)) +
  geom_point() +
  # explicit colours per series, matching those used in the question's plot
  scale_color_manual("Legend title",
                     values = c("level" = "blue",
                                "Lvl_m" = "forestgreen",
                                "Turb_SS" = "orange"))
#> Joining, by = "timedate"
#> Warning: Removed 60 rows containing missing values (geom_point).

Created on 2018-08-23 by the reprex package (v0.2.0).

Upvotes: 0

Related Questions