ggplot2: Splitting 1 level of grouping variable into multiple geom items within aes()?

Question

I have a plot that works for me, but need some help putting all the geom_line(s) and ribbon(s) in the same legend, like the image of the legend below (done in Excel). As I understand it, ggplot puts items in a legend using the aes() function and the color/fill commands, but each row of data needs a unique grouping variable. I think my resultant plot would be asking ggplot to try and split the same group level into two different aesthetics (making 4 lines from 2 grouping levels?). I'm not sure how to create this, though, without restructuring the data and losing what I have made so far. Essentially, I'm trying to group my "q2" column by the "group" variable (High/Low) like my geom_ribbon, but with their own lines and colors (called "Median (High Years)" and "Median (Low Years)"). Additionally, I hope to add the secondary dataframe ("blackline") to the legend as well.

My goal:

Attempt:

Plot:

library(ggplot2)  

# Asker's attempt: one panel per Zone showing a q1-q3 ribbon per `group`,
# a dashed q2 line, and a q2 line taken from the second data frame `blackline`.
# As posted, this call does not run; the failure is in the second layer (see below).
# NOTE(review): the data is posted as `ribbons`, but `ribbons2` is used here --
# presumably the same object; confirm.
ggplot(ribbons2, aes(x=Month, fill=group)) + 
  # Layer 1: ribbon between q1 and q3; fill comes from the plot-level `fill=group`.
  geom_ribbon(aes(ymin=q1,ymax=q3), alpha=0.4) + 
  # Layer 2 (the layer named in the reported error): `color` is given a length-2
  # vector inside aes(), but an aesthetic must be length 1 or as long as the data
  # (55 rows), so computing aesthetics fails. `fill` is also reported as an
  # unknown aesthetic for geom_line and is ignored.
  geom_line(aes(x=Month, y=q2, fill =group, color = c("#DC143C", "#556B2F")), linewidth = 0.4, linetype = "dashed") + 
  # Layer 3: q2 line from `blackline`; inherit.aes = FALSE means the plot-level
  # mapping is not used here. `fill` is again reported as ignored by geom_line.
  geom_line(data=blackline, mapping=aes(x=Month, y=q2, fill = group), inherit.aes = FALSE, linewidth = 0.8) + 
  # One facet per Zone; the labeller relabels "Crocodile" as "Crocodile Dragover*"
  # and maps the other three zone names to themselves.
  facet_wrap(factor(Zone)~ ., labeller=as_labeller(c("West" = "West", 
                                                     "Whipray" = "Whipray", 
                                                     "Rankin" = "Rankin", 
                                                     "Crocodile" = "Crocodile Dragover*")), 
             scales='free') + 
  
  # One x-axis break per Month value 5..11.
  scale_x_continuous(breaks=c(5,6,7,8,9,10,11)) +
  
  # y axis fixed to [0, 1] with no expansion and breaks every 0.1.
  scale_y_continuous(expand = c(0, 0), 
                     limits=c(0, 1), 
                     breaks=c(0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0)) +
  
  # Fill legend: empty title, two labels and two colours supplied positionally
  # (presumably matching the "high" / "low" levels of `group` in that order -- confirm).
  scale_fill_manual(name="", labels=c('25th - 75th percentile (High years)',
                                      '25th - 75th percentile (Low years)'),
                                      values=c("#d8e4bc", "#e8bcbc")) +
  
  # Classic theme, then overrides: bold facet strip text without a background,
  # legend justified to the top, bold axis titles with extra margin, smaller tick text.
  theme_classic()+
  theme(strip.text = element_text(face = "bold"), 
        legend.justification = "top", 
        strip.background = element_blank(), 
        axis.title.y = element_text(face="bold", margin = margin(t = 0, r = 10, b = 0, l = 0)),
        axis.title.x = element_text(face="bold", margin = margin(t = 10, r = 0, b = 0, l = 0)), 
        axis.text.x = element_text(size = 8),
        axis.text.y = element_text(size = 7)) + 
  ylab("Frequency of Occurence")

Error:

Error in `geom_line()`:
! Problem while computing aesthetics.
ℹ Error occurred in the 2nd layer.
Caused by error in `check_aesthetics()`:
! Aesthetics must be either length 1 or the same as the data (55)
✖ Fix the following mappings: `colour`
Run `rlang::last_trace()` to see where the error occurred.
Warning messages:
1: In geom_line(aes(x = Month, y = q2, fill = group, color = c("#DC143C",  :
  Ignoring unknown aesthetics: fill
2: In geom_line(data = blackline, mapping = aes(x = Month, y = q2,  :
  Ignoring unknown aesthetics: fill

Data (2 data.frames):

> dput(blackline)
structure(list(Zone = c("Crocodile", "Crocodile", "Crocodile", 
"Crocodile", "Crocodile", "Crocodile", "Crocodile", "Rankin", 
"Rankin", "Rankin", "Rankin", "Rankin", "Rankin", "Rankin", "West", 
"West", "West", "West", "West", "West", "West", "Whipray", "Whipray", 
"Whipray", "Whipray", "Whipray", "Whipray", "Whipray"), Month = c(5, 
6, 7, 8, 9, 10, 11, 5, 6, 7, 8, 9, 10, 11, 5, 6, 7, 8, 9, 10, 
11, 5, 6, 7, 8, 9, 10, 11), q2 = c(`50%` = 0, `50%` = 0, `50%` = 0, 
`50%` = 0, `50%` = 0, `50%` = 0, `50%` = 0, `50%` = 0, `50%` = 0.0909, 
`50%` = 0.1818, `50%` = 0.1364, `50%` = 0.0909, `50%` = 0.2727, 
`50%` = 0.1818, `50%` = 0.05, `50%` = 0.1, `50%` = 0.2, `50%` = 0.25, 
`50%` = 0.05, `50%` = 0.1, `50%` = 0.05, `50%` = 0, `50%` = 0, 
`50%` = 0, `50%` = 0.1053, `50%` = 0, `50%` = 0, `50%` = 0), 
    group = c("2021", "2021", "2021", "2021", "2021", "2021", 
    "2021", "2021", "2021", "2021", "2021", "2021", "2021", "2021", 
    "2021", "2021", "2021", "2021", "2021", "2021", "2021", "2021", 
    "2021", "2021", "2021", "2021", "2021", "2021")), row.names = c(NA, 
-28L), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), groups = structure(list(
    Zone = c("Crocodile", "Rankin", "West", "Whipray"), .rows = structure(list(
        1:7, 8:14, 15:21, 22:28), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -4L), .drop = TRUE))


> dput(ribbons)
structure(list(Zone = c("Crocodile", "Crocodile", "Crocodile", 
"Crocodile", "Crocodile", "Crocodile", "Crocodile", "Rankin", 
"Rankin", "Rankin", "Rankin", "Rankin", "Rankin", "Rankin", "West", 
"West", "West", "West", "West", "West", "West", "Whipray", "Whipray", 
"Whipray", "Whipray", "Whipray", "Whipray", "Whipray", "Crocodile", 
"Crocodile", "Crocodile", "Crocodile", "Crocodile", "Crocodile", 
"Crocodile", "Rankin", "Rankin", "Rankin", "Rankin", "Rankin", 
"Rankin", "Rankin", "West", "West", "West", "West", "West", "West", 
"Whipray", "Whipray", "Whipray", "Whipray", "Whipray", "Whipray", 
"Whipray"), Month = c(5, 6, 7, 8, 9, 10, 11, 5, 6, 7, 8, 9, 10, 
11, 5, 6, 7, 8, 9, 10, 11, 5, 6, 7, 8, 9, 10, 11, 5, 6, 7, 8, 
9, 10, 11, 5, 6, 7, 8, 9, 10, 11, 5, 6, 7, 8, 9, 10, 5, 6, 7, 
8, 9, 10, 11), q1 = c(`25%` = 0, `25%` = 0, `25%` = 0.05, `25%` = 0, 
`25%` = 0.1, `25%` = 0.0625, `25%` = 0.0125, `25%` = 0.0909, 
`25%` = 0.086, `25%` = 0.0649, `25%` = 0.2273, `25%` = 0.2208, 
`25%` = 0.4302, `25%` = 0.2727, `25%` = 0.05, `25%` = 0.2, `25%` = 0.25, 
`25%` = 0.25, `25%` = 0.3636, `25%` = 0.25, `25%` = 0.15, `25%` = 0.0132, 
`25%` = 0, `25%` = 0.0789, `25%` = 0.1096, `25%` = 0.2763, `25%` = 0.2868, 
`25%` = 0.0395, `25%` = 0, `25%` = 0, `25%` = 0, `25%` = 0, `25%` = 0, 
`25%` = 0, `25%` = 0, `25%` = 0, `25%` = 0, `25%` = 0, `25%` = 0, 
`25%` = 0, `25%` = 0, `25%` = 0, `25%` = 0, `25%` = 0, `25%` = 0.05, 
`25%` = 0, `25%` = 0, `25%` = 0, `25%` = 0, `25%` = 0, `25%` = 0, 
`25%` = 0, `25%` = 0, `25%` = 0.0132, `25%` = 0), q2 = c(`50%` = 0, 
`50%` = 0, `50%` = 0.05, `50%` = 0, `50%` = 0.1, `50%` = 0.1, 
`50%` = 0.075, `50%` = 0.1364, `50%` = 0.1136, `50%` = 0.1357, 
`50%` = 0.2273, `50%` = 0.2273, `50%` = 0.4773, `50%` = 0.2792, 
`50%` = 0.075, `50%` = 0.25, `50%` = 0.25, `50%` = 0.25, `50%` = 0.3684, 
`50%` = 0.2857, `50%` = 0.1923, `50%` = 0.0263, `50%` = 0.0526, 
`50%` = 0.3026, `50%` = 0.2056, `50%` = 0.4079, `50%` = 0.3684, 
`50%` = 0.1316, `50%` = 0, `50%` = 0, `50%` = 0, `50%` = 0, `50%` = 0, 
`50%` = 0, `50%` = 0, `50%` = 0, `50%` = 0, `50%` = 0, `50%` = 0, 
`50%` = 0, `50%` = 0, `50%` = 0, `50%` = 0, `50%` = 0, `50%` = 0.05, 
`50%` = 0, `50%` = 0, `50%` = 0, `50%` = 0, `50%` = 0, `50%` = 0, 
`50%` = 0, `50%` = 0, `50%` = 0.0526, `50%` = 0), q3 = c(`75%` = 0, 
`75%` = 0, `75%` = 0.1, `75%` = 0.0125, `75%` = 0.15, `75%` = 0.175, 
`75%` = 0.1, `75%` = 0.1818, `75%` = 0.1591, `75%` = 0.2295, 
`75%` = 0.2922, `75%` = 0.2273, `75%` = 0.5114, `75%` = 0.3929, 
`75%` = 0.15, `75%` = 0.35, `75%` = 0.3, `75%` = 0.25, `75%` = 0.45, 
`75%` = 0.35, `75%` = 0.2, `75%` = 0.0395, `75%` = 0.1289, `75%` = 0.55, 
`75%` = 0.375, `75%` = 0.575, `75%` = 0.4658, `75%` = 0.2329, 
`75%` = 0, `75%` = 0, `75%` = 0, `75%` = 0, `75%` = 0, `75%` = 0, 
`75%` = 0, `75%` = 0, `75%` = 0, `75%` = 0, `75%` = 0.0109, `75%` = 0, 
`75%` = 0, `75%` = 0, `75%` = 0, `75%` = 0, `75%` = 0.05, `75%` = 0, 
`75%` = 0, `75%` = 0, `75%` = 0, `75%` = 0, `75%` = 0, `75%` = 0, 
`75%` = 0, `75%` = 0.0882, `75%` = 0), group = c("high", "high", 
"high", "high", "high", "high", "high", "high", "high", "high", 
"high", "high", "high", "high", "high", "high", "high", "high", 
"high", "high", "high", "high", "high", "high", "high", "high", 
"high", "high", "low", "low", "low", "low", "low", "low", "low", 
"low", "low", "low", "low", "low", "low", "low", "low", "low", 
"low", "low", "low", "low", "low", "low", "low", "low", "low", 
"low", "low")), row.names = c(NA, -55L), groups = structure(list(
    Zone = c("Crocodile", "Rankin", "West", "Whipray"), .rows = structure(list(
        c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 29L, 30L, 31L, 32L, 33L, 
        34L, 35L), c(8L, 9L, 10L, 11L, 12L, 13L, 14L, 36L, 37L, 
        38L, 39L, 40L, 41L, 42L), c(15L, 16L, 17L, 18L, 19L, 
        20L, 21L, 43L, 44L, 45L, 46L, 47L, 48L), c(22L, 23L, 
        24L, 25L, 26L, 27L, 28L, 49L, 50L, 51L, 52L, 53L, 54L, 
        55L)), ptype = integer(0), class = c("vctrs_list_of", 
    "vctrs_vctr", "list"))), class = c("tbl_df", "tbl", "data.frame"
), row.names = c(NA, -4L), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))

stefan · Accepted Answer

Getting the legend right when mixing different aesthetics and geoms is always a bit tricky. First, as your legend should display 5 different categories, simply mapping group on aesthetics isn't sufficient. Instead we have to create two more categories for the colored lines and one for the black line. For the colored ones I use paste0(group, "_line") whereas for the black one I simply use "2021". Second, to make sure that the legends for the different aesthetics, i.e. color and fill, get merged, I make sure that all five categories are displayed in each legend, for which I use the limits argument. And of course we need the same labels for each. Moreover, we have to assign a value to each category. Finally, for the perfect look and to reflect the different linetypes and line widths in the legend too, I also map on the linetype and the linewidth aesthetic.

library(ggplot2)

# Keys of the single merged legend, in display order: each ribbon is followed
# by its median line, with the 2021 line in the middle.
limits <- c("high", "high_line", "2021", "low", "low_line")

# Legend text per key. Every scale below is given the same limits and labels.
labels <- c(
  "2021" = "2021",
  high = "25th - 75th percentile (High years)",
  high_line = "Median (High years)",
  low = "25th - 75th percentile (Low years)",
  low_line = "Median (Low years)"
)

# Per-aesthetic values for all five keys. Keys that an aesthetic should not
# draw get a "transparent" / "blank" value.
fill_values <- c(
  high = "#d8e4bc", high_line = "transparent",
  "2021" = "transparent",
  low = "#e8bcbc", low_line = "transparent"
)
color_values <- c(
  high = "transparent", high_line = "#556B2F",
  "2021" = "black",
  low = "transparent", low_line = "#DC143C"
)
linetype_values <- c(
  high = "blank", high_line = "dashed",
  "2021" = "solid",
  low = "blank", low_line = "dashed"
)
linewidth_values <- c(
  high = .1, high_line = .4,
  "2021" = .8,
  low = .1, low_line = .4
)

# Facet strip text: only "Crocodile" is relabelled.
zone_labeller <- as_labeller(c(
  "West" = "West",
  "Whipray" = "Whipray",
  "Rankin" = "Rankin",
  "Crocodile" = "Crocodile Dragover*"
))

ggplot(ribbons, aes(x = Month, fill = group)) +
  # Interquartile ribbon, filled by group ("high" / "low").
  geom_ribbon(aes(ymin = q1, ymax = q3), alpha = 0.4) +
  # Median lines: the "<group>_line" key drives color, linetype and linewidth.
  geom_line(
    aes(
      x = Month,
      y = q2,
      color = paste0(group, "_line"),
      linetype = paste0(group, "_line"),
      linewidth = paste0(group, "_line")
    )
  ) +
  # 2021 line from the second data frame, mapped to the constant key "2021".
  geom_line(
    data = blackline,
    mapping = aes(
      x = Month,
      y = q2,
      color = "2021",
      linetype = "2021",
      linewidth = "2021"
    ),
    inherit.aes = FALSE
  ) +
  facet_wrap(factor(Zone) ~ ., labeller = zone_labeller, scales = "free") +
  scale_x_continuous(breaks = 5:11) +
  scale_y_continuous(
    expand = c(0, 0),
    limits = c(0, 1),
    breaks = (0:10) / 10
  ) +
  scale_fill_manual(limits = limits, labels = labels, values = fill_values) +
  scale_color_manual(limits = limits, labels = labels, values = color_values) +
  scale_linetype_manual(
    limits = limits, labels = labels, values = linetype_values
  ) +
  scale_linewidth_manual(
    limits = limits, labels = labels, values = linewidth_values
  ) +
  theme_classic() +
  theme(
    strip.text = element_text(face = "bold"),
    strip.background = element_blank(),
    legend.justification = "top",
    axis.title.x = element_text(
      face = "bold",
      margin = margin(t = 10, r = 0, b = 0, l = 0)
    ),
    axis.title.y = element_text(
      face = "bold",
      margin = margin(t = 0, r = 10, b = 0, l = 0)
    ),
    axis.text.x = element_text(size = 8),
    axis.text.y = element_text(size = 7)
  ) +
  # NULL titles for all four aesthetics so no legend title is shown.
  labs(
    y = "Frequency of Occurence",
    color = NULL,
    fill = NULL,
    linetype = NULL,
    linewidth = NULL
  )

ggplot2: Splitting 1 level of grouping variable into multiple geom items within aes()?

Answers (1)

Related Questions