Reputation: 695

How to make a plot with mean data and data values as shading in ggplot2?

How can I make a plot similar to the one attached here,

where the bold lines represent the mean and the shading represents the variability of the data? Sample data df and code are below.

>dput(df)
structure(list(yr = c(1989L, 1990L, 1991L, 1992L, 1993L, 1994L, 
1995L, 1989L, 1990L, 1991L, 1992L, 1993L, 1994L, 1995L, 1989L, 
1990L, 1991L, 1992L, 1993L, 1994L, 1995L, 1989L, 1990L, 1991L, 
1992L, 1993L, 1994L, 1995L, 1989L, 1990L, 1991L, 1992L, 1993L, 
1994L, 1995L, 1989L, 1990L, 1991L, 1992L, 1993L, 1994L, 1995L, 
1989L, 1990L, 1991L, 1992L, 1993L, 1994L, 1995L, 1989L, 1990L, 
1991L, 1992L, 1993L, 1994L, 1995L, 1989L, 1990L, 1991L, 1992L, 
1993L, 1994L, 1995L, 1989L, 1990L, 1991L, 1992L, 1993L, 1994L, 
1995L, 1989L, 1990L, 1991L, 1992L, 1993L, 1994L, 1995L, 1989L, 
1990L, 1991L, 1992L, 1993L, 1994L, 1995L), no = c(11L, 12L, 13L, 
17L, 14L, 17L, 15L, 12L, 12L, 18L, 7L, 10L, 10L, 6L, 7L, 11L, 
8L, 7L, 11L, 6L, 8L, 9L, 12L, 15L, 14L, 10L, 18L, 13L, 15L, 14L, 
14L, 11L, 15L, 7L, 11L, 6L, 5L, 10L, 9L, 8L, 5L, 6L, 12L, 8L, 
17L, 18L, 14L, 15L, 16L, 18L, 18L, 15L, 13L, 12L, 9L, 12L, 5L, 
5L, 7L, 5L, 9L, 7L, 5L, 5L, 6L, 10L, 12L, 5L, 13L, 8L, 13L, 12L, 
11L, 4L, 12L, 6L, 6L, 10L, 6L, 11L, 8L, 6L, 3L, 6L), lval = c("l4651", 
"l4651", "l4651", "l4651", "l4651", "l4651", "l4651", "l5156", 
"l5156", "l5156", "l5156", "l5156", "l5156", "l5156", "l5661", 
"l5661", "l5661", "l5661", "l5661", "l5661", "l5661", "l4651", 
"l4651", "l4651", "l4651", "l4651", "l4651", "l4651", "l5156", 
"l5156", "l5156", "l5156", "l5156", "l5156", "l5156", "l5661", 
"l5661", "l5661", "l5661", "l5661", "l5661", "l5661", "l4651", 
"l4651", "l4651", "l4651", "l4651", "l4651", "l4651", "l5156", 
"l5156", "l5156", "l5156", "l5156", "l5156", "l5156", "l5661", 
"l5661", "l5661", "l5661", "l5661", "l5661", "l5661", "l4651", 
"l4651", "l4651", "l4651", "l4651", "l4651", "l4651", "l5156", 
"l5156", "l5156", "l5156", "l5156", "l5156", "l5156", "l5661", 
"l5661", "l5661", "l5661", "l5661", "l5661", "l5661"), CCR = c("CR1", 
"CR1", "CR1", "CR1", "CR1", "CR1", "CR1", "CR1", "CR1", "CR1", 
"CR1", "CR1", "CR1", "CR1", "CR1", "CR1", "CR1", "CR1", "CR1", 
"CR1", "CR1", "CR2", "CR2", "CR2", "CR2", "CR2", "CR2", "CR2", 
"CR2", "CR2", "CR2", "CR2", "CR2", "CR2", "CR2", "CR2", "CR2", 
"CR2", "CR2", "CR2", "CR2", "CR2", "CR3", "CR3", "CR3", "CR3", 
"CR3", "CR3", "CR3", "CR3", "CR3", "CR3", "CR3", "CR3", "CR3", 
"CR3", "CR3", "CR3", "CR3", "CR3", "CR3", "CR3", "CR3", "CR4", 
"CR4", "CR4", "CR4", "CR4", "CR4", "CR4", "CR4", "CR4", "CR4", 
"CR4", "CR4", "CR4", "CR4", "CR4", "CR4", "CR4", "CR4", "CR4", 
"CR4", "CR4")), .Names = c("yr", "no", "lval", "CCR"), row.names = c(NA, 
-84L), vars = "Year", drop = TRUE, indices = list(c(0L, 7L, 14L, 
21L, 28L, 35L, 42L, 49L, 56L, 63L, 70L, 77L), c(1L, 8L, 15L, 
22L, 29L, 36L, 43L, 50L, 57L, 64L, 71L, 78L), c(2L, 9L, 16L, 
23L, 30L, 37L, 44L, 51L, 58L, 65L, 72L, 79L), c(3L, 10L, 17L, 
24L, 31L, 38L, 45L, 52L, 59L, 66L, 73L, 80L), c(4L, 11L, 18L, 
25L, 32L, 39L, 46L, 53L, 60L, 67L, 74L, 81L), c(5L, 12L, 19L, 
26L, 33L, 40L, 47L, 54L, 61L, 68L, 75L, 82L), c(6L, 13L, 20L, 
27L, 34L, 41L, 48L, 55L, 62L, 69L, 76L, 83L)), group_sizes = c(12L, 
12L, 12L, 12L, 12L, 12L, 12L), biggest_group_size = 12L, labels = structure(list(
    Year = 1989:1995), row.names = c(NA, -7L), class = "data.frame", vars = "Year", drop = TRUE, .Names = "Year"), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))

    library(dplyr)   # group_by() / summarise() for the per-group means
library(ggplot2) # ggplot(), geom_point(), geom_line()

# Mean of `no` for every yr x lval combination (i.e. averaged over the CCR
# rows that share the same year and lval).
# ungroup() removes the grouping that summarise() leaves behind, so later
# steps on df_mn do not silently run per group.
df_mn <- df %>%
  group_by(yr, lval) %>%
  summarise(meanno = mean(no)) %>%
  ungroup()
df_mn
df

# Raw observations as points, with the per-lval mean drawn as a solid line.
# `lty` is not passed to geom_point(): points have no linetype, so ggplot2
# would only ignore it with an "unknown parameters" warning.
plot <- ggplot() +
  geom_point(
    data = df,
    aes(x = yr, y = no, group = lval, color = lval),
    size = 1
  ) +
  geom_line(
    data = df_mn,
    aes(x = yr, y = meanno, color = lval, group = lval),
    size = 1,
    lty = "solid"
  )
plot

Upvotes: 1

Answers (3)

Billy Jackson

Reputation: 75

# Raw observations as points plus a smoothed trend of the per-group means
# (df_mn) with a shaded confidence band.
# In geom_smooth(), `se` is only a logical switch for drawing the band; the
# width of the band is controlled by `level` (default 0.95). Passing a number
# such as `se = 0.05` is just treated as TRUE and leaves the band at 95%.
ggplot() +
  geom_point(
    data = df,
    aes(x = yr, y = no, group = lval, color = lval),
    size = 1
  ) +
  geom_smooth(
    data = df_mn,
    aes(x = yr, y = meanno, color = lval, group = lval, fill = lval),
    se = TRUE,
    level = 0.05
  )

It doesn't look nearly as pretty as your sample posted, but that's because the regression lines in your data overlap a lot and your sample sizes are very small in your desired plot which makes the variability very large.

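Note that the confidence level of the shading in geom_smooth() is set with the level argument, not se: se is only a TRUE/FALSE switch for drawing the band, so se = 0.05 is simply treated as TRUE. The default is level = 0.95 (the industry standard); the lower you set it, the tighter the shading will be (albeit that tighter fit comes with much lower confidence that the true trend actually lies in that shading).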

Also, if you really want it to look like your sample graph, you can eliminate the + geom_point() line.

Upvotes: 2

shiny

Reputation: 3502

I assume you are after the minimum and maximum values for your variability. I might be wrong.

library(tidyverse)

# For each yr x lval group: the mean of `no` drives the line, and the
# minimum/maximum of `no` bound the shaded ribbon.
# summarise() returns one row per group; mutate() would keep every raw row
# and repeat the same summary values on each of them.
# The bounds are named lower/upper so they do not shadow base min()/max().
df %>%
  group_by(yr, lval) %>%
  summarise(
    value = mean(no),
    lower = min(no),
    upper = max(no)
  ) %>%
  ungroup() %>%
  ggplot(aes(x = yr, y = value, group = lval, fill = lval)) +
  geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.2) +
  geom_line(aes(color = lval)) # group = lval is inherited from ggplot()

I think it will be better if you have them in different facets like below by just adding one more line to the code above

+ facet_grid(lval ~ .) # one row of panels per lval value

Upvotes: 2

Scransom

Reputation: 3335

Use geom_line for your mean and geom_ribbon for your CIs.

E.g. from the excellent reference material:

# Example data built from the LakeHuron time series: a `year` column
# (1875-1972) next to the series values stripped of their ts attributes.
huron <- data.frame(
  year = 1875:1972,
  level = as.vector(LakeHuron)
)

# The ribbon is added first so the line is drawn on top of it; the band spans
# level - 1 to level + 1 and is filled in a fixed grey.
ggplot(data = huron, mapping = aes(x = year)) +
  geom_ribbon(
    mapping = aes(ymin = level - 1, ymax = level + 1),
    fill = "grey70"
  ) +
  geom_line(mapping = aes(y = level))

You can pass different data sets to each geom if necessary.

Upvotes: 1

How to make a plot with mean data and data values as shading in ggplot2?

Answers (3)

Related Questions