Reputation: 695
How can I make a plot similar to the one attached here,
where the bold lines represent the mean and the shading represents the variability of the data?
Sample data (df) and the code I have so far are below.
>dput(df)
structure(list(yr = c(1989L, 1990L, 1991L, 1992L, 1993L, 1994L,
1995L, 1989L, 1990L, 1991L, 1992L, 1993L, 1994L, 1995L, 1989L,
1990L, 1991L, 1992L, 1993L, 1994L, 1995L, 1989L, 1990L, 1991L,
1992L, 1993L, 1994L, 1995L, 1989L, 1990L, 1991L, 1992L, 1993L,
1994L, 1995L, 1989L, 1990L, 1991L, 1992L, 1993L, 1994L, 1995L,
1989L, 1990L, 1991L, 1992L, 1993L, 1994L, 1995L, 1989L, 1990L,
1991L, 1992L, 1993L, 1994L, 1995L, 1989L, 1990L, 1991L, 1992L,
1993L, 1994L, 1995L, 1989L, 1990L, 1991L, 1992L, 1993L, 1994L,
1995L, 1989L, 1990L, 1991L, 1992L, 1993L, 1994L, 1995L, 1989L,
1990L, 1991L, 1992L, 1993L, 1994L, 1995L), no = c(11L, 12L, 13L,
17L, 14L, 17L, 15L, 12L, 12L, 18L, 7L, 10L, 10L, 6L, 7L, 11L,
8L, 7L, 11L, 6L, 8L, 9L, 12L, 15L, 14L, 10L, 18L, 13L, 15L, 14L,
14L, 11L, 15L, 7L, 11L, 6L, 5L, 10L, 9L, 8L, 5L, 6L, 12L, 8L,
17L, 18L, 14L, 15L, 16L, 18L, 18L, 15L, 13L, 12L, 9L, 12L, 5L,
5L, 7L, 5L, 9L, 7L, 5L, 5L, 6L, 10L, 12L, 5L, 13L, 8L, 13L, 12L,
11L, 4L, 12L, 6L, 6L, 10L, 6L, 11L, 8L, 6L, 3L, 6L), lval = c("l4651",
"l4651", "l4651", "l4651", "l4651", "l4651", "l4651", "l5156",
"l5156", "l5156", "l5156", "l5156", "l5156", "l5156", "l5661",
"l5661", "l5661", "l5661", "l5661", "l5661", "l5661", "l4651",
"l4651", "l4651", "l4651", "l4651", "l4651", "l4651", "l5156",
"l5156", "l5156", "l5156", "l5156", "l5156", "l5156", "l5661",
"l5661", "l5661", "l5661", "l5661", "l5661", "l5661", "l4651",
"l4651", "l4651", "l4651", "l4651", "l4651", "l4651", "l5156",
"l5156", "l5156", "l5156", "l5156", "l5156", "l5156", "l5661",
"l5661", "l5661", "l5661", "l5661", "l5661", "l5661", "l4651",
"l4651", "l4651", "l4651", "l4651", "l4651", "l4651", "l5156",
"l5156", "l5156", "l5156", "l5156", "l5156", "l5156", "l5661",
"l5661", "l5661", "l5661", "l5661", "l5661", "l5661"), CCR = c("CR1",
"CR1", "CR1", "CR1", "CR1", "CR1", "CR1", "CR1", "CR1", "CR1",
"CR1", "CR1", "CR1", "CR1", "CR1", "CR1", "CR1", "CR1", "CR1",
"CR1", "CR1", "CR2", "CR2", "CR2", "CR2", "CR2", "CR2", "CR2",
"CR2", "CR2", "CR2", "CR2", "CR2", "CR2", "CR2", "CR2", "CR2",
"CR2", "CR2", "CR2", "CR2", "CR2", "CR3", "CR3", "CR3", "CR3",
"CR3", "CR3", "CR3", "CR3", "CR3", "CR3", "CR3", "CR3", "CR3",
"CR3", "CR3", "CR3", "CR3", "CR3", "CR3", "CR3", "CR3", "CR4",
"CR4", "CR4", "CR4", "CR4", "CR4", "CR4", "CR4", "CR4", "CR4",
"CR4", "CR4", "CR4", "CR4", "CR4", "CR4", "CR4", "CR4", "CR4",
"CR4", "CR4")), .Names = c("yr", "no", "lval", "CCR"), row.names = c(NA,
-84L), vars = "Year", drop = TRUE, indices = list(c(0L, 7L, 14L,
21L, 28L, 35L, 42L, 49L, 56L, 63L, 70L, 77L), c(1L, 8L, 15L,
22L, 29L, 36L, 43L, 50L, 57L, 64L, 71L, 78L), c(2L, 9L, 16L,
23L, 30L, 37L, 44L, 51L, 58L, 65L, 72L, 79L), c(3L, 10L, 17L,
24L, 31L, 38L, 45L, 52L, 59L, 66L, 73L, 80L), c(4L, 11L, 18L,
25L, 32L, 39L, 46L, 53L, 60L, 67L, 74L, 81L), c(5L, 12L, 19L,
26L, 33L, 40L, 47L, 54L, 61L, 68L, 75L, 82L), c(6L, 13L, 20L,
27L, 34L, 41L, 48L, 55L, 62L, 69L, 76L, 83L)), group_sizes = c(12L,
12L, 12L, 12L, 12L, 12L, 12L), biggest_group_size = 12L, labels = structure(list(
Year = 1989:1995), row.names = c(NA, -7L), class = "data.frame", vars = "Year", drop = TRUE, .Names = "Year"), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
library(dplyr)   # group_by() / summarise() for the per-group means
library(ggplot2) # ggplot(), geom_point(), geom_line()

# Mean of `no` for every year/lval combination, i.e. averaged across the
# CCR replicates. ungroup() so the summary does not stay grouped by `yr`.
df_mn <- df %>%
  group_by(yr, lval) %>%
  summarise(meanno = mean(no)) %>%
  ungroup()
df_mn
df

# Raw observations as points, group means as solid lines on top.
# The line type is set on geom_line() only: geom_point() has no linetype
# aesthetic, so passing `lty` there merely produces an "unknown parameter"
# warning.
# NOTE(review): ggplot2 >= 3.4 prefers `linewidth` over `size` for lines;
# `size` is kept here so the snippet also runs on older versions.
plot <- ggplot() +
  geom_point(
    data = df,
    aes(x = yr, y = no, group = lval, color = lval),
    size = 1
  ) +
  geom_line(
    data = df_mn,
    aes(x = yr, y = meanno, color = lval, group = lval),
    size = 1, lty = "solid"
  )
plot
Upvotes: 1
Views: 1056
Reputation: 75
# Points show the raw observations; geom_smooth() fits one smoothed line per
# lval through the yearly means and shades a confidence band around it.
# `se` is only a logical switch for the band. Its width is controlled by
# `level` (default 0.95), so the deliberately narrow band is requested with
# `level = 0.05`; a number passed to `se` would not change the width.
ggplot() +
  geom_point(
    data = df,
    aes(x = yr, y = no, group = lval, color = lval),
    size = 1
  ) +
  geom_smooth(
    data = df_mn,
    aes(x = yr, y = meanno, color = lval, group = lval, fill = lval),
    se = TRUE, level = 0.05
  )
It doesn't look nearly as pretty as your sample posted, but that's because the regression lines in your data overlap a lot and your sample sizes are very small in your desired plot which makes the variability very large.
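The width of the shaded band is determined by the confidence level, which geom_smooth() takes through its level argument (the default is level = 0.95, the usual convention); se is only a TRUE/FALSE switch that turns the band on or off, so passing a number to se does not change the width. The lower you set level, the tighter the shading will fit (albeit that tighter fit comes with a much lower confidence that the true trend actually lies within that shading).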
Also, if you really want it to look like your sample graph, you can eliminate the + geom_point() line.
Upvotes: 2
Reputation: 3502
I assume you are after the minimum and maximum values for your variability. I might be wrong.
library(tidyverse)

# One row per year/lval: the mean drives the line, the observed minimum and
# maximum bound the shaded ribbon. summarise() is used instead of mutate()
# so the plot receives 21 summary rows rather than the 84 original rows with
# every summary value repeated four times. The columns are named
# no_min/no_max so they do not shadow base::min() and base::max().
df %>%
  group_by(yr, lval) %>%
  summarise(
    value = mean(no),
    no_min = min(no),
    no_max = max(no)
  ) %>%
  ungroup() %>%
  ggplot(aes(x = yr, y = value, group = lval, fill = lval)) +
  geom_ribbon(aes(ymin = no_min, ymax = no_max), alpha = 0.2) +
  geom_line(aes(color = lval))
I think it will be better if you have them in different facets like below by just adding one more line to the code above
+ facet_grid(lval ~ .)  # one panel row per lval value
Upvotes: 2
Reputation: 3335
Use geom_line for your mean and geom_ribbon for your CIs.
E.g. from the excellent reference material:
# Yearly Lake Huron levels from the built-in `LakeHuron` time series,
# flattened into a plain data frame with one row per year (1875-1972).
huron <- data.frame(
  year = 1875:1972,
  level = as.vector(LakeHuron)
)

# Grey band of +/- 1 around the level, with the level itself drawn on top.
ggplot(data = huron, mapping = aes(x = year)) +
  geom_ribbon(
    mapping = aes(ymin = level - 1, ymax = level + 1),
    fill = "grey70"
  ) +
  geom_line(mapping = aes(y = level))
You can pass different data sets to each geom
if necessary.
Upvotes: 1