user16368421
user16368421

Reputation:

How can I plot multiple different columns in ggplot?

I want to plot raw birth counts by month, stratified by year: the month number goes on the x axis, the raw count goes on the y axis, and there should be one line per year.

Here is the structure of my data:

> dput(births_monthly_cross_022822_clean)
structure(list(month = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12
), `2010` = c(323816, 302551, 339219, 325582, 328960, 335180, 
345875, 350473, 351439, 337477, 326868, 339665), `2011` = c(321104, 
298537, 330752, 313875, 327242, 337890, 346192, 360079, 346254, 
329174, 322135, 327986), `2012` = c(316959, 305060, 324944, 307262, 
330779, 327811, 348157, 361902, 340901, 346251, 325810, 324960
), `2013` = c(324314, 292360, 321166, 312274, 330193, 320475, 
349920, 354293, 338863, 341353, 319233, 336320), `2014` = c(327154, 
299087, 324317, 319788, 335444, 326757, 356446, 355201, 349403, 
344265, 318819, 341494), `2015` = c(326747, 298815, 329714, 321618, 
328709, 331400, 354384, 352782, 348479, 339904, 319605, 336576


> head(births_monthly_cross_022822_clean)
# A tibble: 6 × 12
  month `2010` `2011` `2012` `2013` `2014` `2015` `2016` `2017` `2018` `2019` `2020`
  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
1     1 323816 321104 316959 324314 327154 326747 317445 314597 315593 311678 305536
2     2 302551 298537 305060 292360 299087 298815 306750 289694 284940 280679 283385
3     3 339219 330752 324944 321166 324317 329714 329341 320327 316824 304999 302331
4     4 325582 313875 307262 312274 319788 321618 314312 300801 299125 299755 290940
5     5 328960 327242 330779 330193 335444 328709 328434 323169 321448 317160 301902
6     6 335180 337890 327811 320475 326757 331400 333166 324633 315585 304843 302574

Thank you!

Upvotes: 0

Views: 336

Answers (3)

Rfanatic
Rfanatic

Reputation: 2280

Sample data:

 # Sample data in wide format: one row per month (1-12) and one column of raw
 # birth counts per year (2010-2015).
 #
 # check.names = FALSE keeps the year column names exactly as written ("2010"
 # rather than the syntactic "X2010"), so the names do not have to be repaired
 # with colnames() afterwards.
 df <- data.frame(
   "month" = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12),
   "2010" = c(323816, 302551, 339219, 325582, 328960, 335180,
              345875, 350473, 351439, 337477, 326868, 339665),
   "2011" = c(321104, 298537, 330752, 313875, 327242, 337890,
              346192, 360079, 346254, 329174, 322135, 327986),
   "2012" = c(316959, 305060, 324944, 307262, 330779, 327811,
              348157, 361902, 340901, 346251, 325810, 324960),
   "2013" = c(324314, 292360, 321166, 312274, 330193, 320475,
              349920, 354293, 338863, 341353, 319233, 336320),
   "2014" = c(327154, 299087, 324317, 319788, 335444, 326757,
              356446, 355201, 349403, 344265, 318819, 341494),
   "2015" = c(326747, 298815, 329714, 321618, 328709, 331400,
              354384, 352782, 348479, 339904, 319605, 336576),
   check.names = FALSE
 )
    

Reshape data:

# Reshape `df` from wide to long format: one row per (month, year) pair with
# the count in `y` and the year label in `Year`.
#
# The year columns are taken from the column names instead of being listed by
# hand, so every year column present in `df` is included.
year_cols <- setdiff(names(df), "month")

df_n <- data.frame(
  # Repeat the month vector once per year column so it lines up with `y`.
  x = rep(df[["month"]], times = length(year_cols)),
  # Concatenate the year columns in column order (all of 1st year, then 2nd, ...).
  y = unlist(df[year_cols], use.names = FALSE),
  # Label each stretch of nrow(df) values with the year it came from.
  Year = rep(year_cols, each = nrow(df))
)

# Store month as a factor with explicit levels so it is treated as a discrete
# variable whose order is 1, 2, ..., 12.
df_n$x <- factor(df_n$x, levels = as.character(1:12))

Draw plot:

library(ggplot2)

# Line chart of the long-format data: month on the x axis, value on the y
# axis, one coloured line per year.
#
# The plot is stored as `births_plot` instead of `ggplot`, so the object does
# not share its name with the ggplot() function. The outer parentheses make R
# print (draw) the plot as well as assign it.
(births_plot <- ggplot(df_n, aes(x, y)) +
  # group = Year ties all months of one year into a single line; `linewidth`
  # is the line-thickness argument that replaces `size` for lines
  # (ggplot2 >= 3.4.0).
  geom_line(aes(color = Year, group = Year), linewidth = 1) +
  # The legend belongs to the colour aesthetic, so it is titled via `color`.
  labs(x = "month", y = "value", title = "", color = "Year") +
  theme_bw() +
  # All text styling in a single theme() call, applied on top of theme_bw().
  theme(
    plot.title = element_text(
      hjust = 0.5, face = "bold", size = 20, color = "black"
    ),
    axis.title.x = element_text(
      family = "Times", face = "bold", size = 16, color = "black"
    ),
    axis.title.y = element_text(
      family = "Times", face = "bold", size = 16, color = "black"
    ),
    axis.text.x = element_text(
      hjust = 1, face = "bold", size = 14, color = "black"
    ),
    axis.text.y = element_text(
      hjust = 1, face = "bold", size = 14, color = "black"
    )
  ))

Plot:

enter image description here

Upvotes: 0

Allan Cameron
Allan Cameron

Reputation: 173793

Currently your data frame is in wide format, where the variable you wish to plot on the y axis (counts of births) is represented in several columns. It is much easier to plot if you reshape your data to long format, where you have a column for counts and a column that labels according to year. You can achieve this with pivot_longer from the tidyr package.

It also makes for an easier-to-understand plot if you convert month numbers into month names.

Here's how you can do all this within the tidyverse framework:

library(ggplot2)
library(dplyr)
library(tidyr)

# Reshape the wide table to long format (one row per month/year pair) and draw
# one line per year.
births_monthly_cross_022822_clean %>%
  # Every column except `month` is a year; its name goes to `year` and its
  # counts go to `value`.
  pivot_longer(-month, names_to = "year", values_to = "value") %>%
  mutate(
    # Keep the full year label. A leading "X" is removed only when present
    # (column names such as "X2010"); names such as "2010" are left untouched.
    year = sub("^X", "", year),
    # Turn month numbers into abbreviated month names, ordered Jan..Dec.
    month = factor(month.abb[month], levels = month.abb)
  ) %>%
  ggplot(aes(month, value, color = year, group = year)) +
  geom_line() +
  theme_bw()

enter image description here

Upvotes: 2

You need to pivot your data to long format, like this:

library(tidyverse)

# Wide -> long: `month` stays as the identifying column, the remaining column
# names are collected in `year`, and their values in `q_births`.
pivot_longer(
  births_monthly_cross_022822_clean,
  cols = !month,
  names_to = "year",
  values_to = "q_births"
)

Upvotes: 0

Related Questions