ah bon
ah bon

Reputation: 10011

Plot stacked areas and lines with ggplot2

I would like to use ggplot2 to visualize a, b, c with stacked areas and ratio_a, ..., ratio_c with lines from the following dataset:

   year        a         b         c  ratio_a  ratio_b  ratio_c
0  2010  40533.6  187383.2  173596.0      4.3     12.3      9.8
1  2011  47712.0  220591.6  203260.1      4.5     10.6      8.9
2  2012  52377.0  235319.0  231626.0      4.5      8.1      8.1
3  2013  56957.0  249684.0  262204.0      4.0      7.8      8.3
4  2014  58332.0  271392.0  306739.0      4.1      7.3      8.1
5  2015  60863.0  274278.0  341567.0      3.9      6.0      8.3
6  2016  63671.0  296236.0  384221.0      3.3      6.1      7.8
7  2017  65468.0  334623.0  427032.0      3.9      6.1      8.0
8  2018  64734.0  366000.9  469574.6      3.5      5.8      7.6
9  2019  70467.0  386165.0  534233.0      3.1      5.7      6.9

I have used:

# Reshape to long format: one row per (year, variable) pair.
df_m <- melt(df, id.vars = "year")
# Stacked bars per year, one fill colour per variable.
ggplot(df_m, aes(x = year, y = value, fill = variable)) +
  geom_col() +
  labs(x = "year", y = "million")

Out:

enter image description here

How could I modify the code above to achieve that? Thanks.

Please note that we need both a left and a right axis, because the scales of the two groups of series differ greatly.

Update: output from @KoenV's code:

enter image description here

Upvotes: 1

Views: 105

Answers (3)

Guido Berning
Guido Berning

Reputation: 197

... without reshape ...

library(tidyverse)

# Example data from the question, entered row by row: `year`, the three
# series a, b, c, and the three series ratio_a, ratio_b, ratio_c.
df <- tribble(~year, ~a, ~b, ~c, ~ratio_a,  ~ratio_b, ~ratio_c,
2010,  40533.6,  187383.2,  173596.0,      4.3,     12.3,      9.8,
2011,  47712.0,  220591.6,  203260.1,      4.5,     10.6,      8.9,
2012,  52377.0,  235319.0,  231626.0,      4.5,      8.1,      8.1,
2013,  56957.0,  249684.0,  262204.0,      4.0,      7.8,      8.3,
2014,  58332.0,  271392.0,  306739.0,      4.1,      7.3,      8.1,
2015,  60863.0,  274278.0,  341567.0,      3.9,      6.0,      8.3,
2016,  63671.0,  296236.0,  384221.0,      3.3,      6.1,      7.8,
2017,  65468.0,  334623.0,  427032.0,      3.9,      6.1,      8.0,
2018,  64734.0,  366000.9,  469574.6,      3.5,      5.8,      7.6,
2019,  70467.0,  386165.0,  534233.0,      3.1,      5.7,      6.9)

# Long format: one row per (year, variable) pair, values in `value`.
df_dat1 <- pivot_longer(df, cols = !year, names_to = "variable")
# Tag each row as a "ratio" or a "volume" series and turn `year` into a
# factor.
df_dat1 <- mutate(
  df_dat1,
  vartyp = ifelse(grepl("ratio_", variable), "ratio", "volume"),
  year = factor(year)
)

# Plotting: stacked areas for the volume series (drawn as value/100000),
# lines and points for the ratio series.
volume_rows <- subset(df_dat1, vartyp == "volume")
ratio_rows <- subset(df_dat1, vartyp == "ratio")
# A dodge of width 0 leaves every series at its own year.
no_dodge <- position_dodge(width = 0)
ggplot(df_dat1) +
  geom_area(
    aes(x = year, y = value/100000, fill = variable, group = variable),
    data = volume_rows,
    alpha = .6,
    position = position_stack()
  ) +
  geom_line(
    aes(x = year, y = value, colour = variable, group = variable),
    data = ratio_rows,
    size = 1,
    position = no_dodge
  ) +
  geom_point(
    aes(x = year, y = value, colour = variable, shape = variable,
        group = variable),
    data = ratio_rows,
    size = 2.5,
    position = no_dodge
  ) +
  scale_y_continuous(sec.axis = sec_axis(~.*1, name = "Ratio (in %)")) +
  labs(fill = "Volume", colour = "Ratio", shape = "Ratio") +
  theme_classic()

Upvotes: 1

iouraich
iouraich

Reputation: 3034

Try this

# Load packages. library() stops with an error when a package is missing,
# whereas require() only returns FALSE and lets the script fail later on.
library(tidyverse)
library(reshape2)

# Read the data
df_dat <- read.csv("question1.csv", header = TRUE)

# Melt the data to long format (one row per year/variable pair), tag each
# row as a "ratio" or a "volume" series, and turn `year` into a factor.
df_dat1 <- melt(df_dat, id.vars = c("year")) %>%
  mutate(
    vartyp = ifelse(grepl("ratio_", variable), "ratio", "volume"),
    year = as.factor(year)
  )

# Plotting: stacked areas for the volume series (drawn as value/100000),
# lines and points for the ratio series.
df_dat1 %>%
  ggplot() +
  geom_area(data = subset(df_dat1, vartyp == "volume"),
            aes(year, value/100000, group = variable, fill = variable),
            position = "stack", alpha = .6) +
  # geom_line has no shape aesthetic, so shape is mapped on the points only.
  # position = "identity" keeps each series at its own year; "dodge" without
  # a width emits a warning and shifts the series sideways.
  geom_line(data = subset(df_dat1, vartyp == "ratio"),
            aes(year, value, group = variable, colour = variable),
            position = "identity", size = 1) +
  geom_point(data = subset(df_dat1, vartyp == "ratio"),
             aes(year, value, group = variable, colour = variable,
                 shape = variable),
             position = "identity", size = 2.5) +
  scale_y_continuous(sec.axis = sec_axis(~.*1, name = "Ratio (in %)")) +
  labs(fill = "Volume", colour = "Ratio", shape = "Ratio") +
  theme_classic()

result2

Upvotes: 1

KoenV
KoenV

Reputation: 4283

You could do this with the following code:

It is possible to plot different data sets within one call to ggplot. Moreover, you may use the sec_axis option in a call to scale_y_continuous to set a second axis. The coeff variable is used to transform the scale.

In order to change the color of the "ratio_x" lines, you may use scale_color_manual

library(ggplot2)
library(reshape)
library(dplyr)

# Factor used by the plot's secondary axis to map the primary (volume)
# scale back to the ratio scale.
coeff <- 1/100000

# Long format: one row per (year, variable) pair.
df_m <- melt(df, id.vars = "year")

# Volume series a, b, c.
df_m_x <- filter(df_m, variable %in% c("a", "b", "c"))

# Ratio series, multiplied by 100000 so they can be drawn on the volume
# scale.
df_m_ratio_x <- filter(
  df_m,
  variable %in% c("ratio_a", "ratio_b", "ratio_c")
)
df_m_ratio_x <- mutate(df_m_ratio_x, value = value * 100000)

# Stacked bars for the volume series plus one line per (rescaled) ratio
# series; the secondary axis multiplies the primary scale by `coeff`.
ggplot() +
  geom_bar(data = df_m_x, aes(x = year, y = value, fill = variable),
           alpha = 0.8, stat = "identity") +
  geom_line(data = df_m_ratio_x, aes(x = year, y = value, col = variable)) +
  xlab("year") +
  scale_y_continuous(
    # Features of the first axis
    name = "million",
    # Add a second axis and specify its features
    sec.axis = sec_axis(~.*coeff, name = "Ratio")
  ) +
  # One distinct colour per ratio line; ratio_c previously reused ratio_a's
  # colour, which made the two lines indistinguishable.
  scale_color_manual(values = c("ratio_a" = "#00AFBB",
                                "ratio_b" = "#E7B800",
                                "ratio_c" = "#FC4E07")) +
  theme_minimal()

This yields the following graph:

enter image description here

Upvotes: 1

Related Questions