C. Guff
C. Guff

Reputation: 478

Add a cumulative frequency to a bar chart in ggplot

I would like to add a cumulative frequency line to a bar chart, but I am having difficulties. Here is the code for the bar chart:

# Bar chart of observation counts: one bar per Family x Group combination,
# filled by Group. geom_bar() is used with its default stat, so the bar
# height is the number of rows of ZOO_long2 in each combination.
GZ1 <- ggplot(data = ZOO_long2) +
  geom_bar(
    aes(x = interaction(Family, Group), fill = Group),
    width = 0.7, color = "black", alpha = 9/10
  ) +
  # The x positions are interaction(Family, Group) levels; relabel each
  # position with the plain Family name of the corresponding row.
  scale_x_discrete(
    "Family",
    breaks = interaction(ZOO_long2$Family, ZOO_long2$Group),
    labels = ZOO_long2$Family
  ) +
  # Only named labels are passed to labs(); a bare `legend` would refer to
  # the graphics::legend() function and would not set any label.
  labs(title = "Observations of zooplancton family", subtitle = "by Groups") +
  ylim(0, 1000) +
  theme_bw() +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    legend.background = element_rect(
      fill = "grey90", size = 0.5, linetype = "solid", colour = "black"
    ),
    # Vertical axis labels so the family names do not overlap
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
  )
GZ1

And here is the dataset:

dput(ZOO_long2[1300:1375,])
structure(list(Obs = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
), Group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L), .Label = c("Cladocera", "Copepoda", "Rotifera"), class = "factor"), 
    Family = structure(c(8L, 8L, 4L, 8L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 8L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 7L, 7L, 7L, 7L, 7L, 
    14L, 14L, 14L, 14L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
    7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L), .Label = c("Bosminidae", 
    "Calanoida", "Chydoridae", "Cladocera f.", "Copepoda f.", 
    "Cyclopoida", "Cyclopoidae", "Daphnidae", "Harpacticoida", 
    "Holopediidae", "Leptodoridae", "Macrothricidae", "Moinidae", 
    "Nauplius larvae", "Polyphemidae", "Rotifera f.", "Sididae"
    ), class = "factor"), frequency = c(0.000428082191780822, 
    0.000428082191780822, 0.000428082191780822, 0.000428082191780822, 
    0.000428082191780822, 0.000428082191780822, 0.000428082191780822, 
    0.000428082191780822, 0.000428082191780822, 0.000428082191780822, 
    0.000428082191780822, 0.000428082191780822, 0.000428082191780822, 
    0.000428082191780822, 0.000428082191780822, 0.000428082191780822, 
    0.000428082191780822, 0.000428082191780822, 0.000428082191780822, 
    0.000428082191780822, 0.000428082191780822, 0.000428082191780822, 
    0.000428082191780822, 0.000428082191780822, 0.000428082191780822, 
    0.000428082191780822, 0.000428082191780822, 0.000428082191780822, 
    0.000428082191780822, 0.000428082191780822, 0.000428082191780822, 
    0.000428082191780822, 0.000428082191780822, 0.000428082191780822, 
    0.000428082191780822, 0.000428082191780822, 0.000428082191780822, 
    0.000428082191780822, 0.000428082191780822, 0.000428082191780822, 
    0.000428082191780822, 0.000428082191780822, 0.000428082191780822, 
    0.000428082191780822, 0.000428082191780822, 0.000428082191780822, 
    0.000428082191780822, 0.000428082191780822, 0.000428082191780822, 
    0.000428082191780822, 0.000428082191780822, 0.000428082191780822, 
    0.000428082191780822, 0.000428082191780822, 0.000428082191780822, 
    0.000428082191780822, 0.000428082191780822, 0.000428082191780822, 
    0.000428082191780822, 0.000428082191780822, 0.000428082191780822, 
    0.000428082191780822, 0.000428082191780822, 0.000428082191780822, 
    0.000428082191780822, 0.000428082191780822, 0.000428082191780822, 
    0.000428082191780822, 0.000428082191780822, 0.000428082191780822, 
    0.000428082191780822, 0.000428082191780822, 0.000428082191780822, 
    0.000428082191780822, 0.000428082191780822, 0.000428082191780822
    ), `cum frequency` = c(0.556506849315068, 0.556934931506849, 
    0.55736301369863, 0.557791095890411, 0.558219178082192, 0.558647260273973, 
    0.559075342465753, 0.559503424657534, 0.559931506849315, 
    0.560359589041096, 0.560787671232877, 0.561215753424657, 
    0.561643835616438, 0.562071917808219, 0.5625, 0.562928082191781, 
    0.563356164383562, 0.563784246575342, 0.564212328767123, 
    0.564640410958904, 0.565068493150685, 0.565496575342466, 
    0.565924657534247, 0.566352739726027, 0.566780821917808, 
    0.567208904109589, 0.56763698630137, 0.568065068493151, 0.568493150684931, 
    0.568921232876712, 0.569349315068493, 0.569777397260274, 
    0.570205479452055, 0.570633561643836, 0.571061643835616, 
    0.571489726027397, 0.571917808219178, 0.572345890410959, 
    0.57277397260274, 0.57320205479452, 0.573630136986301, 0.574058219178082, 
    0.574486301369863, 0.574914383561644, 0.575342465753425, 
    0.575770547945205, 0.576198630136986, 0.576626712328767, 
    0.577054794520548, 0.577482876712329, 0.57791095890411, 0.57833904109589, 
    0.578767123287671, 0.579195205479452, 0.579623287671233, 
    0.580051369863014, 0.580479452054794, 0.580907534246575, 
    0.581335616438356, 0.581763698630137, 0.582191780821918, 
    0.582619863013699, 0.583047945205479, 0.58347602739726, 0.583904109589041, 
    0.584332191780822, 0.584760273972603, 0.585188356164384, 
    0.585616438356164, 0.586044520547945, 0.586472602739726, 
    0.586900684931507, 0.587328767123288, 0.587756849315068, 
    0.588184931506849, 0.58861301369863)), row.names = 1300:1375, class = "data.frame")

Also, I succeeded in organizing the x labels by group with the function interaction(). However, I have not managed to sort the bars in increasing order within each group. Is that possible?

Upvotes: 2

Views: 2355

Answers (3)

C. Guff
C. Guff

Reputation: 478

Finally,

I obtained this graph: [image: Final graph]

by using the code below, inspired by Duck's update. I plotted the cumulative frequency instead of the value. I divided the cumulative count by 2.336 (the total number of observations, 2336, divided by 1000) so that it fits within the y-axis limits. I also added a second axis:

# Manual reorganization: the factor levels are listed in the order in which
# the families should appear along the x axis.
ZOO_long2$Family <- factor(
  ZOO_long2$Family,
  levels = c(
    "Leptodoridae", "Macrothricidae", "Cladocera f.", "Moinidae",
    "Polyphemidae", "Holopediidae", "Chydoridae", "Sididae", "Daphnidae",
    "Bosminidae", "Copepoda f.", "Harpacticoida", "Calanoida",
    "Nauplius larvae", "Cyclopoidae", "Rotifera f."
  )
)

# Graph: bars show the number of observations per Group/Family; the line
# shows the running total of those counts, rescaled onto the bar axis.
ZOO_long2 %>%
  group_by(Group, Family) %>%
  summarise(Val = n()) %>%   # Val = number of rows per Group/Family
  ungroup() %>%
  mutate(Cum = cumsum(Val)) %>%   # running total in summarised row order
  ggplot(aes(x = Family, y = Val, fill = Group)) +
  geom_bar(stat = "identity", width = 0.7, color = "black", alpha = 9/10) +
  # Cum / 2.336 maps a cumulative count of 2336 (the total number of
  # observations) to 1000 on the primary axis; + 5 lifts the line slightly.
  geom_line(aes(x = Family, y = ((Cum / 2.336) + 5), group = 1)) +
  # Only named labels are passed to labs(); a bare `legend` would refer to
  # the graphics::legend() function and would not set any label.
  labs(title = "Observations of zooplancton families", subtitle = "by Groups") +
  geom_text(aes(label = Val), position = position_dodge(0.9), vjust = -0.5) +
  theme_bw() +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    legend.background = element_rect(
      fill = "grey90", size = 0.5, linetype = "solid", colour = "black"
    ),
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
  ) +
  xlab("Families") +
  # No separate ylim() call: scale_y_continuous() is the single y scale of
  # this plot. Adding ylim() as well only makes ggplot2 replace one y scale
  # with the other, so its limits would not be applied.
  # Secondary axis: b = y / 5 and the label is b / 200 * 100, i.e. y / 10
  # percent, so y = 1000 on the primary axis reads as 100%.
  scale_y_continuous(
    name = "Observations",
    sec.axis = sec_axis(
      ~ . / 5,
      name = "Cumulative frequency",
      labels = function(b) {
        paste0(round((b / 200) * 100, 0), "%")
      }
    )
  )

I used the ggplot answer instead of the ggpubr one because I'm not used to working with the latter.

Thank you for your time, Duck and stefanH.

Upvotes: 5

stefanH
stefanH

Reputation: 353

A ggpubr approach

Utilizing Duck's group_by() and mutate() together with fct_reorder(), you can pipe the data into ggpubr's ggbarplot() function, which has an easy interface to show the frequencies.

library(tidyverse)
library(ggpubr)


# Count the observations per Group/Family, order the families by count and
# draw them with ggpubr's bar plot helper.
ZOO_long2 %>%
  group_by(Group, Family) %>%
  summarise(Val = n()) %>%
  ungroup() %>%
  # Running total of the counts (computed here but not used by this plot)
  mutate(Cum = cumsum(Val)) %>%
  # Sort the data: families ordered by their count
  mutate(Family = fct_reorder(Family, Val)) %>%
  # Plot the data
  ggpubr::ggbarplot(.,
                    x = "Family",
                    y = "Val",
                    fill = "Group",
                    label = TRUE,       # print the count on each bar
                    lab.pos = "out",    # label placed outside the bar
                    lab.col = "black",
                    ylim = c(0, 100)
                    )

enter image description here

EDIT to address @C.Guffins' question

Add a geom_line as you would in ggplot

# Same ggpubr bar plot as before, with a ggplot2 line layer added on top.
ZOO_long2 %>%
  group_by(Group, Family) %>%
  summarise(Val = n()) %>%
  ungroup() %>%
  mutate(
    # Running total of the counts (not used by the layers below)
    Cum = cumsum(Val),
    # Families ordered by their count
    Family = fct_reorder(Family, Val)
  ) %>%
  ggpubr::ggbarplot(
    x = "Family",
    y = "Val",
    fill = "Group",
    label = TRUE,
    lab.pos = "out",
    lab.col = "black",
    ylim = c(0, 100)
  ) +
  # group = 1 joins all families with a single line
  geom_line(mapping = aes(x = Family, y = Val, group = 1))

enter image description here

Upvotes: 2

Duck
Duck

Reputation: 39585

Maybe this can help. You can compute the counts directly and also the cumulative variable using dplyr. After that you can sketch the plot using ggplot2:

library(ggplot2)
library(dplyr)
#Code
# Count the observations per Group/Family and plot them as bars ordered by
# increasing count, with the count printed above each bar.
ZOO_long2 %>%
  group_by(Group, Family) %>%
  summarise(Val = n()) %>%
  ungroup() %>%
  # Running total of the counts (computed here but not used by this plot)
  mutate(Cum = cumsum(Val)) %>%
  ggplot(aes(x = reorder(Family, Val), y = Val, fill = Group)) +
  geom_bar(stat = "identity", width = 0.7, color = "black", alpha = 9/10) +
  # Only named labels are passed to labs(); a bare `legend` would refer to
  # the graphics::legend() function and would not set any label.
  labs(title = "Observations of zooplancton family", subtitle = "by Groups") +
  geom_text(aes(label = Val), position = position_dodge(0.9), vjust = -0.5) +
  ylim(0, 1000) +
  theme_bw() +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    legend.background = element_rect(
      fill = "grey90", size = 0.5, linetype = "solid", colour = "black"
    ),
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
  ) +
  xlab("Family")

Output:

enter image description here

Update: If you want to add a line try this:

#Code 2
# Same bar chart as the first snippet, plus a line that follows the
# per-family counts.
ZOO_long2 %>%
  group_by(Group, Family) %>%
  summarise(Val = n()) %>%
  ungroup() %>%
  # Running total of the counts (computed here but not used by this plot)
  mutate(Cum = cumsum(Val)) %>%
  ggplot(aes(x = reorder(Family, Val), y = Val, fill = Group)) +
  geom_bar(stat = "identity", width = 0.7, color = "black", alpha = 9/10) +
  # Line drawn 25 units above each count; group = 1 joins all families
  geom_line(aes(reorder(Family, Val), y = Val + 25, group = 1)) +
  # Only named labels are passed to labs(); a bare `legend` would refer to
  # the graphics::legend() function and would not set any label.
  labs(title = "Observations of zooplancton family", subtitle = "by Groups") +
  geom_text(aes(label = Val), position = position_dodge(0.9), vjust = -0.5) +
  ylim(0, 1000) +
  theme_bw() +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    legend.background = element_rect(
      fill = "grey90", size = 0.5, linetype = "solid", colour = "black"
    ),
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
  ) +
  xlab("Family")

Output:

enter image description here

Upvotes: 1

Related Questions