Reputation: 478
I would like to add a cumulative frequency line to a bar chart, but I am having difficulties. Here is the code for the bar chart:
# Bar chart of the number of observations per zooplankton family, with the
# bars placed on the x-axis by Family within Group.

# interaction(Family, Group) gives every Family/Group pair its own x position.
x_keys <- interaction(ZOO_long2$Family, ZOO_long2$Group)
# Keep one break/label pair per distinct x position instead of one per data
# row: the break is the interaction key, the label is the Family name only.
first_key <- !duplicated(x_keys)

GZ1 <- ggplot(data = ZOO_long2) +
  # No y aesthetic is mapped, so geom_bar() uses its default count statistic.
  geom_bar(
    aes(x = interaction(Family, Group), fill = Group),
    width = 0.7, color = "black", alpha = 9 / 10
  ) +
  scale_x_discrete(
    "Family",
    breaks = as.character(x_keys[first_key]),
    labels = as.character(ZOO_long2$Family[first_key])
  ) +
  # labs() takes name = value pairs; the bare `legend` that used to be passed
  # here is not a label and has been removed.
  labs(title = "Observations of zooplancton family", subtitle = "by Groups") +
  ylim(0, 1000) +
  theme_bw() +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    legend.background = element_rect(
      fill = "grey90", size = 0.5, linetype = "solid", colour = "black"
    ),
    # Vertical tick labels so the family names do not overlap.
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
  )
GZ1
And here is the dataset:
dput(ZOO_long2[1300:1375,])
structure(list(Obs = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
), Group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L), .Label = c("Cladocera", "Copepoda", "Rotifera"), class = "factor"),
Family = structure(c(8L, 8L, 4L, 8L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 8L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 7L, 7L, 7L, 7L, 7L,
14L, 14L, 14L, 14L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L), .Label = c("Bosminidae",
"Calanoida", "Chydoridae", "Cladocera f.", "Copepoda f.",
"Cyclopoida", "Cyclopoidae", "Daphnidae", "Harpacticoida",
"Holopediidae", "Leptodoridae", "Macrothricidae", "Moinidae",
"Nauplius larvae", "Polyphemidae", "Rotifera f.", "Sididae"
), class = "factor"), frequency = c(0.000428082191780822,
0.000428082191780822, 0.000428082191780822, 0.000428082191780822,
0.000428082191780822, 0.000428082191780822, 0.000428082191780822,
0.000428082191780822, 0.000428082191780822, 0.000428082191780822,
0.000428082191780822, 0.000428082191780822, 0.000428082191780822,
0.000428082191780822, 0.000428082191780822, 0.000428082191780822,
0.000428082191780822, 0.000428082191780822, 0.000428082191780822,
0.000428082191780822, 0.000428082191780822, 0.000428082191780822,
0.000428082191780822, 0.000428082191780822, 0.000428082191780822,
0.000428082191780822, 0.000428082191780822, 0.000428082191780822,
0.000428082191780822, 0.000428082191780822, 0.000428082191780822,
0.000428082191780822, 0.000428082191780822, 0.000428082191780822,
0.000428082191780822, 0.000428082191780822, 0.000428082191780822,
0.000428082191780822, 0.000428082191780822, 0.000428082191780822,
0.000428082191780822, 0.000428082191780822, 0.000428082191780822,
0.000428082191780822, 0.000428082191780822, 0.000428082191780822,
0.000428082191780822, 0.000428082191780822, 0.000428082191780822,
0.000428082191780822, 0.000428082191780822, 0.000428082191780822,
0.000428082191780822, 0.000428082191780822, 0.000428082191780822,
0.000428082191780822, 0.000428082191780822, 0.000428082191780822,
0.000428082191780822, 0.000428082191780822, 0.000428082191780822,
0.000428082191780822, 0.000428082191780822, 0.000428082191780822,
0.000428082191780822, 0.000428082191780822, 0.000428082191780822,
0.000428082191780822, 0.000428082191780822, 0.000428082191780822,
0.000428082191780822, 0.000428082191780822, 0.000428082191780822,
0.000428082191780822, 0.000428082191780822, 0.000428082191780822
), `cum frequency` = c(0.556506849315068, 0.556934931506849,
0.55736301369863, 0.557791095890411, 0.558219178082192, 0.558647260273973,
0.559075342465753, 0.559503424657534, 0.559931506849315,
0.560359589041096, 0.560787671232877, 0.561215753424657,
0.561643835616438, 0.562071917808219, 0.5625, 0.562928082191781,
0.563356164383562, 0.563784246575342, 0.564212328767123,
0.564640410958904, 0.565068493150685, 0.565496575342466,
0.565924657534247, 0.566352739726027, 0.566780821917808,
0.567208904109589, 0.56763698630137, 0.568065068493151, 0.568493150684931,
0.568921232876712, 0.569349315068493, 0.569777397260274,
0.570205479452055, 0.570633561643836, 0.571061643835616,
0.571489726027397, 0.571917808219178, 0.572345890410959,
0.57277397260274, 0.57320205479452, 0.573630136986301, 0.574058219178082,
0.574486301369863, 0.574914383561644, 0.575342465753425,
0.575770547945205, 0.576198630136986, 0.576626712328767,
0.577054794520548, 0.577482876712329, 0.57791095890411, 0.57833904109589,
0.578767123287671, 0.579195205479452, 0.579623287671233,
0.580051369863014, 0.580479452054794, 0.580907534246575,
0.581335616438356, 0.581763698630137, 0.582191780821918,
0.582619863013699, 0.583047945205479, 0.58347602739726, 0.583904109589041,
0.584332191780822, 0.584760273972603, 0.585188356164384,
0.585616438356164, 0.586044520547945, 0.586472602739726,
0.586900684931507, 0.587328767123288, 0.587756849315068,
0.588184931506849, 0.58861301369863)), row.names = 1300:1375, class = "data.frame")
Also, I managed to organize the x labels by group with the function interaction(), but I have not been able to sort the bars in increasing order within each group. Is that possible?
Upvotes: 2
Views: 2355
Reputation: 478
Finally, I used the code below, inspired by Duck's update. I plotted the cumulative frequency instead of the value. I divided it by 2.336 (the total number of observations, 2336, divided by 1000) so that it fits within the y-axis limits. I also added a second axis:
# Manual reorganization: fix the order of the families on the x-axis.
# NOTE(review): the levels appear to be listed group by group (Cladocera,
# Copepoda, Rotifera) -- confirm. "Cyclopoida" is a level in the dput() sample
# but is not listed here, so any rows with that value would become NA.
ZOO_long2$Family <- factor(
  ZOO_long2$Family,
  levels = c(
    "Leptodoridae", "Macrothricidae", "Cladocera f.", "Moinidae",
    "Polyphemidae", "Holopediidae", "Chydoridae", "Sididae", "Daphnidae",
    "Bosminidae", "Copepoda f.", "Harpacticoida", "Calanoida",
    "Nauplius larvae", "Cyclopoidae", "Rotifera f."
  )
)

# Graph: bars show the number of observations per family (left axis); the line
# shows the running total of observations, rescaled so that the grand total
# sits at 1000 on the left axis and reads as 100% on the right axis.
ZOO_long2 %>%
  group_by(Group, Family) %>%
  summarise(Val = n()) %>%
  ungroup() %>%
  # Accumulate in x-axis (factor level) order so the line rises left to right.
  arrange(Family) %>%
  mutate(
    Cum = cumsum(Val),
    # sum(Val) / 1000 replaces the hard-coded 2.336 (2336 observations / 1000).
    # The + 5 is a small lift that keeps the line clear of the bar tops.
    CumScaled = Cum / (sum(Val) / 1000) + 5
  ) %>%
  ggplot(aes(x = Family, y = Val, fill = Group)) +
  geom_bar(
    stat = "identity",
    width = 0.7, color = "black", alpha = 9 / 10
  ) +
  # group = 1 joins all families with one line instead of one line per fill.
  geom_line(aes(x = Family, y = CumScaled, group = 1)) +
  # labs() takes name = value pairs; the bare `legend` that used to be passed
  # here is not a label and has been removed.
  labs(title = "Observations of zooplancton families", subtitle = "by Groups") +
  geom_text(aes(label = Val), position = position_dodge(0.9), vjust = -0.5) +
  theme_bw() +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    legend.background = element_rect(
      fill = "grey90", size = 0.5, linetype = "solid", colour = "black"
    ),
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
  ) +
  xlab("Families") +
  # The former ylim(0, 1000) was dropped: this y scale replaced it anyway, so
  # it never took effect.
  scale_y_continuous(
    name = "Observations",
    # 1000 on the left axis corresponds to 100% on the right axis.
    sec.axis = sec_axis(
      ~ . / 10,
      name = "Cumulative frequency",
      labels = function(b) paste0(round(b, 0), "%")
    )
  )
I used the ggplot answer instead of the ggpubr one because I'm not accustomed to using the latter.
Thank you for your time, Duck and stefanH.
Upvotes: 5
Reputation: 353
A ggpubr approach
Using Duck's group_by() and mutate() together with fct_reorder(), you can pipe the data into ggpubr's ggbarplot() function, which has an easy interface for showing the frequencies.
# The package name was misspelled as "tidyvserse", which fails to load.
# tidyverse supplies the pipe, the dplyr verbs and fct_reorder() used below.
library(tidyverse)
library(ggpubr)

# Count the observations per Group/Family pair and draw them as labelled bars.
ZOO_long2 %>%
  group_by(Group, Family) %>%
  summarise(Val = n()) %>%
  ungroup() %>%
  mutate(Cum = cumsum(Val)) %>%
  # sort the data: order the Family levels by their counts
  mutate(Family = fct_reorder(Family, Val)) %>%
  # plot the data; column names are passed to ggbarplot() as strings
  ggpubr::ggbarplot(.,
    x = "Family",
    y = "Val",
    fill = "Group",
    label = TRUE,
    lab.pos = "out",
    lab.col = "black",
    ylim = c(0, 100)
  )
EDIT to address @C.Guffins' question
Add a geom_line as you would in ggplot
# Labelled ggpubr bar plot of the per-family counts with a line layer on top.
ZOO_long2 %>%
  group_by(Group, Family) %>%
  summarise(Val = n()) %>%
  ungroup() %>%
  mutate(
    Cum = cumsum(Val),
    # sort the data: order the Family levels by their counts
    Family = fct_reorder(Family, Val)
  ) %>%
  # plot the data; the piped data frame becomes the first argument
  ggpubr::ggbarplot(
    x = "Family", y = "Val",
    fill = "Group",
    label = TRUE, lab.pos = "out", lab.col = "black",
    ylim = c(0, 100)
  ) +
  # group = 1 joins all families with one line
  geom_line(aes(x = Family, y = Val, group = 1))
Upvotes: 2
Reputation: 39585
Maybe this can help. You can compute the counts directly, and also the cumulative variable, using dplyr. After that you can sketch the plot using ggplot2:
library(ggplot2)
library(dplyr)

# Code: count the observations per Group/Family pair, then draw one bar per
# family, ordered by count and labelled with the count.
ZOO_long2 %>%
  group_by(Group, Family) %>%
  summarise(Val = n()) %>%
  ungroup() %>%
  mutate(Cum = cumsum(Val)) %>%
  # reorder(Family, Val) sorts the x-axis by the number of observations.
  ggplot(aes(x = reorder(Family, Val), y = Val, fill = Group)) +
  geom_bar(
    stat = "identity",
    width = 0.7, color = "black", alpha = 9 / 10
  ) +
  # labs() takes name = value pairs; the bare `legend` that used to be passed
  # here is not a label and has been removed.
  labs(title = "Observations of zooplancton family", subtitle = "by Groups") +
  geom_text(aes(label = Val), position = position_dodge(0.9), vjust = -0.5) +
  ylim(0, 1000) +
  theme_bw() +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    legend.background = element_rect(
      fill = "grey90", size = 0.5, linetype = "solid", colour = "black"
    ),
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
  ) +
  xlab("Family")
Output:
Update: If you want to add a line try this:
# Code 2: the same bar chart with a line drawn just above the bars.
ZOO_long2 %>%
  group_by(Group, Family) %>%
  summarise(Val = n()) %>%
  ungroup() %>%
  mutate(Cum = cumsum(Val)) %>%
  ggplot(aes(x = reorder(Family, Val), y = Val, fill = Group)) +
  geom_bar(
    stat = "identity",
    width = 0.7, color = "black", alpha = 9 / 10
  ) +
  # The line follows the per-family count (Val) lifted by 25, not the Cum
  # column; group = 1 joins all families with one line.
  geom_line(aes(reorder(Family, Val), y = Val + 25, group = 1)) +
  # labs() takes name = value pairs; the bare `legend` that used to be passed
  # here is not a label and has been removed.
  labs(title = "Observations of zooplancton family", subtitle = "by Groups") +
  geom_text(aes(label = Val), position = position_dodge(0.9), vjust = -0.5) +
  ylim(0, 1000) +
  theme_bw() +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5),
    legend.background = element_rect(
      fill = "grey90", size = 0.5, linetype = "solid", colour = "black"
    ),
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
  ) +
  xlab("Family")
Output:
Upvotes: 1