Reputation: 39
Forgive me if this has already been asked and answered, but so far I have not found a solution that meets my use case. If you know of one, please point me in the right direction.
Here is a small working sample of my code:
library(tidyverse)
# Sample data: six hand-entered parallel vectors, one element per row of the
# data frame built at the end of this block.
# source: coded "D" or "P".
source <- c("D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P")
# subject: coded "M" or "R".
subject <- c("M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R")
# grade: numeric grade level, 1 or 2.
grade <- c(1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 2, 2, 2, 1, 1, 2, 2, 2, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2)
# domain: one of "Alg", "Geo", "Comp", "Voc".
domain <- c("Alg", "Alg", "Alg", "Alg", "Alg", "Alg", "Geo", "Geo", "Geo", "Geo", "Geo", "Geo", "Comp", "Comp", "Comp", "Comp", "Comp", "Comp", "Voc", "Voc", "Voc", "Voc", "Voc", "Voc", "Alg", "Alg", "Alg", "Alg", "Alg", "Geo", "Geo", "Geo", "Geo", "Geo", "Comp", "Comp", "Comp", "Comp", "Comp", "Voc", "Voc", "Voc", "Voc", "Voc", "Voc")
# placement: category "A", "B" or "C"; later mapped to the bar fill colour.
placement <- c("A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A")
# qty: numeric quantity for each row; later mapped to the bar height.
qty <- c(425, 389, 96, 460, 293, 163, 518, 291, 101, 366, 349, 201, 889, 661, 150, 680, 617, 465, 445, 293, 112, 381, 292, 208, 223, 232, 131, 270, 72, 27, 45, 9, 99, 40, 79, 194, 72, 126, 133, 123, 456, 98, 234, 432, 65)
# Combine the vectors column-wise; the column names come from the variable names.
test <- data.frame(source, subject, grade, domain, placement, qty)
# Proportional stacked bars of qty by grade, coloured by placement, with one
# facet per subject/domain pair and a text label on every bar segment.
plot4 <- ggplot(
  data = test,
  mapping = aes(x = grade, y = qty, fill = placement)
) +
  # Bars: qty is used as given and every stack is rescaled to a height of 1.
  geom_col(position = "fill") +
  # Labels: stat_count() tallies rows per x/fill combination, so the text shows
  # row counts (and their share of all counted rows), not the qty values.
  stat_count(
    geom = "text",
    mapping = aes(
      y = 0.5 * ..count..,
      label = paste(
        sprintf("%1.2f", ..count.. / sum(..count..) * 100),
        "%\n",
        ..count..
      )
    ),
    position = position_fill(vjust = 0.5),
    size = 2.5,
    colour = "black"
  ) +
  facet_wrap(vars(subject, domain), scales = "free_x") +
  scale_fill_manual(
    "Placement",
    values = c("#70e65e", "#ebeb4d", "#de7e7e"),
    labels = c("A", "B", "C")
  ) +
  scale_y_continuous("Prop place") +
  scale_x_discrete(
    "Grade",
    limits = c(1, 2),
    labels = c("1st", "2nd"),
    guide = guide_axis(angle = 90)
  ) +
  # Hide the y-axis title and tick labels.
  theme(
    axis.text.y = element_blank(),
    axis.title.y = element_blank()
  )

# Draw the plot.
plot4
which produces the following plot:
What I need for each grade level (in this case, 1st and 2nd) is to have two stacked bars with the data centered in each region of the stack. There should be one bar from each source ("D" and "P") associated with each grade level.
I do not know how to add a second variable to the x-axis to create a bar from each source FOR EACH grade. I can use either grade or source, but not both.
*BONUS POINTS for helping me figure out the stat_count line. I need the n() and % of each stack relative to the total for each source/grade/placement value. These numbers aren't even close, but this code has worked for me on less complicated plots.
Upvotes: 1
Views: 155
Reputation: 39
Thank you @e.matt. With your answer (and a little tweaking) I was able to get this exactly how I wanted it. I used `mutate()` to add a `pcnt` column, making the `geom_text()` layer a bit easier to work with. The `interaction()` function worked perfectly to give me two stacked and dodged bar charts per category (Grade in this case). Below is the modified solution code that I got working:
# Sample data: six hand-entered parallel vectors, one element per observation.
# source: coded "D" or "P".
source <- c("D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "D", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P", "P")
# subject: coded "M" or "R".
subject <- c("M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R", "R")
# grade: numeric grade level, 1 or 2.
grade <- c(1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2, 1, 1, 2, 2, 2, 1, 1, 2, 2, 2, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2)
# domain: one of "Alg", "Geo", "Comp", "Voc".
domain <- c("Alg", "Alg", "Alg", "Alg", "Alg", "Alg", "Geo", "Geo", "Geo", "Geo", "Geo", "Geo", "Comp", "Comp", "Comp", "Comp", "Comp", "Comp", "Voc", "Voc", "Voc", "Voc", "Voc", "Voc", "Alg", "Alg", "Alg", "Alg", "Alg", "Geo", "Geo", "Geo", "Geo", "Geo", "Comp", "Comp", "Comp", "Comp", "Comp", "Voc", "Voc", "Voc", "Voc", "Voc", "Voc")
# placement: category "A", "B" or "C"; later mapped to the bar fill colour.
placement <- c("A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A", "B", "A", "C", "B", "A", "C", "B", "A", "C", "B", "A")
# qty: numeric quantity for each row; later mapped to the bar height.
qty <- c(425, 389, 96, 460, 293, 163, 518, 291, 101, 366, 349, 201, 889, 661, 150, 680, 617, 465, 445, 293, 112, 381, 292, 208, 223, 232, 131, 270, 72, 27, 45, 9, 99, 40, 79, 194, 72, 126, 133, 123, 456, 98, 234, 432, 65)
# pcnt is computed from qty by mutate() in the pipeline below, so no
# hand-entered pcnt vector is needed (the previous stand-alone vector was never
# attached to the data frame and did not match its 45 rows).
test <- data.frame(source, subject, grade, domain, placement, qty)

# For each subject/grade/domain/placement group, express every row's qty as a
# percentage of that group's total, rounded to two decimals.
# NOTE(review): `source` is not a grouping variable, so each percentage is a
# row's share across sources for the same placement; the labels inside a single
# bar therefore need not sum to 100 -- confirm this is the intended figure.
test <- test %>%
  group_by(subject, grade, domain, placement) %>%
  mutate(pcnt = round(qty / sum(qty) * 100, 2)) %>%
  # Drop the grouping so later steps do not silently operate per group.
  ungroup() %>%
  # Sort by domain, largest qty first within each domain.
  arrange(domain, desc(qty))
# Proportional stacked bars with one bar per source/grade combination, one
# facet per domain, and a "pcnt % / qty" label centred in every bar segment.
plot <- ggplot(
  data = test,
  mapping = aes(
    x = interaction(source, grade),
    y = qty,
    fill = placement
  )
) +
  # Bars: each stack is rescaled to a total height of 1.
  geom_col(position = "fill") +
  # Labels: stacked with the same fill position as the bars and centred in
  # their segment via vjust = 0.5.
  geom_text(
    mapping = aes(
      y = 0.5 * qty,
      label = paste(sprintf("%1.2f", pcnt), "%\n", qty)
    ),
    position = position_fill(vjust = 0.5),
    size = 2.5
  ) +
  facet_wrap(vars(domain), scales = "free_x") +
  scale_fill_manual(
    "Relative Placement",
    values = c("#70e65e", "#ebeb4d", "#de7e7e"),
    labels = c("A", "B", "C")
  ) +
  scale_y_continuous("Prop place") +
  # Tick labels are positional: they replace the interaction levels in order.
  scale_x_discrete(
    "Grade",
    labels = c("1.D", "1.P", "2.D", "2.P"),
    guide = guide_axis(angle = 90)
  )

# Draw the plot.
plot
Upvotes: 0
Reputation: 886
This may be what you are after
# Label data for the stacked bars: one row per bar segment.
# mutate() is used instead of summarise() for two reasons:
#   * it keeps every original column, in particular `placement`, which the plot
#     maps to fill and which the label layer needs so that labels stack in the
#     same order as the bar segments;
#   * summarise() returning several rows per group is deprecated since
#     dplyr 1.1.0.
my_labels <- test %>%
  # One group per bar: a grade/source combination within a subject/domain facet.
  group_by(grade, source, domain, subject) %>%
  mutate(
    n = qty,             # quantity printed on the label
    p = qty / sum(qty)   # segment's share of its bar
  ) %>%
  ungroup() %>%
  mutate(
    # Label text: quantity on the first line, percentage on the second.
    lab = paste(n, "\n", sprintf("%1.2f", p * 100), "%"),
    # Same grade/source combination that the plot uses on its x axis.
    x = interaction(grade, source)
  )
The `interaction()` function helps to get the combinations of grade and source for plotting:
# Proportional stacked bars with one bar per grade/source combination, one
# facet per subject/domain pair, and text labels taken from `my_labels`.
ggplot(
  data = test,
  mapping = aes(
    x = interaction(grade, source),
    y = qty,
    fill = placement
  )
) +
  # Bars: each stack is rescaled to a total height of 1.
  geom_col(position = "fill") +
  # Labels: stacked on the share `p`; change vjust to move each label within
  # its segment.
  geom_text(
    data = my_labels,
    mapping = aes(y = p, label = lab),
    position = position_stack(vjust = 0.2)
  ) +
  facet_wrap(vars(subject, domain), scales = "free_x") +
  scale_y_continuous("Prop place") +
  scale_fill_manual(
    "Placement",
    values = c("#70e65e", "#ebeb4d", "#de7e7e"),
    labels = c("A", "B", "C")
  ) +
  # New x tick labels, given in the order of the interaction levels.
  scale_x_discrete(
    "Grade",
    labels = c("1st D", "2nd D", "1st P", "2nd P"),
    guide = guide_axis(angle = 90)
  )
Some comments on the related question "Adding percentage labels to ggplot when using stat_count" may also help.
Upvotes: 1