Reputation: 31
I am a master's student working on a data science and visualization project with the GBD data. I want to draw a bar graph that shows the total attributable DALYs of three different cancers in five different SDI regions, with the bars for males and females placed side by side for comparison. The graph should look something like this (in my graph the y-axis should show the five SDI locations, while each stacked bar shows the number of DALYs for each cancer):
Here is my code for visualizing the bar graph:
# Stacked horizontal bars of DALYs per SDI location, one facet panel per sex.
# Row selection, using the codes visible in the sample data:
#   rei_id 110 = "Dietary risks", metric_id 1 = "Number",
#   sex_id 1 / 2 = "Male" / "Female".
# NOTE(review): measure_id 2, location_id 1, sex_id 3 and age_id 22 are
# presumably DALYs, "Global", "Both" and "All ages" - confirm against the
# GBD codebook.
globaldata %>%
  filter(
    measure_id == 2,
    location_id != 1,
    sex_id != 3,
    age_id == 22,
    rei_id == 110,
    metric_id == 1,
    year == 2021
  ) %>%
  ggplot(aes(x = reorder(location_name, val), y = val, fill = cause_name)) +
  # geom_col() draws bars whose height is the value itself; fill stacks them.
  geom_col() +
  # Separate panel for each sex, stacked vertically.
  facet_wrap(~sex_name, ncol = 1) +
  theme_classic() +
  labs(
    x = "Location",
    y = "Total DALYs attributable to dietary risks",
    title = "2021",
    fill = "GI cancer"
  ) +
  # Flip so that locations run along the vertical axis.
  coord_flip()
Here is what it looked like: enter image description here I had difficulty drawing the side-by-side graph based on the variable 'sex_id': when I used position='dodge' within the geom_bar() function, it placed the bars side by side by cancer (variable 'cause_name') instead of by sex. When I used facet_wrap, it showed two separate stacked bar graphs. I wish to combine them into one graph, with the female and male stacked bars placed side by side for each location.
Attached is the dput() output of a sample of my data:
# Sample of the question's data as printed by dput(): a 15-row tibble
# (class "tbl_df") with one row per location / sex / cause combination.
# NOTE(review): every row in this sample has year = 1990, while the plotting
# code in the question filters on year == 2021 - that filter keeps no rows of
# this sample, so adjust the year when reproducing the plot from it.
structure(list(measure_id = c(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2), location_id = c(44634, 44634, 44635, 44635, 44634,
44634, 44634, 44634, 44636, 44636, 44637, 44637, 44639, 44639,
44635), location_name = c("High-middle SDI", "High-middle SDI",
"High SDI", "High SDI", "High-middle SDI", "High-middle SDI",
"High-middle SDI", "High-middle SDI", "Low-middle SDI", "Low-middle SDI",
"Low SDI", "Low SDI", "Middle SDI", "Middle SDI", "High SDI"),
sex_id = c(1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1),
sex_name = c("Male", "Female", "Male", "Female", "Male",
"Female", "Male", "Female", "Male", "Female", "Male", "Female",
"Male", "Female", "Male"), age_id = c(22, 22, 22, 22, 22,
22, 22, 22, 22, 22, 22, 22, 22, 22, 22), cause_id = c(411,
411, 411, 411, 441, 441, 414, 414, 411, 411, 411, 411, 411,
411, 441), cause_name = c("Esophageal cancer", "Esophageal cancer",
"Esophageal cancer", "Esophageal cancer", "Colon and rectum cancer",
"Colon and rectum cancer", "Stomach cancer", "Stomach cancer",
"Esophageal cancer", "Esophageal cancer", "Esophageal cancer",
"Esophageal cancer", "Esophageal cancer", "Esophageal cancer",
"Colon and rectum cancer"), rei_id = c(110, 110, 110, 110,
110, 110, 110, 110, 110, 110, 110, 110, 110, 110, 110), rei_name = c("Dietary risks",
"Dietary risks", "Dietary risks", "Dietary risks", "Dietary risks",
"Dietary risks", "Dietary risks", "Dietary risks", "Dietary risks",
"Dietary risks", "Dietary risks", "Dietary risks", "Dietary risks",
"Dietary risks", "Dietary risks"), metric_id = c(1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), metric_name = c("Number",
"Number", "Number", "Number", "Number", "Number", "Number",
"Number", "Number", "Number", "Number", "Number", "Number",
"Number", "Number"), year = c(1990, 1990, 1990, 1990, 1990,
1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990, 1990
), val = c(460695.042829528, 145049.177193469, 195007.739437153,
53050.8912015806, 900498.811619103, 865347.948534737, 444699.585454242,
208852.577212405, 101958.484232596, 74652.7533758179, 66135.6911372065,
51358.063337564, 627554.246311558, 249829.178558246, 1059015.25574152
)), row.names = c(NA, -15L), class = c("tbl_df", "tbl", "data.frame"
))
Upvotes: 3
Views: 48
Reputation: 19169
You can try the following, which facets on the SDI location and positions on the sex:
# NOTE(review): `data` is not defined in this snippet; presumably it holds the
# question's sample data frame - confirm before running.

# Total `val` per sex within each location; used only to position the
# one-letter sex marker at the end of each stacked bar.
sex_data <- data %>%
  summarise(val = sum(val), .by = c(sex_name, location_name))

# One facet row per location; inside each row, one stacked bar per sex.
ggplot(data, aes(x = reorder(sex_name, val), y = val)) +
  # width = 1 makes the two sex bars of a location touch each other.
  geom_col(aes(fill = cause_name), position = "stack", width = 1) +
  # Location strips go on the left so they read like axis labels.
  facet_grid(location_name ~ ., switch = "y") +
  # First letter of the sex name ("M" / "F"), nudged past the bar end.
  geom_text(
    data = sex_data,
    aes(label = substr(sex_name, 1, 1)),
    hjust = -1
  ) +
  # Extra room at the upper end of the value axis so the markers are not cut.
  scale_y_continuous(expand = expansion(mult = c(0.01, 0.08))) +
  theme_classic() +
  theme(
    # The sex axis is replaced by the in-plot markers, so hide it entirely.
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    # Keep the location strip labels horizontal.
    strip.text.y.left = element_text(angle = 0)
  ) +
  labs(
    title = "2021",
    x = "Location",
    y = "Total DALYs attributable to dietary risks",
    fill = "GI cancer"
  ) +
  coord_flip()
Upvotes: 2
Reputation: 125163
Following "GGplot: Two stacked bar plots side by side (not facets)", one possible option would be to use two separate geom_col layers
to draw the stacked bars for females and males, and to manually dodge or shift the bars, which requires converting location_name
to a numeric plus some additional data wrangling:
library(ggplot2)
# Spell out `warn.conflicts`; `warn = FALSE` only worked through partial
# argument matching.
library(dplyr, warn.conflicts = FALSE)

# Manual dodging needs a numeric position per location plus a signed offset
# per sex, because stacked bars cannot be dodged by a second variable directly.
globaldata <- globaldata |>
  mutate(
    # Order the locations by `val` (reorder() summarises with the mean).
    location_name = reorder(location_name, val),
    # Integer level code = base position of the location on the numeric axis.
    location_name_num = as.numeric(location_name),
    # Two sexes -> level codes 1 and 2, rescaled to -1 and 1 so each sex is
    # shifted to its own side of the location's base position.
    sex_name_num = as.numeric(factor(sex_name)),
    sex_name_num = scales::rescale(sex_name_num, to = c(-1, 1))
  )

width <- .9 / 2 # Bar Width
padding <- .025 # Padding between male and female bars.

# Axis ticks sit at the location level codes and show the level names.
# Taking both from levels() keeps them aligned by code and passes plain
# character labels to the scale.
location_levels <- levels(globaldata$location_name)

ggplot(
  globaldata,
  aes(
    x = val,
    # Base position of the location, shifted half a bar width per sex.
    y = location_name_num + width / 2 * sex_name_num,
    fill = cause_name
  )
) +
  # One stacked horizontal bar layer per sex.
  geom_col(
    data = ~ filter(., sex_name == "Female"),
    width = width - padding, orientation = "y"
  ) +
  geom_col(
    data = ~ filter(., sex_name == "Male"),
    width = width - padding, orientation = "y"
  ) +
  # "F" / "M" marker at the end of each stacked bar: the label is placed at
  # the sum of `val` for that bar, left-aligned, with no box or fill.
  stat_summary(
    data = ~ filter(., sex_name == "Female"),
    aes(label = "F"),
    orientation = "y",
    fun = sum,
    geom = "label",
    fill = NA,
    label.size = 0,
    hjust = 0
  ) +
  stat_summary(
    data = ~ filter(., sex_name == "Male"),
    aes(label = "M"),
    orientation = "y",
    fun = sum,
    geom = "label",
    fill = NA,
    label.size = 0,
    hjust = 0
  ) +
  scale_y_continuous(
    breaks = seq_along(location_levels),
    labels = location_levels
  ) +
  facet_wrap(~year, ncol = 1) +
  theme_classic() +
  # `val` is mapped to x and the location to y (no coord_flip() here), so the
  # DALY title belongs on x and "Location" on y.
  labs(
    x = "Total DALYs attributable to dietary risks", y = "Location",
    title = "2021", fill = "GI cancer"
  )
Upvotes: 2