Reputation: 530
EDIT: This has been solved and I have posted my learning and the code used at the bottom of the question
I would like to plot two datasets across the same x-axis, with the second dataset mirrored below the x-axis. I have attached the data sets below.
So far I have tried:
# Each dataset on its own: one bar per exon, stacked by variant class.
# Both of these render without trouble.
ggplot(plot_case, aes(x = Exon, fill = Variant_Classification)) +
  geom_bar()
ggplot(plot_control, aes(x = Exon, fill = Variant_Classification)) +
  geom_bar()
I tried to then create a negative set in the control group
# Flip the sign of every Exon value in the control table
plot_control$Exon <- as.numeric(plot_control$Exon) * -1
# Stack the case rows and the (now negated) control rows into one table
plot_all <- rbind(plot_case, plot_control)
# Plotting it gives me this
# NOTE(review): the negation above is applied to Exon, which is mapped to x
# here, so the control rows move along the x-axis rather than below it;
# presumably the count (y) is what needs its sign flipped - confirm.
ggplot(data = plot_all) + geom_col(mapping = aes(x= Exon, y=stat_count(Exon), fill = Variant_Classification))
When I actually want the second dataset below the first one like this picture:
Your help would be much appreciated, full data set below
head(plot_case)
Variant_Classification Exon
1: Nonsense_Mutation 22
2: Frame_Shift_Del 28
3: Nonsense_Mutation 7
4: Missense_Mutation 27
5: Missense_Mutation 28
6: Missense_Mutation 18
full:
dput(plot_case)
structure(list(Variant_Classification = structure(c(5L, 1L, 5L,
4L, 4L, 4L, 5L, 2L, 5L, 2L, 5L, 4L, 1L, 2L, 1L, 4L, 5L, 5L, 5L,
5L, 6L, 5L, 3L, 2L, 3L, 4L, 4L), .Label = c("Frame_Shift_Del",
"Frame_Shift_Ins", "In_Frame_Del", "Missense_Mutation", "Nonsense_Mutation",
"Splice_Site"), class = "factor"), Exon = structure(c(22L, 28L,
7L, 27L, 28L, 18L, 12L, 18L, 20L, 26L, 21L, 11L, 12L, 7L, 14L,
13L, 22L, 20L, 15L, 20L, 20L, 21L, 19L, 7L, 3L, 11L, 4L), .Label = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24",
"25", "26", "27", "28", "29", "30"), class = "factor")), row.names = c(NA,
-27L), class = c("data.table", "data.frame"), .internal.selfref = <pointer: 0x5642b661b980>)
head(plot_control)
Variant_Classification Exon
1: Missense_Mutation 1
2: Missense_Mutation 1
3: Missense_Mutation 1
4: Missense_Mutation 1
5: Missense_Mutation 1
6: Missense_Mutation 1
dput(plot_control)
structure(list(Variant_Classification = structure(c(4L, 4L, 4L,
4L, 4L, 4L, 4L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 7L, 4L, 4L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
2L, 2L, 4L, 5L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 7L, 7L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
5L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 6L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 4L, 4L, 7L, 7L,
5L, 4L, 4L, 4L, 4L, 4L, 5L, 4L, 1L, 4L, 4L, 4L, 4L, 4L, 1L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 7L, 7L, 4L, 4L, 4L, 4L, 5L, 4L, 4L, 4L,
4L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 5L), .Label = c("Frame_Shift_Del", "Frame_Shift_Ins",
"In_Frame_Del", "Missense_Mutation", "Nonsense_Mutation", "Nonstop_Mutation",
"Splice_Site"), class = "factor"), Exon = c(1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8,
8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 17, 17, 17, 17, 17, 17,
17, 18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19,
20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23,
23, 23, 23, 23, 23, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, 25,
25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27,
28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 29, 29, 29, 29, 29,
29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29)), row.names = c(NA,
-456L), class = c("data.table", "data.frame"), .internal.selfref = <pointer: 0x56115dc87e30>)
Thanks to further help I ran the code suggested below and got the following image:
I now need to get the axis to be more informative so as not to lose all the data
With further help from @__S I used:
# Label every row with the table it came from and stack the two tables,
# control rows first.
bind_rows(
  mutate(plot_control, type = "control"),
  mutate(plot_case, type = "case")
) %>%
  # Collapse to a frequency table: one row per variant class / exon / origin
  group_by(Variant_Classification, Exon, type) %>%
  summarise(freq = n()) %>%
  # Control counts become negative log counts so they hang below zero;
  # case counts are left as they are
  mutate(freq = ifelse(type == "control", -log(freq), freq)) %>%
  ggplot(mapping = aes(x = Exon, y = freq, fill = Variant_Classification)) +
  geom_col() +
  geom_hline(yintercept = 0)
Which looks awesome! Looking at the accepted answer, the problem was that my dataset needed tidying up before putting it through ggplot. I needed to turn it into a frequency-based table, which is what the group_by and summarise dplyr functions helped to do.
Upvotes: 1
Views: 549
Reputation: 9495
What about something like this:
library(dplyr)
library(ggplot2)  # provides ggplot(), aes(), geom_col(), geom_hline(), xlab()

bind_rows(
  # In the question's dput output Exon is a factor in plot_case but numeric
  # in plot_control. Going through as.character() makes a factor yield its
  # labels rather than its internal level codes, and is harmless for numbers.
  plot_control %>%
    mutate(type = "control", Exon = as.numeric(as.character(Exon))),
  plot_case %>%
    mutate(type = "case", Exon = as.numeric(as.character(Exon)))
) %>%
  # One ungrouped row per (variant class, exon, origin) with its row count
  count(Variant_Classification, Exon, type, name = "freq") %>%
  # Edit2: join with a data.frame holding every exon number in the observed
  # range, so exons without any variant still get a slot on the x-axis
  right_join(
    data.frame(Exon = seq(min(.$Exon), max(.$Exon))),
    by = "Exon"
  ) %>%
  # Edit1: here you can add the log(). Control counts are drawn below zero
  # on a log scale; note that log(1) is 0, so a control count of exactly 1
  # is drawn with zero height.
  mutate(freq = ifelse(type == "control", -log(freq), freq)) %>%
  ggplot(aes(x = as.factor(Exon), y = freq, fill = Variant_Classification)) +
  geom_col() +
  geom_hline(yintercept = 0) +
  xlab("Exon")
With data:
# Small example tables. The header line names two columns while every data
# line carries three fields, so read.table() uses the first field ("1:",
# "2:", ...) as row names.
plot_case <- read.table(text = ' Variant_Classification Exon
1: Nonsense_Mutation 22
2: Frame_Shift_Del 28
3: Nonsense_Mutation 7
4: Missense_Mutation 27
5: Missense_Mutation 28
6: Missense_Mutation 18
7: Missense_Mutation 1', header = TRUE)
plot_control <- read.table(text = ' Variant_Classification Exon
1: Missense_Mutation 1
2: Missense_Mutation 1
3: Missense_Mutation 1
', header = TRUE)
Upvotes: 1