Reputation: 530
head(dt1)
Variant_Classification Exon
1: Nonsense_Mutation 22
2: Frame_Shift_Del 28
3: Nonsense_Mutation 7
4: Missense_Mutation 27
5: Missense_Mutation 28
6: Missense_Mutation 18
full:
dput(plot_case)
structure(list(Variant_Classification = structure(c(5L, 1L, 5L,
4L, 4L, 4L, 5L, 2L, 5L, 2L, 5L, 4L, 1L, 2L, 1L, 4L, 5L, 5L, 5L,
5L, 6L, 5L, 3L, 2L, 3L, 4L, 4L), .Label = c("Frame_Shift_Del",
"Frame_Shift_Ins", "In_Frame_Del", "Missense_Mutation", "Nonsense_Mutation",
"Splice_Site"), class = "factor"), Exon = structure(c(22L, 28L,
7L, 27L, 28L, 18L, 12L, 18L, 20L, 26L, 21L, 11L, 12L, 7L, 14L,
13L, 22L, 20L, 15L, 20L, 20L, 21L, 19L, 7L, 3L, 11L, 4L), .Label = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24",
"25", "26", "27", "28", "29", "30"), class = "factor")), row.names = c(NA,
-27L), class = c("data.table", "data.frame"), .internal.selfref = <pointer: 0x5642b661b980>)
I have another dataset with the same structure, but much larger:
Variant_Classification Exon
1: Nonsense_Mutation 23
2: Frame_Shift_Del 22
3: Nonsense_Mutation 3
4: Missense_Mutation 27
5: Nonsense_Mutation 23
6: Frame_Shift_Del 1
(full dput for this table below)
I would like to create a lollipop plot whose x-axis spans all 30 exons. At each exon, the frequency of each occurrence, split by "Variant_Classification", should be drawn above the x-axis for one dataset and below it for the other, to allow for easy comparison, as in the diagram below (red is, say, nonsense and black could be frame_shift). Alternatively, if it is easier, a single bar per exon subdivided by colour for each type of variant would also work:
I really don't know where to start with this and any pointers would be much appreciated!
Full dput for table 2 below:
dput(plot_control)
structure(list(Variant_Classification = structure(c(4L, 4L, 4L,
4L, 4L, 4L, 4L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 7L, 4L, 4L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
2L, 2L, 4L, 5L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 7L, 7L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
5L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 6L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 4L, 4L, 7L, 7L,
5L, 4L, 4L, 4L, 4L, 4L, 5L, 4L, 1L, 4L, 4L, 4L, 4L, 4L, 1L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 7L, 7L, 4L, 4L, 4L, 4L, 5L, 4L, 4L, 4L,
4L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 5L), .Label = c("Frame_Shift_Del", "Frame_Shift_Ins",
"In_Frame_Del", "Missense_Mutation", "Nonsense_Mutation", "Nonstop_Mutation",
"Splice_Site"), class = "factor"), Exon = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 13L, 13L,
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 14L, 14L, 14L, 14L, 14L,
14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L,
14L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 17L, 17L,
17L, 17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L,
19L, 19L, 19L, 19L, 19L, 19L, 19L, 20L, 20L, 20L, 20L, 20L, 20L,
21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L,
21L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 23L, 23L,
23L, 23L, 23L, 23L, 23L, 24L, 24L, 25L, 25L, 25L, 25L, 25L, 25L,
25L, 25L, 25L, 25L, 25L, 25L, 25L, 26L, 26L, 26L, 26L, 26L, 26L,
26L, 26L, 27L, 27L, 27L, 27L, 28L, 28L, 28L, 28L, 28L, 28L, 28L,
28L, 28L, 28L, 28L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L,
29L, 29L, 29L, 29L, 29L, 29L, 29L), .Label = c("1", "2", "3",
"4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15",
"16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26",
"27", "28", "29", "30"), class = "factor")), row.names = c(NA,
-456L), class = c("data.table", "data.frame"), .internal.selfref = <pointer: 0x5642b661b980>)
>
So with help from @Roman I tried:
# Attempt at a two-dataset lollipop plot: count rows per exon and variant
# class in df1 and df2, stack the two count tables, and draw each count as a
# dodged point with a stem.
# NOTE(review): df1/df2 are presumably the plot_case / plot_control tables
# shown above -- confirm.
df1 %>% count(Exon=as.numeric(Exon), Variant_Classification) %>%
bind_rows(df2 %>%count(Exon=as.numeric(Exon), Variant_Classification) %>%
# NOTE(review): this mutate() sits inside the bind_rows() call, so ymin is
# only computed for the df2 counts; the df1 rows get NA for ymin when the
# tables are stacked. n is also never negated in this snippet, so n<0 is
# always FALSE here and both datasets are drawn on the same side of the
# axis -- confirm whether a mutate(n = -n) on the df2 counts was intended.
mutate(ymin = ifelse(n<0, -0.5, 0))) %>%
ggplot(aes(Exon, color = Variant_Classification)) +
# One point per (exon, variant class) count, dodged so classes sharing an
# exon sit side by side.
geom_point(aes(y = n), size=2,position = position_dodge(width = 0.5)) +
# Stem of each lollipop, from ymin up (or down) to the count.
geom_linerange(aes(xmax = Exon, ymin = ymin, ymax= n), position = position_dodge(width = 0.5)) +
scale_x_continuous(breaks = 1:30) +
# Breaks are fixed at -4..4; labels = abs prints magnitudes so values below
# the axis would read as positive counts.
scale_y_continuous(breaks = -4:4, labels = abs) +
geom_hline(yintercept = 0) +
# Exon numbers are written just under the zero line; this layer uses its own
# data and does not inherit the plot-level aesthetics.
geom_text(data = tibble(x=1:30, y=-0.1), aes(x = x, label = x, y=y), vjust= 1,inherit.aes = F)+
# Dataset labels placed at fixed positions y = -3 and y = 3.
annotate("label", x= 1, y = c(-3,3), label = c("DT2", "DT1")) +
theme_minimal() +
theme(legend.position = "bottom",
# The default x-axis tick labels are hidden (the geom_text layer above
# supplies the exon numbers instead).
axis.text.x = element_blank(),panel.grid.minor = element_blank())
But this gives me a very skewed axis because of the size imbalance between the two data frames:
Upvotes: 1
Views: 386
Reputation: 17668
You can try
library(tidyverse)
# Tally rows per exon and variant class. as.numeric() on the Exon factor
# returns its integer level codes.
# NOTE(review): `df` is presumably one of the tables from the question, whose
# Exon levels are "1".."30" in order so the codes equal the exon numbers --
# confirm for your data.
df_res <- count(
  as_tibble(df),
  Exon = as.numeric(Exon),
  Variant_Classification
)

# Mirrored lollipop plot: the counts are stacked with a negated copy of
# themselves so that every lollipop also appears below the zero line.
ggplot(
  bind_rows(df_res, mutate(df_res, n = -n)),
  aes(Exon, color = Variant_Classification)
) +
  # Lollipop heads, dodged so variant classes sharing an exon do not overlap.
  geom_point(
    aes(y = n),
    size = 2,
    position = position_dodge(width = 0.5)
  ) +
  # Lollipop stems, running from the zero line to each count.
  geom_linerange(
    aes(xmax = Exon, ymin = 0, ymax = n),
    position = position_dodge(width = 0.5)
  ) +
  # One tick per exon.
  scale_x_continuous(breaks = 1:30) +
  # Fixed breaks at -4..4; abs() makes the mirrored half read as positive.
  scale_y_continuous(breaks = -4:4, labels = abs) +
  geom_hline(yintercept = 0) +
  theme_bw() +
  theme(legend.position = "bottom")
And, similar to your drawing:
# Variant of the mirrored lollipop plot with the exon numbers written along
# the zero line instead of on the default x-axis. Uses the `df_res` count
# table (columns Exon, Variant_Classification, n) built earlier.
df_res %>%
  # Append a negated copy of the counts to get the half below the axis.
  bind_rows(df_res %>% mutate(n = -n)) %>%
  # Stems of the lower half start at -0.5 rather than 0, leaving a gap under
  # the zero line where the exon-number labels are drawn.
  mutate(ymin = ifelse(n < 0, -0.5, 0)) %>%
  ggplot(aes(Exon, color = Variant_Classification)) +
  # Lollipop heads, dodged so variant classes sharing an exon do not overlap.
  geom_point(
    aes(y = n),
    size = 2,
    position = position_dodge(width = 0.5)
  ) +
  # Lollipop stems from ymin to the (possibly negative) count.
  geom_linerange(
    aes(xmax = Exon, ymin = ymin, ymax = n),
    position = position_dodge(width = 0.5)
  ) +
  scale_x_continuous(breaks = 1:30) +
  # Fixed breaks at -4..4; abs() makes the mirrored half read as positive.
  scale_y_continuous(breaks = -4:4, labels = abs) +
  geom_hline(yintercept = 0) +
  # Exon numbers 1..30 just below the zero line. This layer has its own data,
  # so it must not inherit the plot-level aesthetics. FALSE is spelled out
  # because the shorthand F is an ordinary variable that can be reassigned.
  geom_text(
    data = tibble(x = 1:30, y = -0.1),
    aes(x = x, label = x, y = y),
    vjust = 1,
    inherit.aes = FALSE
  ) +
  # Dataset labels at fixed positions below (DT2) and above (DT1) the axis.
  annotate("label", x = 1, y = c(-3, 3), label = c("DT2", "DT1")) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    # Hide the default x tick labels; geom_text() above replaces them.
    axis.text.x = element_blank(),
    panel.grid.minor = element_blank()
  )
Also check position_dodge2, whose preserve argument takes one of the values c("total", "single").
Upvotes: 3