tacrolimus
tacrolimus

Reputation: 530

Lollipop plot mirrored above and below the x-axis in R

head(dt1)

   Variant_Classification Exon
1:      Nonsense_Mutation   22
2:        Frame_Shift_Del   28
3:      Nonsense_Mutation    7
4:      Missense_Mutation   27
5:      Missense_Mutation   28
6:      Missense_Mutation   18

full: 
dput(plot_case)
    structure(list(Variant_Classification = structure(c(5L, 1L, 5L, 
    4L, 4L, 4L, 5L, 2L, 5L, 2L, 5L, 4L, 1L, 2L, 1L, 4L, 5L, 5L, 5L, 
    5L, 6L, 5L, 3L, 2L, 3L, 4L, 4L), .Label = c("Frame_Shift_Del", 
    "Frame_Shift_Ins", "In_Frame_Del", "Missense_Mutation", "Nonsense_Mutation", 
    "Splice_Site"), class = "factor"), Exon = structure(c(22L, 28L, 
    7L, 27L, 28L, 18L, 12L, 18L, 20L, 26L, 21L, 11L, 12L, 7L, 14L, 
    13L, 22L, 20L, 15L, 20L, 20L, 21L, 19L, 7L, 3L, 11L, 4L), .Label = c("1", 
    "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", 
    "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", 
    "25", "26", "27", "28", "29", "30"), class = "factor")), row.names = c(NA, 
    -27L), class = c("data.table", "data.frame"), .internal.selfref = <pointer: 0x5642b661b980>)

I have another dataset which has the same structure but is much larger:

  Variant_Classification Exon
    1:      Nonsense_Mutation   23
    2:        Frame_Shift_Del   22
    3:      Nonsense_Mutation    3
    4:      Missense_Mutation   27
    5:      Nonsense_Mutation   23
    6:      Frame_Shift_Del   1

(full dput for this table below)

I would like to create a lollipop plot whose x-axis covers all exons (30 in total), with the frequency of each "Variant_Classification" per exon drawn above the x-axis for dataset 1 and below the x-axis for dataset 2, to allow for easy comparison. Something like the diagram below (red is, say, nonsense and black could be frame_shift), or, if it is easier, a single bar per exon subdivided by colour for each type of variant:

red =

I really don't know where to start with this and any pointers would be much appreciated!

Full dput for table 2 below:

dput(plot_control)
structure(list(Variant_Classification = structure(c(4L, 4L, 4L, 
4L, 4L, 4L, 4L, 2L, 2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 7L, 4L, 4L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
2L, 2L, 4L, 5L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 7L, 7L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
5L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 6L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 4L, 4L, 7L, 7L, 
5L, 4L, 4L, 4L, 4L, 4L, 5L, 4L, 1L, 4L, 4L, 4L, 4L, 4L, 1L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 7L, 7L, 4L, 4L, 4L, 4L, 5L, 4L, 4L, 4L, 
4L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 5L), .Label = c("Frame_Shift_Del", "Frame_Shift_Ins", 
"In_Frame_Del", "Missense_Mutation", "Nonsense_Mutation", "Nonstop_Mutation", 
"Splice_Site"), class = "factor"), Exon = structure(c(1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 
2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 12L, 
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 13L, 13L, 
13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 14L, 14L, 14L, 14L, 14L, 
14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 
14L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 17L, 17L, 
17L, 17L, 17L, 17L, 17L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 
19L, 19L, 19L, 19L, 19L, 19L, 19L, 20L, 20L, 20L, 20L, 20L, 20L, 
21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 
21L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 23L, 23L, 
23L, 23L, 23L, 23L, 23L, 24L, 24L, 25L, 25L, 25L, 25L, 25L, 25L, 
25L, 25L, 25L, 25L, 25L, 25L, 25L, 26L, 26L, 26L, 26L, 26L, 26L, 
26L, 26L, 27L, 27L, 27L, 27L, 28L, 28L, 28L, 28L, 28L, 28L, 28L, 
28L, 28L, 28L, 28L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 
29L, 29L, 29L, 29L, 29L, 29L, 29L), .Label = c("1", "2", "3", 
"4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", 
"16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", 
"27", "28", "29", "30"), class = "factor")), row.names = c(NA, 
-456L), class = c("data.table", "data.frame"), .internal.selfref = <pointer: 0x5642b661b980>)
> 

So with help from @Roman I tried:

# Mirrored lollipop plot: variant counts per exon from df1 are drawn above the
# x-axis and those from df2 below it.
#
# - The df2 counts are negated so that they point downwards.
# - ymin is computed on the combined table, so rows from both datasets get a
#   stem start (computing it on df2 only leaves ymin = NA for the df1 rows).
# - as.character() is applied before as.numeric() because as.numeric() on a
#   factor returns the internal level codes rather than the exon labels.
# - Raw counts are plotted, so a much larger dataset still dominates the
#   y-axis range.
df1 %>%
  count(Exon = as.numeric(as.character(Exon)), Variant_Classification) %>%
  bind_rows(
    df2 %>%
      count(Exon = as.numeric(as.character(Exon)), Variant_Classification) %>%
      mutate(n = -n)
  ) %>%
  # Stems below the axis start at -0.5 to leave room for the exon labels.
  mutate(ymin = ifelse(n < 0, -0.5, 0)) %>%
  ggplot(aes(Exon, color = Variant_Classification)) +
  geom_point(aes(y = n), size = 2, position = position_dodge(width = 0.5)) +
  geom_linerange(aes(xmax = Exon, ymin = ymin, ymax = n), position = position_dodge(width = 0.5)) +
  scale_x_continuous(breaks = 1:30) +
  # Let ggplot choose the breaks (fixed breaks of -4:4 are unreadable when the
  # counts are large); abs() shows the mirrored counts as positive numbers.
  scale_y_continuous(labels = abs) +
  geom_hline(yintercept = 0) +
  # Exon numbers are written just below the zero line instead of on the axis.
  geom_text(data = tibble(x = 1:30, y = -0.1), aes(x = x, label = x, y = y), vjust = 1, inherit.aes = FALSE) +
  annotate("label", x = 1, y = c(-3, 3), label = c("DT2", "DT1")) +
  theme_minimal() +
  theme(legend.position = "bottom",
        axis.text.x = element_blank(), panel.grid.minor = element_blank())

But this gives me a very skewed axis due to the size imbalance between the two data frames:

enter image description here

Upvotes: 1

Views: 386

Answers (1)

Roman
Roman

Reputation: 17668

You can try

library(tidyverse)
# Tally the number of variants for every exon / variant-class combination.
# as.character() comes first because as.numeric() on a factor returns the
# internal level codes rather than the labels; the two only coincide when the
# levels happen to be "1", "2", ... in order.
df_res <- df %>% 
  as_tibble() %>% 
  count(Exon = as.numeric(as.character(Exon)), Variant_Classification)



# Basic mirrored lollipop plot. The counts in df_res are stacked on top of a
# negated copy of themselves, so each lollipop appears once above and once
# below the zero line; abs() makes the y-axis labels positive on both sides.
ggplot(
  data = bind_rows(df_res, mutate(df_res, n = -n)),
  mapping = aes(x = Exon, colour = Variant_Classification)
) +
  # Lollipop heads.
  geom_point(
    mapping = aes(y = n),
    size = 2,
    position = position_dodge(width = 0.5)
  ) +
  # Lollipop stems, running from the zero line to the count.
  geom_linerange(
    mapping = aes(xmax = Exon, ymin = 0, ymax = n),
    position = position_dodge(width = 0.5)
  ) +
  scale_x_continuous(breaks = 1:30) +
  scale_y_continuous(breaks = -4:4, labels = abs) +
  geom_hline(yintercept = 0) +
  theme_bw() +
  theme(legend.position = "bottom")

enter image description here

And, closer to your drawing:

# Mirrored lollipop plot with the exon numbers written along the zero line.
# The counts in df_res are combined with a negated copy of themselves so that
# each lollipop appears above and below the axis.
df_res %>% 
   bind_rows(df_res %>% mutate(n = -n)) %>% 
   # Stems below the axis start at -0.5 to leave room for the exon labels.
   mutate(ymin = ifelse(n < 0, -0.5, 0)) %>% 
   ggplot(aes(Exon, color = Variant_Classification)) + 
   geom_point(aes(y = n), size = 2, position = position_dodge(width = 0.5)) +
   geom_linerange(aes(xmax = Exon, ymin = ymin, ymax = n), position = position_dodge(width = 0.5)) + 
   scale_x_continuous(breaks = 1:30) + 
   # abs() shows the mirrored (negative) counts as positive numbers.
   scale_y_continuous(breaks = -4:4, labels = abs) + 
   geom_hline(yintercept = 0) +
   # Exon numbers are drawn just below the zero line; the regular x-axis text
   # is switched off in theme() below. FALSE is spelled out because F is an
   # ordinary variable that can be reassigned.
   geom_text(data = tibble(x = 1:30, y = -0.1), aes(x = x, label = x, y = y), vjust = 1, inherit.aes = FALSE) +
   annotate("label", x = 1, y = c(-3, 3), label = c("DT2", "DT1")) +
   theme_minimal() + 
   theme(legend.position = "bottom", 
         axis.text.x = element_blank(), panel.grid.minor = element_blank())

enter image description here

See also position_dodge2() and its preserve argument, which can be set to "total" or "single".

Upvotes: 3

Related Questions