franjo
franjo

Reputation: 11

ggplot loop output messes up on hline

I wrote a loop in R that takes my dataframe of validation metrics and cycles through them, plotting the results using ggplot. However, geom_hline portion on the plot has weird shifting that I cannot explain and is not occurring when running the identical code outside of the loop, see example below.

Complete code for the loop:

PLOTS <- list()

for(a in c("All", "Deletions", "Duplications")){
  for(b in c("All", "Small", "Medium", "Large", "Very Large")){
    for(c in c("Sensitivity", "PPV", "FNR", "FDR", "F1", "FMI", "CSI")){
      
      H1 <- ANALYSIS %>%
        filter(Matching_Method=="PerfectMatch" & CNV_Type==a & CNV_Size==b & Matching_Type=="Raw") %>%
        pull(.data[[c]])
        
      H2 <- ANALYSIS %>%
        filter(Matching_Method=="PerfectMatch" & CNV_Type==a & CNV_Size==b & Matching_Type=="QCd") %>%
        pull(.data[[c]])
              
      H3 <- ANALYSIS %>%
        filter(Matching_Method=="Full" & CNV_Type==a & CNV_Size==b & Matching_Type=="Raw") %>%
        pull(.data[[c]])
      
      H4 <- ANALYSIS %>%
        filter(Matching_Method=="Full" & CNV_Type==a & CNV_Size==b & Matching_Type=="QCd") %>%
        pull(.data[[c]])
      
      PLOTS[[a]][[b]][[c]] <- ANALYSIS %>%
        filter(!Matching_Method %in% c("PerfectMatch", "Full")) %>%
        filter(CNV_Type==a & CNV_Size==b) %>%
        ggplot(aes(x=MaxD_LOG, y=.data[[c]], linetype=Matching_Type, color=Matching_Method)) +
        geom_hline(aes(yintercept=H1, color="Perfect Match", linetype="Raw"), size=1) +
        geom_hline(aes(yintercept=H2, color="Perfect Match", linetype="QCd"), size=1) +
        geom_hline(aes(yintercept=H3, color="Reference", linetype="Raw"), size=1) +
        geom_hline(aes(yintercept=(H4-0.004), color="Reference", linetype="QCd"), size=1) +
        geom_line(size=1) +
        scale_color_manual(values=c("darkorange1", "slateblue2", "seagreen4", "hotpink3", "red3", "steelblue3"),
                           breaks=c("BAF", "LRRmean", "LRRsd", "Pos", "Perfect Match", "Reference")) +
        labs(x=expression(bold("LOG"["10"] ~ "[MAXIMUM MATCHING DISTANCE]")),
             y=toupper(c),
             linetype="CNV CALLSET QC",
             color="MATCHING METHOD") +
        ylim(0, 1) +
        theme_bw() + 
        theme(axis.text.x=element_text(vjust=0.5, hjust=0.5, size=12),
              axis.text.y=element_text(angle=90, vjust=0.5, hjust=0.5, size=12),
              axis.title.x=element_text(size=15, hjust=0, vjust=0, face="bold", margin=margin(t=10, r=0, b=0, l=0, unit="pt")),
              axis.title.y=element_text(size=15, hjust=0, vjust=0, face="bold", margin=margin(t=0, r=10, b=0, l=0, unit="pt")),
              legend.position="top",
              legend.justification="left",
              legend.title=element_text(size=12, face="bold")) +
        guides(color=guide_legend(title.position="top", nrow=1),
               linetype=guide_legend(title.position="top"))
    }
  }
}

If I want to print the following:

print(PLOTS$All$All$Sensitivity)  

I get the following incorrect plot:

Incorrect plot

But, if I hard-set a, b, and c and run the identical code:

a="All"
b="All"
c="Sensitivity"
  
H1 <- ANALYSIS %>%
  filter(Matching_Method=="PerfectMatch" & CNV_Type==a & CNV_Size==b & Matching_Type=="Raw") %>%
  pull(.data[[c]])

H2 <- ANALYSIS %>%
  filter(Matching_Method=="PerfectMatch" & CNV_Type==a & CNV_Size==b & Matching_Type=="QCd") %>%
  pull(.data[[c]])

H3 <- ANALYSIS %>%
  filter(Matching_Method=="Full" & CNV_Type==a & CNV_Size==b & Matching_Type=="Raw") %>%
  pull(.data[[c]])

H4 <- ANALYSIS %>%
  filter(Matching_Method=="Full" & CNV_Type==a & CNV_Size==b & Matching_Type=="QCd") %>%
  pull(.data[[c]])

ANALYSIS %>%
  filter(!Matching_Method %in% c("PerfectMatch", "Full")) %>%
  filter(CNV_Type==a & CNV_Size==b) %>%
  ggplot(aes(x=MaxD_LOG, y=.data[[c]], linetype=Matching_Type, color=Matching_Method)) +
  geom_hline(aes(yintercept=H1, color="Perfect Match", linetype="Raw"), size=1) +
  geom_hline(aes(yintercept=H2, color="Perfect Match", linetype="QCd"), size=1) +
  geom_hline(aes(yintercept=H3, color="Reference", linetype="Raw"), size=1) +
  geom_hline(aes(yintercept=(H4-0.004), color="Reference", linetype="QCd"), size=1) +
  geom_line(size=1) +
  scale_color_manual(values=c("darkorange1", "slateblue2", "seagreen4", "hotpink3", "red3", "steelblue3"),
                     breaks=c("BAF", "LRRmean", "LRRsd", "Pos", "Perfect Match", "Reference")) +
  labs(x=expression(bold("LOG"["10"] ~ "[MAXIMUM MATCHING DISTANCE]")),
       y=toupper(c),
       linetype="CNV CALLSET QC",
       color="MATCHING METHOD") +
  ylim(0, 1) +
  theme_bw() + 
  theme(axis.text.x=element_text(vjust=0.5, hjust=0.5, size=12),
        axis.text.y=element_text(angle=90, vjust=0.5, hjust=0.5, size=12),
        axis.title.x=element_text(size=15, hjust=0, vjust=0, face="bold", margin=margin(t=10, r=0, b=0, l=0, unit="pt")),
        axis.title.y=element_text(size=15, hjust=0, vjust=0, face="bold", margin=margin(t=0, r=10, b=0, l=0, unit="pt")),
        legend.position="top",
        legend.justification="left",
        legend.title=element_text(size=12, face="bold")) +
  guides(color=guide_legend(title.position="top", nrow=1),
         linetype=guide_legend(title.position="top"))

I get the following correct plot:

Correct plot

I though c values were not correctly updated as the looping occurs. I tested if H-values (calculated based on c values) are computed correctly as follows:

CASE.TEST <- data.frame(NULL)

for(a in c("All", "Deletions", "Duplications")){
  for(b in c("All", "Small", "Medium", "Large", "Very Large")){
    for(c in c("Sensitivity", "PPV", "FNR", "FDR", "F1", "FMI", "CSI")){
      
      H1 <- ANALYSIS %>%
        filter(Matching_Method=="PerfectMatch" & CNV_Type==a & CNV_Size==b & Matching_Type=="Raw") %>%
        pull(.data[[c]])
      
      H2 <- ANALYSIS %>%
        filter(Matching_Method=="PerfectMatch" & CNV_Type==a & CNV_Size==b & Matching_Type=="QCd") %>%
        pull(.data[[c]])
      
      H3 <- ANALYSIS %>%
        filter(Matching_Method=="Full" & CNV_Type==a & CNV_Size==b & Matching_Type=="Raw") %>%
        pull(.data[[c]])
      
      H4 <- ANALYSIS %>%
        filter(Matching_Method=="Full" & CNV_Type==a & CNV_Size==b & Matching_Type=="QCd") %>%
        pull(.data[[c]])
      
      CASE.TEST <- rbind(CASE.TEST,
                         data.frame(
                           H1=H1,
                           H2=H2,
                           H3=H3,
                           H4=H4))
 
    }
  }
}

And they are computed correctly, so it seems like the issue might be with ggplot portion itself?!

I tried several various ways of setting up geom_hline, which worked when plot would be made outside of loop, but not inside of loop.

Here's the CSV file of the dataframe in question: ANALYSIS.csv.

Upvotes: 0

Views: 65

Answers (2)

r2evans
r2evans

Reputation: 161110

I've used nested lapplys, but sometimes they can go a little overboard.

An alternative is to use a frame and store the plots in a list-column:

eg <- expand.grid(A=A, B=B, C=C)
head(eg)
#              A     B           C
# 1          All   All Sensitivity
# 2    Deletions   All Sensitivity
# 3 Duplications   All Sensitivity
# 4          All Small Sensitivity
# 5    Deletions Small Sensitivity
# 6 Duplications Small Sensitivity
nrow(eg)
# [1] 105

eg$gg <- Map(MetricPlot, eg$A, eg$B, eg$C)
filter(eg, A == "Deletions", B == "Small", C == "Sensitivity")$gg[[1]]

Upvotes: 1

franjo
franjo

Reputation: 11

Thanks to the tip from @r2evans, instead of running nested for loops, I took advantage of nested purrr::map functions. I'll include the full functional code below for reference.

MetricPlot <- function(a, b, c){
  H1 <- ANALYSIS %>%
    filter(Matching_Method=="PerfectMatch" & CNV_Type==a & CNV_Size==b & Matching_Type=="Raw") %>%
    pull(.data[[c]])
  
  H2 <- ANALYSIS %>%
    filter(Matching_Method=="PerfectMatch" & CNV_Type==a & CNV_Size==b & Matching_Type=="QCd") %>%
    pull(.data[[c]])
  
  H3 <- ANALYSIS %>%
    filter(Matching_Method=="Full" & CNV_Type==a & CNV_Size==b & Matching_Type=="Raw") %>%
    pull(.data[[c]])
  
  H4 <- ANALYSIS %>%
    filter(Matching_Method=="Full" & CNV_Type==a & CNV_Size==b & Matching_Type=="QCd") %>%
    pull(.data[[c]])
  
  PLOT <- ANALYSIS %>%
    filter(!Matching_Method %in% c("PerfectMatch", "Full")) %>%
    filter(CNV_Type==a & CNV_Size==b) %>%
    ggplot(aes(x=MaxD_LOG, y=.data[[c]], linetype=Matching_Type, color=Matching_Method)) +
    geom_hline(aes(yintercept=H1, color="Perfect Match", linetype="Raw"), size=1) +
    geom_hline(aes(yintercept=H2, color="Perfect Match", linetype="QCd"), size=1) +
    geom_hline(aes(yintercept=H3, color="Reference", linetype="Raw"), size=1) +
    geom_hline(aes(yintercept=(H4-0.004), color="Reference", linetype="QCd"), size=1) +
    geom_line(size=1) +
    scale_color_manual(values=c("goldenrod1", "slateblue2", "seagreen4", "lightsalmon4", "red3", "steelblue3"),
                       breaks=c("BAF", "LRRmean", "LRRsd", "Pos", "Perfect Match", "Reference")) +
    labs(x=expression(bold("LOG"["10"] ~ "[MAXIMUM MATCHING DISTANCE]")),
         y=toupper(c),
         linetype="CNV CALLSET QC",
         color="MATCHING METHOD") +
    ylim(0, 1) +
    theme_bw() + 
    theme(axis.text.x=element_text(vjust=0.5, hjust=0.5, size=12),
          axis.text.y=element_text(angle=90, vjust=0.5, hjust=0.5, size=12),
          axis.title.x=element_text(size=15, hjust=0, vjust=0, face="bold", margin=margin(t=10, r=0, b=0, l=0, unit="pt")),
          axis.title.y=element_text(size=15, hjust=0, vjust=0, face="bold", margin=margin(t=0, r=10, b=0, l=0, unit="pt")),
          legend.position="top",
          legend.justification="left",
          legend.title=element_text(size=12, face="bold")) +
    guides(color=guide_legend(title.position="top", nrow=1),
           linetype=guide_legend(title.position="top"))
}

A <- c("All", "Deletions", "Duplications")
A <- set_names(A)

B <- c("All", "Small", "Medium", "Large", "Very Large")
B <- set_names(B)

C <- c("Sensitivity", "PPV", "FNR", "FDR", "F1", "FMI", "CSI")
C <- set_names(C)
  
PLOTS <- map(A, function(x) map(B, function(y) map(C, function(z) MetricPlot(a=x, b=y, c=z))))

Upvotes: 1

Related Questions