Reputation: 11
I wrote a loop in R that takes my dataframe of validation metrics and cycles through them, plotting the results using ggplot. However, geom_hline
portion on the plot has weird shifting that I cannot explain and is not occurring when running the identical code outside of the loop, see example below.
Complete code for the loop:
PLOTS <- list()
for(a in c("All", "Deletions", "Duplications")){
for(b in c("All", "Small", "Medium", "Large", "Very Large")){
for(c in c("Sensitivity", "PPV", "FNR", "FDR", "F1", "FMI", "CSI")){
H1 <- ANALYSIS %>%
filter(Matching_Method=="PerfectMatch" & CNV_Type==a & CNV_Size==b & Matching_Type=="Raw") %>%
pull(.data[[c]])
H2 <- ANALYSIS %>%
filter(Matching_Method=="PerfectMatch" & CNV_Type==a & CNV_Size==b & Matching_Type=="QCd") %>%
pull(.data[[c]])
H3 <- ANALYSIS %>%
filter(Matching_Method=="Full" & CNV_Type==a & CNV_Size==b & Matching_Type=="Raw") %>%
pull(.data[[c]])
H4 <- ANALYSIS %>%
filter(Matching_Method=="Full" & CNV_Type==a & CNV_Size==b & Matching_Type=="QCd") %>%
pull(.data[[c]])
PLOTS[[a]][[b]][[c]] <- ANALYSIS %>%
filter(!Matching_Method %in% c("PerfectMatch", "Full")) %>%
filter(CNV_Type==a & CNV_Size==b) %>%
ggplot(aes(x=MaxD_LOG, y=.data[[c]], linetype=Matching_Type, color=Matching_Method)) +
geom_hline(aes(yintercept=H1, color="Perfect Match", linetype="Raw"), size=1) +
geom_hline(aes(yintercept=H2, color="Perfect Match", linetype="QCd"), size=1) +
geom_hline(aes(yintercept=H3, color="Reference", linetype="Raw"), size=1) +
geom_hline(aes(yintercept=(H4-0.004), color="Reference", linetype="QCd"), size=1) +
geom_line(size=1) +
scale_color_manual(values=c("darkorange1", "slateblue2", "seagreen4", "hotpink3", "red3", "steelblue3"),
breaks=c("BAF", "LRRmean", "LRRsd", "Pos", "Perfect Match", "Reference")) +
labs(x=expression(bold("LOG"["10"] ~ "[MAXIMUM MATCHING DISTANCE]")),
y=toupper(c),
linetype="CNV CALLSET QC",
color="MATCHING METHOD") +
ylim(0, 1) +
theme_bw() +
theme(axis.text.x=element_text(vjust=0.5, hjust=0.5, size=12),
axis.text.y=element_text(angle=90, vjust=0.5, hjust=0.5, size=12),
axis.title.x=element_text(size=15, hjust=0, vjust=0, face="bold", margin=margin(t=10, r=0, b=0, l=0, unit="pt")),
axis.title.y=element_text(size=15, hjust=0, vjust=0, face="bold", margin=margin(t=0, r=10, b=0, l=0, unit="pt")),
legend.position="top",
legend.justification="left",
legend.title=element_text(size=12, face="bold")) +
guides(color=guide_legend(title.position="top", nrow=1),
linetype=guide_legend(title.position="top"))
}
}
}
If I want to print the following:
print(PLOTS$All$All$Sensitivity)
I get the following incorrect plot:
But, if I hard-set a
, b
, and c
and run the identical code:
a="All"
b="All"
c="Sensitivity"
H1 <- ANALYSIS %>%
filter(Matching_Method=="PerfectMatch" & CNV_Type==a & CNV_Size==b & Matching_Type=="Raw") %>%
pull(.data[[c]])
H2 <- ANALYSIS %>%
filter(Matching_Method=="PerfectMatch" & CNV_Type==a & CNV_Size==b & Matching_Type=="QCd") %>%
pull(.data[[c]])
H3 <- ANALYSIS %>%
filter(Matching_Method=="Full" & CNV_Type==a & CNV_Size==b & Matching_Type=="Raw") %>%
pull(.data[[c]])
H4 <- ANALYSIS %>%
filter(Matching_Method=="Full" & CNV_Type==a & CNV_Size==b & Matching_Type=="QCd") %>%
pull(.data[[c]])
ANALYSIS %>%
filter(!Matching_Method %in% c("PerfectMatch", "Full")) %>%
filter(CNV_Type==a & CNV_Size==b) %>%
ggplot(aes(x=MaxD_LOG, y=.data[[c]], linetype=Matching_Type, color=Matching_Method)) +
geom_hline(aes(yintercept=H1, color="Perfect Match", linetype="Raw"), size=1) +
geom_hline(aes(yintercept=H2, color="Perfect Match", linetype="QCd"), size=1) +
geom_hline(aes(yintercept=H3, color="Reference", linetype="Raw"), size=1) +
geom_hline(aes(yintercept=(H4-0.004), color="Reference", linetype="QCd"), size=1) +
geom_line(size=1) +
scale_color_manual(values=c("darkorange1", "slateblue2", "seagreen4", "hotpink3", "red3", "steelblue3"),
breaks=c("BAF", "LRRmean", "LRRsd", "Pos", "Perfect Match", "Reference")) +
labs(x=expression(bold("LOG"["10"] ~ "[MAXIMUM MATCHING DISTANCE]")),
y=toupper(c),
linetype="CNV CALLSET QC",
color="MATCHING METHOD") +
ylim(0, 1) +
theme_bw() +
theme(axis.text.x=element_text(vjust=0.5, hjust=0.5, size=12),
axis.text.y=element_text(angle=90, vjust=0.5, hjust=0.5, size=12),
axis.title.x=element_text(size=15, hjust=0, vjust=0, face="bold", margin=margin(t=10, r=0, b=0, l=0, unit="pt")),
axis.title.y=element_text(size=15, hjust=0, vjust=0, face="bold", margin=margin(t=0, r=10, b=0, l=0, unit="pt")),
legend.position="top",
legend.justification="left",
legend.title=element_text(size=12, face="bold")) +
guides(color=guide_legend(title.position="top", nrow=1),
linetype=guide_legend(title.position="top"))
I get the following correct plot:
I though c
values were not correctly updated as the looping occurs. I tested if H
-values (calculated based on c
values) are computed correctly as follows:
CASE.TEST <- data.frame(NULL)
for(a in c("All", "Deletions", "Duplications")){
for(b in c("All", "Small", "Medium", "Large", "Very Large")){
for(c in c("Sensitivity", "PPV", "FNR", "FDR", "F1", "FMI", "CSI")){
H1 <- ANALYSIS %>%
filter(Matching_Method=="PerfectMatch" & CNV_Type==a & CNV_Size==b & Matching_Type=="Raw") %>%
pull(.data[[c]])
H2 <- ANALYSIS %>%
filter(Matching_Method=="PerfectMatch" & CNV_Type==a & CNV_Size==b & Matching_Type=="QCd") %>%
pull(.data[[c]])
H3 <- ANALYSIS %>%
filter(Matching_Method=="Full" & CNV_Type==a & CNV_Size==b & Matching_Type=="Raw") %>%
pull(.data[[c]])
H4 <- ANALYSIS %>%
filter(Matching_Method=="Full" & CNV_Type==a & CNV_Size==b & Matching_Type=="QCd") %>%
pull(.data[[c]])
CASE.TEST <- rbind(CASE.TEST,
data.frame(
H1=H1,
H2=H2,
H3=H3,
H4=H4))
}
}
}
And they are computed correctly, so it seems like the issue might be with ggplot portion itself?!
I tried several various ways of setting up geom_hline
, which worked when plot would be made outside of loop, but not inside of loop.
Here's the CSV file of the dataframe in question: ANALYSIS.csv.
Upvotes: 0
Views: 65
Reputation: 161110
I've used nested lapply
s, but sometimes they can go a little overboard.
An alternative is to use a frame and store the plots in a list-column:
eg <- expand.grid(A=A, B=B, C=C)
head(eg)
# A B C
# 1 All All Sensitivity
# 2 Deletions All Sensitivity
# 3 Duplications All Sensitivity
# 4 All Small Sensitivity
# 5 Deletions Small Sensitivity
# 6 Duplications Small Sensitivity
nrow(eg)
# [1] 105
eg$gg <- Map(MetricPlot, eg$A, eg$B, eg$C)
filter(eg, A == "Deletions", B == "Small", C == "Sensitivity")$gg[[1]]
Upvotes: 1
Reputation: 11
Thanks to the tip from @r2evans, instead of running nested for
loops, I took advantage of nested purrr::map
functions. I'll include the full functional code below for reference.
MetricPlot <- function(a, b, c){
H1 <- ANALYSIS %>%
filter(Matching_Method=="PerfectMatch" & CNV_Type==a & CNV_Size==b & Matching_Type=="Raw") %>%
pull(.data[[c]])
H2 <- ANALYSIS %>%
filter(Matching_Method=="PerfectMatch" & CNV_Type==a & CNV_Size==b & Matching_Type=="QCd") %>%
pull(.data[[c]])
H3 <- ANALYSIS %>%
filter(Matching_Method=="Full" & CNV_Type==a & CNV_Size==b & Matching_Type=="Raw") %>%
pull(.data[[c]])
H4 <- ANALYSIS %>%
filter(Matching_Method=="Full" & CNV_Type==a & CNV_Size==b & Matching_Type=="QCd") %>%
pull(.data[[c]])
PLOT <- ANALYSIS %>%
filter(!Matching_Method %in% c("PerfectMatch", "Full")) %>%
filter(CNV_Type==a & CNV_Size==b) %>%
ggplot(aes(x=MaxD_LOG, y=.data[[c]], linetype=Matching_Type, color=Matching_Method)) +
geom_hline(aes(yintercept=H1, color="Perfect Match", linetype="Raw"), size=1) +
geom_hline(aes(yintercept=H2, color="Perfect Match", linetype="QCd"), size=1) +
geom_hline(aes(yintercept=H3, color="Reference", linetype="Raw"), size=1) +
geom_hline(aes(yintercept=(H4-0.004), color="Reference", linetype="QCd"), size=1) +
geom_line(size=1) +
scale_color_manual(values=c("goldenrod1", "slateblue2", "seagreen4", "lightsalmon4", "red3", "steelblue3"),
breaks=c("BAF", "LRRmean", "LRRsd", "Pos", "Perfect Match", "Reference")) +
labs(x=expression(bold("LOG"["10"] ~ "[MAXIMUM MATCHING DISTANCE]")),
y=toupper(c),
linetype="CNV CALLSET QC",
color="MATCHING METHOD") +
ylim(0, 1) +
theme_bw() +
theme(axis.text.x=element_text(vjust=0.5, hjust=0.5, size=12),
axis.text.y=element_text(angle=90, vjust=0.5, hjust=0.5, size=12),
axis.title.x=element_text(size=15, hjust=0, vjust=0, face="bold", margin=margin(t=10, r=0, b=0, l=0, unit="pt")),
axis.title.y=element_text(size=15, hjust=0, vjust=0, face="bold", margin=margin(t=0, r=10, b=0, l=0, unit="pt")),
legend.position="top",
legend.justification="left",
legend.title=element_text(size=12, face="bold")) +
guides(color=guide_legend(title.position="top", nrow=1),
linetype=guide_legend(title.position="top"))
}
A <- c("All", "Deletions", "Duplications")
A <- set_names(A)
B <- c("All", "Small", "Medium", "Large", "Very Large")
B <- set_names(B)
C <- c("Sensitivity", "PPV", "FNR", "FDR", "F1", "FMI", "CSI")
C <- set_names(C)
PLOTS <- map(A, function(x) map(B, function(y) map(C, function(z) MetricPlot(a=x, b=y, c=z))))
Upvotes: 1