Reputation: 75
I am running an R script to analyze some biological data. Example of the snippet data and script is provided below. This data file has many columns (but I would like to focus on 5th column- Gene). I have more than 100 data files like this (consider 5th Gene column in all files as the column of interest). Currently I am running each file separately in R and the process is tedious. I would like to run a R script for all data files simultaneously and save all the data in different folder as per the file name. Is it possible to loop or iterate and analyze all the data files at once in a script. Please assist me with this.
Thank you,
Toufiq
Formatted question and Revised dataframe
Inserted: Names of the files to be read
M3.1_IPA.txt
M8.1_IPA.txt
M8.2_IPA.txt
M8.3_IPA.txt
1. Load gene sets to analyze
Data_file <- read.delim(file = "./M3.1_IPA.txt", stringsAsFactors = FALSE, check.names = FALSE, row.names = NULL)
dput(head(Data_file, 5))
structure(list(row.names = c("M3.1_ALPP", "M3.1_ALS2CR14", "M3.1_ANKRD30B",
"M3.1_ARL16", "M3.1_BCYRN1"), X = 1:5, Module = c("M3.1", "M3.1",
"M3.1", "M3.1", "M3.1"), Title = c("Cell Cycle", "Cell Cycle",
"Cell Cycle", "Cell Cycle", "Cell Cycle"), Gene = c("ALPP", "ALS2CR14",
"ANKRD30B", "ARL16", "BCYRN1"), Probes = c("ILMN_1693789", "ILMN_1787314",
"ILMN_1730678", "ILMN_2188119", "ILMN_1678757"), Module_gene = c("M3.1_ALPP",
"M3.1_ALS2CR14", "M3.1_ANKRD30B", "M3.1_ARL16", "M3.1_BCYRN1"
)), row.names = c(NA, 5L), class = "data.frame")
Module_10_1 <- Data_file[,5]
Module_10_1_geneLists <- list(Module_10_1_geneListName=Module_10_1)
Select motif database to use (i.e. organism and distance around TSS)
data(motifAnnotations_hgnc)
motifRankings <- importRankings("./hg19-tss-centered-10kb-7species.mc9nr.feather")
Calculate enrichment
motifs_AUC <- calcAUC(Module_10_1_geneLists, motifRankings, nCores=1)
Type = "Enrichment"
Module = "M10_1"
auc <- getAUC(motifs_AUC)
##Export the plots##
pdf(paste0(Type, "_", Module, "_AUC_Histogram_Plot.pdf"), height = 5, width = 10)
hist(auc, main="Module_10_1", xlab="AUC histogram",
breaks=100, col="#ff000050", border="darkred")
nes3 <- (3*sd(auc)) + mean(auc)
abline(v=nes3, col="red")
dev.off()
Select significant motifs and/or annotate to TFs
motifEnrichmentTable <- addMotifAnnotation(motifs_AUC, nesThreshold=3,
motifAnnot=motifAnnotations_hgnc)
##Export the Motif enrichment analysis results###
write.csv(motifEnrichmentTable, file ="./motifEnrichmentTable.csv", row.names = F, sep = ",")
Identify the genes with the best enrichment for each Motif
motifEnrichmentTable_wGenes_v1 <- addSignificantGenes(motifEnrichmentTable,
rankings=motifRankings,
geneSets=Module_10_1_geneLists)
##Export the Motif enrichment analysis results##
write.csv(motifEnrichmentTable_wGenes_v1,
file ="./motifEnrichmentTable_wGenes_v1.csv",
row.names = F, sep = ",")
Plot for a few motifs
geneSetName <- "Module_10_1_Genelist"
selectedMotifs <- c("cisbp__M6275",
sample(motifEnrichmentTable$motif, 2))
par(mfrow=c(2,2))
Type_v1 <- "Few_motifs"
##Export the plots
pdf(paste0(Type_v1, "_", Module, "_Sig_Genes_Plot.pdf"), height = 5, width = 5)
getSignificantGenes(Module_10_1_geneLists,
motifRankings,
signifRankingNames=selectedMotifs,
plotCurve=TRUE, maxRank=5000, genesFormat="none",
method="aprox")
dev.off()
The final output of RcisTarget is a data.table and export to html
motifEnrichmentTable_wGenes_v1_wLogo <- addLogo(motifEnrichmentTable_wGenes_v1)
resultsSubset <- motifEnrichmentTable_wGenes_v1_wLogo[1:147,]
Type_v2 <- "Motifs_Table"
library(DT)
## export the data table to html file format###
dtable <- datatable(resultsSubset[,-c("enrichedGenes", "TF_lowConf"), with=FALSE],
escape = FALSE, # To show the logo
filter="top", options=list(pageLength=100))
html_test <- "dtable.html"
saveWidget(dtable, html_test)
Building a network and Export to the html format
signifMotifNames <- motifEnrichmentTable$motif[1:3]
incidenceMatrix <- getSignificantGenes(Module_10_1_geneLists,
motifRankings,
signifRankingNames=signifMotifNames,
plotCurve=TRUE, maxRank=5000,
genesFormat="incidMatrix",
method="aprox")$incidMatrix
library(reshape2)
edges <- melt(incidenceMatrix)
edges <- edges[which(edges[,3]==1),1:2]
colnames(edges) <- c("from","to")
library(visNetwork)
motifs <- unique(as.character(edges[,1]))
genes <- unique(as.character(edges[,2]))
nodes <- data.frame(id=c(motifs, genes),
label=c(motifs, genes),
title=c(motifs, genes), # tooltip
shape=c(rep("diamond", length(motifs)), rep("elypse", length(genes))),
color=c(rep("purple", length(motifs)), rep("skyblue", length(genes))))
Network <- visNetwork(nodes, edges) %>% visOptions(highlightNearest = TRUE,
nodesIdSelection = TRUE)
##Export the network##
visSave(Network, file ="Network.html")
Upvotes: 0
Views: 661
Reputation: 430
I've written it based on the example file names given:
file_names <- c("M3.1_IPA.txt","M8.1_IPA.txt","M8.2_IPA.txt","M8.3_IPA.txt")
Easiest way is to iterate over 1:length(file_names)
, and create a unique set of output files for each iteration. As per your question, you want to save the results to different folders. This can be done by extracting the file name (removing the .txt) and then creating a new directory with that name. You can then save all your outputs for this iteration to the new directory
A new directory is then created for the next iteration, so you aren't saving over previous results.
for(i in 1:length(file_names)){
Data_file <- read.csv(file = paste0("./",file_names[i]), stringsAsFactors = FALSE, check.names = FALSE)
Module_10_1 <- Data_file[,1]
Module_10_1_geneLists <- list(Module_10_1_geneListName=Module_10_1)
data(motifAnnotations_hgnc)
motifRankings <- importRankings("./hg19-tss-centered-10kb-7species.mc9nr.feather")
motifs_AUC <- calcAUC(Module_10_1_geneLists, motifRankings, nCores=1)
Type = "Enrichment"
Module = "M10_1"
auc <- getAUC(motifs_AUC)
# Extract the file name without the extension, so the directory with the results
# will correspond to this name:
new_directory_name <- gsub(x = file_names[i],pattern = "(.*)\\.txt","\\1")
#Create the new directory:
dir.create(new_directory_name)
##Export the plots##
# Save to the new directory
pdf(paste0(new_directory_name,"/",Type,"_", Module,"_AUC_Histogram_Plot.pdf"), height = 5, width = 10)
hist(auc, main="Module_10_1", xlab="AUC histogram",
breaks=100, col="#ff000050", border="darkred")
nes3 <- (3*sd(auc)) + mean(auc)
abline(v=nes3, col="red")
dev.off()
}
I've omitted the rest of your script because the solution for saving motifEnrichmentTable_wGenes_v1.csv
, _Sig_Genes_Plot.pdf
, and the other outputs is the same as above with _AUC_Histogram_Plot.pdf
. You just need to write these outputs to the new directory with paste0(new_directory_name,"/",<insert-output-name>)
.
If you've got many files, instead of manually creating the vector file_names
you can search the local directory for files which match the correct pattern. e.g.
all_files <- dir()
file_names <- grep(all_files,pattern = "*.txt$",value = TRUE)
will return all the .txt files in the local directory. You can refine it further if it needs to be only .txt files starting with "M":
all_files <- dir()
file_names <- grep(all_files,pattern = "^M.*.txt$",value = TRUE)
You'd do this if not all of the .txt files in the local directory are input files, and you may need to refine the regular expression pattern further until you are only capturing the txt files you need.
For the .html files it is a little more difficult because saveWidget()
currently doesn't let you save using relative file paths, but there is a solution using withr::with_dir()
. This should work for Part 7:
withr::with_dir(new_directory_name, saveWidget(dtable, file="dtable.html"))
and this should work for part 8:
withr::with_dir(new_directory_name, saveWidget(Network, file="Network.html"))
Upvotes: 1
Reputation: 75
7. The final output of RcisTarget is a data.table and export to html
motifEnrichmentTable_wGenes_v1_wLogo <- addLogo(motifEnrichmentTable_wGenes_v1)
resultsSubset <- motifEnrichmentTable_wGenes_v1_wLogo[1:147,]
Type_v2 <- "Motifs_Table"
# Extract the file name without the extension, so the directory with the results
# will correspond to this name:
new_directory_name <- gsub(x = file_names[i],pattern = "(.*)\\.txt","\\1")
#Create the new directory:
dir.create(new_directory_name)
library(DT)
library(webshot)
library(htmltools)
library(xml2)
## export the data table to html file format###
dtable <- datatable(resultsSubset[,-c("enrichedGenes", "TF_lowConf"), with=FALSE],
escape = FALSE, # To show the logo
filter="top", options=list(pageLength=1000))
html_test <- paste0(new_directory_name,"/","dtable.html")
##To save files in the pdf format###
#saveWidget(dtable, html_test)
#webshot(html_test, paste0(new_directory_name,"/", "motif.pdf"))
8. Building a network and Export to the html format
# Extract the file name without the extension, so the directory with the results
# will correspond to this name:
new_directory_name <- gsub(x = file_names[i],pattern = "(.*)\\.txt","\\1")
#Create the new directory:
dir.create(new_directory_name)
signifMotifNames <- motifEnrichmentTable$motif[1:3]
incidenceMatrix <- getSignificantGenes(Module_10_1_geneLists,
motifRankings,
signifRankingNames=signifMotifNames,
plotCurve=TRUE, maxRank=5000,
genesFormat="incidMatrix",
method="aprox")$incidMatrix
library(reshape2)
edges <- melt(incidenceMatrix)
edges <- edges[which(edges[,3]==1),1:2]
colnames(edges) <- c("from","to")
library(visNetwork)
motifs <- unique(as.character(edges[,1]))
genes <- unique(as.character(edges[,2]))
nodes <- data.frame(id=c(motifs, genes),
label=c(motifs, genes),
title=c(motifs, genes), # tooltip
shape=c(rep("diamond", length(motifs)), rep("elypse", length(genes))),
color=c(rep("purple", length(motifs)), rep("skyblue", length(genes))))
Network <- visNetwork(nodes, edges) %>% visOptions(highlightNearest = TRUE,
nodesIdSelection = TRUE)
##Export the network##
#visSave(Network, file ="Network.html")
html_test_v1 <- paste0(new_directory_name,"/","Network.html")
#saveWidget(Network, html_test_v1)
#webshot(html_test_v1, paste0(new_directory_name,"/", "Network.pdf"))
Upvotes: 0