Reputation: 1547
I have a list of bam files in chr16_bam
folder and also an annotation file sgseq_sam.txt
.
library(SGSeq)
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
library(BSgenome.Hsapiens.UCSC.hg19)
library(org.Hs.eg.db)
library(tidyverse)
library(AnnotationDbi)
library(dplyr)
library(stringr)
# Rename the "file_bam" column values to the full path where the BAMs are stored
setwd("C:/Users/User/Downloads/")
bamPath = "C:/Users/User/Downloads/chr16_bam"
samFile <- read.delim("C:/Users/User/Downloads/sgseq_sam.txt", header=T)
samFile <- samFile %>%
mutate(file_bam = paste0(bamPath, file_bam))
# Save the TxDb.Hsapiens.UCSC.hg19.knownGene object into a variable.
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
txdb <- keepSeqlevels(txdb, "chr16")
seqlevelsStyle(txdb) <- "NCBI"
# Read in the gene list from Supplementary Table 1
gene.list <- read.table("Table_1_Differential Expression Analysis Revealing CLCA1 to Be a Prognostic and Diagnostic Biomarker for Colorectal Cancer.xls", header=T)
# Convert the gene symbols to Entrez IDs and remove the genes that don't map to an Entrez ID.
entrez = mapIds(org.Hs.eg.db, keys=gene.list$Name, column = "ENTREZID", keytype="SYMBOL")
gene.list <- gene.list %>%
mutate(entrez) %>%
filter(!is.na(entrez))
# Convert the TxDb.Hsapiens.UCSC.hg19.knownGene object
txf_ucsc <- convertToTxFeatures(txdb)
# Cast this object to a dataframe and save as another variable name
txf_df <- as.data.frame(txf_ucsc)
# Subset by the Entrez IDs
txf_df <- txf_df %>% filter(geneName %in% gene.list$entrez)
# Find the number of common transcripts
unique <- unique(txf_df$geneName)
gene.list <- gene.list[gene.list$entrez %in% unique,]
nrow(gene.list)
# Cast the seqnames column to factor
txf_df$seqnames <- as.factor(as.character(txf_df$seqnames))
# Remove the "chr" prefix from the seqnames column
txf_df %>%
rename_all(~stringr::str_replace(.,"^chr",""))
# Recast this dataframe back to a GRanges object
txf_grange <- makeGRangesFromDataFrame(txf_df, keep.extra.columns=T)
Now, I want to create a loop for each of the genes, where upon iteration, subset Granges objects in txf_grange
by only the gene, use the reduce
function to collapse the ranges of the genes into a single vector, run analyzeFeatures
, then annotate
functions, and finally plotFeatures
.
for (i in txf_grange$geneName) {
if (i=="343") {
next
}
else{
# For each of the 15 genes, subset the Granges objects by only the gene
grange.subset <- txf_grange[i == toString(i)]
# Collapse the ranges of the genes into a single vector
grange.subset <- unlist(IRanges::reduce((split(grange.subset, grange.subset$geneName))))
# Run analyzeFeatures
for (j in 1:dim(samFile)[[1]]) {
si <- samFile
for (k in si) {
for (l in list.files(path="C:/Users/User/Downloads/chr16_bam", pattern=".bam$", all.files=F, full.names=F)) {
sgfc_pred <- analyzeFeatures(k, which=grange.subset, features=txf_ucsc, predict=T)
# Annotate predicted features
sgfc_pred <- annotate(sgfc_pred, txf_ucsc)
# Plot features
pdf(paste("plot", l, ".pdf", sep=""))
plotFeatures(sgfc_pred, geneID=1)
dev.off()
}
}
}
}
}
Data
> dput(head(txf_grange))
new("GRanges", seqnames = new("Rle", values = structure(1L, .Label = "16", class = "factor"),
lengths = 6L, elementMetadata = NULL, metadata = list()),
ranges = new("IRanges", start = c(12058964L, 12059311L, 12059311L,
12060052L, 12060198L, 12060198L), width = c(348L, 742L, 2117L,
147L, 680L, 1230L), NAMES = NULL, elementType = "ANY", elementMetadata = NULL,
metadata = list()), strand = new("Rle", values = structure(1L, .Label = c("+",
"-", "*"), class = "factor"), lengths = 6L, elementMetadata = NULL,
metadata = list()), seqinfo = new("Seqinfo", seqnames = "16",
seqlengths = NA_integer_, is_circular = NA, genome = NA_character_),
elementMetadata = new("DFrame", rownames = NULL, nrows = 6L,
listData = list(type = structure(c(3L, 1L, 1L, 2L, 1L,
1L), .Label = c("J", "I", "F", "L", "U"), class = "factor"),
txName = structure(list(c("uc002dbv.3", "uc010buy.3",
"uc010buz.3"), c("uc002dbv.3", "uc010buy.3"), "uc010buz.3",
c("uc002dbv.3", "uc010buy.3"), "uc010buy.3",
"uc002dbv.3"), class = "AsIs"), geneName = structure(list(
"608", "608", "608", "608", "608", "608"), class = "AsIs")),
elementType = "ANY", elementMetadata = NULL, metadata = list()),
elementType = "ANY", metadata = list())
Upvotes: 1
Views: 124
Reputation: 11
change line:
analyzeFeatures(samFile, grange.subset)
Also you do not need that many loops to run for the question. The question asks for 14 plots and you might be plotting for much more with the number of loops you have.
Upvotes: 1