Reputation: 49
I have a series of commands to run for each sample (n = 35). Basically, for each sample I keep the genomic windows whose read count passes a threshold and then annotate those windows.
# Annotate the genomic windows that have more than 30 reads in sample 1.
# Keep only the windows (rows) whose read count in the first column exceeds 30.
df1 <- df[df[, 1] > 30, ]
# Move the row names (e.g. "chr1-1119901-1120200") into a leading "rn" column.
df1 <- setDT(df1, keep.rownames = TRUE)[]
# Drop columns 2 to 36; presumably these are the 35 count columns, which would
# leave only "rn" -- confirm against the actual width of `df`.
df1 <- df1[, -(2:36)]
# Split "rn" on "-" into separate chr / start / end columns.
df1 <- separate(df1, "rn", c("chr", "start", "end"), sep = "-", remove = TRUE)
# Write the regions as a headerless, tab-separated file. `col.names` is spelled
# out in full instead of relying on partial matching of `col.name`.
write.table(
  df1, "df1.txt",
  sep = "\t", col.names = FALSE, row.names = FALSE, quote = FALSE
)
# Read the file back as BED regions on hg19, annotate them and summarise.
dm_regions1 <- read_regions(con = "df1.txt", genome = 'hg19', format = 'bed')
dm_annotated1 <- annotate_regions(
  regions = dm_regions1,
  annotations = annotations,
  ignore.strand = TRUE,
  quiet = FALSE
)
dm_annsum1 <- summarize_annotations(
  annotated_regions = dm_annotated1,
  quiet = TRUE
)
Now I am trying to write a loop over all samples: every "1" in the code above should be replaced by the loop index i of for(i in 1:35). For example, the first line would become df[i] <- df[df[,i]>30,].
For the first line, my loop looks like this, and it worked:
# For each of the 35 columns of `df`, store the rows whose value in that column
# exceeds 30 in a variable named df<i> (df1, df2, ..., df35).
for (i in seq_len(35)) {
  assign(
    x = paste0("df", i),
    value = df[df[, i] > 30, , drop = FALSE]
  )
}
But when I ran a second loop, the results were not what I expected:
# Second attempt from the question: meant to repeat the per-sample clean-up
# (row names -> "rn" column, drop columns 2 to 36, split "rn") for all samples.
# NOTE(review): `df[i]` is column i of the data frame `df`; it is not the
# object named df<i> (df1, df2, ...) that the previous loop created with
# assign(). Every statement below therefore reads from and writes to single
# columns of `df`, which is presumably why the results were unexpected.
# To work on the df<i> objects they would have to be fetched and stored by
# name (get()/assign()), or kept in a list and indexed with [[i]].
for(i in 1:35){
# Operates on the one-column data frame df[i] and writes the result back
# into column i of `df`.
df[i] <- setDT(df[i], keep.rownames = TRUE)[]
# Applied to df[i] here, not to a per-sample table.
df[i] <- df[i][,-(2:36)]
# Asks separate() to split a column named "rn" found in df[i].
df[i] <- separate(df[i], "rn", c("chr","start","end"), sep = "-", remove = TRUE)
}
My question is how to loop over the per-sample objects (df1, df2, ...) themselves. Also, can I run all of this code in one big loop?
The data frame looks like this after the first code:
X1ME.bam.counts X2ME.bam.counts X3ME.bam.counts X4ME.bam.counts X5ME.bam.counts X6ME.bam.counts X7ME.bam.counts
chr1-1119901-1120200 26 44 37 25 30 35 36
chr1-1120201-1120500 22 39 41 22 29 41 40
chr1-121133101-121133400 11 24 31 17 29 23 25
chr1-121351501-121351800 21 39 36 34 40 35 42
chr1-121351801-121352100 107 129 102 118 115 119 119
chr1-121352101-121352400 152 173 145 169 153 157 153
chr1-121352401-121352700 101 118 100 133 111 121 112
chr1-121352701-121353000 49 69 65 75 57 77 67
chr1-121353001-121353300 89 106 107 111 96 105 82
chr1-121353301-121353600 64 64 72 78 68 79 68
Thanks, @DanielBonnery. Here is the answer:
# Run the filter -> BED file -> annotation pipeline once per column of `df`.
# Returns a list with one entry per column, in column order; each entry is a
# list holding the annotation summary as `dm_annsum`.
# Relies on `df` and `annotations` being defined by the calling script.
L <- lapply(seq_len(ncol(df)), function(i) {
  # Keep only the windows (rows) with more than 30 reads in column i.
  df1 <- df[df[, i] > 30, ]
  # Move the row names (e.g. "chr1-1119901-1120200") into a leading "rn" column.
  df1 <- setDT(df1, keep.rownames = TRUE)[]
  # Keep just the "rn" column; the counts are not written to the file.
  df1 <- df1[, 1]
  # Split "rn" on "-" into separate chr / start / end columns.
  df1 <- separate(df1, "rn", c("chr", "start", "end"), sep = "-", remove = TRUE)
  # One output file per column: df1.txt, df2.txt, ...
  bed_file <- paste0("df", i, ".txt")
  # `col.names` is spelled out in full instead of relying on partial matching.
  write.table(
    df1, bed_file,
    sep = "\t", col.names = FALSE, row.names = FALSE, quote = FALSE
  )
  # Read the file back as BED regions on hg19, annotate them and summarise.
  dm_regions <- read_regions(con = bed_file, genome = 'hg19', format = 'bed')
  dm_annotated <- annotate_regions(
    regions = dm_regions,
    annotations = annotations,
    ignore.strand = TRUE,
    quiet = FALSE
  )
  dm_annsum <- summarize_annotations(
    annotated_regions = dm_annotated,
    quiet = TRUE
  )
  list(dm_annsum = dm_annsum)
})
Upvotes: 0
Views: 99
Reputation: 405
# Run the filter -> BED file -> annotation pipeline once per column of `df`.
# Returns a list with one entry per column, in column order, so L[[i]] belongs
# to column i. Each entry holds `dm_annotated`, `dm_annsum` and `dm_regions`.
# Relies on `df` and `annotations` being defined by the calling script.
# Iterating from column 1 (rather than 2) includes the first sample and keeps
# the list position equal to the column index.
L <- lapply(seq_len(ncol(df)), function(i) {
  # Keep only the windows (rows) with more than 30 reads in column i.
  df1 <- df[df[, i] > 30, ]
  # Move the row names (e.g. "chr1-1119901-1120200") into a leading "rn" column.
  df1 <- setDT(df1, keep.rownames = TRUE)[]
  # "rn" now occupies column 1, so column i of `df` sits at position i + 1.
  # `with = FALSE` is required: the expression mentions the variable `i`, so
  # data.table would otherwise evaluate c(...) and return a plain vector
  # instead of selecting columns.
  df1 <- df1[, c(1, i + 1), with = FALSE]
  # Split "rn" on "-" into separate chr / start / end columns.
  df1 <- separate(df1, "rn", c("chr", "start", "end"), sep = "-", remove = TRUE)
  # One output file per column: df1.txt, df2.txt, ...
  bed_file <- paste0("df", i, ".txt")
  # `col.names` is spelled out in full instead of relying on partial matching.
  write.table(
    df1, bed_file,
    sep = "\t", col.names = FALSE, row.names = FALSE, quote = FALSE
  )
  # Read the file back as BED regions on hg19, annotate them and summarise.
  dm_regions1 <- read_regions(con = bed_file, genome = 'hg19', format = 'bed')
  dm_annotated1 <- annotate_regions(
    regions = dm_regions1,
    annotations = annotations,
    ignore.strand = TRUE,
    quiet = FALSE
  )
  dm_annsum1 <- summarize_annotations(
    annotated_regions = dm_annotated1,
    quiet = TRUE
  )
  list(
    dm_annotated = dm_annotated1,
    dm_annsum = dm_annsum1,
    dm_regions = dm_regions1
  )
})
# Annotation summary for i = 2 (second column of `df`):
L[[2]]$dm_annsum
# Regions for i = 2 (second column of `df`):
L[[2]]$dm_regions
Upvotes: 1