Reputation: 2059
I am not sure if this has been asked before, so feel free to edit the title of this question if it can be phrased better. I have a list of files that need to be moved. They are stored on a server and are quite large, so they take a while to move. I can only move 10 files at a time, but I can submit multiple jobs. So what I want to do is use gsub and a for loop to edit a script with the first 10 files, then a new script with the next 10 files, then another with the next 10, and so on. Here is an example of the files I want to move. The list has 26 files, so the last script would only get 6: I know I said I want to move files in groups of 10, but I am curious what would happen when only 6 samples are left instead of 10. The output files can be named anything, as long as the names are unique. I will also attach the for loop that I tried to run; what I struggled with was looping through the list and giving each output file a unique name.
# Example input: a one-column data frame of `mcli mv` commands to distribute
# across batch scripts.
# NOTE(review): the single column is named "mcli[c(1:25), ]" (not "V1") and is
# a factor with 26 codes/levels, while row.names is c(NA, -25L), i.e. 25 rows,
# so the column length and the declared row count disagree.
mcli <- structure(list(`mcli[c(1:25), ]` = structure(1:26, .Label = c("mcli mv --recursive /path/to/directory/D00063/alignment/ /path/to/directory/D00063/new.folder/alignment/",
"mcli mv --recursive /path/to/directory/D00063/bam/ /path/to/directory/D00063/new.folder/bam/",
"mcli mv --recursive /path/to/directory/D00063/fastqc/ /path/to/directory/D00063/new.folder/fastqc/",
"mcli mv --recursive /path/to/directory/D00063/gvcf/ /path/to/directory/D00063/new.folder/gvcf/",
"mcli mv --recursive /path/to/directory/D00063/trim/ /path/to/directory/D00063/new.folder/trim/",
"mcli mv --recursive /path/to/directory/D00064/alignment/ /path/to/directory/D00064/new.folder/alignment/",
"mcli mv --recursive /path/to/directory/D00064/bam/ /path/to/directory/D00064/new.folder/bam/",
"mcli mv --recursive /path/to/directory/D00064/fastqc/ /path/to/directory/D00064/new.folder/fastqc/",
"mcli mv --recursive /path/to/directory/D00064/gvcf/ /path/to/directory/D00064/new.folder/gvcf/",
"mcli mv --recursive /path/to/directory/D00064/trim/ /path/to/directory/D00064/new.folder/trim/",
"mcli mv --recursive /path/to/directory/D00105/gvcf/ /path/to/directory/D00105/new.folder/gvcf/",
"mcli mv --recursive /path/to/directory/D00151/gvcf/ /path/to/directory/D00151/new.folder/gvcf/",
"mcli mv --recursive /path/to/directory/D00188/gvcf/ /path/to/directory/D00188/new.folder/gvcf/",
"mcli mv --recursive /path/to/directory/D002053/D02053/ /path/to/directory/D002053/new.folder/D02053/",
"mcli mv --recursive /path/to/directory/D00220/gvcf/ /path/to/directory/D00220/new.folder/gvcf/",
"mcli mv --recursive /path/to/directory/D00257/gvcf/ /path/to/directory/D00257/new.folder/gvcf/",
"mcli mv --recursive /path/to/directory/D00258/gvcf/ /path/to/directory/D00258/new.folder/gvcf/",
"mcli mv --recursive /path/to/directory/D00264/gvcf/ /path/to/directory/D00264/new.folder/gvcf/",
"mcli mv --recursive /path/to/directory/D00268/alignment/ /path/to/directory/D00268/new.folder/alignment/",
"mcli mv --recursive /path/to/directory/D00268/bam/ /path/to/directory/D00268/new.folder/bam/",
"mcli mv --recursive /path/to/directory/D00268/fastqc/ /path/to/directory/D00268/new.folder/fastqc/",
"mcli mv --recursive /path/to/directory/D00268/gvcf/ /path/to/directory/D00268/new.folder/gvcf/",
"mcli mv --recursive /path/to/directory/D00268/logs/ /path/to/directory/D00268/new.folder/logs/",
"mcli mv --recursive /path/to/directory/D00268/trim/ /path/to/directory/D00268/new.folder/trim/",
"mcli mv --recursive /path/to/directory/D00269/gvcf/ /path/to/directory/D00269/new.folder/gvcf/",
"mcli mv --recursive /path/to/directory/D00270/gvcf/ /path/to/directory/D00270/new.folder/gvcf/"), class = "factor")), .Names = "mcli[c(1:25), ]", row.names = c(NA,
-25L), class = "data.frame")
Here is an example of the script I want to edit
# Slurm batch-script template stored one script line per row in column V1.
# Rows 14-23 are "srun" lines whose last token before " &" is a placeholder
# that is meant to be swapped for a real command.
slurm <- local({
  sbatch_header <- c(
    "#!/bin/bash -l",
    "#SBATCH --nodes=1",
    "#SBATCH --ntasks-per-node=10 ",
    "#SBATCH --mem-per-cpu=5gb",
    "#SBATCH -t 20:00:00",
    "#SBATCH --mail-type=ALL",
    "#SBATCH --mail-user=email ",
    "#SBATCH -o %j.out",
    "#SBATCH -e %j.err"
  )
  placeholders <- c(
    "abc123", "def456", "ghi789", "jkl101112", "mno131415",
    "pqr161718", "stu192021", "vwx222324", "yza252627", "abc282930"
  )
  srun_lines <- sprintf("srun --exclusive --ntasks 1 %s &", placeholders)
  template_lines <- c(
    sbatch_header,
    "", "",
    "cd $SLURM_SUBMIT_DIR",
    "",
    srun_lines,
    "",
    "wait"
  )
  structure(
    list(V1 = template_lines),
    row.names = c(NA, -25L),
    class = c("data.table", "data.frame")
  )
})
example code that doesn't work
# Attempted loop (posted as not working): copies the `slurm` template, replaces
# the ten placeholder tokens, and writes the result to a file.
# It iterates once per entry of the first column of `mcli`, not once per group
# of ten, and every gsub() below uses the same index `i`, so all ten
# placeholders would receive the same replacement within an iteration.
for(i in 1:length(mcli[[1]])){
# Fresh copy of the template for this iteration.
df2 <- as.data.frame(slurm)
# NOTE(review): `mcli` as defined above has a single column named
# "mcli[c(1:25), ]", so `mcli[['V1']]` refers to a column that is not present.
df2[,'V1'] <- gsub("abc123", mcli[['V1']][i], df2[,'V1'])
df2[,'V1'] <- gsub("def456", mcli[['V1']][i], df2[,'V1'])
df2[,'V1'] <- gsub("ghi789", mcli[['V1']][i], df2[,'V1'])
df2[,'V1'] <- gsub("jkl101112", mcli[['V1']][i], df2[,'V1'])
df2[,'V1'] <- gsub("mno131415", mcli[['V1']][i], df2[,'V1'])
df2[,'V1'] <- gsub("pqr161718", mcli[['V1']][i], df2[,'V1'])
df2[,'V1'] <- gsub("stu192021", mcli[['V1']][i], df2[,'V1'])
df2[,'V1'] <- gsub("vwx222324", mcli[['V1']][i], df2[,'V1'])
df2[,'V1'] <- gsub("yza252627", mcli[['V1']][i], df2[,'V1'])
df2[,'V1'] <- gsub("abc282930", mcli[['V1']][i], df2[,'V1'])
# `file =` here is an argument to paste(), not to write.table(); the pasted
# string reaches write.table() as its second positional argument. The output
# name is the folder path, then the `mcli[['V1']][i]` value, then ".csh".
write.table(df2, paste(file = "/path/to/random/folder/",mcli[['V1']][i], ".csh", sep = ""), row.names = FALSE, col.names =FALSE, quote = FALSE)
}
expected output script1
#!/bin/bash -l
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=10
#SBATCH --mem-per-cpu=5gb
#SBATCH -t 20:00:00
#SBATCH --mail-type=ALL
#SBATCH --mail-user=email
#SBATCH -o %j.out
#SBATCH -e %j.err
cd $SLURM_SUBMIT_DIR
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00063/alignment/ /path/to/directory/D00063/new.folder/alignment/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00063/bam/ /path/to/directory/D00063/new.folder/bam/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00063/fastqc/ /path/to/directory/D00063/new.folder/fastqc/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00063/gvcf/ /path/to/directory/D00063/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00063/trim/ /path/to/directory/D00063/new.folder/trim/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00064/alignment/ /path/to/directory/D00064/new.folder/alignment/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00064/bam/ /path/to/directory/D00064/new.folder/bam/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00064/fastqc/ /path/to/directory/D00064/new.folder/fastqc/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00064/gvcf/ /path/to/directory/D00064/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00064/trim/ /path/to/directory/D00064/new.folder/trim/ &
wait
expected output script2
#!/bin/bash -l
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=10
#SBATCH --mem-per-cpu=5gb
#SBATCH -t 20:00:00
#SBATCH --mail-type=ALL
#SBATCH --mail-user=email
#SBATCH -o %j.out
#SBATCH -e %j.err
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00105/gvcf/ /path/to/directory/D00105/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00151/gvcf/ /path/to/directory/D00151/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00188/gvcf/ /path/to/directory/D00188/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D002053/D02053/ /path/to/directory/D002053/new.folder/D02053/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00220/gvcf/ /path/to/directory/D00220/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00257/gvcf/ /path/to/directory/D00257/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00258/gvcf/ /path/to/directory/D00258/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00264/gvcf/ /path/to/directory/D00264/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00268/alignment/ /path/to/directory/D00268/new.folder/alignment/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00268/bam/ /path/to/directory/D00268/new.folder/bam/ &
wait
expected output script3
#!/bin/bash -l
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=10
#SBATCH --mem-per-cpu=5gb
#SBATCH -t 20:00:00
#SBATCH --mail-type=ALL
#SBATCH --mail-user=email
#SBATCH -o %j.out
#SBATCH -e %j.err
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00268/fastqc/ /path/to/directory/D00268/new.folder/fastqc/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00268/gvcf/ /path/to/directory/D00268/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00268/logs/ /path/to/directory/D00268/new.folder/logs/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00268/trim/ /path/to/directory/D00268/new.folder/trim/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00269/gvcf/ /path/to/directory/D00269/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00270/gvcf/ /path/to/directory/D00270/new.folder/gvcf/ &
wait
Upvotes: 2
Views: 46
Reputation: 160417
Instead of a frame, I'm going to operate on a vector of commands to run:
# Commands to distribute across batch scripts, as a plain character vector
# (one complete `mcli mv` command per element, each on a single line so that
# no element contains an embedded newline).
mcli <- c(
  "mcli mv --recursive /path/to/directory/D00063/alignment/ /path/to/directory/D00063/new.folder/alignment/",
  "mcli mv --recursive /path/to/directory/D00063/bam/ /path/to/directory/D00063/new.folder/bam/",
  "mcli mv --recursive /path/to/directory/D00063/fastqc/ /path/to/directory/D00063/new.folder/fastqc/",
  "mcli mv --recursive /path/to/directory/D00063/gvcf/ /path/to/directory/D00063/new.folder/gvcf/",
  "mcli mv --recursive /path/to/directory/D00063/trim/ /path/to/directory/D00063/new.folder/trim/",
  "mcli mv --recursive /path/to/directory/D00064/alignment/ /path/to/directory/D00064/new.folder/alignment/",
  "mcli mv --recursive /path/to/directory/D00064/bam/ /path/to/directory/D00064/new.folder/bam/",
  "mcli mv --recursive /path/to/directory/D00064/fastqc/ /path/to/directory/D00064/new.folder/fastqc/",
  "mcli mv --recursive /path/to/directory/D00064/gvcf/ /path/to/directory/D00064/new.folder/gvcf/",
  "mcli mv --recursive /path/to/directory/D00064/trim/ /path/to/directory/D00064/new.folder/trim/",
  "mcli mv --recursive /path/to/directory/D00105/gvcf/ /path/to/directory/D00105/new.folder/gvcf/",
  "mcli mv --recursive /path/to/directory/D00151/gvcf/ /path/to/directory/D00151/new.folder/gvcf/",
  "mcli mv --recursive /path/to/directory/D00188/gvcf/ /path/to/directory/D00188/new.folder/gvcf/",
  "mcli mv --recursive /path/to/directory/D002053/D02053/ /path/to/directory/D002053/new.folder/D02053/",
  "mcli mv --recursive /path/to/directory/D00220/gvcf/ /path/to/directory/D00220/new.folder/gvcf/",
  "mcli mv --recursive /path/to/directory/D00257/gvcf/ /path/to/directory/D00257/new.folder/gvcf/",
  "mcli mv --recursive /path/to/directory/D00258/gvcf/ /path/to/directory/D00258/new.folder/gvcf/",
  "mcli mv --recursive /path/to/directory/D00264/gvcf/ /path/to/directory/D00264/new.folder/gvcf/",
  "mcli mv --recursive /path/to/directory/D00268/alignment/ /path/to/directory/D00268/new.folder/alignment/",
  "mcli mv --recursive /path/to/directory/D00268/bam/ /path/to/directory/D00268/new.folder/bam/",
  "mcli mv --recursive /path/to/directory/D00268/fastqc/ /path/to/directory/D00268/new.folder/fastqc/",
  "mcli mv --recursive /path/to/directory/D00268/gvcf/ /path/to/directory/D00268/new.folder/gvcf/",
  "mcli mv --recursive /path/to/directory/D00268/logs/ /path/to/directory/D00268/new.folder/logs/",
  "mcli mv --recursive /path/to/directory/D00268/trim/ /path/to/directory/D00268/new.folder/trim/",
  "mcli mv --recursive /path/to/directory/D00269/gvcf/ /path/to/directory/D00269/new.folder/gvcf/",
  "mcli mv --recursive /path/to/directory/D00270/gvcf/ /path/to/directory/D00270/new.folder/gvcf/"
)
I do this for three reasons: (1) I get warnings about `corrupt data frame: columns will be truncated or padded with NAs`; (2) those are `factor`s, when we really just need `character` (easily fixed, I know); and (3) I want this to be generalizable to any other column/vector.
I'll modify your `slurm` template a little, as:
# Script template in three pieces: `bef` is the text placed before the job
# lines, `job` is a sprintf() format for one job line (%s takes the command),
# and `aft` is the text placed after the job lines.
slurm <- list(
  bef = paste(
    c(
      "#!/bin/bash -l",
      "#SBATCH --nodes=1",
      "#SBATCH --ntasks-per-node=10 ",
      "#SBATCH --mem-per-cpu=5gb",
      "#SBATCH -t 20:00:00",
      "#SBATCH --mail-type=ALL",
      "#SBATCH --mail-user=email ",
      "#SBATCH -o %j.out",
      "#SBATCH -e %j.err",
      "",
      "",
      "cd $SLURM_SUBMIT_DIR"
    ),
    collapse = "\n"
  ),
  job = "srun --exclusive --ntasks 1 %s &",
  aft = "\n\nwait\n"
)
From here:
# Build one complete script per group of ten consecutive commands.
# (seq_along(mcli)-1) %/% 10 is 0 for elements 1-10, 1 for 11-20, and so on,
# so a final group may hold fewer than ten commands.
scripts <- by(mcli, (seq_along(mcli)-1) %/% 10,
  FUN = function(cmds) {
    # One formatted job line per command in this group.
    job_lines <- sprintf(slurm$job, cmds)
    job_block <- paste(job_lines, collapse = "\n")
    # Header text, then the job lines, then the footer text.
    paste(slurm$bef, job_block, slurm$aft, sep = "\n")
  })
Each element of `scripts` is a complete batch file containing up to 10 jobs. For instance, the last is:
> cat(scripts[[3]], "\n")
#!/bin/bash -l
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=10
#SBATCH --mem-per-cpu=5gb
#SBATCH -t 20:00:00
#SBATCH --mail-type=ALL
#SBATCH --mail-user=email
#SBATCH -o %j.out
#SBATCH -e %j.err
cd $SLURM_SUBMIT_DIR
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00268/fastqc/ /path/to/directory/D00268/new.folder/fastqc/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00268/gvcf/ /path/to/directory/D00268/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00268/logs/ /path/to/directory/D00268/new.folder/logs/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00268/trim/ /path/to/directory/D00268/new.folder/trim/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00269/gvcf/ /path/to/directory/D00269/new.folder/gvcf/ &
srun --exclusive --ntasks 1 mcli mv --recursive /path/to/directory/D00270/gvcf/ /path/to/directory/D00270/new.folder/gvcf/ &
wait
If you need those as individual files, you can do something like:
# Write every generated script to its own file: "script1", "script2", ...
for (idx in seq_along(scripts)) {
  out_file <- paste0("script", idx)
  writeLines(scripts[[idx]], out_file)
}
Upvotes: 2