Reputation: 13
I have several lines of code that look something like this, although they can only process one file (5cym24.pdb), i.e.:
# Keep only the lines of the input that contain TRP.
grep -E 'TRP' 5cym24.pdb > 5cym24_d.pdb
# Keep only the lines of the input that contain CYS.
grep -E 'CYS' 5cym24.pdb > 5cym24_b.pdb
# Print only the listed line-number ranges of the input (-n suppresses all
# other lines).
sed -n '3030,3106p;3138,3205p;3238,3268p;3329,3361p;3423,3453p' 5cym24.pdb > 5cym24_a.pdb
# Convert each extracted file to CSV: assigning $1 to itself makes awk rebuild
# the record with OFS (a comma) between its whitespace-separated fields, and
# the trailing 1 prints the rebuilt record.
awk '{$1=$1}1' OFS=, 5cym24_d.pdb > 5cym24_d.csv
awk '{$1=$1}1' OFS=, 5cym24_b.pdb > 5cym24_b.csv
awk '{$1=$1}1' OFS=, 5cym24_a.pdb > 5cym24_a.csv
My question is, how do I include a loop such that it processes the many pdb files that I have, i.e., 5cymX.pdb, where X ranges from 24 to 70, for example, thus generating 5cymX_d.pdb, 5cymX_b.pdb, 5cymX_a.pdb, 5cymX_d.csv, 5cymX_b.csv, 5cymX_a.csv. Thanks!
Upvotes: 0
Views: 43
Reputation: 204015
Your existing code:
# Step 1: select the lines containing TRP and the lines containing CYS.
grep -E 'TRP' 5cym24.pdb > 5cym24_d.pdb
grep -E 'CYS' 5cym24.pdb > 5cym24_b.pdb
# Step 2: select the fixed line-number ranges (-n prints nothing else).
sed -n '3030,3106p;3138,3205p;3238,3268p;3329,3361p;3423,3453p' 5cym24.pdb > 5cym24_a.pdb
# Step 3: turn each selection into CSV. The $1=$1 assignment forces awk to
# rebuild the record using OFS (a comma) as the field separator; the trailing
# 1 prints it.
awk '{$1=$1}1' OFS=, 5cym24_d.pdb > 5cym24_d.csv
awk '{$1=$1}1' OFS=, 5cym24_b.pdb > 5cym24_b.csv
awk '{$1=$1}1' OFS=, 5cym24_a.pdb > 5cym24_a.csv
can be reduced to just 1 call to awk (untested of course since you didn't provide sample input/output to test against):
# Split each input .pdb file into three CSV files in a single pass:
#   <name>_d.csv - lines containing TRP
#   <name>_b.csv - lines containing CYS
#   <name>_a.csv - the fixed line-number ranges listed below
# where <name> is the input file name without its .pdb suffix.
# NOTE: the awk program is single-quoted, so its comments must not contain
# an apostrophe.
awk -v OFS=',' '
# Start of each input file: point the three outputs at that file.
FNR == 1 {
    if (base != "") {
        # Release the outputs of the previous input file so that a long
        # file list cannot exhaust the open-file limit.
        close(base "_a.csv")
        close(base "_b.csv")
        close(base "_d.csv")
    }
    base = FILENAME
    sub(/\.pdb$/, "", base)
    # Create/truncate every output up front. Each one then exists (possibly
    # empty) even when no line is selected, matching what the original shell
    # redirections did, and content from an earlier run is never left behind.
    printf "" > (base "_a.csv")
    printf "" > (base "_b.csv")
    printf "" > (base "_d.csv")
}
# Rebuild the record so its whitespace-separated fields are joined by OFS.
{ $1 = $1 }
/TRP/ { print > (base "_d.csv") }
/CYS/ { print > (base "_b.csv") }
# FNR is the line number within the current file, so these ranges apply to
# every input file independently.
(FNR>=3030 && FNR<=3106) || (FNR>=3138 && FNR<=3205) || (FNR>=3238 && FNR<=3268) ||
(FNR>=3329 && FNR<=3361) || (FNR>=3423 && FNR<=3453) { print > (base "_a.csv") }
' 5cym24.pdb
and you could probably just change 5cym24.pdb
to *.pdb
(or whatever list of input files you want to provide) and it'd simply work as-is for all of your files at once.
Upvotes: 0
Reputation: 4164
try this:
#!/bin/bash
#######################################
# Extract residue and line-range subsets from one 5cymN.pdb file and write a
# comma-separated copy of each subset.
# Arguments:
#   $1 - the N in 5cymN.pdb (e.g. 24)
# Outputs:
#   5cymN_d.pdb / 5cymN_d.csv - lines containing TRP
#   5cymN_b.pdb / 5cymN_b.csv - lines containing CYS
#   5cymN_a.pdb / 5cymN_a.csv - the fixed line ranges listed in the sed call
# Returns:
#   1, after a message on stderr, when 5cymN.pdb is missing or unreadable
#   (no output files are created); otherwise the status of the last awk call.
#######################################
do_work() {
  # Braces make it explicit that only $1 is expanded before the _d/_b/_a
  # suffixes.
  local stem="5cym${1}"
  local src="${stem}.pdb"

  # Without this guard every missing number in the range would still leave
  # six empty output files behind, because the redirections create them
  # before grep/sed fail.
  if [[ ! -r "$src" ]]; then
    printf 'do_work: cannot read %s\n' "$src" >&2
    return 1
  fi

  grep -E 'TRP' "$src" > "${stem}_d.pdb"
  grep -E 'CYS' "$src" > "${stem}_b.pdb"
  sed -n '3030,3106p;3138,3205p;3238,3268p;3329,3361p;3423,3453p' "$src" > "${stem}_a.pdb"

  # Assigning $1 to itself makes awk rebuild each record with OFS (a comma)
  # between the whitespace-separated fields; the trailing 1 prints it.
  awk '{$1=$1}1' OFS=, "${stem}_d.pdb" > "${stem}_d.csv"
  awk '{$1=$1}1' OFS=, "${stem}_b.pdb" > "${stem}_b.csv"
  awk '{$1=$1}1' OFS=, "${stem}_a.pdb" > "${stem}_a.csv"
}
# Process every file from 5cym24.pdb through 5cym70.pdb. Brace expansion is
# performed by bash itself, so no external seq process and no unquoted
# command substitution are needed.
for i in {24..70}; do
  do_work "$i"
done
Upvotes: 1