Reputation: 821
How can I filter the values from the input file, using the row names from the ids.1 file and the column names from the ids.2 file?
sample input
name s1 s2 s3 s4
a1 7 8 7 8
a2 7 54 7 8
a3 8 8 8 8
a4 7 7 7 0
ids.1
name
a1
a4
ids.2
name
s3
s4
sample output
name s3 s4
a1 7 8
a4 7 0
I was using the following code to filter the values of selected rows. How can I extend this to columns as well?
awk '
  # First file (ids.1): remember every wanted row name, emit nothing.
  ARGIND == 1 { keep[$1]; next }
  # Remaining input: print each line whose first field was remembered.
  $1 in keep
' ids.1 sample.input
name s1 s2 s3 s4
a1 7 8 7 8
a4 7 7 7 0
Upvotes: 1
Views: 246
Reputation: 195039
A simpler and faster version (note that ARGIND is specific to GNU awk):
awk '
# Filter a table by row names (first file) and column names (second file).
# Every further file is treated as data whose first line is the header.
# ARGIND is a GNU awk variable holding the index of the file being read.

ARGIND == 1 { row[$1]; next }    # ids.1: names of the rows to keep
ARGIND == 2 { col[$1]; next }    # ids.2: names of the columns to keep

# Header of a data file: record the positions of the wanted columns.
# This runs whether or not the header name is listed in ids.1, so the
# column selection never depends on the row selection.
FNR == 1 {
    delete v                     # forget positions from a previous data file
    for (i = 1; i <= NF; i++)
        if ($i in col)           # "in" tests membership without creating keys
            v[i]
}

# Header or data line whose first field is a wanted row name.
$1 in row {
    sep = ""                     # no separator before the first printed field
    for (i = 1; i <= NF; i++)
        if (i in v) {
            printf "%s%s", sep, $i
            sep = FS
        }
    print ""
}' ids.1 ids.2 sample.input
With your example, it gives:
name s3 s4
a1 7 8
a4 7 0
Upvotes: 2
Reputation: 37404
This one assumes that the first record is always in the columns file (ids.2):
$ awk '
ARGIND==1 { rows[$1] }                 # first file: names of the rows to keep
ARGIND==2 { cols[$1] }                 # second file: names of the columns to keep
ARGIND==3 {                            # third file: the data
    if (FNR==1) {                      # header record: build the column mask
        delete want
        for (i=1; i<=NF; i++)          # keep only positions named in the columns file
            if ($i in cols)
                want[i]
    }
    if ($1 in rows) {                  # a row we were asked for
        out=""                         # output buffer
        for (i=1; i<=NF; i++) {
            if (!(i in want))          # skip the columns we do not want
                continue
            out=(out=="" ? $i : out OFS $i)
        }
        print out
    }
}' ids.1 ids.2 file
name s3 s4
a1 7 8
a4 7 0
Upvotes: 1