Reputation: 2866
I trying to take a file like that shown below and output it in which $1
is the columns and $2
is the rows.
$ cat file
8 A 11.56
6 A 1.46
10 A 1.60
7 A 19.36
9 A 7.09
9 C 0.63
8 C 3.64
7 C 5.20
8 G 12.20
6 G 1.62
7 G 17.39
10 G 2.12
9 G 7.77
9 T 0.67
8 T 3.07
7 T 4.62
the desired output is:
B 6 7 8 9 10
A 1.46 19.36 11.56 7.09 1.60
C 5.20 3.64 0.63
T 4.62 3.07 0.67
G 1.62 17.39 12.20 7.77 2.12G
I have written the following code to get it most of the way there, but there are some errors still.
awk '
BEGIN{OFS="\t"
print "B",6,7,8,9,10
}
{
a[$2""$1]=$3
b[$2]
}
END{ORS="\t"
for (c in b)
{
print c
for (i=6;i<11;i++)
{
for (r in a)
if (r==c""i)
print a[c""i]""c""i
}
print "\n"
}
}' file
output >
B 6 7 8 9 10
A 1.46A6 19.36A7 11.56A8 7.09A9 1.60A10
C 5.20C7 3.64C8 0.63C9
T 4.62T7 3.07T8 0.67T9
G 1.62G6 17.39G7 12.20G8 7.77G9 2.12G10
I can't figure out why there are tabs at the beginning of the last three rows, and I am not sure how to put a space in when there is no data (I will probably just format my input file to have zeros for those values).
Upvotes: 1
Views: 116
Reputation: 25033
If the OP accepts a solution that depends on gawk
(specifically depending on multidimensional arrays and the asort
builtin) here it's mine
% cat tabular.awk
{col[$1]=$1;row[$2]=$2;val[$2,$1]=$3}
END{asort(col); asort(row)
printf "B" ; for(j in col) printf "%10d", col[j]; print ""
for(i in row) {
printf row[i]
for(j in col) printf "%10s", val[row[i],col[j]]
print ""}}
% awk OP.dat -f tabular.awk
B 6 7 8 9 10
A 1.46 19.36 11.56 7.09 1.60
C 5.20 3.64 0.63
G 1.62 17.39 12.20 7.77 2.12
T 4.62 3.07 0.67
%
Should the OP need a different formatting of the numerical data, s/he can modify the statement
val[$2,$1] = $3
in the default treatment of every line to, e.g.,
val[$2,$1] = sprintf("%10.2e", $3)
Upvotes: 0
Reputation: 45273
Not fix on 6-10, can be any string in $1
awk '{a[$2 FS $1]=$3;b[$1];c[$2]}
END{ printf "B\t" ;for (i in b) printf i OFS;printf RS
for (i in c)
{ printf i OFS
for (j in b)
printf a[i FS j] OFS
printf RS
}
}' OFS="\t" infile
Upvotes: 0
Reputation: 246942
I'd do this:
awk -F "\t" -v OFS="\t" '
{v[$2,$1] = $3; k[$2]}
END {
print "B",6,7,8,9,10
for (key in k) {
printf "%s", key
for (i=6; i<=10; i++) {
printf "%s%s", OFS, ((key SUBSEP i) in v ? v[key,i] : "")
}
print ""
}
}
' file
output
B 6 7 8 9 10
A 1.46 19.36 11.56 7.09 1.60
C 5.20 3.64 0.63
T 4.62 3.07 0.67
G 1.62 17.39 12.20 7.77 2.12
Upvotes: 4