Reputation: 23
I have the following file, list.txt:
AbateI. D
AcatulloM. A
AcerbiF. D
AcquafrescaR. A
AcquahA. C
AdjapongC. D
AdnanA. D
AdrianoL. A
AjetiA. D
AlbiolR. D
AldeganiG. P
AleesamiH. D
AlexSandro D
AlissonR. P
And I want to rearrange the file with awk, grouping the names by the second column, so that the output looks like this:
P                   D                   C                   A
AldeganiG.          AbateI.             AcquahA.            AcatulloM.
AlissonR.           AcerbiF.                                AcquafrescaR.
                    AdjapongC.                              AdrianoL.
                    AdnanA.
                    AjetiA.
                    AlbiolR.
                    AleesamiH.
                    AlexSandro
This is what I tried:
#!/usr/bin/awk -f
# Attempt from the question: bucket the names in field 1 by the group letter
# in field 2, then print them under a four-column header.
BEGIN {
# Row template: a leading tab, three left-justified 20-character columns,
# then a last column of free width.
FORMAT="\t%-20s%-20s%-20s%s\n"
printf FORMAT,"P","D","C","A"
}
# One array per group letter; each name is stored under itself as the key.
($2=="P") {a[$1] = $1}
($2=="D") {b[$1] = $1}
($2=="C") {c[$1] = $1}
($2=="A") {d[$1] = $1}
# Only array a (group "P") is printed, always in the first column; b, c and d
# are filled above but never read. awk does not define the order of a
# for-in traversal, so the names need not come out in input order.
END{for(i in a) printf FORMAT, a[i],"","",""}
But I don't know how to loop and print other arrays.
Upvotes: 2
Views: 93
Reputation: 827
Solution with awk 4.0 2D arrays - allows output of any number of groups in any order
# Column order for the output comes from the script arguments (e.g. P D C A).
order="$*"
awk -v orderstr="$order" '
BEGIN {
    # cols[1..ncols] holds the group names in the requested order
    ncols = split(orderstr, cols)
}
{
    # members[group][slot] = name; fill[group] is the next free slot
    slot = fill[$2]++
    members[$2][slot] = $1
    # remember the size of the largest group seen so far
    if (fill[$2] > tallest)
        tallest = fill[$2]
}
END {
    # header line: first column bare, remaining columns tab-prefixed
    printf("%-20s", cols[1])
    for (c = 2; c <= ncols; c++)
        printf("\t%-20s", cols[c])
    printf("\n")
    # one output line per slot, walking the groups in the requested order
    for (r = 0; r < tallest; r++) {
        printf("%-20s", members[cols[1]][r])
        for (c = 2; c <= ncols; c++)
            printf("\t%-20s", members[cols[c]][r])
        printf("\n")
    }
}
'
tested
./myscr.sh P D C A <in.txt
P D C A
AldeganiG. AbateI. AcquahA. AcatulloM.
AlissonR. AcerbiF. AcquafrescaR.
AdjapongC. AdrianoL.
AdnanA.
AjetiA.
AlbiolR.
AleesamiH.
AlexSandro
./myscr.sh D A P C <in.txt
D A P C
AbateI. AcatulloM. AldeganiG. AcquahA.
AcerbiF. AcquafrescaR. AlissonR.
AdjapongC. AdrianoL.
AdnanA.
AjetiA.
AlbiolR.
AleesamiH.
AlexSandro
./myscr.sh A P <in.txt
A P
AcatulloM. AldeganiG.
AcquafrescaR. AlissonR.
AdrianoL.
Upvotes: 1
Reputation: 37404
In GNU awk:
$ cat > list.awk
# Print the words of field 1 in columns, one column per distinct value of
# field 2. Requires GNU awk (arrays of arrays). Columns appear in for-in
# order, which awk does not define, and no header line is printed.
{
    n=(n<++b[$2]?b[$2]:n)      # n is the max count of words in one group
    a[$2][b[$2]]=$1            # put words to two dimensional array
}
END {
    for(i=1;i<=n;i++) {        # from 1 to n
        for(j in a)            # for all groups
            # left-justify in 14 characters; the "-" flag must precede the width
            printf "%-14s%s",a[j][i],OFS
        printf "%s",ORS        # ORS in the end
    }
}
$ awk -f list.awk list.txt
AcatulloM. AldeganiG. AcquahA. AbateI.
AcquafrescaR. AlissonR. AcerbiF.
AdrianoL. AdjapongC.
AdnanA.
AjetiA.
AlbiolR.
AleesamiH.
AlexSandro
Upvotes: 0
Reputation: 203607
$ cat tst.awk
# Pivot "name group" lines into one tab-separated column per group.
BEGIN { OFS = "\t" }
{
    # depth = 1-based position of this name within its group
    depth = ++numColRows[$2]
    val[depth, $2] = $1
    # numRows tracks the size of the largest group
    if (depth > numRows)
        numRows = depth
}
END {
    # header row: group names in for-in order (awk does not define that order)
    sep = ""
    for (colName in numColRows) {
        printf "%s%s", sep, colName
        sep = OFS
    }
    print ""
    # data rows: the n-th name of every group, empty where a group is shorter
    for (row = 1; row <= numRows; row++) {
        sep = ""
        for (colName in numColRows) {
            printf "%s%s", sep, val[row, colName]
            sep = OFS
        }
        print ""
    }
}
$ awk -f tst.awk file | column -s$'\t' -t
A P C D
AcatulloM. AldeganiG. AcquahA. AbateI.
AcquafrescaR. AlissonR. AcerbiF.
AdrianoL. AdjapongC.
AdnanA.
AjetiA.
AlbiolR.
AleesamiH.
AlexSandro
Read Effective Awk Programming, 4th Edition, by Arnold Robbins.
Upvotes: 2
Reputation: 21965
You could use a grep-cut-paste-expand combination too:
# One process substitution per group: the header letter first, then the first
# field of every line that ends in that letter. paste joins the four streams
# with tabs and expand turns the tabs into 20-column stops.
paste \
  <(printf '%s\n' P; grep 'P$' list.txt | cut -f1 -d ' ') \
  <(printf '%s\n' D; grep 'D$' list.txt | cut -f1 -d ' ') \
  <(printf '%s\n' C; grep 'C$' list.txt | cut -f1 -d ' ') \
  <(printf '%s\n' A; grep 'A$' list.txt | cut -f1 -d ' ') \
  | expand -t 20
Output
P D C A
AldeganiG. AbateI. AcquahA. AcatulloM.
AlissonR. AcerbiF. AcquafrescaR.
AdjapongC. AdrianoL.
AdnanA.
AjetiA.
AlbiolR.
AleesamiH.
AlexSandro
You could replace the grep-cut pair with sed, as shown below:
# For each group letter: print the header, then every line ending in that
# letter with the trailing blanks and letter removed. "s///p" under -n prints
# only the lines where the substitution happened, so no "/X$/{...;p}" group is
# needed; that form, written without a ";" before "}", is rejected by
# BSD/macOS sed.
paste \
<(echo "P";sed -n 's/[[:blank:]]*P$//p' file ) \
<(echo "D";sed -n 's/[[:blank:]]*D$//p' file ) \
<(echo "C";sed -n 's/[[:blank:]]*C$//p' file ) \
<(echo "A";sed -n 's/[[:blank:]]*A$//p' file ) | expand -t 20
Output
P D C A
AldeganiG. AbateI. AcquahA. AcatulloM.
AlissonR. AcerbiF. AcquafrescaR.
AdjapongC. AdrianoL.
AdnanA.
AjetiA.
AlbiolR.
AleesamiH.
AlexSandro
You could also do it this way
# One awk per group: print the header letter, then field 1 of every line that
# ends in that letter. All four process substitutions are arguments of the
# single paste command, so every continued line must end in a backslash.
paste \
<(awk 'BEGIN{print "P"}/P$/{print $1}' file ) \
<(awk 'BEGIN{print "D"}/D$/{print $1}' file ) \
<(awk 'BEGIN{print "C"}/C$/{print $1}' file ) \
<(awk 'BEGIN{print "A"}/A$/{print $1}' file ) | expand -t 20
Output
P D C A
AldeganiG. AbateI. AcquahA. AcatulloM.
AlissonR. AcerbiF. AcquafrescaR.
AdjapongC. AdrianoL.
AdnanA.
AjetiA.
AlbiolR.
AleesamiH.
AlexSandro
Upvotes: 2
Reputation: 67507
here is a non-traditional approach
$ awk -v OFS='\n' '
    # names[g]: the names of group g, each preceded by a newline
    # size[g]:  how many names group g holds; max: the largest size seen
    {
        names[$2] = names[$2] OFS $1
        if (++size[$2] > max) max = size[$2]
    }
    END {
        # one pr column per group; -t omits the page header and trailer
        pr = "pr -" length(size) "t"
        for (g in names) {
            # group letter followed by its names, one per line
            print g names[g] | pr
            # pad with blank lines so every group sends the same number of lines
            for (pad = size[g]; pad < max; pad++)
                print "" | pr
        }
    }'
A P C D
AcatulloM. AldeganiG. AcquahA. AbateI.
AcquafrescaR. AlissonR. AcerbiF.
AdrianoL. AdjapongC.
AdnanA.
AjetiA.
AlbiolR.
AleesamiH.
AlexSandro
note that the order of the columns is somewhat arbitrary but the values are listed in insertion order.
Also this approach is not following the traditional "transpose" method with two dimensional arrays. Perhaps better to learn that instead.
This site has many answers already for almost the same question.
Upvotes: 1
Reputation: 39414
You can use paste and column with some process substitution:
$ paste \
    <(awk '$0 ~ /P$/ { print $1 }' < input) \
    <(awk '$0 ~ /D$/ { print $1 }' < input) \
    <(awk '$0 ~ /C$/ { print $1 }' < input) \
    <(awk '$0 ~ /A$/ { print $1 }' < input) \
    | column -s $'\t' -t
AldeganiG. AbateI. AcquahA. AcatulloM.
AlissonR. AcerbiF. AcquafrescaR.
AdjapongC. AdrianoL.
AdnanA.
AjetiA.
AlbiolR.
AleesamiH.
AlexSandro
Add the column headers manually, if you like.
Upvotes: 2