Manoj Kumar
Manoj Kumar

Reputation: 55

UNIX : Creating a table formatted ouput from a delimited text file

I have a requirement to get a table format output from a text file and I am achieving it through the below awk command.

Delimited File

ACTIVE#1238917238971238#USA#The U.S. is a country of 50 states covering a vast swath of North America.
ACTIVE#21389721839781237812#INDIA#India, officially the Republic of India, is a country in South Asia.
ACTIVE#3121278372183782137812#AUSTRALIA#Australia, officially the Commonwealth of Australia, is a sovereign country comprising the mainland of the Australian continent

AWK Command

awk -F"#" 'BEGIN {{printf "%-80s\n","--------------------------------------------------------------------------------------------------------------------------------------"} {printf "|%-12s|%-30s|%-38s|%-50s|\n","STATUS","ID", "Country", "Description"} {printf "%-80s\n","--------------------------------------------------------------------------------------------------------------------------------------"}} {printf "|%-12s|%-30s|%-38s|%-50s|\n",$1,$2,$3,$4} END{printf "%-80s\n","--------------------------------------------------------------------------------------------------------------------------------------"}' /tmp/test.txt

Output: enter image description here

If you can see the output for the Description column , it doesnt format the output in its own column rather it messes the complete table due to string length.

Can someone please review and suggest me how I can display the output better for the Description column?

Upvotes: 3

Views: 1056

Answers (4)

James Brown
James Brown

Reputation: 37394

Here is another awk. It calculates the average length of fields and then the proportion of the terminal to use for output. There are probably better approaches than the average (or max) but I only tried those 2. It uses tput cols to get the termminal width:

$ awk '
BEGIN {
    FS="#"                                             # delims
    OFS=""                                             # to allow length==0
}
NR==FNR {                                              # avg field lenghts *
    for(i=1;i<=NF;i++)
        avg[i]+=length($i)
    next
}
FNR==1 {
    if(("tput cols"|getline cols)<0 || cols<2*NF-1) {  # get terminal width
        print "Yours is too small"                     # exit if too small
        exit                                           # in reality fails when
    }                                                  # field width rounds to 0
    for(i in avg) {         
        avg[i]=avg[i]/(NR-1)                           # * avg divided here
        avgs+=avg[i]
    }
    for(i=1;i<=NF;i++)                                 # below: field terminal 
        size[i]=((v=sprintf("%0.f",((avg[i]/avgs)*cols)-1))>0?v:1) # proportions
}                                                      # rounded with %0.f, min 1
{
    while(length>0)                                    # while unprinted chars
    for(i=1;i<=NF;i++) {                               # keep outputing
        printf "%-" size[i] "s%s",substr($i,1,size[i]),(i==NF?ORS:"|")
        $i=substr($i,size[i]+1)                        # cut printed from fields
    }
}' file file                                           # 2 runs

Output for a 64 wide terminal:

AC|123891723|US|The U.S. is a country of 50 states covering a v
TI|8971238  |A |ast swath of North America.                    
VE|         |  |                                               
AC|213897218|IN|India, officially the Republic of India, is a c
TI|397812378|DI|ountry in South Asia.                          
VE|12       |A |                                               
AC|312127837|AU|Australia, officially the Commonwealth of Austr
TI|218378213|ST|alia, is a sovereign country comprising the mai
VE|7812     |RA|nland of the Australian continent              
  |         |LI|                                               
  |         |A |               

Upvotes: 1

Ed Morton
Ed Morton

Reputation: 203254

I'd let the UNIX utility fold do the line wrapping for the field(s) you want to wrap since it knows to try to split at blanks, etc. to keep the wrapped text as readable as possible:

$ cat tst.awk
BEGIN {
    FS = "#"
    OFS = "|"
}
NR == 1 {
    split("8 12 10 45",fldWidths," ")
    rowWidth = NF + 1   # for the OFSs between fields and at the start/end of the line
    for (i in fldWidths) {
        rowWidth += fldWidths[i]
    }

    rowSep = sprintf("%*s",rowWidth,"")
    gsub(/ /,"-",rowSep)

    print rowSep
    split("STATUS ID Country Description",hdrs," ")
    for (i=1; i<=NF; i++) {
        printf "%s%-*s", OFS, fldWidths[i], hdrs[i]
    }
    print OFS
    print rowSep
}
{
    numRows = 0
    for (fldNr=1; fldNr<=NF; fldNr++) {
        cmd = "printf \047%s\n\047 \047" $fldNr "\047 | fold -s -w " fldWidths[fldNr]
        rowNr = 0
        while ( (cmd | getline line) > 0 ) {
            rows[++rowNr,fldNr] = line
            numRows = (numRows > rowNr ? numRows : rowNr)
        }
        close(cmd)
    }
    for (rowNr=1; rowNr<=numRows; rowNr++) {
        for (fldNr=1; fldNr<=NF; fldNr++) {
            printf "%s%-*s", OFS, fldWidths[fldNr], rows[rowNr,fldNr]
        }
        print OFS
    }
    print rowSep
}

.

$ awk -f tst.awk file
--------------------------------------------------------------------------------
|STATUS  |ID          |Country   |Description                                  |
--------------------------------------------------------------------------------
|ACTIVE  |123891723897|USA       |The U.S. is a country of 50 states covering  |
|        |1238        |          |a vast swath of North America.               |
--------------------------------------------------------------------------------
|ACTIVE  |213897218397|INDIA     |India, officially the Republic of India, is  |
|        |81237812    |          |a country in South Asia.                     |
--------------------------------------------------------------------------------
|ACTIVE  |312127837218|AUSTRALIA |Australia, officially the Commonwealth of    |
|        |3782137812  |          |Australia, is a sovereign country comprising |
|        |            |          |the mainland of the Australian continent     |
--------------------------------------------------------------------------------

Massage the field widths to suit.

Upvotes: 4

Shawn
Shawn

Reputation: 52344

I'd do it in perl instead, using the Term::Table module (Installable via your OS's package manager or off of CPAN), which will automatically work out column widths and wrap text as needed:

#!/usr/bin/env perl
use strict;
use warnings;
use feature qw/say/;
use Term::Table;

my @lines = map { chomp; [ split /#/ ] } <>;
say for Term::Table->new(
    max_width => 80,
    header => ["Status", "ID", "Country", "Description"],
    rows => \@lines
    )->render;

Example usage:

$ ./table.pl < input.txt
+--------+--------------------------+-----------+--------------------------+
| Status | ID                       | Country   | Description              |
+--------+--------------------------+-----------+--------------------------+
| ACTIVE | 1238917238971238         | USA       | The U.S. is a country of |
|        |                          |           |  50 states covering a va |
|        |                          |           | st swath of North Americ |
|        |                          |           | a.                       |
|        |                          |           |                          |
| ACTIVE | 21389721839781237812     | INDIA     | India, officially the Re |
|        |                          |           | public of India, is a co |
|        |                          |           | untry in South Asia.     |
|        |                          |           |                          |
| ACTIVE | 3121278372183782137812   | AUSTRALIA | Australia, officially th |
|        |                          |           | e Commonwealth of Austra |
|        |                          |           | lia, is a sovereign coun |
|        |                          |           | try comprising the mainl |
|        |                          |           | and of the Australian co |
|        |                          |           | ntinent                  |
+--------+--------------------------+-----------+--------------------------+

Come to think of it, it can be done without any non-core modules too, thanks to perl formats. I actually like this way better because it does better word wrapping (Though it becomes more cumbersome to change the overall width of the table or even individual columns):

#!/usr/bin/env perl
use strict;
use warnings;
use feature qw/say/;

my ($status, $id, $country, $description);
while (<>) {
    chomp;
    ($status, $id, $country, $description) = split /#/;
    write;
}
say "+--------+------------------------+-----------+-------------------------------+";

format STDOUT_TOP =
+--------+------------------------+-----------+-------------------------------+
| Status | Id                     | Country   | Description                   |
+--------+------------------------+-----------+-------------------------------+
.

format STDOUT =
| @<<<<< | @<<<<<<<<<<<<<<<<<<<<< | @<<<<<<<< | ^<<<<<<<<<<<<<<<<<<<<<<<<<<<< |
  $status, $id,                     $country,   $description
|~~      |                        |           | ^<<<<<<<<<<<<<<<<<<<<<<<<<<<< |
                                                $description
|        |                        |           |                               |
.
$ ./table.pl < input.txt
+--------+------------------------+-----------+-------------------------------+
| Status | Id                     | Country   | Description                   |
+--------+------------------------+-----------+-------------------------------+
| ACTIVE | 1238917238971238       | USA       | The U.S. is a country of 50   |
|        |                        |           | states covering a vast swath  |
|        |                        |           | of North America.             |
|        |                        |           |                               |
| ACTIVE | 21389721839781237812   | INDIA     | India, officially the         |
|        |                        |           | Republic of India, is a       |
|        |                        |           | country in South Asia.        |
|        |                        |           |                               |
| ACTIVE | 3121278372183782137812 | AUSTRALIA | Australia, officially the     |
|        |                        |           | Commonwealth of Australia, is |
|        |                        |           | a sovereign country           |
|        |                        |           | comprising the mainland of    |
|        |                        |           | the Australian continent      |
|        |                        |           |                               |
+--------+------------------------+-----------+-------------------------------+

Upvotes: 7

RavinderSingh13
RavinderSingh13

Reputation: 133458

EDIT: With headers try following.

awk -v line="-----------------------------------" '
BEGIN{
  FS="#"
  OFS="|"
  num=split("STATUS,ID,Country,Description",a,",")
  print line
}
FNR==NR{
  for(i=2;i<=NF;i++){
    max[i]=max[i]>=length($i)?max[i]:length($i)
  }
  next
}
FNR==1{
  for(i=1;i<=num;i++){
    header=(header?header OFS:"")sprintf("%-"max[i]"s",a[i])
  }
  print header
}
{
  for(i=1;i<=NF;i++){
    $i=sprintf("%-"max[i]"s",$i)
  }
}
1;
END{
  print line
}
'  Input_file  Input_file


Since OP haven't mentioned logic of adding spaces in fields but from looking the output could say it could be based on maximum length value in a field, so based on this assumption could you please try following(tested and written based on shown samples).

awk '
BEGIN{
  FS="#"
  OFS="|"
}
FNR==NR{
  for(i=2;i<=NF;i++){
    max[i]=max[i]>=length($i)?max[i]:length($i)
  }
  next
}
{
  for(i=1;i<=NF;i++){
    $i=sprintf("%-"max[i]"s",$i)
  }
}
1
'  Input_file Input_file

Explanation of above solution: Adding detailed explanation for above.

awk '                                                ##Starting awk program from here.
BEGIN{                                               ##Starting BEGIN section of this program from here.
  FS="#"                                             ##Setting OFS as | here for all lines.
  OFS="|"
}
FNR==NR{                                             ##Checking condition FNR==NR which will be TRUE when first Input_file is being read here.
  for(i=2;i<=NF;i++){                                ##Running for loop from 2nd field to last field of lines.
    max[i]=max[i]>=length($i)?max[i]:length($i)      ##Creating array max with index and value of either current field length OR max array value.
  }
  next                                               ##next will skip all further statements from here.
}
{
  for(i=1;i<=NF;i++){                                ##Running for loop from 1st field to last field of lines.
    $i=sprintf("%-"max[i]"s",$i)                     ##Re-creating first field with sprintf and adding spaces after each field value.
  }
}
1                                                    ##Mentioning 1 will print current line here.
' Input_file Input_file                              ##Mentioning Input_file names here.

Upvotes: 2

Related Questions