OXXO
OXXO

Reputation: 724

Summary data using csv file

I would like to add more details in the desired output file.

Is there please the option to improve the code below to get the desired file.

code

awk -F, '{a[$2$7]+=$4}{b[$2$7]+=$5}{c[$2$7]+=$6}END{for(i in a)print i,a[i],b[i],c[i]}' tmp3 | sort -t, -k1n |
awk 'BEGIN{
print ("\tCODE-1T     COD-Area   CODE-1      CODE-S       CODE-T")
printf ("\t------------------------------------------------------------\n")
}
{
sum2 += $2;
sum3 += $3;
sum4 += $4;
sum5 = sum2 + sum3 + sum4;
printf ("\t%9s%10s%12s%12d%16d\n",substr($0,1,9),substr($0,10,5),$2,$3,$4)
}
END {
printf ("\t------------------------------------------------------------------------\n")
printf ("\tTotal:\t%23d\t%11d\t%11d\t%4d\n",sum2,sum3,sum4,sum5)
printf ("\t------------------------------------------------------------------------\n")

input file

032118,333000004,3213,11,10,142,SS/RR
032118,333000004,3214,11,0,42,AS/RR
032118,333000004,3215,11,0,761,AS/RR
032118,333000005,3216,7,2,762,SS/RR
032118,333000005,3217,6,2,876,SS/RR
032118,333000005,3218,6,0,876,ST/RR
032118,333000005,3222,5,3,258,ST/RR
032118,333000006,3223,5,3,258,ST/RR
032118,333000006,3224,4,4,870,SS/RR
032118,333000006,3225,3,5,870,SS/RR
032118,333000007,3226,3,34,876,SX/RR
032118,333000007,3227,2,55,876,SS/RR
032218,333000007,3208,2,4,36,SS/RR
032218,333000007,3209,1,3,879,ST/RR
032218,333000007,3210,2,2,803,ST/RR

I got this output file

CODE-1T     COD-Area     CODE-1      CODE-S       CODE-T
------------------------------------------------------------
333000004     AS/RR          22           0             803
333000004     SS/RR          11          10             142
333000005     SS/RR          13           4            1638
333000005     ST/RR          11           3            1134
333000006     SS/RR           7           9            1740
333000006     ST/RR           5           3             258
333000007     SS/RR           4          59             912
333000007     ST/RR           3           5            1682
333000007     SX/RR           3          34             876
------------------------------------------------------------------------
Total:                       79         127        9185 9391
------------------------------------------------------------------------

The output desired is the following

Is this possible with AWK? From my example below.

CODE-1T     COD-Area     CODE-1      CODE-S       CODE-T
------------------------------------------------------------
333000004     AS/RR          22           0             803
333000004     SS/RR          11          10             142
Total                        33          10             945      988
---------------------------------------------------------------------
333000005     SS/RR          13           4            1638
333000005     ST/RR          11           3            1134
Total                        24           7            2772     2803
---------------------------------------------------------------------
333000006     SS/RR           7           9            1740
333000006     ST/RR           5           3             258
Total                        12          12            1998     2202
---------------------------------------------------------------------
333000007     SS/RR           4          59             912
333000007     ST/RR           3           5            1682
333000007     SX/RR           3          34             876
Total                        10          98            3470     3578
---------------------------------------------------------------------
---------------------------------------------------------------------
Gran Total:                 79          127            9185     9391
---------------------------------------------------------------------

Thanks in advance

Upvotes: 1

Views: 262

Answers (2)

RavinderSingh13
RavinderSingh13

Reputation: 133518

EDIT: As per OP 4th and 5th column's zero values previous code was not working so fixing that now.

awk '
BEGIN{                                                      ##Starting BEGIN section here of awk.
   FS=","                                                   ##Setting FS as comma here.
   OFS="\t\t"                                               ##Setting OFS as 2 TABs as output field separator.
   s1="------------------------------------------------------------------------------------------------"   ##Setting s1 as dashes.
   print "CODE-1T     COD-Area     CODE-1      CODE-S       CODE-T" ORS s1  ##printing headers before output prints.
}
FNR==NR{                                                    ##Putting condition to check FNR==NR which will be TRUE when first time Input_file is being read.
   code1[$2,$NF]+=$4                                        ##Creating array code1 index is $2,$NF value is $4 and adding to itself.
   codes[$2,$NF]+=$5                                        ##Creating array codes index is $2,$NF value is $5 and adding to itself.
   codet[$2,$NF]+=$6                                        ##Creating array codet index is $2,$NF value is $6 and adding to itself.
   next                                                     ##next will skip all further statements from here.
}
prev!=$2 && prev{                                           ##checking condition prev is NOT equal to $2 and prev is NOT NULL then do following.
   sum_col=val1+val2+val3                                   ##creating sum_col whose value is val1+val2+val3.
   SUM+=sum_col                                             ##creating SUM whose value is sum_col and adding to itself too.
   sum_val1+=val1                                           ##Creating variable sum_val1 whose value is val1 and adding to itself.
   sum_val2+=val2                                           ##Creating variable sum_val2 whose value is val2 and adding to itself.
   sum_val3+=val3                                           ##Creating variable sum_val3 whose value is val3 and adding to itself.
   print "Total\t\t\t\t\t"val1,val2,val3,sum_col ORS s1              ##Printing 3 TABs then value of val1, val2, val3, sum_col ORS and s1 value now.
   val1=val2=val3=""                                        ##Nullifying values of val1, val2 and val3 here.
}
code1[$2,$NF]!=""{                                              ##Checking if array code1 value whose index is $1,$NF is NOT NULL then do following.
   print $2,$NF,code1[$2,$NF],codes[$2,$NF],codet[$2,$NF]   ##Printing values of $2,$NF,code1[$2,$NF],codes[$2,$NF],codet[$2,$NF]
   val1+=code1[$2,$NF]                                      ##Creating variable val1 who is array code1 value and adding to itself.
   val2+=codes[$2,$NF]                                      ##Creating variable val2 who is array codes value and adding to itself.
   val3+=codet[$2,$NF]                                      ##Creating variable val3 who is array codet value and adding to itself.
   delete code1[$2,$NF]                                     ##Deleting array code1 whose index is $2,$NF here.
}
{
   prev=$2                                                  ##Setting prev value to $2.
}
END{                                                        ##Starting END block of awk here now.
   if(val1){                                                ##Checking condition if variable val1 is NOT NULL then do following.
      sum_col=val1+val2+val3                                ##Creating sum_col whose value is addition of val1+val2+val3.
      sum_val1+=val1                                        ##Creating sum_val1 whose value is addition of sum_val1 abd val1 values.
      sum_val2+=val2                                        ##Creating sum_val2 whose value is addition of sum_val2 abd val2 values.
      sum_val3+=val3                                        ##Creating sum_val3 whose value is addition of sum_val3 abd val3 values.
      print "\t\t\t"val1,val2,val3,sum_col                  ##Printing 3 TABs and value of val1, val2, val3 and sum_col.
   }
   print s1 ORS s1 ORS "Grand Total:\t\t",sum_val1,sum_val2,sum_val3,SUM+sum_col ORS s1  ##Printing s1 ORS s1 and values of sum_val1,sum_val2,sum_val3,SUM+sum_col s1.
}' Input_file  Input_file                                  ##mentioning Input_file 2 times here.


Could you please try following.

awk -F, '
BEGIN{
   s1="------------------------------------------------------------------------------------------------"
   print "CODE-1T     COD-Area     CODE-1      CODE-S       CODE-T" ORS s1
}
FNR==NR{
   code1[$2,$NF]+=$4
   codes[$2,$NF]+=$5
   codet[$2,$NF]+=$6
   next
}
prev!=$2 && prev{
   sum_col=val1+val2+val3
   SUM+=sum_col
   sum_val1+=val1
   sum_val2+=val2
   sum_val3+=val3
   print "\t\t\t"val1,val2,val3,sum_col ORS s1
   val1=val2=val3=""
}
code1[$2,$NF]{
   print $2,$NF,code1[$2,$NF],codes[$2,$NF],codet[$2,$NF]
   val1+=code1[$2,$NF]
   val2+=codes[$2,$NF]
   val3+=codet[$2,$NF]
   delete code1[$2,$NF]
}
{
   prev=$2
}
END{
   if(val1){
      sum_col=val1+val2+val3
      sum_val1+=val1
      sum_val2+=val2
      sum_val3+=val3
      print "\t\t\t"val1,val2,val3,sum_col
   }
   print s1 ORS s1 ORS "Grand Total:\t\t",sum_val1,sum_val2,sum_val3,SUM+sum_col ORS s1
}'  OFS="\t\t"  Input_file  Input_file

Explanation: Adding explanation too here.

awk '
BEGIN{                                                      ##Starting BEGIN section here of awk.
   FS=","                                                   ##Setting FS as comma here.
   OFS="\t\t"                                               ##Setting OFS as 2 TABs as output field separator.
   s1="------------------------------------------------------------------------------------------------"   ##Setting s1 as dashes.
   print "CODE-1T     COD-Area     CODE-1      CODE-S       CODE-T" ORS s1  ##printing headers before output prints.
}
FNR==NR{                                                    ##Putting condition to check FNR==NR which will be TRUE when first time Input_file is being read.
   code1[$2,$NF]+=$4                                        ##Creating array code1 index is $2,$NF value is $4 and adding to itself.
   codes[$2,$NF]+=$5                                        ##Creating array codes index is $2,$NF value is $5 and adding to itself.
   codet[$2,$NF]+=$6                                        ##Creating array codet index is $2,$NF value is $6 and adding to itself.
   next                                                     ##next will skip all further statements from here.
}
prev!=$2 && prev{                                           ##checking condition prev is NOT equal to $2 and prev is NOT NULL then do following.
   sum_col=val1+val2+val3                                   ##creating sum_col whose value is val1+val2+val3.
   SUM+=sum_col                                             ##creating SUM whose value is sum_col and adding to itself too.
   sum_val1+=val1                                           ##Creating variable sum_val1 whose value is val1 and adding to itself.
   sum_val2+=val2                                           ##Creating variable sum_val2 whose value is val2 and adding to itself.
   sum_val3+=val3                                           ##Creating variable sum_val3 whose value is val3 and adding to itself.
   print "\t\t\t"val1,val2,val3,sum_col ORS s1              ##Printing 3 TABs then value of val1, val2, val3, sum_col ORS and s1 value now.
   val1=val2=val3=""                                        ##Nullifying values of val1, val2 and val3 here.
}
code1[$2,$NF]{                                              ##Checking if array code1 value whose index is $1,$NF is NOT NULL then do following.
   print $2,$NF,code1[$2,$NF],codes[$2,$NF],codet[$2,$NF]   ##Printing values of $2,$NF,code1[$2,$NF],codes[$2,$NF],codet[$2,$NF]
   val1+=code1[$2,$NF]                                      ##Creating variable val1 who is array code1 value and adding to itself.
   val2+=codes[$2,$NF]                                      ##Creating variable val2 who is array codes value and adding to itself.
   val3+=codet[$2,$NF]                                      ##Creating variable val3 who is array codet value and adding to itself.
   delete code1[$2,$NF]                                     ##Deleting array code1 whose index is $2,$NF here.
}
{
   prev=$2                                                  ##Setting prev value to $2.
}
END{                                                        ##Starting END block of awk here now.
   if(val1){                                                ##Checking condition if variable val1 is NOT NULL then do following.
      sum_col=val1+val2+val3                                ##Creating sum_col whose value is addition of val1+val2+val3.
      sum_val1+=val1                                        ##Creating sum_val1 whose value is addition of sum_val1 abd val1 values.
      sum_val2+=val2                                        ##Creating sum_val2 whose value is addition of sum_val2 abd val2 values.
      sum_val3+=val3                                        ##Creating sum_val3 whose value is addition of sum_val3 abd val3 values.
      print "\t\t\t"val1,val2,val3,sum_col                  ##Printing 3 TABs and value of val1, val2, val3 and sum_col.
   }
   print s1 ORS s1 ORS "Grand Total:\t\t",sum_val1,sum_val2,sum_val3,SUM+sum_col ORS s1  ##Printing s1 ORS s1 and values of sum_val1,sum_val2,sum_val3,SUM+sum_col s1.
}'  Input_file  Input_file                                 ##mentioning Input_file 2 times here.

Upvotes: 3

karakfa
karakfa

Reputation: 67507

Not the complete solution (I don't have enough patience now for the formatting, but shows the main idea for the subtotals on one field, which you can generalize and copy other formatting...

$ awk -F, '{k=$2 OFS $7; f2[$2]; f7[$7]; f4[k]+=$4; f5[k]+=$5; f6[k]+=$6} 
        END{for(i2 in f2) 
              {s4=0; 
               for(i7 in f7) 
                  {k=i2 OFS i7; 
                   if(k in f4) 
                      {print k, f4[k]; 
                       s4+=f4[k]}} 
               print "Total","-",s4}}' file | column -t


333000004  SS/RR  11
333000004  AS/RR  22
Total      -      33
333000005  SS/RR  13
333000005  ST/RR  11
Total      -      24
333000006  SS/RR  7
333000006  ST/RR  5
Total      -      12
333000007  SS/RR  4
333000007  SX/RR  3
333000007  ST/RR  3
Total      -      10

Upvotes: 2

Related Questions