LamaMo
LamaMo

Reputation: 626

Using awk to compute distinct sums depending on column value

I have a file with 5 columns:

1   1311    2   171115067   1.1688e-08
1   1313    3   171115067   1.75321e-08
1   1314    4   171115067   2.33761e-08
2   1679    5   135534747   3.68909e-08
2   1680    2   135534747   1.47564e-08
3   688 34  191154276   1.77867e-07
3   689 38  191154276   1.98792e-07
3   690 39  191154276   2.04024e-07

I would like to get the accumulated value $2*$3/$4 per index which is given in field $1:

So, as an example: For the index 1, I should have (1311*2+1313*3+1314*4)/171115067 and for the index 2 in $1 it should read (1679*5+1680*2)/135534747

What I tried is:

awk '{sum+=($2*$3)/$4} END { print "Result = ",sum}'

But that gives me the sum of the multiplication for all together divided by each time which not what I need

Upvotes: 0

Views: 50

Answers (3)

stack0114106
stack0114106

Reputation: 8721

Using Perl

$ cat sara.txt
1   1311    2   171115067   1.1688e-08
1   1313    3   171115067   1.75321e-08
1   1314    4   171115067   2.33761e-08
2   1679    5   135534747   3.68909e-08
2   1680    2   135534747   1.47564e-08
3   688 34  191154276   1.77867e-07
3   689 38  191154276   1.98792e-07
3   690 39  191154276   2.04024e-07
$ perl -lane ' $kv{join(",",$F[0],$F[3])}+=$F[1]*$F[2]; END { for(sort keys %kv) { @x=split(",");print "$x[0],",$kv{$_}/$x[1]} print eval(join("+",values %kv)) } ' sara.txt
1,6.90587930518123e-05
2,8.67305267482441e-05
3,0.000400116605291111
100056
$

Upvotes: 0

RavinderSingh13
RavinderSingh13

Reputation: 133680

EDIT: As per OP's comment added fllowing solution too, which will give overall sum too for all column 1s.

awk '
prev!=$1 && prev{
  if(fourth){
    printf("%.9f\n",mul/fourth)
    sum+=sprintf("%.9f\n",mul/fourth)
  }
  else{
    print 0
  }
  mul=fourth=prev=""
}
{
  mul+=$2*$3
  fourth=$4
  prev=$1
  total_sum[$1]+=($2*$3)
}
END{
  if(prev){
    if(fourth){
       printf("%.9f\n",mul/fourth)
       sum+=sprintf("%.9f\n",mul/fourth)
    }
    else{
       print 0
    }
  }
  print "total= ",sum
}'  Input_file


Could you please try following.

awk '
prev!=$1 && prev{
  if(fourth){
    printf("%.9f\n",mul/fourth)
  }
  else{
    print 0
  }
  mul=fourth=prev=""
}
{
  mul+=$2*$3
  fourth=$4
  prev=$1
}
END{
  if(prev){
    if(fourth){
      printf("%.9f\n",mul/fourth)
    }
    else{
      print 0
    }
  }
}'  Input_file

Upvotes: 1

kvantour
kvantour

Reputation: 26511

If your data is sorted you can do:

awk '(NR==1) { num=0; den=$4; tmp=$1 }
     ($1!=tmp) { print "Result",tmp,":",num/den;
                 num=0; den=$4; tmp=$1 }
     { num+= $2*$3 }
     END { print "Result",tmp,":",num/den }' file

If your data is not sorted you can do:

awk '{ sum[$1]+= $2*$3/$4 }
     END { for(i in sum) { print "Result",i,":",sum[i] }' file

and this outputs:

Result 1 : 6.90588e-05
Result 2 : 8.67305e-05
Result 3 : 0.000400117

Upvotes: 0

Related Questions