Reputation: 626
I have a file with 5 columns:
1 1311 2 171115067 1.1688e-08
1 1313 3 171115067 1.75321e-08
1 1314 4 171115067 2.33761e-08
2 1679 5 135534747 3.68909e-08
2 1680 2 135534747 1.47564e-08
3 688 34 191154276 1.77867e-07
3 689 38 191154276 1.98792e-07
3 690 39 191154276 2.04024e-07
I would like to get the accumulated value $2*$3/$4
per index which is given in field $1
:
So, as an example: For the index 1
, I should have (1311*2+1313*3+1314*4)/171115067
and for the index 2
in $1
it should read (1679*5+1680*2)/135534747
What I tried is:
awk '{sum+=($2*$3)/$4} END { print "Result = ",sum}'
But that gives me the sum of the multiplication for all together divided by each time which not what I need
Upvotes: 0
Views: 50
Reputation: 8721
Using Perl
$ cat sara.txt
1 1311 2 171115067 1.1688e-08
1 1313 3 171115067 1.75321e-08
1 1314 4 171115067 2.33761e-08
2 1679 5 135534747 3.68909e-08
2 1680 2 135534747 1.47564e-08
3 688 34 191154276 1.77867e-07
3 689 38 191154276 1.98792e-07
3 690 39 191154276 2.04024e-07
$ perl -lane ' $kv{join(",",$F[0],$F[3])}+=$F[1]*$F[2]; END { for(sort keys %kv) { @x=split(",");print "$x[0],",$kv{$_}/$x[1]} print eval(join("+",values %kv)) } ' sara.txt
1,6.90587930518123e-05
2,8.67305267482441e-05
3,0.000400116605291111
100056
$
Upvotes: 0
Reputation: 133680
EDIT: As per OP's comment added fllowing solution too, which will give overall sum too for all column 1s.
awk '
prev!=$1 && prev{
if(fourth){
printf("%.9f\n",mul/fourth)
sum+=sprintf("%.9f\n",mul/fourth)
}
else{
print 0
}
mul=fourth=prev=""
}
{
mul+=$2*$3
fourth=$4
prev=$1
total_sum[$1]+=($2*$3)
}
END{
if(prev){
if(fourth){
printf("%.9f\n",mul/fourth)
sum+=sprintf("%.9f\n",mul/fourth)
}
else{
print 0
}
}
print "total= ",sum
}' Input_file
Could you please try following.
awk '
prev!=$1 && prev{
if(fourth){
printf("%.9f\n",mul/fourth)
}
else{
print 0
}
mul=fourth=prev=""
}
{
mul+=$2*$3
fourth=$4
prev=$1
}
END{
if(prev){
if(fourth){
printf("%.9f\n",mul/fourth)
}
else{
print 0
}
}
}' Input_file
Upvotes: 1
Reputation: 26511
If your data is sorted you can do:
awk '(NR==1) { num=0; den=$4; tmp=$1 }
($1!=tmp) { print "Result",tmp,":",num/den;
num=0; den=$4; tmp=$1 }
{ num+= $2*$3 }
END { print "Result",tmp,":",num/den }' file
If your data is not sorted you can do:
awk '{ sum[$1]+= $2*$3/$4 }
END { for(i in sum) { print "Result",i,":",sum[i] }' file
and this outputs:
Result 1 : 6.90588e-05
Result 2 : 8.67305e-05
Result 3 : 0.000400117
Upvotes: 0