Bob
Bob

Reputation: 227

SQL Query - Sum of Aggregate above a Threshold

Suppose I have this table:

Name Month Date Value
n1 12 2020-12-01 7
n1 12 2020-12-05 9
n1 12 2020-12-09 17
n1 12 2020-12-14 8
n2 8 2020-08-02 12
n2 8 2020-08-08 7
n2 8 2020-08-09 14
n3 9 2020-09-01 5
n3 9 2020-09-03 11
n3 9 2020-09-07 10
n3 9 2020-09-21 7

What is the SQL query to group by Name and Month (sorted by Date) and once the aggregated sum of Value field becomes greater than 15, then it's selected as one row of the result? value of Date field for every individual Name is unique. For example all rows that have Name='n1' have different date values. This applies for n2,n3,... as well.

Based on above example, the result should be as follows:

Name Month Value
n1 12 16
n1 12 17
n2 8 19
n3 9 16
n3 9 17

Thanks

Upvotes: 1

Views: 1073

Answers (2)

astentx
astentx

Reputation: 6751

One possible way you can do it is recursive subquery in both SQL Server and Oracle. What do you need is to decide if you need to reset calculation on current step or not.

I still cannot understand the reason why you've removed n1 12 2020-12-14 and n2 8 2020-08-09 from the output, but you can do it by removing and isleaf = 1 from the example below.

Here's the query with comments: (UPD: removed select ... from( select and rewritten reset_flag calculation for standard SQL recursion)

with a as (
  select 'n1' as name, 12 as month, convert(date, '2020-12-01', 23) as dt, 7 as val union all
  select 'n1' as name, 12 as month, convert(date, '2020-12-05', 23) as dt, 9 as val union all
  select 'n1' as name, 12 as month, convert(date, '2020-12-09', 23) as dt, 17 as val union all
  select 'n1' as name, 12 as month, convert(date, '2020-12-14', 23) as dt, 8 as val union all
  select 'n2' as name, 8  as month, convert(date, '2020-08-02', 23) as dt, 12 as val union all
  select 'n2' as name, 8  as month, convert(date, '2020-08-08', 23) as dt, 7 as val union all
  select 'n2' as name, 8  as month, convert(date, '2020-08-09', 23) as dt, 14 as val union all
  select 'n3' as name, 9  as month, convert(date, '2020-09-01', 23) as dt, 5 as val union all
  select 'n3' as name, 9  as month, convert(date, '2020-09-03', 23) as dt, 11 as val union all
  select 'n3' as name, 9  as month, convert(date, '2020-09-07', 23) as dt, 10 as val union all
  select 'n3' as name, 9  as month, convert(date, '2020-09-21', 23) as dt, 7 as val
)
, rn as (
  /*Build calculation hierarchy from 1st to last*/
  select
    name,
    month,
    dt,
    val,
    dense_rank() over(partition by name, month order by dt asc) as rn,
    /*To identify last item in group*/
    dense_rank() over(partition by name, month order by dt desc) as rn_desc
  from a
)
/*Simulate running sum with reset*/
, rec (
  name,
  month,
  running_sum,
  dt_until,
  rn,
  isleaf,
  reset_flag
) as (
  /*Start from 1st item*/
  select
    name,
    month,
    val as running_sum,
    dt,
    rn,
    case when rn_desc = 1 then 1 else 0 end as isleaf,
    case when val > 15 then 1 else 0 end as reset_flag
  from rn
  where rn = 1
  
  union all
  
  /*Calculate current value*/
  select
    rec.name,
    rec.month,
    /*
      If we need to reset calculation,
      then use original value,
      else - add value to running total
    */
    case
      when rec.running_sum > 15
      then rn.val
      else rec.running_sum + rn.val
    end as running_sum,
    dt,
    rec.rn + 1 as rn,
    case when rn.rn_desc = 1 then 1 else 0 end as isleaf,
    case
      /*Reset on threshold violation after addition*/
      when rec.running_sum <= 15
        and rec.running_sum + rn.val > 15
      then 1
      /*Or when there was reset before and current value vuolates threshold also*/
      when rn.val > 15
      then 1
      else 0
    end as reset_flag
  from rec
    join rn
      on rec.name = rn.name
        and rec.month = rn.month
        and rec.rn + 1 = rn.rn
)
select *
from rec
where isleaf = 1
  or reset_flag = 1
order by 1, 2, rn asc
GO
name | month | running_sum | dt_until   | rn | isleaf | reset_flag
:--- | ----: | ----------: | :--------- | -: | -----: | ---------:
n1   |    12 |          16 | 2020-12-05 |  2 |      0 |          1
n1   |    12 |          17 | 2020-12-09 |  3 |      0 |          1
n1   |    12 |           8 | 2020-12-14 |  4 |      1 |          0
n2   |     8 |          19 | 2020-08-08 |  2 |      0 |          1
n2   |     8 |          14 | 2020-08-09 |  3 |      1 |          0
n3   |     9 |          16 | 2020-09-03 |  2 |      0 |          1
n3   |     9 |          17 | 2020-09-21 |  4 |      1 |          1

db<>fiddle here for SQL Server. And for Oracle here.

(UPD: Previous query is in this db<>fiddle)

And another way for Oracle with MODEL clause. Here we reset calculation with CV function instead of recursion.

with a as (
  select 'n1' as name, 12 as month, date '2020-12-01' as dt, 7 as val   from dual union all
  select 'n1' as name, 12 as month, date '2020-12-05' as dt, 9 as val   from dual union all
  select 'n1' as name, 12 as month, date '2020-12-09' as dt, 17 as val  from dual union all
  select 'n1' as name, 12 as month, date '2020-12-14' as dt, 8 as val   from dual union all
  select 'n2' as name, 8  as month, date '2020-08-02' as dt, 12 as val  from dual union all
  select 'n2' as name, 8  as month, date '2020-08-08' as dt, 7 as val   from dual union all
  select 'n2' as name, 8  as month, date '2020-08-09' as dt, 14 as val  from dual union all
  select 'n3' as name, 9  as month, date '2020-09-01' as dt, 5 as val   from dual union all
  select 'n3' as name, 9  as month, date '2020-09-03' as dt, 11 as val  from dual union all
  select 'n3' as name, 9  as month, date '2020-09-07' as dt, 10 as val  from dual union all
  select 'n3' as name, 9  as month, date '2020-09-21' as dt, 7 as val   from dual 
)
, rn as (
  /*Build calculation hierarchy from 1st to last*/
  select
    name,
    month,
    dt,
    val,
    0 as rsum,
    0 as keep_flag,
    dense_rank() over(partition by name, month order by dt asc) as rn
  from a
)
, rsum as (
  /*Running sum with reset*/
  select *
  from rn
  model
    /*When to break calculation*/
    partition by (name, month)
    /*Dimension to iterate with model*/
    dimension by (rn)
    /*Value, running sum and the flag where we reset calculations*/
    measures (val, rsum, keep_flag, dt)
    /*Keep null values out of calculation range to identify last row per group*/
    keep nav
    rules update
    (
      /*For all sequential numberer RNs in ascending order*/
      rsum[rn > 0] order by rn asc
        /*When we still have place till 15 (e.g previous calculation of RSUM
        is not greater than 15), we add current value of VAL to previous RSUM.
        Else we need to restart
        */
        = case
            when rsum[cv() - 1] <= 15
            then rsum[cv() - 1] + val[cv()]
            else val[cv()]
          end,
      
      /*Again, when there's a place and we are not at the end of partition,
      we mark the row as participating in another aggregated row
      */
      keep_flag[rn > 0] order by rn asc
        = case
            when rsum[cv()] <= 15 and rsum[cv() + 1] is not null
            then 0
            else 1
          end
    )
)
select
  name,
  month,
  rsum,
  dt
from rsum
/*Keep only aggregated rows or last row per group*/
where keep_flag = 1
order by name, month, rn asc
NAME | MONTH | RSUM | DT       
:--- | ----: | ---: | :--------
n1   |    12 |   16 | 05-DEC-20
n1   |    12 |   17 | 09-DEC-20
n1   |    12 |    8 | 14-DEC-20
n2   |     8 |   19 | 08-AUG-20
n2   |     8 |   14 | 09-AUG-20
n3   |     9 |   16 | 03-SEP-20
n3   |     9 |   17 | 21-SEP-20

db<>fiddle here

Upvotes: 2

user5683823
user5683823

Reputation:

In Oracle 12.1 and higher, this task is easy work for the match_recognize clause. You could just as easily add more columns to the output (for example to show the first and last date for each group, etc.)

SETUP

create table this_table (name, mth, dt, val) as
  select 'n1', 12, date '2020-12-01',  7 from dual union all
  select 'n1', 12, date '2020-12-05',  9 from dual union all
  select 'n1', 12, date '2020-12-09', 17 from dual union all
  select 'n1', 12, date '2020-12-14',  8 from dual union all
  select 'n2',  8, date '2020-08-02', 12 from dual union all
  select 'n2',  8, date '2020-08-08',  7 from dual union all
  select 'n2',  8, date '2020-08-09', 14 from dual union all
  select 'n3',  9, date '2020-09-01',  5 from dual union all
  select 'n3',  9, date '2020-09-03', 11 from dual union all
  select 'n3',  9, date '2020-09-07', 10 from dual union all
  select 'n3',  9, date '2020-09-21',  7 from dual
;

QUERY AND OUTPUT

select name, mth, sum_val
from   this_table
match_recognize(
  partition by name, mth
  order     by dt
  measures  sum(val) as sum_val
  pattern   ( a* b )
  define    a as sum(val) <= 15, b as sum(val) > 15
);

NAME   MTH SUM_VAL
----  ---- -------
n1      12      16
n1      12      17
n2       8      19
n3       9      16
n3       9      17

Upvotes: 1

Related Questions