whytheq
whytheq

Reputation: 35557

Running Distinct Count with a Partition

I'd like a running distinct count with a partition by year for the following data:

DROP TABLE IF EXISTS #FACT;
CREATE TABLE #FACT("Year" INT,"Month" INT, "Acc" varchar(5));
INSERT INTO #FACT
    values 
        (2015, 1, 'A'),
        (2015, 1, 'B'),
        (2015, 1, 'B'),
        (2015, 1, 'C'),
        (2015, 2, 'D'),
        (2015, 2, 'E'),
        (2015, 3, 'E'),
        (2016, 1, 'A'),
        (2016, 1, 'A'),
        (2016, 2, 'B'),
        (2016, 2, 'C');
SELECT * FROM #FACT;    

The following returns the correct answer but is there a more concise way that is also performant?

WITH 
dnsRnk AS
(
    SELECT 
        "Year"
        , "Month"
        , DenseR  = DENSE_RANK() OVER(PARTITION BY "Year", "Month" ORDER BY "Acc")
    FROM #FACT
),
mxPerMth AS
(
    SELECT
        "Year"
        , "Month"
        , RunningTotal = MAX(DenseR)
    FROM dnsRnk
    GROUP BY 
        "Year"
        , "Month"
)
SELECT 
    "Year"
    , "Month"
    , X = SUM(RunningTotal) OVER (PARTITION BY "Year" ORDER BY "Month")
FROM mxPerMth
ORDER BY 
    "Year"
    , "Month";

The above returns the following - the answer should also return exactly the same table:

enter image description here

Upvotes: 2

Views: 5699

Answers (2)

Gordon Linoff
Gordon Linoff

Reputation: 1269763

If you want a running count of distinct accounts:

SELECT f.*,
    sum(case when seqnum = 1 then 1 else 0 end) over (partition by year order by month) as cume_distinct_acc
FROM (
    SELECT 
        f.*
        ,row_number() over (partition by account order by year, month) as seqnum
    FROM #fact f
) f;

This counts each account during the first month when it appears.

EDIT:

Oops. The above doesn't aggregate by year and month and then start over for each year. Here is the correct solution:

SELECT 
    year
    ,month
    ,sum( sum(case when seqnum = 1 then 1 else 0 end)
        ) over (partition by year order by month) as cume_distinct_acc
FROM (
    SELECT 
        f.*
        ,row_number() over (partition by account, year order by month) as seqnum
    FROM #fact f
) f
group by year, month
order by year, month;

And, SQL Fiddle isn't working but the following is an example:

with FACT as (
    SELECT yyyy, mm, account
    FROM (values 
        (2015, 1, 'A'),
        (2015, 1, 'B'),
        (2015, 1, 'B'),
        (2015, 1, 'C'),
        (2015, 2, 'D'),
        (2015, 2, 'E'),
        (2015, 3, 'E'),
        (2016, 1, 'A'),
        (2016, 1, 'A'),
        (2016, 2, 'B'),
        (2016, 2, 'C')) v(yyyy, mm, account)
)
SELECT 
    yyyy
    ,mm
    ,sum(sum(case when seqnum = 1 then 1 else 0 end)) over (partition by yyyy order by mm) as cume_distinct_acc
FROM (
    SELECT 
        f.*
        ,row_number() over (partition by account, yyyy order by mm) as seqnum
    FROM fact f
) f
group by yyyy, mm
order by yyyy, mm;

Upvotes: 4

TheGameiswar
TheGameiswar

Reputation: 28900

Demo Here:

;with cte as (
    SELECT yearr, monthh, count(distinct acc) as cnt  
    FROM #fact
    GROUP BY yearr, monthh
)
SELECT 
    yearr
    ,monthh
    ,sum(cnt) over (Partition by yearr order by yearr, monthh rows unbounded preceding ) as x
FROM cte

Upvotes: 1

Related Questions