user14385273
user14385273

Reputation:

Sequential occurrence (advanced gaps and islands problem)

I want to count maximum sequential occurrence of event with flg_no_talk=1 for each hid in agent_login. Sequence is generated by date.

There is an example of my table:

    DROP TABLE IF EXISTS max_in_row;
    
    CREATE TABLE max_in_row (
        hit_finish_dttm VARCHAR(255),
        hid VARCHAR(255),
        agent_login VARCHAR(255),
        flg_no_talk int
    );
    
    
    INSERT INTO max_in_row(hit_finish_dttm, hid, agent_login, flg_no_talk)
    VALUES ('2020-03-01', 'EQERR13', 'Dmitrii', 0),
    VALUES ('2020-03-02', 'EQERR13', 'Dmitrii', 1),
    VALUES ('2020-03-03', 'EQERR13', 'Dmitrii', 1),
    VALUES ('2020-03-01', 'RR13EQE', 'Dmitrii', 0),
    VALUES ('2020-03-02', 'RR13EQE', 'Dmitrii', 1),
    VALUES ('2020-03-03', 'RR13EQE', 'Dmitrii', 0),
    VALUES ('2020-03-04', 'RR13EQE', 'Dmitrii', 0),
    VALUES ('2020-03-05', 'RR13EQE', 'Dmitrii', 1),
    VALUES ('2020-03-06', 'RR13EQE', 'Dmitrii', 1),
    VALUES ('2020-03-07', 'RR13EQE', 'Dmitrii', 0),
    VALUES ('2020-03-01', 'EQERR13', 'Alex', 1),
    VALUES ('2020-03-02', 'EQERR13', 'Alex', 1),
    VALUES ('2020-03-03', 'EQERR13', 'Alex', 0),
    VALUES ('2020-03-04', 'EQERR13', 'Alex', 1),
    VALUES ('2020-03-05', 'EQERR13', 'Alex', 1),
    VALUES ('2020-03-06', 'EQERR13', 'Alex', 1),
    VALUES ('2020-03-02', 'RR13EQE', 'Alex', 1),
    VALUES ('2020-03-03', 'RR13EQE', 'Alex', 0),
    VALUES ('2020-03-04', 'RR13EQE', 'Alex', 1);

I expect to receive at result next table:

Dmitrii | EQERR13 | 2
Dmitrii | RR13EQE | 2
Alex    | EQERR13 | 3
Alex    | RR13EQE | 1

I literally tried to overcome my problem with row_number(order by date) - row_number(partition by human, hid, flag order by flag), but idk why it doesn't work.

Thanks a lot!

Upvotes: 1

Views: 267

Answers (1)

Ross Bush
Ross Bush

Reputation: 15155

Here is one solution (SQL Server).

DECLARE @max_in_row TABLE(
    hit_finish_dttm VARCHAR(255),
    hid VARCHAR(255),
    agent_login VARCHAR(255),
    flg_no_talk int
);
INSERT INTO @max_in_row(hit_finish_dttm, hid, agent_login, flg_no_talk)
VALUES('2020-03-01', 'EQERR13', 'Dmitrii', 0),
      ('2020-03-02', 'EQERR13', 'Dmitrii', 1),
      ('2020-03-03', 'EQERR13', 'Dmitrii', 1),
      ('2020-03-01', 'RR13EQE', 'Dmitrii', 0),
      ('2020-03-02', 'RR13EQE', 'Dmitrii', 1),
      ('2020-03-03', 'RR13EQE', 'Dmitrii', 0),
      ('2020-03-04', 'RR13EQE', 'Dmitrii', 0),
      ('2020-03-05', 'RR13EQE', 'Dmitrii', 1),
      ('2020-03-06', 'RR13EQE', 'Dmitrii', 1),
      ('2020-03-07', 'RR13EQE', 'Dmitrii', 0),
      ('2020-03-01', 'EQERR13', 'Alex', 1),
      ('2020-03-02', 'EQERR13', 'Alex', 1),
      ('2020-03-03', 'EQERR13', 'Alex', 0),
      ('2020-03-04', 'EQERR13', 'Alex', 1),
      ('2020-03-05', 'EQERR13', 'Alex', 1),
      ('2020-03-06', 'EQERR13', 'Alex', 1),
      ('2020-03-02', 'RR13EQE', 'Alex', 1),
      ('2020-03-03', 'RR13EQE', 'Alex', 0),
      ('2020-03-04', 'RR13EQE', 'Alex', 1)
;
WITH OrderNormalized AS
(
    --Since the 0 and 1 can come out of sequence in the data, build up clusters of distinct groups with a chronological order flag 
    --to use as a virtual grouped timetable
    SELECT *,
        GroupNumber = DENSE_RANK() OVER(ORDER BY hid, agent_login ),
        OrderInGroup = RANK() OVER(PARTITION BY  hid, agent_login ORDER BY hit_finish_dttm) 
    FROM 
        @max_in_row
)
,GapsMarked AS
(
    --Order the Gaps so they can be joined with connected islands
    --This is needed because the value can go from 0 to 1 multiple times per partition. 
    --That condition needs to be accounted for to reset the count.
    SELECT *,
        NoTalkGroupNumber = RANK() OVER(PARTITION BY  GroupNumber ORDER BY OrderInGroup)
    FROM 
        OrderNormalized
    WHERE
        flg_no_talk = 0
        
)
,IslandsGrouped AS
(
    --Data is partitioned and the gaps serialized above. Now join the islands with the closest 
    --gap looking backwards and take the min NOTE: There is a cleaner solution here, I just don't have the time to think it up right now
    SELECT 
        D.*,  
        NoTalkGroupNumber=CASE WHEN MIN(G.NoTalkGroupNumber) IS NULL THEN 0 ELSE MIN(G.NoTalkGroupNumber) END 
    FROM 
        OrderNormalized D
        LEFT JOIN GapsMarked G ON G.GroupNumber = D.GroupNumber AND G.OrderInGroup > D.OrderInGroup
    GROUP BY
        D.agent_login,D.flg_no_talk,D.GroupNumber,D.hid,D.hit_finish_dttm,D.OrderInGroup
)
,SeralizedItemsInIslandGroups AS
(
    SELECT 
        *,
        --This serializes by summing sequential flg_no_talk within each  respective islands 
        ItemOrder = SUM(flg_no_talk) OVER (PARTITION BY GroupNumber,NoTalkGroupNumber ORDER BY OrderInGroup ROWS UNBOUNDED PRECEDING)
    FROM 
        IslandsGrouped 
)

SELECT 
    agent_login, hid, MAX(ItemOrder) FROM SeralizedItemsInIslandGroups
GROUP BY
    agent_login, hid

And Here is a PostgreSQL Fiddle->

SQL Fiddle

PostgreSQL 9.6 Schema Setup:

    CREATE TABLE max_in_row (
        hit_finish_dttm VARCHAR(255),
        hid VARCHAR(255),
        agent_login VARCHAR(255),
        flg_no_talk int
    );
    

Query 1:

INSERT INTO max_in_row(hit_finish_dttm, hid, agent_login, flg_no_talk)
VALUES('2020-03-01', 'EQERR13', 'Dmitrii', 0),
      ('2020-03-02', 'EQERR13', 'Dmitrii', 1),
      ('2020-03-03', 'EQERR13', 'Dmitrii', 1),
      ('2020-03-01', 'RR13EQE', 'Dmitrii', 0),
      ('2020-03-02', 'RR13EQE', 'Dmitrii', 1),
      ('2020-03-03', 'RR13EQE', 'Dmitrii', 0),
      ('2020-03-04', 'RR13EQE', 'Dmitrii', 0),
      ('2020-03-05', 'RR13EQE', 'Dmitrii', 1),
      ('2020-03-06', 'RR13EQE', 'Dmitrii', 1),
      ('2020-03-07', 'RR13EQE', 'Dmitrii', 0),
      ('2020-03-01', 'EQERR13', 'Alex', 1),
      ('2020-03-02', 'EQERR13', 'Alex', 1),
      ('2020-03-03', 'EQERR13', 'Alex', 0),
      ('2020-03-04', 'EQERR13', 'Alex', 1),
      ('2020-03-05', 'EQERR13', 'Alex', 1),
      ('2020-03-06', 'EQERR13', 'Alex', 1),
      ('2020-03-02', 'RR13EQE', 'Alex', 1),
      ('2020-03-03', 'RR13EQE', 'Alex', 0),
      ('2020-03-04', 'RR13EQE', 'Alex', 1)

Results:

Query 2:

WITH OrderNormalized AS
(
    SELECT *,
        DENSE_RANK() OVER(ORDER BY hid, agent_login ) GroupNumber,
        RANK() OVER(PARTITION BY  hid, agent_login ORDER BY hit_finish_dttm) OrderInGroup 
    FROM 
        max_in_row
)
,GapsMarked AS
(
    SELECT *,
        RANK() OVER(PARTITION BY  GroupNumber ORDER BY OrderInGroup) NoTalkGroupNumber
    FROM 
        OrderNormalized
    WHERE
        flg_no_talk = 0
        
)
,IslandsGrouped AS
(
    SELECT 
        D.*,  
        CASE WHEN MIN(G.NoTalkGroupNumber) IS NULL THEN 0 ELSE MIN(G.NoTalkGroupNumber) END NoTalkGroupNumber
    FROM 
        OrderNormalized D
        LEFT JOIN GapsMarked G ON G.GroupNumber = D.GroupNumber AND G.OrderInGroup > D.OrderInGroup
    GROUP BY
        D.agent_login,D.flg_no_talk,D.GroupNumber,D.hid,D.hit_finish_dttm,D.OrderInGroup
)
,SeralizedItemsInIslandGroups AS
(
    SELECT 
        *,
        SUM(flg_no_talk) OVER (PARTITION BY GroupNumber,NoTalkGroupNumber ORDER BY OrderInGroup ROWS UNBOUNDED PRECEDING) ItemOrder    FROM 
        IslandsGrouped 
)

SELECT 
    agent_login, hid, MAX(ItemOrder) FROM SeralizedItemsInIslandGroups
GROUP BY
    agent_login, hid

Results:

| agent_login |     hid | max |
|-------------|---------|-----|
|     Dmitrii | RR13EQE |   2 |
|        Alex | RR13EQE |   1 |
|        Alex | EQERR13 |   3 |
|     Dmitrii | EQERR13 |   2 |

Upvotes: 0

Related Questions