Reputation:
I want to count maximum sequential occurrence of event with flg_no_talk=1
for each hid
in agent_login
. Sequence is generated by date.
There is an example of my table:
DROP TABLE IF EXISTS max_in_row;
CREATE TABLE max_in_row (
hit_finish_dttm VARCHAR(255),
hid VARCHAR(255),
agent_login VARCHAR(255),
flg_no_talk int
);
INSERT INTO max_in_row(hit_finish_dttm, hid, agent_login, flg_no_talk)
VALUES ('2020-03-01', 'EQERR13', 'Dmitrii', 0),
VALUES ('2020-03-02', 'EQERR13', 'Dmitrii', 1),
VALUES ('2020-03-03', 'EQERR13', 'Dmitrii', 1),
VALUES ('2020-03-01', 'RR13EQE', 'Dmitrii', 0),
VALUES ('2020-03-02', 'RR13EQE', 'Dmitrii', 1),
VALUES ('2020-03-03', 'RR13EQE', 'Dmitrii', 0),
VALUES ('2020-03-04', 'RR13EQE', 'Dmitrii', 0),
VALUES ('2020-03-05', 'RR13EQE', 'Dmitrii', 1),
VALUES ('2020-03-06', 'RR13EQE', 'Dmitrii', 1),
VALUES ('2020-03-07', 'RR13EQE', 'Dmitrii', 0),
VALUES ('2020-03-01', 'EQERR13', 'Alex', 1),
VALUES ('2020-03-02', 'EQERR13', 'Alex', 1),
VALUES ('2020-03-03', 'EQERR13', 'Alex', 0),
VALUES ('2020-03-04', 'EQERR13', 'Alex', 1),
VALUES ('2020-03-05', 'EQERR13', 'Alex', 1),
VALUES ('2020-03-06', 'EQERR13', 'Alex', 1),
VALUES ('2020-03-02', 'RR13EQE', 'Alex', 1),
VALUES ('2020-03-03', 'RR13EQE', 'Alex', 0),
VALUES ('2020-03-04', 'RR13EQE', 'Alex', 1);
I expect to receive at result next table:
Dmitrii | EQERR13 | 2
Dmitrii | RR13EQE | 2
Alex | EQERR13 | 3
Alex | RR13EQE | 1
I literally tried to overcome my problem with row_number(order by date) - row_number(partition by human, hid, flag order by flag)
, but idk why it doesn't work.
Thanks a lot!
Upvotes: 1
Views: 267
Reputation: 15155
Here is one solution (SQL Server).
DECLARE @max_in_row TABLE(
hit_finish_dttm VARCHAR(255),
hid VARCHAR(255),
agent_login VARCHAR(255),
flg_no_talk int
);
INSERT INTO @max_in_row(hit_finish_dttm, hid, agent_login, flg_no_talk)
VALUES('2020-03-01', 'EQERR13', 'Dmitrii', 0),
('2020-03-02', 'EQERR13', 'Dmitrii', 1),
('2020-03-03', 'EQERR13', 'Dmitrii', 1),
('2020-03-01', 'RR13EQE', 'Dmitrii', 0),
('2020-03-02', 'RR13EQE', 'Dmitrii', 1),
('2020-03-03', 'RR13EQE', 'Dmitrii', 0),
('2020-03-04', 'RR13EQE', 'Dmitrii', 0),
('2020-03-05', 'RR13EQE', 'Dmitrii', 1),
('2020-03-06', 'RR13EQE', 'Dmitrii', 1),
('2020-03-07', 'RR13EQE', 'Dmitrii', 0),
('2020-03-01', 'EQERR13', 'Alex', 1),
('2020-03-02', 'EQERR13', 'Alex', 1),
('2020-03-03', 'EQERR13', 'Alex', 0),
('2020-03-04', 'EQERR13', 'Alex', 1),
('2020-03-05', 'EQERR13', 'Alex', 1),
('2020-03-06', 'EQERR13', 'Alex', 1),
('2020-03-02', 'RR13EQE', 'Alex', 1),
('2020-03-03', 'RR13EQE', 'Alex', 0),
('2020-03-04', 'RR13EQE', 'Alex', 1)
;
WITH OrderNormalized AS
(
--Since the 0 and 1 can come out of sequence in the data, build up clusters of distinct groups with a chronological order flag
--to use as a virtual grouped timetable
SELECT *,
GroupNumber = DENSE_RANK() OVER(ORDER BY hid, agent_login ),
OrderInGroup = RANK() OVER(PARTITION BY hid, agent_login ORDER BY hit_finish_dttm)
FROM
@max_in_row
)
,GapsMarked AS
(
--Order the Gaps so they can be joined with connected islands
--This is needed because the value can go from 0 to 1 multiple times per partition.
--That condition needs to be accounted for to reset the count.
SELECT *,
NoTalkGroupNumber = RANK() OVER(PARTITION BY GroupNumber ORDER BY OrderInGroup)
FROM
OrderNormalized
WHERE
flg_no_talk = 0
)
,IslandsGrouped AS
(
--Data is partitioned and the gaps serialized above. Now join the islands with the closest
--gap looking backwards and take the min NOTE: There is a cleaner solution here, I just don't have the time to think it up right now
SELECT
D.*,
NoTalkGroupNumber=CASE WHEN MIN(G.NoTalkGroupNumber) IS NULL THEN 0 ELSE MIN(G.NoTalkGroupNumber) END
FROM
OrderNormalized D
LEFT JOIN GapsMarked G ON G.GroupNumber = D.GroupNumber AND G.OrderInGroup > D.OrderInGroup
GROUP BY
D.agent_login,D.flg_no_talk,D.GroupNumber,D.hid,D.hit_finish_dttm,D.OrderInGroup
)
,SeralizedItemsInIslandGroups AS
(
SELECT
*,
--This serializes by summing sequential flg_no_talk within each respective islands
ItemOrder = SUM(flg_no_talk) OVER (PARTITION BY GroupNumber,NoTalkGroupNumber ORDER BY OrderInGroup ROWS UNBOUNDED PRECEDING)
FROM
IslandsGrouped
)
SELECT
agent_login, hid, MAX(ItemOrder) FROM SeralizedItemsInIslandGroups
GROUP BY
agent_login, hid
And Here is a PostgreSQL Fiddle->
PostgreSQL 9.6 Schema Setup:
CREATE TABLE max_in_row (
hit_finish_dttm VARCHAR(255),
hid VARCHAR(255),
agent_login VARCHAR(255),
flg_no_talk int
);
Query 1:
INSERT INTO max_in_row(hit_finish_dttm, hid, agent_login, flg_no_talk)
VALUES('2020-03-01', 'EQERR13', 'Dmitrii', 0),
('2020-03-02', 'EQERR13', 'Dmitrii', 1),
('2020-03-03', 'EQERR13', 'Dmitrii', 1),
('2020-03-01', 'RR13EQE', 'Dmitrii', 0),
('2020-03-02', 'RR13EQE', 'Dmitrii', 1),
('2020-03-03', 'RR13EQE', 'Dmitrii', 0),
('2020-03-04', 'RR13EQE', 'Dmitrii', 0),
('2020-03-05', 'RR13EQE', 'Dmitrii', 1),
('2020-03-06', 'RR13EQE', 'Dmitrii', 1),
('2020-03-07', 'RR13EQE', 'Dmitrii', 0),
('2020-03-01', 'EQERR13', 'Alex', 1),
('2020-03-02', 'EQERR13', 'Alex', 1),
('2020-03-03', 'EQERR13', 'Alex', 0),
('2020-03-04', 'EQERR13', 'Alex', 1),
('2020-03-05', 'EQERR13', 'Alex', 1),
('2020-03-06', 'EQERR13', 'Alex', 1),
('2020-03-02', 'RR13EQE', 'Alex', 1),
('2020-03-03', 'RR13EQE', 'Alex', 0),
('2020-03-04', 'RR13EQE', 'Alex', 1)
Query 2:
WITH OrderNormalized AS
(
SELECT *,
DENSE_RANK() OVER(ORDER BY hid, agent_login ) GroupNumber,
RANK() OVER(PARTITION BY hid, agent_login ORDER BY hit_finish_dttm) OrderInGroup
FROM
max_in_row
)
,GapsMarked AS
(
SELECT *,
RANK() OVER(PARTITION BY GroupNumber ORDER BY OrderInGroup) NoTalkGroupNumber
FROM
OrderNormalized
WHERE
flg_no_talk = 0
)
,IslandsGrouped AS
(
SELECT
D.*,
CASE WHEN MIN(G.NoTalkGroupNumber) IS NULL THEN 0 ELSE MIN(G.NoTalkGroupNumber) END NoTalkGroupNumber
FROM
OrderNormalized D
LEFT JOIN GapsMarked G ON G.GroupNumber = D.GroupNumber AND G.OrderInGroup > D.OrderInGroup
GROUP BY
D.agent_login,D.flg_no_talk,D.GroupNumber,D.hid,D.hit_finish_dttm,D.OrderInGroup
)
,SeralizedItemsInIslandGroups AS
(
SELECT
*,
SUM(flg_no_talk) OVER (PARTITION BY GroupNumber,NoTalkGroupNumber ORDER BY OrderInGroup ROWS UNBOUNDED PRECEDING) ItemOrder FROM
IslandsGrouped
)
SELECT
agent_login, hid, MAX(ItemOrder) FROM SeralizedItemsInIslandGroups
GROUP BY
agent_login, hid
| agent_login | hid | max |
|-------------|---------|-----|
| Dmitrii | RR13EQE | 2 |
| Alex | RR13EQE | 1 |
| Alex | EQERR13 | 3 |
| Dmitrii | EQERR13 | 2 |
Upvotes: 0