PyBoss
PyBoss

Reputation: 631

SQL find consecutive transactions within certain time period

I want to find suspicious transactions that have 3 or more consecutive transactions within 10 minutes.

sample data

enter image description here

expected result

enter image description here

my below code doesn't work very well. coz it gives me the id 116 and 117, which is not right

IF OBJECT_ID('tempdb..#t') is not null DROP TABLE #t;
create TABLE #t
(
 trans_id int,  trans_time  datetime,  store_address varchar(20)
)
insert into #t values
 (111, '5/27/18 6:36 AM', '79 street, NY')
,(112, '5/27/18 6:53 AM', '79 street, NY')
,(113, '5/27/18 6:54 AM', '79 street, NY')
,(114, '5/27/18 6:55 AM', '79 street, NY')
,(115, '5/27/18 6:59 AM', '79 street, NY')
,(116, '5/27/18 9:45 PM', '79 street, NY')
,(117, '5/27/18 9:47 PM', '79 street, NY')
,(118, '3/24/18 6:35 AM', '44 tree ave,FL')
,(119, '3/24/18 6:36 AM', '44 tree ave,FL')
,(120, '3/24/18 6:36 AM', '44 tree ave,FL')
,(121, '3/24/18 6:36 AM', '2 pop ave, NJ');
--select * from #t;

IF OBJECT_ID('tempdb..#self') is not null DROP TABLE #self;
SELECT DISTINCT d1.trans_id 'trans_id1',d2.trans_id 'trans_id2', d1.trans_time 't1',d2.trans_time 't2'
INTO #self
FROM #t d1 JOIN #t d2 ON d1.store_address = d2.store_address  --self join
AND DATEDIFF(minute,d1.trans_time,d2.trans_time) < 10
AND d1.trans_id <> d2.trans_id
AND d1.trans_id < d2.trans_id

IF OBJECT_ID('tempdb..#date') is not null DROP TABLE #date;
SELECT DISTINCT d.trans_id,d.trans_time,d.store_address 
INTO #DATE 
FROM #t d
JOIN #SELF dd  ON d.trans_id=dd.trans_id1 or d.trans_id=dd.trans_id2   

IF OBJECT_ID('tempdb..#address') is not null DROP TABLE #address;
SELECT store_address 
INTO #address --address for min count 3 of store address
FROM #t 
GROUP BY store_address HAVING COUNT(store_address) >= 3

SELECT * FROM #date d
JOIN #address a ON d.store_address = a.store_address

Upvotes: 3

Views: 1887

Answers (1)

D-Shih
D-Shih

Reputation: 46219

This is a gap and island problem, you can try to use LAG window function to get previous trans_time in the first CTE query.

then use SUM window function with CASE WHEN make row number by datediff(minute, nextDt,trans_time) greater than 10.

Final only group count greater than 3 by store_address and grp.

;WITH CTE AS (
SELECT *, LAG(trans_time,1,trans_time) 
              OVER(PARTITION BY store_address ORDER BY trans_time) nextDt
FROM T
),CTE2 AS (
   SELECT *,COUNT(*) OVER(PARTITION BY grp,store_address) cnt
   FROM (
       SELECT *,SUM(CASE WHEN datediff(minute, nextDt,trans_time) >= 10 THEN 1 ELSE 0 END) 
             OVER(PARTITION BY store_address ORDER BY trans_time) grp
      FROM CTE 
   ) t1
)
SELECT trans_id,trans_time,store_address    
FROM CTE2
WHERE cnt >= 3
ORDER BY trans_id

sqlfiddle

Upvotes: 1

Related Questions