Reputation: 631
I want to find suspicious transactions that have 3 or more consecutive transactions within 10 minutes.
sample data
expected result
my below code doesn't work very well. coz it gives me the id 116 and 117, which is not right
IF OBJECT_ID('tempdb..#t') is not null DROP TABLE #t;
create TABLE #t
(
trans_id int, trans_time datetime, store_address varchar(20)
)
insert into #t values
(111, '5/27/18 6:36 AM', '79 street, NY')
,(112, '5/27/18 6:53 AM', '79 street, NY')
,(113, '5/27/18 6:54 AM', '79 street, NY')
,(114, '5/27/18 6:55 AM', '79 street, NY')
,(115, '5/27/18 6:59 AM', '79 street, NY')
,(116, '5/27/18 9:45 PM', '79 street, NY')
,(117, '5/27/18 9:47 PM', '79 street, NY')
,(118, '3/24/18 6:35 AM', '44 tree ave,FL')
,(119, '3/24/18 6:36 AM', '44 tree ave,FL')
,(120, '3/24/18 6:36 AM', '44 tree ave,FL')
,(121, '3/24/18 6:36 AM', '2 pop ave, NJ');
--select * from #t;
IF OBJECT_ID('tempdb..#self') is not null DROP TABLE #self;
SELECT DISTINCT d1.trans_id 'trans_id1',d2.trans_id 'trans_id2', d1.trans_time 't1',d2.trans_time 't2'
INTO #self
FROM #t d1 JOIN #t d2 ON d1.store_address = d2.store_address --self join
AND DATEDIFF(minute,d1.trans_time,d2.trans_time) < 10
AND d1.trans_id <> d2.trans_id
AND d1.trans_id < d2.trans_id
IF OBJECT_ID('tempdb..#date') is not null DROP TABLE #date;
SELECT DISTINCT d.trans_id,d.trans_time,d.store_address
INTO #DATE
FROM #t d
JOIN #SELF dd ON d.trans_id=dd.trans_id1 or d.trans_id=dd.trans_id2
IF OBJECT_ID('tempdb..#address') is not null DROP TABLE #address;
SELECT store_address
INTO #address --address for min count 3 of store address
FROM #t
GROUP BY store_address HAVING COUNT(store_address) >= 3
SELECT * FROM #date d
JOIN #address a ON d.store_address = a.store_address
Upvotes: 3
Views: 1887
Reputation: 46219
This is a gap and island problem, you can try to use LAG
window function to get previous trans_time
in the first CTE
query.
then use SUM
window function with CASE WHEN
make row number by datediff(minute, nextDt,trans_time)
greater than 10
.
Final only group count
greater than 3 by store_address
and grp
.
;WITH CTE AS (
SELECT *, LAG(trans_time,1,trans_time)
OVER(PARTITION BY store_address ORDER BY trans_time) nextDt
FROM T
),CTE2 AS (
SELECT *,COUNT(*) OVER(PARTITION BY grp,store_address) cnt
FROM (
SELECT *,SUM(CASE WHEN datediff(minute, nextDt,trans_time) >= 10 THEN 1 ELSE 0 END)
OVER(PARTITION BY store_address ORDER BY trans_time) grp
FROM CTE
) t1
)
SELECT trans_id,trans_time,store_address
FROM CTE2
WHERE cnt >= 3
ORDER BY trans_id
Upvotes: 1