Reputation: 470
I'm on SQL Server 2008 R2
I have a set of disbursements:
account_number transaction_amount transaction_date 0001 $250.00 01/01/2020 0001 $250.00 01/02/2020 0001 $500.00 02/01/2020 0002 $100.00 01/05/2020 0003 $150.00 02/05/2020
And a set of refunds:
account_number transaction_amount transaction_date sequence_number 0001 $250.00 01/17/2020 1 0001 $250.00 01/17/2020 2 0001 $700.00 02/21/2020 1 0003 $150.00 02/21/2020 1
There can only be one disbursement transaction per day. There can be multiple refund transactions per day, but the sequence numbers are unique and sequential.
For each refund, I want to find the most recent previous transaction that has the same account number and amount. I only want to match one refund to one disbursement. I want to update the disbursements table with the date and sequence number of the refund, and also update the refunds table with a marker that it was used (or ideally, the date of the disbursement it was tied to):
account_number transaction_amount transaction_date refund_date refund_sequence refund_amount 0001 $250.00 01/01/2020 01/17/2020 2 $250.00 0001 $250.00 01/02/2020 01/17/2020 1 $250.00 0001 $500.00 02/01/2020 NULL NULL NULL 0002 $100.00 01/05/2020 NULL NULL NULL 0003 $150.00 02/05/2020 02/21/2020 1 $150.00
account_number transaction_amount transaction_date sequence_number assigned 0001 $250.00 01/17/2020 1 01/02/2020 0001 $250.00 01/17/2020 2 01/01/2020 0001 $700.00 02/21/2020 1 NULL 0003 $150.00 02/21/2020 1 02/05/2020
This is what I have so far, but it takes forever to run on my full population and it's not giving me the right results anyway.
IF OBJECT_ID('tempdb.dbo.#disb', 'U') IS NOT NULL
DROP TABLE #disb;
create table #disb (
account_number char(4)
,transaction_amount money
,transaction_date datetime
,refund_date datetime null
,refund_sequence int null
,refund_amount money null
)
insert into #disb (account_number, transaction_amount, transaction_date)
values
('0001',250.00,'01/01/2020'),
('0001',250.00,'01/02/2020'),
('0001',500.00,'02/01/2020'),
('0002',100.00,'01/05/2020'),
('0003',150.00,'02/05/2020')
IF OBJECT_ID('tempdb.dbo.#refund', 'U') IS NOT NULL
DROP TABLE #refund;
create table #refund (
account_number char(4)
,transaction_amount money
,transaction_date datetime
,sequence_number int
,assigned varchar(1) null
)
insert into #refund (account_number, transaction_amount, transaction_date, sequence_number)
values
('0001',250.00,'01/17/2020',1),
('0001',250.00,'01/17/2020',2),
('0001',700.00,'02/21/2020',1),
('0003',150.00,'02/21/2020',1)
DECLARE @account_number char(10), @refund_date datetime, @refund_seq int
DECLARE MY_CURSOR CURSOR
LOCAL STATIC READ_ONLY FORWARD_ONLY
FOR
SELECT DISTINCT r.account_number, r.transaction_date, r.sequence_number
FROM #REFUND r
INNER JOIN #disb d on r.account_number = d.account_number and r.transaction_date > d.transaction_date and r.transaction_amount = d.transaction_amount
ORDER BY r.account_number, r.transaction_date, r.sequence_number
OPEN MY_CURSOR
FETCH NEXT FROM MY_CURSOR INTO @account_number, @refund_date, @refund_seq
WHILE @@FETCH_STATUS = 0
BEGIN
update i
set i.refund_date = r.transaction_date, i.refund_sequence = r.sequence_number, i.refund_amount = r.transaction_amount
from
#refund r
outer apply (
select top 1 d.*
from #disb d
where
r.account_number = d.account_number
and r.transaction_amount = d.transaction_amount
and d.refund_date is null
order by
d.transaction_date desc
) i
IF @@ROWCOUNT = 1
update #refund
set assigned = 'Y'
where account_number = @account_number
and transaction_date = @refund_date
and sequence_number = @refund_seq
FETCH NEXT FROM MY_CURSOR INTO @account_number, @refund_date, @refund_seq
END
CLOSE MY_CURSOR
DEALLOCATE MY_CURSOR
select * from #disb
select * from #refund
I know you're not supposed to use cursors and I'm sure there's a better solution, but the only samples I've found depended on exact matches, rather than a "less than" and I can't think of a way to keep them from matching the same transaction to two different refunds or vice-versa.
Upvotes: 1
Views: 55
Reputation: 4648
Because efficiency is already a problem as you mentioned, I decide to reimplement the matching algorithm using double cursors in order to minimize the I/O cost.
The essence is the fact that this algorithm could be done with only one pass per dataset (ignoring the cost of sorting and index building). To achieve this goal, explicitly tracking the current rows for BOTH datasets seems inevitable to me. How these two cursors interact with each other is explained in the comments of the "inner loop" section.
Disclaimer: The code was tested on SQL Server 2017 and the datasets are slightly modified. The main point is to showcase the double cursor strategy. Some revision may be required according to your actual situation.
Test Datasets
use [testdb];
if OBJECT_ID('testdb..disburse') is not null
drop table testdb..disburse;
create table disburse (
account_number varchar(4),
transaction_amount money,
transaction_date date,
refund_date date,
refund_sequence int,
refund_amount money
);
insert into disburse (account_number, transaction_amount, transaction_date)
values ('0001', 250.00, '01/01/2020'),
('0001', 250.00, '01/02/2020'),
('0001', 500.00, '02/01/2020'),
('0002', 100.00, '01/05/2020'),
('0003', 150.00, '02/05/2020');
--select * from disburse;
if OBJECT_ID('testdb..refund') is not null
drop table testdb..refund;
create table refund (
account_number varchar(4),
transaction_amount money,
transaction_date date,
sequence_number int,
);
insert into refund (account_number, transaction_amount, transaction_date, sequence_number)
values ('0001', 250.00, '01/17/2020', 1),
('0001', 250.00, '01/17/2020', 2),
('0001', 700.00, '02/21/2020', 1),
('0003', 150.00, '02/21/2020', 1);
--select * from refund;
Solution
/* cursor variables */
-- cursor 1: disburse
declare @d_an varchar(4),
@d_ta money,
@d_td date;
-- need a index to make the cursor updatable
create index #idx on disburse(account_number, transaction_date desc);
declare cur_d CURSOR local
for select account_number, transaction_amount, transaction_date
from disburse
order by account_number, transaction_date desc
for update of refund_date, refund_sequence, refund_amount;
open cur_d;
-- cursor 2: refund
declare @r_an varchar(4),
@r_ta money,
@r_td date,
@r_sn int;
declare cur_r CURSOR local
for select account_number, transaction_amount, transaction_date, sequence_number
from refund
order by account_number, transaction_date desc, sequence_number desc;
open cur_r;
-- state vairables
declare @flag int = 0; -- 0: normal, 1: break inner loop, 2: break all loops
/* main program */
-- read the first disburse record (ignoring emptiness check)
fetch next from cur_d into @d_an, @d_ta, @d_td;
-- outer loop: for each refund record
while 1=1 BEGIN
fetch next from cur_r into @r_an, @r_ta, @r_td, @r_sn;
-- termination check (no more refunds)
if @@FETCH_STATUS <> 0
break;
-- inner loop: find the disburse record
while 1=1 begin
/* the main control logic of cursor interaction */
-- 1) current disburse account > refund account -> read next refund record
if @d_an > @r_an
break;
-- 2) current disburse account < refund account -> read next disburse record
else if @d_an < @r_an
fetch next from cur_d into @d_an, @d_ta, @d_td;
-- 3) same account
else if @d_an = @r_an begin
-- save the result only when disburse record is earlier than refund record
if @d_td < @r_td begin
update disburse
set refund_date = @r_td,
refund_sequence = @r_sn,
refund_amount = @r_ta
where current of cur_d;
set @flag = 1; -- terminate the inner loop later on
end
-- proceed to next disburse record in any case
fetch next from cur_d into @d_an, @d_ta, @d_td;
end
-- termination checks
-- disburse records exhausted -> quit both loops
if @@FETCH_STATUS <> 0 begin
set @flag = 2;
break;
end
-- go to next refund record if this refund record has found its disburse record
if @flag = 1 begin
set @flag = 0; -- reset flag
break;
end
end
-- disburse records exhausted
if @flag = 2
break;
END
-- cleanup
close cur_d;
deallocate cur_d;
close cur_r;
deallocate cur_r;
Result
select * from disburse;
| account_number | transaction_amount | transaction_date | refund_date | refund_sequence | refund_amount |
|----------------|--------------------|------------------|-------------|-----------------|---------------|
| 0001 | 250.0000 | 2020-01-01 | 2020-01-17 | 1 | 250.0000 |
| 0001 | 250.0000 | 2020-01-02 | 2020-01-17 | 2 | 250.0000 |
| 0001 | 500.0000 | 2020-02-01 | 2020-02-21 | 1 | 700.0000 |
| 0002 | 100.0000 | 2020-01-05 | NULL | NULL | NULL |
| 0003 | 150.0000 | 2020-02-05 | 2020-02-21 | 1 | 150.0000 |
Upvotes: 1