Reputation: 531
Hi SO Community I have a Proc that is migrate from tsql into Snowsql . I was doing testing with full table truncate and just loading the data BUT now we are pushing the code into testing phases AND it seems my MERGE has a bug somewhere.
Non-Tech side : A person can have multiple rows of discount of same type on one item. They are caught by [rtrans_lineitm_seq] . So example is you can buy 20 car filters each one shows up as a new line number on the printed receipt . AND if it is a weekly special a discount is applied to each item. That said let just jump into the tech side...
So here is the Snowsql proc code. and dummy record that is causing issue will be posted after the code below.
MERGE INTO DISCOUNT_2 tgt
USING (
select to_varchar(concat(organization_id,rtl_loc_id,to_varchar(replace(to_date(business_date),'-','')),trans_seq,wkstn_id,IFNULL(rtrans_lineitm_seq,0),IFNULL(rtl_price_mod_seq_nbr,0),replace(IFNULL(rtl_price_mod_reasoncode,''),' ',''),replace(IFNULL(discount_code,''),' ',''),replace(IFNULL(deal_id,''),' ','') ) ) as Unique_id
,*
,'PROC_1' AS PROC_NUMBER
,'LOYALTY_2' AS PROC_NAME
,1 as EXECUTION_NUMBER
,current_timestamp(2) as LAST_PROC_EXECUTION_DATE
from (
select 2200 as organization_id
,TH.STORE_NO as rtl_loc_id
,TH.Date as business_date
,TH.TRANSACTION_NO as trans_seq
,case when left(TH.POS_TERMINAL_NO,2) = 'NP' then 1
when left(TH.POS_TERMINAL_NO,2) = 'NS' then 2
when left(TH.POS_TERMINAL_NO,2) = 'NT' then 3 else 0 end as wkstn_id
,TSE.LINE_NO as rtrans_lineitm_seq
,ROW_NUMBER() over (partition by TSE.STORE_NO,TSE.Date,TSE.TRANSACTION_NO,TSE.POS_TERMINAL_NO order by TSE.STORE_NO,TSE.Date,TSE.TRANSACTION_NO,TSE.POS_TERMINAL_NO,TSE.DISCOUNT_AMOUNT ) AS rtl_price_mod_seq_nbr
,CAST(CONCAT(trim(substring(TH.DATE,0,charindex(':',TH.DATE)-4)), ' ' ,substring(TH.Time,charindex(':',TH.Time)-2,length(TH.Time)))AS TIMESTAMP_NTZ(9)) as create_date
,trim(CONCAT('LOYALTY',' ',ifnull(TIE.Information,''))) AS rtl_price_mod_reasoncode
,IFNULL(PERIODIC_DISC_GROUP,TIE.Information) AS discount_code
,IFNULL(abs(TSE.DISCOUNT_AMOUNT),0) AS deal_amt
,null as deal_id
,TH.STAFF_ID as create_user
,null as sales_agt_com
,null as serial_number
from HEADER as TH
join SALES_ENTRY TSE on TSE.TRANSACTION_NO = TH.TRANSACTION_NO and TSE.STORE_NO = TH.STORE_NO and TSE.POS_TERMINAL_NO = TH.POS_TERMINAL_NO
left join CODE_ENTRY TIE on TIE.TRANSACTION_NO = TH.TRANSACTION_NO and TIE.STORE_NO = TH.STORE_NO and TIE.POS_TERMINAL_NO = TH.POS_TERMINAL_NO and TIE.LINE_NO = TSE.LINE_NO
where TH.TRANSACTION_TYPE = 2
and TH.ENTRY_STATUS not in (1,3)
and TIE.TRANSACTION_TYPE = 1
and TIE.INFOCODE = 'LOYALTY'
and TIE.INFORMATION not in ('PPP EXCLUSIVE','PPP Points Discount')
and TH.TRANSACTION_NO >= 20000000
) as a where to_varchar(concat(organization_id,rtl_loc_id,to_varchar(replace(to_date(business_date),'-','')),trans_seq,wkstn_id,IFNULL(rtrans_lineitm_seq,0),IFNULL(rtl_price_mod_seq_nbr,0),replace(IFNULL(rtl_price_mod_reasoncode,''),' ',''),replace(IFNULL(discount_code,''),' ',''),replace(IFNULL(deal_id,''),' ','') ) )
= '2200710320210826200121721126LOYALTYPPPSENIORDISCPPPSENIORDISC'
) AS src
ON (//tgt.Unique_id = src.Unique_id
to_varchar(concat(tgt.organization_id,tgt.rtl_loc_id,to_varchar(replace(to_date(tgt.business_date),'-','')),tgt.trans_seq,tgt.wkstn_id,IFNULL(tgt.rtrans_lineitm_seq,0),IFNULL(tgt.rtl_price_mod_seq_nbr,0),replace(IFNULL(tgt.rtl_price_mod_reasoncode,''),' ',''),replace(IFNULL(tgt.discount_code,''),' ',''),replace(IFNULL(tgt.deal_id,''),' ','') ) ) = src.Unique_id
)
WHEN NOT MATCHED THEN INSERT ( tgt.Unique_id
,tgt.organization_id
,tgt.rtl_loc_id
,tgt.business_date
,tgt.trans_seq
,tgt.wkstn_id
,tgt.rtrans_lineitm_seq
,tgt.rtl_price_mod_seq_nbr
,tgt.create_date
,tgt.rtl_price_mod_reasoncode
,tgt.discount_code
,tgt.deal_amt
,tgt.deal_id
,tgt.create_user
,tgt.sales_agt_com
,tgt.serial_number
,tgt.PROC_NUMBER
,tgt.PROC_NAME
,tgt.EXECUTION_NUMBER
,tgt.LAST_PROC_EXECUTION_DATE
)
values ( src.Unique_id
,src.organization_id
,src.rtl_loc_id
,src.business_date
,src.trans_seq
,src.wkstn_id
,src.rtrans_lineitm_seq
,src.rtl_price_mod_seq_nbr
,src.create_date
,src.rtl_price_mod_reasoncode
,src.discount_code
,src.deal_amt
,src.deal_id
,src.create_user
,src.sales_agt_com
,src.serial_number
,src.PROC_NUMBER
,src.PROC_NAME
,src.EXECUTION_NUMBER
,src.LAST_PROC_EXECUTION_DATE )
WHEN MATCHED THEN UPDATE SET
/* tgt.organization_id = SRC.organization_id
,tgt.rtl_loc_id = SRC.rtl_loc_id
,tgt.business_date = SRC.business_date
,tgt.trans_seq = SRC.trans_seq
,tgt.wkstn_id = SRC.wkstn_id
,tgt.rtrans_lineitm_seq = SRC.rtrans_lineitm_seq
,tgt.rtl_price_mod_seq_nbr = SRC.rtl_price_mod_seq_nbr
,tgt.create_date = SRC.create_date
,tgt.rtl_price_mod_reasoncode = SRC.rtl_price_mod_reasoncode
,tgt.discount_code = SRC.discount_code
,tgt.deal_amt = SRC.deal_amt
,tgt.deal_id = SRC.deal_id
,tgt.create_user = SRC.create_user
,tgt.sales_agt_com = SRC.sales_agt_com
,tgt.serial_number = SRC.serial_number
,tgt.PROC_NUMBER = SRC.PROC_NUMBER
,tgt.PROC_NAME = SRC.PROC_NAME ,*/
tgt.EXECUTION_NUMBER = (SRC.EXECUTION_NUMBER + 1)
,tgt.LAST_PROC_EXECUTION_DATE = current_timestamp(2)
Here is the sample row that I'm testing with
UNIQUE_ID ORGANIZATION_ID RTL_LOC_ID BUSINESS_DATE TRANS_SEQ WKSTN_ID RTRANS_LINEITM_SEQ RTL_PRICE_MOD_SEQ_NBR CREATE_DATE RTL_PRICE_MOD_REASONCODE DISCOUNT_CODE DEAL_AMT DEAL_ID CREATE_USER SALES_AGT_COM SERIAL_NUMBER PROC_NUMBER PROC_NAME EXECUTION_NUMBER LAST_PROC_EXECUTION_DATE 2200710320210826200121721126LOYALTYPPPSENIORDISCPPPSENIORDISC, 2200, 7103, 2021-08-26, 20012172, 1, 1, 26, 29:12.4, LOYALTY PPP SENIOR DISC PPP SENIOR DISC, 0.22, ST7103, 00, PROC_1, LOYALTY_2, 4, 21:53.9,
Error I get
Duplicate row detected during DML action Row Values: ["2200710320210826200121721126LOYALTYPPPSENIORDISCPPPSENIORDISC", 2200, "7103", 18865, 20012172, 1, 1, 26, 1630009752450000000, "LOYALTY PPP SENIOR DISC", "PPP SENIOR DISC", 2200, NULL, "ST7103 00", NULL, NULL, "PROC_1", "LOYALTY_2", 1, 1642853936960000000]
My question is: 9 TIMES OUT OF 10 WHY WOULD THIS GET CAUGHT IN THE "NOT MATCH INSERT" PART on n-th time VS NOT just being moving to the "WHEN MATCHED UPDATE" part??? as you can see in the sample row above. I was able to run the code successfully 4 times BUT WHEN I TRIED TO RUN IT A 5th time it failed with error...
there ARE OTHER CASES I can share if need be. Any help would be great. Thanks.
Upvotes: 1
Views: 1606
Reputation: 175556
The duplicate exists on the source side and makes it undeterministic. This behaviour is described in documentation:
When a merge joins a row in the target table against multiple rows in the source, the following join conditions produce nondeterministic results (i.e. the system is unable to determine the source value to use to update or delete the target row)
In this situation, the outcome of the merge depends on the value specified for the ERROR_ON_NONDETERMINISTIC_MERGE session parameter:
If TRUE (default value), the merge returns an error.
If FALSE, one row from among the duplicates is selected to perform the update or delete; the row selected is not defined.
... To avoid errors when multiple rows in the data source (i.e. the source table or subquery) match the target table based on the ON condition, use GROUP BY in the source clause to ensure that each target row joins against one row (at most) in the source.
Option number 1:Using session parameter(it is quick fix that will mask the duplicate error but choose source row in undefined manner):
ALTER SESSION SET ERROR_ON_NONDETERMINISTIC_MERGE = FALSE;
Option number 2:
Identify why they are duplicates in the source and change USING part
. To find duplicates QUALIFY COUNT(*) OVER(PARTITION BY Unique_id) > 1;
is the fastest option:
select to_varchar(concat(organization_id,rtl_loc_id,to_varchar(replace(to_date(business_date),'-','')),trans_seq,wkstn_id,IFNULL(rtrans_lineitm_seq,0),IFNULL(rtl_price_mod_seq_nbr,0),replace(IFNULL(rtl_price_mod_reasoncode,''),' ',''),replace(IFNULL(discount_code,''),' ',''),replace(IFNULL(deal_id,''),' ','') ) ) as Unique_id
,*
,'PROC_1' AS PROC_NUMBER
,'LOYALTY_2' AS PROC_NAME
,1 as EXECUTION_NUMBER
,current_timestamp(2) as LAST_PROC_EXECUTION_DATE
from (
select 2200 as organization_id
,TH.STORE_NO as rtl_loc_id
,TH.Date as business_date
,TH.TRANSACTION_NO as trans_seq
,case when left(TH.POS_TERMINAL_NO,2) = 'NP' then 1
when left(TH.POS_TERMINAL_NO,2) = 'NS' then 2
when left(TH.POS_TERMINAL_NO,2) = 'NT' then 3 else 0 end as wkstn_id
,TSE.LINE_NO as rtrans_lineitm_seq
,ROW_NUMBER() over (partition by TSE.STORE_NO,TSE.Date,TSE.TRANSACTION_NO,TSE.POS_TERMINAL_NO order by TSE.STORE_NO,TSE.Date,TSE.TRANSACTION_NO,TSE.POS_TERMINAL_NO,TSE.DISCOUNT_AMOUNT ) AS rtl_price_mod_seq_nbr
,CAST(CONCAT(trim(substring(TH.DATE,0,charindex(':',TH.DATE)-4)), ' ' ,substring(TH.Time,charindex(':',TH.Time)-2,length(TH.Time)))AS TIMESTAMP_NTZ(9)) as create_date
,trim(CONCAT('LOYALTY',' ',ifnull(TIE.Information,''))) AS rtl_price_mod_reasoncode
,IFNULL(PERIODIC_DISC_GROUP,TIE.Information) AS discount_code
,IFNULL(abs(TSE.DISCOUNT_AMOUNT),0) AS deal_amt
,null as deal_id
,TH.STAFF_ID as create_user
,null as sales_agt_com
,null as serial_number
from HEADER as TH
join SALES_ENTRY TSE on TSE.TRANSACTION_NO = TH.TRANSACTION_NO and TSE.STORE_NO = TH.STORE_NO and TSE.POS_TERMINAL_NO = TH.POS_TERMINAL_NO
left join CODE_ENTRY TIE on TIE.TRANSACTION_NO = TH.TRANSACTION_NO and TIE.STORE_NO = TH.STORE_NO and TIE.POS_TERMINAL_NO = TH.POS_TERMINAL_NO and TIE.LINE_NO = TSE.LINE_NO
where TH.TRANSACTION_TYPE = 2
and TH.ENTRY_STATUS not in (1,3)
and TIE.TRANSACTION_TYPE = 1
and TIE.INFOCODE = 'LOYALTY'
and TIE.INFORMATION not in ('PPP EXCLUSIVE','PPP Points Discount')
and TH.TRANSACTION_NO >= 20000000
) as a where to_varchar(concat(organization_id,rtl_loc_id,to_varchar(replace(to_date(business_date),'-','')),trans_seq,wkstn_id,IFNULL(rtrans_lineitm_seq,0),IFNULL(rtl_price_mod_seq_nbr,0),replace(IFNULL(rtl_price_mod_reasoncode,''),' ',''),replace(IFNULL(discount_code,''),' ',''),replace(IFNULL(deal_id,''),' ','') ) )
= '2200710320210826200121721126LOYALTYPPPSENIORDISCPPPSENIORDISC'
QUALIFY COUNT(*) OVER(PARTITION BY Unique_id) > 1;
If the query returns more than one row it means source query is not producing unique_id
and requires redesign.
Upvotes: 1