Reputation: 1
This query is used to pull data into Power BI from Snowflake, focusing on different stages of the shipment process from leaving the supplier to arriving at the dealer.
I'm having trouble understanding the logic behind the creation of multiple CTEs (base, base_comb, fship, and final) and the reasoning for self-joining.
WITH base AS (
SELECT
A.ACK_DT,
A.ARVL_DT,
A.ENT_DT,
A.FAC_CD,
A.FNL_PLT_SHP_DT,
A.ORD_CTL_NO,
A.ORD_RCPT_DT,
A.oth_sls_mdl_no AS f_model,
A.shp_src,
A.dlr_cd,
A.ult_dest_ctry_cd AS des_ctry,
A.diff_ord_stat,
A.shp_to_dlr_cd,
TRIM(B.SER_NO_PFX) || TRIM(B.SER_NO_BDY) AS SERIAL_NO,
DIV0NULL(B.ARVL_TO_ENT_HR, 24) AS ARVL_TO_ENT_DD,
DIV0NULL(B.ENT_TO_ACK_HR, 24) AS ENT_TO_ACK_HR,
DIV0NULL(B.ARVL_TO_ACK_HR, 24) AS ARVL_TO_ACK_DD,
TRIM(B.SER_NO_PFX) AS SER_NO_PFX,
TRIM(B.SER_NO_BDY) AS SER_NO_BDY,
B.LNE_NO
FROM
MDW_MACHINE_PROD_DB.MDW_MACHINE_CONSUMER_SCHEMA.MDW_ORD_REDATE_DTS_V1 A
LEFT JOIN MDW_MACHINE_PROD_DB.MDW_MACHINE_CONSUMER_SCHEMA.MDW_ORD_MTRIC_DTA_V1 B ON A.ord_ctl_no = B.ord_ctl_no
INNER JOIN EMD_DATA_ANALYTICS_PROD_DB.EMD_REFERENCE_DOMAIN.DIM_REF_PFX PFX ON TRIM(UPPER(B.SER_NO_PFX)) = PFX.PFX
AND PFX.BRAND IN ('CAT', 'SEM')
WHERE
YEAR(A.ORD_RCPT_DT) >= YEAR(CURRENT_DATE()) - 3
),
base_comb AS (
SELECT
A.*,
B.ACK_DT AS ACK_DT2,
B.SERIAL_NO AS SERIAL_NO2,
B.SER_NO_PFX AS SER_NO_PFX2,
B.shp_src AS shp_src2,
B.dlr_cd AS dlr_cd2,
B.des_ctry AS des_ctry2,
B.diff_ord_stat AS diff_ord_stat2,
B.shp_to_dlr_cd AS shp_to_dlr_cd2,
B.ARVL_TO_ENT_DD AS ARVL_TO_ENT_DD2,
B.ENT_TO_ACK_HR AS ENT_TO_ACK_HR2,
B.ARVL_TO_ACK_DD AS ARVL_TO_ACK_DD2,
B.SER_NO_BDY AS SER_NO_BDY2,
B.LNE_NO AS LNE_NO2,
B.ARVL_DT AS ARVL_DT2,
B.ENT_DT AS ENT_DT2,
B.FAC_CD AS FAC_CD2,
B.FNL_PLT_SHP_DT AS FNL_PLT_SHP_DT2,
B.ORD_CTL_NO AS ORD_CTL_NO2,
B.ORD_RCPT_DT AS ORD_RCPT_DT2,
B.f_model AS f_model2,
CASE
WHEN B.ARVL_TO_ENT_DD IS NOT NULL THEN B.ARVL_TO_ENT_DD
ELSE A.ARVL_TO_ENT_DD
END AS ARVL_TO_ENT_DD_NEW,
CASE
WHEN B.ENT_TO_ACK_HR IS NOT NULL THEN B.ENT_TO_ACK_HR
ELSE A.ENT_TO_ACK_HR
END AS ENT_TO_ACK_HR_NEW,
CASE
WHEN A.FNL_PLT_SHP_DT IS NOT NULL THEN A.FNL_PLT_SHP_DT
ELSE B.FNL_PLT_SHP_DT
END AS FNL_PLT_SHP_DT_NEW,
CASE
WHEN B.ACK_DT IS NOT NULL THEN B.ACK_DT
ELSE A.ACK_DT
END AS ACK_DT_NEW
FROM
base A
LEFT JOIN base B ON A.SERIAL_NO = B.SERIAL_NO
),
fship AS (
SELECT
SERIAL_NO,
SUM(DATEDIFF(dd, ACK_DT_NEW, FNL_PLT_SHP_DT_NEW)) AS ACK_TO_FSHIP
FROM
base_comb
WHERE
ACK_DT_NEW <> '1/1/0001'
AND FNL_PLT_SHP_DT_NEW <> '1/1/0001'
AND FNL_PLT_SHP_DT_NEW < GETDATE()
GROUP BY
SERIAL_NO
HAVING
SUM(DATEDIFF(dd, ACK_DT_NEW, FNL_PLT_SHP_DT_NEW)) >= 0
),
final AS (
SELECT
b.*,
f.ACK_TO_FSHIP
FROM
base_comb b
LEFT JOIN fship f ON b.SERIAL_NO = f.SERIAL_NO
)
SELECT
* EXCLUDE(shp_src2),
IFF(shp_src2 IS NULL, 'no_epc', shp_src2) AS shp_src2
FROM
final
WHERE
LEN(SERIAL_NO) = 8
Purpose of CTEs: What is the reasoning behind creating the base, base_comb, fship, and final CTEs? Specifically, why is there a self-join in base_comb?
Optimization: Is there a more efficient way to transform this data without using multiple CTEs and self-joins? If so, how can this query be optimized?
Upvotes: 0
Views: 38