Reputation: 175
While using the left join to join invmast & cadv table to add two new columns coupon_pos as coupon_pos_c and coupon_web as coupon_web_c , i'm getting duplicate values from the columns of the right table (cadv).
/*INVMAST*/
Proc Sql;
Create Table INVMAST.INVMAST_01 as
Select
Case
When BV_LM = 'Y' Then (INV_DATE - Day(INV_DATE))
Else INV_DATE
END as INV_DATE format=DDMMYY8., BV_LM,
/*Case
When INV_TOT NE 0 Then Count(IN_CH)
Else 0
End*/ Count(IN_CH) as TOTAL_KIT_NOS, COALESCE(Sum(INV_TOT),0) as TOTAL_KIT_VALUE,
COALESCE(Sum(COUPON_WEB),0) as COUPON_WEB, COALESCE(Sum(COUPON_POS),0) as COUPON_POS,
IN_CH, LOCA_CODE, LOCA_NAME, ZONE From INVMAST where IN_CH IN ('N','I') AND CANCL NE 1
Group by INV_DATE, BV_LM, IN_CH, LOCA_CODE, LOCA_NAME,ZONE;
Quit;
/*CADV*/
PROC SQL;
CREATE TABLE CADV.CADV_01 AS
SELECT
Case
When BV_LM = 'Y' Then (INV_DATE - Day(INV_DATE))
Else INV_DATE
END as INV_DATE format=DDMMYY8.,COALESCE(SUM(COUPON_POS),0) AS COUPON_POS_C,COALESCE(SUM(COUPON_WEB),0) AS COUPON_WEB_C,
BV_LM,IN_CH,LOCA_CODE,LOCA_NAME,ZONE FROM CADV WHERE IN_CH="I" AND CANCL NE 1
GROUP BY INV_DATE,BV_LM,IN_CH,LOCA_CODE,LOCA_NAME,ZONE;
QUIT;
/*Joining INVMAST & CADV*/
Proc Sql;
Create Table INCADV.INCADV_01 as
Select a.*,COALESCE(SUM(b.COUPON_POS_C),0) as COUPON_POS_C,COALESCE(SUM(b.COUPON_WEB_C),0) AS COUPON_WEB_C
FROM INVMAST.INVMAST_01 AS a LEFT JOIN CADV.CADV_01 as b
On a.INV_DATE = b.INV_DATE
Group by a.INV_DATE, a.LOCA_CODE, a.LOCA_NAME, a.ZONE,a.BV_LM,a.IN_CH;
Quit;
Upvotes: 0
Views: 1094
Reputation: 95
You're duplicating the aggregate values from the Right-hand table every time there's a match on inv_date. Try adding conditions to the join statement, e.g.
/*Joining INVMAST & CADV*/
Create Table INCADV.INCADV_01 as
Select a.*
, COALESCE(SUM(b.COUPON_POS_C),0) as COUPON_POS_C
, COALESCE(SUM(b.COUPON_WEB_C),0) AS COUPON_WEB_C
FROM INVMAST.INVMAST_01 AS a
LEFT JOIN CADV.CADV_01 as b
On a.INV_DATE = b.INV_DATE
/* Add additional joins here */
and a.BV_LM = b.BV_LM
and a.IN_CH = b.IN_CH
and a.LOCA_CODE = b.LOCA_CODE
and a.LOCA_NAME = b.LOCA_NAME
and a.zone = b.zone
Group by a.INV_DATE, a.LOCA_CODE, a.LOCA_NAME, a.ZONE, a.BV_LM, a.IN_CH;
Quit;
Upvotes: 1
Reputation: 51566
Your query that makes the right-hand table is grouping by
INV_DATE,BV_LM,IN_CH,LOCA_CODE,LOCA_NAME,ZONE
but then you join it with the left-hand table on just
INV_DATE
If you don't want it to introduce multiples then collapse it to the right level or join it on enough keys.
Upvotes: 0