user1874594
user1874594

Reputation: 2493

Teradata SQL Tuning Multiple columns in a huge table being joined to the same table with OR condition on the filter

This looked simple enough because there was just 1 col in the sel but I am not getting a tuning approach.

sel distinct (ColID ) from 1Billiontb btb
Join smalltb1  stb1 on btb.col_1=stb1.col_1
Join stb2           on btb.col_2=stb2.col_2
join stb3           on btb.col_3=stb3.col_3
etc till 12 
Join  (some medium size tables )
where stb1 colx in ( value-list1 )
OR stb2 coly in (  same value-list1 as above )
OR stb3 colz in (  same value-list1 as above )
OR stb4 cola in (  same value-list1 as above )
continued till...stb12 
where <some more filters on medium size tables>

What I did
changed the distinct to group by based on the cardinality of the ColID values and I changed an inner join to exists condition on the medium size tables and this got the CPU down by around 20K.
It still runs like a clunker
Now there are somethings I need some help here in understanding

    sel * from stb1
    MINUS
    sel * from stb2
    MINUS 
    sel * from sbt3
etc
0 rows 

So its actually joining various columns of the fact table to the SAME col of the dimension table and a thought or 2 more would make one realize that calling the Same Col again and again from the same table is not the same as aliasing the same table again and calling the same col

    sel distinct (ColID ) from 1Billiontb btb
        Join smalltb1  stb1 on btb.col_1=stb1.col_1
        Join stb2           on btb.col_2=stb2.col_2
        join stb3           on btb,col_3=stb3.col_3
        etc till 9 
    

is not the same as

    sel distinct (ColID ) from 1Billiontb btb
        Join   (smalltb1  stb1 on btb.col_1=stb1.col_1
        and btb.col_2=stb1.col_1
         and  btb.col_3=stb1.col_1
        etc till 9 )
    

even if stb1 and stb2 ..etc are same as in the former case there are actually more tables involved. Here I need help Visualizing what's happening . I know the above 2 are different one the fact that more tables are involved but what precisely makes them different in terms of o/p

    sel (ColID ) from 1Billiontb btb
        Join smalltb1  stb1 on btb.col_1=stb1.col_1

        Join  (some medium size tables )
        where stb1 colx in ( value-list1 )

        where 

    UNION 
    sel (ColID ) from 1Billiontb btb
        Join smalltb2  stb2 on btb.col_2=stb2.col_2


        Join  (some medium size tables )
        where 
         stb2 coly in (  same value-list1 as above )

     UNION 
     till...9 
     group by 1 

# compare to original Query where I am replacing the OR part with the UNION 
# stb1.col1 is the same as stb2.col2 aliases  but same sel inside the views 

They give different results on test data. My understanding was OR's could be replaced by UNION but here the results are different . Can someone explain this

Here is the explain plan for the original report : -

Explain 

CREATE MULTISET VOLATILE TABLE VT AS (
SELECT DISTINCT d.in_ID FROM 

DB.FACT_BODtl d

JOIN cd_vw1 dc1 ON (d.col_CD1=dc1.col_CD1)
JOIN cd_vw2 dc2 ON (d.col_CD2=dc2.col_CD2)
JOIN cd_vw3 dc3 ON (d.col_CD3=dc3.col_CD3)
JOIN cd_vw4 dc4 ON (d.col_CD4=dc4.col_CD4)
JOIN cd_vw5 dc5 ON (d.col_CD5=dc5.col_CD5)
JOIN cd_vw6 dc6 ON (d.col_CD6=dc6.col_CD6)
JOIN cd_vw7 dc7 ON (d.col_CD7=dc7.col_CD7)
JOIN cd_vw8 dc8 ON (d.col_CD8=dc8.col_CD8)
JOIN cd_vw9 dc9 ON (d.col_CD9=dc9.col_CD9)
JOIN DM_NBR NBR ON (d.fact_SysID = NBR.fact_SysID)
JOIN PRICE PRC ON (d.in_ID=PRC.in_ID)
JOIN Dm_LabDt ENDDT ON (d.dt_tb_sysID=ENDDT.DT_SYS_ID )
JOIN CC_sg CSG ON (PRC.client_sysID=CSG.client_sysID)
JOIN COMPONENT PROD ON (CSG.prdt_cd=PROD.prdt_cd)


WHERE 
NBR.COLX_CD IN ('163000')
AND EXTRACT (YEAR FROM ENDDT.Fdt) = '2015'
AND PROD.Prd_CD1 IN ('COM')
AND dc1.d_cd1 IN ('1701','1701.02','1701.1','1701.12','1701.2','1701.22','1701.3','1701.32','1701.4','1701.42','1701.5','1701.52','1701.6','1701.62','1701.7','1701.72','1701.8','1701.82','1701.9','1701.92')
OR dc2.d_cd2 IN ('1701','1701.02','1701.1','1701.12','1701.2','1701.22','1701.3','1701.32','1701.4','1701.42','1701.5','1701.52','1701.6','1701.62','1701.7','1701.72','1701.8','1701.82','1701.9','1701.92')
OR dc3.d_cd3 IN ('1701','1701.02','1701.1','1701.12','1701.2','1701.22','1701.3','1701.32','1701.4','1701.42','1701.5','1701.52','1701.6','1701.62','1701.7','1701.72','1701.8','1701.82','1701.9','1701.92')
OR dc4.d_cd4 IN ('1701','1701.02','1701.1','1701.12','1701.2','1701.22','1701.3','1701.32','1701.4','1701.42','1701.5','1701.52','1701.6','1701.62','1701.7','1701.72','1701.8','1701.82','1701.9','1701.92')
OR dc5.d_cd5 IN ('1701','1701.02','1701.1','1701.12','1701.2','1701.22','1701.3','1701.32','1701.4','1701.42','1701.5','1701.52','1701.6','1701.62','1701.7','1701.72','1701.8','1701.82','1701.9','1701.92')
OR dc6.d_cd6 IN ('1701','1701.02','1701.1','1701.12','1701.2','1701.22','1701.3','1701.32','1701.4','1701.42','1701.5','1701.52','1701.6','1701.62','1701.7','1701.72','1701.8','1701.82','1701.9','1701.92')
OR dc7.d_cd7 IN ('1701','1701.02','1701.1','1701.12','1701.2','1701.22','1701.3','1701.32','1701.4','1701.42','1701.5','1701.52','1701.6','1701.62','1701.7','1701.72','1701.8','1701.82','1701.9','1701.92')
OR dc8.d_cd8 IN ('1701','1701.02','1701.1','1701.12','1701.2','1701.22','1701.3','1701.32','1701.4','1701.42','1701.5','1701.52','1701.6','1701.62','1701.7','1701.72','1701.8','1701.82','1701.9','1701.92')
OR dc9.d_cd9 IN ('1701','1701.02','1701.1','1701.12','1701.2','1701.22','1701.3','1701.32','1701.4','1701.42','1701.5','1701.52','1701.6','1701.62','1701.7','1701.72','1701.8','1701.82','1701.9','1701.92')

) 
WITH DATA 
ON COMMIT PRESERVE ROWS;

 This query is optimized using type 2 profile insert-sel, profileid
 10001.
  1) First, we lock DB.DM_CODE_TABLE in view
     DB.cd_vw4 for access, we lock
     DB.FACT_FACT_BODtl in view DB.FACT_BODtl for access,
     we lock DB.DIM_CC_sg in view
     DB.CC_sg for access, we lock
     DB.DIM_COMPONENT_2 in view DB.COMPONENT for access, we
     lock DB.DIM_PRICE in view DB.PRICE for
     access, we lock DB.DIM_DM_NBR in view
     DB.DM_NBR for access, and we lock DB.DIM_DATE in
     view DB.Dm_LabDt for access.
  2) Next, we create the table header.
  3) We execute the following steps in parallel.
       1) We do an all-AMPs RETRIEVE step from DB.DIM_COMPONENT_2
          in view DB.COMPONENT by way of an all-rows scan with no
          residual conditions into Spool 4 (all_amps) (compressed
          columns allowed), which is duplicated on all AMPs.  The size
          of Spool 4 is estimated with high confidence to be 2,550,240
          rows (68,856,480 bytes).  The estimated time for this step is
          0.06 seconds.
       2) We do an all-AMPs RETRIEVE step from DB.DIM_DATE in
          view DB.Dm_LabDt by way of an all-rows scan with no
          residual conditions into Spool 5 (all_amps) (compressed
          columns allowed), which is duplicated on all AMPs.  The size
          of Spool 5 is estimated with high confidence to be 32,399,136
          rows (615,583,584 bytes).  The estimated time for this step
          is 0.19 seconds.
       3) We do an all-AMPs RETRIEVE step from
          DB.DM_CODE_TABLE in view DB.cd_vw4
          by way of an all-rows scan with no residual conditions split
          into Spool 6 (all_amps) with a condition of ("DIAG_CD_SYS_ID
          IN (:*)") to qualify rows matching skewed rows of the skewed
          relation (compressed columns allowed) and Spool 7 (all_amps)
          with remaining rows (compressed columns allowed) fanned out
          into 4 hash join partitions.  Spool 6 is built locally on the
          AMPs.  The size of Spool 6 is estimated with high confidence
          to be 3 rows.  Spool 7 is duplicated on all AMPs.  The size
          of Spool 7 is estimated with high confidence to be
          132,346,872 rows.  The estimated time for this step is 0.85
          seconds.
  4) We do an all-AMPs JOIN step from Spool 5 (Last Use) by way of an
     all-rows scan, which is joined to DB.FACT_FACT_BODtl in view
     DB.FACT_BODtl by way of an all-rows scan with no residual
     conditions.  Spool 5 and DB.FACT_FACT_BODtl are joined using
     a dynamic hash join, with a join condition of (
     "DB.FACT_FACT_BODtl.dt_tb_sysID = DT_SYS_ID").  The
     result is split into Spool 8 (all_amps) with a condition of (
     "col_CD4 IN (:*)") to qualify skewed rows fanned out into
     50 hash join partitions and Spool 9 (all_amps) with remaining rows
     fanned out into 4 hash join partitions.  Spool 8 is built locally
     on the AMPs.  The size of Spool 8 is estimated with low confidence
     to be 2,097,544,557 rows (123,755,128,863 bytes).  Spool 9 is
     built locally on the AMPs.  The size of Spool 9 is estimated with
     low confidence to be 505,740,248 rows (29,838,674,632 bytes).  The
     estimated time for this step is 54.89 seconds.
  5) We execute the following steps in parallel.
       1) We do an all-AMPs RETRIEVE step from Spool 6 (Last Use) by
          way of an all-rows scan into Spool 10 (all_amps) (compressed
          columns allowed) fanned out into 50 hash join partitions,
          which is duplicated on all AMPs.  The size of Spool 10 is
          estimated with high confidence to be 1,512 rows (37,800
          bytes).  The estimated time for this step is 0.01 seconds.
       2) We do an all-AMPs RETRIEVE step from
          DB.DM_CODE_TABLE in view DB.cd_vw5
          by way of an all-rows scan with no residual conditions
          locking for access split into Spool 11 (all_amps) with a
          condition of ("DG_CD_ID IN (:*)") to qualify rows
          matching skewed rows of the skewed relation (compressed
          columns allowed) and Spool 12 (all_amps) with remaining rows
          (compressed columns allowed) fanned out into 4 hash join
          partitions.  Spool 11 is built locally on the AMPs.  The size
          of Spool 11 is estimated with high confidence to be 2 rows.
          Spool 12 is duplicated on all AMPs.  The size of Spool 12 is
          estimated with high confidence to be 132,347,376 rows.  The
          estimated time for this step is 0.85 seconds.
  6) We do an all-AMPs JOIN step from Spool 10 (Last Use) by way of an
     all-rows scan, which is joined to Spool 8 (Last Use) by way of an
     all-rows scan.  Spool 10 and Spool 8 are joined using a hash join
     of 50 partitions, with a join condition of ("col_CD4 =
     DG_CD_ID").  The result goes into Spool 13 (all_amps), which
     is built locally on the AMPs.  The size of Spool 13 is estimated
     with low confidence to be 2,097,544,557 rows (132,145,307,091
     bytes).  The estimated time for this step is 3.04 seconds.
  7) We do an all-AMPs JOIN step from Spool 7 (Last Use) by way of an
     all-rows scan, which is joined to Spool 9 (Last Use) by way of an
     all-rows scan.  Spool 7 and Spool 9 are joined using a hash join
     of 4 partitions, with a join condition of ("col_CD4 =
     DG_CD_ID").  The result goes into Spool 13 (all_amps), which
     is built locally on the AMPs.  The size of Spool 13 is estimated
     with low confidence to be 505,740,248 rows (31,861,635,624 bytes).
     The estimated time for this step is 36.84 seconds.
  8) We execute the following steps in parallel.
       1) We do an all-AMPs RETRIEVE step from Spool 13 (Last Use) by
          way of an all-rows scan split into Spool 14 (all_amps) with a
          condition of ("col_CD5 IN (:*)") to qualify skewed
          rows (compressed columns allowed) fanned out into 50 hash
          join partitions and Spool 15 (all_amps) with remaining rows
          (compressed columns allowed) fanned out into 4 hash join
          partitions.  Spool 14 is built locally on the AMPs.  The size
          of Spool 14 is estimated with low confidence to be
          2,355,693,166 rows.  Spool 15 is built locally on the AMPs.
          The size of Spool 15 is estimated with low confidence to be
          247,591,639 rows.  The estimated time for this step is 36.84
          seconds.
       2) We do an all-AMPs RETRIEVE step from Spool 11 (Last Use) by
          way of an all-rows scan into Spool 16 (all_amps) (compressed
          columns allowed) fanned out into 50 hash join partitions,
          which is duplicated on all AMPs.  The size of Spool 16 is
          estimated with high confidence to be 1,008 rows (25,200
          bytes).  The estimated time for this step is 0.01 seconds.
       3) We do an all-AMPs RETRIEVE step from
          DB.DM_CODE_TABLE in view DB.cd_vw6
          by way of an all-rows scan with no residual conditions
          locking for access into Spool 17 (all_amps) (compressed
          columns allowed) fanned out into 4 hash join partitions,
          which is duplicated on all AMPs.  The size of Spool 17 is
          estimated with high confidence to be 132,348,384 rows (
          3,308,709,600 bytes).  The estimated time for this step is
          0.84 seconds.
  9) We do an all-AMPs JOIN step from Spool 16 (Last Use) by way of an
     all-rows scan, which is joined to Spool 14 (Last Use) by way of an
     all-rows scan.  Spool 16 and Spool 14 are joined using a hash join
     of 50 partitions, with a join condition of ("col_CD5 =
     DG_CD_ID").  The result goes into Spool 18 (all_amps) fanned
     out into 4 hash join partitions, which is built locally on the
     AMPs.  The size of Spool 18 is estimated with low confidence to be
     2,355,693,166 rows (157,831,442,122 bytes).  The estimated time
     for this step is 3.30 seconds.
 10) We do an all-AMPs JOIN step from Spool 12 (Last Use) by way of an
     all-rows scan, which is joined to Spool 15 (Last Use) by way of an
     all-rows scan.  Spool 12 and Spool 15 are joined using a hash join
     of 4 partitions, with a join condition of ("col_CD5 =
     DG_CD_ID").  The result goes into Spool 18 (all_amps) fanned
     out into 4 hash join partitions, which is built locally on the
     AMPs.  The size of Spool 18 is estimated with low confidence to be
     247,591,639 rows (16,588,639,813 bytes).  The estimated time for
     this step is 36.63 seconds.
 11) We do an all-AMPs JOIN step from Spool 17 by way of an all-rows
     scan, which is joined to Spool 18 (Last Use) by way of an all-rows
     scan.  Spool 17 and Spool 18 are joined using a hash join of 4
     partitions, with a join condition of ("col_CD6 =
     DG_CD_ID").  The result goes into Spool 20 (all_amps)
     (compressed columns allowed) fanned out into 4 hash join
     partitions, which is built locally on the AMPs.  The size of Spool
     20 is estimated with index join confidence to be 2,603,284,805
     rows (184,833,221,155 bytes).  The estimated time for this step is
     45.65 seconds.
 12) We do an all-AMPs JOIN step from Spool 17 by way of an all-rows
     scan, which is joined to Spool 20 (Last Use) by way of an all-rows
     scan.  Spool 17 and Spool 20 are joined using a hash join of 4
     partitions, with a join condition of ("col_CD7 =
     DG_CD_ID").  The result goes into Spool 22 (all_amps)
     (compressed columns allowed) fanned out into 4 hash join
     partitions, which is built locally on the AMPs.  The size of Spool
     22 is estimated with index join confidence to be 2,603,284,805
     rows (195,246,360,375 bytes).  The estimated time for this step is
     46.96 seconds.
 13) We do an all-AMPs JOIN step from Spool 17 by way of an all-rows
     scan, which is joined to Spool 22 (Last Use) by way of an all-rows
     scan.  Spool 17 and Spool 22 are joined using a hash join of 4
     partitions, with a join condition of ("col_CD8 =
     DG_CD_ID").  The result goes into Spool 24 (all_amps)
     (compressed columns allowed) fanned out into 4 hash join
     partitions, which is built locally on the AMPs.  The size of Spool
     24 is estimated with index join confidence to be 2,603,284,805
     rows (205,659,499,595 bytes).  The estimated time for this step is
     48.26 seconds.
 14) We do an all-AMPs RETRIEVE step from DB.DIM_DM_NBR in
     view DB.DM_NBR by way of an all-rows scan with no
     residual conditions into Spool 25 (all_amps) (compressed columns
     allowed), which is duplicated on all AMPs.  The size of Spool 25
     is estimated with high confidence to be 4,421,592 rows (
     92,853,432 bytes).  The estimated time for this step is 0.06
     seconds.
 15) We do an all-AMPs JOIN step from Spool 17 by way of an all-rows
     scan, which is joined to Spool 24 (Last Use) by way of an all-rows
     scan.  Spool 17 and Spool 24 are joined using a hash join of 4
     partitions, with a join condition of ("col_CD9 =
     DG_CD_ID").  The result goes into Spool 26 (all_amps)
     (compressed columns allowed), which is built locally on the AMPs.
     The size of Spool 26 is estimated with index join confidence to be
     2,603,284,805 rows (216,072,638,815 bytes).  The estimated time
     for this step is 49.57 seconds.
 16) We do an all-AMPs JOIN step from Spool 25 (Last Use) by way of an
     all-rows scan, which is joined to Spool 26 (Last Use) by way of an
     all-rows scan.  Spool 25 and Spool 26 are joined using a single
     partition hash join, with a join condition of ("fact_SysID =
     fact_SysID").  The result goes into Spool 28 (all_amps)
     (compressed columns allowed) fanned out into 4 hash join
     partitions, which is built locally on the AMPs.  The size of Spool
     28 is estimated with index join confidence to be 2,603,284,805
     rows (226,485,778,035 bytes).  The estimated time for this step is
     52.96 seconds.
 17) We do an all-AMPs JOIN step from Spool 17 by way of an all-rows
     scan, which is joined to Spool 28 (Last Use) by way of an all-rows
     scan.  Spool 17 and Spool 28 are joined using a hash join of 4
     partitions, with a join condition of ("col_CD1 =
     DG_CD_ID").  The result goes into Spool 30 (all_amps)
     (compressed columns allowed) fanned out into 4 hash join
     partitions, which is built locally on the AMPs.  The size of Spool
     30 is estimated with index join confidence to be 2,603,284,805
     rows (236,898,917,255 bytes).  The estimated time for this step is
     54.66 seconds.
 18) We do an all-AMPs JOIN step from Spool 4 (Last Use) by way of an
     all-rows scan, which is joined to
     DB.DIM_CC_sg in view
     DB.CC_sg by way of an all-rows scan with
     no residual conditions.  Spool 4 and
     DB.DIM_CC_sg are joined using a dynamic
     hash join, with a join condition of (
     "DB.DIM_CC_sg.prdt_cd = prdt_cd").
     The result is split into Spool 31 (all_amps) with a condition of (
     "client_sysID IN (:*)") to qualify rows matching skewed
     rows of the skewed relation and Spool 32 (all_amps) with remaining
     rows.  Spool 31 is duplicated on all AMPs.  The size of Spool 31
     is estimated with low confidence to be 25,200 rows (730,800 bytes).
     Spool 32 is built locally on the AMPs.  The size of Spool 32 is
     estimated with low confidence to be 1,902,852 rows (55,182,708
     bytes).  The estimated time for this step is 0.04 seconds.
 19) We do an all-AMPs RETRIEVE step from DB.DIM_PRICE in
     view DB.PRICE by way of an all-rows scan with no
     residual conditions split into Spool 33 (all_amps) with a
     condition of ("client_sysID IN (:*)") to qualify skewed
     rows (compressed columns allowed) and Spool 34 (all_amps) with
     remaining rows (compressed columns allowed).  Spool 33 is built
     locally on the AMPs.  The size of Spool 33 is estimated with high
     confidence to be 8,061,965 rows.  Spool 34 is redistributed by
     hash code to all AMPs.  The size of Spool 34 is estimated with
     high confidence to be 34,903,738 rows.  The estimated time for
     this step is 1.44 seconds.
 20) We do an all-AMPs JOIN step from Spool 31 (Last Use) by way of an
     all-rows scan, which is joined to Spool 33 (Last Use) by way of an
     all-rows scan.  Spool 31 and Spool 33 are joined using a single
     partition hash join, with a join condition of (
     "client_sysID = client_sysID").  The result
     goes into Spool 35 (all_amps) fanned out into 5 hash join
     partitions, which is redistributed by the hash code of (
     DB.DIM_PRICE.in_ID) to all AMPs.  The size of
     Spool 35 is estimated with low confidence to be 8,061,965 rows (
     233,796,985 bytes).  The estimated time for this step is 0.03
     seconds.
 21) We do an all-AMPs JOIN step from Spool 32 (Last Use) by way of an
     all-rows scan, which is joined to Spool 34 (Last Use) by way of an
     all-rows scan.  Spool 32 and Spool 34 are joined using a single
     partition hash join, with a join condition of (
     "client_sysID = client_sysID").  The result
     goes into Spool 35 (all_amps) fanned out into 5 hash join
     partitions, which is redistributed by the hash code of (
     DB.DIM_PRICE.in_ID) to all AMPs.  The size of
     Spool 35 is estimated with low confidence to be 34,903,738 rows (
     1,012,208,402 bytes).  The estimated time for this step is 1.20
     seconds.
 22) We do an all-AMPs JOIN step from Spool 17 by way of an all-rows
     scan, which is joined to Spool 30 (Last Use) by way of an all-rows
     scan.  Spool 17 and Spool 30 are joined using a hash join of 4
     partitions, with a join condition of ("col_CD2 =
     DG_CD_ID").  The result goes into Spool 36 (all_amps)
     (compressed columns allowed) fanned out into 5 hash join
     partitions, which is redistributed by the hash code of (
     {RightTable}.in_ID) to all AMPs.  The size of Spool 36 is
     estimated with index join confidence to be 2,603,284,805 rows (
     247,312,056,475 bytes).  The estimated time for this step is 3
     minutes and 10 seconds.
 23) We do an all-AMPs JOIN step from Spool 35 (Last Use) by way of an
     all-rows scan, which is joined to Spool 36 (Last Use) by way of an
     all-rows scan.  Spool 35 and Spool 36 are joined using a hash join
     of 5 partitions, with a join condition of ("in_ID =
     in_ID").  The result goes into Spool 38 (all_amps)
     (compressed columns allowed) fanned out into 4 hash join
     partitions, which is built locally on the AMPs.  The size of Spool
     38 is estimated with index join confidence to be 2,603,284,805
     rows (278,551,474,135 bytes).  The estimated time for this step is
     1 minute.
 24) We do an all-AMPs JOIN step from Spool 17 (Last Use) by way of an
     all-rows scan, which is joined to Spool 38 (Last Use) by way of an
     all-rows scan.  Spool 17 and Spool 38 are joined using a hash join
     of 4 partitions, with a join condition of ("(col_CD3 =
     DG_CD_ID) AND (((DG_GEN_CD = '1701.02 ') OR ((DG_GEN_CD
     = '1701 ') OR ((DG_GEN_CD = '1701.1 ') OR ((DG_GEN_CD =
     '1701.12 ') OR ((DG_GEN_CD = '1701.2 ') OR ((DG_GEN_CD =
     '1701.22 ') OR ((DG_GEN_CD = '1701.3 ') OR ((DG_GEN_CD =
     '1701.32 ') OR ((DG_GEN_CD = '1701.4 ') OR ((DG_GEN_CD =
     '1701.42 ') OR ((DG_GEN_CD = '1701.5 ') OR ((DG_GEN_CD =
     '1701.52 ') OR ((DG_GEN_CD = '1701.6 ') OR ((DG_GEN_CD =
     '1701.62 ') OR ((DG_GEN_CD = '1701.7 ') OR ((DG_GEN_CD =
     '1701.72 ') OR ((DG_GEN_CD = '1701.8 ') OR ((DG_GEN_CD =
     '1701.82 ') OR ((DG_GEN_CD = '1701.9 ') OR (DG_GEN_CD =
     '1701.92 ')))))))))))))))))))) OR (((DG_GEN_CD = '1701.02 ') OR
     ((DG_GEN_CD = '1701 ') OR ((DG_GEN_CD = '1701.1 ') OR
     ((DG_GEN_CD = '1701.12 ') OR ((DG_GEN_CD = '1701.2 ') OR
     ((DG_GEN_CD = '1701.22 ') OR ((DG_GEN_CD = '1701.3 ') OR
     ((DG_GEN_CD= '1701.32 ') OR ((DG_GEN_CD = '1701.4 ') OR
     ((DG_GEN_CD = '1701.42 ') OR ((DG_GEN_CD = '1701.5 ') OR
     ((DG_GEN_CD = '1701.52 ') OR ((DG_GEN_CD = '1701.6 ') OR
     ((DG_GEN_CD = '1701.62 ') OR ((DG_GEN_CD = '1701.7 ') OR
     ((DG_GEN_CD = '1701.72 ') OR ((DG_GEN_CD = '1701.8 ') OR
     ((DG_GEN_CD = '1701.82 ') OR ((DG_GEN_CD = '1701.9 ') OR
     (DG_GEN_CD = '1701.92 ')))))))))))))))))))) OR (((DG_GEN_CD =
     '1701.02 ') OR ((DG_GEN_CD = '1701 ') OR ((DG_GEN_CD = '1701.1
     ') OR ((DG_GEN_CD = '1701.12 ') OR ((DG_GEN_CD = '1701.2 ') OR
     ((DG_GEN_CD = '1701.22 ') OR ((DG_GEN_CD = '1701.3 ') OR
     ((DG_GEN_CD = '1701.32 ') OR ((DG_GEN_CD = '1701.4 ') OR
     ((DG_GEN_CD = '1701.42 ') OR ((DG_GEN_CD = '1701.5 ') OR
     ((DG_GEN_CD = '1701.52 ') OR ((DG_GEN_CD = '1701.6 ') OR
     ((DG_GEN_CD = '1701.62 ') OR ((DG_GEN_CD = '1701.7 ') OR
     ((DG_GEN_CD = '1701.72 ') OR ((DG_GEN_CD = '1701.8 ') OR
     ((DG_GEN_CD = '1701.82 ') OR ((DG_GEN_CD = '1701.9 ') OR
     (DG_GEN_CD = '1701.92 ')))))))))))))))))))) OR (((DG_GEN_CD =
     '1701.02 ') OR ((DG_GEN_CD = '1701 ') OR ((DG_GEN_CD =
     '1701.1') OR ((DG_GEN_CD = '1701.12 ') OR ((DG_GEN_CD = '1701.2
     ') OR ((DG_GEN_CD = '1701.22 ') OR ((DG_GEN_CD = '1701.3 ') OR
     ((DG_GEN_CD = '1701.32 ') OR ((DG_GEN_CD = '1701.4 ') OR
     ((DG_GEN_CD = '1701.42 ') OR ((DG_GEN_CD = '1701.5 ') OR
     ((DG_GEN_CD = '1701.52 ') OR ((DG_GEN_CD = '1701.6 ') OR
     ((DG_GEN_CD = '1701.62 ') OR ((DG_GEN_CD = '1701.7 ') OR
     ((DG_GEN_CD = '1701.72 ') OR ((DG_GEN_CD = '1701.8 ') OR
     ((DG_GEN_CD = '1701.82 ') OR ((DG_GEN_CD = '1701.9 ') OR
     (DG_GEN_CD = '1701.92 ')))))))))))))))))))) OR (((DG_GEN_CD =
     '1701.02 ') OR ((DG_GEN_CD = '1701 ') OR ((DG_GEN_CD = '1701.1
     ') OR ((DG_GEN_CD = '1701.12 ') OR ((DG_GEN_CD = '1701.2 ') OR
     ((DG_GEN_CD = '1701.22 ') OR ((DG_GEN_CD = '1701.3 ') OR
     ((DG_GEN_CD = '1701.32 ') OR ((DG_GEN_CD = '1701.4 ') OR
     ((DG_GEN_CD = '1701.42 ') OR ((DG_GEN_CD = '1701.5 ') OR
     ((DG_GEN_CD = '1701.52 ') OR ((DG_GEN_CD = '1701.6 ') OR
     ((DG_GEN_CD = '1701.62 ') OR ((DG_GEN_CD = '1701.7 ') OR
     ((DG_GEN_CD = '1701.72 ') OR ((DG_GEN_CD = '1701.8 ') OR
     ((DG_GEN_CD = '1701.82 ') OR ((** additional conditions
     notlisted **) OR (** additional conditions not listed
     **)))))))))))))))))))) OR (** additional conditions not listed
     **))))))").  The result goes into Spool 3 (all_amps) (compressed
     columns allowed), which is built locally on the AMPs.  The size of
     Spool 3 is estimated with index join confidence to be
     2,603,284,805 rows (49,462,411,295 bytes).  The estimated time for
     this step is 25.72 seconds.
 25) We do an all-AMPs SUM step to aggregate from Spool 3 (Last Use) by
     way of an all-rows scan, and the grouping identifier in field 2.
     Aggregate Intermediate Results are computed globally, then placed
     in Spool 39.  The size of Spool 39 is estimated with low
     confidence to be 34,391,711 rows (722,225,931 bytes).  The
     estimated time for this step is 51.94 seconds.
 26) We do an all-AMPs RETRIEVE step from Spool 39 (Last Use) by way of
     an all-rows scan into Spool 1 (all_amps) (compressed columns
     allowed), which is redistributed by hash code to all AMPs.  Then
     we do a SORT to order Spool 1 by row hash.  The size of Spool 1 is
     estimated with low confidence to be 34,391,711 rows (584,659,087
     bytes).  The estimated time for this step is 0.87 seconds.
 27) We do an all-AMPs MERGE into scarp3.VT from Spool 1
     (Last Use).  The size is estimated with low confidence to be
     34,391,711 rows.  The estimated time for this step is 3 minutes
     and 6 seconds.

Upvotes: 0

Views: 737

Answers (1)

dnoeth
dnoeth

Reputation: 60482

You need to join the same table multiple times when you denormalize :-)

But the plan doesn't look bad, the optimizer knows about the skew and uses PRPD (Partial Redistribution Partial Duplication).

Regarding rewrite, if the views actually share the same definiton (and the brackets where missing) you can simplify it to:

CREATE MULTISET VOLATILE TABLE VT AS (
WITH cd_vw AS 
 ( SELECT * FROM cd_vw1
   WHERE dc1.d_cd1 IN ('1701','1701.02','1701.1','1701.12','1701.2'
                       ,'1701.22','1701.3','1701.32','1701.4','1701.42'
                       ,'1701.5','1701.52','1701.6','1701.62','1701.7'
                       ,'1701.72','1701.8','1701.82','1701.9','1701.92')
 )
SELECT DISTINCT d.in_ID
FROM DB.FACT_BODtl d

JOIN cd_vw dc1 ON (d.col_CD1=dc1.col_CD1)
JOIN cd_vw dc2 ON (d.col_CD2=dc2.col_CD2)
JOIN cd_vw dc3 ON (d.col_CD3=dc3.col_CD3)
JOIN cd_vw dc4 ON (d.col_CD4=dc4.col_CD4)
JOIN cd_vw dc5 ON (d.col_CD5=dc5.col_CD5)
JOIN cd_vw dc6 ON (d.col_CD6=dc6.col_CD6)
JOIN cd_vw dc7 ON (d.col_CD7=dc7.col_CD7)
JOIN cd_vw dc8 ON (d.col_CD8=dc8.col_CD8)
JOIN cd_vw dc9 ON (d.col_CD9=dc9.col_CD9)
JOIN DM_NBR NBR ON (d.fact_SysID = NBR.fact_SysID)
JOIN PRICE PRC ON (d.in_ID=PRC.in_ID)
JOIN Dm_LabDt ENDDT ON (d.dt_tb_sysID=ENDDT.DT_SYS_ID )
JOIN CC_sg CSG ON (PRC.client_sysID=CSG.client_sysID)
JOIN COMPONENT PROD ON (CSG.prdt_cd=PROD.prdt_cd)

WHERE 
NBR.COLX_CD IN ('163000')
AND EXTRACT (YEAR FROM ENDDT.Fdt) = '2015'
AND PROD.Prd_CD1 IN ('COM')
) 
WITH DATA 
ON COMMIT PRESERVE ROWS;

I don't know if this will also change the plan.

Those multiple joins probably replaced an ORed join, which always leads to an Product Join and performs really bad for a larger number of rows:

SELECT DISTINCT d.in_ID
FROM DB.FACT_BODtl d

JOIN cd_vw dc1 
  ON ((d.col_CD1=dc1.col_CD1) OR
      (d.col_CD2=dc1.col_CD2) OR
      (d.col_CD3=dc1.col_CD3) OR
      (d.col_CD4=dc1.col_CD4) OR
      (d.col_CD5=dc1.col_CD5) OR
      (d.col_CD6=dc1.col_CD6) OR
      (d.col_CD7=dc1.col_CD7) OR
      (d.col_CD8=dc1.col_CD8) OR
      (d.col_CD9=dc1.col_CD9)
JOIN DM_NBR NBR ON (d.fact_SysID = NBR.fact_SysID)
JOIN PRICE PRC ON (d.in_ID=PRC.in_ID)
JOIN Dm_LabDt ENDDT ON (d.dt_tb_sysID=ENDDT.DT_SYS_ID )
JOIN CC_sg CSG ON (PRC.client_sysID=CSG.client_sysID)
JOIN COMPONENT PROD ON (CSG.prdt_cd=PROD.prdt_cd)

WHERE 
NBR.COLX_CD IN ('163000')
AND EXTRACT (YEAR FROM ENDDT.Fdt) = '2015'
AND PROD.Prd_CD1 IN ('COM')
AND dc1.d_cd1 IN ('1701','1701.02','1701.1','1701.12','1701.2','1701.22','1701.3','1701.32','1701.4','1701.42','1701.5','1701.52','1701.6','1701.62','1701.7','1701.72','1701.8','1701.82','1701.9','1701.92')

Upvotes: 1

Related Questions