jen
jen

Reputation: 61

I would like to improve performance for my Snowflake SQL query that contains a Well-Known Text (WKT)

I have a Snowflake query that contains a Well-Known Text (WKT) and I'm trying to improve the performance.

WITH CustomArea AS (
    SELECT 
        "PPPPPP" AS GC,
        ROUND(SUM(HP),5) AS HP,
        ROUND(SUM(PP),5) AS PP  
    FROM 
        "DB1"."SCHEMA1"."TABLE1"
    WHERE 
        ST_WITHIN (CE, TO_GEOGRAPHY('POLYGON ((...))'))
    GROUP BY 
        GC
),
FilteredData AS (
    SELECT
        tbl.VC,
        CASE
            WHEN tbl.WU = 'PP' THEN a.PP * tbl.VV
            WHEN tbl.WU = 'HH' THEN a.HP * tbl.VV
            ELSE tbl.VV
        END AS Count
    FROM 
        "DB2"."SCHEMA2"."TABLE2" tbl
    INNER JOIN 
        CustomArea a ON tbl.CC = a.GC 
    WHERE 
        tbl.GG = 'PPPPPP'
    AND 
        tbl.VC IN ('ABC', 'DEF', 'GHI')
)
SELECT
    VC,
    SUM(Count) AS Count
FROM 
    FilteredData
GROUP BY 
    VC;

"DB1"."SCHEMA1"."TABLE1" INFO:

Columns Data Type
KE VARCHAR(14)
GI NUMBER(38,0)
GO VARCHAR(14)
HP NUMBER(18,8)
PP NUMBER(18,8)
FL VARCHAR(6)
PPPPPP VARCHAR(8)
PA VARCHAR(8)
CT VARCHAR(16)
CA VARCHAR(16)
PC VARCHAR(16)
PS VARCHAR(16)
PF VARCHAR(16)
PR VARCHAR(2)
RR VARCHAR(1)
CN VARCHAR(2)
FQ VARCHAR(16)
CE GEOGRAPHY

"DB2"."SCHEMA2"."TABLE2" INFO:

Columns Data Type
CC VARCHAR(1000)
GG VARCHAR(1024)
GI NUMBER(10,0)
NN VARCHAR(16777216)
VC VARCHAR(16777216)
YY VARCHAR(1024)
SC NUMBER(38,5)
VV FLOAT
WU VARCHAR(255)

Snowflake Query Explain Plan:

{
    "GlobalStats": {
        "partitionsTotal": 1085,
        "partitionsAssigned": 183,
        "bytesAssigned": 2916814336
    },
    "Operations": [
        [
            {
                "id": 0,
                "operation": "Result",
                "expressions": [
                    "tbl.VC",
                    "SUM(IFF(tbl.WU = 'PP', (TO_DOUBLE(ROUND(SUM(TABLE1.PP), 5))) * tbl.VV, IFF(tbl.WU = 'HH', (TO_DOUBLE(ROUND(SUM(TABLE1.HP), 5))) * tbl.VV, tbl.VV)))"
                ]
            },
            {
                "id": 1,
                "operation": "Aggregate",
                "expressions": [
                    "aggExprs: [SUM(IFF(tbl.WU = 'PP', (TO_DOUBLE(ROUND(SUM(TABLE1.PP), 5))) * tbl.VV, IFF(tbl.WU = 'HH', (TO_DOUBLE(ROUND(SUM(TABLE1.HP), 5))) * tbl.VV, tbl.VV)))]",
                    "groupKeys: [tbl.VC]"
                ],
                "parentOperators": [
                    0
                ]
            },
            {
                "id": 2,
                "operation": "InnerJoin",
                "expressions": [
                    "joinKey: (TABLE1.PPPPPP = tbl.CC)"
                ],
                "parentOperators": [
                    1
                ]
            },
            {
                "id": 3,
                "operation": "Aggregate",
                "expressions": [
                    "aggExprs: [SUM(TABLE1.HP), SUM(TABLE1.PP)]",
                    "groupKeys: [TABLE1.PPPPPP]"
                ],
                "parentOperators": [
                    2
                ]
            },
            {
                "id": 4,
                "operation": "Filter",
                "expressions": [
                    "ST_CONTAINS_LNGLAT_ROUND(TO_BINARY(GET(IFF(PARSE_GEO('POLYGON ((...))') IS NULL, null, OBJECT_CONSTRUCT('_shape', PARSE_GEO('POLYGON ((...))'), 'version', 1, 'has_internal', TRUE, 'internal', GEOGRAPHY_COMPUTE_INTERNAL(PARSE_GEO('POLYGON ((...))')))), 'internal')), TO_BINARY(GET_PATH(TABLE1.CE, 'internal')))"
                ],
                "parentOperators": [
                    3
                ]
            },
            {
                "id": 5,
                "operation": "TableScan",
                "objects": [
                    "DB1.SCHEMA1.TABLE1"
                ],
                "expressions": [
                    "HP",
                    "PP",
                    "PPPPPP"
                ],
                "partitionsAssigned": 3,
                "partitionsTotal": 3,
                "bytesAssigned": 48081920,
                "parentOperators": [
                    4
                ]
            },
            {
                "id": 6,
                "operation": "Filter",
                "expressions": [
                    "(tbl.VC IN 'ABC' IN 'DEF' IN 'GHI') AND (tbl.GG = 'PPPPPP')"
                ],
                "parentOperators": [
                    2
                ]
            },
            {
                "id": 7,
                "operation": "JoinFilter",
                "expressions": [
                    "joinKey: (TABLE1.PPPPPP = tbl.CC)"
                ],
                "parentOperators": [
                    6
                ]
            },
            {
                "id": 8,
                "operation": "TableScan",
                "objects": [
                    "DB2.SCHEMA2.TABLE2"
                ],
                "expressions": [
                    "CC",
                    "GG",
                    "VC",
                    "VV",
                    "WU"
                ],
                "alias": "tbl",
                "partitionsAssigned": 180,
                "partitionsTotal": 1082,
                "bytesAssigned": 2868732416,
                "parentOperators": [
                    7
                ]
            }
        ]
    ]
}

How do I improve the performance of this query?

Upvotes: 0

Views: 79

Answers (0)

Related Questions