Reputation: 61
I have a Snowflake query that contains a Well-Known Text (WKT) and I'm trying to improve the performance.
WITH CustomArea AS (
SELECT
"PPPPPP" AS GC,
ROUND(SUM(HP),5) AS HP,
ROUND(SUM(PP),5) AS PP
FROM
"DB1"."SCHEMA1"."TABLE1"
WHERE
ST_WITHIN (CE, TO_GEOGRAPHY('POLYGON ((...))'))
GROUP BY
GC
),
FilteredData AS (
SELECT
tbl.VC,
CASE
WHEN tbl.WU = 'PP' THEN a.PP * tbl.VV
WHEN tbl.WU = 'HH' THEN a.HP * tbl.VV
ELSE tbl.VV
END AS Count
FROM
"DB2"."SCHEMA2"."TABLE2" tbl
INNER JOIN
CustomArea a ON tbl.CC = a.GC
WHERE
tbl.GG = 'PPPPPP'
AND
tbl.VC IN ('ABC', 'DEF', 'GHI')
)
SELECT
VC,
SUM(Count) AS Count
FROM
FilteredData
GROUP BY
VC;
"DB1"."SCHEMA1"."TABLE1"
INFO:
Columns | Data Type |
---|---|
KE | VARCHAR(14) |
GI | NUMBER(38,0) |
GO | VARCHAR(14) |
HP | NUMBER(18,8) |
PP | NUMBER(18,8) |
FL | VARCHAR(6) |
PPPPPP | VARCHAR(8) |
PA | VARCHAR(8) |
CT | VARCHAR(16) |
CA | VARCHAR(16) |
PC | VARCHAR(16) |
PS | VARCHAR(16) |
PF | VARCHAR(16) |
PR | VARCHAR(2) |
RR | VARCHAR(1) |
CN | VARCHAR(2) |
FQ | VARCHAR(16) |
CE | GEOGRAPHY |
"DB2"."SCHEMA2"."TABLE2"
INFO:
Columns | Data Type |
---|---|
CC | VARCHAR(1000) |
GG | VARCHAR(1024) |
GI | NUMBER(10,0) |
NN | VARCHAR(16777216) |
VC | VARCHAR(16777216) |
YY | VARCHAR(1024) |
SC | NUMBER(38,5) |
VV | FLOAT |
WU | VARCHAR(255) |
Snowflake Query Explain Plan:
{
"GlobalStats": {
"partitionsTotal": 1085,
"partitionsAssigned": 183,
"bytesAssigned": 2916814336
},
"Operations": [
[
{
"id": 0,
"operation": "Result",
"expressions": [
"tbl.VC",
"SUM(IFF(tbl.WU = 'PP', (TO_DOUBLE(ROUND(SUM(TABLE1.PP), 5))) * tbl.VV, IFF(tbl.WU = 'HH', (TO_DOUBLE(ROUND(SUM(TABLE1.HP), 5))) * tbl.VV, tbl.VV)))"
]
},
{
"id": 1,
"operation": "Aggregate",
"expressions": [
"aggExprs: [SUM(IFF(tbl.WU = 'PP', (TO_DOUBLE(ROUND(SUM(TABLE1.PP), 5))) * tbl.VV, IFF(tbl.WU = 'HH', (TO_DOUBLE(ROUND(SUM(TABLE1.HP), 5))) * tbl.VV, tbl.VV)))]",
"groupKeys: [tbl.VC]"
],
"parentOperators": [
0
]
},
{
"id": 2,
"operation": "InnerJoin",
"expressions": [
"joinKey: (TABLE1.PPPPPP = tbl.CC)"
],
"parentOperators": [
1
]
},
{
"id": 3,
"operation": "Aggregate",
"expressions": [
"aggExprs: [SUM(TABLE1.HP), SUM(TABLE1.PP)]",
"groupKeys: [TABLE1.PPPPPP]"
],
"parentOperators": [
2
]
},
{
"id": 4,
"operation": "Filter",
"expressions": [
"ST_CONTAINS_LNGLAT_ROUND(TO_BINARY(GET(IFF(PARSE_GEO('POLYGON ((...))') IS NULL, null, OBJECT_CONSTRUCT('_shape', PARSE_GEO('POLYGON ((...))'), 'version', 1, 'has_internal', TRUE, 'internal', GEOGRAPHY_COMPUTE_INTERNAL(PARSE_GEO('POLYGON ((...))')))), 'internal')), TO_BINARY(GET_PATH(TABLE1.CE, 'internal')))"
],
"parentOperators": [
3
]
},
{
"id": 5,
"operation": "TableScan",
"objects": [
"DB1.SCHEMA1.TABLE1"
],
"expressions": [
"HP",
"PP",
"PPPPPP"
],
"partitionsAssigned": 3,
"partitionsTotal": 3,
"bytesAssigned": 48081920,
"parentOperators": [
4
]
},
{
"id": 6,
"operation": "Filter",
"expressions": [
"(tbl.VC IN 'ABC' IN 'DEF' IN 'GHI') AND (tbl.GG = 'PPPPPP')"
],
"parentOperators": [
2
]
},
{
"id": 7,
"operation": "JoinFilter",
"expressions": [
"joinKey: (TABLE1.PPPPPP = tbl.CC)"
],
"parentOperators": [
6
]
},
{
"id": 8,
"operation": "TableScan",
"objects": [
"DB2.SCHEMA2.TABLE2"
],
"expressions": [
"CC",
"GG",
"VC",
"VV",
"WU"
],
"alias": "tbl",
"partitionsAssigned": 180,
"partitionsTotal": 1082,
"bytesAssigned": 2868732416,
"parentOperators": [
7
]
}
]
]
}
How do I improve the performance of this query?
Upvotes: 0
Views: 79