Reputation: 5273
I want to pivot a table based on a field which can contain "dynamic" values (not always known beforehand).
I can make it work by hard coding the values (which is undesirable):
SELECT *
FROM my_table
pivot(SUM(amount) FOR type_id IN (1,2,3,4,5,20,50,83,141,...));
But I can't make it work using a query to provide the values dynamically:
SELECT *
FROM my_table
pivot(SUM(amount) FOR type_id IN (SELECT id FROM types));
---
090150 (22000): Single-row subquery returns more than one row.
SELECT *
FROM my_table
pivot(SUM(amount) FOR type_id IN (SELECT ARRAY_AGG(id) FROM types));
---
001038 (22023): SQL compilation error:
Can not convert parameter 'my_table.type_id' of type [NUMBER(38,0)] into expected type [ARRAY]
Is there a way to accomplish this?
Upvotes: 9
Views: 10016
Reputation: 4428
Snowflake now supports dynamic pivot using the ANY keyword in the PIVOT IN list or a Subquery e.g.
select *
from performance_reviews
pivot (avg(rating) for skill in (ANY))
or
select *
from performance_reviews
pivot (avg(rating) for skill in (select skill from performance_reviews where rating > 4));
Upvotes: 3
Reputation: 41
Creation of foo table for dynamic pivot in snowflake
create table temp
(
date datetime,
category varchar(3),
amount decimal
);
insert into temp values ('1/1/2012', 'ABC', 1000.00);
insert into temp values ('2/1/2012', 'DEF', 500.00);
insert into temp values ('2/1/2012', 'GHI', 800.00);
insert into temp values ('2/10/2012', 'DEF', 700.00);
insert into temp values ('3/1/2012', 'ABC', 1100.00);
insert into temp values ('3/1/2012', 'ZXY', 1100.00);
1.-UDF definition to add single quotes ('') in each dynamic column as a row
CREATE OR REPLACE FUNCTION QUOTENAME (input varchar)
RETURNS VARCHAR
AS
$$
SELECT concat('\'',input,'\'')
$$
;
2.-Dynamic pivot in snowflake
set cols=(select LISTAGG(QUOTENAME(CATEGORY), ',') AS ResultString
from (select distinct category from temp) as tmp);
select $cols;
set query =
'
SELECT * from
(
select date, amount, category from temp
) src
pivot
(
max(amount) for category in (' ||$cols||')
) piv
';
select $query;
EXECUTE IMMEDIATE ($query);
3.-Output
Upvotes: 4
Reputation: 1507
Inspired by my two predecessors, I created another stored proc that could be called to create even multi-grouped and multi-pivot emulated pivot queries:
create or replace procedure
test_pivot.public.get_full_pivot(
"source" varchar, // fully-qualified 'table/view_name' or full '(subquery)'
"row_headers" varchar, // comma-separated list of 1+ GROUP BY field names
"col_header1" varchar, // first (mandatory) PIVOT field name
"col_header2" varchar, // secondary (optional) PIVOT field name ('' if none)
"agg" varchar, // field name for the aggregate values
"aggf" varchar) // aggregate function (sum, avg, min, max, count...)
returns varchar
language javascript
as $$
// collect all distinct values for a column header field
function get_distinct_values(col_header) {
var vals = [];
if (col_header != '') {
var result = snowflake.execute(
{sqlText: `select distinct ${col_header}\n`
+ `from ${source}\n`
+ `order by ${col_header}`}); // nulls last!
while (result.next())
vals.push(result.getColumnValueAsString(1));
}
return vals;
}
var vals1 = get_distinct_values(col_header1);
var vals2 = get_distinct_values(col_header2);
// create and return the emulated pivot query, for one or two column header values
var query = `select ${row_headers}`;
if (vals2.length == 0)
for (const i in vals1) {
var cond1 = (vals1[i] == 'null'
? `${col_header1} is null` : `to_char(${col_header1})='${vals1[i]}'`);
query += `,\n ${aggf}(iff(${cond1}, ${agg}, null)) as "${vals1[i]}"`;
}
else
for (const i in vals1)
for (const j in vals2) {
var cond1 = (vals1[i] == 'null'
? `${col_header1} is null` : `to_char(${col_header1})='${vals1[i]}'`);
var cond2 = (vals2[j] == 'null'
? `${col_header2} is null` : `to_char(${col_header2})='${vals2[j]}'`);
query += `,\n ${aggf}(iff(${cond1} AND ${cond2}, ${agg}, null)) as "${vals1[i]}+${vals2[j]}"`;
}
query += `\nfrom ${source}\n`
+ `group by ${row_headers}\n`
+ `order by ${row_headers};`; // nulls last!
return query;
$$;
Call with:
call test_pivot.public.get_full_pivot(
'test_pivot.public.demographics',
'country, education', 'status', 'gender', 'sales', 'sum');
to generate the following SQL:
select country, education,
sum(iff(to_char(status)='divorced' AND to_char(gender)='F', sales, null)) as "divorced+F",
sum(iff(to_char(status)='divorced' AND to_char(gender)='M', sales, null)) as "divorced+M",
sum(iff(to_char(status)='married' AND to_char(gender)='F', sales, null)) as "married+F",
sum(iff(to_char(status)='married' AND to_char(gender)='M', sales, null)) as "married+M",
sum(iff(to_char(status)='single' AND to_char(gender)='F', sales, null)) as "single+F",
sum(iff(to_char(status)='single' AND to_char(gender)='M', sales, null)) as "single+M",
sum(iff(status is null AND to_char(gender)='F', sales, null)) as "null+F",
sum(iff(status is null AND to_char(gender)='M', sales, null)) as "null+M"
from test_pivot.public.demographics
group by country, education
order by country, education;
Which may return a result structured like:
Upvotes: 1
Reputation: 59175
I wrote a Snowflake stored procedure to get dynamics pivots inside Snowflake, check:
3 steps:
call pivot_prev_results()
select * from table(result_scan(last_query_id(-2)))
The procedure:
create or replace procedure pivot_prev_results()
returns string
language javascript
execute as caller as
$$
var cols_query = `
select '\\''
|| listagg(distinct pivot_column, '\\',\\'') within group (order by pivot_column)
|| '\\''
from table(result_scan(last_query_id(-1)))
`;
var stmt1 = snowflake.createStatement({sqlText: cols_query});
var results1 = stmt1.execute();
results1.next();
var col_list = results1.getColumnValue(1);
pivot_query = `
select *
from (select * from table(result_scan(last_query_id(-2))))
pivot(max(pivot_value) for pivot_column in (${col_list}))
`
var stmt2 = snowflake.createStatement({sqlText: pivot_query});
stmt2.execute();
return `select * from table(result_scan('${stmt2.getQueryId()}'));\n select * from table(result_scan(last_query_id(-2)));`;
$$;
Upvotes: 5
Reputation: 2600
I don't think it's possible in native SQL, but I wrote an article and published some code showing how my team does this by generating the query from Python.
You can call the Python script directly, passing arguments similar to the options Excel gives you for pivot tables:
python generate_pivot_query.py \
--dbtype snowflake --database mydb \
--host myhost.url --port 5432 \
--user me --password myp4ssw0rd \
--base-columns customer_id \
--pivot-columns category \
--exclude-columns order_id \
--aggfunction-mappings amount=sum \
myschema orders
Or, if you're Airflow, you can use a CreatePivotTableOperator
to create tasks directly.
Upvotes: 7