Reputation: 3090
I have a dataset that looks something like this:
I'd like to aggregate all co values on one row, so the final result looks something like:
Seems pretty easy, right? Just write a query using crosstab, as suggested in this answer. The problem is that this requires CREATE EXTENSION tablefunc; and I don't have write access to my DB.
Can anyone recommend an alternative?
Upvotes: 5
Views: 16466
Reputation: 402
As of Postgres 9.4 you can use json_object_agg():
--set up a temp table with some rows to pivot into columns
drop table if exists _temp;
create temp table _temp(id serial, key int, header text, value text);
insert into _temp(key, header, value) values
(1, 'a', 'this is a'),
(1, 'b', 'this is b'),
(1, 'c', 'this is c'),
(2, 'a', 'this is a'),
(2, 'b', 'this is b'),
(3, 'c', 'this is c')
;
--pivot using json_object_agg
select t.key,
       t.headers->>'a' as a,
       t.headers->>'b' as b,
       t.headers->>'c' as c
from (
    select key, json_object_agg(header, value) as headers
    from _temp
    group by key
) t
order by key
;
/*
--results
+---+---------+---------+---------+
|key|a |b |c |
+---+---------+---------+---------+
|1 |this is a|this is b|this is c|
|2 |this is a|this is b|null |
|3 |null |null |this is c|
+---+---------+---------+---------+
*/
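For illustration, running just the inner aggregate from the query above (same _temp table, nothing new assumed) shows the intermediate result: one JSON object per key, whose fields the outer query then pulls out with ->>.
--intermediate step: one JSON object per key
select key, json_object_agg(header, value) as headers
from _temp
group by key
order by key
;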
Upvotes: 1
Reputation: 207
Since version 9.4 there's json_object_agg(), which lets us do part of the necessary magic dynamically.
However, to be fully dynamic, a temporary row type (a temp table used as a record type) has to be built first by running dynamic SQL (EXECUTE) inside an anonymous DO block.
DB FIDDLE (UK): https://dbfiddle.uk/Sn7iO4zL
DISCLAIMER: Typically the ability to create TEMP TABLES is granted to end users, but YMMV. Another concern is whether anonymous DO blocks can be executed as inline code by regular users.
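If you are unsure whether your role allows those two things, a quick sanity check (the table name _perm_check is just a hypothetical throwaway, otherwise plain Postgres) is to run a trivial anonymous block and create-and-drop a temp table:
-- does my role allow anonymous DO blocks?
DO LANGUAGE plpgsql $$ BEGIN RAISE NOTICE 'anonymous blocks are allowed'; END $$;
-- does my role allow temp tables?
CREATE TEMP TABLE _perm_check(i int);
DROP TABLE _perm_check;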
-- begin test data
DROP TABLE IF EXISTS tmpSales;
CREATE TEMP TABLE tmpSales AS
SELECT
     sale_id
    ,TRUNC(RANDOM()*12)+1  AS book_id
    ,TRUNC(RANDOM()*100)+1 AS customer_id
    ,(date '2010-01-01' + random() * (timestamp '2016-12-31' - timestamp '2010-01-01')) AS sale_date
FROM generate_series(1,10000) AS sale_id;

DROP TABLE IF EXISTS tmp_month_total;
CREATE TEMP TABLE tmp_month_total AS
SELECT
     date_part('year' , sale_date) AS year
    ,date_part('month', sale_date) AS mn
    ,to_char(sale_date, 'mon')     AS month
    ,COUNT(*)                      AS total
FROM tmpSales
GROUP BY date_part('year', sale_date), to_char(sale_date, 'mon'), date_part('month', sale_date)
;
DATA:
+----+--+-----+-----+
|year|mn|month|total|
+----+--+-----+-----+
|2010|1 |jan |127 |
|2010|2 |feb |117 |
|2010|3 |mar |121 |
|2010|4 |apr |131 |
|2010|5 |may |106 |
|2010|6 |jun |121 |
|2010|7 |jul |129 |
|2010|8 |aug |114 |
|2010|9 |sep |115 |
|2010|10|oct |110 |
|2010|11|nov |133 |
|2010|12|dec |108 |
+----+--+-----+-----+
-- END test data
-- dyn. build a temporary row-type based on existing data, not hard-coded
DROP TABLE IF EXISTS tmpTblTyp CASCADE;
DO LANGUAGE plpgsql $$
DECLARE
    v_sqlstring VARCHAR = '';
BEGIN
    v_sqlstring := CONCAT(
         'CREATE TEMP TABLE tmpTblTyp AS SELECT '
        ,(SELECT STRING_AGG( CONCAT('NULL::int AS ', month)::TEXT, ' ,' ORDER BY mn )::TEXT
          FROM (SELECT DISTINCT month, mn FROM tmp_month_total) a)
        ,' LIMIT 0 '
    ); -- RAISE NOTICE '%', v_sqlstring ;
    EXECUTE( v_sqlstring );
END $$;
DROP TABLE IF EXISTS tmpMoToJson;
CREATE TEMP TABLE tmpMoToJson AS
SELECT
     year AS year
    ,json_build_array( months ) AS js_months_arr
    ,json_populate_recordset( NULL::tmpTblTyp  /** use the temp table as a record type!! **/
                            , json_build_array( months )
                            ) AS jprs          /** builds a row-type column that can be expanded with (jprs).* **/
FROM (
    SELECT year
          -- accumulate month/total pairs into a JSON object
          ,json_object_agg(month, total) AS months
    FROM tmp_month_total
    GROUP BY year
    ORDER BY year
) a
;

SELECT
     year
    ,(ROW((jprs).*)::tmpTblTyp).*  -- explode the composite-type row into columns
FROM tmpMoToJson;
+----+---+---+---+---+---+---+---+---+---+---+---+---+
|year|jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec|
+----+---+---+---+---+---+---+---+---+---+---+---+---+
|2010|127|117|121|131|106|121|129|114|115|110|133|108|
|2011|117|112|117|115|139|116|119|152|117|112|115|103|
|2012|129|111|98 |140|109|131|114|110|112|115|100|121|
|2013|128|112|141|127|141|102|113|109|111|110|123|116|
|2014|129|114|117|118|111|123|106|111|127|121|124|145|
|2015|118|113|131|122|120|121|140|114|118|108|114|131|
|2016|117|110|139|100|110|116|112|109|131|117|122|132|
+----+---+---+---+---+---+---+---+---+---+---+---+---+
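To make the dynamic step concrete: for the test data above, the DO block assembles and EXECUTEs a statement roughly like the following (a sketch; the actual column list is whatever months exist in tmp_month_total):
-- approximately what EXECUTE(v_sqlstring) runs for the sample data
CREATE TEMP TABLE tmpTblTyp AS
SELECT NULL::int AS jan ,NULL::int AS feb ,NULL::int AS mar ,NULL::int AS apr
      ,NULL::int AS may ,NULL::int AS jun ,NULL::int AS jul ,NULL::int AS aug
      ,NULL::int AS sep ,NULL::int AS oct ,NULL::int AS nov ,NULL::int AS dec
LIMIT 0;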
Upvotes: 1
Reputation: 87
The required output can also be achieved by using PIVOT:
SELECT co
      ,industry
      ,customer_type
      ,product_type
      ,sales_model
      ,stage
FROM dataSet
PIVOT (max(tags) FOR ontologyType IN (
       industry
      ,customer_type
      ,product_type
      ,sales_model
      ,stage
)) AS PVT
Upvotes: -2
Reputation: 422
You can use DO to generate and PREPARE your own SQL with crosstab columns, then EXECUTE it.
-- replace tab_name with your table name
DO $$
DECLARE
    _query text;
    _name  text;
BEGIN
    _name  := 'prepared_query';
    -- build the pivot query: one string_agg(CASE ...) per distinct ontology_type
    _query := '
    SELECT co
    '||(SELECT ', '||string_agg(DISTINCT
        ' string_agg(DISTINCT
              CASE ontology_type WHEN '||quote_literal(ontology_type)||' THEN tags
                   ELSE NULL
              END, '',''
          ) AS '||quote_ident(ontology_type), ',')
        FROM tab_name)||'
    FROM tab_name
    GROUP BY co
    ';

    -- deallocate a previously prepared statement with the same name, if any
    BEGIN
        EXECUTE 'DEALLOCATE '||_name;
    EXCEPTION
        WHEN invalid_sql_statement_name THEN
            NULL;  -- nothing was prepared yet; ignore
    END;

    EXECUTE 'PREPARE '||_name||' AS '||_query;
END
$$;
EXECUTE prepared_query;
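Applied to the question's table (assuming it is called tab_name with columns co, ontology_type and tags, and ontology types such as industry and customer_type), the generated and prepared statement would look roughly like this before EXECUTE prepared_query runs it:
-- sketch of what gets PREPAREd
PREPARE prepared_query AS
SELECT co
    , string_agg(DISTINCT CASE ontology_type WHEN 'industry'      THEN tags ELSE NULL END, ',') AS industry
    , string_agg(DISTINCT CASE ontology_type WHEN 'customer_type' THEN tags ELSE NULL END, ',') AS customer_type
    -- ... one string_agg per distinct ontology_type ...
FROM tab_name
GROUP BY co;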
Upvotes: 1
Reputation: 175944
Conditional aggregation:
SELECT co,
MIN(CASE WHEN ontology_type = 'industry' THEN tags END) AS industry,
MIN(CASE WHEN ontology_type = 'customer_type' THEN tags END) AS customer_type,
-- ...
FROM tab_name
GROUP BY co
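On Postgres 9.4+ the same idea can also be written with the FILTER clause instead of CASE (an equivalent sketch, assuming the same table and columns):
SELECT co,
       MIN(tags) FILTER (WHERE ontology_type = 'industry')      AS industry,
       MIN(tags) FILTER (WHERE ontology_type = 'customer_type') AS customer_type
       -- ...
FROM tab_name
GROUP BY co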
Upvotes: 12