Reputation: 1
I want to Transpose columns and rows when you don't have a unique id type column in BigQuery.
I have large number of rows. Date values are unique for each month-year going over couple of years.
Columns:
date total_count_of_customer total_revenue new_customer_count new_customer_revenue churn_count churn_revenue flat_count flat_revenue returning_count returning_revenue expansion_mrr expansion_count contraction_mrr contraction_count
I want to swap 'date' column values to column and rest of the columns to rows.
Sample Data:
date total_count_of_customer total_revenue new_customer_count new_customer_revenue churn_count churn_revenue flat_count flat_revenue returning_count returning_revenue expansion_mrr contraction_mrr
7/1/11 756 18627.91 518.33 11505.81 188.00 -4333.50 2015.00 47057.36 -267.33 -5848.13 682.46 -962.85
8/1/11 45645 27691.1 562.83 12517.54 217.50 -5045.38 2388.50 55845.24 -365.33 -8066.40 597.02 -1114.54
9/1/11 3445 35502.98 607.33 13529.26 247.00 -5757.26 2762.00 64633.11 -463.33 -10284.67 511.57 -1266.22
Output:
7/1/11 8/1/11 9/1/11
total_count_of_customer 756 45645 3445
total_revenue 18627.91 27691.1 35502.98
new_customer_count 518.33 562.83 607.33
new_customer_revenue 11505.81 12517.54 13529.26
churn_count 188.00 217.50 247.00
churn_revenue -4333.50 -5045.38 -5757.26
flat_count 2015.00 2388.50 2762.00
flat_revenue 47057.36 55845.24 64633.11
returning_count -267.33 -365.33 -463.33
returning_revenue -5848.13 -8066.40 -10284.67
expansion_mrr 682.46 597.02 511.57
contraction_mrr -962.85 -1114.54 -1266.22
I referenced this article but i dont have any unique ID in my case: How to transpose rows to columns with large amount of the data in BigQuery/SQL?
Upvotes: 0
Views: 232
Reputation: 173161
Below example for BigQuery Standard SQL
#standardSQL
WITH `project.dataset.table` AS (
SELECT '7/1/11' `date`, 756 total_count_of_customer, 18627.91 total_revenue, 518.33 new_customer_count, 11505.81 new_customer_revenue, 188.00 churn_count, -4333.50 churn_revenue, 2015.00 flat_count, 47057.36 flat_revenue, -267.33 returning_count, -5848.13 returning_revenue, 682.46 expansion_mrr, -962.85 contraction_mrr UNION ALL
SELECT '8/1/11', 45645, 27691.1, 562.83, 12517.54, 217.50, -5045.38, 2388.50, 55845.24, -365.33, -8066.40, 597.02, -1114.54 UNION ALL
SELECT '9/1/11', 3445, 35502.98, 607.33, 13529.26, 247.00, -5757.26, 2762.00, 64633.11, -463.33, -10284.67, 511.57, -1266.22
), temp AS (
SELECT x id, SPLIT(kv, ':')[OFFSET(0)] key, SPLIT(kv, ':')[SAFE_OFFSET(1)] value
FROM (
SELECT TO_JSON_STRING(t) x
FROM `project.dataset.table` t
),
UNNEST(SPLIT(REGEXP_REPLACE(x, r'[{}"]', ''))) kv
), flatten AS (
SELECT b.key, b.value value, a.value `date`
FROM temp a
JOIN temp b
USING(id)
WHERE a.key = 'date'
AND b.key != 'date'
)
SELECT
key,
MAX(IF(`date`='7/1/11', value, NULL)) date_7_1_11,
MAX(IF(`date`='8/1/11', value, NULL)) date_8_1_11,
MAX(IF(`date`='9/1/11', value, NULL)) date_9_1_11
FROM flatten
GROUP BY key
with result
So, obviously (as you mentioned - you have few years of monthly data) it is up to you now how to proceed from that point. You can either manually add lines in above SELECT statement - one line per month (in case of 3 years - it will be 36 lines so not that big deal but boring) or you can now reuse technique described in my another answer that you mentioned in your question - How to transpose rows to columns with large amount of the data in BigQuery/SQL?
I have years from 2011 to 2019 for now now and will keep on growing in future. ... It will be helpful if you can also describe the solution mentioned in that link if that is possible
Step 1: prepare SELECT Statement covering 2011 - 2019 years
#standardSQL
WITH `project.dataset.table` AS (
SELECT DATE_ADD('2011-01-01', INTERVAL i - 1 MONTH) month
FROM UNNEST(GENERATE_ARRAY(1, 9*12)) i
), temp AS (
SELECT
FORMAT('%i/%i/%i', EXTRACT(MONTH FROM month), EXTRACT(DAY FROM month), EXTRACT(YEAR FROM month) - 2000) month,
FORMAT('date_%i_%i_%i', EXTRACT(MONTH FROM month), EXTRACT(DAY FROM month), EXTRACT(YEAR FROM month) - 2000) column
FROM `project.dataset.table`
)
SELECT
CONCAT('SELECT key,', STRING_AGG(CONCAT("MAX(IF(`date`='", month, "',value,NULL)) ", column, "")), ' FROM flatten GROUP BY key')
FROM temp
above will return below STRING
SELECT key,MAX(IF(`date`='1/1/11',value,NULL)) date_1_1_11,MAX(IF(`date`='2/1/11',value,NULL)) date_2_1_11,MAX(IF(`date`='3/1/11',value,NULL)) date_3_1_11,MAX(IF(`date`='4/1/11',value,NULL)) date_4_1_11,MAX(IF(`date`='5/1/11',value,NULL)) date_5_1_11,MAX(IF(`date`='6/1/11',value,NULL)) date_6_1_11,MAX(IF(`date`='7/1/11',value,NULL)) date_7_1_11,MAX(IF(`date`='8/1/11',value,NULL)) date_8_1_11,MAX(IF(`date`='9/1/11',value,NULL)) date_9_1_11,MAX(IF(`date`='10/1/11',value,NULL)) date_10_1_11,MAX(IF(`date`='11/1/11',value,NULL)) date_11_1_11,MAX(IF(`date`='12/1/11',value,NULL)) date_12_1_11,MAX(IF(`date`='1/1/12',value,NULL)) date_1_1_12,MAX(IF(`date`='2/1/12',value,NULL)) date_2_1_12,MAX(IF(`date`='3/1/12',value,NULL)) date_3_1_12,MAX(IF(`date`='4/1/12',value,NULL)) date_4_1_12,MAX(IF(`date`='5/1/12',value,NULL)) date_5_1_12,MAX(IF(`date`='6/1/12',value,NULL)) date_6_1_12,MAX(IF(`date`='7/1/12',value,NULL)) date_7_1_12,MAX(IF(`date`='8/1/12',value,NULL)) date_8_1_12,MAX(IF(`date`='9/1/12',value,NULL)) date_9_1_12,MAX(IF(`date`='10/1/12',value,NULL)) date_10_1_12,MAX(IF(`date`='11/1/12',value,NULL)) date_11_1_12,MAX(IF(`date`='12/1/12',value,NULL)) date_12_1_12,MAX(IF(`date`='1/1/13',value,NULL)) date_1_1_13,MAX(IF(`date`='2/1/13',value,NULL)) date_2_1_13,MAX(IF(`date`='3/1/13',value,NULL)) date_3_1_13,MAX(IF(`date`='4/1/13',value,NULL)) date_4_1_13,MAX(IF(`date`='5/1/13',value,NULL)) date_5_1_13,MAX(IF(`date`='6/1/13',value,NULL)) date_6_1_13,MAX(IF(`date`='7/1/13',value,NULL)) date_7_1_13,MAX(IF(`date`='8/1/13',value,NULL)) date_8_1_13,MAX(IF(`date`='9/1/13',value,NULL)) date_9_1_13,MAX(IF(`date`='10/1/13',value,NULL)) date_10_1_13,MAX(IF(`date`='11/1/13',value,NULL)) date_11_1_13,MAX(IF(`date`='12/1/13',value,NULL)) date_12_1_13,MAX(IF(`date`='1/1/14',value,NULL)) date_1_1_14,MAX(IF(`date`='2/1/14',value,NULL)) date_2_1_14,MAX(IF(`date`='3/1/14',value,NULL)) date_3_1_14,MAX(IF(`date`='4/1/14',value,NULL)) date_4_1_14,MAX(IF(`date`='5/1/14',value,NULL)) date_5_1_14,MAX(IF(`date`='6/1/14',value,NULL)) date_6_1_14,MAX(IF(`date`='7/1/14',value,NULL)) date_7_1_14,MAX(IF(`date`='8/1/14',value,NULL)) date_8_1_14,MAX(IF(`date`='9/1/14',value,NULL)) date_9_1_14,MAX(IF(`date`='10/1/14',value,NULL)) date_10_1_14,MAX(IF(`date`='11/1/14',value,NULL)) date_11_1_14,MAX(IF(`date`='12/1/14',value,NULL)) date_12_1_14,MAX(IF(`date`='1/1/15',value,NULL)) date_1_1_15,MAX(IF(`date`='2/1/15',value,NULL)) date_2_1_15,MAX(IF(`date`='3/1/15',value,NULL)) date_3_1_15,MAX(IF(`date`='4/1/15',value,NULL)) date_4_1_15,MAX(IF(`date`='5/1/15',value,NULL)) date_5_1_15,MAX(IF(`date`='6/1/15',value,NULL)) date_6_1_15,MAX(IF(`date`='7/1/15',value,NULL)) date_7_1_15,MAX(IF(`date`='8/1/15',value,NULL)) date_8_1_15,MAX(IF(`date`='9/1/15',value,NULL)) date_9_1_15,MAX(IF(`date`='10/1/15',value,NULL)) date_10_1_15,MAX(IF(`date`='11/1/15',value,NULL)) date_11_1_15,MAX(IF(`date`='12/1/15',value,NULL)) date_12_1_15,MAX(IF(`date`='1/1/16',value,NULL)) date_1_1_16,MAX(IF(`date`='2/1/16',value,NULL)) date_2_1_16,MAX(IF(`date`='3/1/16',value,NULL)) date_3_1_16,MAX(IF(`date`='4/1/16',value,NULL)) date_4_1_16,MAX(IF(`date`='5/1/16',value,NULL)) date_5_1_16,MAX(IF(`date`='6/1/16',value,NULL)) date_6_1_16,MAX(IF(`date`='7/1/16',value,NULL)) date_7_1_16,MAX(IF(`date`='8/1/16',value,NULL)) date_8_1_16,MAX(IF(`date`='9/1/16',value,NULL)) date_9_1_16,MAX(IF(`date`='10/1/16',value,NULL)) date_10_1_16,MAX(IF(`date`='11/1/16',value,NULL)) date_11_1_16,MAX(IF(`date`='12/1/16',value,NULL)) date_12_1_16,MAX(IF(`date`='1/1/17',value,NULL)) date_1_1_17,MAX(IF(`date`='2/1/17',value,NULL)) date_2_1_17,MAX(IF(`date`='3/1/17',value,NULL)) date_3_1_17,MAX(IF(`date`='4/1/17',value,NULL)) date_4_1_17,MAX(IF(`date`='5/1/17',value,NULL)) date_5_1_17,MAX(IF(`date`='6/1/17',value,NULL)) date_6_1_17,MAX(IF(`date`='7/1/17',value,NULL)) date_7_1_17,MAX(IF(`date`='8/1/17',value,NULL)) date_8_1_17,MAX(IF(`date`='9/1/17',value,NULL)) date_9_1_17,MAX(IF(`date`='10/1/17',value,NULL)) date_10_1_17,MAX(IF(`date`='11/1/17',value,NULL)) date_11_1_17,MAX(IF(`date`='12/1/17',value,NULL)) date_12_1_17,MAX(IF(`date`='1/1/18',value,NULL)) date_1_1_18,MAX(IF(`date`='2/1/18',value,NULL)) date_2_1_18,MAX(IF(`date`='3/1/18',value,NULL)) date_3_1_18,MAX(IF(`date`='4/1/18',value,NULL)) date_4_1_18,MAX(IF(`date`='5/1/18',value,NULL)) date_5_1_18,MAX(IF(`date`='6/1/18',value,NULL)) date_6_1_18,MAX(IF(`date`='7/1/18',value,NULL)) date_7_1_18,MAX(IF(`date`='8/1/18',value,NULL)) date_8_1_18,MAX(IF(`date`='9/1/18',value,NULL)) date_9_1_18,MAX(IF(`date`='10/1/18',value,NULL)) date_10_1_18,MAX(IF(`date`='11/1/18',value,NULL)) date_11_1_18,MAX(IF(`date`='12/1/18',value,NULL)) date_12_1_18,MAX(IF(`date`='1/1/19',value,NULL)) date_1_1_19,MAX(IF(`date`='2/1/19',value,NULL)) date_2_1_19,MAX(IF(`date`='3/1/19',value,NULL)) date_3_1_19,MAX(IF(`date`='4/1/19',value,NULL)) date_4_1_19,MAX(IF(`date`='5/1/19',value,NULL)) date_5_1_19,MAX(IF(`date`='6/1/19',value,NULL)) date_6_1_19,MAX(IF(`date`='7/1/19',value,NULL)) date_7_1_19,MAX(IF(`date`='8/1/19',value,NULL)) date_8_1_19,MAX(IF(`date`='9/1/19',value,NULL)) date_9_1_19,MAX(IF(`date`='10/1/19',value,NULL)) date_10_1_19,MAX(IF(`date`='11/1/19',value,NULL)) date_11_1_19,MAX(IF(`date`='12/1/19',value,NULL)) date_12_1_19 FROM flatten GROUP BY key
Step 2: Copy that string and replace SELECT statement in original answer. You should get below
#standardSQL
WITH `project.dataset.table` AS (
SELECT '7/1/11' `date`, 756 total_count_of_customer, 18627.91 total_revenue, 518.33 new_customer_count, 11505.81 new_customer_revenue, 188.00 churn_count, -4333.50 churn_revenue, 2015.00 flat_count, 47057.36 flat_revenue, -267.33 returning_count, -5848.13 returning_revenue, 682.46 expansion_mrr, -962.85 contraction_mrr UNION ALL
SELECT '8/1/11', 45645, 27691.1, 562.83, 12517.54, 217.50, -5045.38, 2388.50, 55845.24, -365.33, -8066.40, 597.02, -1114.54 UNION ALL
SELECT '9/1/11', 3445, 35502.98, 607.33, 13529.26, 247.00, -5757.26, 2762.00, 64633.11, -463.33, -10284.67, 511.57, -1266.22
), temp AS (
SELECT x id, SPLIT(kv, ':')[OFFSET(0)] key, SPLIT(kv, ':')[SAFE_OFFSET(1)] value
FROM (
SELECT TO_JSON_STRING(t) x
FROM `project.dataset.table` t
),
UNNEST(SPLIT(REGEXP_REPLACE(x, r'[{}"]', ''))) kv
), flatten AS (
SELECT b.key, b.value value, a.value `date`
FROM temp a
JOIN temp b
USING(id)
WHERE a.key = 'date'
AND b.key != 'date'
)
SELECT key,MAX(IF(`date`='1/1/11',value,NULL)) date_1_1_11,MAX(IF(`date`='2/1/11',value,NULL)) date_2_1_11,MAX(IF(`date`='3/1/11',value,NULL)) date_3_1_11,MAX(IF(`date`='4/1/11',value,NULL)) date_4_1_11,MAX(IF(`date`='5/1/11',value,NULL)) date_5_1_11,MAX(IF(`date`='6/1/11',value,NULL)) date_6_1_11,MAX(IF(`date`='7/1/11',value,NULL)) date_7_1_11,MAX(IF(`date`='8/1/11',value,NULL)) date_8_1_11,MAX(IF(`date`='9/1/11',value,NULL)) date_9_1_11,MAX(IF(`date`='10/1/11',value,NULL)) date_10_1_11,MAX(IF(`date`='11/1/11',value,NULL)) date_11_1_11,MAX(IF(`date`='12/1/11',value,NULL)) date_12_1_11,MAX(IF(`date`='1/1/12',value,NULL)) date_1_1_12,MAX(IF(`date`='2/1/12',value,NULL)) date_2_1_12,MAX(IF(`date`='3/1/12',value,NULL)) date_3_1_12,MAX(IF(`date`='4/1/12',value,NULL)) date_4_1_12,MAX(IF(`date`='5/1/12',value,NULL)) date_5_1_12,MAX(IF(`date`='6/1/12',value,NULL)) date_6_1_12,MAX(IF(`date`='7/1/12',value,NULL)) date_7_1_12,MAX(IF(`date`='8/1/12',value,NULL)) date_8_1_12,MAX(IF(`date`='9/1/12',value,NULL)) date_9_1_12,MAX(IF(`date`='10/1/12',value,NULL)) date_10_1_12,MAX(IF(`date`='11/1/12',value,NULL)) date_11_1_12,MAX(IF(`date`='12/1/12',value,NULL)) date_12_1_12,MAX(IF(`date`='1/1/13',value,NULL)) date_1_1_13,MAX(IF(`date`='2/1/13',value,NULL)) date_2_1_13,MAX(IF(`date`='3/1/13',value,NULL)) date_3_1_13,MAX(IF(`date`='4/1/13',value,NULL)) date_4_1_13,MAX(IF(`date`='5/1/13',value,NULL)) date_5_1_13,MAX(IF(`date`='6/1/13',value,NULL)) date_6_1_13,MAX(IF(`date`='7/1/13',value,NULL)) date_7_1_13,MAX(IF(`date`='8/1/13',value,NULL)) date_8_1_13,MAX(IF(`date`='9/1/13',value,NULL)) date_9_1_13,MAX(IF(`date`='10/1/13',value,NULL)) date_10_1_13,MAX(IF(`date`='11/1/13',value,NULL)) date_11_1_13,MAX(IF(`date`='12/1/13',value,NULL)) date_12_1_13,MAX(IF(`date`='1/1/14',value,NULL)) date_1_1_14,MAX(IF(`date`='2/1/14',value,NULL)) date_2_1_14,MAX(IF(`date`='3/1/14',value,NULL)) date_3_1_14,MAX(IF(`date`='4/1/14',value,NULL)) date_4_1_14,MAX(IF(`date`='5/1/14',value,NULL)) date_5_1_14,MAX(IF(`date`='6/1/14',value,NULL)) date_6_1_14,MAX(IF(`date`='7/1/14',value,NULL)) date_7_1_14,MAX(IF(`date`='8/1/14',value,NULL)) date_8_1_14,MAX(IF(`date`='9/1/14',value,NULL)) date_9_1_14,MAX(IF(`date`='10/1/14',value,NULL)) date_10_1_14,MAX(IF(`date`='11/1/14',value,NULL)) date_11_1_14,MAX(IF(`date`='12/1/14',value,NULL)) date_12_1_14,MAX(IF(`date`='1/1/15',value,NULL)) date_1_1_15,MAX(IF(`date`='2/1/15',value,NULL)) date_2_1_15,MAX(IF(`date`='3/1/15',value,NULL)) date_3_1_15,MAX(IF(`date`='4/1/15',value,NULL)) date_4_1_15,MAX(IF(`date`='5/1/15',value,NULL)) date_5_1_15,MAX(IF(`date`='6/1/15',value,NULL)) date_6_1_15,MAX(IF(`date`='7/1/15',value,NULL)) date_7_1_15,MAX(IF(`date`='8/1/15',value,NULL)) date_8_1_15,MAX(IF(`date`='9/1/15',value,NULL)) date_9_1_15,MAX(IF(`date`='10/1/15',value,NULL)) date_10_1_15,MAX(IF(`date`='11/1/15',value,NULL)) date_11_1_15,MAX(IF(`date`='12/1/15',value,NULL)) date_12_1_15,MAX(IF(`date`='1/1/16',value,NULL)) date_1_1_16,MAX(IF(`date`='2/1/16',value,NULL)) date_2_1_16,MAX(IF(`date`='3/1/16',value,NULL)) date_3_1_16,MAX(IF(`date`='4/1/16',value,NULL)) date_4_1_16,MAX(IF(`date`='5/1/16',value,NULL)) date_5_1_16,MAX(IF(`date`='6/1/16',value,NULL)) date_6_1_16,MAX(IF(`date`='7/1/16',value,NULL)) date_7_1_16,MAX(IF(`date`='8/1/16',value,NULL)) date_8_1_16,MAX(IF(`date`='9/1/16',value,NULL)) date_9_1_16,MAX(IF(`date`='10/1/16',value,NULL)) date_10_1_16,MAX(IF(`date`='11/1/16',value,NULL)) date_11_1_16,MAX(IF(`date`='12/1/16',value,NULL)) date_12_1_16,MAX(IF(`date`='1/1/17',value,NULL)) date_1_1_17,MAX(IF(`date`='2/1/17',value,NULL)) date_2_1_17,MAX(IF(`date`='3/1/17',value,NULL)) date_3_1_17,MAX(IF(`date`='4/1/17',value,NULL)) date_4_1_17,MAX(IF(`date`='5/1/17',value,NULL)) date_5_1_17,MAX(IF(`date`='6/1/17',value,NULL)) date_6_1_17,MAX(IF(`date`='7/1/17',value,NULL)) date_7_1_17,MAX(IF(`date`='8/1/17',value,NULL)) date_8_1_17,MAX(IF(`date`='9/1/17',value,NULL)) date_9_1_17,MAX(IF(`date`='10/1/17',value,NULL)) date_10_1_17,MAX(IF(`date`='11/1/17',value,NULL)) date_11_1_17,MAX(IF(`date`='12/1/17',value,NULL)) date_12_1_17,MAX(IF(`date`='1/1/18',value,NULL)) date_1_1_18,MAX(IF(`date`='2/1/18',value,NULL)) date_2_1_18,MAX(IF(`date`='3/1/18',value,NULL)) date_3_1_18,MAX(IF(`date`='4/1/18',value,NULL)) date_4_1_18,MAX(IF(`date`='5/1/18',value,NULL)) date_5_1_18,MAX(IF(`date`='6/1/18',value,NULL)) date_6_1_18,MAX(IF(`date`='7/1/18',value,NULL)) date_7_1_18,MAX(IF(`date`='8/1/18',value,NULL)) date_8_1_18,MAX(IF(`date`='9/1/18',value,NULL)) date_9_1_18,MAX(IF(`date`='10/1/18',value,NULL)) date_10_1_18,MAX(IF(`date`='11/1/18',value,NULL)) date_11_1_18,MAX(IF(`date`='12/1/18',value,NULL)) date_12_1_18,MAX(IF(`date`='1/1/19',value,NULL)) date_1_1_19,MAX(IF(`date`='2/1/19',value,NULL)) date_2_1_19,MAX(IF(`date`='3/1/19',value,NULL)) date_3_1_19,MAX(IF(`date`='4/1/19',value,NULL)) date_4_1_19,MAX(IF(`date`='5/1/19',value,NULL)) date_5_1_19,MAX(IF(`date`='6/1/19',value,NULL)) date_6_1_19,MAX(IF(`date`='7/1/19',value,NULL)) date_7_1_19,MAX(IF(`date`='8/1/19',value,NULL)) date_8_1_19,MAX(IF(`date`='9/1/19',value,NULL)) date_9_1_19,MAX(IF(`date`='10/1/19',value,NULL)) date_10_1_19,MAX(IF(`date`='11/1/19',value,NULL)) date_11_1_19,MAX(IF(`date`='12/1/19',value,NULL)) date_12_1_19 FROM flatten GROUP BY key
So, now, when you run it - you will get below result
Obviously when you run it against your real data - you will get real values instead of NULLs as it is in example above
Finally, you can simply automate above steps using any client of your choice!
Upvotes: 1