Reputation: 325
I have a query which returns the following.
select
trunc(t.created),
sum(count(*)) over (order by trunc(t.created) rows unbounded preceding) as cumulative_sales
from event e
LEFT JOIN person_tickets t on e.id = t.event_id
where event_id = 9999
group by trunc(t.created)
Date cumulative_bookings
2016-02-12 1
2016-02-18 3
2016-02-19 5
2016-02-20 352
2016-02-21 352
I'd like to fill out the date series so that all dates are included.
2016-02-12 1
2016-02-13 1
2016-02-14 1
2016-02-15 1
2016-02-16 1
2016-02-17 1
2016-02-18 3
2016-02-19 5
2016-02-20 352
2016-02-21 352
I've been trying to join in the code which is generating a nice date sequence for me, but I'm struggling to see where or how to elegantly make the join.
select (
getdate()::date - row_number() over (order by true)
)::date as n
from event limit 500
Been making a little progress with the below but not quite there
CASE WHEN cumulative_bookings is null then LAG(cumulative_bookings IGNORE NULLS) OVER (ORDER BY n)
ELSE cumulative_bookings END as filled_cumulative_bookings
Upvotes: 0
Views: 2416
Reputation: 14045
You need to use a number table for this in Redshift. (In "normal" Postgres you could use generate_series()
as per this answer)
Basically you want to create a list of all the dates between your min and max dates and then left join from that list against your sparse date data.
--Create numbers table - 128 rows (0-127)
DROP TABLE IF EXISTS numbers;
CREATE TABLE numbers (n INTEGER NOT NULL) DISTSTYLE ALL;
INSERT INTO numbers VALUES (0);
INSERT INTO numbers SELECT n + 1 FROM numbers;
INSERT INTO numbers SELECT n + 2 FROM numbers;
INSERT INTO numbers SELECT n + 4 FROM numbers;
INSERT INTO numbers SELECT n + 8 FROM numbers;
INSERT INTO numbers SELECT n + 16 FROM numbers;
INSERT INTO numbers SELECT n + 32 FROM numbers;
INSERT INTO numbers SELECT n + 64 FROM numbers;
-- SELECT n FROM numbers;
--Create sample table with sparse dates
DROP TABLE IF EXISTS two_dates;
CREATE TABLE two_dates (dtm DATETIME NOT NULL, value INT NOT NULL) DISTSTYLE ALL;
INSERT INTO two_dates VALUES ('2016-12-01',1),('2016-12-07',1);
-- SELECT dtm FROM two_dates;
--First CTE finds Min, Max, and Days in range - 1 row
WITH cte_range
AS (SELECT MIN(dtm) min_dtm
,MAX(dtm) max_dtm
,DATEDIFF(day,MIN(dtm),MAX(dtm)) dtm_range
FROM two_dates)
--Second CTE creates list of dates between Min and Max dates
, cte_dtm_list
AS (SELECT DATEADD(day,numbers.n,cte_range.min_dtm) dtm
FROM cte_range
CROSS JOIN numbers
WHERE numbers.n <= cte_range.dtm_range)
--Finally we left join to the sparse date data
SELECT cte_dtm_list.dtm
,COALESCE(two_dates.value,0) value
,SUM(COALESCE(two_dates.value,0)) OVER (ORDER BY cte_dtm_list.dtm ROWS UNBOUNDED PRECEDING) as cume
FROM cte_dtm_list
LEFT JOIN two_dates
ON cte_dtm_list.dtm = two_dates.dtm
ORDER BY cte_dtm_list.dtm
;
-- dtm | value | cume
-- ---------------------+-------+------
-- 2016-12-01 00:00:00 | 1 | 1
-- 2016-12-02 00:00:00 | 0 | 1
-- 2016-12-03 00:00:00 | 0 | 1
-- 2016-12-04 00:00:00 | 0 | 1
-- 2016-12-05 00:00:00 | 0 | 1
-- 2016-12-06 00:00:00 | 0 | 1
-- 2016-12-07 00:00:00 | 1 | 2
Upvotes: 1