Reputation: 157
With given data and sql command
WITH
TableItem AS (
SELECT 'Item18-0001' AS ItemNr, TIMESTAMP '2018-04-30' AS PostingDate, 1 AS Quantity UNION ALL
SELECT 'Item18-0002' AS ItemNr, TIMESTAMP '2018-04-30' AS PostingDate, 2 AS Quantity UNION ALL
SELECT 'Item18-0003' AS ItemNr, TIMESTAMP '2018-04-30' AS PostingDate, 3 AS Quantity UNION ALL
SELECT 'Item18-0004' AS ItemNr, TIMESTAMP '2018-04-30' AS PostingDate, 4 AS Quantity UNION ALL
# missing 2018-05-01
# missing Item18-004
# new Item18-0006
SELECT 'Item18-0001' AS ItemNr, TIMESTAMP '2018-05-02' AS PostingDate, 1 AS Quantity UNION ALL
SELECT 'Item18-0002' AS ItemNr, TIMESTAMP '2018-05-02' AS PostingDate, 1 AS Quantity UNION ALL
SELECT 'Item18-0003' AS ItemNr, TIMESTAMP '2018-05-02' AS PostingDate, 1 AS Quantity UNION ALL
SELECT 'Item18-0005' AS ItemNr, TIMESTAMP '2018-05-02' AS PostingDate, 5 AS Quantity UNION ALL
# missing Item18-0004, Item18-0005
# new Item18-0006
SELECT 'Item18-0001' AS ItemNr, TIMESTAMP '2018-05-03' AS PostingDate, 1 AS Quantity UNION ALL
SELECT 'Item18-0002' AS ItemNr, TIMESTAMP '2018-05-03' AS PostingDate, 2 AS Quantity UNION ALL
SELECT 'Item18-0003' AS ItemNr, TIMESTAMP '2018-05-03' AS PostingDate, 3 AS Quantity UNION ALL
SELECT 'Item18-0006' AS ItemNr, TIMESTAMP '2018-05-03' AS PostingDate, 6 AS Quantity UNION ALL
# some missing
SELECT 'Item18-0002' AS ItemNr, TIMESTAMP '2018-05-04' AS PostingDate, 2 AS Quantity UNION ALL
SELECT 'Item18-0002' AS ItemNr, TIMESTAMP '2018-05-04' AS PostingDate, 2 AS Quantity UNION ALL
SELECT 'Item18-0003' AS ItemNr, TIMESTAMP '2018-05-04' AS PostingDate, 3 AS Quantity UNION ALL
SELECT 'Item18-0003' AS ItemNr, TIMESTAMP '2018-05-04' AS PostingDate, 3 AS Quantity UNION ALL
# some missing, some new
SELECT 'Item18-0001' AS ItemNr, TIMESTAMP '2018-05-05' AS PostingDate, 1 AS Quantity UNION ALL
SELECT 'Item18-0003' AS ItemNr, TIMESTAMP '2018-05-05' AS PostingDate, 3 AS Quantity UNION ALL
SELECT 'Item18-0005' AS ItemNr, TIMESTAMP '2018-05-05' AS PostingDate, 5 AS Quantity UNION ALL
SELECT 'Item18-0007' AS ItemNr, TIMESTAMP '2018-05-05' AS PostingDate, 7 AS Quantity ),
# Cross Join to get all combinations of ItemNr and PostingDate
TableItemNrPostingDate AS (
SELECT
ItemNr,
PostingDate
FROM (
SELECT
it1.ItemNr
FROM
TableItem it1
GROUP BY
it1.ItemNr ) t2
CROSS JOIN (
SELECT
it2.PostingDate
FROM
TableItem it2
GROUP BY
it2.PostingDate ) t3 ),
# Create Calender Table to get missing dates
TableCalenderDayItemNrPostingDate AS (
SELECT
CalenderDay,
TableItemNrPostingDate.ItemNr As ItemNr,
TableItemNrPostingDate.PostingDate as PostingDate
FROM
UNNEST( GENERATE_DATE_ARRAY("2018-04-30", DATE_ADD(DATE_ADD(DATE_TRUNC(CURRENT_DATE(), MONTH), INTERVAL 1 MONTH), INTERVAL -1 DAY), INTERVAL 1 DAY)) AS CalenderDay
LEFT JOIN
TableItemNrPostingDate
ON
CalenderDay = DATE(TableItemNrPostingDate.PostingDate)
ORDER BY
CalenderDay )
SELECT
CalenderDay,
FIRST_VALUE(ItemNr) OVER (PARTITION BY ItemNr ORDER BY CalenderDay ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING),
ItemNr,
PostingDate
FROM
TableCalenderDayItemNrPostingDate
ORDER BY
CalenderDay, ItemNr
I would like get ItemNr, PostingDate, Quantity for every ItemNr and every calender date "from beginning". Missing values has to be filled with previous values.
My table will be filled / updated only with changed quantity and date. That means not all items are updated every day, new items come into the table later and at some days there are no changes at all.
For given data I would like to have the result. Quantity = ItemNr to make it easier to identfy.
Item18-0001 2018-04-30 1
Item18-0002 2018-04-30 2
Item18-0003 2018-04-30 3
Item18-0004 2018-04-30 4
Item18-0005 2018-04-30 0 (or null or empty row)
Item18-0006 2018-04-30 0 (or null or empty row)
Item18-0007 2018-04-30 0 (or null or empty row)
Item18-0001 2018-05-01 1
Item18-0002 2018-05-01 2
Item18-0003 2018-05-01 3
Item18-0004 2018-05-01 4
Item18-0005 2018-05-01 0 (or null or empty row)
Item18-0006 2018-05-01 0 (or null or empty row)
Item18-0007 2018-05-01 0 (or null or empty row)
Item18-0001 2018-05-02 1
Item18-0002 2018-05-02 2
Item18-0003 2018-05-02 3
Item18-0004 2018-05-02 4
Item18-0005 2018-05-02 5
Item18-0006 2018-05-02 0 (or null or empty row)
Item18-0007 2018-05-02 0 (or null or empty row)
Item18-0001 2018-05-03 1
Item18-0002 2018-05-03 2
Item18-0003 2018-05-03 3
Item18-0004 2018-05-03 4
Item18-0005 2018-05-03 5
Item18-0006 2018-05-03 6
Item18-0007 2018-05-03 0 (or null or empty row)
Item18-0001 2018-05-04 1
Item18-0002 2018-05-04 2
Item18-0003 2018-05-04 3
Item18-0004 2018-05-04 4
Item18-0005 2018-05-04 5
Item18-0006 2018-05-04 6
Item18-0007 2018-05-03 0 (or null or empty row)
Item18-0001 2018-05-05 1
Item18-0002 2018-05-05 2
Item18-0003 2018-05-05 3
Item18-0004 2018-05-05 4
Item18-0005 2018-05-05 5
Item18-0006 2018-05-05 6
Item18-0007 2018-05-05 7
My SQL command is not ready and wrong. I would only like to show my effort or intention.
Upvotes: 0
Views: 887
Reputation: 172993
Below is for BigQuery StandardSQL
#standardSQL
WITH TableItemNr AS (
SELECT DISTINCT ItemNr FROM `project.dataset.TableItem`
), TableDates AS (
SELECT CAST(PostingDate AS TIMESTAMP) PostingDate
FROM (
SELECT DATE(MIN(PostingDate)) minPostingDate, DATE(MAX(PostingDate)) maxPostingDate
FROM `project.dataset.TableItem`
), UNNEST(GENERATE_DATE_ARRAY(minPostingDate, maxPostingDate)) PostingDate -- CURRENT_DATE() can be used instead of maxPostingDate depends on your needs
)
SELECT i.ItemNr, d.PostingDate, t.Quantity,
IF(t.ItemNr IS NULL, 0, 1) original,
LAST_VALUE(Quantity IGNORE NULLS) OVER(PARTITION BY ItemNr ORDER BY PostingDate) updatedQuantity
FROM TableDates d
CROSS JOIN TableItemNr i
LEFT JOIN `project.dataset.TableItem` t
USING(ItemNr, PostingDate)
-- ORDER BY PostingDate, ItemNr
You can tes / play with above using dummy data from your question
#standardSQL
WITH `project.dataset.TableItem` AS (
SELECT 'Item18-0001' AS ItemNr, TIMESTAMP '2018-04-30' AS PostingDate, 1 AS Quantity UNION ALL
SELECT 'Item18-0002' AS ItemNr, TIMESTAMP '2018-04-30' AS PostingDate, 2 AS Quantity UNION ALL
SELECT 'Item18-0003' AS ItemNr, TIMESTAMP '2018-04-30' AS PostingDate, 3 AS Quantity UNION ALL
SELECT 'Item18-0004' AS ItemNr, TIMESTAMP '2018-04-30' AS PostingDate, 4 AS Quantity UNION ALL
# missing 2018-05-01
# missing Item18-004
# new Item18-0006
SELECT 'Item18-0001' AS ItemNr, TIMESTAMP '2018-05-02' AS PostingDate, 1 AS Quantity UNION ALL
SELECT 'Item18-0002' AS ItemNr, TIMESTAMP '2018-05-02' AS PostingDate, 1 AS Quantity UNION ALL
SELECT 'Item18-0003' AS ItemNr, TIMESTAMP '2018-05-02' AS PostingDate, 1 AS Quantity UNION ALL
SELECT 'Item18-0005' AS ItemNr, TIMESTAMP '2018-05-02' AS PostingDate, 5 AS Quantity UNION ALL
# missing Item18-0004, Item18-0005
# new Item18-0006
SELECT 'Item18-0001' AS ItemNr, TIMESTAMP '2018-05-03' AS PostingDate, 1 AS Quantity UNION ALL
SELECT 'Item18-0002' AS ItemNr, TIMESTAMP '2018-05-03' AS PostingDate, 2 AS Quantity UNION ALL
SELECT 'Item18-0003' AS ItemNr, TIMESTAMP '2018-05-03' AS PostingDate, 3 AS Quantity UNION ALL
SELECT 'Item18-0006' AS ItemNr, TIMESTAMP '2018-05-03' AS PostingDate, 6 AS Quantity UNION ALL
# some missing
SELECT 'Item18-0002' AS ItemNr, TIMESTAMP '2018-05-04' AS PostingDate, 2 AS Quantity UNION ALL
SELECT 'Item18-0002' AS ItemNr, TIMESTAMP '2018-05-04' AS PostingDate, 2 AS Quantity UNION ALL
SELECT 'Item18-0003' AS ItemNr, TIMESTAMP '2018-05-04' AS PostingDate, 3 AS Quantity UNION ALL
SELECT 'Item18-0003' AS ItemNr, TIMESTAMP '2018-05-04' AS PostingDate, 3 AS Quantity UNION ALL
# some missing, some new
SELECT 'Item18-0001' AS ItemNr, TIMESTAMP '2018-05-05' AS PostingDate, 1 AS Quantity UNION ALL
SELECT 'Item18-0003' AS ItemNr, TIMESTAMP '2018-05-05' AS PostingDate, 3 AS Quantity UNION ALL
SELECT 'Item18-0005' AS ItemNr, TIMESTAMP '2018-05-05' AS PostingDate, 5 AS Quantity UNION ALL
SELECT 'Item18-0007' AS ItemNr, TIMESTAMP '2018-05-05' AS PostingDate, 7 AS Quantity
), TableItemNr AS (
SELECT DISTINCT ItemNr FROM `project.dataset.TableItem`
), TableDates AS (
SELECT CAST(PostingDate AS TIMESTAMP) PostingDate
FROM (
SELECT DATE(MIN(PostingDate)) minPostingDate, DATE(MAX(PostingDate)) maxPostingDate
FROM `project.dataset.TableItem`
), UNNEST(GENERATE_DATE_ARRAY(minPostingDate, maxPostingDate)) PostingDate
)
SELECT i.ItemNr, d.PostingDate, t.Quantity,
IF(t.ItemNr IS NULL, 0, 1) original,
LAST_VALUE(Quantity IGNORE NULLS) OVER(PARTITION BY ItemNr ORDER BY PostingDate) updatedQuantity
FROM TableDates d
CROSS JOIN TableItemNr i
LEFT JOIN `project.dataset.TableItem` t
USING(ItemNr, PostingDate)
ORDER BY PostingDate, ItemNr
with results as
Row ItemNr PostingDate Quantity original updatedQuantity
1 Item18-0001 2018-04-30 00:00:00.000 UTC 1 1 1
2 Item18-0002 2018-04-30 00:00:00.000 UTC 2 1 2
3 Item18-0003 2018-04-30 00:00:00.000 UTC 3 1 3
4 Item18-0004 2018-04-30 00:00:00.000 UTC 4 1 4
5 Item18-0005 2018-04-30 00:00:00.000 UTC null 0 null
6 Item18-0006 2018-04-30 00:00:00.000 UTC null 0 null
7 Item18-0007 2018-04-30 00:00:00.000 UTC null 0 null
8 Item18-0001 2018-05-01 00:00:00.000 UTC null 0 1
9 Item18-0002 2018-05-01 00:00:00.000 UTC null 0 2
10 Item18-0003 2018-05-01 00:00:00.000 UTC null 0 3
11 Item18-0004 2018-05-01 00:00:00.000 UTC null 0 4
12 Item18-0005 2018-05-01 00:00:00.000 UTC null 0 null
13 Item18-0006 2018-05-01 00:00:00.000 UTC null 0 null
14 Item18-0007 2018-05-01 00:00:00.000 UTC null 0 null
15 Item18-0001 2018-05-02 00:00:00.000 UTC 1 1 1
16 Item18-0002 2018-05-02 00:00:00.000 UTC 1 1 1
17 Item18-0003 2018-05-02 00:00:00.000 UTC 1 1 1
18 Item18-0004 2018-05-02 00:00:00.000 UTC null 0 4
19 Item18-0005 2018-05-02 00:00:00.000 UTC 5 1 5
20 Item18-0006 2018-05-02 00:00:00.000 UTC null 0 null
21 Item18-0007 2018-05-02 00:00:00.000 UTC null 0 null
22 Item18-0001 2018-05-03 00:00:00.000 UTC 1 1 1
23 Item18-0002 2018-05-03 00:00:00.000 UTC 2 1 2
24 Item18-0003 2018-05-03 00:00:00.000 UTC 3 1 3
25 Item18-0004 2018-05-03 00:00:00.000 UTC null 0 4
26 Item18-0005 2018-05-03 00:00:00.000 UTC null 0 5
27 Item18-0006 2018-05-03 00:00:00.000 UTC 6 1 6
28 Item18-0007 2018-05-03 00:00:00.000 UTC null 0 null
29 Item18-0001 2018-05-04 00:00:00.000 UTC null 0 1
30 Item18-0002 2018-05-04 00:00:00.000 UTC 2 1 2
31 Item18-0002 2018-05-04 00:00:00.000 UTC 2 1 2
32 Item18-0003 2018-05-04 00:00:00.000 UTC 3 1 3
33 Item18-0003 2018-05-04 00:00:00.000 UTC 3 1 3
34 Item18-0004 2018-05-04 00:00:00.000 UTC null 0 4
35 Item18-0005 2018-05-04 00:00:00.000 UTC null 0 5
36 Item18-0006 2018-05-04 00:00:00.000 UTC null 0 6
37 Item18-0007 2018-05-04 00:00:00.000 UTC null 0 null
38 Item18-0001 2018-05-05 00:00:00.000 UTC 1 1 1
39 Item18-0002 2018-05-05 00:00:00.000 UTC null 0 2
40 Item18-0003 2018-05-05 00:00:00.000 UTC 3 1 3
41 Item18-0004 2018-05-05 00:00:00.000 UTC null 0 4
42 Item18-0005 2018-05-05 00:00:00.000 UTC 5 1 5
43 Item18-0006 2018-05-05 00:00:00.000 UTC null 0 6
44 Item18-0007 2018-05-05 00:00:00.000 UTC 7 1 7
Note: for some reason you have your dates as a TIMESTAMP data type do I had to do some extra CAST'ing
Upvotes: 3