Reputation: 55
I have the following table with data from GA in BigQuery
userid visitid purchase_date
GH8932 12345 2017-04-09
GH8932 12346 null
GH8932 12347 null
GH8932 12348 null
GH8932 12349 2017-05-30
GH8932 12350 null
GH8932 12351 null
GH8932 12352 2017-06-07
GH8932 12353 null
GH8932 12354 2017-06-30
GH8932 12355 null
GH8932 12356 null
I want to fill all the nulls with the purchase_date.
The current query I use (given below)
SELECT
userid,
visitid,
FIRST_VALUE(purchase_date IGNORE NULLS) OVER (
PARTITION BY userid ORDER BY visitid
ROWS BETWEEN CURRENT ROW AND
UNBOUNDED FOLLOWING) AS purchase_date
FROM x;
gives me something like this
userid visitid purchase_date
GH8932 12345 2017-04-09
GH8932 12346 2017-05-30
GH8932 12347 2017-05-30
GH8932 12348 2017-05-30
GH8932 12349 2017-05-30
GH8932 12350 2017-06-07
GH8932 12351 2017-06-07
GH8932 12352 2017-06-07
GH8932 12353 2017-06-30
GH8932 12354 2017-06-30
GH8932 12355 null
GH8932 12356 null
Any suggestions on how I can fill the last 2 nulls with the final purchase_date?
Upvotes: 1
Views: 138
Reputation: 173056
Below is for BigQuery Standard SQL
#standardSQL
SELECT
userid,
visitid,
IFNULL(FIRST_VALUE(purchase_date IGNORE NULLS)
OVER (PARTITION BY userid ORDER BY visitid
ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING),
FIRST_VALUE(purchase_date IGNORE NULLS)
OVER (PARTITION BY userid ORDER BY visitid DESC
ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)) AS purchase_date
FROM `project.dataset.table`
You can test / play with above using dummy data from your question
#standardSQL
WITH `project.dataset.table` AS (
SELECT 'GH8932' userid, 12345 visitid, '2017-04-09' purchase_date UNION ALL
SELECT 'GH8932', 12346, NULL UNION ALL
SELECT 'GH8932', 12347, NULL UNION ALL
SELECT 'GH8932', 12348, NULL UNION ALL
SELECT 'GH8932', 12349, '2017-05-30' UNION ALL
SELECT 'GH8932', 12350, NULL UNION ALL
SELECT 'GH8932', 12351, NULL UNION ALL
SELECT 'GH8932', 12352, '2017-06-07' UNION ALL
SELECT 'GH8932', 12353, NULL UNION ALL
SELECT 'GH8932', 12354, '2017-06-30' UNION ALL
SELECT 'GH8932', 12355, NULL UNION ALL
SELECT 'GH8932', 12356, NULL
)
SELECT
userid,
visitid,
IFNULL(FIRST_VALUE(purchase_date IGNORE NULLS)
OVER (PARTITION BY userid ORDER BY visitid
ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING),
FIRST_VALUE(purchase_date IGNORE NULLS)
OVER (PARTITION BY userid ORDER BY visitid DESC
ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING)) AS purchase_date
FROM `project.dataset.table`
ORDER BY userid, visitid
Upvotes: 1