Reputation: 10926
Given a table like this:
+---------+-------+------+------+---------------------+
| user_id | a | b | c | t |
+---------+-------+------+------+---------------------+
| 10001 | one | 2 | NULL | 2014-09-27 12:30:01 |
| 10002 | seven | 8 | 9 | 2020-09-27 12:30:02 |
| 10001 | four | NULL | 6 | 2014-09-27 12:30:03 |
+---------+-------+------+------+---------------------+
I'd like to obtain the following:
+---------+-------+------+------+
| user_id | a | b | c |
+---------+-------+------+------+
| 10001 | four | 2 | 6 |
| 10002 | seven | 8 | 9 |
+---------+-------+------+------+
This is, for each user_id
, obtain a single row with the latest non-NULL
value of each column.
My current attempt looks as follows:
WITH
foo AS (
SELECT
10001 user_id,
'one' a,
2 b,
NULL c,
TIMESTAMP('2014-09-27 12:30:01') t,
UNION ALL
SELECT
10002 user_id,
'seven' a,
8 b,
9 c,
TIMESTAMP('2020-09-27 12:30:02') t,
UNION ALL
SELECT
10001 user_id,
'four' a,
NULL b,
6 c,
TIMESTAMP('2014-09-27 12:30:03') t,
)
SELECT
user_id,
a,
b,
c
FROM
foo s1
WHERE
t = (
SELECT
MAX(t)
FROM
foo s2
WHERE
s1.user_id = s2.user_id)
ORDER BY
user_id
But of course, this simply keeps the latest row per user_id
, which is not what I need.
+---------+-------+------+---+
| user_id | a | b | c |
+---------+-------+------+---+
| 10001 | four | NULL | 6 |
| 10002 | seven | 8 | 9 |
+---------+-------+------+---+
How do I replace NULL
s with non-NULL
values from older rows of that user_id
(if existing)?
Upvotes: 0
Views: 245
Reputation: 3034
Try the following:
WITH sample_data AS (
SELECT 10001 user_id, 'one' a, 2 b, NULL c, TIMESTAMP('2014-09-27 12:30:01') t UNION ALL
SELECT 10002 user_id, 'seven' a, 8 b, 9 c, TIMESTAMP('2020-09-27 12:30:02') t UNION ALL
SELECT 10001 user_id, 'four' a, NULL b, 6 c, TIMESTAMP('2014-09-27 12:30:03') t UNION ALL
SELECT 10001 user_id, 'eight' a, NULL b, NULL c, TIMESTAMP('2014-09-27 12:33:03') t
)
, ranking as (
select distinct user_id
, last_value(a IGNORE NULLS) OVER (PARTITION BY user_id ORDER BY t) a
, last_value(b IGNORE NULLS) OVER (PARTITION BY user_id ORDER BY t) b
, last_value(c IGNORE NULLS) OVER (PARTITION BY user_id ORDER BY t) c
, row_number() OVER (partition by user_id order by t desc) row_num
from sample_data
)
select user_id
, a
, b
, c
from ranking
where row_num = 1
Last value will give you the last value in the defined window, in this case being by user ordered by the timestamp.
Upvotes: 1
Reputation: 172964
Consider below approach
select distinct user_id,
first_value(a ignore nulls) over win a,
first_value(b ignore nulls) over win b,
first_value(c ignore nulls) over win c
from foo
window win as (
partition by user_id
order by t desc
rows between unbounded preceding and unbounded following
)
if applied to sample data in your question - output is
Upvotes: 1
Reputation: 2725
Not sure if this is exactly what you would like, and this is very non-optimal approach, but it is more or less strait forward, and (I hope) easy to understand:
with foo as (
select 10001 user_id, 'one' a, 2 b, null c, timestamp('2014-09-27 12:30:01') t
union all
select 10002 user_id, 'seven' a, 8 b, 9 c, timestamp('2020-09-27 12:30:02') t
union all
select 10001 user_id, 'four' a, null b, 6 c, timestamp('2014-09-27 12:30:03') t
union all
select 10003 user_id, 'five' a, null b, null c, timestamp('2014-09-27 12:31:03') t
union all
select 10003 user_id, 'nine' a, 2 b, null c, timestamp('2014-09-27 12:31:04') t
union all
select 10004 user_id, 'six' a, 6 b, null c, timestamp('2014-09-27 12:31:03') t
union all
select 10004 user_id, 'seven' a, 6 b, 9 c, timestamp('2014-09-27 12:31:00') t
)
, b_table as (
select
user_id
, last_value(b) over(partition by user_id order by t desc) as last_b
from foo
where b is not null
)
, c_table as (
select
user_id
, last_value(c) over(partition by user_id order by t desc) as last_c
from foo
where c is not null
)
, full_table as (
select
foo.user_id
, foo.a
, b_table.last_b
, c_table.last_c
, foo.t
from
foo
left outer join
b_table on foo.user_id = b_table.user_id
left outer join
c_table on foo.user_id = c_table.user_id
)
select
agg.full_table.*
from
(
select
user_id,
array_agg(struct(full_table) order by t desc)[safe_offset(0)] agg
from
full_table
group by
user_id
)
Upvotes: 1