Reputation: 97
Need mysql/hive query which can insert new rows for missing dates from a date range for all IDs. Initial table looks like this:-
ID Date Value
1 01-Feb-2018 50
2 02-Feb-2018 10
Let's assume start date=01-Feb-2018
and end date = 2-Feb-2018
The final table should like this
ID Date Value
1 01-Feb-2018 50
1 02-Feb-2018 0
2 01-Feb-2018 0
2 02-Feb-2018 10
Thanks in advance.
Upvotes: 0
Views: 463
Reputation: 38325
Substitute start_date and end_date variables in this example with yours and also see comments in the code:
set hivevar:start_date=2015-07-01; --replace with your start_date
set hivevar:end_date=current_date; --replace with your end_date
set hive.exec.parallel=true;
set hive.auto.convert.join=true; --this enables map-join
set hive.mapjoin.smalltable.filesize=25000000; --size of table to fit in memory
with date_range as
(--this query generates date range, check it's output
select date_add ('${hivevar:start_date}',s.i) as dt
from ( select posexplode(split(space(datediff(${hivevar:end_date},'${hivevar:start_date}')),' ')) as (i,x) ) s
)
--insert overwrite table your table --uncomment this after checking if you need to overwrite your table
select s.ID,
s.dt,
case when t.id is null then 0 else t.value end as value --take existing value for joined, 0 for not joined
--also you can do simply NVL(t.value,0) as value if no NULLs are allowed in your_table.value
from
(--this subquery will give all combinations of ID and date, which should be the result
select d.dt, IDs.ID
from date_range d cross join (select distinct ID from your_table) IDs
) s
left join
your_table t on s.dt=t.date and s.id=t.id --join with existing records, check your table column names
order by s.id, s.dt --remove this if ordering is not necessary
;
Upvotes: 1