Reputation: 65516
Imagine I have the following tables:
I want to fill in the gaps on table like so
Now I can do this using multiple steps with CTEs and temp tables building up groups, and max/min values and then inserting them.
However I as wondering if there is a more elegant (!) way of doing this in single statement, that joins the source on itself with the months.
Bonus points for doing this in ansi sql as opposed to tsql, but either is acceptable.
===
here is some sql to create a test
drop table months;
create table months (year int, month int);
insert into months(year, month) values
(2000, 200007), (2001, 200101), (2002, 200201),
(2000, 200008), (2001, 200102), (2002, 200202),
(2000, 200009), (2001, 200103), (2002, 200203),
(2000, 200010), (2001, 200104), (2002, 200204),
(2000, 200011), (2001, 200105), (2002, 200205),
(2000, 200012), (2001, 200106), (2002, 200206),
(2001, 200107), (2002, 200007),
(2001, 200108), (2002, 200208),
(2001, 200109), (2002, 200208),
(2001, 200110),
(2001, 200111),
(2001, 200112)
drop table source;
create table source (name varchar(10), month int, item int, val float);
insert into source(name, month, item, val) values
('bob', 200101, 1, 1.5),
('bob', 200102, 2, 1.5),
('bob', 200103, 2, 2),
('bob', 200108, 1, 0),
('bob', 200109, 10, 6.6),
('bob', 200110, 11, 2.2),
('bob', 200111, 9, 1),
('bob', 200207, 23, 0)
Upvotes: 3
Views: 1197
Reputation: 11893
Using the sample data provided,
if object_id('#months','U') is not null drop table #months;
create table #months (year int, month int);
insert into #months(year, month) values
(2000, 200007), (2001, 200101), (2002, 200201),
(2000, 200008), (2001, 200102), (2002, 200202),
(2000, 200009), (2001, 200103), (2002, 200203),
(2000, 200010), (2001, 200104), (2002, 200204),
(2000, 200011), (2001, 200105), (2002, 200205),
(2000, 200012), (2001, 200106), (2002, 200206),
(2001, 200107), (2002, 200007),
(2001, 200108), (2002, 200208),
(2001, 200109), (2002, 200208),
(2001, 200110),
(2001, 200111),
(2001, 200112)
if object_id('#source','U') is not null drop table #source;
create table #source (name varchar(10), month int, item int, val float);
insert into #source(name, month, item, val) values
('bob', 200101, 1, 1.5),
('bob', 200102, 2, 1.5),
('bob', 200103, 2, 2),
('bob', 200108, 1, 0),
('bob', 200109, 10, 6.6),
('bob', 200110, 11, 2.2),
('bob', 200111, 9, 1),
('bob', 200207, 23, 0)
;
this SQL:
with source as (
select
this.*
,( select top 1 month
from #source
where month > this.month
and name = this.name
order by month
) as NextMonth
from #source this
),
months as (
select
m.month, n.name
from #months m
cross join (select name from #source group by name) n
group by
m.month, n.name
)
select
s.name,
m.month,
s.item,
case when m.month = s.month then s.val else 0 end val
from months m
left join source s
on s.month <= m.month and m.month < s.NextMonth
and s.name = m.name
where s.name is not null
order by
s.name
,m.month;
go
yields as desired:
name month item val
---------- ----------- ----------- ----------------------
bob 200101 1 1.5
bob 200102 2 1.5
bob 200103 2 2
bob 200104 2 2
bob 200105 2 2
bob 200106 2 2
bob 200107 2 2
bob 200108 1 0
bob 200109 10 6.6
bob 200110 11 2.2
bob 200111 9 1
bob 200112 9 1
bob 200201 9 1
bob 200202 9 1
bob 200203 9 1
bob 200204 9 1
bob 200205 9 1
bob 200206 9 1
Edit: Minor corrections to ensure val is 0 for inserted rows
Upvotes: 1
Reputation: 37089
SQLFiddle example solution for SQL Server 2008
Perhaps not the most elegant one but it's one way to do it
Table and data
create table sources
(
source varchar(20),
month int,
item int,
val decimal
);
insert into sources values
('bob', 200101, 1, 1.5),
('bob', 200102, 2, 1.5),
('bob', 200104, 2, 1.5);
Notice that 200103 is not available. We will fill that up using SQL.
Code
-- Recursive CTE to create all dates between 2001-01-01 and 2001-03-02
WITH Dates as
(
select cast('2001-01-01' as date) as CalendarDate
union all
select dateadd(day , 1, CalendarDate) AS CalendarDate
from Dates
where dateadd (day, 1, CalendarDate) < '2001-03-02'
),
-- CTE to get distinct yyyymm
FormattedDates as
(
select
distinct cast(replace(left(CalendarDate,7), '-', '') as int) dt
from Dates
)
-- compare list of yyyymm with sources and fill missing information
select
'bob',
FormattedDates.dt,
(select top 1 item from sources where month < formatteddates.dt order by month desc),
0
from FormattedDates
left join sources on FormattedDates.dt = sources.month
where source is null
-- add sources to the mix
union
select * from sources;
How does this work
First we use recursion to list out all dates between two dates with a CTE
WITH Dates as
(
select cast('2001-01-01' as date) as CalendarDate
union all
select dateadd(day , 1, CalendarDate) AS CalendarDate
from Dates
where dateadd (day, 1, CalendarDate) < '2001-03-02'
)
select * from Dates;
This is a recursive CTE (common table expression) that starts with a date and recurses till it reaches the end date. It outputs dates from 2001-01-01 through 2001-03-01.
We crudely convert dates to yyyymm format using cast(replace(left(CalendarDate,7), '-', '') as int)
. But that would create multiple 200101, 200102, etc. So we use distinct.
WITH Dates as
(
SELECT cast('2001-01-01' as date) as CalendarDate
UNION ALL
SELECT dateadd(day , 1, CalendarDate) AS CalendarDate
FROM Dates
WHERE dateadd (day, 1, CalendarDate) < '2001-03-02'
)
select
distinct cast(replace(left(CalendarDate,7), '-', '') as int) dt
from Dates;
That will list out 200101, 200102 and 200103. The following code gives the same effect:
-- Recursive CTE to create all dates between 2001-01-01 and 2001-03-02
WITH Dates as
(
select cast('2001-01-01' as date) as CalendarDate
union all
select dateadd(day , 1, CalendarDate) AS CalendarDate
from Dates
where dateadd (day, 1, CalendarDate) < '2001-03-02'
),
-- CTE to get distinct yyyymm
FormattedDates as
(
select
distinct cast(replace(left(CalendarDate,7), '-', '') as int) dt
from Dates
)
-- List out the distinct dates
select dt from FormattedDates;
We left join these dates with the one in our sources table and select only those records where there isn't a record in sources table.
-- Recursive CTE to create all dates between 2001-01-01 and 2001-03-02
WITH Dates as
(
select cast('2001-01-01' as date) as CalendarDate
union all
select dateadd(day , 1, CalendarDate) AS CalendarDate
from Dates
where dateadd (day, 1, CalendarDate) < '2001-03-02'
),
-- CTE to get distinct yyyymm
FormattedDates as
(
select
distinct cast(replace(left(CalendarDate,7), '-', '') as int) dt
from Dates
)
-- compare list of yyyymm with sources and fill missing information
select
'bob',
FormattedDates.dt,
(select top 1 item from sources where month < formatteddates.dt order by month desc),
0
from FormattedDates
left join sources on FormattedDates.dt = sources.month
where source is null;
This would list out 'bob', 200103, 2, 0
. Great, so we have the missing piece of information. Let's combine it with the data from sources to get a full dataset.
All that needs to be added now is union all select * from sources;
to get what you want.
Upvotes: 1