paul
paul

Reputation: 22001

Select Rows According to Time Window

I have a table of items:

create table #items (
        ItemId int identity(1,1) primary key,
        GroupId int null,
        CreatedTimestamp datetime)

insert into #items values   (1, '2015-01-01'),
                            (2, '2015-02-02'),
                            (3, '2015-03-03'),
                            (1, '2015-06-01'),
                            (2, '2015-09-02'),
                            (2, '2015-10-02'),
                            (1, '2016-05-05'),
                            (1, '2016-07-16'),
                            (2, '2016-03-01')

I would like to retrieve the items with the rule that there may only be one row per GroupId within each 12 month period (starting from the minimum CreatedTimestamp for each GroupId).

So, for the dataset above, rows 1,2,3,6 and 8 would be returned:

ItemId GroupId CreatedTimestamp
------ ------- -----------
1      1       2015-01-01  - yes (1st in group 1)
2      2       2015-02-02  - yes (1st in group 2)
3      3       2015-03-03  - yes (1st in group 3)
4      1       2015-06-01  - no  (within 12 months of item in group 1)
5      2       2015-09-02  - no  (within 12 months of item in group 2)
6      2       2015-10-02  - no  (within 12 months of item in group 2)
7      1       2016-05-05  - yes (over 12 months since last returned item in group 1)
8      1       2016-07-16  - no  (within 12 months of item in group 1)
9      2       2016-03-01  - yes (over 12 months since last returned item in group 2)

I have worked out a solution using a cursor (below), but wonder whether there is a set based solution, as over the years this table will grow to millions of rows.

declare @lastTable table (
    groupId int not null,
    lastItemDate datetime not null
)

declare @groupId int
declare @timestamp datetime
declare @lastgroupTime datetime

declare caseCursor CURSOR for select groupId, CreatedTimestamp from #items order by CreatedTimestamp

open caseCursor

fetch from caseCursor into @groupId, @timestamp

while @@FETCH_STATUS = 0
begin
    select  @lastGroupTime = max(lastItemDate)
    from    @lastTable
    where   groupId = @groupID

    if  (@lastgroupTime is null or  dateadd(yy, 1, @lastGroupTime) < @timestamp)
    begin
        insert into @lastTable values (@groupId, @timestamp)
    end

    fetch from caseCursor into @groupId, @timestamp
end

close caseCursor
deallocate caseCursor

select * from @lastTable

Upvotes: 0

Views: 617

Answers (1)

Gordon Linoff
Gordon Linoff

Reputation: 1270091

Start by selecting the minimum created date for each group id. Then number the years and select one row from each period. Unfortunately, SQL Server is not great at counting years between two dates. Perhaps the approximation of 365 days/year would suffice:

select i.*
from (select i.*,
             row_number() over (partition by groupid, datediff(day, minct, createdtimestamp) / 365 
                                order by CreatedTimestamp
                               ) as seqnum
      from (select i.*, min(CreatedTimestamp) over (partition by groupid) as minct
            from #items i
           ) i
     ) i
where seqnum = 1;

Upvotes: 2

Related Questions