rafamaniac
rafamaniac

Reputation: 57

Repeated data on inserted rows

--demo setup 
drop table if exists dbo.product
go

create table dbo.Product
(
    ProductId int,
    ProductTitle varchar(55),
    ProductCategory varchar(255),
    Loaddate datetime
)

insert into dbo.Product
values (1, 'Table', 'ABCD', '3/4/2018'),
       (1, 'Table', 'ABCD', '3/5/2018'),
       (1, 'Table', 'ABCD', '3/6/2018'),
       (1, 'Table', 'XYZ', '3/7/2018'),
       (1, 'Table', 'XYZ', '3/8/2018'),
       (1, 'Table', 'XYZ', '3/9/2018'),
       (1, 'Table', 'GHI', '3/10/2018'),
       (1, 'Table', 'GHI', '3/11/2018'),
       (1, 'Table', 'XYZ', '3/12/2018'),
       (1, 'Table', 'XYZ', '3/13/2018')
 
SELECT 
    product.productid, 
    product.producttitle, 
    product.productcategory, 
    MIN(product.loaddate) AS BeginDate, 
    -- ,max(product.LoadDate) as BeginDate1 
    CASE 
        WHEN MAX(product.loaddate) = MAX(oa.enddate1) 
            THEN '12/31/9999' 
            ELSE MAX(product.loaddate) 
    END AS EndDate 
FROM   
    dbo.product product 
CROSS APPLY
    (SELECT MAX(subproduct.loaddate) EndDate1 
     FROM dbo.product subproduct 
     WHERE subproduct.productid = product.productid) oa 
GROUP BY 
    productid, producttitle, productcategory

Output:

productid producttitle productcategory BeginDate EndDate
1 Table ABCD 2018-03-04 00:00:00.000 2018-03-06 00:00:00.000
1 Table XYZ 2018-03-07 00:00:00.000 9999-12-31 00:00:00.000
1 Table GHI 2018-03-10 00:00:00.000 2018-03-11 00:00:00.000

Desired output:

productid producttitle productcategory BeginDate EndDate
1 Table ABCD 2018-03-04 00:00:00.000 2018-03-06 00:00:00.000
1 Table XYZ 2018-03-07 00:00:00.000 2018-03-09 00:00:00.000
1 Table GHI 2018-03-10 00:00:00.000 2018-03-11 00:00:00.000
1 Table XYZ 2018-03-12 00:00:00.000 9999-12-31 00:00:00.000

The last two inserted rows repeat the data from Loaddate '3/7/2018'-'3/9/2018', this doesn't happen if any of the new inserted rows doesn't repeat data. The only thing that changes is the LoadDate, giving me incorrect output. how can i get something like that desired output?

Upvotes: 0

Views: 112

Answers (1)

Andrey Baulin
Andrey Baulin

Reputation: 679

Well, first of all, you need to find a sequence number over all your records. If you already have a primary key, that's good. In example you gave us, there's no such column, so let's generate it. Then, we make pairs with start and end dates for each product's category change. Another thing is to group all these product's category changes. Finally, we make just a simple group by:

;
with cte as (   select  *,
                        row_number() over(partition by ProductId order by Loaddate) as rn
                from product 
), cte2 as (    select  t1.ProductId, 
                        t1.ProductTitle, 
                        t1.ProductCategory, 
                        t1.Loaddate                         as BeginDate,  
                        case 
                            when t1.ProductCategory <> t2.ProductCategory
                            then t1.Loaddate
                            else coalesce(t2.Loaddate, null) 
                        end as EndDate,
                        row_number() over(order by t1.ProductId, t1.Loaddate)                                   as rn_overall,
                        row_number() over(partition by t1.ProductId, t1.ProductCategory order by t1.Loaddate)   as rn_category
                from cte as t1
                    left join cte as t2
                        on t2.ProductId = t1.ProductId
                        and t2.rn = t1.rn + 1
), cte3 as (    select  *, 
                        min(rn_overall) over (partition by ProductId, ProductCategory, rn_overall - rn_category) as product_group
                from cte2
)

select  ProductId, ProductTitle, ProductCategory, 
        min(BeginDate) as BeginDate, 
        case 
            when max(case when EndDate is null then 1 else 0 end) = 0
            then max(EndDate)
            else null
        end as EndDate
from cte3
group by ProductId, ProductTitle, ProductCategory, product_group
order by ProductId, BeginDate

Upvotes: 1

Related Questions