parliament
parliament

Reputation: 22964

SQL - Update column for each duplicate in other column

I'm looking for an SQL query to update a column to the same Id for each duplicate in another column and then delete each overwritten row from another table.

For example

I have:

IndustryId      ProductId       ExternalId
144             3332            13
147             3423            13
148             3532            13
2637            63199           32
121             2789            32

I want:

IndustryId      ProductId       ExternalId
144             3332            13
147             3332            13
148             3332            13
2637            63199           32
121             63199           32

I also need to record the overwritten ProductId values so that I can delete those rows in another table (3423, 3532, and 2789 should be deleted in the other table). Could be in more than one query if neccesary, that doesnt matter.

What is the best way to achieve this?

Upvotes: 0

Views: 69

Answers (2)

Mike M
Mike M

Reputation: 1435

That isn't too tough, especially if you break out the steps. Check this out:

if object_id('tempdb..#myData') is not null
    drop table #myData;

create table #myData ( industryid int, productId int, externalId int);

insert into #myData (industryId, productId, externalId)
values (144,3332,13);
insert into #myData (industryId, productId, externalId)
values (147,3423,13);
insert into #myData (industryId, productId, externalId)
values (148,3532,13);
insert into #myData (industryId, productId, externalId)
values (2637,63199,32);
insert into #myData (industryId, productId, externalId)
values (121,2789,32);

                        --select * from #myData;
-------------------------------------------------
if object_id('tempdb..#IdsToKeep') is not null
    drop table #IdsToKeep;
if object_id('tempdb..#badRows') is not null
    drop table #badRows;

create table #IdsToKeep (externalId int, productId int);
create table #badRows ( industryId int, productId int, externalId int);

insert into #IdsToKeep 
select
    externalId, min(productId)
from
    #myData
group by
    externalId;


--- Capture rows that will be changed ---
insert into #badRows
select
    md.industryId, md.productId, md.externalId
from
    #myData md
        left join #IdsToKeep itk on 
                            md.externalId = itk.externalId
                            and
                            md.productId = itk.productId
where
    itk.productId IS NULL
;


--- Make the update to the main table ---
update 
    #myData
set
    productId = itk.productId
from
    #myData
        inner join #IdsToKeep itk on #myData.externalId = itk.externalId
;


----------
select * from #mydata;
select * from #badRows;

Upvotes: 1

Balmukund Lakhani
Balmukund Lakhani

Reputation: 41

declare @table table (IndustryId int, ProductId int, ExternalId int) insert into @table values (144,3332,13), (147,3423,13), (148,3532,13), (2637,63199,32), (121,2789,32)

select * from @table

;with cte (productid, externalid) as (select max(productid), ExternalId from @table group by ExternalId) select t.IndustryId, c.productid, c.externalid from cte c right outer join @table t on c.externalid = t.ExternalId 

Upvotes: 0

Related Questions