Removing duplicate rows while also updating relations

Question

My data is set up as follows:

CREATE TABLE TableA
(
    id int IDENTITY,
    name varchar(256),
    description varchar(256)
)

CREATE TABLE TableB
(
    id int IDENTITY,
    name varchar(256),
    description varchar(256)
) --unique constraint on name, description

CREATE TABLE TableA_TableB
(
    idA int,
    idB int
) --composite key referencing TableA and TableB

The situation is that I have many duplicate records in TableB that violate the unique constraint, and those duplicate records are referenced in TableA_TableB. So I'm trying to remove those records, which is simple enough (using the following CTE), but what would be the best way to update the records in TableA_TableB to reflect this change, i.e, have the TableA_TableB records reference the same ID in TableB as opposed to different IDs for each of the duplicates?

;WITH cte
 AS (SELECT ROW_NUMBER() OVER (PARTITION BY [Name], [Description]
                                   ORDER BY ( SELECT 0)) RN
     FROM   TableB)

DELETE FROM cte
WHERE  RN = 1

Bogdan Sahlean · Accepted Answer

Note: changed b.RowNum=1 to b.RowNum>1

First, you should try with ROLLBACK and then, if it's OK, uncomment COMMIT (this script wasn't tested):

DECLARE @UpdatedRows TABLE(ID INT PRIMARY KEY);

BEGIN TRANSACTION;

;WITH Base
AS(
    SELECT  ROW_NUMBER() OVER (PARTITION BY [Name], [Description] ORDER BY ( SELECT 0)) RowNum,
        MIN(id) OVER(PARTITION BY [Name], [Description]) AS NewID,
        ID -- Old ID
    FROM    TableB
),TableB_RowsForUpdate
AS(
    SELECT  *
    FROM    Base b
    WHERE   b.RowNum>1
)
UPDATE  target
SET IDB=b.NewID
OUTPUT  deleted.IDB INTO @UpdatedRows
FROM    TableA_TableB target
INNER JOIN TableB_RowsForUpdate b ON target.IDB=b.ID;

DELETE  b
FROM    TableB b INNER JOIN @UpdatedRows upd ON b.ID=upd.ID;

ROLLBACK;
-- COMMIT;

Removing duplicate rows while also updating relations

Answers (1)

Related Questions