user3920526
user3920526

Reputation: 397

Delete duplicated row and update the row using the duplicated row id

This is the Scenario : I have a duplicate rows in my table with the same Id , Name and so on .

1) I have to find the duplicate row matching all the criteria ( this is done)

2) Delete them only if the criteria match

3) Use the id of the deleted record and update the existing row in the table

For this i have created a 2 temp table. Temp1 is the table with all the record. Temp2 consist of duplicated row.

IF OBJECT_ID('tempdb..#Temp1') IS NOT NULL
DROP TABLE #Temp1

IF OBJECT_ID('tempdb..#Temp2') IS NOT NULL
DROP TABLE #Temp2    

IF OBJECT_ID('tempdb..#Temp3') IS NOT NULL
DROP TABLE #Temp3    

CREATE Table #Temp1 ( 
Id int,
Name NVARCHAR(64),
StudentNo INT NULL,
ClassCode NVARCHAR(8) NULL, 
Section  NVARCHAR(8) NULL, 
)


INSERT INTO #Temp1 (Id, Name,StudentNo,ClassCode,Section) Values(1,'Joe',123,'A1', 'I')
INSERT INTO #Temp1 (Id, Name,StudentNo,ClassCode,Section) Values(1,'Joe',123,'A1', 'I')
INSERT INTO #Temp1 (Id, Name,StudentNo,ClassCode,Section) Values(2,'Harry',113,'X2', 'H')
INSERT INTO #Temp1 (Id, Name,StudentNo,ClassCode,Section) Values(2,'Harry',113,'X2', 'H')
INSERT INTO #Temp1 (Id, Name,StudentNo,ClassCode,Section) Values(3,'Elle',121,'J1', 'E1')
INSERT INTO #Temp1 (Id, Name,StudentNo,ClassCode,Section) Values(3,'Elle',121,'J1', 'E')
INSERT INTO #Temp1 (Id, Name,StudentNo,ClassCode,Section) Values(8,'Jane',191,'A1', 'E')
INSERT INTO #Temp1 (Id, Name,StudentNo,ClassCode,Section) Values(5,'Silva',811,'S1', 'SE')
INSERT INTO #Temp1 (Id, Name,StudentNo,ClassCode,Section) Values(6,'Juan',411,'S2', 'SE')
INSERT INTO #Temp1 (Id, Name,StudentNo,ClassCode,Section) Values(7,'Carla',431,'S2', 'SE')

;WITH CTE AS (
    select 
ROW_NUMBER() over (partition by Id                                          
                                , StudentNo                                                                                 
                                order by Id, StudentNo)as Duplicate_RowNumber
, * from #Temp1 )

select  t1.Id,t1.Name,t1.StudentNo,t1.Section,t1.ClassCode 
INTO #Temp2 
from CTE as c INNER JOIN #Temp1 as t1 ON t1.Id = c.Id                                           
                                and t1.StudentNo = t1.StudentNo
                                and c.Duplicate_RowNumber >1

-- this will have 6 rows all the duplicates are included 
--select * from #Temp2 


-- this is for output clause 
DECLARE @inserted Table (Id int,
                        Name NVARCHAR(64),
                        StudentNo INT NULL,
                        ClassCode NVARCHAR(8) NULL, 
                        Section  NVARCHAR(8) NULL)


DELETE FROM  #temp1 
OUTPUT deleted.Id , deleted.Name ,deleted.StudentNo ,deleted.ClassCode ,deleted.Section into @inserted
 WHERE EXISTS ( SELECT * FROM #Temp2 as t2 
                                where  #temp1.Id = t2.Id 
                                        and #temp1.Name = t2.Name
                                            and #temp1.StudentNo = t2.StudentNo                                                      
                                            and #temp1.ClassCode = t2.ClassCode
                                            and #temp1.Section = t2.Section)
-- this is to check what is delete so that i can join it and update the table temp1                                         
select * from @inserted

You can see below the query should not delete the last two highlighted column because the Section does not match. It should only delete matching criteria from Temp1 and Temp2.

enter image description here

Scenario 2 : Delete the duplicate record in Temp1 and use the key in order to update the data to NULL for Section and Classcode . This is what i expect with the highlighted to be NULLs . enter image description here

You can run this query yourself - just copy and paste.

Upvotes: 0

Views: 78

Answers (1)

Avi
Avi

Reputation: 1845

Yes, for scenario #1 it is going to delete the rows because the problem is in this section.

I added this table for references. 

Added this #temp2 table to clarify for later use.

 CREATE Table #Temp2 ( 
 Id int,
 Name Varchar(64),
 StudentNo INT NULL,
 ClassCode Varchar(8) NULL, 
 Section  Varchar(8) NULL, 
 )
IF OBJECT_ID('tempdb..#tmp4') IS NOT NULL
DROP TABLE #tmp4

select  t1.Id,t1.Name,t1.StudentNo,t1.Section,t1.ClassCode, 
Duplicate_RowNumber 
INTO #Duplicatedata 
from CTE as c INNER JOIN #Temp1 as t1 ON t1.Id = c.Id                                           
                            and t1.StudentNo = t1.StudentNo
                            and c.Duplicate_RowNumber >1

select * from #Duplicatedata 

This is going to satisfy both condition as #temp 1 will have both rows for Elle as your join condition is only on ID and Student No.

I added row number column for clarity.

  Id    Name    StudentNo   Section ClassCode   Duplicate_RowNumber
   1    Joe          123    I         A1                 2
   1    Joe          123    I         A1                 2
   2    Harry        113    H         X2                 2
   2    Harry        113    H         X2                 2
   3    Elle         121    E1        J1                 2
   3    Elle         121    E         J1                 2

As your Partition is based by Student No and ID, every duplicate row will have 2 or more row numbers.

You can use this approach to delete.

select 
ROW_NUMBER() over (partition by Id                                          
                            , StudentNo                                                                                 
                            order by Id, StudentNo, section)as Duplicate_RowNumber
, * into #tmp4 from #Temp1

--You can add section in your order as well for consistency purpose. 
delete   
from #tmp4  
output deleted.id, deleted.Name, deleted.StudentNo, deleted.ClassCode, 
deleted.Section into #Temp2 
where Duplicate_RowNumber > 1 

After that it seems like you want to keep one row in your final table and put the other one in you deleted table. For Elle it will delete one of the rows from Final table and keep only one since your partition is not based on section.

To make sure that you delete 1 row from your final table you can use this.

DELETE t 
OUTPUT deleted.Id , deleted.Name ,deleted.StudentNo ,deleted.ClassCode 
,deleted.Section into @inserted  FROM    
(select *, row_number() over (Partition by tm.name, tm.studentNo Order by ID, 
StudentNo, section ) rownum  from  #temp1 tm) t
join              #Temp2 t2 on  t.Id = t2.Id 
                                         and t.Name = t2.Name
                                        and t.StudentNo = t2.StudentNo                                                      
                                        and t.ClassCode   =  t2.ClassCode 
                                        and   t.Section =  t2.Section
where t.rownum > 1 

If you notice I added this row number, so that it will not two delete the rows from final table, since Joe and Harry has all the matching attributes, and it will delete two rows.

select * from @inserted

Output you get: 

 Id Name    StudentNo   ClassCode   Section
 3  Elle    121          J1          E1
 2  Harry   113          X2          H
 1  Joe     123          A1          I

Finally you can update final table in this way. #Scenario 2

 update TMP 
 SET ClassCode = NULL, SECTION = NULL 
 FROM 
#Temp1 TMP 
JOIN @INSERTED I ON TMP.Id = I.Id 
                AND TMP.StudentNo = I.StudentNo 

 SELECT * FROM #Temp1 

Final Output:

 Id Name    StudentNo   ClassCode   Section
 1  Joe     123             NULL    NULL
 2  Harry   113             NULL    NULL
 3  Elle    121             NULL    NULL
 8  Jane    191             A1       E
 5  Silva   811             S1       SE
 6  Juan    411             S2       SE
 7  Carla   431             S2       SE

Please note that I have added scripts and output only for the parts where it required change, and rest part is same script provided by you.

Upvotes: 1

Related Questions