Reputation: 211
I'm using this to delete duplicate records from a MySql/MariaDB table based on three columns being the same.
DELETE a FROM stest as a, stest as b
WHERE
(a.facility_id=b.facility_id OR a.facility_id IS NULL AND b.facility_id IS NULL)
AND (a.inspection_date=b.inspection_date OR a.inspection_date IS NULL AND b.inspection_date IS NULL)
AND (a.deficiency_tag=b.deficiency_tag OR a.deficiency_tag IS NULL AND b.deficiency_tag IS NULL)
AND a.recno < b.recno;
What I'd like to do is, where there are duplicate records, keep the one with the largest length(inspection_text) column. (In all likelihood, the inspection_text columns will be identical, but if they're not, I want to delete the smaller ones)
Can someone give me an idea of how I might modify the above statement to add this condition?
I'm also curious as to how the DELETE works, but if I change "DELETE a" to "SELECT a.*" It doesn't show the rows to be deleted but all rows in the table?
Upvotes: 0
Views: 62
Reputation: 133370
for obtain the the values you want to delete you could use and inner join with the value witg max_len of inspection_text for the dupliceted row and delete the row with lenght <> to max_len
delete from stest
inner join (
select facility_id, deficiency_tag, inspection_date , max(length( inspection_text)) as max_len from stest
where ( facility_id, deficiency_tag, inspection_date ) in (
select facility_id, deficiency_tag, inspection_date
from stest
group by facility_id, deficiency_tag, inspection_date
having count(*) > 1
)
group by facility_id, deficiency_tag, inspection_date
) t on stest.facility_id = t.facility_id
and stest.deficiency_tag = t.deficiency_tag
and stest.inspection_date = t.inspection_date
and length( stest.inspection_text) <> t.max_len
and this don't use tuple for join
delete from stest
inner join (
select
facility_id
, deficiency_tag
, inspection_date
, max( length( inspection_text) ) as max_len
from stest
innert join (
select
facility_id
, deficiency_tag
, inspection_date
from stest
group by facility_id, deficiency_tag, inspection_date
having count(*) > 1
) t2 on stest.facility_id = t2.facility_id and stest.deficiency_tag = t2.deficiency_tag and stest.inspection_date = t2.inspection_date
group by facility_id, deficiency_tag, inspection_date
) t on stest.facility_id = t.facility_id
and stest.deficiency_tag = t.deficiency_tag
and stest.inspection_date = t.inspection_date
same version without inner join but where ..
delete from stest , (
select
facility_id
, deficiency_tag
, inspection_date
, max( length( inspection_text) ) as my_max_len
from stest, (
select
facility_id
, deficiency_tag
, inspection_date
from stest
group by facility_id, deficiency_tag, inspection_date
having count(*) > 1
) t2 where stest.facility_id = t2.facility_id and stest.deficiency_tag = t2.deficiency_tag and stest.inspection_date = t2.inspection_date
group by facility_id, deficiency_tag, inspection_date
) t where stest.facility_id = t.facility_id
and stest.deficiency_tag = t.deficiency_tag
and stest.inspection_date = t.inspection_date
and length( stest.inspection_text) <> t.my_max_len
Upvotes: 1