Anthony Villanova
Anthony Villanova

Reputation: 23

Removing Duplicates in SQL without using Distinct function

I'm probably missing something simple on this

SELECT distinct
A.ClaimNumber, 
C.ActivityDetail, 
C.CreatedDateTime AS DateClosed, 
A.CreatedDate as OpenDate, 
DATEDIFF(DAY,CONVERT(DATE,A.CreatedDate),CONVERT(DATE,C.CreatedDateTime)) AS 'Days until closed'
FROM
    ASR.dbo.tblRepairRequestActivityStream C
    JOIN ASR.dbo.tblClaimReason A
    ON  
        A.ClaimNumber = C.ClaimNumber

WHERE
    C.ActivityDetail = 'FROM Ready_For_Pickup TO Closed'
    AND C.CreatedDateTime > '2017-04-16'
    and a.Item not like 'emms'
    and a.Item not like 'detl' 
    and a.item not like 'wind'
GROUP BY
    A.ClaimNumber, C.ActivityDetail, C.CreatedDateTime, A.CreatedDate, a.Item
ORDER BY 
    C.CreatedDateTime

I'm still getting duplicates on the claim numbers and i believe it could be that the ticket gets reopen and closed again. How can i remove the duplicates ?

Upvotes: 0

Views: 78

Answers (1)

SqlZim
SqlZim

Reputation: 38023

You could use min() and/or max() for your dates like so:

select
    A.ClaimNumber
  , C.ActivityDetail
  , max(C.CreatedDateTime) as DateClosed
  , min(A.CreatedDate) as OpenDate
  , datediff(day, convert(date,min(A.CreatedDate)), convert(date, max(C.CreatedDateTime)) as [Days until closed]
from asr.dbo.tblRepairRequestActivityStream C
  inner join asr.dbo.tblClaimReason A 
    on A.ClaimNumber = C.ClaimNumber
where C.ActivityDetail = 'from Ready_For_Pickup to Closed' 
  and C.CreatedDateTime > '2017-04-16'
  and a.Item not like 'emms' 
  and a.Item not like 'detl' 
  and a.item not like 'wind'
group by 
    A.ClaimNumber
  , C.ActivityDetail
  --, C.CreatedDateTime
  --, A.CreatedDate
  --, a.Item /* not in select */
order by max(C.CreatedDateTime) 

Also, a.Item is in your group by but not in your select, which may have been causing additional duplicates.


top with ties using row_number() version:

select top 1 with ties
    A.ClaimNumber
  , C.ActivityDetail
  , C.CreatedDateTime as DateClosed
  , A.CreatedDate as OpenDate
  , datediff(day, convert(date,A.CreatedDate), convert(date, C.CreatedDateTime)) as [Days until closed]
from asr.dbo.tblRepairRequestActivityStream C
  inner join asr.dbo.tblClaimReason A 
    on A.ClaimNumber = C.ClaimNumber
where C.ActivityDetail = 'from Ready_For_Pickup to Closed' 
  and C.CreatedDateTime > '2017-04-16'
  and a.Item not in ('emms','detl','wind')
order by row_number() over (
  partition by A.ClaimNumber 
  order by c.CreatedDatetime desc /* most recent */
         , a.CreatedDate desc     /* most recent */
  )

Using a Common Table Expression and row_number() version:

;with cte as (
  select
      A.ClaimNumber
    , C.ActivityDetail
    , C.CreatedDateTime as DateClosed
    , A.CreatedDate as OpenDate
    , datediff(day, convert(date,A.CreatedDate), convert(date, C.CreatedDateTime)) as [Days until closed]
    , rn = row_number() over (
      partition by A.ClaimNumber 
      order by c.CreatedDatetime desc /* most recent */
             , a.CreatedDate desc     /* most recent */
      )
  from asr.dbo.tblRepairRequestActivityStream C
    inner join asr.dbo.tblClaimReason A 
      on A.ClaimNumber = C.ClaimNumber
  where C.ActivityDetail = 'from Ready_For_Pickup to Closed' 
    and C.CreatedDateTime > '2017-04-16'
    and a.Item not in ('emms','detl','wind')
)
select *
from cte
where rn = 1

Upvotes: 3

Related Questions