jiggybotz
jiggybotz

Reputation: 61

Rank or running count on new value?

I have the following table (a temp table from several other tables)

PersonID    Date                        Status
1337        2019-04-24 02:28:53.677     DR 
1337        2019-04-24 05:23:17.410     XM 
1337        2019-04-24 06:51:00.160     DR 
1337        2019-04-24 12:58:02.783     DR 
1337        2019-04-24 13:23:26.150     UPD
1337        2019-04-24 13:24:54.527     UPD
1337        2019-04-24 13:42:36.503     UPD
1337        2019-04-24 13:42:36.970     D
1337        2019-04-24 13:45:19.020     D
1337        2019-04-24 13:45:20.800     A
1337        2019-04-24 13:48:56.393     A
1337        2019-04-24 13:48:57.143     UPD
1337        2019-04-24 13:49:57.630     UPD
1337        2019-04-24 13:50:37.613     A

Trying to get the following ranking as Im interested in clustering them in groups

PersonID    Date                        Status   Rank
1337        2019-04-24 02:28:53.677     DR       1
1337        2019-04-24 05:23:17.410     XM       2
1337        2019-04-24 06:51:00.160     DR       3
1337        2019-04-24 12:58:02.783     DR       3
1337        2019-04-24 13:23:26.150     UPD      4
1337        2019-04-24 13:24:54.527     UPD      4
1337        2019-04-24 13:42:36.503     UPD      4
1337        2019-04-24 13:42:36.970     D        5
1337        2019-04-24 13:45:19.020     D        5
1337        2019-04-24 13:45:20.800     A        6
1337        2019-04-24 13:48:56.393     A        6
1337        2019-04-24 13:48:57.143     UPD      7
1337        2019-04-24 13:49:57.630     UPD      7
1337        2019-04-24 13:50:37.613     A        8

Have had a go at using

Select Time, Status, 
  RANK() OVER (partition by ResourceStatusCode order by ReportedDateTime ) as R1
 ,ROW_NUMBER() OVER(partition by ResourceStatusCode ORDER BY ReportedDateTime) as RN2
from #tbl1
order by Time

Should i be trying to use another approach with row number on each new status?

Our server (sql server 2008) has disabled lead() and lag() function, sum over

Upvotes: 1

Views: 156

Answers (2)

iamdave
iamdave

Reputation: 12243

Here is a slightly more concise approach that should deliver what you need it to:

declare @t table(PersonID int,DateValue datetime,Stat varchar(5));
insert into @t values (1337,'2019-04-24 02:28:53.677','DR'),(1337,'2019-04-24 05:23:17.410','XM'),(1337,'2019-04-24 06:51:00.160','DR'),(1337,'2019-04-24 12:58:02.783','DR'),(1337,'2019-04-24 13:23:26.150','UPD'),(1337,'2019-04-24 13:24:54.527','UPD'),(1337,'2019-04-24 13:42:36.503','UPD'),(1337,'2019-04-24 13:42:36.970','D'),(1337,'2019-04-24 13:45:19.020','D'),(1337,'2019-04-24 13:45:20.800','A'),(1337,'2019-04-24 13:48:56.393','A'),(1337,'2019-04-24 13:48:57.143','UPD'),(1337,'2019-04-24 13:49:57.630','UPD'),(1337,'2019-04-24 13:50:37.613','A');

with d as
(
    select PersonID
          ,DateValue
          ,Stat
          ,row_number() over (order by DateValue,Stat) as rn
    from @t
)
select d1.PersonID
      ,d1.DateValue
      ,d1.Stat
      ,sum(case when d1.Stat = d2.Stat then 0 else 1 end) over (order by d1.DateValue) as Ranking
from d as d1
    left join d as d2
        on d1.rn-1 = d2.rn
order by d1.DateValue

Output

+----------+-------------------------+------+---------+
| PersonID |        DateValue        | Stat | Ranking |
+----------+-------------------------+------+---------+
|     1337 | 2019-04-24 02:28:53.677 | DR   |       1 |
|     1337 | 2019-04-24 05:23:17.410 | XM   |       2 |
|     1337 | 2019-04-24 06:51:00.160 | DR   |       3 |
|     1337 | 2019-04-24 12:58:02.783 | DR   |       3 |
|     1337 | 2019-04-24 13:23:26.150 | UPD  |       4 |
|     1337 | 2019-04-24 13:24:54.527 | UPD  |       4 |
|     1337 | 2019-04-24 13:42:36.503 | UPD  |       4 |
|     1337 | 2019-04-24 13:42:36.970 | D    |       5 |
|     1337 | 2019-04-24 13:45:19.020 | D    |       5 |
|     1337 | 2019-04-24 13:45:20.800 | A    |       6 |
|     1337 | 2019-04-24 13:48:56.393 | A    |       6 |
|     1337 | 2019-04-24 13:48:57.143 | UPD  |       7 |
|     1337 | 2019-04-24 13:49:57.630 | UPD  |       7 |
|     1337 | 2019-04-24 13:50:37.613 | A    |       8 |
+----------+-------------------------+------+---------+

Edit following version info

As you are on v2008 and therefore don't have order by available in windowed aggregates, your options are limited. The only way I can really see of achieving this without upgrading is to use a recursive cte:

declare @t table(PersonID int,DateValue datetime,Stat varchar(5));
insert into @t values (1337,'2019-04-24 02:28:53.677','DR'),(1337,'2019-04-24 05:23:17.410','XM'),(1337,'2019-04-24 06:51:00.160','DR'),(1337,'2019-04-24 12:58:02.783','DR'),(1337,'2019-04-24 13:23:26.150','UPD'),(1337,'2019-04-24 13:24:54.527','UPD'),(1337,'2019-04-24 13:42:36.503','UPD'),(1337,'2019-04-24 13:42:36.970','D'),(1337,'2019-04-24 13:45:19.020','D'),(1337,'2019-04-24 13:45:20.800','A'),(1337,'2019-04-24 13:48:56.393','A'),(1337,'2019-04-24 13:48:57.143','UPD'),(1337,'2019-04-24 13:49:57.630','UPD'),(1337,'2019-04-24 13:50:37.613','A');

with d as
(
    select PersonID
          ,DateValue
          ,Stat
          ,row_number() over (order by DateValue,Stat) as rn
    from @t
)
,r as
(
    select PersonID
          ,DateValue
          ,Stat
          ,rn
          ,cast(1 as int) as Ranking
    from d
    where rn = 1

    union all

    select d.PersonID
          ,d.DateValue
          ,d.Stat
          ,d.rn
          ,case when d.Stat = r.Stat then r.Ranking else r.Ranking+1 end as Ranking
    from r
        join d
            on r.rn+1 = d.rn
)
select PersonID
      ,DateValue
      ,Stat
      ,Ranking
from r
order by DateValue;

Upvotes: 1

Sergey Menshov
Sergey Menshov

Reputation: 3906

Try the following:

WITH cte AS(
  SELECT PersonID,CAST([Date] AS datetime) [Date],[Status]
  FROM
    (VALUES
      (1337,'2019-04-24 02:28:53.677','DR '),
      (1337,'2019-04-24 05:23:17.410','XM '),
      (1337,'2019-04-24 06:51:00.160','DR '),
      (1337,'2019-04-24 12:58:02.783','DR '),
      (1337,'2019-04-24 13:23:26.150','UPD'),
      (1337,'2019-04-24 13:24:54.527','UPD'),
      (1337,'2019-04-24 13:42:36.503','UPD'),
      (1337,'2019-04-24 13:42:36.970','D  '),
      (1337,'2019-04-24 13:45:19.020','D  '),
      (1337,'2019-04-24 13:45:20.800','A  '),
      (1337,'2019-04-24 13:48:56.393','A  '),
      (1337,'2019-04-24 13:48:57.143','UPD'),
      (1337,'2019-04-24 13:49:57.630','UPD'),
      (1337,'2019-04-24 13:50:37.613','A  ')
    ) v(PersonID,[Date],[Status])
)
SELECT
  PersonID,
  [Date],
  [Status],
  SUM(IsNext)OVER(PARTITION BY PersonID ORDER BY [Date],[Status])+1 [Rank]
FROM
  (
    SELECT *,IIF(LAG([Status])OVER(PARTITION BY PersonID ORDER BY [Date],[Status])<>[Status],1,0) IsNext
    FROM cte
  ) q
ORDER BY PersonID,[Date],[Status]

A variant without LAG:

WITH cte1 AS(
  SELECT PersonID,CAST([Date] AS datetime) [Date],[Status]
  FROM
    (VALUES
      (1337,'2019-04-24 02:28:53.677','DR '),
      (1337,'2019-04-24 05:23:17.410','XM '),
      (1337,'2019-04-24 06:51:00.160','DR '),
      (1337,'2019-04-24 12:58:02.783','DR '),
      (1337,'2019-04-24 13:23:26.150','UPD'),
      (1337,'2019-04-24 13:24:54.527','UPD'),
      (1337,'2019-04-24 13:42:36.503','UPD'),
      (1337,'2019-04-24 13:42:36.970','D  '),
      (1337,'2019-04-24 13:45:19.020','D  '),
      (1337,'2019-04-24 13:45:20.800','A  '),
      (1337,'2019-04-24 13:48:56.393','A  '),
      (1337,'2019-04-24 13:48:57.143','UPD'),
      (1337,'2019-04-24 13:49:57.630','UPD'),
      (1337,'2019-04-24 13:50:37.613','A  ')
    ) v(PersonID,[Date],[Status])
),
cte2 AS(
  SELECT *,ROW_NUMBER()OVER(PARTITION BY PersonID ORDER BY [Date],[Status]) N
  FROM cte1
)
SELECT
  PersonID,
  [Date],
  [Status],
  SUM(IsNext)OVER(PARTITION BY PersonID ORDER BY [Date],[Status])+1 [Rank]
FROM
  (
    SELECT q1.*,CASE WHEN q1.[Status]<>q2.[Status] THEN 1 ELSE 0 END IsNext
    FROM cte2 q1
    LEFT JOIN cte2 q2 ON q1.PersonID=q2.PersonID AND q1.N=q2.N+1
  ) q
ORDER BY PersonID,[Date],[Status]

Upvotes: 0

Related Questions