Reputation: 257047
Given a set of rows, with a field sometimes null
and sometimes not:
SELECT
Date, TheThing
FROM MyData
ORDER BY Date
Date TheThing
----------------------- --------
2016-03-09 08:17:29.867 a
2016-03-09 08:18:33.327 a
2016-03-09 14:32:01.240 NULL
2016-10-21 19:53:49.983 NULL
2016-11-12 03:25:21.753 b
2016-11-24 07:43:24.483 NULL
2016-11-28 16:06:23.090 b
2016-11-28 16:09:07.200 c
2016-12-10 11:21:55.807 c
I want to have a ranking column that counts the non-null values:
Date TheThing DesiredTotal
----------------------- -------- ------------
2016-03-09 08:17:29.867 a 1
2016-03-09 08:18:33.327 a 2
2016-03-09 14:32:01.240 NULL 2 <---notice it's still 2 (good)
2016-10-21 19:53:49.983 NULL 2 <---notice it's still 2 (good)
2016-11-12 03:25:21.753 b 3
2016-11-24 07:43:24.483 NULL 3 <---notice it's still 3 (good)
2016-11-28 16:06:23.090 b 4
2016-11-28 16:09:07.200 c 5
2016-12-10 11:21:55.807 c 6
I try the obvious:
SELECT
Date, TheThing,
RANK() OVER(ORDER BY Date) AS Total
FROM MyData
ORDER BY Date
But RANK()
counts nulls:
Date TheThing Total
----------------------- -------- -----
2016-03-09 08:17:29.867 a 1
2016-03-09 08:18:33.327 a 2
2016-03-09 14:32:01.240 NULL 3 <--- notice it is 3 (bad)
2016-10-21 19:53:49.983 NULL 4 <--- notice it is 4 (bad)
2016-11-12 03:25:21.753 b 5 <--- and all the rest are wrong (bad)
2016-11-24 07:43:24.483 NULL 7
2016-11-28 16:06:23.090 b 8
2016-11-28 16:09:07.200 c 9
2016-12-10 11:21:55.807 c 10
How can i instruct RANK()
(or DENSE_RANK()
) to not count nulls?
Why yes! Much worse:
SELECT
Date, TheThing,
RANK() OVER(PARTITION BY(CASE WHEN TheThing IS NOT NULL THEN 1 ELSE 0 END) ORDER BY Date) AS Total
FROM MyData
ORDER BY Date
But RANK()
counts nulls:
Date TheThing Total
----------------------- -------- -----
2016-03-09 08:17:29.867 a 1
2016-03-09 08:18:33.327 a 2
2016-03-09 14:32:01.240 NULL 1 <--- reset to 1?
2016-10-21 19:53:49.983 NULL 2 <--- why go up?
2016-11-12 03:25:21.753 b 3
2016-11-24 07:43:24.483 NULL 3 <--- didn't reset?
2016-11-28 16:06:23.090 b 4
2016-11-28 16:09:07.200 c 5
2016-12-10 11:21:55.807 c 6
And now i randomly type things - frantic flailing.
SELECT
Date, TheThing,
RANK() OVER(PARTITION BY(CASE WHEN TheThing IS NOT NULL THEN 1 ELSE NULL END) ORDER BY Date) AS Total
FROM MyData
ORDER BY Date
SELECT
Date, TheThing,
DENSE_RANK() OVER(PARTITION BY(CASE WHEN TheThing IS NOT NULL THEN 1 ELSE NULL END) ORDER BY Date) AS Total
FROM MyData
ORDER BY Date
Edit: With all the answers, it took many iterations to find all the edge cases that i don't want. In the end what i conceptually wanted was OVER()
in order to count. I had no idea OVER
applied to anything other than RANK
(and DENSE_RANK
).
http://sqlfiddle.com/#!18/c6d87/1
Upvotes: 5
Views: 12294
Reputation: 37487
What about subtracting the current count of NULL
s from the rank()
?
SELECT date,
thething,
rank() OVER (ORDER BY date)
-
sum(CASE
WHEN thething IS NULL THEN
1
ELSE
0
END) OVER (ORDER BY date) desiredtotal
FROM mydata;
That should also keep the duplicates and gaps rank()
produces and doesn't need a subquery.
Upvotes: 1
Reputation: 37500
Try this:
declare @tbl table (dt datetime, col int);
insert into @tbl values
('2016-03-09 08:17:29.867', 1),
('2016-03-09 08:18:33.327', 1),
('2016-03-09 14:32:01.240', NULL),
('2016-10-21 19:53:49.983', NULL),
('2016-11-12 03:25:21.753', 1),
('2016-11-24 07:43:24.483', NULL),
('2016-11-28 16:06:23.090', 1),
('2016-11-28 16:09:07.200', 1),
('2016-12-10 11:21:55.807', 1);
select dt,
col,
sum(case when col is null then 0 else 1 end) over (order by dt) rnk
from @tbl
The idea is really simple: if you assign 1 to non-null values and zero where the column is null, cumulative sum ordered by date is works exactly as rank excluding nulls.
Other way would be to use RANK
combined with ROW_NUMBER
, which will respect ties in Date
column and works exactly as RANK
respecting NULL
s:
select dt,
col,
case when col is not null then
rank() over (order by dt)
else
rank() over (order by dt) - row_number() over (partition by rnDiff order by dt)
end rnk
from (
select dt,
col,
row_number() over (order by dt) -
row_number() over (partition by coalesce(col, 0) order by dt) rnDiff
from @tbl
) a
order by dt
Upvotes: 4
Reputation: 1271003
I think you are looking for a cumulative count:
SELECT Date, TheThing,
COUNT(theThing) OVER (ORDER BY Date) AS Total
FROM MyData
ORDER BY Date;
Upvotes: 5
Reputation: 50173
I would use subquery
:
SELECT [Date], TheThing,
(SELECT COUNT(*)
FROM MyData m
WHERE m.[Date] <= m1.[Date] AND m.TheThing IS NOT NULL
) AS DesiredTotal
FROM MyData m1;
In, similar way you can also try with apply
:
SELECT *
FROM MyData m1 CROSS APPLY
(SELECT COUNT(*) AS DesiredTotal
FROM MyData m
WHERE m.[Date] <= m1.[Date] AND m.TheThing IS NOT NULL
) m2;
Upvotes: 0
Reputation: 2300
I used a CTE to get the right date first and then applied the rank to the modified date:
CREATE TABLE #tmp(dt datetime, TheThing int)
INSERT INTO #tmp VALUES('2016-03-09 08:17:29.867', 1)
INSERT INTO #tmp VALUES('2016-03-09 08:18:33.327', 1)
INSERT INTO #tmp VALUES('2016-03-09 14:32:01.240', NULL)
INSERT INTO #tmp VALUES('2016-10-21 19:53:49.983', NULL)
INSERT INTO #tmp VALUES('2016-11-12 03:25:21.753', 1)
INSERT INTO #tmp VALUES('2016-11-24 07:43:24.483', NULL)
INSERT INTO #tmp VALUES('2016-11-28 16:06:23.090', 1)
INSERT INTO #tmp VALUES('2016-11-28 16:09:07.200', 1)
INSERT INTO #tmp VALUES('2016-12-10 11:21:55.807', 1)
;WITH CTE as (
SELECT
CASE WHEN TheThing IS NULL THEN (SELECT MAX(dt) from #tmp OrigTbl where OrigTbl.dt < SubTbl.dt and OrigTbl.TheThing IS NOT NULL) ELSE dt end dtMod,
SubTbl.dt,SubTbl.TheThing
from #tmp SubTbl)
SELECT dt, TheThing, DENSE_RANK() over(ORDER BY dtMod) from CTE
Upvotes: 0
Reputation: 82010
My lizard brain brings me here... sum() over vs rank()
Select *
,NewCol = sum(sign(TheThing)) over (Order by Date)
,OrEven = sum(TheThing/TheThing) over (Order by Date)
From MyData
Returns
Upvotes: 1