Reputation: 2165
I have a SQL Server temporal table with the following data:
ID ValidFrom ValidTo MyValue
23 7/7/19 13:51 7/7/19 13:51 0
23 7/7/19 13:51 9/9/19 11:22 0
23 9/9/19 11:22 9/9/19 11:23 0
23 9/9/19 11:23 5/14/20 23:02 0
23 5/14/20 23:02 5/16/20 20:02 0
23 5/16/20 20:02 5/16/20 23:53 0
23 5/16/20 23:53 5/16/20 23:58 0
23 5/16/20 23:58 5/16/20 23:58 0
23 5/16/20 23:58 5/16/20 23:59 0
23 5/16/20 23:59 5/17/20 0:16 0
23 5/17/20 0:16 5/17/20 1:47 0
23 5/17/20 1:47 5/17/20 1:48 0
23 5/17/20 1:48 5/20/20 16:52 0
23 5/20/20 16:52 5/20/20 16:52 0
23 5/20/20 16:52 8/22/20 0:22 0
23 8/22/20 0:22 9/3/20 20:22 0
23 9/3/20 20:22 9/3/20 20:23 0
23 9/3/20 20:23 12/31/99 0:00 6
I want to perform a query so I only get the point of change of the 'MyValue', like so:
23 7/7/19 13:51 7/7/19 13:51 0
23 9/3/20 20:23 12/31/99 0:00 6
SELECT ID, ValidFrom, ValidTo, MyValue FROM MyTable FOR SYSTEM_TIME ALL WHERE ID = 23
Gets me the values, but how do I arrive at my desired two-column result?
The following proposed solution does NOT work:
WITH data AS
(
SELECT ID, ValidFrom, MyValue,
LAG(MyValue, 1) OVER (PARTITION BY ID ORDER BY ValidFrom) prevVal,
LEAD(MyValue, 1) OVER (PARTITION BY ID ORDER BY ValidFrom) nextVal
FROM MyTable FOR SYSTEM_TIME ALL WHERE ID = 23
)
SELECT ID, ValidFrom, MyValue
FROM data
WHERE (prevVal IS NOT NULL AND prevVal <> MyValue) OR
(nextVal IS NOT NULL AND nextVal <> MyValue)
ORDER BY ValidFrom DESC;
Resulting in the following results:
ID ValidFrom MyValue
23 2020-09-03 20:23:32.23 6
23 2020-09-03 20:22:00.41 0
Upvotes: 2
Views: 361
Reputation:
Given this sample data:
CREATE TABLE dbo.x(ID int, ValidFrom datetime, ValidTo datetime, MyValue tinyint);
INSERT dbo.x VALUES -- notice I inserted the first two rows in a different order
(23 ,'7/7/19 13:51','9/9/19 11:22', 0),
(23 ,'7/7/19 13:51','7/7/19 13:51', 0),
(23 ,'9/9/19 11:22','9/9/19 11:23', 0),
(23 ,'9/9/19 11:23','5/14/20 23:02', 0),
(23 ,'9/3/20 20:22','9/3/20 20:23', 0),
(23 ,'9/3/20 20:23','12/31/99 0:00', 6);
With the WHERE clause limiting the results to ID = 23, you don't need PARTITION BY, but you will need to add it back if you ever pull back more than one ID. Ordering by ID when you are only pulling back one single value for ID makes zero sense. Here are three approaches, the first two being overly illustrative of the sequence (showing how "is_anchor" is determined), and the last being the most concise:
1.With just lag:
;WITH cte1 AS
(
SELECT ID, ValidFrom, ValidTo, MyValue,
prev = LAG(MyValue, 1) OVER (/* PARTITION BY ID */ ORDER BY ValidFrom, ValidTo)
FROM dbo.x WHERE ID = 23 --MyTable FOR SYSTEM_TIME ALL WHERE ID = 23
),
cte2 AS (
SELECT ID, ValidFrom, ValidTo, MyValue, is_anchor =
CASE WHEN prev <> MyValue or prev IS NULL THEN 1 ELSE 0 END
FROM cte1
)
SELECT ID, ValidFrom, ValidTo, MyValue
FROM cte2 WHERE is_anchor = 1
ORDER BY ID, ValidFrom, ValidTo;
2.With row_number:
;WITH cte1 AS
(
SELECT ID, ValidFrom, ValidTo, MyValue,
prev = LAG(MyValue, 1) OVER (/* PARTITION BY ID */ ORDER BY ValidFrom, ValidTo),
rn = ROW_NUMBER() OVER (/* PARTITION BY ID */ ORDER BY ValidFrom, ValidTo)
FROM dbo.x WHERE ID = 23 --MyTable FOR SYSTEM_TIME ALL WHERE ID = 23
),
cte2 AS (
SELECT ID, ValidFrom, ValidTo, MyValue, is_anchor =
CASE WHEN prev <> MyValue OR rn = 1 THEN 1 ELSE 0 END
FROM cte1
)
SELECT ID, ValidFrom, ValidTo, MyValue
FROM cte2 WHERE is_anchor = 1
ORDER BY ID, ValidFrom, ValidTo;
3.With one less CTE (woop!):
;WITH cte1 AS
(
SELECT ID, ValidFrom, ValidTo, MyValue,
prev = LAG(MyValue, 1) OVER (/* PARTITION BY ID */ ORDER BY ValidFrom, ValidTo)
FROM dbo.x WHERE ID = 23 --MyTable FOR SYSTEM_TIME ALL WHERE ID = 23
)
SELECT ID, ValidFrom, ValidTo, MyValue
FROM cte1 WHERE prev <> MyValue or prev IS NULL
ORDER BY ID, ValidFrom, ValidTo;
In all three cases, the results are:
ID ValidFrom ValidTo MyValue
== =================== =================== =======
23 2019-07-07 13:51:00 2019-07-07 13:51:00 0
23 2020-09-03 20:23:00 1999-12-31 00:00:00 6
Upvotes: 2
Reputation: 6015
I got it to work with this query. To adapt to a system versioned table you would need to add FOR SYSTEM_TIME ALL to the query.
Data
CREATE TABLE dbo.x(ID int, ValidFrom datetime, ValidTo datetime, MyValue int);
INSERT dbo.x VALUES -- notice I inserted the first two rows in a different order
(23 ,'7/7/19 13:51','7/7/19 13:51', 0),
(23 ,'7/7/19 13:51','9/9/19 11:22', 0),
(23 ,'9/9/19 11:22','9/9/19 11:23', 0),
(23 ,'9/9/19 11:23','5/14/20 23:02', 0),
(23 ,'5/14/20 23:02','5/16/20 20:02', 0),
(23 ,'5/16/20 20:02','5/16/20 23:53', 0),
(23 ,'5/16/20 23:53','5/16/20 23:58', 0),
(23 ,'5/16/20 23:58','5/16/20 23:58', 0),
(23 ,'5/16/20 23:58','5/16/20 23:59', 0),
(23 ,'5/16/20 23:59','5/17/20 0:16', 0),
(23 ,'5/17/20 0:16','5/17/20 1:47', 0),
(23 ,'5/17/20 1:47','5/17/20 1:48', 0),
(23 ,'5/17/20 1:48','5/20/20 16:52', 0),
(23 ,'5/20/20 16:52','5/20/20 16:52', 0),
(23 ,'5/20/20 16:52','8/22/20 0:22', 0),
(23 ,'8/22/20 0:22','9/3/20 20:22', 0),
(23 ,'9/3/20 20:22','9/3/20 20:23', 0),
(23 ,'9/3/20 20:23','12/31/99 0:00', 6);
Query
;with data_cte as
(
SELECT ID, ValidFrom, ValidTo, MyValue,
lag(MyValue, 1, -1) over (Partition by id order by ValidFrom, ValidTo) prevVal
FROM x WHERE ID = 23
)
select ID, ValidFrom, ValidTo, MyValue from data_cte
where prevVal=-1
OR prevVal <> myValue;
Results
ID ValidFrom ValidTo MyValue
23 2019-07-07 13:51:00.000 2019-07-07 13:51:00.000 0
23 2020-09-03 20:23:00.000 1999-12-31 00:00:00.000 6
Upvotes: 2
Reputation: 23797
You can use lag() function. ie:
with data as
(
SELECT ID, ValidFrom, ValidTo, MyValue,
lag(MyValue,1) over (/*Partition by id*/ order by validFrom), prevVal
lead(MyValue,1) over (/*Partition by id*/ order by validFrom) nextVal
FROM MyTable WHERE ID = 23
)
select ID, ValidFrom, ValidTo, MyValue
from data
where (prevVal is not null and prevVal <> myValue) OR
(nextVal is not null and nextVal <> myValue);
PS: Probably you would want to order by validFrom.
UPDATE: Edited to order by validFrom as in my suggestion and correcting that 'FOR SYSTEM_TIME', I have no idea where it came from.
Here is DBFiddle Demo
UPDATE: I understood the question something different (where the change occurs - as it was in original question). But in reality what you were asking was very simply the first occurrences of MyValue:
select ID, min(validFrom) ValidFrom, min(validTo) validTo, MyValue
from x
where ID = 23
group by x.ID,MyValue;
and here is DBFiddle Demo
Upvotes: 2