Reputation: 931
As I doubt my title makes very much sense, I will do my best to explain what I am asking. I need to clean up an audit table which tracks when the state of an object was modified. For one reason or another, multiple records are being created with new dates while the state of the object is still the same. I need to preserve the first record of each state change and then remove any following records where the state is the same. Oh, and there is no primary key. Yeah! :|
Here is an example data set:
ObjectID ObjectState DateOfEntry
101144 1 2007-08-14 12:39:30.587
101144 1 2007-08-14 12:41:52.620
101144 1 2007-08-14 12:42:11.150
101144 1 2007-08-14 12:42:24.197
101144 3 2007-08-14 12:44:06.403
101144 3 2007-08-14 12:44:06.467
101144 3 2007-08-14 12:46:12.573
101144 3 2007-08-14 12:50:51.670
101144 3 2007-08-14 12:50:51.750
101144 3 2007-08-14 12:56:34.330
101144 4 2007-08-14 17:28:59.280
101144 3 2007-08-14 17:32:26.313
101144 3 2007-08-14 17:32:48.720
101144 3 2007-08-14 17:45:07.460
101144 3 2007-08-14 17:46:31.740
101144 3 2007-08-14 17:47:04.380
101144 3 2007-08-14 17:47:29.507
101144 3 2007-08-14 17:49:13.460
101144 3 2007-08-14 17:54:15.320
101144 3 2007-08-14 17:55:57.540
101144 3 2007-08-14 19:50:11.913
101144 3 2007-08-14 19:53:10.820
101144 3 2007-08-14 20:03:44.900
101144 3 2007-08-16 10:34:56.477
101144 3 2007-08-16 10:36:06.477
101144 3 2007-08-16 10:36:24.570
101144 3 2007-11-06 09:19:26.157
101144 3 2007-11-06 09:24:28.200
101144 4 2010-09-27 14:11:03.287
101144 4 2014-01-27 17:31:58.077
The end table result should be:
ObjectID ObjectState DateOfEntry
101144 1 2007-08-14 12:39:30.587
101144 3 2007-08-14 12:44:06.403
101144 4 2007-08-14 17:28:59.280
101144 3 2007-08-14 17:32:26.313
101144 4 2010-09-27 14:11:03.287
I have tried using RANK()
but the problem is that I can't just sort on ObjectState
because the ObjectState
values can be repeated out of order. I have to order them by the DateOfEntry
. But if I do RANK() OVER(ORDER BY DateOfEntry)
then I basically get row numbering.
How can I create a SQL query that will allow me to order by DateOfEntry
but then group by ObjectState
so I can remove all rows within that "object state group" except for the minimum one of the group?
Upvotes: 3
Views: 68
Reputation: 931
An alternative solution I finally thought of using LAG() will eliminate the CTE.
DELETE @Audits
FROM @Audits a1
INNER JOIN (SELECT ObjectID, DateOfEntry
FROM (SELECT ObjectID, DateOfEntry, ObjectState,
LAG(ObjectState) OVER(PARTITION BY ObjectID ORDER BY DateOfEntry) AS [PreviousObjectState]
FROM @Audits) AS Audits
WHERE Audits.ObjectState = PreviousObjectState
) a2
ON a2.ObjectID = a1.ObjectID AND a2.DateOfEntry = a1.DateOfEntry
SELECT * FROM @Audits
Long version with proof (I've duplicated the data set with a different ID just to verify the partition by is working as expected)
DECLARE @Audits TABLE (ObjectID INT, ObjectState INT, DateOfEntry DATETIME)
INSERT @Audits
SELECT 101144,1,'2007-08-14 12:39:30.587' UNION ALL
SELECT 101144,1,'2007-08-14 12:41:52.620' UNION ALL
SELECT 101144,1,'2007-08-14 12:42:11.150' UNION ALL
SELECT 101144,1,'2007-08-14 12:42:24.197' UNION ALL
SELECT 101144,3,'2007-08-14 12:44:06.403' UNION ALL
SELECT 101144,3,'2007-08-14 12:44:06.467' UNION ALL
SELECT 101144,3,'2007-08-14 12:46:12.573' UNION ALL
SELECT 101144,3,'2007-08-14 12:50:51.670' UNION ALL
SELECT 101144,3,'2007-08-14 12:50:51.750' UNION ALL
SELECT 101144,3,'2007-08-14 12:56:34.330' UNION ALL
SELECT 101144,4,'2007-08-14 17:28:59.280' UNION ALL
SELECT 101144,3,'2007-08-14 17:32:26.313' UNION ALL
SELECT 101144,3,'2007-08-14 17:32:48.720' UNION ALL
SELECT 101144,3,'2007-08-14 17:45:07.460' UNION ALL
SELECT 101144,3,'2007-08-14 17:46:31.740' UNION ALL
SELECT 101144,3,'2007-08-14 17:47:04.380' UNION ALL
SELECT 101144,3,'2007-08-14 17:47:29.507' UNION ALL
SELECT 101144,3,'2007-08-14 17:49:13.460' UNION ALL
SELECT 101144,3,'2007-08-14 17:54:15.320' UNION ALL
SELECT 101144,3,'2007-08-14 17:55:57.540' UNION ALL
SELECT 101144,3,'2007-08-14 19:50:11.913' UNION ALL
SELECT 101144,3,'2007-08-14 19:53:10.820' UNION ALL
SELECT 101144,3,'2007-08-14 20:03:44.900' UNION ALL
SELECT 101144,3,'2007-08-16 10:34:56.477' UNION ALL
SELECT 101144,3,'2007-08-16 10:36:06.477' UNION ALL
SELECT 101144,3,'2007-08-16 10:36:24.570' UNION ALL
SELECT 101144,3,'2007-11-06 09:19:26.157' UNION ALL
SELECT 101144,3,'2007-11-06 09:24:28.200' UNION ALL
SELECT 101144,4,'2010-09-27 14:11:03.287' UNION ALL
SELECT 101144,4,'2014-01-27 17:31:58.077' UNION ALL
SELECT 101145,1,'2007-08-14 12:39:30.587' UNION ALL
SELECT 101145,1,'2007-08-14 12:41:52.620' UNION ALL
SELECT 101145,1,'2007-08-14 12:42:11.150' UNION ALL
SELECT 101145,1,'2007-08-14 12:42:24.197' UNION ALL
SELECT 101145,3,'2007-08-14 12:44:06.403' UNION ALL
SELECT 101145,3,'2007-08-14 12:44:06.467' UNION ALL
SELECT 101145,3,'2007-08-14 12:46:12.573' UNION ALL
SELECT 101145,3,'2007-08-14 12:50:51.670' UNION ALL
SELECT 101145,3,'2007-08-14 12:50:51.750' UNION ALL
SELECT 101145,3,'2007-08-14 12:56:34.330' UNION ALL
SELECT 101145,4,'2007-08-14 17:28:59.280' UNION ALL
SELECT 101145,3,'2007-08-14 17:32:26.313' UNION ALL
SELECT 101145,3,'2007-08-14 17:32:48.720' UNION ALL
SELECT 101145,3,'2007-08-14 17:45:07.460' UNION ALL
SELECT 101145,3,'2007-08-14 17:46:31.740' UNION ALL
SELECT 101145,3,'2007-08-14 17:47:04.380' UNION ALL
SELECT 101145,3,'2007-08-14 17:47:29.507' UNION ALL
SELECT 101145,3,'2007-08-14 17:49:13.460' UNION ALL
SELECT 101145,3,'2007-08-14 17:54:15.320' UNION ALL
SELECT 101145,3,'2007-08-14 17:55:57.540' UNION ALL
SELECT 101145,3,'2007-08-14 19:50:11.913' UNION ALL
SELECT 101145,3,'2007-08-14 19:53:10.820' UNION ALL
SELECT 101145,3,'2007-08-14 20:03:44.900' UNION ALL
SELECT 101145,3,'2007-08-16 10:34:56.477' UNION ALL
SELECT 101145,3,'2007-08-16 10:36:06.477' UNION ALL
SELECT 101145,3,'2007-08-16 10:36:24.570' UNION ALL
SELECT 101145,3,'2007-11-06 09:19:26.157' UNION ALL
SELECT 101145,3,'2007-11-06 09:24:28.200' UNION ALL
SELECT 101145,4,'2010-09-27 14:11:03.287' UNION ALL
SELECT 101145,4,'2014-01-27 17:31:58.077'
DELETE @Audits
FROM @Audits a1
INNER JOIN (SELECT ObjectID, DateOfEntry
FROM (SELECT ObjectID, DateOfEntry, ObjectState,
LAG(ObjectState) OVER(PARTITION BY ObjectID ORDER BY DateOfEntry) AS [PreviousUserState]
FROM @Audits) AS Audits
WHERE Audits.ObjectState = PreviousUserState
) a2
ON a2.ObjectID = a1.ObjectID AND a2.DateOfEntry = a1.DateOfEntry
SELECT * FROM @Audits
Yields this output
ObjectID ObjectState DateOfEntry
----------- ----------- -----------------------
101144 1 2007-08-14 12:39:30.587
101144 3 2007-08-14 12:44:06.403
101144 4 2007-08-14 17:28:59.280
101144 3 2007-08-14 17:32:26.313
101144 4 2010-09-27 14:11:03.287
101145 1 2007-08-14 12:39:30.587
101145 3 2007-08-14 12:44:06.403
101145 4 2007-08-14 17:28:59.280
101145 3 2007-08-14 17:32:26.313
101145 4 2010-09-27 14:11:03.287
Upvotes: 0
Reputation: 6073
If @table is your table, may be the below sql will help you.I assumed that the DateOfEntry is sorted.
DELETE B
FROM (SELECT *,ROW_NUMBER() OVER(ORDER BY DateOfEntry) [ROW] FROM @table) A
LEFT JOIN (SELECT *,ROW_NUMBER() OVER(ORDER BY DateOfEntry) [ROW] FROM @table) B
ON A.[Row] = B.[Row] - 1
AND ABS(a.ObjectState - b.ObjectState) = 0
Result before and after for Me
Note: Please dont mind the data inside DateOfEntry column. I kept it number for my convenience.
Upvotes: 0
Reputation: 13179
Short answer:
; WITH Records AS (
SELECT
ObjectId,
ObjectState,
DateOfEntry,
ROW_NUMBER() OVER (PARTITION BY ObjectID ORDER BY DateOfEntry) AS RowNum
FROM @Audits
)
DELETE R2
FROM Records R1
INNER JOIN Records R2
ON R1.ObjectId = R2.ObjectId
AND R1.ObjectState = R2.ObjectState
AND R1.RowNum + 1 = R2.RowNum
Proof of solution
DECLARE @Audits TABLE (ObjectID INT, ObjectState INT, DateOfEntry DATETIME)
INSERT @Audits
SELECT 101144,1,'2007-08-14 12:39:30.587' UNION ALL
SELECT 101144,1,'2007-08-14 12:41:52.620' UNION ALL
SELECT 101144,1,'2007-08-14 12:42:11.150' UNION ALL
SELECT 101144,1,'2007-08-14 12:42:24.197' UNION ALL
SELECT 101144,3,'2007-08-14 12:44:06.403' UNION ALL
SELECT 101144,3,'2007-08-14 12:44:06.467' UNION ALL
SELECT 101144,3,'2007-08-14 12:46:12.573' UNION ALL
SELECT 101144,3,'2007-08-14 12:50:51.670' UNION ALL
SELECT 101144,3,'2007-08-14 12:50:51.750' UNION ALL
SELECT 101144,3,'2007-08-14 12:56:34.330' UNION ALL
SELECT 101144,4,'2007-08-14 17:28:59.280' UNION ALL
SELECT 101144,3,'2007-08-14 17:32:26.313' UNION ALL
SELECT 101144,3,'2007-08-14 17:32:48.720' UNION ALL
SELECT 101144,3,'2007-08-14 17:45:07.460' UNION ALL
SELECT 101144,3,'2007-08-14 17:46:31.740' UNION ALL
SELECT 101144,3,'2007-08-14 17:47:04.380' UNION ALL
SELECT 101144,3,'2007-08-14 17:47:29.507' UNION ALL
SELECT 101144,3,'2007-08-14 17:49:13.460' UNION ALL
SELECT 101144,3,'2007-08-14 17:54:15.320' UNION ALL
SELECT 101144,3,'2007-08-14 17:55:57.540' UNION ALL
SELECT 101144,3,'2007-08-14 19:50:11.913' UNION ALL
SELECT 101144,3,'2007-08-14 19:53:10.820' UNION ALL
SELECT 101144,3,'2007-08-14 20:03:44.900' UNION ALL
SELECT 101144,3,'2007-08-16 10:34:56.477' UNION ALL
SELECT 101144,3,'2007-08-16 10:36:06.477' UNION ALL
SELECT 101144,3,'2007-08-16 10:36:24.570' UNION ALL
SELECT 101144,3,'2007-11-06 09:19:26.157' UNION ALL
SELECT 101144,3,'2007-11-06 09:24:28.200' UNION ALL
SELECT 101144,4,'2010-09-27 14:11:03.287' UNION ALL
SELECT 101144,4,'2014-01-27 17:31:58.077'
; WITH Records AS (
SELECT
ObjectId,
ObjectState,
DateOfEntry,
ROW_NUMBER() OVER (PARTITION BY ObjectID ORDER BY DateOfEntry) AS RowNum
FROM @Audits
)
DELETE R2
FROM Records R1
INNER JOIN Records R2
ON R1.ObjectId = R2.ObjectId
AND R1.ObjectState = R2.ObjectState
AND R1.RowNum + 1 = R2.RowNum
SELECT * FROM @Audits
Yields this output
ObjectID ObjectState DateOfEntry
----------- ----------- -----------------------
101144 1 2007-08-14 12:39:30.587
101144 3 2007-08-14 12:44:06.403
101144 4 2007-08-14 17:28:59.280
101144 3 2007-08-14 17:32:26.313
101144 4 2010-09-27 14:11:03.287
Upvotes: 3