Reputation: 148
I've been trying to write a T-SQL script - I am aware it's probably not the best tool for this scenario - that can do a topological sort given a table of nodes and their edges.
I've eventually come up with the following script inspired by Khan's algorithm:
DROP TABLE IF EXISTS #Edges
CREATE TABLE #Edges (EdgeId INT IDENTITY(1,1), FromNode INT, ToNode INT);
INSERT INTO #Edges (FromNode, ToNode) VALUES
(1, 2), (1, 3), (3, 4), (4, 2), (4, 5), (5, 6), (7, NULL), (8, NULL)--, (6, 4);
DROP TABLE IF EXISTS #AllNodesMaster CREATE TABLE #AllNodesMaster (node INT);
INSERT INTO #AllNodesMaster SELECT DISTINCT FromNode AS node FROM #Edges UNION SELECT DISTINCT ToNode AS Node FROM #Edges
DECLARE @CurrentLevel INT = 1;
DROP TABLE IF EXISTS #AllNodes CREATE TABLE #AllNodes (node INT);
DROP TABLE IF EXISTS #OrderedNodes CREATE TABLE #OrderedNodes (Id INT IDENTITY(1,1), node INT, level INT);
DROP TABLE IF EXISTS #InDegrees CREATE TABLE #InDegrees (node INT, InDegreeCount INT);
DROP TABLE IF EXISTS #Queue CREATE TABLE #Queue (node INT);
DROP TABLE IF EXISTS #DeletedEdges CREATE TABLE #DeletedEdges (node INT);
WHILE EXISTS (SELECT FromNode FROM #Edges)
BEGIN
INSERT INTO #AllNodes SELECT DISTINCT FromNode AS node FROM #Edges UNION SELECT DISTINCT ToNode AS Node FROM #Edges
INSERT INTO #InDegrees
SELECT T.node, ISNULL (C.InDegreeCount, 0) AS inDegreeCount
FROM #AllNodes T
LEFT JOIN (SELECT ToNode AS node, COUNT(FromNode) AS InDegreeCount FROM #Edges GROUP BY ToNode) C ON T.node = C.node
WHERE T.node IS NOT NULL
INSERT INTO #Queue SELECT node FROM #InDegrees WHERE inDegreeCount = 0
INSERT INTO #OrderedNodes (node, level) SELECT node, @CurrentLevel FROM #Queue
DELETE FROM #Edges OUTPUT deleted.FromNode INTO #DeletedEdges WHERE EdgeId IN (SELECT EdgeId FROM #Edges WHERE FromNode IN (SELECT node FROM #Queue))
TRUNCATE TABLE #Queue
INSERT INTO #Queue SELECT node FROM #AllNodes WHERE node NOT IN (SELECT DISTINCT FromNode AS node FROM #Edges UNION SELECT DISTINCT ToNode AS Node FROM #Edges) AND node NOT IN (SELECT node FROM #DeletedEdges)
TRUNCATE TABLE #AllNodes
TRUNCATE TABLE #DeletedEdges
TRUNCATE TABLE #InDegrees
SET @CurrentLevel = @CurrentLevel + 1;
END
SELECT
node, level
FROM
(
SELECT
nm.node,
CASE
WHEN orn.node IS NOT NULL THEN orn.level
ELSE @CurrentLevel
END AS level
FROM
#AllNodesMaster nm
LEFT JOIN #OrderedNodes orn ON nm.node = orn.node
) T
WHERE
node IS NOT NULL
ORDER BY
level
ASC
I chose to go for an iterative approach using a WHILE loop instead of a recursive CTE, as I found the latter harder to debug and often hanged endlessly.
The question is, how could cycles be best handled in this scenario using this implementation? Currently, if a cycle is introduced (the commented line "--, (6, 4)
" does this), then the WHILE gets caught in an endless loop.
Any suggestions or guidance would be greatly appreciated
Upvotes: 0
Views: 83
Reputation: 148
The solution to this that worked for me is to do the cycle detection before doing the topological search.
The implementation for cycle detection in a directed graph was taken from here. Which also came from this answer.
Upvotes: 0