Reputation: 7260
I have the following table with sample data:
Table: tbl_nodes
create table tbl_nodes
(
nod1 varchar(50),
nod2 varchar(50)
);
Sample data:
insert into tbl_nodes values('Node1','Node2');
insert into tbl_nodes values('Node2','Node4');
insert into tbl_nodes values('Node2','Node3');
insert into tbl_nodes values('Node2','Node5');
insert into tbl_nodes values('Node3','Node5');
insert into tbl_nodes values('Node3','Node6');
insert into tbl_nodes values('Node6','Node7');
insert into tbl_nodes values('Node10','Node11');
insert into tbl_nodes values('Node6','Node8');
insert into tbl_nodes values('Node18','Node19');
insert into tbl_nodes values('Node9','Node10');
insert into tbl_nodes values('Node12','Node13');
insert into tbl_nodes values('Node15','Node16');
NOTE: I am having more than 5000 records in the above table.
Expected Result:
------------------------------------
Connectivity
------------------------------------
Node1->Node2->Node3->Node5
Node1->Node2->Node3->Node6->Node7
Node1->Node2->Node3->Node6->Node8
Node1->Node2->Node4
Node1->Node2->Node5
Node9->Node10->Node11
Explaination About expected result: I want to find the connectivity between nodes which are having more than 2 nodes,
for an example Node1
has connectivity with Node2
and Node2
with 3,4,5 and so on as shown in the expected result set.
And want display each connectivity till the end node found, for an example end nodes are Node4
,Node5
,Node7
,Node8
and Node11
.
I tried the following query:
My try:
;WITH CTE AS
(
SELECT nod1,nod2,
CAST(nod1 AS VARCHAR(MAX))+'->' AS conn,
1 as lvl
from tbl_nodes T1
where EXISTS (select 1 from tbl_nodes T2 where T1.nod2 =T2.nod1) OR
EXISTS (select 1 from tbl_nodes T3 WHERE T1.nod1 =T3.nod2)
UNION ALL
SELECT C1.nod1,C1.nod2,
C.conn+CAST(C1.nod1 AS VARCHAR(MAX))+'->',
c.lvl+1
FROM CTE C INNER JOIN tbl_nodes C1 ON C.nod2 = C1.nod1
WHERE CHARINDEX(','+C.nod2+',',C.conn)=0
),cte2 as
(
select * , ROW_NUMBER() over (partition by nod1,nod2 order by lvl)as rn From CTE
),cte3 as
(
select nod1,nod2 ,MAX(LEN(conn)) conn,MAX(rn) rn
from cte2
group by nod1,nod2
)
SELECT DISTINCT c2.conn+c3.nod2 AS Connectivity
from cte3 c3
inner join cte2 c2 on c3.rn = c2.rn and c3.nod1 = c2.nod1
where c3.nod2 not in (select nod1 from cte2)
Above query works fine but unable to get the result for records more than 5000, query keeps running no result.
Edit: I can't attach running data as it has sensitive information, But will explain! I have table with columns Name1
and Name2
which I have referred as Nod1
and Nod2
. I want to find out the relationship between the names like we are finding the link between the nodes here in the given example. The person one (Name1
) may have done some transaction to second person (Name2
) and Name2
may have to do any other person. So I need to find out the link of transactions between the persons. Its just same as the given example. I tried with you given query by partitioning data, for 100 records it comes within seconds, for 500 records it took 1 min and for 5000 records it keeps running because of more permutation and combinations are there. The problem is with last data set (5000) we have to find out the links.
Upvotes: 0
Views: 1204
Reputation: 46
There are 2 questions that need to be solved about this problem:
So, here is my own version of the answer:
IF OBJECT_ID('tempdb..#tbl_nodes') IS NOT NULL
DROP TABLE #tbl_nodes;
CREATE TABLE #tbl_nodes (
nod1 VARCHAR(50)
, nod2 VARCHAR(50)
);
CREATE NONCLUSTERED INDEX #IX_tbl_nodes_1 ON #tbl_nodes (nod1, nod2);
CREATE NONCLUSTERED INDEX #IX_tbl_nodes_2 ON #tbl_nodes (nod2, nod1);
INSERT INTO #tbl_nodes (nod1, nod2)
VALUES ('Node1','Node2')
, ('Node2','Node4')
, ('Node2','Node3')
, ('Node2','Node5')
, ('Node3','Node5')
, ('Node3','Node6')
, ('Node6','Node7')
, ('Node10','Node11')
, ('Node6','Node8')
, ('Node18','Node19')
, ('Node9','Node10')
, ('Node12','Node13')
, ('Node15','Node16')
, ('Node8', 'Node3')
;
WITH cte AS (
SELECT parent.nod1, parent.nod2
, [link] = CAST('[' + parent.nod1 + '] -> [' + parent.nod2 + ']' AS VARCHAR(MAX))
, [flag] = f.flag
, [loop] = 0
, [stop] = 0
, [nodes] = 2
FROM #tbl_nodes AS parent
LEFT JOIN #tbl_nodes AS child
ON parent.nod1 = child.nod2
CROSS APPLY (
SELECT _f.flag, [rn] = ROW_NUMBER() OVER(ORDER BY _f.flag ASC)
FROM (
SELECT [flag] = CAST(1 AS BIT)
UNION ALL
SELECT [flag] = CAST(0 AS BIT)
FROM #tbl_nodes AS __f
WHERE parent.nod2 = __f.nod1
) AS _f
) AS f
WHERE child.nod2 IS NULL
AND f.rn = 1
UNION ALL
SELECT parent.nod1, child.nod2
, [link] = CAST(parent.link + ' -> [' + child.nod2 + ']' AS VARCHAR(MAX))
, [flag] = f.flag
, [loop] = l.loop
, [stop] = l.stop
, [nodes] = parent.nodes + 1
FROM cte AS parent
CROSS APPLY (
SELECT _child.nod1, _child.nod2, [rn] = ROW_NUMBER() OVER(PARTITION BY _child.nod2 ORDER BY _child.nod2)
FROM #tbl_nodes AS _child
WHERE parent.nod2 = _child.nod1
) AS child
CROSS APPLY (
SELECT _f.flag, [rn] = ROW_NUMBER() OVER(ORDER BY _f.flag ASC)
FROM (
SELECT [flag] = CAST(1 AS BIT)
UNION ALL
SELECT [flag] = CAST(0 AS BIT)
FROM #tbl_nodes AS __f
WHERE child.nod2 = __f.nod1
) AS _f
) AS f
CROSS APPLY (
SELECT [loop] = CASE WHEN (LEN(parent.link + ' -> [' + child.nod2 + ']') - LEN(REPLACE(parent.link + ' -> [' + child.nod2 + ']', '[' + child.nod2 + ']', ''))) / (LEN(child.nod2) + 2) > 1 THEN 1 ELSE 0 END
, [stop] = CASE WHEN (LEN(parent.link) - LEN(REPLACE(parent.link, '[' + parent.nod2 + ']', ''))) / (LEN(parent.nod2) + 2) > 1 THEN 1 ELSE 0 END
) AS l
WHERE child.rn = 1
AND f.rn = 1
AND l.stop = 0
)
SELECT cte.link, cte.loop
FROM cte
WHERE (cte.flag = 1 OR cte.loop = 1)
AND cte.nodes > 2
ORDER BY cte.nod1
OPTION (MAXRECURSION 0);
Cheers.
Updated: As @MAK requested, I update my answer to get paths that have more than 2 nodes.
Upvotes: 1
Reputation: 272256
Here is a simplified version of your recursive query that uses EXISTS
operator:
WITH cte_nodes AS (
SELECT CAST(nod1 + '->' + nod2 AS VARCHAR(4000)) AS path, nod2
FROM tbl_nodes AS root
WHERE NOT EXISTS (
-- no parent exists thus represents a root node
SELECT 1
FROM tbl_nodes
WHERE nod2 = root.nod1
) AND EXISTS (
-- at least one child exists thus connected with at least one node
SELECT 1
FROM tbl_nodes
WHERE nod1 = root.nod2
)
UNION ALL
SELECT CAST(prnt.path + '->' + chld.nod2 AS VARCHAR(4000)), chld.nod2
FROM cte_nodes AS prnt
JOIN tbl_nodes AS chld ON prnt.nod2 = chld.nod1
)
SELECT path
FROM cte_nodes
WHERE NOT EXISTS (
-- no child exists thus represents a leaf node
SELECT 1
FROM tbl_nodes
WHERE nod1 = cte_nodes.nod2
)
ORDER BY path
OPTION (MAXRECURSION 100) -- increase this value just enough to get the results
Upvotes: 1