Reputation: 581
I have the following two tables:
Here an example of the hierarchical architecture
- Parent 1 (root)
- Parent 2
- Child A
- Child B
- Child C
- Parent 3
- Parent 4
- Child D
- Child E
- Parent 5
- Child F
The following query returns all children
SELECT tn.Name, tn.IsLeaf
FROM [dbo].[TreeNode] tn
WHERE tn.IsLeaf = 1 AND tn.IsDeleted = 0
Name IsLeaf
--------------
Child A 1
Child B 1
Child C 1
Child D 1
Child E 1
Child F 1
While the following query returns all children and their first parent
SELECT tn.Name, tn.IsLeaf, tnp.Name AS ParentName
FROM [dbo].[TreeNode] tn
INNER JOIN [dbo].[TreeNodeHierarchy] th ON th.ChildId = tn.Id
LEFT OUTER JOIN [dbo].[TreeNode] tnp ON th.ParentId = tnp.Id
WHERE tn.IsLeaf = 1 AND tn.IsDeleted = 0
Name IsLeaf ParentName
--------------------------
Child A 1 Parent 2
Child B 1 Parent 2
Child C 1 Parent 2
Child D 1 Parent 4
Child E 1 Parent 4
Child F 1 Parent 5
I would like to write a SQL CTE in order to have for each child all its parents, like the following:
Name IsLeaf ParentsName
-----------------------------------------------
Child A 1 Parent 2, Parent 1
Child B 1 Parent 2, Parent 1
Child C 1 Parent 2, Parent 1
Child D 1 Parent 4, Parent 3, Parent 1
Child E 1 Parent 4, Parent 3, Parent 1
Child F 1 Parent 5, Parent 1
Update
@Tim many thanks for your post, but because I forgot to mention something important, your query is missing some results. The thing I forgot to mention is that in the TreeNode folder there is a field named OrganisationId that is populated only for parent elements. So Parent 1 could exist many time in the table while a specific child exists only once in the table but could belong to different parents. That means, each organisation has its own tree. Now running part of your query
--Recursive CTE.
WITH parent_list AS (
SELECT
tn.Id as ChildID,
tn.Name as ChildName,
tn.IsLeaf,
tnp.Id as ParentID,
tnp.[Name] as ParentName ,
tnp.OrganisationId,
CAST(tnp.[Name] as nvarchar(max)) as ParentNames,
0 as hierarchy_level
FROM [dbo].[TreeNode] tn
INNER JOIN [dbo].[TreeNodeHierarchy] tnhp
ON tnhp.ChildID = tn.Id
INNER JOIN [dbo].[TreeNode] as tnp
ON tnp.Id = tnhp.ParentID
WHERE tn.IsLeaf = 1 and tn.IsDeleted = 0
UNION ALL
SELECT
pl.ChildID as ChildID --Keep the ChildID the same for each recursion.
, pl.ChildName as ChildName
, pl.IsLeaf
, tnp.Id as ParentID
, tnp.[Name] as ParentName
, tnp.OrganisationId
, CAST(pl.ParentNames + ', ' + tnp.[Name] as nvarchar(max)) as ParentNames --Add the next parent onto the list of parent names.
, hierarchy_level + 1 as hierarchy_level
FROM [dbo].[TreeNode] tn
--Get the ID and name of the parent node.
INNER JOIN [dbo].[TreeNodeHierarchy] tnhp
ON tnhp.ChildID = tn.Id
INNER JOIN [dbo].[TreeNode] as tnp
ON tnp.Id = tnhp.ParentID
--Recursive join to find next parent up the list.
INNER JOIN parent_list as pl
ON pl.ParentID = tn.Id
)
SELECT
pl.ChildName
, ParentNames
,OrganisationId
, ROW_NUMBER() OVER(PARTITION BY pl.ChildName ORDER BY pl.hierarchy_level DESC) as row_num
FROM parent_list as pl
order by ChildName asc
I see, for a specific child, the following result:
What I would need to get is something like the folowing:
ChildName ParentNames
--------------------------
Fever Diseases From E-I:1,Diseases From E-I:18, Symptoms:21, Symptoms:22,...
Your full query, for that specific child returns
I'm sorry that I forgot to mention this important information.
Update 2
@Tim please have a look at the following actual sample data. At your query I've just added small changes like the organisation Id as a prefix which I need in my code.
--Temp table for test data.
DECLARE @TreeNode TABLE (
[Id] [int] NOT NULL,
[Name] [nvarchar](255) NOT NULL,
[Code] [nvarchar](100) NULL,
[IsLeaf] [bit] NOT NULL,
[OrganisationId] [int] NULL,
[IsDeleted] [bit] NOT NULL
);
--Load test data.
INSERT INTO @TreeNode (Id, [Name], [Code], [IsLeaf], [OrganisationId], [IsDeleted])
VALUES
(1,'OrgRoot 1', null, 0, 1, 0)
, (2,'OrgRoot 2', null, 0, 2, 0)
, (3,'OrgRoot 3', null, 0, 3, 0)
, (4,'OrgRoot 4', null, 0, 4, 0)
, (5,'OrgRoot 5', null, 0, 5, 0)
,(6,'Biological', null, 0, 1, 0)
, (7,'Biological', null, 0, 2, 0)
, (8,'Biological', null, 0, 3, 0)
, (9,'Biological', null, 0, 4, 0)
, (10,'Biological', null, 0, 5, 0)
, (11,'Abrin', 'abrin-code', 1, null, 0)
, (12,'Measures', null, 0, 1, 0)
, (13,'Measures', null, 0, 2, 0)
, (14,'Measures', null, 0, 3, 0)
, (15,'Measures', null, 0, 4, 0)
, (16,'Measures', null, 0, 5, 0)
, (17,'Mask', 'mask-code', 1, null, 0)
;
--Temp table for test data.
DECLARE @TreeNodeHierarchy TABLE (
Id int IDENTITY(1,1) not null
, ParentID int
, ChildId int
);
--Load test data.
INSERT INTO @TreeNodeHierarchy (ParentID, ChildId)
VALUES
(1, 6)
, (2, 7)
, (3, 8)
, (4, 9)
, (5, 10)
, (6, 11)
, (7, 11)
, (8, 11)
, (9, 11)
, (10, 11)
, (6, 12)
, (7, 13)
, (8, 14)
, (9, 15)
, (10, 16)
, (12, 17)
, (13, 17)
, (14, 17)
, (15, 17)
, (16, 17)
;
--Recursive CTE.
WITH parent_list AS (
SELECT
tn.Id as ChildID,
tn.Code as ChildCode,
tn.IsLeaf,
tnp.Id as ParentID,
tnp.Name as ParentName ,
tnp.OrganisationId,
CAST('f:' + CAST(tnp.OrganisationId as nvarchar(max)) + ':' + tnp.Name as nvarchar(max)) as ParentNames,
0 as hierarchy_level
FROM @TreeNode tn
LEFT OUTER JOIN @TreeNodeHierarchy tnhp
ON tnhp.ChildID = tn.Id
LEFT OUTER JOIN @TreeNode as tnp
ON tnp.Id = tnhp.ParentID
WHERE tn.IsLeaf = 1 and tn.IsDeleted = 0
UNION ALL
SELECT
pl.ChildID as ChildID --Keep the ChildID the same for each recursion.
, pl.ChildCode as ChildCode
, pl.IsLeaf
, tnp.Id as ParentID
, tnp.Name as ParentName
, tnp.OrganisationId
, CAST(pl.ParentNames + ', ' + 'f:' + CAST(tnp.OrganisationId as nvarchar(max)) + ':' + tnp.Name as nvarchar(max)) as ParentNames --Add the next parent onto the list of parent names.
, hierarchy_level + 1 as hierarchy_level
FROM @TreeNode tn
--Get the ID and name of the parent node.
INNER JOIN @TreeNodeHierarchy tnhp
ON tnhp.ChildID = tn.Id
INNER JOIN @TreeNode as tnp
ON tnp.Id = tnhp.ParentID
--Recursive join to find next parent up the list.
INNER JOIN parent_list as pl
ON pl.ParentID = tn.Id
)
--This CTE simply allows us to grab the last entry in the recursion.
, ranked as (
SELECT
pl.ChildCode
, ParentNames
, ROW_NUMBER() OVER(PARTITION BY pl.ChildCode ORDER BY pl.hierarchy_level DESC) as row_num
FROM parent_list as pl
)
SELECT
r.ChildCode
, 1 as IsLeaf
, r.ParentNames
,row_num
FROM ranked as r
WHERE row_num = 1 --Make sure we get the last recursive entry in the list for each child name.
The result of the above query is the following:
which for our purpose is missing some information. To see more, we just need to comment the where condition in the last query
--WHERE row_num = 1 --Make sure we get the last recursive entry in the list
Our goal is to get the following final result:
ChildCode IsLeaf ParentNames
---------------------------------------------------------
abrin-code 1 f:5:Biological, f:5:OrgRoot 5, f:4:Biological, f:4:OrgRoot 4, f:3:Biological, f:3:OrgRoot 3, f:2:Biological, f:2:OrgRoot 2, f:1:Biological, f:1:OrgRoot 1
mask-code 1 f:5:Measures, f:5:Biological, f:5:OrgRoot 5, f:4:Measures, f:4:Biological, f:4:OrgRoot 4, f:3:Measures, f:3:Biological, f:3:OrgRoot 3, f:2:Measures, f:2:Biological, f:2:OrgRoot 2, f:1:Measures, f:1:Biological, f:1:OrgRoot 1
@Tim thanks again for your support.
Upvotes: 0
Views: 1213
Reputation: 1178
So it's a little weird that you want to list the leaves and the list out the parents in reverse hierarchal order... but it's doable no matter which way you want it. I've never done a recursive CTE so this was an interesting challenge. The way I implemented it, there's no need to have a column for IsLeaf... you know the child is a leaf if it is not a parent of something else.
Here's the code:
--Temp table for test data.
DECLARE @TreeNode TABLE (
Id int not null
, [Name] nvarchar(50) null
--, Code nvarchar(50) null
--, IsLeaf bit null
--, OrgId nvarchar(50) null
--, IsDeleted bit null
);
--Load test data.
INSERT INTO @TreeNode (Id, [Name])
VALUES (1,'Parent 1')
, (2,'Parent 2')
, (3,'Parent 3')
, (4,'Parent 4')
, (5,'Parent 5')
, (6,'Child A')
, (7,'Child B')
, (8,'Child C')
, (9,'Child D')
, (10,'Child E')
, (11, 'Child F')
;
--Temp table for test data.
DECLARE @TreeNodeHierarchy TABLE (
Id int IDENTITY(1,1) not null
, ParentID int
, ChildId int
);
--Load test data.
INSERT INTO @TreeNodeHierarchy (ParentID, ChildId)
VALUES
(1, 2)
, (1, 3)
, (1, 5)
, (2, 6)
, (2, 7)
, (2, 8)
, (3, 4)
, (4, 9)
, (4, 10)
, (5, 11)
,(2,10) ,(1,10)
;
--Recursive CTE.
WITH parent_list AS (
SELECT
tn.Id as ChildID
, tn.[Name] as ChildName
, tnp.Id as ParentID
, tnp.[Name] as ParentName
, CAST(tnp.[Name] as nvarchar(max)) as ParentNames
, 0 as hierarchy_level
--Since a child may have many parents, this records the child-to-lastparent path
--so we can get the last recursion for the distinct path
--and ignore all the intermediate recursions.
, ROW_NUMBER() OVER(ORDER BY tn.Id, tnp.Id) as [path]
FROM @TreeNode tn
--Get the ID and name of the parent node.
INNER JOIN @TreeNodeHierarchy tnhp
ON tnhp.ChildID = tn.Id
INNER JOIN @TreeNode as tnp
ON tnp.Id = tnhp.ParentID
--Join to find where this node is a parent.
LEFT OUTER JOIN @TreeNodeHierarchy tnhc
ON tnhc.ParentID = tn.Id
WHERE tnhc.Id IS NULL --Find leaves (child that is not a parent to anything else)
UNION ALL
SELECT
pl.ChildID as ChildID --Keep the ChildID the same for each recursion.
, pl.ChildName as ChildName
, tnp.Id as ParentID
, tnp.[Name] as ParentName
, CAST(pl.ParentNames + ', ' + tnp.[Name] as nvarchar(max)) as ParentNames --Add the next parent onto the list of parent names.
, hierarchy_level + 1 as hierarchy_level
, pl.[path]
FROM @TreeNode tn
--Get the ID and name of the parent node.
INNER JOIN @TreeNodeHierarchy tnhp
ON tnhp.ChildID = tn.Id
INNER JOIN @TreeNode as tnp
ON tnp.Id = tnhp.ParentID
--Recursive join to find next parent up the list.
INNER JOIN parent_list as pl
ON pl.ParentID = tn.Id
)
--This CTE simply allows us to grab the last entry in the recursion.
, ranked as (
SELECT
pl.ChildName
, ParentNames
, ROW_NUMBER() OVER(PARTITION BY pl.ChildName, pl.[path] ORDER BY pl.hierarchy_level DESC) as row_num
FROM parent_list as pl
)
SELECT
r.ChildName
, 1 as IsLeaf
, r.ParentNames
FROM ranked as r
WHERE row_num = 1 --Make sure we get the last recursive entry in the list for each child name.d as r
Results:
@KubaWyrostek interesting question about a child having multiple parents. In my solution above there are no children with multiple parents. However, if I introduce a few multiple-parent children, they seem to be filtered out by the second CTE which uses ROW_NUMBER
to retrieve only the last recursive entry for each child. Add , (1, 10), (3, 10)
to the @TreeNodeHierarchy table and you'll see that they are filtered out in the results. So in reality, I'm only grabbing the child with the longest recursion chain (most parents in the path to root).
Edit: I made an edit so that if a child has multiple parents, it will report the child multiple times, one instance for each "path" up to a parent. i accomplished that by adding a "path" identifier on the root CTE select which will carry through on each recursion.
Edit2: Based on the OP's Update 2 with real sample data, I modified the main select as follows. The OP added all the pass through fields in the CTEs. I also added a ROW_NUMBER() as [path]
field on the root CTE. The purpose of the [path] is to track all the different "paths" from child to parent. Once the [path] is set on the root, it carries through on each recursion. The other change was to the main select to use a FOR XML PATH
to concatenate all the rows into a single row for each ChildName
.
--Recursive CTE.
WITH parent_list AS (
SELECT
tn.Id as ChildID,
tn.Code as ChildCode,
tn.IsLeaf,
tnp.Id as ParentID,
tnp.Name as ParentName ,
tnp.OrganisationId,
CAST('f:' + CAST(tnp.OrganisationId as nvarchar(max)) + ':' + tnp.Name as nvarchar(max)) as ParentNames,
ROW_NUMBER() OVER(PARTITION BY tn.[Name] ORDER BY tnp.OrganisationId DESC) as [path],
0 as hierarchy_level
FROM @TreeNode tn
LEFT OUTER JOIN @TreeNodeHierarchy tnhp
ON tnhp.ChildID = tn.Id
LEFT OUTER JOIN @TreeNode as tnp
ON tnp.Id = tnhp.ParentID
WHERE tn.IsLeaf = 1 and tn.IsDeleted = 0
UNION ALL
SELECT
pl.ChildID as ChildID --Keep the ChildID the same for each recursion.
, pl.ChildCode as ChildCode
, pl.IsLeaf
, tnp.Id as ParentID
, tnp.Name as ParentName
, tnp.OrganisationId
, CAST(pl.ParentNames + ', ' + 'f:' + CAST(tnp.OrganisationId as nvarchar(max)) + ':' + tnp.Name as nvarchar(max)) as ParentNames --Add the next parent onto the list of parent names.
, pl.[path] --keep the path the same for each recursion.
, hierarchy_level + 1 as hierarchy_level
FROM @TreeNode tn
--Get the ID and name of the parent node.
INNER JOIN @TreeNodeHierarchy tnhp
ON tnhp.ChildID = tn.Id
INNER JOIN @TreeNode as tnp
ON tnp.Id = tnhp.ParentID
--Recursive join to find next parent up the list.
INNER JOIN parent_list as pl
ON pl.ParentID = tn.Id
)
--This CTE simply allows us to grab the last entry in the recursion.
, ranked as (
SELECT
*
, ROW_NUMBER() OVER(PARTITION BY pl.ChildCode, pl.[path] ORDER BY pl.hierarchy_level DESC) as row_num
FROM parent_list as pl
)
--SELECT * FROM ranked
SELECT DISTINCT
r.ChildCode
, r.IsLeaf
, STUFF((
SELECT ', ' + r3.ParentNames
FROM ranked as r3
WHERE r3.ChildCode = r.ChildCode
AND r3.row_num = 1
ORDER BY r3.OrganisationId DESC
FOR XML PATH('')
),1,2,'') as ParentNames
FROM ranked as r
And the output is looking exactly as you requested:
Upvotes: 1