Francesco
Francesco

Reputation: 581

SQL recursive CTE for parent child relation with two tables

I have the following two tables:

enter image description here

Here an example of the hierarchical architecture

- Parent 1 (root)
    - Parent 2
       - Child A
       - Child B
       - Child C
    - Parent 3
       - Parent 4
          - Child D
          - Child E
    - Parent 5
          - Child F

The following query returns all children

SELECT tn.Name, tn.IsLeaf 
FROM [dbo].[TreeNode] tn
WHERE tn.IsLeaf = 1 AND tn.IsDeleted = 0
Name    IsLeaf
--------------
Child A   1
Child B   1
Child C   1
Child D   1
Child E   1
Child F   1

While the following query returns all children and their first parent

SELECT tn.Name, tn.IsLeaf, tnp.Name AS ParentName 
FROM [dbo].[TreeNode] tn
INNER JOIN [dbo].[TreeNodeHierarchy] th ON th.ChildId = tn.Id
LEFT OUTER JOIN [dbo].[TreeNode] tnp ON th.ParentId = tnp.Id
WHERE tn.IsLeaf = 1 AND tn.IsDeleted = 0
Name    IsLeaf  ParentName
--------------------------
Child A   1       Parent 2
Child B   1       Parent 2
Child C   1       Parent 2
Child D   1       Parent 4
Child E   1       Parent 4
Child F   1       Parent 5

I would like to write a SQL CTE in order to have for each child all its parents, like the following:

Name     IsLeaf   ParentsName
-----------------------------------------------
Child A   1       Parent 2, Parent 1
Child B   1       Parent 2, Parent 1
Child C   1       Parent 2, Parent 1
Child D   1       Parent 4, Parent 3, Parent 1
Child E   1       Parent 4, Parent 3, Parent 1
Child F   1       Parent 5, Parent 1

Update

@Tim many thanks for your post, but because I forgot to mention something important, your query is missing some results. The thing I forgot to mention is that in the TreeNode folder there is a field named OrganisationId that is populated only for parent elements. So Parent 1 could exist many time in the table while a specific child exists only once in the table but could belong to different parents. That means, each organisation has its own tree. Now running part of your query

--Recursive CTE.
WITH parent_list AS (

    SELECT 
        tn.Id as ChildID, 
        tn.Name as ChildName, 
        tn.IsLeaf, 
        tnp.Id as ParentID, 
        tnp.[Name] as ParentName , 
        tnp.OrganisationId,
        CAST(tnp.[Name] as nvarchar(max)) as ParentNames, 
        0 as hierarchy_level 
    FROM [dbo].[TreeNode] tn
        INNER JOIN [dbo].[TreeNodeHierarchy] tnhp 
            ON tnhp.ChildID = tn.Id
        INNER JOIN [dbo].[TreeNode] as tnp
            ON tnp.Id = tnhp.ParentID
        WHERE tn.IsLeaf = 1 and tn.IsDeleted = 0
        
    UNION ALL

    SELECT 
        pl.ChildID as ChildID           --Keep the ChildID the same for each recursion.
        , pl.ChildName as ChildName
        , pl.IsLeaf
        , tnp.Id as ParentID
        , tnp.[Name] as ParentName
        , tnp.OrganisationId
        , CAST(pl.ParentNames + ', ' + tnp.[Name] as nvarchar(max)) as ParentNames  --Add the next parent onto the list of parent names.
        , hierarchy_level + 1 as hierarchy_level
    FROM [dbo].[TreeNode] tn
    --Get the ID and name of the parent node.
        INNER JOIN [dbo].[TreeNodeHierarchy] tnhp
            ON tnhp.ChildID = tn.Id
        INNER JOIN [dbo].[TreeNode] as tnp
            ON tnp.Id = tnhp.ParentID

        --Recursive join to find next parent up the list.
        INNER JOIN parent_list as pl
            ON pl.ParentID = tn.Id
)
SELECT  
        pl.ChildName
        , ParentNames
        ,OrganisationId
        , ROW_NUMBER() OVER(PARTITION BY pl.ChildName ORDER BY pl.hierarchy_level DESC) as row_num
    FROM parent_list as pl
    order by ChildName asc

I see, for a specific child, the following result:

enter image description here

What I would need to get is something like the folowing:

ChildName   ParentNames
--------------------------
Fever       Diseases From E-I:1,Diseases From E-I:18, Symptoms:21, Symptoms:22,...

Your full query, for that specific child returns

enter image description here

I'm sorry that I forgot to mention this important information.

Update 2

@Tim please have a look at the following actual sample data. At your query I've just added small changes like the organisation Id as a prefix which I need in my code.

--Temp table for test data.
DECLARE @TreeNode TABLE (
    [Id] [int] NOT NULL,
    [Name] [nvarchar](255) NOT NULL,
    [Code] [nvarchar](100) NULL,
    [IsLeaf] [bit] NOT NULL,
    [OrganisationId] [int] NULL,
    [IsDeleted] [bit] NOT NULL
);

--Load test data.
INSERT INTO @TreeNode (Id, [Name], [Code], [IsLeaf], [OrganisationId], [IsDeleted])
VALUES 
      (1,'OrgRoot 1', null, 0, 1, 0)
    , (2,'OrgRoot 2', null, 0, 2, 0)
    , (3,'OrgRoot 3', null, 0, 3, 0)
    , (4,'OrgRoot 4', null, 0, 4, 0)
    , (5,'OrgRoot 5', null, 0, 5, 0)

     ,(6,'Biological', null, 0, 1, 0)
    , (7,'Biological', null, 0, 2, 0)
    , (8,'Biological', null, 0, 3, 0)
    , (9,'Biological', null, 0, 4, 0)
    , (10,'Biological', null, 0, 5, 0)

    , (11,'Abrin', 'abrin-code', 1, null, 0)
    
    , (12,'Measures', null, 0, 1, 0)
    , (13,'Measures', null, 0, 2, 0)
    , (14,'Measures', null, 0, 3, 0)
    , (15,'Measures', null, 0, 4, 0)
    , (16,'Measures', null, 0, 5, 0)

    , (17,'Mask', 'mask-code', 1, null, 0)
;

--Temp table for test data.
DECLARE @TreeNodeHierarchy TABLE (
    Id int IDENTITY(1,1) not null
    , ParentID int
    , ChildId int
);

--Load test data.
INSERT INTO @TreeNodeHierarchy (ParentID, ChildId)
VALUES
    (1, 6)
    , (2, 7)
    , (3, 8)
    , (4, 9)
    , (5, 10)

    , (6, 11)
    , (7, 11)
    , (8, 11)
    , (9, 11)
    , (10, 11)

    , (6, 12)
    , (7, 13)
    , (8, 14)
    , (9, 15)
    , (10, 16)

    , (12, 17)
    , (13, 17)
    , (14, 17)
    , (15, 17)
    , (16, 17)
;


--Recursive CTE.
WITH parent_list AS (

    SELECT 
        tn.Id as ChildID, 
        tn.Code as ChildCode, 
        tn.IsLeaf, 
        tnp.Id as ParentID, 
        tnp.Name as ParentName , 
        tnp.OrganisationId,
        CAST('f:' + CAST(tnp.OrganisationId as nvarchar(max)) + ':' + tnp.Name as nvarchar(max)) as ParentNames, 
        0 as hierarchy_level 
    FROM @TreeNode tn
        LEFT OUTER JOIN  @TreeNodeHierarchy tnhp 
            ON tnhp.ChildID = tn.Id
        LEFT OUTER JOIN  @TreeNode as tnp
            ON tnp.Id = tnhp.ParentID
        WHERE tn.IsLeaf = 1 and tn.IsDeleted = 0
        
    UNION ALL

    SELECT 
        pl.ChildID as ChildID           --Keep the ChildID the same for each recursion.
        , pl.ChildCode as ChildCode
        , pl.IsLeaf
        , tnp.Id as ParentID
        , tnp.Name as ParentName
        , tnp.OrganisationId
        , CAST(pl.ParentNames + ', ' + 'f:' + CAST(tnp.OrganisationId as nvarchar(max)) + ':' + tnp.Name as nvarchar(max)) as ParentNames  --Add the next parent onto the list of parent names.
        , hierarchy_level + 1 as hierarchy_level
    FROM @TreeNode tn
    --Get the ID and name of the parent node.
        INNER JOIN @TreeNodeHierarchy tnhp
            ON tnhp.ChildID = tn.Id
        INNER JOIN @TreeNode as tnp
            ON tnp.Id = tnhp.ParentID

        --Recursive join to find next parent up the list.
        INNER JOIN parent_list as pl
            ON pl.ParentID = tn.Id
)


--This CTE simply allows us to grab the last entry in the recursion.
, ranked as (
    SELECT  
        pl.ChildCode
        , ParentNames
        , ROW_NUMBER() OVER(PARTITION BY pl.ChildCode ORDER BY pl.hierarchy_level DESC) as row_num
    FROM parent_list as pl
)
SELECT
    r.ChildCode
    , 1 as IsLeaf
    , r.ParentNames
    ,row_num
FROM ranked as r
WHERE row_num = 1   --Make sure we get the last recursive entry in the list for each child name.

The result of the above query is the following:

enter image description here

which for our purpose is missing some information. To see more, we just need to comment the where condition in the last query

--WHERE row_num = 1   --Make sure we get the last recursive entry in the list 

enter image description here

Our goal is to get the following final result:

ChildCode   IsLeaf  ParentNames 
---------------------------------------------------------
abrin-code  1       f:5:Biological, f:5:OrgRoot 5, f:4:Biological, f:4:OrgRoot 4, f:3:Biological, f:3:OrgRoot 3, f:2:Biological, f:2:OrgRoot 2, f:1:Biological, f:1:OrgRoot 1
mask-code   1       f:5:Measures, f:5:Biological, f:5:OrgRoot 5, f:4:Measures, f:4:Biological, f:4:OrgRoot 4, f:3:Measures, f:3:Biological, f:3:OrgRoot 3, f:2:Measures, f:2:Biological, f:2:OrgRoot 2, f:1:Measures, f:1:Biological, f:1:OrgRoot 1

@Tim thanks again for your support.

Upvotes: 0

Views: 1213

Answers (1)

Tim Jarosz
Tim Jarosz

Reputation: 1178

So it's a little weird that you want to list the leaves and the list out the parents in reverse hierarchal order... but it's doable no matter which way you want it. I've never done a recursive CTE so this was an interesting challenge. The way I implemented it, there's no need to have a column for IsLeaf... you know the child is a leaf if it is not a parent of something else.

Here's the code:

--Temp table for test data.
DECLARE @TreeNode TABLE (
    Id int not null
    , [Name] nvarchar(50) null
    --, Code nvarchar(50) null
    --, IsLeaf bit null
    --, OrgId nvarchar(50) null
    --, IsDeleted bit null
);

--Load test data.
INSERT INTO @TreeNode (Id, [Name])
VALUES (1,'Parent 1')
    , (2,'Parent 2')
    , (3,'Parent 3')
    , (4,'Parent 4')
    , (5,'Parent 5')
    , (6,'Child A')
    , (7,'Child B')
    , (8,'Child C')
    , (9,'Child D')
    , (10,'Child E')
    , (11, 'Child F')
;

--Temp table for test data.
DECLARE @TreeNodeHierarchy TABLE (
    Id int IDENTITY(1,1) not null
    , ParentID int
    , ChildId int
);

--Load test data.
INSERT INTO @TreeNodeHierarchy (ParentID, ChildId)
VALUES
    (1, 2)
    , (1, 3)
    , (1, 5)
    , (2, 6)
    , (2, 7)
    , (2, 8)
    , (3, 4)
    , (4, 9)
    , (4, 10)
    , (5, 11)

    ,(2,10) ,(1,10)
;

--Recursive CTE.
WITH parent_list AS (
    SELECT 
        tn.Id as ChildID
        , tn.[Name] as ChildName
        , tnp.Id as ParentID
        , tnp.[Name] as ParentName
        , CAST(tnp.[Name] as nvarchar(max)) as ParentNames
        , 0 as hierarchy_level

        --Since a child may have many parents, this records the child-to-lastparent path
        --so we can get the last recursion for the distinct path
        --and ignore all the intermediate recursions.
        , ROW_NUMBER() OVER(ORDER BY tn.Id, tnp.Id) as [path]
    FROM @TreeNode tn
        --Get the ID and name of the parent node.
        INNER JOIN @TreeNodeHierarchy tnhp
            ON tnhp.ChildID = tn.Id
        INNER JOIN @TreeNode as tnp
            ON tnp.Id = tnhp.ParentID

        --Join to find where this node is a parent.
        LEFT OUTER JOIN @TreeNodeHierarchy tnhc
            ON tnhc.ParentID = tn.Id
    WHERE tnhc.Id IS NULL --Find leaves (child that is not a parent to anything else)
            

    UNION ALL

    SELECT 
        pl.ChildID as ChildID           --Keep the ChildID the same for each recursion.
        , pl.ChildName as ChildName
        , tnp.Id as ParentID
        , tnp.[Name] as ParentName
        , CAST(pl.ParentNames + ', ' + tnp.[Name] as nvarchar(max)) as ParentNames  --Add the next parent onto the list of parent names.
        , hierarchy_level + 1 as hierarchy_level
        , pl.[path]
    FROM @TreeNode tn
        --Get the ID and name of the parent node.
        INNER JOIN @TreeNodeHierarchy tnhp
            ON tnhp.ChildID = tn.Id
        INNER JOIN @TreeNode as tnp
            ON tnp.Id = tnhp.ParentID

        --Recursive join to find next parent up the list.
        INNER JOIN parent_list as pl
            ON pl.ParentID = tn.Id
)
--This CTE simply allows us to grab the last entry in the recursion.
, ranked as (
    SELECT  
        pl.ChildName
        , ParentNames
        , ROW_NUMBER() OVER(PARTITION BY pl.ChildName, pl.[path] ORDER BY pl.hierarchy_level DESC) as row_num
    FROM parent_list as pl
)
SELECT
    r.ChildName
    , 1 as IsLeaf
    , r.ParentNames
FROM ranked as r
WHERE row_num = 1   --Make sure we get the last recursive entry in the list for each child name.d as r

Results:

enter image description here

@KubaWyrostek interesting question about a child having multiple parents. In my solution above there are no children with multiple parents. However, if I introduce a few multiple-parent children, they seem to be filtered out by the second CTE which uses ROW_NUMBER to retrieve only the last recursive entry for each child. Add , (1, 10), (3, 10) to the @TreeNodeHierarchy table and you'll see that they are filtered out in the results. So in reality, I'm only grabbing the child with the longest recursion chain (most parents in the path to root).

Edit: I made an edit so that if a child has multiple parents, it will report the child multiple times, one instance for each "path" up to a parent. i accomplished that by adding a "path" identifier on the root CTE select which will carry through on each recursion.

Edit2: Based on the OP's Update 2 with real sample data, I modified the main select as follows. The OP added all the pass through fields in the CTEs. I also added a ROW_NUMBER() as [path] field on the root CTE. The purpose of the [path] is to track all the different "paths" from child to parent. Once the [path] is set on the root, it carries through on each recursion. The other change was to the main select to use a FOR XML PATH to concatenate all the rows into a single row for each ChildName.

--Recursive CTE.
WITH parent_list AS (
    SELECT 
        tn.Id as ChildID, 
        tn.Code as ChildCode, 
        tn.IsLeaf, 
        tnp.Id as ParentID, 
        tnp.Name as ParentName , 
        tnp.OrganisationId,
        CAST('f:' + CAST(tnp.OrganisationId as nvarchar(max)) + ':' + tnp.Name as nvarchar(max)) as ParentNames,
        ROW_NUMBER() OVER(PARTITION BY tn.[Name] ORDER BY tnp.OrganisationId DESC) as [path],
        0 as hierarchy_level 
    FROM @TreeNode tn
        LEFT OUTER JOIN  @TreeNodeHierarchy tnhp 
            ON tnhp.ChildID = tn.Id
        LEFT OUTER JOIN  @TreeNode as tnp
            ON tnp.Id = tnhp.ParentID
        WHERE tn.IsLeaf = 1 and tn.IsDeleted = 0
        
    UNION ALL

    SELECT 
        pl.ChildID as ChildID           --Keep the ChildID the same for each recursion.
        , pl.ChildCode as ChildCode
        , pl.IsLeaf
        , tnp.Id as ParentID
        , tnp.Name as ParentName
        , tnp.OrganisationId
        , CAST(pl.ParentNames + ', ' + 'f:' + CAST(tnp.OrganisationId as nvarchar(max)) + ':' + tnp.Name as nvarchar(max)) as ParentNames     --Add the next parent onto the list of parent names.
        , pl.[path]     --keep the path the same for each recursion.
        , hierarchy_level + 1 as hierarchy_level
    FROM @TreeNode tn
    --Get the ID and name of the parent node.
        INNER JOIN @TreeNodeHierarchy tnhp
            ON tnhp.ChildID = tn.Id
        INNER JOIN @TreeNode as tnp
            ON tnp.Id = tnhp.ParentID

        --Recursive join to find next parent up the list.
        INNER JOIN parent_list as pl
            ON pl.ParentID = tn.Id
)
--This CTE simply allows us to grab the last entry in the recursion.
, ranked as (
    SELECT  
        *
        , ROW_NUMBER() OVER(PARTITION BY pl.ChildCode, pl.[path] ORDER BY pl.hierarchy_level DESC) as row_num
    FROM parent_list as pl
)
--SELECT * FROM ranked
SELECT DISTINCT
    r.ChildCode
    , r.IsLeaf
    , STUFF((
        SELECT ', ' + r3.ParentNames
        FROM ranked as r3
        WHERE r3.ChildCode = r.ChildCode
            AND r3.row_num = 1
        ORDER BY r3.OrganisationId DESC
        FOR XML PATH('')
    ),1,2,'') as ParentNames
FROM ranked as r

And the output is looking exactly as you requested:

enter image description here

Upvotes: 1

Related Questions