Jeffrey Wilkinson
Jeffrey Wilkinson

Reputation: 41

MongoDB's $graphLookup. Dealing with nodes that have multiple parents and multiple children

I'm trying to work with the new MongoDB v3.4 $graphLookup aggregation pipeline element. I have data "nodes" that can have 0, 1, or multiple parent nodes. Each node can also have 0, 1, or multiple child nodes. I'd like to use $graphLookup to produce the following output:

 {"parentNodeKey":  undefined, "parentLabel": undefined, "nodeKey": 1, "nodeLabel": "Node 1"},
 {"parentNodeKey":  1, "parentLabel": "Node 1", "nodeKey": 11, "nodeLabel": "Node 1.1"},
 {"parentNodeKey":  1, "parentLabel": "Node 1", "nodeKey": 12, "nodeLabel": "Node 1.2"},
 {"parentNodeKey":  1, "parentLabel": "Node 1", "nodeKey": 13, "nodeLabel": "Node 1.3"},
 {"parentNodeKey":  1, "parentLabel": "Node 1", "nodeKey": 31, "nodeLabel": "Node 3.1"},
 {"parentNodeKey": 11, "parentLabel": "Node 1.1", "nodeKey": 111, "nodeLabel": "Node 1.1.1"},
 {"parentNodeKey": 11, "parentLabel": "Node 1.1", "nodeKey": 112, "nodeLabel": "Node 1.1.2"},
 {"parentNodeKey": 12, "parentLabel": "Node 1.2", "nodeKey": 121, "nodeLabel": "Node 1.2.1"},
 {"parentNodeKey": 12, "parentLabel": "Node 1.2", "nodeKey": 122, "nodeLabel": "Node 1.2.2"}

The data is stored in the following schema (I can change this if necessary):

{"_id":   1, "nodeKey":  1, "label": "Node 1", "children": [{"nodeKey": 11, "label": "Node 1.1"}
                                                        ,{"nodeKey": 12, "label": "Node 1.2"}
                                                        ,{"nodeKey": 13, "label": "Node 1.3"}
                                                        ,{"nodeKey": 31, "label": "Node 3.1"}
                                                        ]},
{"_id":   2, "nodeKey":  2, "label": "Node 2", "children": [{"nodeKey": 21, "label": "Node 2.1"}
                                                        ,{"nodeKey": 22, "label": "Node 2.2"}
                                                        ,{"nodeKey": 23, "label": "Node 2.3"}
                                                        ,{"nodeKey": 12, "label": "Node 1.2"}
                                                        ,{"nodeKey": 31, "label": "Node 3.1"}
                                                        ]},
{"_id":   3, "nodeKey":  3, "label": "Node 3", "children": [{"nodeKey": 31, "label": "Node 3.1"}
                                                        ,{"nodeKey": 32, "label": "Node 3.2"}
                                                        ,{"nodeKey": 33, "label": "Node 3.3"}
                                                        ,{"nodeKey": 11, "label": "Node 1.1"}
                                                        ]},
{"_id":  11, "nodeKey": 11, "label": "Node 1.1", "children": [{"nodeKey": 111, "label": "Node 1.1.1"}
                                                          ,{"nodeKey": 112, "label": "Node 1.1.2"}
                                                          ]
                                            , "parents": [{"nodeKey": 1, "label": "Node 1"}
                                                         ,{"nodeKey": 3, "label": "Node 3"}
                                                         ]},
{"_id":  12, "nodeKey": 12, "label": "Node 1.2", "children": [{"nodeKey": 121, "label": "Node 1.2.1"}
                                                          ,{"nodeKey": 122, "label": "Node 1.2.2"}
                                                          ]
                                            , "parents": [{"nodeKey": 1, "label": "Node 1"}
                                                         ,{"nodeKey": 2, "label": "Node 2"}
                                                         ]},
{"_id":  13, "nodeKey": 13, "label": "Node 1.3", "parents": [{"nodeKey": 1, "label": "Node 1"}]},
{"_id":  21, "nodeKey": 21, "label": "Node 2.1", "parents": [{"nodeKey": 2, "label": "Node 2"}]},
{"_id":  22, "nodeKey": 22, "label": "Node 2.2", "parents": [{"nodeKey": 2, "label": "Node 2"}]},
{"_id":  23, "nodeKey": 23, "label": "Node 2.3", "parents": [{"nodeKey": 2, "label": "Node 2"}]},
{"_id":  31, "nodeKey": 31, "label": "Node 3.1", "parents": [{"nodeKey": 1, "label": "Node 1"}
                                                         ,{"nodeKey": 2, "label": "Node 2"}
                                                         ,{"nodeKey": 3, "label": "Node 3"}]},
{"_id":  32, "nodeKey":  32, "label": "Node 3.2", "parents": [{"nodeKey": 3, "label": "Node 3"}]},
{"_id":  33, "nodeKey":  33, "label": "Node 3.3", "parents": [{"nodeKey": 3, "label": "Node 3"}]},
{"_id": 111, "nodeKey": 111, "label": "Node 1.1.1", "parents": [{"nodeKey": 11, "label": "Node 1.1"}]},
{"_id": 112, "nodeKey": 112, "label": "Node 1.1.2", "parents": [{"nodeKey": 11, "label": "Node 1.1"}]},
{"_id": 121, "nodeKey": 121, "label": "Node 1.2.1", "parents": [{"nodeKey": 12, "label": "Node 1.2"}]},
{"_id": 122, "nodeKey": 122, "label": "Node 1.2.2", "parents": [{"nodeKey": 12, "label": "Node 1.2"}]}

It appears that $graphLookup doesn't provide any information about a node's immediate parent node. I've used the following aggregation pipeline, unsuccessfully:

db.nodes.aggregate(
[{$match: {"nodeKey": 1}
 }
,{$graphLookup : {
    from            : "nodes",
    startWith       : "$children.nodeKey",
    connectFromField: "children.nodeKey",
    connectToField  : "nodeKey",
    maxDepth        : 5,
    depthField      : "depth",
    as              : "child"}
 }
,{
     $project: {
          "_id"           : 0
         ,"nodeKey"       : 1
         ,"child.depth"   : 1
         ,"child.nodeKey" : 1
         ,"child.label"   : 1
     }
  }
]).pretty()

I'm looking for any suggestions. Thanks.

Upvotes: 1

Views: 2902

Answers (1)

s7vr
s7vr

Reputation: 75934

You'll need to change collection document structure to reference nodekey's in children and parents array.

Something like this

{"_id":  12, "nodeKey": 12, "label": "Node 1.2", "children": [121, 122], "parents": [1, 2]}

Use the below aggregation query.

This should help you get going. I'll let you work with it to get to your desired output.

The important point to note we've to use $graphLookup couple of times. First lookup will get you childs and next will get you parents for childs.

db.nodes.aggregate([
     {$match: {"nodeKey": 1}},
     {$graphLookup : {
        from            : "nodes",
        startWith       : "$children",
        connectFromField: "children",
        connectToField  : "nodeKey",
        maxDepth        : 5,
        as              : "child"}
     },
     {$unwind:"$child"},
     {$replaceRoot:{newRoot:"$child"}},
     {$graphLookup : {
        from            : "nodes",
        startWith       : "$parents",
        connectFromField: "parents",
        connectToField  : "nodeKey",
        maxDepth        : 5,
        as              : "parent"
        }
     },
     {$project: {
         _id           : 0,
         parentNodeKey : "$parent.nodeKey",
         parentLabel   : "$parent.label",
         nodeLabel     : "$label",
         nodeKey       : 1
         }
     }
]);

Output after first lookup

{ "_id" : 121, "nodeKey" : 121, "label" : "Node 1.2.1", "parents" : [ 12 ] }
{ "_id" : 112, "nodeKey" : 112, "label" : "Node 1.1.2", "parents" : [ 11 ] }
{ "_id" : 122, "nodeKey" : 122, "label" : "Node 1.2.2", "parents" : [ 12 ] }
{ "_id" : 31, "nodeKey" : 31, "label" : "Node 3.1", "parents" : [ 1, 2, 3 ] }
{ "_id" : 12, "nodeKey" : 12, "label" : "Node 1.2", "children" : [ 121, 122 ], "parents" : [ 1, 2 ] }
{ "_id" : 111, "nodeKey" : 111, "label" : "Node 1.1.1", "parents" : [ 11 ] }
{ "_id" : 13, "nodeKey" : 13, "label" : "Node 1.3", "parents" : [ 1 ] }
{ "_id" : 11, "nodeKey" : 11, "label" : "Node 1.1", "children" : [ 111, 112 ], "parents" : [ 1, 3 ] }

Final Output

{ "nodeKey" : 121, "parentNodeKey" : [ 1, 2, 12 ], "parentLabel" : [ "Node 1", "Node 2", "Node 1.2" ], "nodeLabel" : "Node 1.2.1" }
{ "nodeKey" : 112, "parentNodeKey" : [ 1, 3, 11 ], "parentLabel" : [ "Node 1", "Node 3", "Node 1.1" ], "nodeLabel" : "Node 1.1.2" }
{ "nodeKey" : 122, "parentNodeKey" : [ 1, 2, 12 ], "parentLabel" : [ "Node 1", "Node 2", "Node 1.2" ], "nodeLabel" : "Node 1.2.2" }
{ "nodeKey" : 31, "parentNodeKey" : [ 3, 2, 1 ], "parentLabel" : [ "Node 3", "Node 2", "Node 1" ], "nodeLabel" : "Node 3.1" }
{ "nodeKey" : 12, "parentNodeKey" : [ 2, 1 ], "parentLabel" : [ "Node 2", "Node 1" ], "nodeLabel" : "Node 1.2" }
{ "nodeKey" : 111, "parentNodeKey" : [ 1, 3, 11 ], "parentLabel" : [ "Node 1", "Node 3", "Node 1.1" ], "nodeLabel" : "Node 1.1.1" }
{ "nodeKey" : 13, "parentNodeKey" : [ 1 ], "parentLabel" : [ "Node 1" ], "nodeLabel" : "Node 1.3" }
{ "nodeKey" : 11, "parentNodeKey" : [ 3, 1 ], "parentLabel" : [ "Node 3", "Node 1" ], "nodeLabel" : "Node 1.1" }

Upvotes: 5

Related Questions