Franck Wehrling
Franck Wehrling

Reputation: 17

create a complex mongodb query from a json with multi-level array

i have this json :

{
  "_id": "id",
  "field1": "value1",
  "field2": "value2",
  "field3": "value3",
  "field4": "value4",
  "field5": "value5",
  "field6": [
    {
      "field7": "value_a7_level1",
      "field8": "value_a8_level1",
      "field9": "value_a9_level1",
      "field10": [
        {
          "field11": "value_a11_level1",
          "field12": "value_a12_level1",
          "field13": "value_a13_level1",
          "field14": "value_a14_level1"
        },
        {
          "field11": "value_b11_level1",
          "field12": "value_b12_level1",
          "field13": "value_b13_level1",
          "field14": "value_b14_level1"
        }
      ],
      "field15": [
        {
          "field16": "zzz",
          "field17": "xxx",
          "field18": "value_a18_level1",
          "field19": "value_a19_level1"
        },
        {
          "field16": "xxx",
          "field17": "yyy",
          "field18": "value_b18_level1",
          "field19": "value_b19_level1"
        },
        {
          "field16": "xxx",
          "field17": "yyy",
          "field18": "value_c18_level1",
          "field19": "value_c19_level1"
        }
      ]
    },
    {
      "field7": "value_a7_level2",
      "field8": "value_a8_level2",
      "field9": "value_a9_level2",
      "field10": [
        {
          "field11": "value_a11_level2",
          "field12": "value_a12_level2",
          "field13": "value_a13_level2",
          "field14": "value_a14_level2"
        },
        {
          "field11": "value_b11_level2",
          "field12": "value_b12_level2",
          "field13": "value_b13_level2",
          "field14": "value_b14_level2"
        }
      ],
      "field15": [
        {
          "field16": "value_a16_level2",
          "field17": "value_a17_level2",
          "field18": "value_a18_level2",
          "field19": "value_a19_level2"
        },
        {
          "field16": "value_b16_level2",
          "field17": "value_b17_level2",
          "field18": "value_b18_level2",
          "field19": "value_b19_level2"
        },
        {
          "field16": "value_c16_level2",
          "field17": "value_c17_level2",
          "field18": "value_c18_level2",
          "field19": "value_c19_level2"
        }
      ]
    },
    {
      "field7": "value_a7_level3",
      "field8": "value_a8_level3",
      "field9": "value_a9_level3",
      "field10": [
        {
          "field11": "value_a11_level3",
          "field12": "value_a12_level3",
          "field13": "value_a13_level3",
          "field14": "value_a14_level3"
        },
        {
          "field11": "value_b11_level3",
          "field12": "value_b12_level3",
          "field13": "value_b13_level3",
          "field14": "value_b14_level3"
        }
      ],
      "field15": [
        {
          "field16": "xxx",
          "field17": "yyy",
          "field18": "value_a18_level3",
          "field19": "value_a19_level3"
        },
        {
          "field16": "value_b16_level3",
          "field17": "value_b17_level3",
          "field18": "value_b18_level3",
          "field19": "value_b19_level3"
        },
        {
          "field16": "value_c16_level3",
          "field17": "value_c17_level3",
          "field18": "value_c18_level3",
          "field19": "value_c19_level3"
        }
      ]
    }
  ],
  "field20": [
    {
      "field21": "value21_level1",
      "field22": "value22_level1",
      "field23": "value23_level1"
    },
    {
      "field21": "value21_level2",
      "field22": "value22_level2",
      "field23": "value23_level2"
    }
  ]
}

I want to return a result according to filters and selections. The filters would be for example:

field16 and field17 must be equal to "XXX" and "YYY" respectively.

I need to return a json that will contain all the fields but field15 will only contain the objects corresponding to the filters. Also field6 will only contain the objects for which chemp15 returns true to the filters.

In my example I will have this result:

{
    "_id": "id",
    "field1": "value1",
    "field2": "value2",
    "field3": "value3",
    "field4": "value4",
    "field5": "value5",
    "field6": [
      {
        "field7": "value_a7_level1",
        "field8": "value_a8_level1",
        "field9": "value_a9_level1",
        "field10": [
          {
            "field11": "value_a11_level1",
            "field12": "value_a12_level1",
            "field13": "value_a13_level1",
            "field14": "value_a14_level1"
          },
          {
            "field11": "value_b11_level1",
            "field12": "value_b12_level1",
            "field13": "value_b13_level1",
            "field14": "value_b14_level1"
          }
        ],
        "field15": [
          {
            "field16": "xxx",
            "field17": "yyy",
            "field18": "value_b18_level1",
            "field19": "value_b19_level1"
          },
          {
            "field16": "xxx",
            "field17": "yyy",
            "field18": "value_c18_level1",
            "field19": "value_c19_level1"
          }
        ]
      },
      {
        "field7": "value_a7_level3",
        "field8": "value_a8_level3",
        "field9": "value_a9_level3",
        "field10": [
          {
            "field11": "value_a11_level3",
            "field12": "value_a12_level3",
            "field13": "value_a13_level3",
            "field14": "value_a14_level3"
          },
          {
            "field11": "value_b11_level3",
            "field12": "value_b12_level3",
            "field13": "value_b13_level3",
            "field14": "value_b14_level3"
          }
        ],
        "field15": [
          {
            "field16": "xxx",
            "field17": "yyy",
            "field18": "value_a18_level3",
            "field19": "value_a19_level3"
          }
        ]
      }
    ],
    "field20": [
      {
        "field21": "value21_level1",
        "field22": "value22_level1",
        "field23": "value23_level1"
      },
      {
        "field21": "value21_level2",
        "field22": "value22_level2",
        "field23": "value23_level2"
      }
    ]
  }

I tried several ways to get this result but in vain. Here is the last one but I am not satisfied at all because the result is not well structured:

db.requirements.aggregate([
    {
        $match: {
            "field6": {
                $elemMatch: {
                    "field15": {
                        $elemMatch: {
                            "field16": "xxx",
                            "field17": "yyy",
                        }
                    }
                }
            }
        }
    },
    {
        $addFields: {
            "field6": {
                $map: {
                    input: "$field6",
                    as: "f6",
                    in: {
                        $filter: {
                            input: "$$f6.field15",
                            as: "f15",
                            cond: {$and: [
                                {$eq: ["$$f15.f16", "xxx"]},
                                {$eq: ["$$f15.f17", "yyy"]}
                            ]}
                        }
                    }
                }
            }
        },
    }
]);

I also tried with $unwind and $group but it doesn't return the json I want.

Can someone help me find the solution?

Thank you in advance for your answers.

Upvotes: 0

Views: 823

Answers (2)

Buzz Moschetti
Buzz Moschetti

Reputation: 7621

Use $reduce as a loop.

db.foo.aggregate([
    // In this strategy, we walk the field6 array with $reduce and                                                          
    // "rebuild it" with either a filtered field15 or no entry at all.                                                      
    {$addFields: {"field6": {$reduce: {
        input: "$field6",
        initialValue: [], // important: start rebuild with empty array                                                      
            in: {$let: {
                vars: {ee: {$filter: {input: "$$this.field15", as: "z",
                                      cond: {$and:[ {$eq:["$$z.field16","xxx"]},
                                                    {$eq:["$$z.field17","yyy"]}
                                                  ]}
                                     }}
                      },

                 in: {$cond: [
                     {$ne:[0,{$size: "$$ee"}]}, // IF ee is not size 0                                                      
                
                     // THEN append an entry with filter field15                                                            
                     // plus its peer fields.  Since we cannot directly                                                     
                     // say "$$this.field15 = $ee", we use $mergeObjects                                                    
                     // to overlay field15:$$ee onto the existing object                                                    
                     // with the peer fields field7, field8, field10, etc.                                                           
                     // $concatArrays wants arrays, not objects, so wrap                                                    
                     // it in [] to make an array of one:                                                                   
                     {$concatArrays: [ "$$value",
                         [ {$mergeObjects: [ "$$this", {field15: "$$ee"} ]} ]
                                     ]},

                     // ELSE no concat; just pass back the existing array:                                                  
                     "$$value"
                 ]}
        }}  
        }}
    }}

]);

Alternately, if many more conditions need to applied to field6, it might be easier to $unwind on field6 first to isolate further operations on the fields inside that doc. Note however that $unwind and $group could have performance impact if field6 is a lengthy array.

db.foo.aggregate([
    // Get us down to dealing with only one array:
    {$unwind: "$field6"}

    // Overwrite field6.field15 with filtered version of same:
    ,{$addFields: {"field6.field15":
                   {$filter: {input: "$field6.field15",
                     as: "z2",
                     cond: {$and:[ {$eq:["$$z2.field16","xxx"]},
                                   {$eq:["$$z2.field17","yyy"]}
                                 ]}
                   }}
    }}
 
    // .. and eliminate those that have NO xxx/yyy in field16 and field17:
    ,{$match: {"field6.field15": {$ne:[]} }}

    // You might be good enough at this point, but if you really want to reform the
    // shape with an array for field6, use $group to put it back together.
    // Using $first on all the other peer fields to field6 is a bit ungainly, yes, but
    // it does produce the desired result:
    ,{$group: {_id:"$_id",
               "field1": {$first: "$field1"},
               "field2": {$first: "$field2"},
               "field3": {$first: "$field3"},
               "field4": {$first: "$field4"},
               "field5": {$first: "$field5"},
               "field6": {$push: "$field6"} // ah!  Rebuild array
          }}
]);

Upvotes: 1

R2D2
R2D2

Reputation: 10737

Maybe this is what you are looking for:

     db.collection.aggregate([
         {
         "$addFields": {
            "field6": {
                     "$filter": {
                              "input": {
                                    "$map": {
                                     "input": "$field6",
                                     "as": "f6",
                                     "in": {
                                     "$cond": [
                                               true,
                                                       {
                                            "field7": "$$f6.field7",
                                            "field8": "$$f6.field8",
                                            "field9": "$$f6.field9",
                                            "field10": "$$f6.field10",
                                            "field15": {
                                                       "$filter": {
                                                       "input": "$$f6.field15",
                                                      "as": "f15",
                              "cond": {
                               $and: [
                              {
                                $eq: [
                                   "$$f15.field16",
                                      "xxx"
                                     ]
                               },
                                    {
                                $eq: [
                                     "$$f15.field17",
                                      "yyy"
                                     ]
                                 }
                             ]
                          }
                        }
                       }
                      },
                      false
                       ]
                     }
                  }
                 },
                 "as": "cls",
                 "cond": {
                           $ne: [
                          "$$cls.field15",
                           []
                           ]
                         }
         }
         }
       }
       }
      ])

Explained:

  1. Create a $addFields stage with new filed6 that will overwrite the old filtered based on map on original field6
  2. Provide to the map the filed15 based on second filter about f16 & f17
  3. Remove in the first filter from the output the f15 empty arrays since they are generated in the map for those f15 where there is no match found

( Afcourse you can add your initial $match stage with the $elemMatch , I have just removed it to save some space )

playground

Upvotes: 1

Related Questions