How to check two values in same documents in mongo?

Question

I have following data structure -

{
"_id" : ObjectId("5462f7c2e4b01e902fe359f9"),
"runtime" : 1415772003908,
"myInfo" : [ 

    {
        "name" : "AAA",
        "uuid" : "42089c2d-e3db-14e7-d427-d020881c4820",
        "canName" : [ 
            "naa.60a9800042704577762b45634476337a", 
            "naa.6d867d9c7acd60001aed76eb2c70bd53", 
            "naa.600a09804270457a7a5d455448735330"
        ]
    }, 
    {
        "name" : "BBB",
        "uuid" : "4208b252-5cab-fa1f-7e2e-449a7884c82b",
        "canName" : [ 
            "naa.6d867d9c7acd60001aed76eb2c70bd53"
        ]
    }, 

],
"ds" : [ 
    {
        "luns" : {
            "model" : "Virtual CD/DVD",
            "canName" : "mpx.vmhba32:C0:T0:L0",
            "vendor" : "Linux"
        },
        "vms" : []
    }, 
    {
        "name" : "name1",
        "utilization" : {
            "freeBytes" : 106087579648,
            "sizeBytes" : 107105746944,
            "usedBytes" : 1018167296
        },
        "luns" : {
            "model" : "LUN",
            "canName" : "naa.60a9800032466774352446525768457a",
            "vendor" : "abc"
        },
        "vms" : []
    }, 
    {
        "name" : "name2",
        "utilization" : {
            "freeBytes" : NumberLong(4352638976),
            "sizeBytes" : NumberLong(5100273664),
            "usedBytes" : NumberLong(747634688)
        },
        "luns" : {
            "model" : "LUN",
            "canName" : "naa.60a9800042704577762b456344763376",
            "vendor" : "NETAPP"
        },
        "vms" : [],
        "diskWriteBytes" : 0,
        "diskReadBytes" : 0
    }, 
    {
        "luns" : {
            "model" : "LUN",
            "canName" : "naa.60a98000324667743524465257684631",
            "vendor" : "NETAPP"
        },
        "vms" : []
    }, 
    {
        "name" : "name3",
        "utilization" : {
            "freeBytes" : NumberLong(7191134208),
            "sizeBytes" : NumberLong(11542724608),
            "usedBytes" : NumberLong(4351590400)
        },
        "luns" : {
            "model" : "LUN",
            "canName" : "naa.60a9800042704577762b45634476337a",
            "vendor" : "NETAPP"
        },
        "vms" : [ 
            "BBB"
        ]
    }, 
    {
        "name" : "name4",
        "utilization" : {
            "freeBytes" : NumberLong(550533857280),
            "sizeBytes" : NumberLong(998848331776),
            "usedBytes" : NumberLong(448314474496)
        },
        "luns" : {
            "model" : "MRSASRoMB-4i",
            "canName" : "naa.6d867d9c7acd60001aed76eb2c70bd53",
            "vendor" : "LSI"
        },
        "vms" : [ 
            "AAA", 
            "BBB", 
            "CCC", 
            "DDD"
        ],
        "diskWriteBytes" : 0,
        "diskReadBytes" : 8
    }
]
}

I want to match canName from luns in ds with canName array from myinfo and name from myinfo is present in vms of ds. By matching this I want to get following fields from both.

from ds- name, utilization,diskReadBytes, diskWriteBytes

from vms- name, uuid, canName

so my output will be like -

[{
        "name" : "AAA",
        "uuid" : "42089c2d-e3db-14e7-d427-d020881c4820",
        "canName" :"naa.6d867d9c7acd60001aed76eb2c70bd53", 
       "dsname" : "name3",
        "utilization" : {
            "freeBytes" : NumberLong(550533857280),
            "sizeBytes" : NumberLong(998848331776),
            "usedBytes" : NumberLong(448314474496)
        },
        "luns" : {
            "model" : "MRSASRoMB-4i",
            "canonicalName" : "naa.6d867d9c7acd60001aed76eb2c70bd53",
            "vendor" : "LSI"
        }
    }, 
    {
        "name" : "BBB",
        "uuid" : "4208b252-5cab-fa1f-7e2e-449a7884c82b",
        "canName" :"naa.60a9800042704577762b45634476337a",
    "dsname" : "name3",
        "utilization" : {
            "freeBytes" : NumberLong(7191134208),
            "sizeBytes" : NumberLong(11542724608),
            "usedBytes" : NumberLong(4351590400)
        },
        "luns" : {
            "model" : "MRSASRoMB-4i",
            "canonicalName" : "naa.60a9800042704577762b45634476337a",
            "vendor" : "LSI"
        }

    }]

Neil Lunn · Accepted Answer

Sure you can. Not terribly straightforward to do since you are matching within arrays. But this sort of thing is possible with the aggregation framework:

db.collection.aggregate([
    // Unwind each array to "de-normalize" all of the content
    { "$unwind": "$myInfo" },
    { "$unwind": "$myInfo.canName" },
    { "$unwind": "$ds" },

    // Project with a test where the two fields match
    { "$project": {
        "_id": "$$ROOT",
        "matched": { 
            "$eq": [ 
                "$myInfo.canName", 
                "$ds.luns.canName"
            ]
        }
    }},

    // Filter out everything that did not match
    { "$match": { "matched": true } },

    // Project out to the field structure you want
    { "$project": {
        "_id": 0,
        "name": "$_id.myInfo.name",
        "uuid": "$_id.myInfo.uuid",
        "canName": "$_id.myInfo.canName",
        "dsname": "$_id.ds.name",
        "utilization": "$_id.ds.utilization",
        "luns": "$_id.ds.luns"
    }}
])

Using $$ROOT there as introduced in MongoDB 2.6 to avoid listing all the field names again in the projection where you test for the match of the two fields. You can just traditionally list all the fields as well if you want or have a lower version.

You might also want to consider doing this with the inline processing on $map from MongoDB 2.6 and greater if your arrays could be quite large:

db.collection.aggregate([
    { "$project": {
      "_id": 0,
      "myInfo": {
        "$map": {
          "input": "$myInfo",
          "as": "info",
          "in": {
            "name": "$$info.name",
            "uuid": "$$info.uuid",
            "canName": {
              "$setDifference": [
                { "$map": {
                  "input": "$$info.canName",
                  "as": "ican",
                  "in": {
                    "$setDifference": [
                      { "$map": {
                        "input": "$ds",
                        "as": "ds",
                        "in": {
                          "$cond": [
                            { "$eq": [
                              "$$ican", "$$ds.luns.canName"
                            ]},
                            { 
                                "canName": "$$ican",
                                "dsname": "$$ds.name",
                                "utilization": "$$ds.utilization",
                                "luns": "$$ds.luns"
                            },
                            false
                          ]
                        }
                      }},
                      [false]
                    ]
                  }                      
                }},
                [false]
              ]
            }
          }
        }
      }
    }},
    { "$unwind": "$myInfo" },
    { "$unwind": "$myInfo.canName" },
    { "$unwind": "$myInfo.canName" },
    { "$project": {
        "name": "$myInfo.name",
        "uuid": "$myInfo.uuid",
        "canName": "$myInfo.canName.canName",
        "dsname": "$myInfo.canName.dsname",
        "utilization": "$myInfo.canName.utilization",
        "luns": "$myInfo.canName.luns"
    }}
])

The principles in each case remain the same. Basically this processes a comparison against each member of the arrays to see where the match is found. Like matches are put together and "filtered" out where there is no match, returning the end result.

I actually get "two" matches against your "AAA" element, since it's "canName" array has two elements matching elements in the "ds" array:

{
    "name" : "AAA",
    "uuid" : "42089c2d-e3db-14e7-d427-d020881c4820",
    "canName" : "naa.60a9800042704577762b45634476337a",
    "dsname" : "name3",
    "utilization" : {
            "freeBytes" : NumberLong("7191134208"),
            "sizeBytes" : NumberLong("11542724608"),
            "usedBytes" : NumberLong("4351590400")
    },
    "luns" : {
            "model" : "LUN",
            "canName" : "naa.60a9800042704577762b45634476337a",
            "vendor" : "NETAPP"
    }
}
{
    "name" : "AAA",
    "uuid" : "42089c2d-e3db-14e7-d427-d020881c4820",
    "canName" : "naa.6d867d9c7acd60001aed76eb2c70bd53",
    "dsname" : "name4",
    "utilization" : {
            "freeBytes" : NumberLong("550533857280"),
            "sizeBytes" : NumberLong("998848331776"),
            "usedBytes" : NumberLong("448314474496")
    },
    "luns" : {
            "model" : "MRSASRoMB-4i",
            "canName" : "naa.6d867d9c7acd60001aed76eb2c70bd53",
            "vendor" : "LSI"
    }
}
{
    "name" : "BBB",
    "uuid" : "4208b252-5cab-fa1f-7e2e-449a7884c82b",
    "canName" : "naa.6d867d9c7acd60001aed76eb2c70bd53",
    "dsname" : "name4",
    "utilization" : {
            "freeBytes" : NumberLong("550533857280"),
            "sizeBytes" : NumberLong("998848331776"),
            "usedBytes" : NumberLong("448314474496")
    },
    "luns" : {
            "model" : "MRSASRoMB-4i",
            "canName" : "naa.6d867d9c7acd60001aed76eb2c70bd53",
            "vendor" : "LSI"
    }
}

You could alternately change the last $project to a $group using the "name" and "uuid" fields as the _id grouping key, then use the $first operator on all other projected fields in order to retain only the "first" match. But it depends on your intent.

How to check two values in same documents in mongo?

Answers (1)

Related Questions