Ramesh Paul
Ramesh Paul

Reputation: 850

mongodb group by multiple keys values vise versa

I have a user collection with following data

    [
  {
    "user_id": "5625c95ac2d34f27148b64fa",
    "friend_id": "561f40bac2d34f17148b462c"
  },
  {
    "user_id": "562744ccc2d34f27148b6eb7",
    "friend_id": "561f40bac2d34f17148b462c"
  },
  {
    "user_id": "56248eb9c2d34f2f148b5a18",
    "friend_id": "561f40bac2d34f17148b462c"
  },
  {
    "user_id": "561f40bac2d34f17148b462c",
    "friend_id": "561f3e06c2d34f27148b45f6"
  },
  {
    "user_id": "561f40bac2d34f17148b462c",
    "friend_id": "5620de97c2d34f2f148b578f"
  },
  {
    "user_id": "56276b52c2d34f27148b7128",
    "friend_id": "561f40bac2d34f17148b462c"
  },
  {
    "user_id": "561f40bac2d34f17148b462c",
    "friend_id": "56276b52c2d34f27148b7128"
  }
]

i need to fetch the documents in which combination of user_id and friend_id not repeated. i.e in the above example last two documents user_id repeated in friend_id of next document.

I tried with mongo aggrigate and group by but could not reduce it.

Upvotes: 3

Views: 1292

Answers (1)

Blakes Seven
Blakes Seven

Reputation: 50416

In order to do this you basically need to combine both user_id and friend_id values in a uniquely sorted combination. This means creating an array for each document with those members and sorting that array so that the order is always the same.

Then you can $group on that sorted array content to see which documents contain that same combination and then only return those that do not share that same combination.

This leads to this aggregate statement:

db.collection.aggregate([
    { "$project": {
        "user_id": 1,
        "friend_id": 1,
        "combined": {
            "$map": {
                "input": ["A","B"],
                "as": "el",
                "in": {
                    "$cond": [ 
                        { "$eq": [ "$$el", "A" ] },
                        "$user_id",
                        "$friend_id"
                    ]
                }
            }
        }            
    }},
    { "$unwind": "$combined" },
    { "$sort": { "combined": 1 } },
    { "$group": {
        "_id": "$_id",
        "combined": { "$push": "$combined" },
        "user_id": { "$first": "$user_id" },
        "friend_id": { "$first": "$friend_id"  }
    }},
    { "$group": {
        "_id": "$combined",
        "docs": { "$push": {
            "_id": "$_id",
            "user_id": "$user_id",
            "friend_id": "$friend_id"
        }}
    }},
    { "$redact": {
        "$cond": {
            "if": { "$ne": [{ "$size": "$docs" }, 1] },
            "then": "$$PRUNE",
            "else": "$$KEEP"
        }
    }}
])

The PHP translation for laravel means to need to access the raw collection object from the manager, where "collection" is the actual name of the collection in MongoDB:

$result = DB::collection("collection")->raw(function($collection) {
    return $collection->aggregate(
        array(
            array(
                '$project' => array(
                    'user_id' => 1,
                    'friend_id' => 1,
                    'combined' => array(
                        '$map' => array(
                            'input' => array("A","B"),
                            'as' => 'el',
                            'in' => array(
                                '$cond' => array(
                                    array( '$eq' => array( '$el', 'A' ) ),
                                    '$user_id',
                                    '$friend_id'
                                )
                            )
                        )
                    )
                )
            ),
            array( '$unwind' =>'$combined' ),
            array( '$sort' => array( 'combined' => 1 ) ),
            array(
                '$group' => array(
                    '_id' => '$_id',
                    'combined' => array( '$push' => '$combined' ),
                    'user_id' => array( '$first' => '$user_id' ),
                    'friend_id' => array( '$first' => '$friend_id' )
                )
            ),
            array(
                '$group' => array(
                    '_id' => '$combined',
                    'docs' => array(
                        '$push' => array(
                            '_id' => '$_id',
                            'user_id' => '$user_id',
                            'friend_id' => 'friend_id'
                        )
                    )
                )
            ),
            array(
                '$redact' => array(
                    '$cond' => array(
                        'if' => array( '$ne' => array( array( '$size' => '$docs'), 1) ),
                        'then' => '$$PRUNE',
                        'else' => '$$KEEP'
                    )
                )
            )
        )
    );
});

Or if your MongoDB version is less than 2.6, and you lack operators like $map and $redact, then you can still do this, but not as efficiently:

$result = DB::collection("collection")->raw(function($collection) {
    return $collection->aggregate(
        array(
            array(
                '$project' => array(
                    'user_id' => 1,
                    'friend_id' => 1,
                    'type' => array( '$const' => array( 'A', 'B' ) )
                )
            ),
            array( '$unwind' => '$type' ),
            array(
                '$group' => array(
                    '_id' => '$_id',
                    'user_id' => array( '$first' => '$user_id' ),
                    'friend_id' => array( '$first' => '$friend_id' ),
                    'combined' => array( 
                        '$push' => array(
                            '$cond' => array(
                                array( '$eq' => array( '$type', 'A' ) ),
                                '$user_id',
                                '$friend_id'
                            )
                        )
                    )
                )
            )
            array( '$unwind' =>'$combined' ),
            array( '$sort' => array( 'combined' => 1 ) ),
            array(
                '$group' => array(
                    '_id' => '$_id',
                    'combined' => array( '$push' => '$combined' ),
                    'user_id' => array( '$first' => '$user_id' ),
                    'friend_id' => array( '$first' => '$friend_id' )
                )
            ),
            array(
                '$group' => array(
                    '_id' => '$combined',
                    'docs' => array(
                        '$push' => array(
                            '_id' => '$_id',
                            'user_id' => '$user_id',
                            'friend_id' => 'friend_id'
                        )
                    ),
                    'count' => array( '$sum' => 1 )
                )
            ),
            array( '$match' => array( 'count' => 1 ) )
        )
    );
});

Where the first three stages mimic what the first stage is doing in the first example listing by putting both values in a single array. Of course the last two stages by "counting" the array members while grouping and then filtering out anything that does not have a "count" of 1.

In either case this leaves you with output that only lists the documents where that combination does not occur in either order:

{ 
    "_id" : [ "561f40bac2d34f17148b462c", "5625c95ac2d34f27148b64fa" ], 
    "docs" : [ 
        { 
            "_id" : ObjectId("56306f6cd2387ad4c95b0cc9"),
            "user_id" : "5625c95ac2d34f27148b64fa",
            "friend_id" : "561f40bac2d34f17148b462c" 
        } 
    ] 
}
{ 
    "_id" : [ "561f3e06c2d34f27148b45f6", "561f40bac2d34f17148b462c" ], 
    "docs" : [ 
        { 
            "_id" : ObjectId("56306f6cd2387ad4c95b0ccc"), 
            "user_id" : "561f40bac2d34f17148b462c",
            "friend_id" : "561f3e06c2d34f27148b45f6"
        }
    ]
}
{ 
    "_id" : [ "561f40bac2d34f17148b462c", "56248eb9c2d34f2f148b5a18" ], 
    "docs" : [
        { 
            "_id" : ObjectId("56306f6cd2387ad4c95b0ccb"), 
            "user_id" : "56248eb9c2d34f2f148b5a18", 
            "friend_id" : "561f40bac2d34f17148b462c"
        }
    ]
}
{ 
    "_id" : [ "561f40bac2d34f17148b462c", "5620de97c2d34f2f148b578f" ],
    "docs" : [
        { 
            "_id" : ObjectId("56306f6cd2387ad4c95b0ccd"), 
            "user_id" : "561f40bac2d34f17148b462c", 
            "friend_id" : "5620de97c2d34f2f148b578f"
        }
    ]
}
{ 
    "_id" : [ "561f40bac2d34f17148b462c", "562744ccc2d34f27148b6eb7" ], 
    "docs" : [ 
        { 
            "_id" : ObjectId("56306f6cd2387ad4c95b0cca"), 
            "user_id" : "562744ccc2d34f27148b6eb7", 
            "friend_id" : "561f40bac2d34f17148b462c"
        }
    ]
}

You can pretty up the output, but this serves the purpose of showing the ordered combination used along with the original document data.

Upvotes: 3

Related Questions