Reputation: 20043
I have two collections, let's call them Cats
and Parties
, with the following schemas:
{ name: String }
{ date: Date, attendants: [{ cat: { ref: 'Cat' }, role: String }] }
where role
symbolizes some other attribute, say, whether the attending cat is a VIP member.
Now I want to get a list of all cats that exist (even those poor kitties who never attended any party) and for each cat, I want a list of all the roles it ever had for at least one party. Furthermore, I want this entire list to be sorted by the (per cat) last attended party's date
with cats who never attended any party being last.
This raises the following problems for me:
Parties
excludes party-pooper kitties who never joined a party.Cats
sort of goes »the wrong way« because I cannot $lookup
parties the cat attended because that information is in a subdocument array.The pipeline I currently have gives me all cats who attended at least one party with a list of their roles, but doesn't sort by the last attended party. In fact, I could live with excluding cats who never attended a party, but the sorting is crucial for me:
Party.aggregate([
{ $unwind: '$attendants' },
{ $project: { role: '$attendants.role', cat: '$attendants.cat' } },
{
$group: {
_id: '$cat',
roles: { $addToSet: '$role' }
}
},
{
$lookup: {
from: 'cats',
localField: '_id',
foreignField: '_id',
as: 'cat'
}
},
{ $unwind: '$cat' },
// (*)
{ $addFields: { 'cat.roles': '$roles' } },
{ $replaceRoot: { newRoot: '$cat' } }
])
My current idea would basically be a right outer join at (*)
to add a list of parties the cat has attended, $project
that to the party's date and then $group
using $max
to get the latest date. Then I can $unwind
that now one-element array and $sort
over it in the end.
The problem is that right outer joins don't exist in mongo, AFAIK, and I don't know how to get that list of parties per cat within the pipeline.
To clarify, the expected output should be something like
[
{
"_id": "59982d3c7ca25936f8c327c8",
"name": "Mr. Kitty",
"roles": ["vip", "birthday cat"],
"dateOfLastParty": "2017-06-02"
},
{
"_id": "59982d3c7ca25936f8c327c9",
"name": "Snuffles",
"roles": ["best looking cat"],
"dateOfLastParty": "2017-06-01"
},
...
{
"_id": "59982d3c7ca25936f8c327c4",
"name": "Sad Face McLazytown",
"roles": [],
"dateOfLastParty": null
},
]
Upvotes: 2
Views: 2528
Reputation: 151122
As stated, you want the "cats" so use the Cat
model and do the "left outer join" that is actually inherent to $lookup
, rather than asking for a "right outer join" from the opposing collection, since a "right outer join" is not possible with MongoDB at this time.
It's also far more practical as a "left join", because you want "cats" as your primary source of output. The only thing to consider when linking to "Party" is that each "Cat" is listed in an array, and therefore you get the whole document back. So all that needs to be done is in "post processing" after the $lookup
, you simply "filter" the array content for the matching entry of the current cat.
Fortunately we get good features with $arrayElemAt
and $indexOfArray
, that allow us to do that exact extraction:
let kitties = await Cat.aggregate([
{ '$lookup': {
'from': Party.collection.name,
'localField': '_id',
'foreignField': 'attendants.cat',
'as': 'parties'
}},
{ '$replaceRoot': {
'newRoot': {
'$let': {
'vars': {
'parties': {
'$map': {
'input': '$parties',
'as': 'p',
'in': {
'date': '$$p.date',
'role': {
'$arrayElemAt': [
'$$p.attendants.role',
{ '$indexOfArray': [ '$$p.attendants.cat', '$_id' ] }
]
}
}
}
}
},
'in': {
'_id': '$_id',
'name': '$name',
'roles': '$$parties.role',
'dateOfLastParty': { '$max': '$$parties.date' }
}
}
}
}}
]);
So my concept of "optimal" processing here actually uses $replaceRoot
here because you can define the whole document under a $let
statement. The reason I'm doing that is so we can take the "parties"
array output from the previous $lookup
and reshape each entry extracting the matching "role"
data for the current "kitty" at that given party. This we can actually make a variable itself.
The reason for the "array variable" is because we can then use $max
to extract the "largest/last" date property as "singular" and still extract the "role" values as an "array" from that reshaped content. This makes it easy to define the fields you wanted.
And since it was a "left join" started from Cat
in the first place, then those poor kitties that missed out on all parties are still there, and still have the desired output.
Two aggregation pipeline stages. What could be more simple!
As a full listing:
const mongoose = require('mongoose'),
Schema = mongoose.Schema;
mongoose.Promise = global.Promise;
mongoose.set('debug',true);
const uri = 'mongodb://localhost/catparty',
options = { useMongoClient: true };
const catSchema = new Schema({
name: String
});
const partySchema = new Schema({
date: Date,
attendants: [{
cat: { type: Schema.Types.ObjectId, ref: 'Cat' },
role: String
}]
});
const Cat = mongoose.model('Cat', catSchema);
const Party = mongoose.model('Party', partySchema);
function log(data) {
console.log(JSON.stringify(data,undefined,2))
}
(async function() {
try {
const conn = await mongoose.connect(uri,options);
// Clean collections
await Promise.all(
Object.keys(conn.models).map( m => conn.models[m].remove({}) )
);
var cats = await Cat.insertMany(
['Fluffy', 'Snuggles', 'Whiskers', 'Socks'].map( name => ({ name }) )
);
cats.shift();
cats = cats.map( (cat,idx) =>
({ cat: cat._id, role: (idx === 0) ? 'Host' : 'Guest' })
);
log(cats);
let party = await Party.create({
date: new Date(),
attendants: cats
});
log(party);
let kitties = await Cat.aggregate([
{ '$lookup': {
'from': Party.collection.name,
'localField': '_id',
'foreignField': 'attendants.cat',
'as': 'parties'
}},
{ '$replaceRoot': {
'newRoot': {
'$let': {
'vars': {
'parties': {
'$map': {
'input': '$parties',
'as': 'p',
'in': {
'date': '$$p.date',
'role': {
'$arrayElemAt': [
'$$p.attendants.role',
{ '$indexOfArray': [ '$$p.attendants.cat', '$_id' ] }
]
}
}
}
}
},
'in': {
'_id': '$_id',
'name': '$name',
'roles': '$$parties.role',
'dateOfLastParty': { '$max': '$$parties.date' }
}
}
}
}}
]);
log(kitties);
} catch(e) {
console.error(e);
} finally {
mongoose.disconnect();
}
})();
And example output:
[
{
"_id": "59a00d9528683e0f59e53460",
"name": "Fluffy",
"roles": [],
"dateOfLastParty": null
},
{
"_id": "59a00d9528683e0f59e53461",
"name": "Snuggles",
"roles": [
"Host"
],
"dateOfLastParty": "2017-08-25T11:44:21.903Z"
},
{
"_id": "59a00d9528683e0f59e53462",
"name": "Whiskers",
"roles": [
"Guest"
],
"dateOfLastParty": "2017-08-25T11:44:21.903Z"
},
{
"_id": "59a00d9528683e0f59e53463",
"name": "Socks",
"roles": [
"Guest"
],
"dateOfLastParty": "2017-08-25T11:44:21.903Z"
}
]
And you should be able to see how those "roles" values actually become an array with more data. And if you need that to be a "unique list", then simply wrap with $setDifference
as in:
'roles': { '$setDifference': [ '$$parties.role', [] ] },
And that is also covered
Upvotes: 2