Reputation: 434
I have a multiple collections , I used the separate collection & foreign key approach , and I want to join this collections to build a nested collections. this is my schemas of collections:
const SurveySchema = new Schema({
_id:{ type: Schema.ObjectId, auto: true },
name: String,
enabled: {type: Boolean, Default: true},
created_date:{type: Date, Default: Date.now},
company: {type: Schema.Types.ObjectId, ref: 'Company'},});
const GroupSchema = new Schema({
_id:{ type: Schema.ObjectId, auto: true },
name: String,
order: String,
created_date:{type: Date, Default: Date.now},
questions: [{type: Schema.Types.ObjectId, ref: 'Question'}],
survey: {type: Schema.Types.ObjectId, ref: 'Survey'}
});
const ResponseSchema = new Schema({
_id:{ type: Schema.ObjectId, auto: true },
response_text: String,
order: String,
created_date:{type: Date, Default: Date.now},
question:{type: Schema.Types.ObjectId, ref: 'Question'}
});
and this is my code to build this nested object:
Survey.aggregate([
{ $match: {} },
{ $lookup: {
from: 'groups',
localField: '_id',
foreignField: 'survey',
as: 'groupsofquestions',
}},
{ $unwind: {
path: "$groupsofquestions",
preserveNullAndEmptyArrays: true
}},
{ $lookup: {
from: 'questions',
localField: 'groupsofquestions._id',
foreignField: 'group',
as: 'questionsofgroup',
}},
{ $lookup: {
from: 'response',
localField: 'questionsofgroup._id',
foreignField: 'question',
as: 'responses',
}},
{ $group: {
_id: "$_id",
name: {$first: "$name"},
groups: {$push: {
id: "$groupsofquestions._id",
name: "$groupsofquestions.name",
questions: "$questionsofgroup",
reponses: "$responses"
}}
}}
])
I would like to structure as below, ( also with external link ):
http://jsoneditoronline.org/?id=d7d1779b3b95e3acb28f8a2be0785423
[
{
"__v": 0,
"_id": "59b6715725dcd2060da7f591",
"company": "59b6715725dcd2060da7f58f",
"created_date": "2017-09-11T11:19:51.709Z",
"enabled": true,
"name": "function String() { [native code] }",
"groups": [
{
"_id": "59b6715725dcd2060da7f592",
"name": "groupe 1 des question",
"order": "1",
"created_date": "2017-09-11T11:19:51.709Z",
"survey": "59b6715725dcd2060da7f591",
"__v": 0,
"questions": [
{
"_id": "59b6715725dcd2060da7f594",
"question_text": "question 1 group 1",
"order": "1",
"created_date": "2017-09-11T11:19:51.709Z",
"group": "59b6715725dcd2060da7f592",
"__v": 0,
"responses": [
{
"_id": "59b6715725dcd2060da7f598",
"response_text": "reponse 1 question 1 group 1",
"order": "1",
"created_date": "2017-09-11T11:19:51.710Z",
"question": "59b6715725dcd2060da7f594",
"__v": 0
},
{
"_id": "59b6715725dcd2060da7f599",
"response_text": "reponse 2 question 1 group 1",
"order": "2",
"created_date": "2017-09-11T11:19:51.710Z",
"question": "59b6715725dcd2060da7f594",
"__v": 0
}
]
},
{
"_id": "59b6715725dcd2060da7f595",
"question_text": "question 2 group 1",
"order": "2",
"created_date": "2017-09-11T11:19:51.710Z",
"group": "59b6715725dcd2060da7f592",
"__v": 0,
"responses": [
{
"_id": "59b6715725dcd2060da7f59a",
"response_text": "reponse 1 question 2 group 1",
"order": "1",
"created_date": "2017-09-11T11:19:51.710Z",
"question": "59b6715725dcd2060da7f595",
"__v": 0
},
{
"_id": "59b6715725dcd2060da7f59b",
"response_text": "reponse 2 question 2 group 1",
"order": "2",
"created_date": "2017-09-11T11:19:51.710Z",
"question": "59b6715725dcd2060da7f595",
"__v": 0
}
]
}
]
},
{
"_id": "59b6715725dcd2060da7f593",
"name": "groupe 2 des question",
"order": "2",
"created_date": "2017-09-11T11:19:51.709Z",
"survey": "59b6715725dcd2060da7f591",
"__v": 0,
"questions": [
{
"_id": "59b6715725dcd2060da7f596",
"question_text": "question 1 group 1",
"order": "1",
"created_date": "2017-09-11T11:19:51.710Z",
"group": "59b6715725dcd2060da7f592",
"__v": 0,
"responses": [
{
"_id": "59b6715725dcd2060da7f59c",
"response_text": "reponse 1 question 1 group 2",
"order": "1",
"created_date": "2017-09-11T11:19:51.710Z",
"question": "59b6715725dcd2060da7f596",
"__v": 0
},
{
"_id": "59b6715725dcd2060da7f59d",
"response_text": "reponse 2 question 1 group 2",
"order": "2",
"created_date": "2017-09-11T11:19:51.710Z",
"question": "59b6715725dcd2060da7f596",
"__v": 0
}
]
},
{
"_id": "59b6715725dcd2060da7f597",
"question_text": "question 2 group 1",
"order": "2",
"created_date": "2017-09-11T11:19:51.710Z",
"group": "59b6715725dcd2060da7f592",
"__v": 0,
"responses": [
{
"_id": "59b6715725dcd2060da7f59e",
"response_text": "reponse 1 question 2 group 2",
"order": "1",
"created_date": "2017-09-11T11:19:51.710Z",
"question": "59b6715725dcd2060da7f597",
"__v": 0
},
{
"_id": "59b6715725dcd2060da7f59f",
"response_text": "reponse 2 question 2 group 2",
"order": "2",
"created_date": "2017-09-11T11:19:51.710Z",
"question": "59b6715725dcd2060da7f597",
"__v": 0
}
]
}
]
}
]
}
]
Can someone help me structure the response as is shown in the sample please?
Upvotes: 2
Views: 1836
Reputation: 151122
Mostly you need to use $group
to "reconstruct" after processing with $unwind
in order to nest your array output again. There are also a couple of tips:
Survey.aggregate([
{ "$lookup": {
"from": Group.collection.name,
"localField": "_id",
"foreignField": "survey",
"as": "groups"
}},
{ "$unwind": "$groups" },
{ "$lookup": {
"from": Question.collection.name,
"localField": "groups.questions",
"foreignField": "_id",
"as": "groups.questions"
}},
{ "$unwind": "$groups.questions" },
{ "$lookup": {
"from": Response.collection.name,
"localField": "groups.questions._id",
"foreignField": "question",
"as": "groups.questions.responses"
}},
{ "$group": {
"_id": {
"_id": "$_id",
"company": "$company",
"created_date": "$created_date",
"enabled": "$enabled",
"name": "$name",
"groups": {
"_id": "$groups._id",
"name": "$groups.name",
"order": "$groups.order",
"created_date": "$groups.created_date",
"survey": "$groups.survey"
}
},
"questions": { "$push": "$groups.questions" }
}},
{ "$sort": { "_id": 1 } },
{ "$group": {
"_id": "$_id._id",
"company": { "$first": "$_id.company" },
"created_date": { "$first": "$_id.created_date" },
"enabled": { "$first": "$_id.enabled" },
"name": { "$first": "$_id.name" },
"groups": {
"$push": {
"_id": "$_id.groups._id",
"name": "$_id.groups.name",
"order": "$_id.groups.order",
"created_date": "$_id.groups.created_date",
"survey": "$_id.groups.survey",
"questions": "$questions"
}
}
}},
{ "$sort": { "_id": 1 } }
]);
So that's the approach to rebuilding arrays, where you take it one step at a time rather than trying to do it all in one go. It's probably the most difficult of concepts to generally comprehend, but a "pipeline" means you can in fact do things "multiple times", chaining one action to the output of the other.
So the first $group
is done at the "groups" level of detail because you want to $push
items of the "questions"
array, which was the last "deconstructed" by $unwind
. Note that the "responses"
are still an array as the result of the last $lookup
stage. But aside from the array content, everything else goes in the _id
"grouping key".
On the "second" $group
you actually use operators like $first
to construct the specific field properties at the Survey
level. The "groups"
array is constructed with $push
again, and every property that was in the "grouping key" of the previous stage is therefore prefixed with _id
, so that's how they are all referenced here.
Also, as a technical standpoint you should always $sort
after each invocation of $group
if you have an expected order. The collection on grouping keys is not guaranteed in any specific order ( though typically it's reverse stack order ). If you expect an order, then specify it, and particularly when applying $push
to reconstruct an array following a $group
.
The reason why there is no $sort
before the initial $group
is because the preceeding pipeline stages don't actually have any effect on the existing order. So the order of discovery is always preserved.
A couple of tips:
Things like Group.collection.name
actually use the properties defined on the mongoose models to do things like "get the collection name". This saves you from hard-coding into the $lookup
itself and stays consistent with whatever is registered on the model at the time the code is run.
If you intend output of a property as an array or even have an existing "array of reference" on the schema by a certain name, then "keep that name". Making interim names for paths really does not make a lot of sense unless you are specifically doing it in a pipeline stage for the purposes of "re-ordering" the output of fields in a later stage. Otherwise, just use the name you intend to output as in all cases. It's much easier to read and interpret intent that way.
Unless you really mean it, don't use options like preserveNullAndEmptyArrays
. There is a "special way" that the combination of $lookup
+ $unwind
is actually handled, and really gets executed in a "single stage" rather than retrieving all the results before "unwinding". You can see this in the "explain" output for the aggregation pipeline. In short, if you always have relational matches, then don't use the option. It's more optimal not to.
As a complete listing and proof of concept, we can load in your source JSON, store it in the database in separate collections and then use the aggregation statement in order to retrieve and reconstruct the desired structure:
const fs = require('fs'),
mongoose = require('mongoose'),
Schema = mongoose.Schema;
mongoose.Promise = global.Promise;
mongoose.set('debug',true);
const uri = 'mongodb://localhost/nested',
options = { useMongoClient: true };
const responseSchema = new Schema({
response_text: String,
order: String,
created_date: Date,
question: { type: Schema.Types.ObjectId, ref: 'Question' }
});
const questionSchema = new Schema({
question_text: String,
order: String,
created_date: Date,
group: { type: Schema.Types.ObjectId, ref: 'Group' }
});
const groupSchema = new Schema({
name: String,
order: String,
created_date: Date,
survey: { type: Schema.Types.ObjectId, ref: 'Survey' },
questions: [{ type: Schema.Types.ObjectId, ref: 'Question' }]
});
const surveySchema = new Schema({
company: { type: Schema.Types.ObjectId, ref: 'Company' },
created_date: Date,
enabled: Boolean,
name: String
});
const companySchema = new Schema({
});
const Company = mongoose.model('Company', companySchema);
const Survey = mongoose.model('Survey', surveySchema);
const Group = mongoose.model('Group', groupSchema);
const Question = mongoose.model('Question', questionSchema);
const Response = mongoose.model('Response', responseSchema);
function log(data) {
console.log(JSON.stringify(data,undefined,2))
}
(async function() {
try {
const conn = await mongoose.connect(uri,options);
await Promise.all(
Object.keys(conn.models).map( m => conn.models[m].remove() )
);
// Initialize data
let content = JSON.parse(fs.readFileSync('./jsonSurveys.json'));
//log(content);
for ( let item of content ) {
let survey = await Survey.create(item);
let company = await Company.create({ _id: survey.company });
for ( let group of item.groups ) {
await Group.create(group);
for ( let question of group.questions ) {
await Question.create(question);
for ( let response of question.responses ) {
await Response.create(response);
}
}
}
}
// Run aggregation
let results = await Survey.aggregate([
{ "$lookup": {
"from": Group.collection.name,
"localField": "_id",
"foreignField": "survey",
"as": "groups"
}},
{ "$unwind": "$groups" },
{ "$lookup": {
"from": Question.collection.name,
"localField": "groups.questions",
"foreignField": "_id",
"as": "groups.questions"
}},
{ "$unwind": "$groups.questions" },
{ "$lookup": {
"from": Response.collection.name,
"localField": "groups.questions._id",
"foreignField": "question",
"as": "groups.questions.responses"
}},
{ "$group": {
"_id": {
"_id": "$_id",
"company": "$company",
"created_date": "$created_date",
"enabled": "$enabled",
"name": "$name",
"groups": {
"_id": "$groups._id",
"name": "$groups.name",
"order": "$groups.order",
"created_date": "$groups.created_date",
"survey": "$groups.survey"
}
},
"questions": { "$push": "$groups.questions" }
}},
{ "$sort": { "_id": 1 } },
{ "$group": {
"_id": "$_id._id",
"company": { "$first": "$_id.company" },
"created_date": { "$first": "$_id.created_date" },
"enabled": { "$first": "$_id.enabled" },
"name": { "$first": "$_id.name" },
"groups": {
"$push": {
"_id": "$_id.groups._id",
"name": "$_id.groups.name",
"order": "$_id.groups.order",
"created_date": "$_id.groups.created_date",
"survey": "$_id.groups.survey",
"questions": "$questions"
}
}
}},
{ "$sort": { "_id": 1 } }
]);
log(results);
} catch(e) {
console.error(e);
} finally {
mongoose.disconnect();
}
})();
Also worth noting is that with a few small schema changes, the same result can be achieved by using nested calls to .populate()
:
let alternate = await Survey.find().populate({
path: 'groups',
populate: {
path: 'questions',
populate: {
path: 'responses'
}
}
});
Whilst it looks a lot more simple, it's actually introducing more load due to the fact that this issues multiple queries to the database in order to retrieve the data, and not in a single call:
Mongoose: groups.find({ survey: { '$in': [ ObjectId("59b6715725dcd2060da7f591") ] } }, { fields: {} })
Mongoose: questions.find({ _id: { '$in': [ ObjectId("59b6715725dcd2060da7f594"), ObjectId("59b6715725dcd2060da7f595"), ObjectId("59b6715725dcd2060da7f596"), ObjectId("59b6715725dcd2060da7f597") ] } }, { fields: {} })
Mongoose: responses.find({ question: { '$in': [ ObjectId("59b6715725dcd2060da7f594"), ObjectId("59b6715725dcd2060da7f595"), ObjectId("59b6715725dcd2060da7f596"), ObjectId("59b6715725dcd2060da7f597") ] } }, { fields: {} })
You can see the schema changes ( just the addition of virtual fields for the joins ) along with the code in action in the amended listing:
const fs = require('fs'),
mongoose = require('mongoose'),
Schema = mongoose.Schema;
mongoose.Promise = global.Promise;
mongoose.set('debug',true);
const uri = 'mongodb://localhost/nested',
options = { useMongoClient: true };
const responseSchema = new Schema({
response_text: String,
order: String,
created_date: Date,
question: { type: Schema.Types.ObjectId, ref: 'Question' }
});
const questionSchema = new Schema({
question_text: String,
order: String,
created_date: Date,
group: { type: Schema.Types.ObjectId, ref: 'Group' }
},{
toJSON: {
virtuals: true,
transform: function(doc,obj) {
delete obj.id;
return obj;
}
}
});
questionSchema.virtual('responses',{
ref: 'Response',
localField: '_id',
foreignField: 'question'
});
const groupSchema = new Schema({
name: String,
order: String,
created_date: Date,
survey: { type: Schema.Types.ObjectId, ref: 'Survey' },
questions: [{ type: Schema.Types.ObjectId, ref: 'Question' }]
});
const surveySchema = new Schema({
company: { type: Schema.Types.ObjectId, ref: 'Company' },
created_date: Date,
enabled: Boolean,
name: String
},{
toJSON: {
virtuals: true,
transform: function(doc,obj) {
delete obj.id;
return obj;
}
}
});
surveySchema.virtual('groups',{
ref: 'Group',
localField: '_id',
foreignField: 'survey'
});
const companySchema = new Schema({
});
const Company = mongoose.model('Company', companySchema);
const Survey = mongoose.model('Survey', surveySchema);
const Group = mongoose.model('Group', groupSchema);
const Question = mongoose.model('Question', questionSchema);
const Response = mongoose.model('Response', responseSchema);
function log(data) {
console.log(JSON.stringify(data,undefined,2))
}
(async function() {
try {
const conn = await mongoose.connect(uri,options);
await Promise.all(
Object.keys(conn.models).map( m => conn.models[m].remove() )
);
// Initialize data
let content = JSON.parse(fs.readFileSync('./jsonSurveys.json'));
//log(content);
for ( let item of content ) {
let survey = await Survey.create(item);
let company = await Company.create({ _id: survey.company });
for ( let group of item.groups ) {
await Group.create(group);
for ( let question of group.questions ) {
await Question.create(question);
for ( let response of question.responses ) {
await Response.create(response);
}
}
}
}
// Run aggregation
let results = await Survey.aggregate([
{ "$lookup": {
"from": Group.collection.name,
"localField": "_id",
"foreignField": "survey",
"as": "groups"
}},
{ "$unwind": "$groups" },
{ "$lookup": {
"from": Question.collection.name,
"localField": "groups.questions",
"foreignField": "_id",
"as": "groups.questions"
}},
{ "$unwind": "$groups.questions" },
{ "$lookup": {
"from": Response.collection.name,
"localField": "groups.questions._id",
"foreignField": "question",
"as": "groups.questions.responses"
}},
{ "$group": {
"_id": {
"_id": "$_id",
"company": "$company",
"created_date": "$created_date",
"enabled": "$enabled",
"name": "$name",
"groups": {
"_id": "$groups._id",
"name": "$groups.name",
"order": "$groups.order",
"created_date": "$groups.created_date",
"survey": "$groups.survey"
}
},
"questions": { "$push": "$groups.questions" }
}},
{ "$sort": { "_id": 1 } },
{ "$group": {
"_id": "$_id._id",
"company": { "$first": "$_id.company" },
"created_date": { "$first": "$_id.created_date" },
"enabled": { "$first": "$_id.enabled" },
"name": { "$first": "$_id.name" },
"groups": {
"$push": {
"_id": "$_id.groups._id",
"name": "$_id.groups.name",
"order": "$_id.groups.order",
"created_date": "$_id.groups.created_date",
"survey": "$_id.groups.survey",
"questions": "$questions"
}
}
}},
{ "$sort": { "_id": 1 } }
]);
log(results);
let alternate = await Survey.find().populate({
path: 'groups',
populate: {
path: 'questions',
populate: {
path: 'responses'
}
}
});
log(alternate);
} catch(e) {
console.error(e);
} finally {
mongoose.disconnect();
}
})();
Upvotes: 1