Soufiane Lamnizeh
Soufiane Lamnizeh

Reputation: 434

$lookup from Multiple Collections, and nested output

I have a multiple collections , I used the separate collection & foreign key approach , and I want to join this collections to build a nested collections. this is my schemas of collections:

const SurveySchema = new Schema({
_id:{ type: Schema.ObjectId, auto: true },
name: String,
enabled: {type: Boolean, Default: true},
created_date:{type: Date, Default: Date.now},
company: {type: Schema.Types.ObjectId, ref: 'Company'},});

const GroupSchema = new Schema({
  _id:{ type: Schema.ObjectId, auto: true },
  name: String,
  order: String,
  created_date:{type: Date, Default: Date.now},
  questions: [{type: Schema.Types.ObjectId, ref: 'Question'}],
  survey: {type: Schema.Types.ObjectId, ref: 'Survey'}
});

const ResponseSchema = new Schema({
  _id:{ type: Schema.ObjectId, auto: true },
  response_text: String,
  order: String,
  created_date:{type: Date, Default: Date.now},
  question:{type: Schema.Types.ObjectId, ref: 'Question'}
});

and this is my code to build this nested object:

Survey.aggregate([
  { $match: {} },
  { $lookup: {
    from: 'groups',
    localField: '_id',
    foreignField: 'survey',
    as: 'groupsofquestions',
  }},
  { $unwind: {
    path: "$groupsofquestions",
    preserveNullAndEmptyArrays: true
  }},
  { $lookup: {
    from: 'questions',
    localField: 'groupsofquestions._id',
    foreignField: 'group',
    as: 'questionsofgroup',
  }},
  { $lookup: {
    from: 'response',
    localField: 'questionsofgroup._id',
    foreignField: 'question',
    as: 'responses',
  }},
  { $group: {
    _id: "$_id",
    name: {$first: "$name"},
    groups: {$push: {
      id: "$groupsofquestions._id",
      name: "$groupsofquestions.name",
      questions: "$questionsofgroup",
      reponses: "$responses"
    }}
  }}
])

I would like to structure as below, ( also with external link ):

http://jsoneditoronline.org/?id=d7d1779b3b95e3acb28f8a2be0785423

[
  {
    "__v": 0,
    "_id": "59b6715725dcd2060da7f591",
    "company": "59b6715725dcd2060da7f58f",
    "created_date": "2017-09-11T11:19:51.709Z",
    "enabled": true,
    "name": "function String() { [native code] }",
    "groups": [
      {
        "_id": "59b6715725dcd2060da7f592",
        "name": "groupe 1 des question",
        "order": "1",
        "created_date": "2017-09-11T11:19:51.709Z",
        "survey": "59b6715725dcd2060da7f591",
        "__v": 0,
        "questions": [
          {
            "_id": "59b6715725dcd2060da7f594",
            "question_text": "question 1 group 1",
            "order": "1",
            "created_date": "2017-09-11T11:19:51.709Z",
            "group": "59b6715725dcd2060da7f592",
            "__v": 0,
            "responses": [
              {
                "_id": "59b6715725dcd2060da7f598",
                "response_text": "reponse 1 question 1 group 1",
                "order": "1",
                "created_date": "2017-09-11T11:19:51.710Z",
                "question": "59b6715725dcd2060da7f594",
                "__v": 0
              },
              {
                "_id": "59b6715725dcd2060da7f599",
                "response_text": "reponse 2 question 1 group 1",
                "order": "2",
                "created_date": "2017-09-11T11:19:51.710Z",
                "question": "59b6715725dcd2060da7f594",
                "__v": 0
              }
            ]
          },
          {
            "_id": "59b6715725dcd2060da7f595",
            "question_text": "question 2 group 1",
            "order": "2",
            "created_date": "2017-09-11T11:19:51.710Z",
            "group": "59b6715725dcd2060da7f592",
            "__v": 0,
            "responses": [
              {
                "_id": "59b6715725dcd2060da7f59a",
                "response_text": "reponse 1 question 2 group 1",
                "order": "1",
                "created_date": "2017-09-11T11:19:51.710Z",
                "question": "59b6715725dcd2060da7f595",
                "__v": 0
              },
              {
                "_id": "59b6715725dcd2060da7f59b",
                "response_text": "reponse 2 question 2 group 1",
                "order": "2",
                "created_date": "2017-09-11T11:19:51.710Z",
                "question": "59b6715725dcd2060da7f595",
                "__v": 0
              }
            ]
          }
        ]
      },
      {
        "_id": "59b6715725dcd2060da7f593",
        "name": "groupe 2 des question",
        "order": "2",
        "created_date": "2017-09-11T11:19:51.709Z",
        "survey": "59b6715725dcd2060da7f591",
        "__v": 0,
        "questions": [
          {
            "_id": "59b6715725dcd2060da7f596",
            "question_text": "question 1 group 1",
            "order": "1",
            "created_date": "2017-09-11T11:19:51.710Z",
            "group": "59b6715725dcd2060da7f592",
            "__v": 0,
            "responses": [
              {
                "_id": "59b6715725dcd2060da7f59c",
                "response_text": "reponse 1 question 1 group 2",
                "order": "1",
                "created_date": "2017-09-11T11:19:51.710Z",
                "question": "59b6715725dcd2060da7f596",
                "__v": 0
              },
              {
                "_id": "59b6715725dcd2060da7f59d",
                "response_text": "reponse 2 question 1 group 2",
                "order": "2",
                "created_date": "2017-09-11T11:19:51.710Z",
                "question": "59b6715725dcd2060da7f596",
                "__v": 0
              }
            ]
          },
          {
            "_id": "59b6715725dcd2060da7f597",
            "question_text": "question 2 group 1",
            "order": "2",
            "created_date": "2017-09-11T11:19:51.710Z",
            "group": "59b6715725dcd2060da7f592",
            "__v": 0,
            "responses": [
              {
                "_id": "59b6715725dcd2060da7f59e",
                "response_text": "reponse 1 question 2 group 2",
                "order": "1",
                "created_date": "2017-09-11T11:19:51.710Z",
                "question": "59b6715725dcd2060da7f597",
                "__v": 0
              },
              {
                "_id": "59b6715725dcd2060da7f59f",
                "response_text": "reponse 2 question 2 group 2",
                "order": "2",
                "created_date": "2017-09-11T11:19:51.710Z",
                "question": "59b6715725dcd2060da7f597",
                "__v": 0
              }
            ]
          }
        ]
      }
    ]
  }
]

Can someone help me structure the response as is shown in the sample please?

Upvotes: 2

Views: 1836

Answers (1)

Neil Lunn
Neil Lunn

Reputation: 151122

Mostly you need to use $group to "reconstruct" after processing with $unwind in order to nest your array output again. There are also a couple of tips:

   Survey.aggregate([
      { "$lookup": {
        "from": Group.collection.name,
        "localField": "_id",
        "foreignField": "survey",
        "as": "groups"
      }},
      { "$unwind": "$groups" },
      { "$lookup": {
        "from": Question.collection.name,
        "localField": "groups.questions",
        "foreignField": "_id",
        "as": "groups.questions"
      }},
      { "$unwind": "$groups.questions" },
      { "$lookup": {
        "from": Response.collection.name,
        "localField": "groups.questions._id",
        "foreignField": "question",
        "as": "groups.questions.responses"
      }},
      { "$group": {
        "_id": {
          "_id": "$_id",
          "company": "$company",
          "created_date": "$created_date",
          "enabled": "$enabled",
          "name": "$name",
          "groups": {
            "_id": "$groups._id",
            "name": "$groups.name",
            "order": "$groups.order",
            "created_date": "$groups.created_date",
            "survey": "$groups.survey"
          }
        },
        "questions": { "$push": "$groups.questions" }
      }},
      { "$sort": { "_id": 1 } },
      { "$group": {
        "_id": "$_id._id",
        "company": { "$first": "$_id.company" },
        "created_date": { "$first": "$_id.created_date" },
        "enabled": { "$first": "$_id.enabled" },
        "name": { "$first": "$_id.name" },
        "groups": {
          "$push": {
            "_id": "$_id.groups._id",
            "name": "$_id.groups.name",
            "order": "$_id.groups.order",
            "created_date": "$_id.groups.created_date",
            "survey": "$_id.groups.survey",
            "questions": "$questions"
          }
        }
      }},
      { "$sort": { "_id": 1 } }
    ]);

So that's the approach to rebuilding arrays, where you take it one step at a time rather than trying to do it all in one go. It's probably the most difficult of concepts to generally comprehend, but a "pipeline" means you can in fact do things "multiple times", chaining one action to the output of the other.

So the first $group is done at the "groups" level of detail because you want to $push items of the "questions" array, which was the last "deconstructed" by $unwind. Note that the "responses" are still an array as the result of the last $lookup stage. But aside from the array content, everything else goes in the _id "grouping key".

On the "second" $group you actually use operators like $first to construct the specific field properties at the Survey level. The "groups" array is constructed with $push again, and every property that was in the "grouping key" of the previous stage is therefore prefixed with _id, so that's how they are all referenced here.

Also, as a technical standpoint you should always $sort after each invocation of $group if you have an expected order. The collection on grouping keys is not guaranteed in any specific order ( though typically it's reverse stack order ). If you expect an order, then specify it, and particularly when applying $push to reconstruct an array following a $group.

The reason why there is no $sort before the initial $group is because the preceeding pipeline stages don't actually have any effect on the existing order. So the order of discovery is always preserved.

A couple of tips:

  1. Things like Group.collection.name actually use the properties defined on the mongoose models to do things like "get the collection name". This saves you from hard-coding into the $lookup itself and stays consistent with whatever is registered on the model at the time the code is run.

  2. If you intend output of a property as an array or even have an existing "array of reference" on the schema by a certain name, then "keep that name". Making interim names for paths really does not make a lot of sense unless you are specifically doing it in a pipeline stage for the purposes of "re-ordering" the output of fields in a later stage. Otherwise, just use the name you intend to output as in all cases. It's much easier to read and interpret intent that way.

  3. Unless you really mean it, don't use options like preserveNullAndEmptyArrays. There is a "special way" that the combination of $lookup + $unwind is actually handled, and really gets executed in a "single stage" rather than retrieving all the results before "unwinding". You can see this in the "explain" output for the aggregation pipeline. In short, if you always have relational matches, then don't use the option. It's more optimal not to.


Demonstration

As a complete listing and proof of concept, we can load in your source JSON, store it in the database in separate collections and then use the aggregation statement in order to retrieve and reconstruct the desired structure:

const fs = require('fs'),
      mongoose = require('mongoose'),
      Schema = mongoose.Schema;

mongoose.Promise = global.Promise;
mongoose.set('debug',true);

const uri = 'mongodb://localhost/nested',
      options = { useMongoClient: true };

const responseSchema = new Schema({
  response_text: String,
  order: String,
  created_date: Date,
  question: { type: Schema.Types.ObjectId, ref: 'Question' }
});

const questionSchema = new Schema({
  question_text: String,
  order: String,
  created_date: Date,
  group: { type: Schema.Types.ObjectId, ref: 'Group' }
});

const groupSchema = new Schema({
  name: String,
  order: String,
  created_date: Date,
  survey: { type: Schema.Types.ObjectId, ref: 'Survey' },
  questions: [{ type: Schema.Types.ObjectId, ref: 'Question' }]
});

const surveySchema = new Schema({
  company: { type: Schema.Types.ObjectId, ref: 'Company' },
  created_date: Date,
  enabled: Boolean,
  name: String
});

const companySchema = new Schema({

});

const Company = mongoose.model('Company', companySchema);
const Survey = mongoose.model('Survey', surveySchema);
const Group = mongoose.model('Group', groupSchema);
const Question = mongoose.model('Question', questionSchema);
const Response = mongoose.model('Response', responseSchema);


function log(data) {
  console.log(JSON.stringify(data,undefined,2))
}

(async function() {

  try {

    const conn = await mongoose.connect(uri,options);

    await Promise.all(
      Object.keys(conn.models).map( m => conn.models[m].remove() )
    );

    // Initialize data
    let content = JSON.parse(fs.readFileSync('./jsonSurveys.json'));
    //log(content);

    for ( let item of content ) {

      let survey = await Survey.create(item);
      let company = await Company.create({ _id: survey.company });

      for ( let group of item.groups ) {
        await Group.create(group);
        for ( let question of group.questions ) {
          await Question.create(question);
          for ( let response of question.responses ) {
            await Response.create(response);
          }
        }
      }

    }

    // Run aggregation

    let results = await Survey.aggregate([
      { "$lookup": {
        "from": Group.collection.name,
        "localField": "_id",
        "foreignField": "survey",
        "as": "groups"
      }},
      { "$unwind": "$groups" },
      { "$lookup": {
        "from": Question.collection.name,
        "localField": "groups.questions",
        "foreignField": "_id",
        "as": "groups.questions"
      }},
      { "$unwind": "$groups.questions" },
      { "$lookup": {
        "from": Response.collection.name,
        "localField": "groups.questions._id",
        "foreignField": "question",
        "as": "groups.questions.responses"
      }},
      { "$group": {
        "_id": {
          "_id": "$_id",
          "company": "$company",
          "created_date": "$created_date",
          "enabled": "$enabled",
          "name": "$name",
          "groups": {
            "_id": "$groups._id",
            "name": "$groups.name",
            "order": "$groups.order",
            "created_date": "$groups.created_date",
            "survey": "$groups.survey"
          }
        },
        "questions": { "$push": "$groups.questions" }
      }},
      { "$sort": { "_id": 1 } },
      { "$group": {
        "_id": "$_id._id",
        "company": { "$first": "$_id.company" },
        "created_date": { "$first": "$_id.created_date" },
        "enabled": { "$first": "$_id.enabled" },
        "name": { "$first": "$_id.name" },
        "groups": {
          "$push": {
            "_id": "$_id.groups._id",
            "name": "$_id.groups.name",
            "order": "$_id.groups.order",
            "created_date": "$_id.groups.created_date",
            "survey": "$_id.groups.survey",
            "questions": "$questions"
          }
        }
      }},
      { "$sort": { "_id": 1 } }
    ]);

    log(results);

  } catch(e) {
    console.error(e);
  } finally {
    mongoose.disconnect();
  }


})();

Alternate Case

Also worth noting is that with a few small schema changes, the same result can be achieved by using nested calls to .populate():

  let alternate = await Survey.find().populate({
      path: 'groups',
      populate: {
        path: 'questions',
        populate: {
          path: 'responses'
        }
      }
    });

Whilst it looks a lot more simple, it's actually introducing more load due to the fact that this issues multiple queries to the database in order to retrieve the data, and not in a single call:

Mongoose: groups.find({ survey: { '$in': [ ObjectId("59b6715725dcd2060da7f591") ] } }, { fields: {} })
Mongoose: questions.find({ _id: { '$in': [ ObjectId("59b6715725dcd2060da7f594"), ObjectId("59b6715725dcd2060da7f595"), ObjectId("59b6715725dcd2060da7f596"), ObjectId("59b6715725dcd2060da7f597") ] } }, { fields: {} })
Mongoose: responses.find({ question: { '$in': [ ObjectId("59b6715725dcd2060da7f594"), ObjectId("59b6715725dcd2060da7f595"), ObjectId("59b6715725dcd2060da7f596"), ObjectId("59b6715725dcd2060da7f597") ] } }, { fields: {} })

You can see the schema changes ( just the addition of virtual fields for the joins ) along with the code in action in the amended listing:

const fs = require('fs'),
      mongoose = require('mongoose'),
      Schema = mongoose.Schema;

mongoose.Promise = global.Promise;
mongoose.set('debug',true);

const uri = 'mongodb://localhost/nested',
      options = { useMongoClient: true };

const responseSchema = new Schema({
  response_text: String,
  order: String,
  created_date: Date,
  question: { type: Schema.Types.ObjectId, ref: 'Question' }
});

const questionSchema = new Schema({
  question_text: String,
  order: String,
  created_date: Date,
  group: { type: Schema.Types.ObjectId, ref: 'Group' }
},{
  toJSON: {
    virtuals: true,
    transform: function(doc,obj) {
      delete obj.id;
      return obj;
    }
  }
});

questionSchema.virtual('responses',{
  ref: 'Response',
  localField: '_id',
  foreignField: 'question'
});

const groupSchema = new Schema({
  name: String,
  order: String,
  created_date: Date,
  survey: { type: Schema.Types.ObjectId, ref: 'Survey' },
  questions: [{ type: Schema.Types.ObjectId, ref: 'Question' }]
});

const surveySchema = new Schema({
  company: { type: Schema.Types.ObjectId, ref: 'Company' },
  created_date: Date,
  enabled: Boolean,
  name: String
},{
  toJSON: {
    virtuals: true,
    transform: function(doc,obj) {
      delete obj.id;
      return obj;
    }
  }
});

surveySchema.virtual('groups',{
  ref: 'Group',
  localField: '_id',
  foreignField: 'survey'
});

const companySchema = new Schema({

});

const Company = mongoose.model('Company', companySchema);
const Survey = mongoose.model('Survey', surveySchema);
const Group = mongoose.model('Group', groupSchema);
const Question = mongoose.model('Question', questionSchema);
const Response = mongoose.model('Response', responseSchema);


function log(data) {
  console.log(JSON.stringify(data,undefined,2))
}

(async function() {

  try {

    const conn = await mongoose.connect(uri,options);

    await Promise.all(
      Object.keys(conn.models).map( m => conn.models[m].remove() )
    );

    // Initialize data
    let content = JSON.parse(fs.readFileSync('./jsonSurveys.json'));
    //log(content);

    for ( let item of content ) {

      let survey = await Survey.create(item);
      let company = await Company.create({ _id: survey.company });

      for ( let group of item.groups ) {
        await Group.create(group);
        for ( let question of group.questions ) {
          await Question.create(question);
          for ( let response of question.responses ) {
            await Response.create(response);
          }
        }
      }

    }

    // Run aggregation

    let results = await Survey.aggregate([
      { "$lookup": {
        "from": Group.collection.name,
        "localField": "_id",
        "foreignField": "survey",
        "as": "groups"
      }},
      { "$unwind": "$groups" },
      { "$lookup": {
        "from": Question.collection.name,
        "localField": "groups.questions",
        "foreignField": "_id",
        "as": "groups.questions"
      }},
      { "$unwind": "$groups.questions" },
      { "$lookup": {
        "from": Response.collection.name,
        "localField": "groups.questions._id",
        "foreignField": "question",
        "as": "groups.questions.responses"
      }},
      { "$group": {
        "_id": {
          "_id": "$_id",
          "company": "$company",
          "created_date": "$created_date",
          "enabled": "$enabled",
          "name": "$name",
          "groups": {
            "_id": "$groups._id",
            "name": "$groups.name",
            "order": "$groups.order",
            "created_date": "$groups.created_date",
            "survey": "$groups.survey"
          }
        },
        "questions": { "$push": "$groups.questions" }
      }},
      { "$sort": { "_id": 1 } },
      { "$group": {
        "_id": "$_id._id",
        "company": { "$first": "$_id.company" },
        "created_date": { "$first": "$_id.created_date" },
        "enabled": { "$first": "$_id.enabled" },
        "name": { "$first": "$_id.name" },
        "groups": {
          "$push": {
            "_id": "$_id.groups._id",
            "name": "$_id.groups.name",
            "order": "$_id.groups.order",
            "created_date": "$_id.groups.created_date",
            "survey": "$_id.groups.survey",
            "questions": "$questions"
          }
        }
      }},
      { "$sort": { "_id": 1 } }
    ]);

    log(results);

    let alternate = await Survey.find().populate({
      path: 'groups',
      populate: {
        path: 'questions',
        populate: {
          path: 'responses'
        }
      }
    });

    log(alternate);

  } catch(e) {
    console.error(e);
  } finally {
    mongoose.disconnect();
  }


})();

Upvotes: 1

Related Questions