theShadow89
theShadow89

Reputation: 1549

How to store different type entities

I am trying to find the best way to store entities of different types.

I have a generic entity, Person, and two types of person, PersonType1 and PersonType2, that inherit their fields from the Person entity.

I need to store these people's data in MongoDB.

Is it better to create a single collection, or a separate collection for each type?

Upvotes: 0

Views: 378

Answers (1)

Blakes Seven
Blakes Seven

Reputation: 50406

What you seem to be basically talking about here is the general persistence of Polymorphism where classes vary from a base class in differing properties.

This is generally handled by a discriminator pattern in common implementations of data persistence, and is typically therefore within the same "collection", especially in a document oriented database that handles the differing properties quite well.

So generally speaking, under MongoDB the concept of storing these "inherited" objects in a single collection is basically a good idea. There are advantages to having things that share common properties or other related data in the same collection, and a demonstration of those advantages is warranted.

Not an endorsement, but a simple example can be demonstrated through the .discriminator() constructor available to the mongoose library. Therefore as an example of benefits it is suited ( for me ) for a quick demonstration:

var util = require('util'),
    async = require('async'),
    mongoose = require('mongoose'),
    Schema = mongoose.Schema;

// Connect mongoose to the "school" database on localhost
mongoose.connect('mongodb://localhost/school');

/**
 * Base schema constructor shared by every kind of person.
 *
 * Forwards whatever arguments it receives to the mongoose Schema
 * constructor, then adds the fields that all person types have in common.
 */
function AbstractPersonSchema() {
  var commonFields = {
    name: String,
    age: Number,
    sex: { type: String, enum: [ 'Male', 'Female' ] }
  };

  Schema.apply(this, arguments);
  this.add(commonFields);
}

// Wire up the prototype chain so instances are also Schema instances
util.inherits(AbstractPersonSchema, Schema);


// Schema definitions: each one starts from the shared person fields and
// layers its own type-specific fields on top.

// A plain person carries nothing beyond the common fields
var personSchema = new AbstractPersonSchema();

// Students additionally list the courses they take
var studentSchema = new AbstractPersonSchema({ courses: [String] });

// Teachers additionally belong to a department and list what they teach
var teacherSchema = new AbstractPersonSchema({
  department: String,
  teaches: [String]
});


// Model assignments: 'Student' and 'Teacher' are registered as
// discriminators of the base 'Person' model.
var Person = mongoose.model('Person', personSchema);
var Student = Person.discriminator('Student', studentSchema);
var Teacher = Person.discriminator('Teacher', teacherSchema);

// Sample data: people with only the common fields
var normalPeople = [
  { name: 'Bill', age: 48, sex: 'Male' },
  { name: 'Sarah', age: 24, sex: 'Female' }
];

// Sample data: students, each with the list of courses they take
var students = [
  { name: 'Ted', age: 21, sex: 'Male', courses: ['Math', 'Science', 'English'] },
  { name: 'Julie', age: 22, sex: 'Female', courses: ['Art', 'History', 'English'] }
];

// Sample data: teachers, each with a department and the subjects they teach
var teachers = [
  {
    name: 'Harry',
    age: 35,
    sex: 'Male',
    department: 'Maths',
    teaches: ['Math', 'Science', 'English']
  },
  {
    name: 'Sally',
    age: 32,
    sex: 'Female',
    department: 'History',
    teaches: ['English', 'History']
  }
];

// Builds the completion handler shared by every aggregation step below.
// On failure the error is handed to `callback` exactly once (the `return`
// prevents falling through and calling it a second time); on success the
// results are printed and the series advances to the next step.
function printResults(callback) {
  return function(err,results) {
    if (err) return callback(err);
    console.log(results);
    callback();
  };
}

// Runs the demonstration one step at a time: clean up, insert the sample
// data, then run four aggregations. Any error passed to a step's callback
// ends the series and arrives at the final handler.
async.series(
  [
    // Example cleanup: remove everything matched by the base Person model
    function(callback) {
      Person.remove({},callback);
    },

    // Insert the sample data; the three groups are independent of each
    // other, so they are created in parallel, each through its own model.
    function(callback) {
      async.parallel(
        [
          function(callback) {
            async.each(normalPeople,function(person,callback) {
              Person.create(person,callback);
            },callback);
          },
          function(callback) {
            async.each(students,function(student,callback) {
              Student.create(student,callback);
            },callback);
          },
          function(callback) {
            async.each(teachers,function(teacher,callback) {
              Teacher.create(teacher,callback);
            },callback);
          }
        ],
        callback
      );
    },

    // Aggregate through the Teacher model: one output document per taught
    // subject, with a count of the teachers teaching it.
    function(callback) {
      console.log("Teachers per subject");
      Teacher.aggregate(
        [
          { "$unwind": "$teaches" },
          { "$group": {
            "_id": "$teaches",
            "count": { "$sum": 1 }
          }}
        ],
        printResults(callback)
      );
    },

    // Aggregate through the base Person model, selecting both sub-types
    // explicitly by their "__t" value.
    function(callback) {
      console.log("Students and teachers in subject");
      Person.aggregate(
        [
          { "$match": { "__t": { "$in": ["Teacher","Student"] } } },
          // Normalise the two differently named arrays into one "subject" field
          { "$project": {
            "name": 1,
            "__t": 1,
            "subject": {
              "$cond": [
                { "$eq": [ "$__t", "Teacher" ] },
                "$teaches",
                "$courses"
              ]
            }
          }},
          { "$unwind": "$subject" },
          // Collect names per subject; a document of the other type
          // contributes the placeholder `false` instead of its name
          { "$group": {
            "_id": "$subject",
            "teachers": {
              "$addToSet": {
                "$cond": [
                  { "$eq": [ "$__t", "Teacher" ] },
                  "$name",
                  false
                ]
              }
            },
            "students": {
              "$addToSet": {
                "$cond": [
                  { "$eq": [ "$__t", "Student" ] },
                  "$name",
                  false
                ]
              }
            }
          }},
          // Strip the `false` placeholders back out of both sets
          { "$project": {
            "teachers": { "$setDifference": [ "$teachers", [false] ] },
            "students": { "$setDifference": [ "$students", [false] ] }
          }}
        ],
        printResults(callback)
      );
    },

    // Aggregate through the Student model: average age over all students
    function(callback) {
      console.log("Average age of students");
      Student.aggregate(
        [
          { "$group": {
            "_id": null,
            "average_age": { "$avg": "$age" }
          }}
        ],
        printResults(callback)
      );
    },

    // Aggregate through the base Person model, keeping only documents that
    // have no "__t" field at all
    function(callback) {
      console.log("Average age of normal people");
      Person.aggregate(
        [
          { "$match": { "__t": { "$exists": false } } },
          { "$group": {
            "_id": null,
            "average_age": { "$avg": "$age" }
          }}
        ],
        printResults(callback)
      );
    }
  ],
  // Final handler: surface any error from the steps above, otherwise close
  // the connection.
  function(err) {
    if (err) throw err;
    mongoose.disconnect();
  }
);

Which produces the output:

Teachers per subject
[ { _id: 'History', count: 1 },
  { _id: 'English', count: 2 },
  { _id: 'Science', count: 1 },
  { _id: 'Math', count: 1 } ]
Students and teachers in subject
[ { _id: 'History', teachers: [ 'Sally' ], students: [ 'Julie' ] },
  { _id: 'Art', teachers: [], students: [ 'Julie' ] },
  { _id: 'English',
    teachers: [ 'Sally', 'Harry' ],
    students: [ 'Julie', 'Ted' ] },
  { _id: 'Science', teachers: [ 'Harry' ], students: [ 'Ted' ] },
  { _id: 'Math', teachers: [ 'Harry' ], students: [ 'Ted' ] } ]
Average age of students
[ { _id: null, average_age: 21.5 } ]
Average age of normal people
[ { _id: null, average_age: 36 } ]

The particular things to notice there with usage of the discriminator pattern are that there are several "Model" definitions there for 'Person', 'Student' and 'Teacher'. Each one of these is instantiated from its own properties, inheriting from the base 'Person' definition.

Upon construction and storage, the approach taken here is that all items are essentially stored in the same physical collection ( "people" in the plural ), yet all have differing properties and there is a definition there in the coding that assigns the .discriminator() method based on the standard collection.

What this does is essentially add a field to each inherited "class" which represents the "model" type here:

{ "name": "Ted", "__t": "Student" }

The task now falls to the library implementation to read that "discriminator" value and correctly assign the defined "class/model" upon reading the information from the database. With that data in place, it becomes possible for libraries to correctly cast the data as a typed object of the intended class.

Implementations can also benefit from what the example shows here. Notice the usage of differing queries using each of the 'Person', 'Student' and 'Teacher' models. Wherever the 'Teacher' or 'Student' model is called, the library can "automagically" filter out results that do not match its discriminator type. This allows query operations to be coded specifically against the class/model type of the objects they operate on, obviating the need to obscure code with noisy type checks where they are not required.

On the other hand, the base class/model can still be used with specific checks coded on the type discriminator value. This allows you to generally inspect all inherited types, or a sub-set of information as is required, in a broader sense than dealing with a singular inheritance path.

So where such correlation is desired, it absolutely makes sense to store in the same collection and utilize a discriminator pattern. Only if you never intend to do such analysis at all does it make any sense to separate the storage into separate collections.

Remember that this is MongoDB and not a relational database. If you want to use the data "all together" then you design to keep it "all together" because you cannot perform joins, and you need to design around that concept.

Upvotes: 2

Related Questions