newBike
newBike

Reputation: 15002

How can I remove duplicated items (complex objects) from an array?

In each document,

`records` is an array containing many duplicated objects,

and each `buy_items` array also contains many duplicated items.

How can I clean up the duplicated items?

Original documents:

{
  "_id": "0005d116qwwewdq82a1b84f148fa6027d429f3e",
  "records": [
    {
      "DATE": new Date("1996-02-08T08:00:00+0800"),
      "buy_items": [
        "5210 ",
        "5210 ",
        "5210 "
      ]
    },
    {
      "DATE": new Date("1996-02-08T08:00:00+0800"),
      "buy_items": [
        "5210 ",
        "5210 ",
        "5210 "
      ]
    },
    {
      "DATE": new Date("2012-12-08T08:00:00+0800"),
      "buy_items": [
        "5210 ",
        "1234 ",
        " "
      ]
    }        
    ]
}

Expected Output:

{
  "_id": "0005d116qwwewdq82a1b84f148fa6027d429f3e",
  "records": [
    {
      "DATE": new Date("1996-02-08T08:00:00+0800"),
      "buy_items": [
        "5210 "
      ]
    },
    {
      "DATE": new Date("2012-12-08T08:00:00+0800"),
      "buy_items": [
        "5210 ",
        "1234 ",
        " "
      ]
    }    
    ]
}

With Michael's solution, the output might look like this:

{
  "_id": "0005d116qwwewdq82a1b84f148fa6027d429f3e",
  "records": [
    {
      "date": new Date("1996-02-08T08:00:00+0800"),
      "buy_items": [
        "5210 ",
        "1234 ",
        " "
      ]
    }
    ]
}

Upvotes: 3

Views: 107

Answers (3)

Sede
Sede

Reputation: 61225

You can remove duplicated objects using the aggregation framework

// Deduplicate records and their buy_items with the aggregation framework,
// then write the pipeline result out to a collection.
db.collection.aggregate([
    // Emit one document per element of the "records" array.
    { "$unwind": "$records" },

    // Emit one document per element of each record's "buy_items" array.
    { "$unwind": "$records.buy_items" },

    // Regroup by (document _id, record DATE); $addToSet keeps each
    // buy_items value only once within a group.
    {
        "$group": {
            "_id": { "id": "$_id", "date": "$records.DATE" },
            "buy_items": { "$addToSet": "$records.buy_items" }
        }
    },

    // Rebuild one document per original _id. Each rebuilt record carries
    // the lowercase key "date" rather than the input's "DATE".
    {
        "$group": {
            "_id": "$_id.id",
            "records": {
                "$push": { "date": "$_id.date", "buy_items": "$buy_items" }
            }
        }
    },

    { "$sort": { "records.0.date": 1 } },

    // Write the result to the collection named "collection".
    { "$out": "collection" }
]);

The $out operator lets you write the aggregation result to a specified collection, replacing that collection if it already exists.


Even better using "Bulk" operations

// Recompute a deduplicated "records" array for every document with an
// aggregation pipeline, then write each result back to its document through
// ordered bulk operations, flushed every 500 queued updates.
var bulk = db.collection.initializeOrderedBulkOp(),
    count = 0;

db.collection.aggregate([
    // One document per element of "records".
    { "$unwind": "$records" },
    // Keep only the record's DATE (as "date") and its buy_items.
    // NOTE(review): the single-argument $setIntersection appears to be used
    // to collapse duplicate values inside one buy_items array - confirm
    // against the server version in use.
    { "$project": {
        "date": "$records.DATE",
        "buy_items": { "$setIntersection": "$records.buy_items" }
    }},
    // One document per remaining buy_items value.
    { "$unwind": "$buy_items" },
    // Merge records of the same document that share a date; $addToSet keeps
    // each buy_items value once.
    { "$group": {
        "_id": { "id": "$_id", "date": "$date" },
        "buy_items": { "$addToSet": "$buy_items" }
    }},
    // Rebuild one document per original _id with its cleaned records.
    { "$group": {
        "_id": "$_id.id",
        "records": { "$push": {
            "date": "$_id.date",
            "buy_items": "$buy_items"
        }}
    }}
]).forEach(function (doc) {
    // Queue an update that overwrites only the "records" field.
    bulk.find({ "_id": doc._id }).updateOne({
        "$set": { "records": doc.records }
    });
    count++;

    // Flush every 500 queued updates and start a fresh bulk operation.
    if (count % 500 === 0) {
        bulk.execute();
        bulk = db.collection.initializeOrderedBulkOp();
    }
});

// Flush the final partial batch; skipped when nothing is left queued
// (count is 0 or an exact multiple of 500).
if (count % 500 !== 0) {
    bulk.execute();
}

Result:

{
    "_id" : "0005d116qwwewdq82a1b84f148fa6027d429f3e",
    "records" : [
            {
                    "date" : ISODate("2012-12-08T00:00:00Z"),
                    "buy_items" : [
                            " ",
                            "1234 ",
                            "5210 "
                    ]
            },
            {
                    "date" : ISODate("1996-02-08T00:00:00Z"),
                    "buy_items" : [
                            "5210 "
                    ]
            }
    ]
}

Upvotes: 3

chridam
chridam

Reputation: 103365

You could try using the forEach() method of the find() cursor to iterate over each document's properties, check for uniqueness, and filter distinct values as follows:

// For every document: remove repeated values inside each record's buy_items,
// then drop records that duplicate an earlier record of the same document.
// Two records count as duplicates only when both their DATE and their
// deduplicated buy_items match, wherever they sit in the array.
db.collection.find().forEach(function (doc) {
    // Nothing to clean when the document has no "records" array.
    if (!Array.isArray(doc.records)) {
        return;
    }

    var records = [],
        seen = {};

    doc.records.forEach(function (item) {
        var buyItems = item["buy_items"];

        if (Array.isArray(buyItems)) {
            // Keep the first occurrence of each value, preserving order.
            item["buy_items"] = buyItems.filter(function (value, pos) {
                return buyItems.indexOf(value) === pos;
            });
        }

        // Serialise DATE together with buy_items so that records with equal
        // items but different dates are kept as separate records.
        var key = JSON.stringify([item["DATE"], item["buy_items"]]);
        if (!Object.prototype.hasOwnProperty.call(seen, key)) {
            seen[key] = true;
            records.push(item);
        }
    });

    // Rewrite only the "records" field instead of saving the whole document.
    db.collection.update(
        { "_id": doc._id },
        { "$set": { "records": records } }
    );
});

Upvotes: 1

Neo-coder
Neo-coder

Reputation: 7840

If you want to update your current collection without creating a new collection and dropping the previous one, I tried the following; note that with this approach you have to run two separate update commands.

First update records with distinct like this :

// Replace "records" with the distinct values of the "records" field.
// NOTE(review): distinct() is evaluated over the whole collection, not per
// document, and update() is called without a multi option - confirm that
// this matches the intended per-document cleanup.
db.collectionName.update(
    {},
    { "$set": { "records": db.collectionName.distinct('records') } }
)

and second update for buy_items with distinct like this :

// Replace the first record's buy_items with the distinct values of
// "records.buy_items".
// NOTE(review): distinct() is evaluated over the whole collection, and only
// the record at index 0 is written - confirm that this matches the intended
// cleanup.
db.collectionName.update(
    {},
    { "$set": { "records.0.buy_items": db.collectionName.distinct('records.buy_items') } }
)

If you want to avoid running two update queries, follow Michael's answer.

Upvotes: 1

Related Questions