yesyoukenn
yesyoukenn

Reputation: 227

How to Restructure Document Fields Into An Array

I have some data that looks like the following:

{
  "name" : "someValue",
  "date" : "someValue",
  "age" : "someValue",
  "price" : "someValue",
  "sales_comp1" : "someValue",
  "sales_comp1_dos" : "someValue",
  "sales_comp1_units" : "someValue",
  "sales_comp1_NOI_unit" : "someValue",
  "sales_comp1_sales_pr_unit" : "someValue",
  "sales_comp1_adj_SPrice" : "someValue",
  "sales_comp1_cap_rate" : "someValue",
  "sales_comp2" : "someValue",
  "sales_comp2_dos" : "someValue",
  "sales_comp2_units" : "someValue",
  "sales_comp2_NOI_unit" : "someValue",
  "sales_comp2_sales_pr_unit" : "someValue",
  "sales_comp2_adj_SPrice" : "someValue",
  "sales_comp2_cap_rate" : "someValue",
  "sales_comp3" : "someValue",
  "sales_comp3_dos" : "someValue",
  "sales_comp3_units" : "someValue",
  "sales_comp3_NOI_unit" : "someValue",
  "sales_comp3_sales_pr_unit" : "someValue",
  "sales_comp3_adj_SPrice" : "someValue",
  "sales_comp3_cap_rate" : "someValue"
}

I want to consolidate the sales_compX values into an array called salesComps as such:

{
  "name" : "someValue",
  "date" : "someValue",
  "age" : "someValue",
  "price" : "someValue",
  "salesComps" : [
    {
      "sales_comp1" : "someValue",
      "sales_comp1_dos" : "someValue",
      "sales_comp1_units" : "someValue",
      "sales_comp1_NOI_unit" : "someValue",
      "sales_comp1_sales_pr_unit" : "someValue",
      "sales_comp1_adj_SPrice" : "someValue",
      "sales_comp1_cap_rate" : "someValue",
    },
    {
      "sales_comp2" : "someValue",
      "sales_comp2_dos" : "someValue",
      "sales_comp2_units" : "someValue",
      "sales_comp2_NOI_unit" : "someValue",
      "sales_comp2_sales_pr_unit" : "someValue",
      "sales_comp2_adj_SPrice" : "someValue",
      "sales_comp2_cap_rate" : "someValue",
    },
    {
      "sales_comp3" : "someValue",
      "sales_comp3_dos" : "someValue",
      "sales_comp3_units" : "someValue",
      "sales_comp3_NOI_unit" : "someValue",
      "sales_comp3_sales_pr_unit" : "someValue",
      "sales_comp3_adj_SPrice" : "someValue",
      "sales_comp3_cap_rate" : "someValue"
    }
  ]
}

What's the simplest way to do this? I would appreciate any response in either Mongoose or Mongodb code samples!

Upvotes: 0

Views: 62

Answers (2)

Neil Lunn
Neil Lunn

Reputation: 151112

One off operations are probably best done in the mongo shell, which has the JavaScript environment to handle this without needing to worry about callback and promise considerations.

So the best thing to do here is loop the collection results and re-write to the collection via the .bulkWrite() method after transforming the fields into an array:

var name = "collection";  
var ops = [];

db.getCollection(name).find().forEach(doc => {

    var obj = Object.keys(doc).filter(k => /^sales_comp/.test(k))
     .map( k => ({ [k.match(/\d+/)[0]]: { [k.replace(/\d+/,"")]: doc[k] }}))
     .reduce((acc,curr) => {
       let a = Object.keys(curr)[0];
       if ( !acc.hasOwnProperty(a) )
         acc[a] = { };
       acc[a][Object.keys(curr[a])[0]] = curr[a][Object.keys(curr[a])[0]];
       return acc;
     },{});

    // Un-comment and swap if you don't want to retain the index number
    //var comps = Object.keys(obj).map( k => obj[k] )
    var comps = Object.keys(obj).map( k => Object.assign({ "index": parseInt(k) },obj[k]) );

    var unset = Object.keys(doc).filter(k => /^sales_comp/.test(k))
      .reduce((acc,curr) => Object.assign(acc,({ [curr]: 1 })),{});

    ops.push({
      "updateOne": {
        "filter": { "_id": doc._id },
        "update": {
          "$unset": unset,
          "$set": { "salesComps": comps }   
        }
      }        
    });

    if ( ops.length >= 500 ) {
      db.getCollection(name).bulkWrite(ops);
      ops = [];
    }
});

if ( ops.length > 0 ) {
  db.getCollection(name).bulkWrite(ops);
  ops = [];   
}

Which would rewrite each document like:

{
    "_id" : ObjectId("5954461a38470d375d40aca1"),
    "name" : "someValue",
    "date" : "someValue",
    "age" : "someValue",
    "price" : "someValue",
    "salesComps" : [ 
        {
            "index" : 1,
            "sales_comp" : "someValue",
            "sales_comp_dos" : "someValue",
            "sales_comp_units" : "someValue",
            "sales_comp_NOI_unit" : "someValue",
            "sales_comp_sales_pr_unit" : "someValue",
            "sales_comp_adj_SPrice" : "someValue",
            "sales_comp_cap_rate" : "someValue"
        }, 
        {
            "index" : 2,
            "sales_comp" : "someValue",
            "sales_comp_dos" : "someValue",
            "sales_comp_units" : "someValue",
            "sales_comp_NOI_unit" : "someValue",
            "sales_comp_sales_pr_unit" : "someValue",
            "sales_comp_adj_SPrice" : "someValue",
            "sales_comp_cap_rate" : "someValue"
        }, 
        {
            "index" : 3,
            "sales_comp" : "someValue",
            "sales_comp_dos" : "someValue",
            "sales_comp_units" : "someValue",
            "sales_comp_NOI_unit" : "someValue",
            "sales_comp_sales_pr_unit" : "someValue",
            "sales_comp_adj_SPrice" : "someValue",
            "sales_comp_cap_rate" : "someValue"
        }
    ]
}

This is basically inspecting the document for the names of the "keys" starting with "sales_comp" and after accumulating the keys based on the numeric part of the string and stripping that out from the names, the data is transformed into an array for each different numeric value.

When actually processing the "update" to the target collection, we use the $unset operator on the fields that began with "sales_comp to remove these from the document. Then we use the $set operator with the transformed array to create the new property.

There are some common idioms here using .map(), .reduce() and .filter() that you should become familiar with when working with data structures in JavaScript.

Upvotes: 0

Brian Peacock
Brian Peacock

Reputation: 1849

First, loop through the data using Object.keys() and wait for the 'sales_comp' data to crop up. When it does insert the key-value pair into a holding Object and delete that entry from the main data. Once that has finished add the holding Object back into the data. Something like this...

The data...

var data = {
  "name" : "someValue",
  "date" : "someValue",
  "age" : "someValue",
  "price" : "someValue",
  "sales_comp1" : "someValue",
  "sales_comp1_dos" : "someValue",
  "sales_comp1_units" : "someValue",
  "sales_comp1_NOI_unit" : "someValue",
  "sales_comp1_sales_pr_unit" : "someValue",
  "sales_comp1_adj_SPrice" : "someValue",
  "sales_comp1_cap_rate" : "someValue",
  "sales_comp2" : "someValue",
  "sales_comp2_dos" : "someValue",
  "sales_comp2_units" : "someValue",
  "sales_comp2_NOI_unit" : "someValue",
  "sales_comp2_sales_pr_unit" : "someValue",
  "sales_comp2_adj_SPrice" : "someValue",
  "sales_comp2_cap_rate" : "someValue",
  "sales_comp3" : "someValue",
  "sales_comp3_dos" : "someValue",
  "sales_comp3_units" : "someValue",
  "sales_comp3_NOI_unit" : "someValue",
  "sales_comp3_sales_pr_unit" : "someValue",
  "sales_comp3_adj_SPrice" : "someValue",
  "sales_comp3_cap_rate" : "someValue"
};

The code...

// holding Object
var tmpStorage = {};

// Loop through the data
Object.keys(data).forEach(function(d) {
     // look for 'sales_comp' keys
     if (/sales_comp/.test(d)) {
         // find which 'sales_comp' each 'd'
         // belongs to with simple regex (allowing
         // for more than single digits here)
         var id = d.match(/sales_comp([\d]+)/)[1];
         // create a tmp property for tmpStorage object
         // if it doesn't exist (which it wont on the
         // the first pass)
         tmpStorage['tmp'+id] = tmpStorage['tmp'+id] || {};
         // add the current key-value pair to the
         // tmpStorage Object
         tmpStorage['tmp'+id][d] = data[d];
         // remove the current key-value pair from the
         // data object
         delete data[d]
     }
});

The 'data' Object now looks like this...

console.log('data:',data);
/* =>
data: {
  name: 'someValue',
  date: 'someValue',
  age: 'someValue',
  price: 'someValue'
}
*/

And the tmpStorage Object looks like this...

console.log('tmpStorage:',tmpStorage);
/* =>
tmpStorage: {
  tmp1: {
     sales_comp1: 'someValue',
     sales_comp1_dos: 'someValue',
     sales_comp1_units: 'someValue',
     sales_comp1_NOI_unit: 'someValue',
     sales_comp1_sales_pr_unit: 'someValue',
     sales_comp1_adj_SPrice: 'someValue',
     sales_comp1_cap_rate: 'someValue'
  },
  tmp2: {
     sales_comp2: 'someValue',
     sales_comp2_dos: 'someValue',
     sales_comp2_units: 'someValue',
     sales_comp2_NOI_unit: 'someValue',
     sales_comp2_sales_pr_unit: 'someValue',
     sales_comp2_adj_SPrice: 'someValue',
     sales_comp2_cap_rate: 'someValue'
  },
  tmp3: {
     sales_comp3: 'someValue',
     sales_comp3_dos: 'someValue',
     sales_comp3_units: 'someValue',
     sales_comp3_NOI_unit: 'someValue',
     sales_comp3_sales_pr_unit: 'someValue',
     sales_comp3_adj_SPrice: 'someValue',
     sales_comp3_cap_rate: 'someValue'
  }
}
*/

Now it's just a matter of pushing the contents of 'tmpStorage' into a new 'data.salesComps' array

data.salesComps = Object.keys(tmpStorage).map(function(t) {
    return tmpStorage[t];
});

And now 'data' looks like this...

console.log('data:',data);
/* =>
data: { 
  name: 'someValue',
  date: 'someValue',
  age: 'someValue',
  price: 'someValue',
  salesComps: [
     { sales_comp1: 'someValue',
       sales_comp1_dos: 'someValue',
       sales_comp1_units: 'someValue',
       sales_comp1_NOI_unit: 'someValue',
       sales_comp1_sales_pr_unit: 'someValue',
       sales_comp1_adj_SPrice: 'someValue',
       sales_comp1_cap_rate: 'someValue'
     },
     { sales_comp2: 'someValue',
       sales_comp2_dos: 'someValue',
       sales_comp2_units: 'someValue',
       sales_comp2_NOI_unit: 'someValue',
       sales_comp2_sales_pr_unit: 'someValue',
       sales_comp2_adj_SPrice: 'someValue',
       sales_comp2_cap_rate: 'someValue' 
    },
    {  sales_comp3: 'someValue',
       sales_comp3_dos: 'someValue',
       sales_comp3_units: 'someValue',
       sales_comp3_NOI_unit: 'someValue',
       sales_comp3_sales_pr_unit: 'someValue',
       sales_comp3_adj_SPrice: 'someValue',
       sales_comp3_cap_rate: 'someValue' 
    }
  ]
}
*/

It may look complicated but once you take out all the comments you'll see its a relatively simple two stage process - remove the data your need, and then add it back in the format you want.

Hope that helped :)

Upvotes: 2

Related Questions