probablykabari
probablykabari

Reputation: 1389

Map/Reduce to get group() Result

I'm trying to aggregate a bunch of user profile data in our app. Each user has an embedded profile document with gender and ethnicity attributes.

{
  'email': 'user@example.com',
  'profile': {
    'gender': 'male',
    'ethnicity': 'Hispanic'
  }
}

If I use a group function like so:

// Tally gender, ethnicity and total respondent count into the `initial`
// accumulator; `reduce` is invoked with each user document and the running totals.
db.respondents.group({
  key: {},
  initial: {'gender': {'male':0,'female':0}, 'ethnicity': {}, 'count': 0},
  reduce: function (user, totals) {
    var gender = user.profile.gender;
    var ethnicity = user.profile.ethnicity;
    var byEthnicity = totals.ethnicity;

    totals.gender[gender]++;
    // First sighting of an ethnicity starts from 0 before being counted.
    byEthnicity[ethnicity] = (byEthnicity[ethnicity] || 0) + 1;
    totals.count++;
  }
});

I get the result in the form I want:

{
    "gender" : {
        "male" : ###,
        "female" : ###
    },
    "ethnicity" : {
        "Caucasian/White" : ###,
        "Hispanic" : ###,
                    ...

    },
    "count" : ###
}

I'm having trouble getting this to work as a map/reduce command, using a different reduce function of course. I'm not sure how to get the totals to add up. They are always incorrect. I'm aware that my output from reduce has to be in the same format as the input from map, but I feel like I'm missing something in the way that reduce works...

In response to @Jenna, the input looks like:

{
  'email': 'user@example.com',
  'profile': {
    'gender': 'male',
    'ethnicity': 'Hispanic'
  }
}

and the functions are:

// Map step: emits the current document's embedded profile under the single
// shared key 'demographics'.
function map() {
  var profile = this.profile;
  emit('demographics', profile);
}

// Reduce step: tallies gender, ethnicity and a total count over `values`.
//
// NOTE(review): every element of `values` is treated as a raw profile, i.e.
// `value.gender` and `value.ethnicity` are used directly as counter names.
// The object returned here has a different shape (gender/ethnicity are maps of
// counts, plus `count`). If this function is handed one of its own earlier
// results as a value, the partial counts inside it are not summed: that value
// only adds 1 to `count` and increments counters named after the stringified
// objects. The returned value needs the same shape as the values consumed.
function reduce (key, values) {
  // Fresh accumulator for this invocation.
  var reduced = {'gender': {'male':0,'female':0}, 'ethnicity': {}, 'count': 0};
  values.forEach(function(value) {
    // Bump the counter named by this value's gender.
    reduced.gender[value.gender]++;
    // Start an ethnicity counter at 0 the first time it is seen, then bump it.
    reduced['ethnicity'][value.ethnicity] = (reduced['ethnicity'][value.ethnicity] || 0);
    reduced['ethnicity'][value.ethnicity]++;
    // Counts one per element of `values`, regardless of what the element holds.
    reduced.count++;
  });
  return reduced;
}

and the output is:

{
    "_id": "demographics",
    "value": {
      "gender": {
        "male": 76.0,
        "female": 64.0
      },
      "ethnicity": {
        "Caucasian/White": 109.0,
        "Other": 5.0,
        "Asian": 10.0,
        "African-American": 8.0,
        "Hispanic": 7.0,
        "Native American": 1.0
      },
      "count": 141.0
    }
}

The output is clearly wrong: there are over 100k records in the database, yet the reported count is only 141.

Upvotes: 0

Views: 189

Answers (1)

narced133
narced133

Reputation: 752

The reduce function may be called again on the output of an earlier reduce call, so every value it receives must have the same format as the value it returns. You are correct that the output from map should be in the same format as the output from reduce. Your current map function emits something different from what your reduce function returns. Try something like this:

/**
 * Map step: emits one partial-totals object per respondent under the shared
 * key 'demographics'.
 *
 * The emitted value has the shape
 *   {gender: {male, female, ...}, ethnicity: {<name>: n}, count: 1}
 * so that a reduce function returning that same shape can be re-applied to
 * its own output.
 *
 * Reads gender and ethnicity from the embedded `profile` sub-document of the
 * current document (`this`). A missing profile or attribute contributes only
 * to `count`.
 */
function map() {
  // Tolerate documents without a profile instead of throwing mid-job.
  var profile = this.profile || {};
  // Declared with `var` so the map function does not leak a global.
  var result = {'gender': {'male': 0, 'female': 0}, 'ethnicity': {}, 'count': 1};

  if (profile.gender !== undefined && profile.gender !== null) {
    result.gender[profile.gender] = 1;
  }
  if (profile.ethnicity !== undefined && profile.ethnicity !== null) {
    result.ethnicity[profile.ethnicity] = 1;
  }

  emit('demographics', result);
}

/**
 * Reduce step: merges partial-totals objects into one.
 *
 * Each element of `values` is expected to have the shape
 *   {gender: {<name>: n}, ethnicity: {<name>: n}, count: n}
 * and the return value has that same shape, so the function can be applied
 * again to its own output.
 *
 * @param {string} key - the emitted key (unused; all values share one key).
 * @param {Array<Object>} values - partial totals to merge.
 * @returns {Object} the summed totals.
 */
function reduce (key, values) {
  var reduced = {'gender': {'male':0,'female':0}, 'ethnicity': {}, 'count': 0};
  var hasOwn = Object.prototype.hasOwnProperty;

  // Adds every own counter of `source` into `target`, creating missing ones.
  function addCounts(target, source) {
    var name;
    for (name in source) {
      if (hasOwn.call(source, name)) {
        target[name] = (target[name] || 0) + source[name];
      }
    }
  }

  values.forEach(function (value) {
    addCounts(reduced.gender, value.gender);
    addCounts(reduced.ethnicity, value.ethnicity);
    // Sum the per-value count (not a property of the `values` array).
    reduced.count += value.count;
  });

  return reduced;
}

Upvotes: 2

Related Questions