Aenaon
Aenaon

Reputation: 3573

Group by and aggregate across all keys of an array of objects

I have an array of objects and each object has multiple key, value pairs. I want to group by the values of let's say the first key and then derive the aggregated average and median.

I can do this with nest and rollup but only for one dimension. The example below for instance, groups by winner and then for each subgroup finds the mean/median but only over one dimension, in this case over team.4 only. Is there a way to aggregate over all four team.1, team.2, team.3, team.4 at once please? As a side note, team.1, team.2, team.3, team.4 are not known in advance.

My desired output (I am quite flexible on this; it's just a "nice to have") would be

var avg=[ 
{ 'winner': 'team.1', 'team.1' : 4, 'team.2' : 5.333, 'team.3': 1, 'team.4': 0.666},
{ 'winner': 'team.2', 'team.1' : 6, 'team.2' : 2.5, 'team.3': 6.5, 'team.4': 0.5}
];

and similarly for the median.

Thanks!

<!DOCTYPE html>
<html>

<head>
    <!--d3 -->
    <script src='https://d3js.org/d3.v4.min.js'></script>
</head>

<body>

<script>
    // Sample records: `winner` labels each row, the remaining keys hold one
    // numeric value per team.
    var data = [
        {'winner': 'team.1', 'team.1':5, 'team.2':4, 'team.3':1, 'team.4':0},
        {'winner': 'team.2', 'team.1':5, 'team.2':1, 'team.3':4, 'team.4':1},
        {'winner': 'team.2', 'team.1':7, 'team.2':4, 'team.3':9, 'team.4':0},
        {'winner': 'team.1', 'team.1':5, 'team.2':8, 'team.3':0, 'team.4':1},
        {'winner': 'team.1', 'team.1':2, 'team.2':4, 'team.3':2, 'team.4':1}
    ];

    // The one dimension that gets summarised for every winner group.
    var dim = 'team.4';

    // Group the rows by `winner`, then collapse each group into a summary
    // (row count, median and mean) of the `dim` column only.
    var out = d3.nest()
        .key(function(row) { return row.winner; })
        .rollup(function(rows) {
            // Shared accessor: reads the chosen dimension from a row.
            var valueOf = function(row) { return row[dim]; };

            return {
                dimension: dim,
                count: rows.length,
                median: d3.median(rows, valueOf),
                avg: d3.mean(rows, valueOf)
            };
        })
        .entries(data);

    console.log(out);

</script>
</body>

</html>

Upvotes: 2

Views: 754

Answers (1)

Terry
Terry

Reputation: 66173

Since you have doubly-nested data, you will have to nest another level of rollup function within the first rollup function. So, your top-level rollup should have a callback that looks like this:

// Iterate through the object, remove the winner
// That will leave us an object containing team-score key-value pairs
// And then, we flatten the array down to a single dimension:
var teams = v.map(function(team) {
  delete team.winner;
  return d3.entries(team);
}).reduce(function(memo, team) {
  return memo.concat(team);
}, []);

// Generate the summary for the winner group
// We have an array of objects of all the scores of all teams that the winning team has played against
var groupSummary = d3.nest()
  .key(function(d) { return d.key; })
  .rollup(function(w) {
    return {
      count: w.length,
      median: d3.median(w, function(d) {
        return d['value'];
      }),
      avg: d3.mean(w, function(d) {
        return d['value'];
      })
    };
  })
  .entries(teams);

// Return the summary to the top-level rollup
return groupSummary;

<!DOCTYPE html>
<html>

<head>
  <!--d3 -->
  <script src='https://d3js.org/d3.v4.min.js'></script>
</head>

<body>

  <script>
    // Sample records: `winner` labels each row, the remaining keys hold one
    // numeric value per team.
    var data = [
      { 'winner': 'team.1', 'team.1': 5, 'team.2': 4, 'team.3': 1, 'team.4': 0 },
      { 'winner': 'team.2', 'team.1': 5, 'team.2': 1, 'team.3': 4, 'team.4': 1 },
      { 'winner': 'team.2', 'team.1': 7, 'team.2': 4, 'team.3': 9, 'team.4': 0 },
      { 'winner': 'team.1', 'team.1': 5, 'team.2': 8, 'team.3': 0, 'team.4': 1 },
      { 'winner': 'team.1', 'team.1': 2, 'team.2': 4, 'team.3': 2, 'team.4': 1 }
    ];

    // Group the rows by `winner`; for each group, produce a nested summary
    // (count, median, mean) keyed by team name.
    var out = d3.nest()
      .key(function(d) {
        return d.winner;
      })
      .rollup(function(v) {
        // Flatten the group into a single list of {key, value} entries.
        // The `winner` label is filtered out of the entries instead of being
        // deleted from the row, so the objects in `data` are not modified.
        var teams = v.map(function(row) {
          return d3.entries(row).filter(function(entry) {
            return entry.key !== 'winner';
          });
        }).reduce(function(memo, rowEntries) {
          return memo.concat(rowEntries);
        }, []);

        // Nest the flattened entries by team name and summarise each team.
        var groupSummary = d3.nest()
          .key(function(d) { return d.key; })
          .rollup(function(w) {
            // Accessor shared by both statistics: the value of an entry.
            var valueOf = function(d) { return d.value; };

            return {
              count: w.length,
              median: d3.median(w, valueOf),
              avg: d3.mean(w, valueOf)
            };
          })
          .entries(teams);

        return groupSummary;
      })
      .entries(data);

    console.log(out);
  </script>
</body>

</html>


An alternative (and probably simpler) solution is to collect the keys of the objects in each group into an array and, instead of summarizing a single dimension in your rollup, iterate over all of those keys (i.e. team names/ids):

// Generate an array of all team names in the group
var teams = v.reduce(function(memo, d) {
  // Iterate through nested array of objects and get their keys
  // We use reduce here so that we can flatten the 2D array into 1D
  return memo.concat(Object.keys(d));
}, []).filter(function(team) {
  // Remove winner because it is not a "team" per se
  return team !== 'winner';
});

// Now, iterate through all teams and summarize
return teams.map(function(team) {
  return {
    dimension: team,
    count: v.length,
    median: d3.median(v, function(d) {
      return d[team];
    }),
    avg: d3.mean(v, function(d) {
      return d[team];
    })
  };
});

<!DOCTYPE html>
<html>

<head>
  <!--d3 -->
  <script src='https://d3js.org/d3.v4.min.js'></script>
</head>

<body>

  <script>
    // Sample records: `winner` labels each row, the remaining keys hold one
    // numeric value per team.
    var data = [
      { 'winner': 'team.1', 'team.1': 5, 'team.2': 4, 'team.3': 1, 'team.4': 0 },
      { 'winner': 'team.2', 'team.1': 5, 'team.2': 1, 'team.3': 4, 'team.4': 1 },
      { 'winner': 'team.2', 'team.1': 7, 'team.2': 4, 'team.3': 9, 'team.4': 0 },
      { 'winner': 'team.1', 'team.1': 5, 'team.2': 8, 'team.3': 0, 'team.4': 1 },
      { 'winner': 'team.1', 'team.1': 2, 'team.2': 4, 'team.3': 2, 'team.4': 1 }
    ];

    // Group the rows by `winner`; for each group, return one summary object
    // (dimension, count, median, mean) per team.
    var out = d3.nest()
      .key(function(d) {
        return d.winner;
      })
      .rollup(function(v) {
        // Distinct team names found in the group. Every row contributes its
        // own copy of the keys, so keep only the first occurrence of each one
        // (and drop the `winner` label); otherwise each team would be
        // summarized once per row in the group.
        var teams = v.reduce(function(memo, d) {
          return memo.concat(Object.keys(d));
        }, []).filter(function(team, index, allKeys) {
          return team !== 'winner' && allKeys.indexOf(team) === index;
        });

        return teams.map(function(team) {
          // Accessor shared by both statistics: this team's value in a row.
          var valueOf = function(d) { return d[team]; };

          return {
            dimension: team,
            count: v.length,
            median: d3.median(v, valueOf),
            avg: d3.mean(v, valueOf)
          };
        });
      })
      .entries(data);

    console.log(out);
  </script>
</body>

</html>

Upvotes: 2

Related Questions