Reputation: 3573
I have an array of objects and each object has multiple key, value pairs. I want to group by the values of let's say the first key and then derive the aggregated average and median.
I can do this with `nest` and `rollup`, but only for one dimension. The example below, for instance, groups by `winner` and then finds the mean/median for each subgroup, but only over one dimension (in this case, `team.4`). Is there a way to aggregate over all four of `team.1`, `team.2`, `team.3` and `team.4` at once, please? As a side note, the names `team.1`, `team.2`, `team.3`, `team.4` are not known in advance.
My desired output (though I am quite flexible on this; it's just a "nice to have") would be:
// Desired result shape: one row per winner, holding the average value of
// every team column over the games that winner won (values rounded).
var avg = [
  {
    'winner': 'team.1',
    'team.1': 4,
    'team.2': 5.333,
    'team.3': 1,
    'team.4': 0.666
  },
  {
    'winner': 'team.2',
    'team.1': 6,
    'team.2': 2.5,
    'team.3': 6.5,
    'team.4': 0.5
  }
];
and similarly for the median.
Thanks!
<!DOCTYPE html>
<html>
<head>
<!--d3 -->
<script src='https://d3js.org/d3.v4.min.js'></script>
</head>
<body>
<script>
// Sample rows: each one names the winner and carries a numeric value for
// every team column.
var data = [
  {'winner': 'team.1', 'team.1': 5, 'team.2': 4, 'team.3': 1, 'team.4': 0},
  {'winner': 'team.2', 'team.1': 5, 'team.2': 1, 'team.3': 4, 'team.4': 1},
  {'winner': 'team.2', 'team.1': 7, 'team.2': 4, 'team.3': 9, 'team.4': 0},
  {'winner': 'team.1', 'team.1': 5, 'team.2': 8, 'team.3': 0, 'team.4': 1},
  {'winner': 'team.1', 'team.1': 2, 'team.2': 4, 'team.3': 2, 'team.4': 1}
];

// The single column that this version is able to summarize.
var dim = 'team.4';

// Group the rows by winner, then reduce each group to the count, median and
// mean of the `dim` column only.
var out = d3.nest()
  .key(function (row) {
    return row.winner;
  })
  .rollup(function (rows) {
    // Shared accessor handed to both d3.median and d3.mean.
    var readDim = function (row) {
      return row[dim];
    };
    return {
      dimension: dim,
      count: rows.length,
      median: d3.median(rows, readDim),
      avg: d3.mean(rows, readDim)
    };
  })
  .entries(data);

console.log(out);
</script>
</body>
</html>
Upvotes: 2
Views: 754
Reputation: 66173
Since you have doubly-nested data, you will have to nest another level of rollup function within the first rollup function. So, your top-level rollup should have a callback that looks like this:
// Iterate through the object, remove the winner
// That will leave us an object containing team-score key-value pairs
// And then, we flatten the array down to a single dimension:
var teams = v.map(function(team) {
delete team.winner;
return d3.entries(team);
}).reduce(function(memo, team) {
return memo.concat(team);
}, []);
// Generate the summary for the winner group
// We have an array of objects of all the scores of all teams that the winning team has played against
var groupSummary = d3.nest()
.key(function(d) { return d.key; })
.rollup(function(w) {
return {
count: w.length,
median: d3.median(w, function(d) {
return d['value'];
}),
avg: d3.mean(w, function(d) {
return d['value'];
})
};
})
.entries(teams);
// Return the summary to the top-level rollup
return groupSummary;
<!DOCTYPE html>
<html>
<head>
<!--d3 -->
<script src='https://d3js.org/d3.v4.min.js'></script>
</head>
<body>
<script>
// Sample rows: each one names the winner and carries a numeric value for
// every team column.
var data = [
  {'winner': 'team.1', 'team.1': 5, 'team.2': 4, 'team.3': 1, 'team.4': 0},
  {'winner': 'team.2', 'team.1': 5, 'team.2': 1, 'team.3': 4, 'team.4': 1},
  {'winner': 'team.2', 'team.1': 7, 'team.2': 4, 'team.3': 9, 'team.4': 0},
  {'winner': 'team.1', 'team.1': 5, 'team.2': 8, 'team.3': 0, 'team.4': 1},
  {'winner': 'team.1', 'team.1': 2, 'team.2': 4, 'team.3': 2, 'team.4': 1}
];

// Group the rows by winner; within each group, run a second nest keyed by
// team name to get the count, median and mean of every team column.
var out = d3.nest()
  .key(function(d) {
    return d.winner;
  })
  .rollup(function(v) {
    // Flatten the group's rows into {key: teamName, value: score} entries.
    // 'winner' is filtered out of the entries rather than deleted from the
    // row, so `data` keeps its winner field after this runs.
    var teams = v.map(function(team) {
      return d3.entries(team).filter(function(entry) {
        return entry.key !== 'winner';
      });
    }).reduce(function(memo, team) {
      return memo.concat(team);
    }, []);

    // Summarize the flattened entries per team name.
    return d3.nest()
      .key(function(d) { return d.key; })
      .rollup(function(w) {
        return {
          count: w.length,
          median: d3.median(w, function(d) {
            return d['value'];
          }),
          avg: d3.mean(w, function(d) {
            return d['value'];
          })
        };
      })
      .entries(teams);
  })
  .entries(data);

console.log(out);
</script>
</body>
</html>
An alternative (and probably simpler) solution is to get all the keys in the object and store them into an array, and ensure that instead of returning a single dimension in your rollup, you iterate through all the keys (i.e. team names/ids):
// Generate an array of all team names in the group
var teams = v.reduce(function(memo, d) {
// Iterate through nested array of objects and get their keys
// We use reduce here so that we can flatten the 2D array into 1D
return memo.concat(Object.keys(d));
}, []).filter(function(team) {
// Remove winner because it is not a "team" per se
return team !== 'winner';
});
// Now, iterate through all teams and summarize
return teams.map(function(team) {
return {
dimension: team,
count: v.length,
median: d3.median(v, function(d) {
return d[team];
}),
avg: d3.mean(v, function(d) {
return d[team];
})
};
});
<!DOCTYPE html>
<html>
<head>
<!--d3 -->
<script src='https://d3js.org/d3.v4.min.js'></script>
</head>
<body>
<script>
// Sample rows: each one names the winner and carries a numeric value for
// every team column.
var data = [
  {'winner': 'team.1', 'team.1': 5, 'team.2': 4, 'team.3': 1, 'team.4': 0},
  {'winner': 'team.2', 'team.1': 5, 'team.2': 1, 'team.3': 4, 'team.4': 1},
  {'winner': 'team.2', 'team.1': 7, 'team.2': 4, 'team.3': 9, 'team.4': 0},
  {'winner': 'team.1', 'team.1': 5, 'team.2': 8, 'team.3': 0, 'team.4': 1},
  {'winner': 'team.1', 'team.1': 2, 'team.2': 4, 'team.3': 2, 'team.4': 1}
];

// Group the rows by winner; each group is reduced to an array holding one
// summary object (count, median, mean) per team column.
var out = d3.nest()
  .key(function(d) {
    return d.winner;
  })
  .rollup(function(v) {
    // Distinct team names in this group. Each row contributes its own copy
    // of the keys, so keep only the first occurrence of each name and drop
    // 'winner', which is not a team column.
    var teams = v.reduce(function(memo, d) {
      return memo.concat(Object.keys(d));
    }, []).filter(function(team, index, allKeys) {
      return team !== 'winner' && allKeys.indexOf(team) === index;
    });

    // One summary per team, computed over all rows of the group.
    return teams.map(function(team) {
      return {
        dimension: team,
        count: v.length,
        median: d3.median(v, function(d) {
          return d[team];
        }),
        avg: d3.mean(v, function(d) {
          return d[team];
        })
      };
    });
  })
  .entries(data);

console.log(out);
</script>
</body>
</html>
Upvotes: 2