Reputation: 14394
I'm trying to use d3-array to generate two summaries of an array of objects:
This is my current approach:
const data = [
{ post_id: 47469, action: "reply", teacher_username: "John" },
{ post_id: 47469, action: "edit", teacher_username: "John" },
{ post_id: 47468, action: "reply", teacher_username: "John" },
{ post_id: 47465, action: "reply", teacher_username: "Mary" },
{ post_id: 47465, action: "edit", teacher_username: "Mary" },
{ post_id: 47467, action: "edit", teacher_username: "Mary" },
{ post_id: 46638, action: "reply", teacher_username: "Paul" },
];
const teacherSummary = [
...d3.rollup(
data,
(x) => x.length,
(d) => d.teacher_username,
(d) => d.action
),
]
.map((x) => {
return {
teacher_username: x[0],
num_edits: x[1].get("edit") || 0,
num_replies: x[1].get("reply") || 0,
};
})
.sort((a, b) => d3.descending(a.num_edits, b.num_edits));
// [
// { "teacher_username": "Mary", "num_edits": 2, "num_replies": 1 },
// { "teacher_username": "John", "num_edits": 1, "num_replies": 2 },
// { "teacher_username": "Paul", "num_edits": 0, "num_replies": 1 }
// ]
const postIdsByTeacher = d3.rollups(
data.filter((x) => x.action === "edit"),
(v) => [...new Set(v.map((d) => d.post_id))].join(", "), // Set() is used to get rid of duplicate post_ids
(d) => d.teacher_username
);
// [
// ["John","47469"],
// ["Mary","47465, 47467"]
// ]
I'm flexible on the output format. What I'd like to optimize for is efficiency and clarity:
rollup
call? Maybe by adding edited_post_ids
to teacherSummary
.[...Map/Set]
callsEdit: Out of curiosity, I also tried this approach using alasql. Except for the nulls in edited_post_ids
, it almost works.
sql = alasql(`
select
teacher_username,
count(case when action = 'reply' then 1 end) num_replies,
count(case when action = 'edit' then 1 end) num_edits,
array(case when action = 'edit' then post_id end) as edited_post_ids
from ?
group by teacher_username
`, [data])
// [
// { teacher_username: "John", num_replies: 2, num_edits: 1, edited_post_ids: [null, 47469, null], },
// { teacher_username: "Mary", num_replies: 1, num_edits: 2, edited_post_ids: [null, 47465, 47467], },
// { teacher_username: "Paul", num_replies: 1, num_edits: 0, edited_post_ids: [null], },
// ];
Upvotes: 3
Views: 904
Reputation: 14394
I ended up simplifying @Robin Mackenzie's last suggestion:
const uniq = require('lodash.uniq');
const teacherSummary = d3
.groups(data, (d) => d.teacher_username)
.map(([teacher_username, actions]) => {
const edits = actions.filter((x) => x.action == "edit").map((x) => x.post_id);
const replies = actions.filter((x) => x.action == "reply").map((x) => x.post_id);
return {
teacher_username,
num_edits: edits.length,
num_replies: replies.length,
edited_post_ids: uniq(edits),
replied_post_ids: uniq(replies),
};
})
Upvotes: 1
Reputation: 19289
The function signature for d3.rollup
is:
d3.rollup(iterable, reduce, ...keys)
Which on the face of it says you can provide one operation in reduce
e.g. counting or summation or some other operation - but just one.
For your output, you're looking for two different operations
post_id
s where action == "edit"
As soon as you made the choice to use (x) => x.length
you've kind of cut off the opportunity to use a different reduce
operation. Arguably d3.rollup
isn't the function you need if you have multiple operations?
Still you can add edited_post_ids
to teacherSummary
by simply reaching back into the original data and applying a filter
then map
:
const data = [
{ post_id: 47469, action: "reply", teacher_username: "John" },
{ post_id: 47469, action: "edit", teacher_username: "John" },
{ post_id: 47468, action: "reply", teacher_username: "John" },
{ post_id: 47465, action: "reply", teacher_username: "Mary" },
{ post_id: 47465, action: "edit", teacher_username: "Mary" },
{ post_id: 47467, action: "edit", teacher_username: "Mary" },
{ post_id: 46638, action: "reply", teacher_username: "Paul" },
];
const teacherSummary = [...d3.rollup(
data,
v => v.length,
d => d.teacher_username,
d => d.action
)].map(d => {
return {
teacher_username: d[0],
num_edits: d[1].get("edit") || 0,
num_replies: d[1].get("reply") || 0,
edited_post_ids: data
.filter(x => x.action === "edit" & x.teacher_username == d[0])
.map(x => x.post_id)
}
});
console.log(teacherSummary);
<script src="https://cdnjs.cloudflare.com/ajax/libs/d3/6.0.0/d3.min.js"></script>
An alternative is to not use d3.rollup
/d3.rollups
and use d3.groups
instead. The source for rollup
and group
are all calls to nest
btw. You lose the counting that rollup
did for you and have to implement it yourself. This example reads a bit more like the SQL example:
const data = [
{ post_id: 47469, action: "reply", teacher_username: "John" },
{ post_id: 47469, action: "edit", teacher_username: "John" },
{ post_id: 47468, action: "reply", teacher_username: "John" },
{ post_id: 47465, action: "reply", teacher_username: "Mary" },
{ post_id: 47465, action: "edit", teacher_username: "Mary" },
{ post_id: 47467, action: "edit", teacher_username: "Mary" },
{ post_id: 46638, action: "reply", teacher_username: "Paul" },
];
// compare with
// select
// teacher_username,
// count(case when action = 'reply' then 1 end) num_replies,
// count(case when action = 'edit' then 1 end) num_edits,
// array(case when action = 'edit' then post_id end) as
// edited_post_ids
// from ?
// group by teacher_username
const teacherSummary = d3.groups(data, d => d.teacher_username)
.map(k => {
return {
teacher_username: k[0],
num_edits: k[1].filter(k2 => k2.action == "edit").length,
num_replies: k[1].filter(k2 => k2.action == "reply").length,
edited_post_ids: k[1].filter(k2 => k2.action == "edit").map(k3 => k3.post_id)
}
});
console.log(teacherSummary);
<script src="https://cdnjs.cloudflare.com/ajax/libs/d3/6.0.0/d3.min.js"></script>
As a side-note then, you can boil down postIdsByTeacher
to below, and avoid new Set(etc)
type stuff:
const data = [
{ post_id: 47469, action: "reply", teacher_username: "John" },
{ post_id: 47469, action: "edit", teacher_username: "John" },
{ post_id: 47468, action: "reply", teacher_username: "John" },
{ post_id: 47465, action: "reply", teacher_username: "Mary" },
{ post_id: 47465, action: "edit", teacher_username: "Mary" },
{ post_id: 47467, action: "edit", teacher_username: "Mary" },
{ post_id: 46638, action: "reply", teacher_username: "Paul" },
];
const postIdsByTeacher = d3.rollups(
data.filter(d => d.action === "edit"),
v => [].concat(v.map(k => k.post_id)),
d => d.teacher_username
);
console.log(postIdsByTeacher);
<script src="https://cdnjs.cloudflare.com/ajax/libs/d3/6.0.0/d3.min.js"></script>
But my gut feel is the value of using d3.rollup
is going to be when you want to do the standard summation and counting kind of stuff.
Upvotes: 2