Reputation: 21
I would like to create a multi time line chart as a series chart.
I read the Stack Overflow question about filling in missing data: "dc.js lineChart - fill missing dates and show zero where no data".
Question: I implemented the code there and it worked great for a single line chart. For a series chart I needed to tweak it a bit. It works, however the performance is terrible.
Here is the sample data we are using:
// Sample rows: one report per entry, with a store name (`description`),
// a "lat lon" string (`location`) and an epoch-millisecond timestamp (`timeReported`).
// Elements are comma-separated so the literal parses as a JavaScript array.
let data = [
  {description: "Walmart", location: "40.216403 -74.541296", timeReported: 1581710670184},
  {description: "Target", location: "38.271996 -84.032575", timeReported: 1583524065011},
  {description: "Wendys", location: "39.255831 -75.532763", timeReported: 1583524065011},
  {description: "7-11", location: "34.925349 -78.463977", timeReported: 1583524065011},
  {description: "WaWa", location: "35.716208 -77.741230", timeReported: 1583524065013},
  {description: "7-11", location: "41.258950 -83.888060", timeReported: 1583524065013},
  {description: "Shell", location: "37.879694 -79.836127", timeReported: 1583524065011},
  {description: "Dominos", location: "35.890273 -80.700329", timeReported: 1583524065395},
  {description: "Dominos", location: "39.268777 -78.743366", timeReported: 1583524065397},
  {description: "Walgreens", location: "35.490215 -81.773863", timeReported: 1583524065399},
  {description: "7-11", location: "37.974797 -81.393449", timeReported: 1583524065506},
  {description: "Wendys", location: "40.859685 -76.963065", timeReported: 1583524065521},
  {description: "CVS", location: "38.517910 -78.251419", timeReported: 1583524065553},
  {description: "CVS", location: "35.947033 -81.616061", timeReported: 1583524142169},
  {description: "Shell", location: "39.566535 -77.992499", timeReported: 1583524142176},
  {description: "Target", location: "37.832142 -88.003151", timeReported: 1583524142170},
  {description: "Wendys", location: "40.245397 -80.061998", timeReported: 1583524142223},
  {description: "Macys", location: "39.631265 -75.157194", timeReported: 1583524142223},
  {description: "Macys", location: "36.631458 -77.803286", timeReported: 1583524142213},
  {description: "7-11", location: "36.249754 -79.830006", timeReported: 1583524142251},
  {description: "7-11", location: "41.138285 -83.298142", timeReported: 1583524142249},
  {description: "Wendys", location: "34.940485 -77.230388", timeReported: 1583524142249},
  {description: "7-11", location: "39.605373 -77.448768", timeReported: 1583524142296},
  {description: "Wendys", location: "35.609094 -79.455712", timeReported: 1583524142293},
  {description: "WaWa", location: "37.130753 -78.076709", timeReported: 1583524142310},
  {description: "Macys", location: "40.058482 -78.497258", timeReported: 1583524142338},
  {description: "Wendys", location: "39.255831 -75.532763", timeReported: 1582058735883},
  {description: "Macys", location: "39.631265 -75.157194", timeReported: 1582058735883},
  {description: "7-11", location: "36.249754 -79.830006", timeReported: 1582058735883},
  {description: "7-11", location: "39.605373 -77.448768", timeReported: 1582058735883},
  {description: "Wendys", location: "35.609094 -79.455712", timeReported: 1582058735883},
  {description: "WaWa", location: "37.130753 -78.076709", timeReported: 1582058735883},
  {description: "Macys", location: "40.058482 -78.497258", timeReported: 1582058735883},
  {description: "Kohls", location: "40.373533 -101.057470", timeReported: 1582838559493},
];
Here is the sample code. BTW, curTimeInterval in the code below is just an alias for one of the d3 time intervals, which can be chosen by the user (d3.timeHour, d3.timeDay, d3.timeWeek, d3.timeMonth).
// Build the crossfilter index over the sample rows.
cf = crossfilter(data);
// Dimension keyed by whatever curTimeInterval returns for each row's date.
// NOTE(review): the sample rows shown only carry `timeReported` (an epoch number);
// presumably `timeReportedDate` is derived from it elsewhere - confirm.
dateDim = cf.dimension((d) => {
return curTimeInterval(d.timeReportedDate);
});
// Every row contributes 1, so each group value is the number of rows sharing a key.
reportedGroup = dateDim.group().reduceSum((d) => 1);
// Smallest and largest key present in the group ...
let minDate = d3.min(reportedGroup.all(), (kv) => {
return kv.key;
});
let maxDate = d3.max(reportedGroup.all(), (kv) => {
return kv.key;
});
// ... then moved outward by two intervals on each side; used below as the x domain.
minDate = curTimeInterval.offset(minDate, -2);
maxDate = curTimeInterval.offset(maxDate, 2);
// Composite key [description, time bucket]: the series chart below reads
// key[0] as the series and key[1] as the x value.
const runDimension = cf.dimension((d) => {
return [d.description, curTimeInterval(d.timeReportedDate)];
});
const runGroup = runDimension.group();
// Fills the missing data in the group
const filledSeries = fill_composite_intervals(runGroup, curTimeInterval);
const seriesChart = new dc.SeriesChart('#series');
seriesChart
.width(768)
.height(480)
// Each series is drawn by its own line chart using a cardinal curve.
.chart(function(c) {
return new dc.LineChart(c).curve(d3.curveCardinal);
})
.x(d3.scaleTime().domain([minDate, maxDate]))
.xUnits(curTimeInterval.range)
.brushOn(false)
.clipPadding(10)
.elasticY(true)
.dimension(runDimension)
// The chart is given the filled wrapper, not the raw runGroup.
.group(filledSeries)
.mouseZoomable(true)
// key = [description, time]: description selects the series ...
.seriesAccessor((d) => {
return d.key[0];
})
// ... and the time bucket is the x value.
.keyAccessor((d) => {
return d.key[1];
})
.valueAccessor((d) => {
return d.value;
})
.legend(dc.legend().x(350).y(350).itemHeight(13).gap(5).horizontal(1).legendWidth(140).itemWidth(70))
// From here on the chain operates on whatever yAxis() returns, not on the chart.
.yAxis()
// NOTE(review): `min` and `max` are not defined anywhere in this snippet;
// presumably the y-value bounds computed elsewhere - confirm.
.tickValues(d3.range(min > 0 ? min - 1 : min, max + 1));
// Add 40 to the chart's existing left margin.
seriesChart.margins().left += 40;
/**
 * Wrap a group whose keys are [category, time] so that every category has a
 * value for every interval between the earliest and latest time in the data.
 *
 * Differences from a naive fill:
 *  - declared as a function (no implicit global; callable before this line);
 *  - the array returned by group.all() is copied before sorting, so the
 *    caller's array is never reordered;
 *  - the final interval is included in the fill range (interval.range excludes
 *    its stop value), so categories without data in the last interval get a
 *    zero there as well.
 *
 * @param {{all: function(): Array<{key: [*, Date], value: number}>}} group
 *   source group; only all() is used.
 * @param {*} interval d3-style time interval; only range(start, stop) and
 *   offset(date, step) are used.
 * @returns {{all: function(): Array<{key: [*, Date], value: number}>}}
 *   object whose all() returns the filled entries, grouped by category (in
 *   order of first appearance by time) and ascending by time within each.
 */
function fill_composite_intervals(group, interval) {
  return {
    all() {
      // Sort a copy: the source array belongs to the caller.
      const sorted = [...group.all()].sort(
        (a, b) => a.key[1].getTime() - b.key[1].getTime(),
      );
      if (sorted.length === 0) {
        return [];
      }

      // Every interval start from the first bucket through the last one.
      // The stop is pushed out by one step because range() excludes it.
      const firstTime = sorted[0].key[1];
      const lastTime = sorted[sorted.length - 1].key[1];
      const target = interval.range(firstTime, interval.offset(lastTime, 1));

      // Split the entries by category, keeping time order inside each.
      const byCategory = new Map();
      for (const {key: [category, time], value} of sorted) {
        let points = byCategory.get(category);
        if (!points) {
          points = [];
          byCategory.set(category, points);
        }
        points.push({key: new Date(time), value});
      }

      // Merge each category's points with the target times, emitting a zero
      // for every target time that has no point.
      const filled = [];
      for (const [category, points] of byCategory) {
        let oi = 0;
        let ti = 0;
        while (oi < points.length && ti < target.length) {
          const point = points[oi];
          if (point.key <= target[ti]) {
            filled.push({key: [category, point.key], value: point.value});
            if (point.key.getTime() === target[ti].getTime()) {
              ++ti;
            }
            ++oi;
          } else {
            filled.push({key: [category, target[ti]], value: 0});
            ++ti;
          }
        }
        // Points lying beyond the target range are kept as they are.
        for (; oi < points.length; ++oi) {
          filled.push({key: [category, points[oi].key], value: points[oi].value});
        }
        // Target times after the category's last point become trailing zeros.
        for (; ti < target.length; ++ti) {
          filled.push({key: [category, target[ti]], value: 0});
        }
      }
      return filled;
    },
  };
}
Upvotes: 1
Views: 135
Reputation: 20140
Since my previous answer was still too slow when used with small time intervals, I rewrote the core of the loop.
Rather than walking the entire range of dates between the beginning and end, it's much faster and much simpler to just look at the data and detect whether one or two zeros should be added between the last data point and this one.
The heart of fill_composite_intervals
now looks like
// Earliest and latest bucket times in the data, each passed through `interval`.
const [begin, end] = d3.extent(allArray, ({key}) => key[1]).map(interval);
// For every category, add zeros only next to real data points instead of
// walking every interval in the whole range.
allMap.forEach((points, category, categories) => {
  const padded = [];
  if (points.length) {
    // Start two steps before `begin` so the first point is preceded by zeros.
    let previous = interval.offset(begin, -2);
    points.forEach(({key, value}) => {
      const current = {key: new Date(key), value};
      const gap = interval.count(previous, current.key);
      if (gap !== 0 && gap !== 1) {
        // falling edge: a zero right after the previous point
        padded.push({key: interval.offset(previous, 1), value: 0});
        if (gap > 2) {
          // rising edge: a zero right before this point
          padded.push({key: interval.offset(current.key, -1), value: 0});
        }
      }
      padded.push(current);
      previous = current.key;
    });
    // one trailing zero after the category's last point
    padded.push({key: interval.offset(previous, 1), value: 0});
  }
  categories.set(category, padded);
});
The first and last curves are misshapen because they are missing a control point on the spline to make the slope 0 at the edges.
We can add one more zero at the beginning and end.
Here is the fast and smooth fake group for multi time line charts.
/**
 * Fake group for multi-series time line charts.
 *
 * Takes a group whose keys are [category, time] and returns an object whose
 * all() yields the same entries plus zero-valued entries around the data:
 * two zeros before each category's first point, two after its last point,
 * and one or two zeros inside every gap of two or more intervals.
 *
 * @param {{all: function(): Array}} group source group; only all() is used.
 * @param {*} interval d3-style time interval; only offset() and count() are used.
 * @returns {{all: function(): Array}} wrapper exposing all().
 */
function fill_composite_intervals(group, interval) {
  return {
    all() {
      const flattened = [];
      const entries = group.all().slice();
      if (entries.length === 0) {
        return flattened;
      }
      // order every entry by time before splitting by category
      entries.sort((left, right) => left.key[1].getTime() - right.key[1].getTime());

      // category -> time-ordered points
      const series = new Map();
      for (const {key: [category, time], value} of entries) {
        if (!series.has(category)) {
          series.set(category, []);
        }
        series.get(category).push({key: new Date(time), value});
      }

      for (const [category, points] of series) {
        const emit = (time, value) => {
          flattened.push({key: [category, time], value});
        };
        // Pretend the previous point was three steps back so that the first
        // real point gets two leading zeros.
        let previous = interval.offset(points[0].key, -3);
        for (const point of points) {
          const gap = interval.count(previous, point.key);
          if (gap !== 0 && gap !== 1) {
            // zero right after the previous point ...
            emit(interval.offset(previous, 1), 0);
            if (gap > 2) {
              // ... and, when the gap is wider, right before this point
              emit(interval.offset(point.key, -1), 0);
            }
          }
          emit(point.key, point.value);
          previous = point.key;
        }
        // two trailing zeros after the category's last point
        emit(interval.offset(previous, 1), 0);
        emit(interval.offset(previous, 2), 0);
      }
      return flattened;
    },
  };
}
Upvotes: 1
Reputation: 20140
I started out by creating a fiddle which illustrates the problem. The interesting thing here is a select menu which shows which time intervals are appropriate for the data and zoom level (domain) of the chart.
It isn't appropriate to show more than width/2 points (since they won't be rendered), and it's also not appropriate to show fewer than two points, so the "inappropriate" options are shown in grey italics:
It uses an object mapping interval names to the number of milliseconds in the corresponding d3 interval:
// Approximate length in milliseconds of each d3 time interval, keyed by the
// interval's name and listed from shortest to longest.
const intervals = {
  timeSecond: 1000,
  timeMinute: 60 * 1000,
  timeHour: 60 * 60 * 1000,
  timeDay: 24 * 60 * 60 * 1000,
  timeWeek: 7 * 24 * 60 * 60 * 1000,
  // one twelfth of a 365-day year
  timeMonth: (365 * 24 * 60 * 60 * 1000) / 12,
  // 365 days
  timeYear: 365 * 24 * 60 * 60 * 1000,
};
allowed_intervals
determines the first and last appropriate interval:
/**
 * Determine the finest and the coarsest interval that suit a date domain.
 *
 * The finest ("first") is the first entry of `intervals` for which the domain
 * spans fewer than chart.width() / 2 intervals. The coarsest ("last") is the
 * last entry for which the two domain endpoints fall into different buckets
 * of d3[name].
 *
 * @param {{width: function(): number}} chart chart whose width() is read.
 * @param {Object<string, number>} intervals d3 interval name -> length in
 *   milliseconds, listed from shortest to longest.
 * @param {[Date, Date]} dateDomain start and end of the domain.
 * @returns {[string, string]} names of the first and last suitable intervals.
 * @throws {Error} 'date range too long' when even the longest interval gives
 *   too many points; 'date range too short' when no interval separates the
 *   two endpoints (previously an unexplained TypeError on `last[0]`).
 */
function allowed_intervals(chart, intervals, dateDomain) {
  const [start, stop] = dateDomain;
  const span = stop.getTime() - start.getTime();
  const entries = Object.entries(intervals);
  // more points than this cannot be rendered at the chart's width
  const maxPoints = chart.width() / 2;

  const first = entries.find(([, ms]) => span / ms < maxPoints);
  if (!first) {
    throw new Error('date range too long');
  }

  // search from the longest interval down; reverse a copy, not `entries`
  const last = [...entries].reverse().find(
    ([iname]) => d3[iname](start).getTime() !== d3[iname](stop).getTime(),
  );
  if (!last) {
    throw new Error('date range too short');
  }
  return [first[0], last[0]];
}
So that's all well and great. The example prints the resulting data, and we can see that if we fill the example data with d3.timeMinute
it produces 332482 data points from the original 15. That's clearly way too much data, especially for a simple example.
This is an okay algorithm for finding the appropriate d3 time interval. However it fails when we enable zooming, because now we can zoom into a single hour, say, where timeMinute
is appropriate, but if you use that interval for all the data, it's way too many points and the chart slows down to a halt.
So I started thinking about how to make it more efficient. We don't actually need to fill every missing time interval. What we really need is to make sure we catch the falling edge, where the data goes from non-zero to zero, and the rising edge, where the data goes from zero to non-zero. We only need to add zeros to the input data in those cases.
Here is a new version of fill_composite_intervals
that uses the rising and falling edges, only adding as many zeros as are necessary to display these edges:
/**
 * Zero-fill a [category, time] keyed group, adding zeros only at edges.
 *
 * Input: a group with keys [category, time] and numeric values, plus a
 * d3-style time interval (range() and offset() are used).
 * Output: an object with all() returning the same entries with zeros added
 * where a category goes from a value to nothing (falling edge) and from
 * nothing to a value (rising edge), so the output stays proportional to the
 * input rather than to the number of intervals in the range.
 */
function fill_composite_intervals(group, interval) {
  return {
    all() {
      const flattened = [];
      const entries = group.all().slice();
      if (entries.length === 0) {
        return flattened;
      }
      // work on time-ordered entries
      entries.sort((left, right) => left.key[1].getTime() - right.key[1].getTime());

      // All interval starts covering the data, padded by one step before the
      // first entry and past the last entry so series start and end at zero.
      const rangeStart = interval.offset(entries[0].key[1], -1);
      const rangeStop = interval.offset(entries[entries.length - 1].key[1], 2);
      const ticks = interval.range(rangeStart, rangeStop);

      // category -> time-ordered points
      const series = new Map();
      for (const {key: [category, time], value} of entries) {
        if (!series.has(category)) {
          series.set(category, []);
        }
        series.get(category).push({key: new Date(time), value});
      }

      for (const [category, points] of series) {
        const emit = (time, value) => {
          flattened.push({key: [category, time], value});
        };
        let p = 0;
        let t = 0;
        // true while the most recent output for this category was a filled zero
        let justPaddedZero = false;
        // true when at least one empty tick was skipped without output
        let owesRisingZero = false;

        while (p < points.length && t < ticks.length) {
          const point = points[p];
          const tick = ticks[t];
          if (point.key <= tick) {
            if (owesRisingZero) {
              // rising edge: zero on the tick just before this point
              emit(ticks[t - 1], 0);
              owesRisingZero = false;
            }
            emit(point.key, point.value);
            p += 1;
            if (point.key.getTime() === tick.getTime()) {
              t += 1;
            }
            justPaddedZero = false;
          } else {
            if (justPaddedZero) {
              // already at zero; remember that a tick was skipped
              owesRisingZero = true;
            } else {
              // falling edge: first empty tick after a value
              emit(tick, 0);
              justPaddedZero = true;
            }
            t += 1;
          }
        }
        // points beyond the tick range are passed through unchanged
        while (p < points.length) {
          emit(points[p].key, points[p].value);
          p += 1;
        }
        // a single trailing zero after the category's last point
        if (t < ticks.length) {
          emit(ticks[t], 0);
        }
      }
      return flattened;
    },
  };
}
See the comments in the code for an explanation. It only produces data proportionate to the input data, e.g. 67 points for the input 15 with timeMinute
, instead of 300+K.
Interestingly, I found that d3.curveCardinal
produces strange artifacts when there are fewer zeros. Intuitively, I think the line gains too much "momentum" if points are skipped. So I chose d3.curveMonotoneX instead. I think it is more appropriate anyway.
.curve(d3.curveMonotoneX)
I also padded the interval.range
at the beginning and end so that the data would start and end at zero, which is more appealing.
This example is still slow when you select d3.timeSecond
(it still iterates through 300+K points) but it seems to perform okay up to timeMinute
, which seems to capture the resolution of this data.
Further possible improvements:
Avoid interval.range so that not so many points are calculated and thrown away; instead, detect rising and falling edges using interval.offset and the next/last data points only (tricky!)
Upvotes: 1