Reputation: 756
This is my second question on the dc.js/d3.js/crossfilter.js topic. I am trying to realize a basic personal dashboard and I started by creating a very simple lineChart (with a rangeChart associated) that outputs metrics over time.
The data I have is saved as JSON (it will be stored in a MongoDB instance at a later stage, so for now I used JSON that also keeps the datetime format) and looks like this:
[
{"date":1374451200000,"prodPow":0.0,"consPow":0.52,"toGridPow":0.0,"fromGridPow":0.52,"prodEn":0.0,"consEn":0.0,"toGridEn":0.0,"fromGridEn":0.0},
{"date":1374451500000,"prodPow":0.0,"consPow":0.34,"toGridPow":0.0,"fromGridPow":0.34,"prodEn":0.0,"consEn":0.0,"toGridEn":0.0,"fromGridEn":0.0},
{"date":1374451800000,"prodPow":0.0,"consPow":0.42,"toGridPow":0.0,"fromGridPow":0.42,"prodEn":0.0,"consEn":0.0,"toGridEn":0.0,"fromGridEn":0.0},
...
]
I have around 22000 entries like this and I am experiencing a lot of performance issues when opening the dashboard. Even if I slice the data down to a set of 8000 records, the performance is still pretty bad (although at least the rendering finishes after some time) and interacting with the data is awful. I am guessing that my code has some pitfall that makes it under-perform, since I would expect dc.js and crossfilter.js to start struggling only with 100k+ entries and more than one dimension!
Nevertheless, profiling with chrome and reading online didn't help much (more details on what I tried to change later).
Here is my graph.js code:
// Fetch the JSON records from "/data", then hand the result to makeGraphs.
queue().defer(d3.json, "/data").await(makeGraphs);
/**
 * Build and render the dashboard charts from the loaded records.
 *
 * Intended as the callback passed to queue().await(): it receives the load
 * error (if any) followed by the parsed JSON records.
 *
 * @param {*} error - Load error reported by the queue; falsy on success.
 * @param {Array<Object>} recordsJson - Records carrying a `date` field plus
 *     the `consPow` and `prodPow` metrics that are charted here.
 * @returns {undefined}
 */
function makeGraphs(error, recordsJson) {
    // Report a failed load instead of crashing later on undefined data.
    if (error) {
        console.error("Failed to load dashboard data:", error);
        return;
    }

    // Slice data to avoid browser deadlock
    var records = recordsJson.slice(0, 8000);

    // Crossfilter instance (declared locally so it does not leak as an implicit global)
    var ndx = crossfilter(records);

    // Define Dimensions
    var dateDim = ndx.dimension(function (d) { return d.date; });

    // Define Groups: one sum per distinct date value for each metric
    var consPowByDate = dateDim.group().reduceSum(function (d) { return d.consPow; });
    var prodPowByDate = dateDim.group().reduceSum(function (d) { return d.prodPow; });

    // Min and max dates to be used in the charts
    var minDate = dateDim.bottom(1)[0]["date"];
    var maxDate = dateDim.top(1)[0]["date"];

    // Charts instance
    var chart = dc.lineChart("#chart");
    var volumeChart = dc.barChart('#volume-chart');

    chart
        .renderArea(true)
        /* Make the chart as big as the bootstrap grid by not setting ".width(x)" */
        .height(350)
        .transitionDuration(1000)
        .margins({top: 30, right: 50, bottom: 25, left: 40})
        .dimension(dateDim)
        /* Grouped data to represent and label to use in the legend */
        .group(consPowByDate, "Consumed")
        /* Function to access grouped-data values in the chart */
        .valueAccessor(function (d) {
            return d.value;
        })
        /* x-axis range */
        .x(d3.time.scale().domain([minDate, maxDate]))
        /* Auto-adjust y-axis */
        .elasticY(true)
        .renderHorizontalGridLines(true)
        .legend(dc.legend().x(80).y(10).itemHeight(13).gap(5))
        /* When on, you can't visualize values, when off you can filter data */
        .brushOn(false)
        /* Add another line to the chart; pass (i) group, (ii) legend label and (iii) value accessor */
        .stack(prodPowByDate, "Produced", function (d) { return d.value; })
        /* Range chart to link the brush extent of the range with the zoom focus of the current chart. */
        .rangeChart(volumeChart);

    volumeChart
        .height(60)
        .margins({top: 0, right: 50, bottom: 20, left: 40})
        .dimension(dateDim)
        .group(consPowByDate)
        .centerBar(true)
        .gap(1)
        .x(d3.time.scale().domain([minDate, maxDate]))
        .alwaysUseRounding(true);

    // Render all graphs
    dc.renderAll();
}
I used Chrome DevTools to do some CPU profiling; in summary, these are the results:
After reading this thread I thought it could be an issue with dates, so I tried to modify the code to use numbers instead of dates. Here is what I changed (I will write down only the changes):
// Run before the crossfilter is created: coerce every record's date to a number
records.forEach(function (record) {
    record.date = +record.date;
});
// In both the lineChart and the barChart, replace the time scale with a
// linear (numeric) scale over the same [minDate, maxDate] domain
.x(d3.scale.linear().domain([minDate, maxDate]))
Unfortunately nothing noticeable changed performance-wise. I have no clue on how to fix this and actually I would like to add more groups, dimensions and charts to the dashboard...
Edit: Here is a github link if you want to test my code by yourself.
I used python3 and flask for the server side, so you just have to install flask:
pip3 install flask
run the dashboard:
python3 dashboard.py
and then go with your browser to:
localhost:5000
Upvotes: 2
Views: 716
Reputation: 20120
It's hard to tell without trying it out but probably what is happening is that there are too many unique dates, so you end up with a huge number of DOM objects. Remember that JavaScript is fast, but the DOM is slow - so dealing with up to half a gigabyte of data should be fine, but you can only have a few thousand DOM objects before the browser chokes up.
This is exactly what crossfilter was designed to deal with, however! All you need to do is aggregate. You're not going to be able to see 1000s of points; they will only get lost, since your chart is (probably) only a few hundred pixels wide.
So depending on the time scale, you could aggregate by hour:
// Group the date dimension by hour and sum the consumed power in each bucket.
var consPowByHour = dateDim
    .group(function (date) { return d3.time.hour(date); })
    .reduceSum(function (record) { return record.consPow; });
// Chart the hourly group, with hour-based x units to match the bucket size.
chart.group(consPowByHour)
    .xUnits(d3.time.hours)
or similarly for minutes, days, years, whatever. It may be more complicated than you need, but this example shows how to switch between time intervals.
(I'm not going to install a whole stack to try this - most examples are JS only so it's easy to try them out in jsfiddle or whatever. If this doesn't explain it, then adding a screenshot might also be helpful.)
EDIT: I also notice that your dates are stored as integers but your scale is time-based. Maybe this causes objects to be built all the time. Please try:
// Replace each record's numeric timestamp with a Date object
records.forEach(function (record) {
    var timestamp = +record.date;
    record.date = new Date(timestamp);
});
Upvotes: 4