Reputation: 275
I just started with dc.js and was looking at the NASDAQ example on the main site: https://dc-js.github.io/dc.js/
I created a Fiddle with some sample dummy data and just the two relevant charts for this question.
Similar to the NASDAQ example, I want to have a bubble chart with the Y-Axis being the % Change in value over a timespan controlled by a brush in a different chart. The code for the NASDAQ example does the following:
// Per-year performance statistics, maintained incrementally as records enter
// and leave the current filter results.
var yearlyPerformanceGroup = yearlyDimension.group().reduce(
    /* add: fold a record that entered the current filter results into the totals */
    function (totals, row) {
        var move = row.close - row.open;
        totals.count += 1;
        totals.absGain += move;
        totals.fluctuation += Math.abs(move);
        totals.sumIndex += (row.open + row.close) / 2;
        totals.avgIndex = totals.sumIndex / totals.count;
        if (totals.avgIndex) {
            totals.percentageGain = (totals.absGain / totals.avgIndex) * 100;
            totals.fluctuationPercentage = (totals.fluctuation / totals.avgIndex) * 100;
        } else {
            // A zero (or NaN) average index would make the ratios meaningless.
            totals.percentageGain = 0;
            totals.fluctuationPercentage = 0;
        }
        return totals;
    },
    /* remove: take a record that left the current filter results back out of the totals */
    function (totals, row) {
        var move = row.close - row.open;
        totals.count -= 1;
        totals.absGain -= move;
        totals.fluctuation -= Math.abs(move);
        totals.sumIndex -= (row.open + row.close) / 2;
        if (totals.count) {
            totals.avgIndex = totals.sumIndex / totals.count;
        } else {
            // The last record is gone: avoid dividing by zero.
            totals.avgIndex = 0;
        }
        if (totals.avgIndex) {
            totals.percentageGain = (totals.absGain / totals.avgIndex) * 100;
            totals.fluctuationPercentage = (totals.fluctuation / totals.avgIndex) * 100;
        } else {
            totals.percentageGain = 0;
            totals.fluctuationPercentage = 0;
        }
        return totals;
    },
    /* init: the starting value of every group, with all statistics at zero */
    function () {
        var emptyTotals = {
            count: 0,
            absGain: 0,
            fluctuation: 0,
            fluctuationPercentage: 0,
            sumIndex: 0,
            avgIndex: 0,
            percentageGain: 0
        };
        return emptyTotals;
    }
);
which I currently interpret as summing (close - open) across all data and dividing by the average daily index, where each day's index is (open + close) / 2. But this is not a percent-change formula I am familiar with (e.g. (new - old) / old × 100).
While it seems to work for the NASDAQ example, my data would be more like the following:
country_id,received_week,product_type,my_quantity,my_revenue,country_other_quantity
3,2017-04-02,1,1,361,93881
1,2017-04-02,4,45,140,93881
2,2017-04-02,4,2,30,93881
3,2017-04-02,3,1,462,93881
2,2017-04-02,3,48,497,93881
etc.. over many months and product_types.
Let's say I was interested in computing the percent change for a particular country. How do I get the start and end quantities for a given country so I can compute the change as (end - start) / start * 100?
I was thinking of something such as the following (assuming I set up the proper dimensions and everything)
// Dimension keyed by each record's country_id.
var country_dim = ndx.dimension(function (row) {
    return row.country_id;
});
// Dimension keyed by each record's received_day.
var received_day_dim = ndx.dimension(function (row) {
    return row.received_day;
});
// received_day of the record at the bottom / top of the received_day dimension,
// i.e. the two ends of the date range (throws if the dimension returns no records).
var date_min = received_day_dim.bottom(1)[0].received_day;
var date_max = received_day_dim.top(1)[0].received_day;
Then in my custom reduce function currently in the vein of the example (wrong):
// Custom reduce over the country dimension's group. Three callbacks are passed:
// an "add" callback, a "remove" callback and an initializer. In each callback
// p is the group's running value and v is the record being added or removed.
var statsByCountry = country_dim.group().reduce(
// add callback
function (p, v) {
++p.count;
p.units += +v["my_units"];
// NOTE(review): this writes p.example_rate, which nothing else in this block reads
// and which is absent from the initial value; the next three lines read
// p.opp_buy_rate, which this add callback never assigns.
p.example_rate = +v['my_units']/(v['quantity_unpacked']*90) //place holder for total units per day per country
p.sumRate += p.opp_buy_rate;
// NOTE(review): avgRate is derived from opp_buy_rate, not from sumRate, so
// percentageGain below works out to p.count * 100 whenever avgRate is non-zero.
p.avgRate = p.opp_buy_rate/p.count;
p.percentageGain = p.avgRate ? (p.opp_buy_rate / p.avgRate) * 100 : 0;
p.dollars += +v["quantity_unpacked"]/2;
// p.max_date = v['received_week'].max();
// p.min_date
//dateDimension.top(Infinity)[dateDimension.top(Infinity).length - 1]['distance'] - dateDimension.top(Infinity)[0]['distance']
return p;
},
// remove callback
function (p, v) {
--p.count;
// NOTE(review): p.test is only ever decremented here; the add callback has no
// matching increment.
if (v.region_id > 2) {
p.test -= 100;
}
// NOTE(review): the add callback increments units by v["my_units"], but this
// decrements by v["quantity_unpacked"], so add and remove are not symmetric.
p.units -= +v["quantity_unpacked"];
// NOTE(review): numerator and denominator use the same field, so this evaluates to
// 1/90 for any non-zero quantity_unpacked (and NaN when it is 0).
p.opp_buy_rate = +v['quantity_unpacked']/(v['quantity_unpacked']*90) //place holder for total units per day per country
p.sumRate -= p.opp_buy_rate;
p.avgRate = p.count ? p.opp_buy_rate/p.count : 0;
p.percentageGain = p.avgRate ? (p.opp_buy_rate / p.avgRate) * 100 : 0;
p.dollars -= +v["quantity_unpacked"]/2;
// p.max_date = v['received_week'].max();
return p;
},
// initializer: the starting value of every group
function () {
return {quantity_unpacked: 0,
count: 0,
units: 0,
opp_buy_rate: 0,
sumRate: 0,
avgRate: 0,
percentageGain: 0,
dollars: 0,
test: 0
};//, dollars: 0}
}
);
and my chart:
// Bubble chart: one bubble per group of statsByCountry.
country_bubble
    .width(990)
    .height(250)
    .margins({
        top: 10,
        right: 50,
        bottom: 30,
        left: 80
    })
    .dimension(country_dim)
    .group(statsByCountry)
    // key (x value): the group's accumulated units
    .keyAccessor(function (group) {
        var stats = group.value;
        return stats.units;
    })
    // y value: the group's percentageGain
    .valueAccessor(function (group) {
        var stats = group.value;
        return stats.percentageGain;
    })
    // radius: the group's dollars, scaled down by ten million
    .radiusValueAccessor(function (group) {
        var stats = group.value;
        return stats.dollars / 1e7;
    })
    .maxBubbleRelativeSize(0.05)
    .elasticX(true)
    .elasticY(true)
    .elasticRadius(true)
    .x(d3.scale.linear())
    .y(d3.scale.linear())
    // .x(d3.scale.linear().domain([0, 1.2*bubble_xmax]))
    // .y(d3.scale.linear().domain([0, 10000000]))
    .r(d3.scale.linear().domain([0, 10]))
    .yAxisPadding('25%')
    .xAxisPadding('15%')
    .renderHorizontalGridLines(true)
    .renderVerticalGridLines(true)
    // On every 'renderlet' event, clear the clip-path attribute of the chart body.
    .on('renderlet', function (renderedChart) {
        var chartBody = renderedChart.svg().select(".chart-body");
        chartBody.attr("clip-path", null);
    });
I originally thought of having something similar to the following in statsByCountry:
// Fragment meant to sit inside a reduce callback: p is the group's running
// value, v the current record; date_min and date_max are expected to be in scope.
// Add the record's my_quantity to p.start_value when its received_day equals date_min.
// NOTE(review): if received_day holds Date objects, == compares object identity
// rather than the timestamp — confirm the field type.
if (v.received_day == date_min) {
p.start_value += v.my_quantity;
}
// Likewise, add my_quantity to p.end_value when received_day equals date_max.
if (v.received_day == date_max) {
p.end_value += v.my_quantity;
}
This seems a bit clumsy? But if I do this, I don't think this will continually update as other filters change (say time or product)? Ethan suggested I use fake groups, but I'm a bit lost.
Upvotes: 0
Views: 706
Reputation: 6010
With the working fiddle, we can demonstrate one way to do this. I don't really think this is the best way to go about it, but it is the Crossfilter way.
First you need to maintain an ordered array of all data in a group as part of the group using your custom reduce function:
// Custom reduce for the per-country group. Besides the running totals, every
// group keeps the records currently in the filter in p.data, kept in the order
// given by `bisect` (defined elsewhere; presumably a Crossfilter bisector on
// received_week — confirm), so that a value accessor can inspect them later.
var statsByCountry = country_dim.group().reduce(
    // add: p is the group's running value, v the record entering the filter
    function(p, v) {
        ++p.count;
        p.units += +v["my_quantity"];
        // running units divided by this record's country_other_quantity
        p.country_rate = p.units / (1.0 * v['country_other_quantity']);
        // placeholder; the real change is computed in the chart's value accessor
        p.percent_change = 50;
        p.dollars += +v["my_revenue"];
        // `var` keeps the index local: a bare `i = ...` creates an implicit
        // global and throws a ReferenceError in strict mode / ES modules.
        var i = bisect(p.data, v, 0, p.data.length);
        p.data.splice(i, 0, v);
        return p;
    },
    // remove: p is the group's running value, v the record leaving the filter
    function(p, v) {
        --p.count;
        p.units -= +v["my_quantity"];
        p.country_rate = p.units / (1.0 * v['country_other_quantity']);
        p.percent_change = 50;
        p.dollars -= +v["my_revenue"];
        // Locate the record by identity. A bisector only yields an insertion
        // point among the records that share v's sort key, so splicing at that
        // index could drop a different record (or none). This relies on the
        // remove callback receiving the same object that was added.
        var i = p.data.indexOf(v);
        if (i !== -1) {
            p.data.splice(i, 1);
        }
        return p;
    },
    // init: the starting value of every group
    function() {
        return {
            data: [],
            count: 0,
            units: 0,
            country_rate: 0,
            dollars: 0,
            percent_change: 0
        };
    }
);
Above, I've updated your reduce function to maintain this ordered array (ordered by `received_week`) under the `.data` property. It uses Crossfilter's `bisect` function to maintain order efficiently.
Then in your `valueAccessor` you want to actually calculate your change in value based on this data:
.valueAccessor(function(p) { //y alue
// Calculate change in units/day from first day to last day.
var firstDay = p.value.data[p.value.data.length-1].received_week.toString();
var lastDay = p.value.data[0].received_week.toString();
var firstDayUnits = d3.sum(p.value.data, function(d) { return d.received_week.toString() === firstDay ? d.my_quantity : 0 })
var lastDayUnits = d3.sum(p.value.data, function(d) { return d.received_week.toString() === lastDay ? d.my_quantity : 0 })
return lastDayUnits - firstDayUnits;
})
You do this in the value accessor because it only runs once per filter change, whereas the reduce functions run once per record added or removed, which can be thousands of times per filter.
If you want to calculate % change, you can do this here as well, but the key question for % calculations is always "% of what?" and the answer to that question wasn't clear to me from your question.
It's worth noting that with this approach your group structure is going to get really big as you are storing your entire data set in the groups. If you are having performance problems while filtering, I would still recommend moving away from this approach and towards one based on a fake group.
Working updated fiddle: https://jsfiddle.net/vysbxd1h/1/
Upvotes: 0