Reputation: 99
I am trying to create a visualization to help students that I work with learn about measures of fit, such as r-squared. For R-squared, I want to have both the regression line and a line for the mean of Y on my graph. (My end goal is to have lines between the points and/or lines to represent ESS, TSS, and SSR that students can click to see or not see).
My regression line is working fine, but when I try to add in my average line, it ends up with a strange start and end point and is noticeably NOT a flat line at the average (4.4). I also get the following error in my console:
Error: Invalid value for <path> attribute d="M112,235.71428571428572L194,119.14285714285712L276,NaNL358,NaNL440,NaN"
which corresponds to the line of code:
.attr({
within my svg.append("path") for my avgline (at least, I think that's the call the error is pointing to):
// Appends a <path> to the chart, binds avgdataset to it as a single datum, and
// lets the avgline generator compute the "d" attribute from that datum. The
// remaining attributes style it as a thin, dashed, unfilled green line.
svg.append("path")
.datum(avgdataset)
.attr({
d: avgline,
stroke: "green",
"stroke-width": 1,
fill: "none",
"stroke-dasharray": "5,5",
});
I've tried playing around with how avgline is specified to no end (this playing around normally ends up producing no line at all). I've also tried using data instead of datum, to no avail. I'm likely making a really basic mistake, since I'm new to javascript and D3.
Here's all of my code thus far, to put it in context:
//Width and height
// Pixel size of the SVG, and the padding used when placing scales and axes.
var w = 500;
var h = 300;
var padding = 30;

// Sample observations as [x, y] pairs.
var dataset = [[1, 1], [2, 5], [3, 4], [4, 7], [5, 5]];
//Create scale functions
// Every scale's domain starts at 0 and ends at the largest value in the data.
var largestX = d3.max(dataset, function (point) { return point[0]; });
var largestY = d3.max(dataset, function (point) { return point[1]; });

// x value -> horizontal pixel position.
var xScale = d3.scale.linear();
xScale.domain([0, largestX]);
xScale.range([padding, w - padding * 2]);

// y value -> vertical pixel position. The range runs from h - padding down to
// padding, so larger values map to smaller pixel coordinates.
var yScale = d3.scale.linear();
yScale.domain([0, largestY]);
yScale.range([h - padding, padding]);

// y value -> a number between 2 and 5.
var rScale = d3.scale.linear();
rScale.domain([0, largestY]);
rScale.range([2, 5]);
//Define X axis
// Configured step by step; each setter returns the same axis object.
var xAxis = d3.svg.axis();
xAxis.scale(xScale);
xAxis.orient("bottom");
xAxis.ticks(5);

//Define Y axis
var yAxis = d3.svg.axis();
yAxis.scale(yScale);
yAxis.orient("left");
yAxis.ticks(5);
//Create SVG element
// The w-by-h <svg> that every later element is appended to.
var svg = d3.select("body").append("svg");
svg.attr("width", w);
svg.attr("height", h);

//Create circles
// One circle per [x, y] pair, positioned through the scales, with a <title>
// child holding the raw "x,y" text.
svg.selectAll("circle")
    .data(dataset)
  .enter().append("circle")
    .attr({
        cx: function (point) { return xScale(point[0]); },
        cy: function (point) { return yScale(point[1]); },
        r: 4
    })
  .append("svg:title")
    .text(function (point) { return point[0] + "," + point[1]; });
//average stuff
// Mean of the y values (the second element of each [x, y] pair).
var sum = dataset.reduce(function (total, point) {
    return total + point[1];
}, 0);
var average = sum / dataset.length;
console.log(average);

// One [x, average] pair per data point. Deriving the x values from dataset
// keeps the mean line aligned with the data when points are added or removed,
// instead of relying on a hard-coded 1..5.
var avgdataset = dataset.map(function (point) {
    return [point[0], average];
});
console.log(avgdataset);
// NOTE(review): document.write coerces the array to a comma-joined string and
// writes it into the page; presumably leftover debugging output - confirm.
document.write(avgdataset);
//Create labels
/*svg.selectAll("text")
.data(dataset)
.enter()
.append("text")
.text(function(d) {
return d[0] + "," + d[1];
})
.attr("x", function(d) {
return xScale(d[0]);
})
.attr("y", function(d) {
return yScale(d[1]);
})
.attr("font-family", "sans-serif")
.attr("font-size", "11px")
.attr("fill", "red");
*/
//Create X axis
// Group translated down by h - padding, then filled in by the xAxis generator.
svg.append("g")
    .attr({
        "class": "axis",
        transform: "translate(0," + (h - padding) + ")"
    })
    .call(xAxis);

//Create Y axis
// Group translated right by padding, then filled in by the yAxis generator.
svg.append("g")
    .attr({
        "class": "axis",
        transform: "translate(" + padding + ",0)"
    })
    .call(yAxis);
// Fitted regression line as a function; it is called below as lr(x).
var lr = ss.linear_regression().data(dataset).line();
var forecast_x = 20;
console.log(lr);

// Both accessors ignore the datum and use only the array index:
// point i of the path is drawn at (i, lr(i)).
var lrline = d3.svg.line()
    .x(function (unused, index) { return xScale(index); })
    .y(function (unused, index) { return yScale(lr(index)); });

// The bound array has dataset.length * forecast_x empty slots; only its length
// matters, since it fixes how many indices the generator visits.
svg.append("path")
    .datum(Array(dataset.length * forecast_x))
    .attr("d", lrline)
    .attr("stroke", "black")
    .attr("stroke-width", 1)
    .attr("fill", "none")
    .attr("stroke-dasharray", "5,5");
// Line generator for the mean-of-Y line. It is bound below to avgdataset, so
// each datum d passed to the accessors is one [x, average] pair and i is that
// pair's row index.
var avgline = d3.svg.line()
//.x(function(d, i) { return xScale(i); })
//.y(function(d, i) { return yScale(avgdataset(i)); });
.x(function(d, i) {
return xScale(d[0]);
})
.y(function(d, i) {
// NOTE(review): d[i] indexes the two-element pair by the ROW index. Row 0
// reads d[0] (the x value), row 1 reads d[1] (the average), and rows 2 and up
// read undefined, which matches the NaN coordinates in the reported path
// string. The y value of every pair is d[1].
return yScale(d[i]);
});
// Appends the dashed green <path> whose "d" attribute is produced by avgline
// from avgdataset.
svg.append("path")
.datum(avgdataset)
.attr({
d: avgline,
stroke: "green",
"stroke-width": 1,
fill: "none",
"stroke-dasharray": "5,5",
});
//to get the m and b for the equation line
// Each value is read from its own fit of dataset and logged.
var mvalue = ss.linear_regression().data(dataset).m();
console.log(mvalue);
var bvalue = ss.linear_regression().data(dataset).b();
console.log(bvalue);

//equation written out
// Text label placed at (60, 30) in SVG coordinates.
var equationLabel = "Y= " + mvalue + "x + " + bvalue;
svg.append("text")
    .text(equationLabel)
    .attr({
        "class": "text-label",
        x: 60,
        y: 30
    });
<script src="https://cdnjs.cloudflare.com/ajax/libs/d3/3.4.11/d3.min.js"></script>
<script src="https://raw.github.com/tmcw/simple-statistics/master/src/simple_statistics.js"></script>
Upvotes: 2
Views: 2534
Reputation: 2874
Similar to Kaiido, it looks to me that the `avgline` function was the issue. You were passing in an array of arrays, and the `x` and `y` accessors weren't reading the correct part of each inner array. Most of the examples I've worked with pass an array of objects, so something like:
// One {x, y} object per point: x runs over the question's x values (1..5) and
// y is the mean of Y (4.4 for the question's data).
var data = [
    {x: 1, y: 4.4},
    {x: 2, y: 4.4},
    {x: 3, y: 4.4},
    {x: 4, y: 4.4},
    {x: 5, y: 4.4}
];
If you construct your data like this, you can simply pass it to `avgline`, which can then elegantly access the correct parts of the data with something like:
// x and y accessors changed to reflect the changed data: each datum is now an
// {x, y} object, so the coordinates are read by property name.
var avgline = d3.svg.line();
avgline.x(function (point) { return xScale(point.x); });
avgline.y(function (point) { return yScale(point.y); });
There are a number of advantages of this. For instance you could ensure that all your data corresponds to this structure and then you would only need one line constructor instead of two.
Upvotes: 2
Reputation: 136776
I think you almost got it, except that `avgdataset` is not a function but an array.
Simply replace
// The question's original generator, quoted unchanged as the code to replace.
var avgline = d3.svg.line()
//.x(function(d, i) { return xScale(i); })
//.y(function(d, i) { return yScale(avgdataset(i)); });
.x(function(d, i) {
return xScale(d[0]);
})
.y(function(d, i) {
// d is one [x, average] pair and i is the row index, so d[i] is undefined
// from the third row on.
return yScale(d[i]);
});
with
// avgdataset is an array, and D3 hands each of its rows to the accessors as d
// (d is avgdataset[i]). Read both coordinates from that [x, average] pair: the
// row index i starts at 0 and is not an x value, so using it for x would draw
// the line one unit to the left of the data.
var avgline = d3.svg.line()
    .x(function (d) { return xScale(d[0]); })
    .y(function (d) { return yScale(d[1]); });
//Width and height
// Pixel size of the SVG, and the padding used when placing scales and axes.
var w = 500;
var h = 300;
var padding = 30;

// Sample observations as [x, y] pairs.
var dataset = [
    [1, 1],
    [2, 5],
    [3, 4],
    [4, 7],
    [5, 5]
];
//Create scale functions
// Every scale's domain starts at 0 and ends at the largest value in the data.
var largestX = d3.max(dataset, function (point) { return point[0]; });
var largestY = d3.max(dataset, function (point) { return point[1]; });

// x value -> horizontal pixel position.
var xScale = d3.scale.linear();
xScale.domain([0, largestX]);
xScale.range([padding, w - padding * 2]);

// y value -> vertical pixel position. The range runs from h - padding down to
// padding, so larger values map to smaller pixel coordinates.
var yScale = d3.scale.linear();
yScale.domain([0, largestY]);
yScale.range([h - padding, padding]);

// y value -> a number between 2 and 5.
var rScale = d3.scale.linear();
rScale.domain([0, largestY]);
rScale.range([2, 5]);
//Define X axis
// Configured step by step; each setter returns the same axis object.
var xAxis = d3.svg.axis();
xAxis.scale(xScale);
xAxis.orient("bottom");
xAxis.ticks(5);

//Define Y axis
var yAxis = d3.svg.axis();
yAxis.scale(yScale);
yAxis.orient("left");
yAxis.ticks(5);
//Create SVG element
// The w-by-h <svg> that every later element is appended to.
var svg = d3.select("body").append("svg");
svg.attr("width", w);
svg.attr("height", h);

//Create circles
// One circle per [x, y] pair, positioned through the scales, with a <title>
// child holding the raw "x,y" text.
svg.selectAll("circle")
    .data(dataset)
  .enter().append("circle")
    .attr({
        cx: function (point) { return xScale(point[0]); },
        cy: function (point) { return yScale(point[1]); },
        r: 4
    })
  .append("svg:title")
    .text(function (point) { return point[0] + "," + point[1]; });
//average stuff
// Mean of the y values (the second element of each [x, y] pair).
var sum = dataset.reduce(function (total, point) {
    return total + point[1];
}, 0);
var average = sum / dataset.length;
console.log(average);

// One [x, average] pair per data point. Deriving the x values from dataset
// keeps the mean line aligned with the data when points are added or removed,
// instead of relying on a hard-coded 1..5.
var avgdataset = dataset.map(function (point) {
    return [point[0], average];
});
console.log(avgdataset);
// NOTE(review): document.write coerces the array to a comma-joined string and
// writes it into the page; presumably leftover debugging output - confirm.
document.write(avgdataset);
//Create labels
/*svg.selectAll("text")
.data(dataset)
.enter()
.append("text")
.text(function(d) {
return d[0] + "," + d[1];
})
.attr("x", function(d) {
return xScale(d[0]);
})
.attr("y", function(d) {
return yScale(d[1]);
})
.attr("font-family", "sans-serif")
.attr("font-size", "11px")
.attr("fill", "red");
*/
//Create X axis
// Group translated down by h - padding, then filled in by the xAxis generator.
svg.append("g")
    .attr({
        "class": "axis",
        transform: "translate(0," + (h - padding) + ")"
    })
    .call(xAxis);

//Create Y axis
// Group translated right by padding, then filled in by the yAxis generator.
svg.append("g")
    .attr({
        "class": "axis",
        transform: "translate(" + padding + ",0)"
    })
    .call(yAxis);
// Fitted regression line as a function; it is called below as lr(x).
var lr = ss.linear_regression().data(dataset).line();
var forecast_x = 20;
console.log(lr);

// Both accessors ignore the datum and use only the array index:
// point i of the path is drawn at (i, lr(i)).
var lrline = d3.svg.line()
    .x(function (unused, index) { return xScale(index); })
    .y(function (unused, index) { return yScale(lr(index)); });

// The bound array has dataset.length * forecast_x empty slots; only its length
// matters, since it fixes how many indices the generator visits.
svg.append("path")
    .datum(Array(dataset.length * forecast_x))
    .attr("d", lrline)
    .attr("stroke", "black")
    .attr("stroke-width", 1)
    .attr("fill", "none")
    .attr("stroke-dasharray", "5,5");
// Line generator for the mean-of-Y line. avgdataset is bound to the path below,
// so each datum d passed to the accessors is one [x, average] pair (d is
// avgdataset[i]). Both coordinates are read from that pair: the row index i
// starts at 0 and is not an x value, so using it for x would draw the line one
// unit to the left of the data.
var avgline = d3.svg.line()
    .x(function (d) { return xScale(d[0]); })
    .y(function (d) { return yScale(d[1]); });

// Dashed green path whose "d" attribute is generated by avgline from avgdataset.
svg.append("path")
    .datum(avgdataset)
    .attr({
        d: avgline,
        stroke: "green",
        "stroke-width": 1,
        fill: "none",
        "stroke-dasharray": "5,5"
    });
//to get the m and b for the equation line
// Each value is read from its own fit of dataset and logged.
var mvalue = ss.linear_regression().data(dataset).m();
console.log(mvalue);
var bvalue = ss.linear_regression().data(dataset).b();
console.log(bvalue);

//equation written out
// Text label placed at (60, 30) in SVG coordinates.
var equationLabel = "Y= " + mvalue + "x + " + bvalue;
svg.append("text")
    .text(equationLabel)
    .attr({
        "class": "text-label",
        x: 60,
        y: 30
    });
<script src="https://cdnjs.cloudflare.com/ajax/libs/d3/3.4.11/d3.min.js"></script>
<script src="https://raw.github.com/tmcw/simple-statistics/master/src/simple_statistics.js"></script>
Upvotes: 1