moskemerak
moskemerak

Reputation: 99

Creating an average Y line for D3 visualization?

I am trying to create a visualization to help students that I work with learn about measures of fit, such as r-squared. For R-squared, I want to have both the regression line and a line for the mean of Y on my graph. (My end goal is to have lines between the points and/or lines to represent ESS, TSS, and SSR that students can click to see or not see).

My regression line is working fine, but when I try to add in my average line, it ends up with a strange start and end point and is noticeably NOT a flat line at the average (4.4). I also get the following error in my console:

Error: Invalid value for <path> attribute d="M112,235.71428571428572L194,119.14285714285712L276,NaNL358,NaNL440,NaN"

which corresponds to the line of code:

.attr({

inside the svg.append("path") call for my avgline (at least, I think that's what the error is pointing to):

svg.append("path")
                .datum(avgdataset)
                .attr({
                d: avgline,
                stroke: "green",
                "stroke-width": 1,
                fill: "none",
                "stroke-dasharray": "5,5",
                });

I've tried changing how avgline is specified many times (this usually ends up producing no line at all). I've also tried using data instead of datum, to no avail. I'm likely making a really basic mistake, since I'm new to JavaScript and D3.

Here's all of my code thus far, to put it in context:

//Width and height
var w = 500;
var h = 300;
var padding = 30;

var dataset = [
  [1, 1],
  [2, 5],
  [3, 4],
  [4, 7],
  [5, 5]
];

//Create scale functions
var xScale = d3.scale.linear()
  .domain([0, d3.max(dataset, function(d) {
    return d[0];
  })])
  .range([padding, w - padding * 2]);

var yScale = d3.scale.linear()
  .domain([0, d3.max(dataset, function(d) {
    return d[1];
  })])
  .range([h - padding, padding]);

var rScale = d3.scale.linear()
  .domain([0, d3.max(dataset, function(d) {
    return d[1];
  })])
  .range([2, 5]);

//Define X axis
var xAxis = d3.svg.axis()
  .scale(xScale)
  .orient("bottom")
  .ticks(5);

//Define Y axis
var yAxis = d3.svg.axis()
  .scale(yScale)
  .orient("left")
  .ticks(5);

//Create SVG element
var svg = d3.select("body")
  .append("svg")
  .attr("width", w)
  .attr("height", h);

//Create circles
svg.selectAll("circle")
  .data(dataset)
  .enter()
  .append("circle")
  .attr("cx", function(d) {
    return xScale(d[0]);
  })
  .attr("cy", function(d) {
    return yScale(d[1]);
  })
  .attr("r", 4)
  .append("svg:title")
  .text(function(d) {
    return d[0] + "," + d[1];
  });;

//average stuff
var sum = 0,
  average;
for (var i = 0; i < dataset.length; i++) {
  sum += dataset[i][1];
}
average = sum / dataset.length;

console.log(average);

var avgdataset = [
  [1, average],
  [2, average],
  [3, average],
  [4, average],
  [5, average]
];

console.log(avgdataset);
document.write(avgdataset);

//Create labels
/*svg.selectAll("text")
		   .data(dataset)
		   .enter()
		   .append("text")
		   .text(function(d) {
		   		return d[0] + "," + d[1];
		   })
		   .attr("x", function(d) {
		   		return xScale(d[0]);
		   })
		   .attr("y", function(d) {
		   		return yScale(d[1]);
		   })
		   .attr("font-family", "sans-serif")
		   .attr("font-size", "11px")
		   .attr("fill", "red");
		*/
//Create X axis
svg.append("g")
  .attr("class", "axis")
  .attr("transform", "translate(0," + (h - padding) + ")")
  .call(xAxis);

//Create Y axis
svg.append("g")
  .attr("class", "axis")
  .attr("transform", "translate(" + padding + ",0)")
  .call(yAxis);

var lr = ss.linear_regression().data(dataset).line();
var forecast_x = 20
console.log(lr)
var lrline = d3.svg.line()
  .x(function(d, i) {
    return xScale(i);
  })
  .y(function(d, i) {
    return yScale(lr(i));
  });

svg.append("path")
  .datum(Array(dataset.length * forecast_x))
  .attr({
    d: lrline,
    stroke: "black",
    "stroke-width": 1,
    fill: "none",
    "stroke-dasharray": "5,5",
  });

// Line generator for the mean-of-Y line. It is applied to avgdataset via
// .datum(avgdataset), so each d is one [x, average] pair and i is that
// pair's position in the array.
var avgline = d3.svg.line()
  //.x(function(d, i) { return xScale(i); })
  //.y(function(d, i) { return yScale(avgdataset(i)); });
  .x(function(d, i) {
    return xScale(d[0]);
  })
  .y(function(d, i) {
    // NOTE(review): d[i] indexes the two-element pair by the point's position,
    // so it yields the x value for i = 0, the average for i = 1 and undefined
    // from i = 2 on -- consistent with the NaN entries in the reported path.
    return yScale(d[i]);
  });


svg.append("path")
  .datum(avgdataset)
  .attr({
    d: avgline,
    stroke: "green",
    "stroke-width": 1,
    fill: "none",
    "stroke-dasharray": "5,5",
  });


//to get the m and b for the equation line
var mvalue = ss.linear_regression().data(dataset).m();
console.log(mvalue);

var bvalue = ss.linear_regression().data(dataset).b();
console.log(bvalue);

//equation written out
svg.append("text")
  .text("Y= " + mvalue + "x + " + bvalue)
  .attr("class", "text-label")
  .attr("x", 60)
  .attr("y", 30);
<script src="https://cdnjs.cloudflare.com/ajax/libs/d3/3.4.11/d3.min.js"></script>
<script src="https://raw.github.com/tmcw/simple-statistics/master/src/simple_statistics.js"></script>

Upvotes: 2

Views: 2534

Answers (2)

user1614080
user1614080

Reputation: 2874

Similar to Kaiido, it looks to me that the avgline function was the issue. You were passing in an array of arrays and the x and y weren't accessing the correct part of the array. Most of the examples I've worked with pass an array of objects, so something like:

// One object per point: x is the data's x value, y is the mean of Y (4.4 here).
var data = [
    {x: 1, y: 4.4},
    {x: 2, y: 4.4},
    {x: 3, y: 4.4},
    {x: 4, y: 4.4},
    {x: 5, y: 4.4}
];

If you construct your data as an array of objects like this, you can simply pass it to avgline, which can then access the correct parts of each point with something like:

// Line generator for object-shaped data: each accessor reads the named
// x / y property of a point and maps it through the matching scale.
var avgline = d3.svg.line()
    .x(function(point) { return xScale(point.x); })
    .y(function(point) { return yScale(point.y); });

There are a number of advantages to this approach. For instance, if you make sure all your data follows this structure, you only need one line generator instead of two.

Upvotes: 2

Kaiido
Kaiido

Reputation: 136776

I think you almost got it, except that avgdataset is not a function but an array.

Simply replace

var avgline = d3.svg.line()
  //.x(function(d, i) { return xScale(i); })
  //.y(function(d, i) { return yScale(avgdataset(i)); });
  .x(function(d, i) {
    return xScale(d[0]);
  })
  .y(function(d, i) {
    return yScale(d[i]);
  });

with

// d is the i-th [x, y] pair of the array bound with .datum(avgdataset), so
// read both coordinates from it. Using the index i for x would place the
// line at x = 0..4 instead of the data's x values 1..5.
var avgline = d3.svg.line()
    .x(function(d) { return xScale(d[0]); })
    .y(function(d) { return yScale(d[1]); });

    //Width and height
    var w = 500;
    var h = 300;
    var padding = 30;

    var dataset = [[1, 1], [2, 5], [3, 4], [4, 7], [5, 5]];

    //Create scale functions
    var xScale = d3.scale.linear()
                         .domain([0, d3.max(dataset, function(d) { return d[0]; })])
                         .range([padding, w - padding * 2]);

    var yScale = d3.scale.linear()
                         .domain([0, d3.max(dataset, function(d) { return d[1]; })])
                         .range([h - padding, padding]);

    var rScale = d3.scale.linear()
                         .domain([0, d3.max(dataset, function(d) { return d[1]; })])
                         .range([2, 5]);

    //Define X axis
    var xAxis = d3.svg.axis()
                      .scale(xScale)
                      .orient("bottom")
                      .ticks(5);

    //Define Y axis
    var yAxis = d3.svg.axis()
                      .scale(yScale)
                      .orient("left")
                      .ticks(5);

    //Create SVG element
    var svg = d3.select("body")
                .append("svg")
                .attr("width", w)
                .attr("height", h);

    //Create circles
    svg.selectAll("circle")
       .data(dataset)
       .enter()
       .append("circle")
       .attr("cx", function(d) {
            return xScale(d[0]);
       })
       .attr("cy", function(d) {
            return yScale(d[1]);
       })
       .attr("r", 4
       )
       .append("svg:title")
       .text(function(d){return d[0] + "," + d[1];});;

    //average stuff
    var sum = 0, average;
    for (var i = 0; i < dataset.length; i++) {
        sum += dataset[i][1];
    }
    average = sum / dataset.length;

    console.log(average);       

    var avgdataset = [[1, average], [2, average], [3, average], [4, average], [5, average]];

    console.log(avgdataset);
    document.write(avgdataset);

    //Create labels
    /*svg.selectAll("text")
       .data(dataset)
       .enter()
       .append("text")
       .text(function(d) {
            return d[0] + "," + d[1];
       })
       .attr("x", function(d) {
            return xScale(d[0]);
       })
       .attr("y", function(d) {
            return yScale(d[1]);
       })
       .attr("font-family", "sans-serif")
       .attr("font-size", "11px")
       .attr("fill", "red");
    */
    //Create X axis
    svg.append("g")
        .attr("class", "axis")
        .attr("transform", "translate(0," + (h - padding) + ")")
        .call(xAxis);

    //Create Y axis
    svg.append("g")
        .attr("class", "axis")
        .attr("transform", "translate(" + padding + ",0)")
        .call(yAxis);

    var lr = ss.linear_regression().data(dataset).line();
    var forecast_x = 20
    console.log(lr)
    var lrline = d3.svg.line()
        .x(function(d, i) { return xScale(i); })
        .y(function(d, i) { return yScale(lr(i)); });

    svg.append("path")
        .datum(Array(dataset.length*forecast_x))
        .attr({
        d: lrline,
        stroke: "black",
        "stroke-width": 1,
        fill: "none",
        "stroke-dasharray": "5,5",
        });

    // Mean-of-Y line generator: d is one [x, average] pair from the bound
    // avgdataset, so take x from d[0] and y from d[1]. Using the index i
    // for x would draw the line over x = 0..4 instead of 1..5.
    var avgline = d3.svg.line()
        .x(function(d) { return xScale(d[0]); })
        .y(function(d) { return yScale(d[1]); });


    svg.append("path")
        .datum(avgdataset)
        .attr({
        d: avgline,
        stroke: "green",
        "stroke-width": 1,
        fill: "none",
        "stroke-dasharray": "5,5",
        });


    //to get the m and b for the equation line
    var mvalue = ss.linear_regression().data(dataset).m();
    console.log(mvalue);

    var bvalue = ss.linear_regression().data(dataset).b();
    console.log(bvalue);

    //equation written out
    svg.append("text")
        .text("Y= " + mvalue + "x + " + bvalue)
        .attr("class", "text-label")
        .attr("x", 60)
        .attr("y", 30);
<script src="https://cdnjs.cloudflare.com/ajax/libs/d3/3.4.11/d3.min.js"></script>
<script src="https://raw.github.com/tmcw/simple-statistics/master/src/simple_statistics.js"></script>	

Upvotes: 1

Related Questions