MattStudios
MattStudios

Reputation: 43

Optimize javascript loop on CSV data

I am plotting a graph with d3.js by loading an external .CSV file. The code I have so far works fine with a small amount of data, but when I load a larger file with thousands of lines it freezes the page.

The data has a usage column containing one value for every 30 minutes of the day, and the file covers several months.

See Plunker example.

// Module-level state filled in by csvParseClient(), listed in pipeline order.

// One { date, usage } record per parsed CSV row.
var dateArr = [];
// Starts as an empty array; csvParseClient() reassigns it to the object
// returned by groupBy(dateArr, 'date').
var dateGroupArr = [];
// One { x, y } point per grouped date, where y is the average usage.
var avgClientArr = [];

/**
 * Loads client.csv, averages the usage readings of each date, and renders the
 * resulting points as JSON into the element with id "arrayDiv".
 *
 * Side effects on module-level state:
 *   - appends one { date, usage } record per CSV row to dateArr,
 *   - replaces dateGroupArr with those records grouped by date,
 *   - appends one { x: date, y: average usage } point per date to avgClientArr.
 *
 * The work happens asynchronously inside the d3.xhr callback; this function
 * itself returns as soon as the request has been issued.
 */
function csvParseClient() {
    d3.xhr('client.csv').get(function(err, response) {
        // Without this guard a failed request crashes on response.responseText.
        if (err || !response) {
            console.error("Could not load client.csv", err);
            return;
        }

        const dirtyCSV = response.responseText;
        const initialClientKeys = /TYPE,DATE,START TIME,END TIME,USAGE,UNITS,NOTES/i;
        const newClientKeys = "TYPE,x,startTime,endTime,y,UNITS,NOTES";
        const csvDataClient = dirtyCSV.replace(initialClientKeys, newClientKeys);

        // Everything before the header row is unwanted preamble. If the header
        // is missing, indexOf() yields -1 and slicing from there would parse
        // only the last character of the file, so stop instead.
        const headerIndex = csvDataClient.indexOf(newClientKeys);
        if (headerIndex === -1) {
            console.error("client.csv does not contain the expected header row");
            return;
        }
        const csvData = d3.csv.parse(csvDataClient.slice(headerIndex));

        csvData.forEach(function(row) {
            dateArr.push({
                "date": new Date(row.x),
                "usage": parseFloat(row.y)
            });
        });

        // Group once, after every row has been collected. Regrouping the whole
        // array inside the row loop made the run time quadratic in the number
        // of rows, which is what froze the page on large files.
        dateGroupArr = groupBy(dateArr, 'date');
        console.log(dateGroupArr);

        objectValues(dateGroupArr).forEach(function(readings) {
            const total = readings.reduce(function(sum, reading) {
                return sum + reading.usage;
            }, 0);
            avgClientArr.push({
                // Every reading in a group shares the same date; copy the last one.
                "x": new Date(readings[readings.length - 1].date),
                "y": total / readings.length
            });
        });

        document.getElementById('arrayDiv').innerHTML = '<pre>' + JSON.stringify(avgClientArr, null, 4) + '</pre>';
    });
}

/**
 * Groups the items of an array by the value of one of their properties.
 *
 * @param {Array<Object>} arr - Items to group.
 * @param {string} key - Name of the property whose value selects the group.
 * @returns {Object} Plain object whose keys are the distinct property values
 *     (converted to property keys, as for any object) and whose values are
 *     arrays of the matching items in their original order.
 */
function groupBy(arr, key) {
    const hasOwn = Object.prototype.hasOwnProperty;
    const reducer = (grouped, item) => {
        const groupValue = item[key];
        // Check for an *own* property: a plain truthiness test also sees
        // inherited members, so a group named "constructor" or "toString"
        // would skip initialisation and then fail on push().
        if (!hasOwn.call(grouped, groupValue)) {
            // defineProperty (rather than assignment) also makes "__proto__"
            // an ordinary own key instead of replacing the prototype.
            Object.defineProperty(grouped, groupValue, {
                value: [],
                writable: true,
                enumerable: true,
                configurable: true
            });
        }
        grouped[groupValue].push(item);
        return grouped;
    };
    return arr.reduce(reducer, {});
}

/**
 * Returns the values of an object's own enumerable string-keyed properties.
 *
 * @param {Object} object - Object to read; null or undefined yields [].
 * @returns {Array} The property values, in the object's key order.
 */
function objectValues(object) {
    // Object.keys() throws on null/undefined, so handle them up front and
    // return an empty list as a for...in loop would.
    if (object == null) {
        return [];
    }
    // Object.keys() lists own enumerable keys without calling
    // object.hasOwnProperty, which is missing on Object.create(null)
    // dictionaries and can be shadowed by a data key of the same name.
    return Object.keys(object).map((property) => object[property]);
}

/**
 * Calls fn(element, index) for every index of the array bound as `this`,
 * from 0 up to the length the array had when iteration started.
 *
 * @param {Function} fn - Callback invoked with (element, index).
 */
function foreach(fn) {
    var items = this;
    // Length is read once, so elements appended by fn are not visited.
    var count = items.length;
    var index = 0;
    while (index < count) {
        fn(items[index], index);
        index += 1;
    }
}

// Install foreach on Array.prototype as customForEach. It is kept
// non-enumerable so it does not show up in for...in loops over arrays.
Object.defineProperty(Array.prototype, 'customForEach', { value: foreach, enumerable: false });

// Time the call to csvParseClient() and log the elapsed milliseconds.
// NOTE(review): csvParseClient() does its parsing inside the d3.xhr callback,
// so this presumably measures only how long it takes to issue the request,
// not the CSV processing itself - confirm before relying on the figure.
var t0 = performance.now();
csvParseClient();
var t1 = performance.now();
console.log("Call csvParseClient() " + (t1 - t0) + " milliseconds.");

What I need to happen

I need the average value of usage for the whole day returned as y and the date for that day returned as x for each day.

The slow process I have

  1. Start the loop from a specified line in the CSV file as there is unwanted data on the first few lines.
  2. Group unique date and store each usage value for that date in an object.
  3. Average the usage values for each date.
  4. Output an array of objects with property x being the date and y being the average usage value.

If you can give me any help on how to make this run faster that would be great!

Upvotes: 0

Views: 453

Answers (1)

MattStudios
MattStudios

Reputation: 43

I solved this by using the d3 nest() and rollup() functions; it's simple and really fast.

// Nest the records by their x value and roll each group up to the mean of y.
// NOTE(review): this reads x and y from each record, while the question's
// code pushes { date, usage } into dateArr - confirm which shape is intended.
d3.nest()
    .key(function(record) { return record.x; })
    .rollup(function(group) {
        return d3.mean(group, function(record) { return record.y; });
    })
    .entries(dateArr);

Upvotes: 0

Related Questions