XIMRX
XIMRX

Reputation: 2172

Node.js file write issue, incomplete writing

I am new to Node.js. I wrote the scraper below, but the result it produces is wrong: not all entries are written, and incomplete, broken data ends up in the file, even though each individual extraction looks fine in the console log.

The original file is complex, so I have put together a sample from the relevant parts of the code to show my logic. Please tell me what I am doing wrong.

// Scraper sample: fetches one page, extracts every company listing on it,
// and writes the extracted text to data.txt.
var request = require('request');
var cheerio = require('cheerio');

var url = 'http://example.com/index.html';
request(url, function(err, resp, body) {
    if (err)
        throw err;
    // Parse the response body; note that `$` is assigned without a declaration.
    $ = cheerio.load(body);   
    // Category and sub-category are read once and reused for every row.
    var categoryname = $('#mcat span').html();
    var subcategoryname = $('span.arrow').html();
    // The callback below runs once per `.listing` element.
    $('.listing').each(function() {
    var companyname = $(this).find('.company-name > span').html();
    // compwebsite is extracted but never added to `data` below.
    var compwebsite = $(this).find('.company-link > a').html();
    var phonelumber = "+91-" + $(this).find('span[itemprop="telephone"]').html();
            

        // One comma-separated row holding the current listing only.
        var data = categoryname + ", " + subcategoryname + ", " + companyname + ", " + phonelumber;
        var fs = require('fs');
        // writeFile is started once per listing, each time with just this
        // listing's row, and every call targets the same file.
        fs.writeFile("data.txt", data, function(err) {
        if(err) {
        console.log("Error: "+err);
        } else {
        console.log("Success!");
        }
        });
     });
});

Upvotes: 0

Views: 608

Answers (2)

Raphael PICCOLO
Raphael PICCOLO

Reputation: 2175

I think you could also do it more simply: just call the `writeData` function after the `each` loop. Because cheerio's `each()` is synchronous, there will be no problem.

    // Fetch the page, build one comma-separated row per listing, and write
    // all rows with a single writeData call once the loop has finished.
    request(url, function(err, resp, body) {
    if (err)
        throw err;
    // Declared with `var` so the cheerio handle is not an implicit global
    // (an undeclared assignment also throws in strict mode).
    var $ = cheerio.load(body);
    // Category and sub-category are shared by every row on the page.
    var categoryname = $('#mcat span').html();
    var subcategoryname = $('span.arrow').html();
    // Accumulates every row; each row is terminated with CRLF.
    var data = '';
    $('.listing').each(function() {
        var companyname = $(this).find('.company-name > span').html();
        var phonelumber = "+91-" + $(this).find('span[itemprop="telephone"]').html();
        data += categoryname + ", " + subcategoryname + ", " + companyname + ", " + phonelumber + "\r\n";
    });
    // each() has already visited every listing by the time this line runs,
    // so `data` is complete here.
    writeData(data);
});

/**
 * Writes the given text to data.txt and logs the outcome to the console.
 *
 * @param {string} data - Full text to store in the file.
 */
function writeData(data) {
    var fs = require('fs');

    // Named handler: report success, or the error passed by writeFile.
    function reportResult(writeError) {
        if (!writeError) {
            console.log("Success!");
            return;
        }
        console.log("Error: " + writeError);
    }

    fs.writeFile("data.txt", data, reportResult);
}

Upvotes: 0

Ravi
Ravi

Reputation: 1360

`.each` is called synchronously, so it is blocking. `fs.writeFile`, however, is called asynchronously, which can shuffle the order of your data, but it should not leave the data incomplete.

Solutions:

1. Use a callback.

// Fetch the page, build one comma-separated row per listing, and hand the
// accumulated text to writeData from inside the last iteration.
request(url, function(err, resp, body) {
    if (err)
        throw err;
    // Declared with `var` so the cheerio handle is not an implicit global
    // (an undeclared assignment also throws in strict mode).
    var $ = cheerio.load(body);
    // Category and sub-category are shared by every row on the page.
    var categoryname = $('#mcat span').html();
    var subcategoryname = $('span.arrow').html();
    // `count` tracks how many listings have been processed; `len` is the total.
    var count = 0;
    var len = $('.listing').length;
    // Accumulates every row; each row is terminated with CRLF.
    var data = '';
    $('.listing').each(function() {
        count++;
        var companyname = $(this).find('.company-name > span').html();
        var phonelumber = "+91-" + $(this).find('span[itemprop="telephone"]').html();
        data += categoryname + ", " + subcategoryname + ", " + companyname + ", " + phonelumber + "\r\n";
        // Write exactly once, after the final listing has been appended.
        // With no `.listing` elements this callback never runs, so nothing
        // is written in that case.
        if (count === len)
             writeData(data);
    });
});

/**
 * Stores the given text in data.txt and reports the result on the console.
 *
 * @param {string} data - Complete text to write to the file.
 */
function writeData(data) {
    var fs = require('fs');
    fs.writeFile("data.txt", data, function (failure) {
        // Success path first; otherwise fall through to the error report.
        if (!failure) {
            console.log("Success!");
            return;
        }
        console.log("Error: " + failure);
    });
}
  2. Use the async module. It provides various functions for coordinating callbacks and collecting the final result.

Upvotes: 1

Related Questions