walkthroughthecode
walkthroughthecode

Reputation: 519

Node async.series trouble

While building a fairly complex scraper, I ran into a problem with the control flow of my code.

Here's what the code below does: 1) requests a URL; 2) scrapes NEWURL from the results; 3) passes it to the Readability API in the first async function; 4) here comes the trouble — the next async function, which saves readabilityData to the DB, never runs.

How can I solve this problem? I'm new to JS, so please feel free to point out any issues with my code.

 // Fetches URL, loads the returned HTML with cheerio, builds a Readability
 // parser URL from a scraped link, then hands two steps to async.series:
 // (1) request the Readability URL, (2) look up / save an Article.
 request(URL, function(error, response, html) {
    if (!error) {
        var $ = cheerio.load(html);
            // NOTE(review): `data` is not defined anywhere in the visible snippet,
            // and NEWURL is assigned without a declaration.
            NEWURL = data.find('a').attr('href');

            // Assigned without a declaration; the first series step below writes to it.
            readabilityData = {}                
            var articleUrl = 'https://readability.com/api/content/v1/parser?url=' + NEWURL + token;

            async.series([
                // Step 1: request the Readability URL and store the response.
                // This function declares no callback parameter and never signals
                // completion, so async.series has no way to know it may proceed
                // to the next step.
                function(){
                    request(articleUrl, function(error, response, html) {
                        if (!error) {
                            readabilityData = response.toJSON();
                        }
                    });
                },
                // Step 2: look up an Article by link; log it if found, otherwise
                // create and save a new one.
                // The parameter named `readabilityData` shadows the outer variable
                // of the same name, so the value stored by step 1 is not visible here.
                // This step also never signals completion.
                function(readabilityData){
                    Article.findOne({ 
                        // NOTE(review): `url` is not defined in the visible snippet — confirm which value is meant.
                        "link": url // here's the 
                    }, function(err, link){
                        if(link) {
                            console.log(link)
                        } else {
                                var newArticle = new Article({
                        // write stuff to DB
                                });
                                newArticle.save(function (err, data) {
                        // save it
                                });
                        }   
                    });
                }
            ],
            // Final handler passed to async.series; it logs unconditionally and
            // does not inspect `err`.
            function(err){
               console.log('all good — data written')
            });


        // NOTE(review): from here down the closing braces/parentheses do not balance
        // against the openers shown above; presumably an enclosing callback (possibly
        // the one that defines `data`) was elided from the snippet — confirm.
        });
    }
});

Upvotes: 2

Views: 126

Answers (1)

JohnnyHK
JohnnyHK

Reputation: 312055

You need to call the callback parameter that's passed into the functions of the async.series call when each function's work is complete. That's how async.series knows that it can proceed to the next function. And don't redefine readabilityData as a function parameter when you're trying to use it to share data across the functions.

So something like:

// Shared between the two steps below. It is kept in the enclosing scope (and
// NOT redeclared as a step parameter) so step 2 can read what step 1 stored.
var readabilityData = {};

// Run the steps strictly in order. async.series only moves on to the next step
// once the current step calls its `callback`; calling `callback` with an error
// skips the remaining steps and goes straight to the final handler.
async.series([
    // Step 1: request the Readability URL and keep the response for step 2.
    function(callback){
        request(articleUrl, function(error, response, html) {
            if (!error) {
                readabilityData = response.toJSON();
            }
            // Always signal completion; a request error aborts the series.
            callback(error);
        });
    },
    // Step 2: look the article up by link; save a new one only if none exists.
    function(callback){
        Article.findOne({ 
            // NOTE(review): `url` is not defined in this snippet; presumably the
            // scraped article URL — confirm against the surrounding code.
            "link": url
        }, function(err, link){
            if (err) {
                // A failed lookup must not fall through to the "not found"
                // branch, otherwise a new Article would be saved by mistake.
                return callback(err);
            }
            if(link) {
                console.log(link);
                return callback();
            }
            var newArticle = new Article({
                // write stuff to DB
            });
            newArticle.save(function (saveErr, data) {
                // save it
                callback(saveErr);
            });
        });
    }
],
// Final handler: called once, either after all steps succeed or with the first
// error any step reported.
function(err){
    if (err) {
        // Do not claim success when a step failed.
        console.error(err);
        return;
    }
    console.log('all good — data written')
});

Upvotes: 1

Related Questions