renathy
renathy

Reputation: 5355

node.js: downloading many images from URLs: timeout due to too many simultaneous downloads

I have code that downloads images from given URLs. It loops through products in database and for each product calls Image download.

However, everything seems to happen asynchronously, and if there are too many URLs (images to download) the process stops with a TIMEOUT. This is logical, as there are about 3000 images to download.

Can you give me some suggestions on how to improve the code so that it downloads, for instance, only 10 images at a time, and does not start downloading the next images until the previous 10 have finished? I am not used to node.js async functionality yet.

// download file
// Download `uri` to `filename`.
// Invokes `callback` exactly once: with an Error argument on failure,
// or with no argument once the file has been fully written.
var download = function (uri, filename, callback) {
    // Guard so that callback fires exactly once even if both an 'error'
    // and a 'close' event are emitted on the same stream.
    var done = false;
    var finish = function (err) {
        if (!done) {
            done = true;
            callback(err);
        }
    };
    request.head(uri, function (err, res, body) {
        // Propagate HEAD failures instead of silently issuing a doomed GET.
        if (err) {
            return finish(err);
        }
        request(uri)
            .on('error', finish) // network error on the GET itself
            .pipe(fs.createWriteStream(filename))
            .on('error', finish) // filesystem write error
            .on('close', function () {
                finish(); // success: file fully written
            });
    });
};

// main code - looping through products in DB and upload file for each product (about 3000 products)

// Loop through all products and download each product's image, one at a time.
// Returning a promise from the mapSeries callback is what makes mapSeries
// wait for each download to finish before starting the next one — without
// the `return`, all ~3000 downloads start at once and the process times out.
// (The original snippet also had a stray `}` that broke the syntax.)
knex("products").select("products.id as id", "products.img as img", "products.code as code")
    .mapSeries(function (product) {
        var imgName = 'imgs/' + product.code.toString() + ".png";
        return new Promise(function (resolve, reject) {
            download(product.img, imgName, function (err) {
                if (err) {
                    return reject(err);
                }
                // Record the local path in the DB; resolve with the update's
                // promise so DB errors also propagate to the chain below.
                resolve(knex("products").where("id", product.id).update("img_path", imgName));
            });
        });
    })
    .then(() => {
        // console.log('done');
    });

Upvotes: 4

Views: 1593

Answers (2)

Akhilesh krishnan
Akhilesh krishnan

Reputation: 799

The async library provides functions with a concurrency limit, for instance:

mapLimit(coll, limit(number), iteratee, callback)


// async.mapLimit requires the concurrency limit as its SECOND argument
// (the original snippet omitted it, which is async.map's signature);
// here at most 10 fs.stat calls run in parallel.
async.mapLimit(['file1','file2','file3'], 10, fs.stat, function(err, results) {
    // results is now an array of stats for each file
});

or

eachLimit(coll, limit(number), iteratee, callback)

so this will solve your requirement, as it will run only `limit` parallel calls at any given time

Upvotes: 2

Daphoque
Daphoque

Reputation: 4678

Use async.eachOfLimit in order to process the async actions in batches of Y elements:

var async = require("async");

// products retrieved from the database
var products = [{img: "http://www.google.fr/a", code: "yolo", id: 1}, {img: "https://www.google.fr/b", code: "yolo2", id: 2}];

// Download at most 10 images concurrently; a new item only starts once a
// running one has invoked its per-item callback (ecb).
async.eachOfLimit(products, 10, function (currentproduct, key, ecb) {

    // function called for each product
    var imgName = 'imgs/' + currentproduct.code.toString() + ".png";
    download(currentproduct.img, imgName, function () {
        knex("products").where("id", currentproduct.id).update("img_path", imgName)
            .then(() => {
                // signal success so the next product can start
                ecb(null);
            })
            // Without this .catch, a rejected DB update would mean ecb is
            // never called and eachOfLimit would stall forever.
            .catch(ecb);
    });

}, function (err) {

    // final callback, invoked once all products have been processed
    // (or immediately after the first error)
    if (err) {
        // do stg
    } else {
        console.log("yeah");
    }
})

Upvotes: 3

Related Questions