Reputation: 557
I have a JSON file with about 20,000 to 100,000 links in it. It looks like this:
[{
"file_name": "Blessed_Titles.jpg",
"url": "https://i.imgur.com/FRDibHa.jpg",
"downloadId": "6r44r4k340rvvr"
}]
Are there any ways to download them in parallel, about 100 at a time? Will I get any warnings or errors while downloading thousands of links? Right now I'm using sequential download, but I'm not sure it's suitable for such a large number of links.
Here's how I'm downloading currently
/**
 * Download every file described in ultUrls, one at a time (sequentially).
 * Failed downloads are logged and skipped; `i` only advances on success,
 * matching the original progress-counting behavior.
 * @param {Array<{file_name: string, url: string, downloadId: string}>} ultUrls
 * @returns {Promise<void>} resolves after every download has been attempted
 */
async function downloadALL(ultUrls) {
  const len = ultUrls.length;
  let i = 1;
  // `const ult` fixes the original implicit global (`for (ult of ...)`),
  // which is a ReferenceError in strict mode / ES modules.
  for (const ult of ultUrls) {
    try {
      await downloadFile(ult, i, len);
      i++; // count only successful downloads, as the original .then(() => i++) did
    } catch (err) {
      console.log(err);
    }
  }
}
/**
 * Download a single file over HTTP(S), streaming per-chunk progress to the
 * Electron renderer via `mainWindow.webContents.send('download-info', ...)`,
 * and resolve once the file has been fully written and closed on disk.
 *
 * NOTE(review): relies on `https`, `http`, `mainWindow`, `utility`, and `dir`
 * being defined elsewhere in this file — confirm against the full source.
 *
 * @param {{file_name: string, url: string, downloadId: string}} ult - one entry from the JSON list
 * @param {number} i - 1-based index of this file in the overall batch
 * @param {number} len - total number of files in the batch
 * @returns {Promise<void>} resolves when the file stream is closed; rejects on request error
 */
function downloadFile(ult, i, len) {
  // Executor takes only (resolve, reject) — the original's third `cb`
  // parameter was always undefined (executors receive exactly two arguments).
  return new Promise((resolve, reject) => {
    console.log('Downloading File: () ', ult.file_name);
    const download = {
      file: {},
    };
    // Overall batch progress, as a whole-number percentage string.
    const percentage2 = ((i / len) * 100).toFixed(0);
    download.file.name = ult.file_name;
    download.file.percentage = 0;
    download.file.downloadId = ult.downloadId;
    download.percentage = percentage2;

    (ult.url.substr(0, 5) === 'https' ? https : http)
      .get(ult.url, function (response) {
        const lent = parseInt(response.headers['content-length'], 10);
        let cur = 0;
        // Track progress only — do NOT accumulate chunks into a string as the
        // original did (`body += chunk`): the response is already piped to
        // disk below, so buffering it in memory doubled the cost for nothing.
        response.on('data', function (chunk) {
          cur += chunk.length;
          download.file.percentage = ((100.0 * cur) / lent).toFixed(0);
          mainWindow.webContents.send('download-info', download);
        });
        const file = utility.writeFile(ult.file_name, dir);
        response.pipe(file);
        file.on('error', function (err) {
          console.log(`ERROR:${ err}`);
          file.read();
        });
        file.on('finish', function () {
          console.log('File downloaded');
          // close() is async: pass `resolve` as its callback so the promise
          // settles only after the file descriptor is actually closed
          // (the original resolved immediately with close()'s return value).
          file.close(resolve);
        });
      })
      .on('error', function (err) {
        // Network / request-level failure.
        reject(err);
      });
  });
}
Upvotes: 1
Views: 3267
Reputation: 912
I recommend using bluebird. This Promise library has a built-in solution for running batches of promises with limited concurrency.
This is the link to their tutorial: http://bluebirdjs.com/docs/api/promise.map.html
And here is a code solution with bluebird
for your case:
// don't forget to run `npm install bluebird` first
const Promise = require('bluebird');
// Download every file with bounded concurrency. `Promise.map` here is
// bluebird's, not a native ES Promise method.
async function downloadAll(ultUrls) {
  // At most this many downloads are in flight at once; bluebird starts the
  // next one as each settles. Wrap the await in try/catch if you want to
  // handle a failed batch here.
  const CONCURRENT_DOWNLOADS = 100;
  await Promise.map(ultUrls, downloadFile, { concurrency: CONCURRENT_DOWNLOADS });
}
// Here you no longer need the i and len parameters
// Placeholder: implement this to take a single entry from ultUrls and return
// a promise that settles when that one download finishes. The old `i`/`len`
// progress parameters are no longer needed — bluebird drives the iteration.
function downloadFile() {
}
Upvotes: 2
Reputation: 2919
So since you mentioned parallel, the usual way in NodeJS is to use child processes: spawn multiple parallel worker processes based on the number of computing resources (CPU cores) available.
Here is a pseudo-code that you can refer to create a solution.
// parent.js
// Fan the download list out across one forked worker process per CPU core,
// then count completion messages until every worker has reported back.
var child_process = require('child_process');
var _ = require('lodash'); // was used below but never required in the original
var numchild = require('os').cpus().length;
var done = 0;
var filesListJSON = [{
  "file_name": "Blessed_Titles.jpg",
  "url": "https://i.imgur.com/FRDibHa.jpg",
  "downloadId": "6r44r4k340rvvr"
}, {
  "file_name": "Blessed_Titles2.jpg",
  "url": "https://i.imgur.com/FRDibHa2.jpg",
  "downloadId": "6r44r4k340rvvr"
}, {
  "file_name": "Blessed_Titles3.jpg",
  "url": "https://i.imgur.com/FRDibHa3.jpg",
  "downloadId": "6r44r4k340rvvr"
}];
// _.chunk's second argument is a chunk SIZE, not a chunk COUNT. To split the
// list into ~numchild groups, size each chunk at ceil(total / workers).
var chunkSize = Math.ceil(filesListJSON.length / numchild);
var chunks = _.chunk(filesListJSON, chunkSize);
// Iterate over the chunks actually produced (may be fewer than numchild when
// the list is short) — the original looped to numchild and could send
// `undefined` to extra children.
for (var i = 0; i < chunks.length; i++) {
  var child = child_process.fork('./child');
  // Send this worker its share of the download list.
  child.send(chunks[i]);
  child.on('message', function (message) {
    console.log('[parent] received message from child:', message);
    done++;
    if (done === chunks.length) {
      console.log('[parent] received all results');
      // All workers finished — aggregate / report the results here.
    }
  });
}
// child.js
// Worker process: receives one slice of the file list from the parent,
// downloads it, reports the result back, then disconnects.
process.on('message', function (list) {
  // Fixed: the original logged `message`, which is not defined in this
  // handler (its parameter is `list`) and would throw a ReferenceError.
  console.log('[child] received message from server:', list);
  downloadFiles(list, function (done) {
    console.log("Done downloading files : " + list.length);
    // Tell the parent which worker finished and with what result.
    process.send({
      child: process.pid,
      result: done
    });
    // No more messages expected; let this worker exit cleanly.
    process.disconnect();
  });
});
// Stub: iterate over `list`, run the actual download logic for each entry,
// and invoke `cb` with the overall outcome (e.g. cb(true)) once every file
// in this worker's slice has been handled.
function downloadFiles(list, cb) {
}
Refer to this link for more details about the logic used.
Also, I have used the chunk function from the lodash library to split the array for processing: https://lodash.com/docs/3.10.1#chunk
Upvotes: 0