Reputation: 2476
I am refactoring some code that crawls some web pages (removing "callback hell"), and want a three second delay between each request. Here is the request function:
/**
 * Requests a page after a staggered delay of `i * 3000` milliseconds.
 *
 * `api` is expected to be in scope; it is defined outside this snippet.
 *
 * @param {*} page - Value passed through unchanged to `api.makeAPIGetRequest`.
 * @param {number} [i=0] - Position used to stagger the request; 0 means no
 *   meaningful delay.
 * @returns {Promise<*>} Settles with whatever `api.makeAPIGetRequest(page)`
 *   settles with. A synchronous throw from that call becomes a rejection
 *   instead of an uncaught exception inside the timer callback.
 */
const getHTML = async function (page, i = 0) {
  // Only the timer needs a hand-built promise; the API call already returns one.
  await new Promise((resolve) => {
    setTimeout(resolve, i * 3000);
  });
  return api.makeAPIGetRequest(page);
};
I am traversing an array of objects, each of which contains an array of pages:
// Locations to crawl; each entry lists the page numbers to request, in order.
let p = [
  { location: 'England', pages: [1, 3, 5] },
  { location: 'Scotland', pages: [2, 4, 6] },
];
The problem is that the output order is random (because of the delay):
Page 1 - Loaded
Page 2 - Loaded
Page 5 - Loaded
Page 4 - Loaded
Page 3 - Loaded
Page 6 - Loaded
It should be:
Page 1 - Loaded
Page 3 - Loaded
Page 5 - Loaded
Page 2 - Loaded
Page 4 - Loaded
Page 6 - Loaded
Here is my code:
// Start a crawl for every location. `.map` invokes each async callback right
// away and does not wait for the promise it returns, so the `await` below only
// suspends that one callback; the locations are crawled concurrently.
p.map(async (location) => {
  await crawlLocationPages(location);
});
/**
 * Requests every page of one location and logs each page as it loads.
 *
 * All requests are started up front; each is handed its array index, which
 * `getHTML` receives as its second argument.
 *
 * @param {{pages: Array}} data - Location entry whose `pages` are requested.
 * @returns {Promise<undefined>} Fulfils once every page has been logged;
 *   rejects as soon as any single request rejects.
 */
function crawlLocationPages(data) {
  const pending = data.pages.map(async (page, i) => {
    await getHTML(page, i); // <-- waits 3 seconds
    console.log(`Page ${page} - Loaded`);
  });

  // The trailing no-op handler makes the result fulfil with `undefined`.
  return Promise.all(pending).then(() => {});
}
I would rather keep the object and array model as it is.
Any help is appreciated.
Upvotes: 0
Views: 68
Reputation: 479
This approach might be less confusing, but it only works if each request takes no longer than 3 seconds.
// Flatten every location's page list into one queue, preserving order.
const pages = p.flatMap((location) => location.pages);
// Index of the next entry in `pages` to request.
let page = 0;

// Dispatch one request every 3 seconds. Requests are not awaited, so the log
// order only matches the queue order while each request finishes within 3 seconds.
const interval = setInterval(() => {
  // Nothing to request (e.g. `pages` is empty): stop without calling the API.
  if (page >= pages.length) {
    clearInterval(interval);
    return;
  }

  // Capture the page number now; `page` has moved on by the time the request resolves.
  const current = pages[page];
  page += 1;

  // Stop the timer as soon as the last request has been dispatched.
  if (page === pages.length) {
    clearInterval(interval);
  }

  api.makeAPIGetRequest(current).then(() => {
    console.log(`Page ${current} - Loaded`);
  }).catch((err) => {
    console.error(err);
  });
}, 3000);
Or start the next request inside `then`, once the previous one has finished:
/**
 * Requests `pages[i]`, logs the response, then schedules the following page so
 * that consecutive requests start at least 3 seconds apart.
 *
 * `api` is expected to be in scope; it is defined outside this snippet.
 *
 * @param {Array} pages - Pages to request, in order.
 * @param {number} i - Index of the page to request now.
 * @returns {undefined}
 */
function getPage(pages, i) {
  // Nothing to do for an empty list or an index past the end.
  if (i >= pages.length) {
    return;
  }

  const ts = Date.now();
  api.makeAPIGetRequest(pages[i]).then((res) => {
    console.log(res);

    const next = i + 1;
    if (next < pages.length) {
      // Wait only for what is left of the 3 seconds since this request started.
      const delay = Math.max(3000 - (Date.now() - ts), 0);
      // Pass a function: calling getPage here would run it immediately and
      // hand its `undefined` return value to setTimeout.
      setTimeout(() => getPage(pages, next), delay);
    }
  }).catch((err) => {
    // A failed request stops the chain; report it instead of leaving the
    // rejection unhandled.
    console.error(err);
  });
}
// Flatten every location's page list into one queue, then start with the first page.
const pages = p.flatMap((location) => location.pages);
getPage(pages, 0);
Upvotes: 0
Reputation: 26390
`await` doesn't work as expected inside `.map` and `.forEach` callbacks (the iteration does not wait for it), but it does work inside `for` loops. And of course, it has to be inside an `async` function.
/**
 * Crawls the locations in `p` strictly one after another: the next location is
 * not started until the previous location's crawl has settled.
 *
 * @returns {Promise<undefined>} Fulfils after the last location is crawled.
 */
const run = async () => {
  for (const location of p) {
    await crawlLocationPages(location);
  }
};
/**
 * Loads the pages of one location sequentially, pausing after each page.
 *
 * `getHTML` and `pause` are expected to be in scope; they are defined outside
 * this function.
 *
 * @param {{pages: Array}} data - Location entry whose `pages` are requested in order.
 * @returns {Promise<undefined>} Fulfils after the last page's pause; rejects
 *   if a request rejects, skipping the remaining pages.
 */
const crawlLocationPages = async (data) => {
  for (const page of data.pages) {
    // Index 0 asks getHTML for no stagger delay of its own (it waits
    // `i * 3000` ms). Omitting the index would make that delay NaN; the
    // spacing between requests comes from pause() below.
    const html = await getHTML(page, 0);
    console.log('Page ' + page + ' - Loaded - HTML = ', html);
    await pause();
  }
};
/**
 * Returns a promise that fulfils after a delay.
 *
 * @param {number} [ms=3000] - Delay in milliseconds.
 * @returns {Promise<undefined>} Fulfils (never rejects) once the timer fires.
 */
const pause = (ms = 3000) => new Promise((resolve) => {
  setTimeout(resolve, ms);
});
// Start the crawl. The returned promise is not awaited here, so attach a
// handler to report a failed crawl instead of leaving the rejection unhandled.
run().catch((err) => {
  console.error(err);
});
Upvotes: 2
Reputation: 2476
Solved it using ES6 generators and `yield`.
/**
 * Lazily starts one location crawl per `.next()` call, in the order the
 * locations appear in `p`.
 *
 * @yields The return value of `crawlLocationPages` for the current location.
 */
function* crawlGenerator() {
  for (const location of p) {
    // The crawl for this location is only started when the consumer asks for it.
    yield crawlLocationPages(location);
  }
}
// Create the generator object; none of crawlGenerator's body runs until
// .next() is called.
let crawl = crawlGenerator();
// Advance to the first `yield`, which starts the crawl of the first location.
// Later steps are driven by crawlLocationPages, which calls crawl.next() again
// once its pages have loaded.
crawl.next();
/**
 * Requests every page of one location, logs each page as it loads, then
 * advances the shared `crawl` generator so the next location can start.
 *
 * All requests are started up front; each is handed its array index, which
 * `getHTML` receives as its second argument.
 *
 * @param {{pages: Array}} data - Location entry whose `pages` are requested.
 * @returns {Promise<undefined>} Fulfils after `crawl.next()` has been called.
 *   If any request rejects, the promise rejects and `crawl.next()` is not
 *   called.
 */
function crawlLocationPages(data) {
  const pending = data.pages.map(async (page, i) => {
    await getHTML(page, i); // <-- waits 3 seconds
    console.log(`Page ${page} - Loaded`);
  });

  return Promise.all(pending).then(() => {
    // This location is done; hand control back to the generator.
    crawl.next();
  });
}
More information here: https://davidwalsh.name/async-generators
Upvotes: 0