beingalex
beingalex

Reputation: 2476

Synchronously process a nested array

I am refactoring some code that crawls some web pages (removing "callback hell"), and want a three second delay between each request. Here is the request function:

/**
 * Request a page's HTML after waiting `i * 3000` milliseconds.
 *
 * @param {*} page - Page identifier, passed unchanged to `api.makeAPIGetRequest`.
 * @param {number} i - Delay multiplier; the request starts after `i * 3000` ms.
 * @returns {Promise<*>} Settles with the outcome of `api.makeAPIGetRequest(page)`.
 */
const getHTML = function(page, i) {
    // Only the timer needs to be wrapped in a Promise. The API call is chained
    // with then(), so its result (or a synchronous throw) settles the returned
    // promise instead of escaping from the timer callback.
    return new Promise(function(resolve) {
        setTimeout(resolve, i * 3000);
    }).then(function() {
        return api.makeAPIGetRequest(page);
    });
};

I am traversing an array of objects, each of which contains an array of pages:

// Crawl plan: one entry per location, each listing the pages to request in order.
let p = [
    { location: 'England', pages: [1, 3, 5] },
    { location: 'Scotland', pages: [2, 4, 6] },
];

The problem is that the output order is mixed up (because of the delay):

Page 1 - Loaded
Page 2 - Loaded
Page 5 - Loaded
Page 4 - Loaded
Page 3 - Loaded
Page 6 - Loaded

It should be:

Page 1 - Loaded
Page 3 - Loaded
Page 5 - Loaded
Page 2 - Loaded
Page 4 - Loaded
Page 6 - Loaded

Here is my code:

// Start crawling every location. map() calls the async callback for each entry
// without waiting for the promise it returns, so all locations are crawled
// concurrently; the resulting array of promises is discarded.
p.map(async (data) => {
    await crawlLocationPages(data);
})

/**
 * Load all pages of one location and log each page as it arrives.
 *
 * Every getHTML call is issued up front; the page's index is forwarded as the
 * second argument to getHTML.
 *
 * @param {{pages: Array}} data - Location entry whose `pages` are loaded.
 * @returns {Promise<undefined>} Fulfils once every page has been logged.
 */
function crawlLocationPages(data) {
    const loadPage = async (page, i) => {
        await getHTML(page, i);
        console.log('Page ' + page + ' - Loaded' );
    };

    return Promise.all(data.pages.map(loadPage)).then(() => undefined);
}

I would rather keep the object and array model as it is.

Any help is appreciated.

Upvotes: 0

Views: 68

Answers (3)

tallberg
tallberg

Reputation: 479

This approach might be less confusing, but it only works if each request takes no longer than 3 seconds.

// Flatten every location's pages into one list and request one page per tick.
const pages = p.flatMap(location => location.pages);
let page = 0; // index of the next entry in `pages` to request
const interval = setInterval(() => {
  if (page >= pages.length) {
    // Nothing left: stop the timer and return, so no request is made for an
    // index past the end of the list.
    clearInterval(interval);
    return;
  }
  // Capture the page now; `page` will already have advanced by the time the
  // response arrives.
  const current = pages[page++];
  api.makeAPIGetRequest(current).then((html) => {
    console.log('Page ' + current + ' - Loaded' );
  }).catch((err) => {
    console.error(err);
  });
}, 3000);

Or start the next request from inside `then`, once the previous one has finished:

/**
 * Request pages[i], log the response, then request the remaining pages one at
 * a time, keeping at least `minIntervalMs` between the starts of two requests.
 *
 * @param {Array} pages - Page identifiers passed to `api.makeAPIGetRequest`.
 * @param {number} i - Index of the first page to request.
 * @param {number} [minIntervalMs=3000] - Minimum time between request starts.
 * @returns {Promise<undefined>} Fulfils after the last page was logged; rejects
 *     if a request rejects.
 */
function getPage(pages, i, minIntervalMs = 3000) {
    if (i >= pages.length) {
        // Nothing (left) to request.
        return Promise.resolve();
    }
    const ts = Date.now();
    return api.makeAPIGetRequest(pages[i]).then((res) => {
        console.log(res);
        const next = i + 1;
        if (next >= pages.length) {
            return undefined;
        }
        // Only wait for whatever part of the interval the request itself did
        // not already use up.
        const delay = Math.max(minIntervalMs - (Date.now() - ts), 0);
        // setTimeout needs a function to call later; invoking getPage directly
        // in the argument list would start the next request immediately.
        return new Promise((resolve) => {
            setTimeout(resolve, delay);
        }).then(() => getPage(pages, next, minIntervalMs));
    });
}

// Flatten every location's pages into one list and start with the first page.
const pages = p.flatMap(location => location.pages);
getPage(pages, 0);

Upvotes: 0

Jeremy Thille
Jeremy Thille

Reputation: 26390

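await inside a callback passed to .map or .forEach only pauses that callback: .map and .forEach do not wait for the promises the callbacks return, so every iteration starts at once. It does run sequentially inside for loops. And of course, it has to be inside an async function.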

/**
 * Crawl the locations in `p` one after another: the next location is not
 * started until crawlLocationPages has finished for the current one.
 *
 * @returns {Promise<undefined>}
 */
const run = async function () {
    for (const location of p) {
        await crawlLocationPages(location);
    }
};

/**
 * Load the pages of one location strictly one after another, logging each
 * page's HTML and pausing after every page.
 *
 * @param {{pages: Array}} data - Location entry whose `pages` are loaded in order.
 * @returns {Promise<undefined>} Fulfils after the last page's pause.
 */
const crawlLocationPages = async data => {
    for (const page of data.pages) {
        // The spacing between requests comes from pause(), so getHTML gets an
        // explicit 0 as its delay multiplier; omitting it would make getHTML
        // compute its delay as undefined * 3000, which is NaN.
        const html = await getHTML(page, 0);
        console.log('Page ' + page + ' - Loaded - HTML = ', html );
        await pause();
    }
};

/**
 * Wait for a fixed amount of time.
 *
 * @param {number} [ms=3000] - How long to wait, in milliseconds.
 * @returns {Promise<undefined>} Fulfils once the timer fires; never rejects.
 */
const pause = (ms = 3000) => new Promise((resolve) => { setTimeout(resolve, ms); });

// Start the crawl and report a failed request instead of leaving the
// rejection unhandled.
run().catch((err) => {
    console.error(err);
});

Upvotes: 2

beingalex
beingalex

Reputation: 2476

Solved it using ES6 generators and yield.

/**
 * Generator over the entries of `p`. Each step calls crawlLocationPages for
 * the next entry and yields whatever that call returns; nothing is called
 * until the generator is advanced.
 */
function* crawlGenerator() {
    for (const location of p) {
        yield crawlLocationPages(location);
    }
}

// Create the generator and advance it once, which calls crawlLocationPages for
// the first entry of p. crawlLocationPages advances it again when it is done.
let crawl = crawlGenerator();
crawl.next();

/**
 * Load all pages of one location, log each page as it arrives, then advance
 * the `crawl` generator so the next location is started.
 *
 * Every getHTML call is issued up front; the page's index is forwarded as the
 * second argument to getHTML.
 *
 * @param {{pages: Array}} data - Location entry whose `pages` are loaded.
 * @returns {Promise<undefined>} Fulfils after `crawl.next()` has been called.
 */
function crawlLocationPages(data) {
    const loadPage = async (page, i) => {
        await getHTML(page, i);
        console.log('Page ' + page + ' - Loaded' );
    };
    const advance = () => {
        crawl.next();
    };

    return Promise.all(data.pages.map(loadPage)).then(advance);
}

More information here: https://davidwalsh.name/async-generators

Upvotes: 0

Related Questions