Felipe Caldas
Felipe Caldas

Reputation: 2503

Then() completing before Promise

I am having a basic issue with JS Promises. Here's my full code:

'use strict'
const rp = require('request-promise');
const cheerio = require('cheerio');
var fs = require('fs');
var os = require('os');

// Request options for the listing page.
// NOTE(review): `url` is not defined anywhere in this snippet — presumably declared elsewhere; confirm.
const options = {
    uri: url,
    normalizeWhitespace: true,
    // Pass the response body through cheerio.load so the `.then(($) => ...)` callback can query it.
    transform: function (body) {
        return cheerio.load(body);
    }
};

let results = []
let results2 = []

// Scrape pipeline: fetch the listing page, collect one record per product tile into
// `results`, request each product's detail page into `results2`, then write ./file.csv.
rp(options)
.then(($) => {
    // Step 1: build one `results` entry per element matching '.col-xs-4 .grid-item'.
    $('.col-xs-4 .grid-item').each(function (i, elem) {
        let temp = $(this).find(".prod-image").attr("style")
        // Take the text between "background-image:url('" (22 characters long) and "')".
        let productImageUrl = temp.substring(temp.indexOf("background-image:url('") + 22, temp.indexOf("')"))
        let detailUrl = $(this).find(".prod-image").attr("href")
        let title = $(this).find(".title").text()
        // The description is read from the "content" attribute of the element's fifth child.
        let description = $(elem).children().eq(4).attr("content")

        results.push({
            "productImageUrl": productImageUrl,
            "detailUrl": detailUrl,
            "title": title,
            "description": description
        })
    });
})
.then(() => {
    // Step 2: start one detail-page request per collected item.
    // NOTE(review): forEach discards the promises created by rp(options1) and this
    // callback returns nothing, so the outer chain does not wait for these requests
    // before moving on to .finally().
    results.forEach(item => {
        const options1 = {
            uri: item.detailUrl,
            normalizeWhitespace: true,
            transform: function (body) {
                return cheerio.load(body);
            }
        };

        rp(options1)
            .then(($) => {
                console.log(5)
                // Slice the size and price fields out of the '#prod-title' text using
                // marker strings and fixed offsets.
                let temp = $('#prod-title').text()
                let unit = temp.substring(temp.indexOf('Size: ') + 6, temp.indexOf('mL') - 1)
                let retail = temp.substring(temp.indexOf('Retail: $') + 9, temp.indexOf(' A'))
                let wholesale = temp.substring(temp.indexOf('Wholesale: $') + 12, temp.indexOf(' A') + 21)
                results2.push({
                    "productImageUrl": item.productImageUrl,
                    "detailUrl": item.detailUrl,
                    "title": item.title,
                    "description": item.description,
                    "unit": unit,
                    "retail": retail,
                    "wholesale": wholesale
                })
            })
            .catch((err) => {
                // A failed detail request is only logged; no entry is pushed to results2 for it.
                console.log(err);
            });
    })
})
.finally(() => {
    // Step 3: write the CSV header, then append one line per entry in results2.
    console.log("FINALLY " + results2)
    let header = "Handle,Title,Body" + os.EOL

    // NOTE(review): writeFile and the appendFile calls below are issued without waiting
    // for the previous call's callback — confirm the resulting write order is acceptable.
    fs.writeFile("./file.csv", header, function (err) {
        if (err) {
            return console.log(err);
        }
    });

    for (let item of results2) {
        console.log(2)
        // Handle column: the title with whitespace runs replaced by '-' and lower-cased.
        let hyphenateTitle = item.title.replace(/\s+/g, '-').toLowerCase();
        // NOTE(review): `vendor` is not defined in this snippet — presumably declared elsewhere; confirm.
        let line = hyphenateTitle + "," + item.title + "," + item.description + "," + vendor + ',"","",true,Title,Default Title,,,,,SKU,10000,,1,deny,manual,' + item.retail + "," + item.wholesale + "," + 'true,true,"",' + item.productImageUrl + "," + ',1,,false,,,,,,,,,,,,,,,,,kg,' + os.EOL
        fs.appendFile("./file.csv", line, function (err) {
            if (err) {
                return console.log(err);
            } else {
                // done
            }
        })
    }
})

// Log any rejection that reaches the end of the outer chain.
.catch((err) => {
    console.log(err);
});

The idea is that in the first then() I will read the HTML page and find some URLs. I will push that information to the results array.

Then, in the second then(), the idea is to iterate over each item in the array, go to the second (detail) page, extract more information and push it to the results2 array.

Finally, get that whole information in a csv.

This is the output in the console:

FINALLY 
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5

So as you can see, the last then is running before the middle one (but after the topmost one).

What am I doing wrong? Thanks

EDIT 1: This is the new code, as suggested by one of the commenters:

'use strict'
const rp = require('request-promise');
const cheerio = require('cheerio');
var fs = require('fs');
var os = require('os');

// Request options for the listing page.
// NOTE(review): `url` is not defined anywhere in this snippet — presumably declared elsewhere; confirm.
const options = {
    uri: url,
    normalizeWhitespace: true,
    // Pass the response body through cheerio.load so the `.then(($) => ...)` callback can query it.
    transform: function (body) {
        return cheerio.load(body);
    }
};

let results = []
let results2 = []

// Scrape pipeline (EDIT 1): same steps as before, with the detail requests wrapped in
// Promise.all(results.map(...)).
rp(options)
    .then(($) => {
        console.log("FIRST THEN")
        // Step 1: build one `results` entry per element matching '.col-xs-4 .grid-item'.
        $('.col-xs-4 .grid-item').each(function (i, elem) {
            let temp = $(this).find(".prod-image").attr("style")
            // Take the text between "background-image:url('" (22 characters long) and "')".
            let productImageUrl = temp.substring(temp.indexOf("background-image:url('") + 22, temp.indexOf("')"))
            let detailUrl = $(this).find(".prod-image").attr("href")
            let title = $(this).find(".title").text()
            // The description is read from the "content" attribute of the element's fifth child.
            let description = $(elem).children().eq(4).attr("content")

            results.push({
                "productImageUrl": productImageUrl,
                "detailUrl": detailUrl,
                "title": title,
                "description": description
            })
        });
    })
    // The previous callback returns nothing, so `$` is undefined here; it is not used below.
    .then(($) => {
        console.log("SECOND THEN")
        // Step 2: start one detail-page request per collected item.
        // NOTE(review): the map callback has no return statement, so Promise.all receives
        // an array of undefined values and does not wait for the rp(options1) requests.
        return Promise.all(results.map( item => {
            console.log("SECOND THEN INNER")
            const options1 = {
                uri: item.detailUrl,
                normalizeWhitespace: true,
                transform: function (body) {
                    return cheerio.load(body);
                }
            };

            rp(options1)
                .then(($) => {
                    console.log("SECOND THEN INSIDE 'rp(options1)'")
                    // Slice the size and price fields out of the '#prod-title' text using
                    // marker strings and fixed offsets.
                    let temp = $('#prod-title').text()
                    let unit = temp.substring(temp.indexOf('Size: ') + 6, temp.indexOf('mL') - 1)
                    let retail = temp.substring(temp.indexOf('Retail: $') + 9, temp.indexOf(' A'))
                    let wholesale = temp.substring(temp.indexOf('Wholesale: $') + 12, temp.indexOf(' A') + 21)
                    results2.push({
                        "productImageUrl": item.productImageUrl,
                        "detailUrl": item.detailUrl,
                        "title": item.title,
                        "description": item.description,
                        "unit": unit,
                        "retail": retail,
                        "wholesale": wholesale
                    })
                })
                .catch((err) => {
                    // A failed detail request is only logged; no entry is pushed to results2 for it.
                    console.log(err);
                });
        }))
    })
    // NOTE(review): the `$` parameter is never used in this callback.
    .finally(($) => {
        // Step 3: write the CSV header, then append one line per entry in results2.
        console.log("FINALLY " + results2)
        let header = "Handle,Title,Body" + os.EOL

        // NOTE(review): writeFile and the appendFile calls below are issued without waiting
        // for the previous call's callback — confirm the resulting write order is acceptable.
        fs.writeFile("./file.csv", header, function (err) {
            if (err) {
                return console.log(err);
            }
        });

        for (let item of results2) {
            console.log(2)
            // Handle column: the title with whitespace runs replaced by '-' and lower-cased.
            let hyphenateTitle = item.title.replace(/\s+/g, '-').toLowerCase();
            // NOTE(review): `vendor` is not defined in this snippet — presumably declared elsewhere; confirm.
            let line = hyphenateTitle + "," + item.title + "," + item.description + "," + vendor + ',"","",true,Title,Default Title,,,,,SKU,10000,,1,deny,manual,' + item.retail + "," + item.wholesale + "," + 'true,true,"",' + item.productImageUrl + "," + ',1,,false,,,,,,,,,,,,,,,,,kg,' + os.EOL
            fs.appendFile("./file.csv", line, function (err) {
                if (err) {
                    return console.log(err);
                } else {
                    // done
                }
            })
        }
    })

    // Log any rejection that reaches the end of the outer chain.
    .catch((err) => {
        console.log(err);
    });

And this is the output:

FIRST THEN
SECOND THEN
SECOND THEN INNER
SECOND THEN INNER
SECOND THEN INNER
SECOND THEN INNER
SECOND THEN INNER
SECOND THEN INNER
SECOND THEN INNER
SECOND THEN INNER
SECOND THEN INNER
SECOND THEN INNER
SECOND THEN INNER
SECOND THEN INNER
SECOND THEN INNER
SECOND THEN INNER
SECOND THEN INNER
SECOND THEN INNER
SECOND THEN INNER
SECOND THEN INNER
SECOND THEN INNER
SECOND THEN INNER
SECOND THEN INNER
SECOND THEN INNER
SECOND THEN INNER
SECOND THEN INNER
FINALLY 
SECOND THEN INSIDE 'rp(options1)'
SECOND THEN INSIDE 'rp(options1)'
SECOND THEN INSIDE 'rp(options1)'
SECOND THEN INSIDE 'rp(options1)'
SECOND THEN INSIDE 'rp(options1)'
SECOND THEN INSIDE 'rp(options1)'
SECOND THEN INSIDE 'rp(options1)'
SECOND THEN INSIDE 'rp(options1)'
SECOND THEN INSIDE 'rp(options1)'
SECOND THEN INSIDE 'rp(options1)'
SECOND THEN INSIDE 'rp(options1)'
SECOND THEN INSIDE 'rp(options1)'
SECOND THEN INSIDE 'rp(options1)'
SECOND THEN INSIDE 'rp(options1)'
SECOND THEN INSIDE 'rp(options1)'
SECOND THEN INSIDE 'rp(options1)'
SECOND THEN INSIDE 'rp(options1)'
SECOND THEN INSIDE 'rp(options1)'
SECOND THEN INSIDE 'rp(options1)'
SECOND THEN INSIDE 'rp(options1)'
SECOND THEN INSIDE 'rp(options1)'
SECOND THEN INSIDE 'rp(options1)'
SECOND THEN INSIDE 'rp(options1)'
SECOND THEN INSIDE 'rp(options1)'

Upvotes: 0

Views: 94

Answers (1)

krampstudio
krampstudio

Reputation: 3631

In the 2nd then you are running an async iteration, but nothing tells the wrapping Promise to wait for the Promises created inside the loop (in the forEach).

You should return the inner Promise chain to inform the parent Promise to wait, and since you need to wait for all the Promises in your iteration, you can use Promise.all.

.then( () => {
   // Return the combined promise so the outer chain waits for every request.
   return Promise.all(results.map( item => {
       // ... build options1 from item (elided) ...
       // Return the inner chain so Promise.all receives one promise per item.
       return rp(options1).then(($) => {
            // ... parse the detail page and push to results2 (elided) ...
        });
    }));
})

And you can either catch locally (on the inner Promise) or on the parent Promise based on your needs.

Please look at the Promise documentation for the details; see the section on common mistakes with Promises.

Upvotes: 1

Related Questions