Karollo
Karollo

Reputation: 45

Javascript Web Scraping couple urls

I want to get some data from couple urls and write to csv file. But a result is not satisfied for me, because I'm getting not all data (should be 10 in this case) and fetched data is not in right order. Should be 1, 2, 3 and so on, but I am getting it randomly, 6, 10, 5, 1.... From this code I am getting sometimes six h3 values, sometimes five, it is happening randomly. My url addresses are on 100% good. I am using async await syntax, but it doesn't help. I am beginner. Here is my code:

const request = require('request');
const cheerio = require('cheerio');
const fs = require('fs');
const writeSteam = fs.createWriteStream('data.csv');

let data= '';
const numOfFetchData = 10;
const numbers = Array.from(Array(numOfFetchData + 1).keys());

async function getData() {
    for await (const number of numbers) {
        request('randomURL/' + (number+1), (err, res, html) => {
            if(!err && res.statusCode == 200 && (number+1) <= numOfFetchData) {
                const $ = cheerio.load(html);
                const h3Tag = $("h3")[0].children[0].data;
                data += (number + 1) + ' ' + h3Tag + '\n'   
            } else {
                writeSteam.write(`${data}`); 
            }
        });
    };
};

getData();

What should I improve in my code?

Thanks and Best Regards!

Upvotes: 0

Views: 34

Answers (1)

jasper
jasper

Reputation: 945

After taking another look at your code it seems like the request library doesn't return promises, but works w/ callbacks (making async/await not usable). If you really want your code fetched in order you can

  1. Use recursion to only fire the next request after the first one is finished:
async function getData(numbers) {
    request('randomURL/' + (numbers[numbers.length - 1] + 1), (err, res, html) => {
        numbers.pop()
        if(!err && res.statusCode == 200 && (number+1) <= numOfFetchData) {
                const $ = cheerio.load(html);
                const h3Tag = $("h3")[0].children[0].data;
                data += (number + 1) + ' ' + h3Tag + '\n'   
        } else {
                writeSteam.write(`${data}`); 
        }
        if (numbers.length > 0) getData(numbers);
    });
};

getData(numbers);
  1. In case that the order of which you get the data doesn't as long as the results are in the same order as the initial number array I'd suggest using fetch (a promise library) instead of request:
async function getData() {
    let fetchPromises = numbers.map(number => fetch('randomURL/' + (number+1)));
    const results = await Promise.all(fetchPromises); // results in order
    // Handle results
};

Upvotes: 1

Related Questions