IWI
IWI

Reputation: 1608

NodeJS scraper needing to increment page and re-run

I'm building a simple NodeJS web scraper, and I want to re-run the function like a 'for loop' until pageNum = totalNumberOfPages... im having a brain fart, and unable to re-run the function from inside itself, since it returns an array fragment and kills itself. Could someone help me overcome this obstacle? I'm pretty sure it's very simple.

I looked at this and this but didn't figure it out...

const cheerio = require("cheerio");
const axios = require("axios");
let pageNum = 0;
let siteUrl = "https://whatever.com?&page=" + pageNum + "&viewAll=true";
let productArray = [];
let vendor = [];
let productTitle = [];
let plantType = [];
let thcRange = [];
let cbdRange = [];
let price = [];
let totalNumberOfPages = undefined;

// called by getResults()
const fetchData = async () => {
  const result = await axios.get(siteUrl);
  return cheerio.load(result.data);
};

// this function is called from index.js
const getResults = async () => {
  // >>>>>>>>>>>>>>>>>> HOW DO I RERUN FROM HERE <<<<<<<<<<<<<<<<<<<<<<<<<<<
  const $ = await fetchData();

  // first check how many total pages there are
  totalNumberOfPages = parseInt($('.pagination li:nth-last-child(2)').text());

  // use fetched data to grab elements (and their text) and push into arrays defined above
  $('.product-tile__vendor').each((index, element) => {
    vendor.push($(element).text());
  });

   $('.product-tile__title').each((index, element) => {
     productTitle.push($(element).text());
   });

  $('.product-tile__plant-type').each((index, element) => {
    plantType.push($(element).text());
  });

  $('.product-tile__properties  li:nth-child(2) p').each((index, element) => {
    thcRange.push($(element).text());
  });

  $('.product-tile__properties  li:nth-child(3) p').each((index, element) => {
    cbdRange.push($(element).text());
  });

  $('.product-tile__price').each((index, element) => {
    price.push($(element).text());
  });

  // increment page number to get more products if the page count is less than total number of pages
  if (pageNum < totalNumberOfPages) {
    pageNum ++;
  };

  //Convert to an array so that we can sort the results.
  productArray.push ({
    vendors: [...vendor],
    productTitle: [...productTitle],
    plantType: [...plantType],
    thcRange: [...thcRange],
    cbdRange: [...cbdRange],
    price: [...price],
    pageNum
  });
   // >>>>>>>>>>>>>>>>>> UNTIL HERE I THINK <<<<<<<<<<<<<<<<<<<<<<<<<<<
  return productArray;
};

module.exports = getResults;

Upvotes: 0

Views: 46

Answers (1)

Mohammed naji
Mohammed naji

Reputation: 1102

you can use recursion concept in your code:

which means the function itself will call itself

so what you can do is

const getResults = async () => {
  // >>>>>>>>>>>>>>>>>> HOW DO I RERUN FROM HERE <<<<<<<<<<<<<<<<<<<<<<<<<<<
  const $ = await fetchData();

  // first check how many total pages there are
  totalNumberOfPages = parseInt($('.pagination li:nth-last-child(2)').text());

  // use fetched data to grab elements (and their text) and push into arrays defined above
  $('.product-tile__vendor').each((index, element) => {
    vendor.push($(element).text());
  });

   $('.product-tile__title').each((index, element) => {
     productTitle.push($(element).text());
   });

  $('.product-tile__plant-type').each((index, element) => {
    plantType.push($(element).text());
  });

  $('.product-tile__properties  li:nth-child(2) p').each((index, element) => {
    thcRange.push($(element).text());
  });

  $('.product-tile__properties  li:nth-child(3) p').each((index, element) => {
    cbdRange.push($(element).text());
  });

  $('.product-tile__price').each((index, element) => {
    price.push($(element).text());
  });

  // increment page number to get more products if the page count is less than total number of pages
  if (pageNum < totalNumberOfPages) {
    pageNum ++;
  };

  //Convert to an array so that we can sort the results.
  productArray.push ({
    vendors: [...vendor],
    productTitle: [...productTitle],
    plantType: [...plantType],
    thcRange: [...thcRange],
    cbdRange: [...cbdRange],
    price: [...price],
    pageNum
  });
   // >>>>>>>>>>>>>>>>>> UNTIL HERE I THINK <<<<<<<<<<<<<<<<<<<<<<<<<<<
   if(pageNum >= totalNumberOfPages) getResults()
  return productArray;
};

Upvotes: 1

Related Questions