Crocio
Crocio

Reputation: 118

How to get div children using puppeteer

I'm trying to get the child elements of a div using Puppeteer.
This is my code:

const puppeteer = require('puppeteer');

const url = 'https://example.com/'; 

//browser initialization
/**
 * Starts a browser through `puppeteer.launch` with `headless: false`.
 *
 * @returns {Promise<object>} Resolves with the value produced by `puppeteer.launch`.
 */
async function initBrowser() {
  const launchOptions = { headless: false };
  return puppeteer.launch(launchOptions);
}

/**
 * Opens a new page in the given browser and navigates it to the
 * module-level `url`.
 *
 * @param {object} browser - Object exposing an async `newPage()` method.
 * @returns {Promise<object>} The page, once `goto(url)` has settled.
 */
async function getPage(browser) {
  const tab = await browser.newPage();
  await tab.goto(url);
  return tab;
}

/**
 * For every element matching `div.class` on the page, tries to log the `src`
 * attribute of an `img` under its `div.class2` child, followed by a value
 * read from its `div.class3` child.
 *
 * @param {object} page - Object exposing `$$` (the value returned by getPage).
 */
async function getNewMessage(page) {
  let els = [];

  // Collect every element on the page that matches `div.class`.
  els = await page.$$('div.class');

  for (let i = 0; i < els.length; i++) {
    // NOTE(review): `.$eval` is chained directly onto the return value of
    // `.$('div.class2')`; the single `await` applies to the whole chain, not
    // to `.$` itself. Confirm that `.$` returns something exposing `.$eval`.
    const imgSrc = await els[i]
      .$('div.class2')
      .$eval('img', (el) => el.getAttribute('src'));
    console.log(imgSrc);
    // NOTE(review): the callback reads `elem.texContent` exactly as spelled
    // here — verify the property name (possibly `textContent` was intended).
    const link = await els[i].$eval('div.class3', (elem) => elem.texContent);
    console.log(link);
  }
}

/**
 * Entry point: launches the browser, opens the target page, then logs the
 * data extracted by `getNewMessage`.
 *
 * @returns {Promise<void>}
 */
async function loading() {
  const page = await getPage(await initBrowser());
  await getNewMessage(page);
}

loading();

The page structure looks like this:

<div class="class">
  <div class="class2"> 
    <img src="..."> 
  </div>
  
  <div class="class3">
    message
  </div>
</div>

The error I get is:
Error: Error: failed to find element matching selector "img".

I think I may have found the problem: in some cases I do get the image, so I suspect that when I try to read the image it is still loading and I can't catch it.

Upvotes: 0

Views: 1477

Answers (1)

ggorlen
ggorlen

Reputation: 56855

This code is missing an await:

const imgSrc = await els[i]
  .$('div.class2')
  .$eval('img', (el) => el.getAttribute('src'));

It's not possible to chain like this, because .$ returns a promise that isn't awaited, so you're calling .$eval on a plain old JS promise rather than on a Puppeteer ElementHandle. The fix would be to await each new Puppeteer call on a separate line and use intermediate variables.

const div = await els[i].$('div.class2');
const imgSrc = await div.$eval('img', (el) => el.getAttribute('src'));

Technically, you could chain by tossing in parentheses and awaits inline, but it'd be harder to read:

const imgSrc = await (await els[i].$('div.class2'))
  .$eval('img', (el) => el.getAttribute('src'));

Better yet is to combine both selectors into one. There's no need for the two-step procedure:

// Query from the outer element handle (els[i]): the combined selector already
// names div.class2, so it must not be run from inside that div.
const imgSrc = await els[i].$eval('div.class2 img', (el) => el.getAttribute('src'));

Following that logic a step further, you might be able to skip all of the intermediate selectors, using one .$$eval per array you want to build. You can optionally merge them if you know they'll be the same length:

const puppeteer = require("puppeteer");

const html = `<div class="class">
  <div class="class2"> 
    <img src="..."> 
  </div>
  
  <div class="class3">
    message
  </div>
</div>`.repeat(2); // repeat the structure for demonstration purposes

// Kept outside the async function so the final `.finally` handler can close
// the browser even when a step inside the function throws.
let browser;
(async () => {
  const imgSelector = "div.class div.class2 img";
  const textSelector = "div.class div.class3";

  browser = await puppeteer.launch({headless: true});
  const [firstTab] = await browser.pages();
  await firstTab.setContent(html);

  // Wait until at least one image is present before reading anything.
  await firstTab.waitForSelector(imgSelector);

  // One $$eval per array: each callback receives all matches at once.
  const sources = await firstTab.$$eval(
    imgSelector,
    imgs => imgs.map(img => img.getAttribute("src"))
  );
  const texts = await firstTab.$$eval(
    textSelector,
    divs => divs.map(div => div.textContent)
  );

  // Pair the two arrays up by index.
  const records = sources.map((src, idx) => ({
    src, text: texts[idx]
  }));
  console.log(JSON.stringify(records, null, 2));
})()
  .catch(error => console.error(error))
  .finally(() => browser?.close());

Output:

[
  {
    "src": "...",
    "text": "\n    message\n  "
  },
  {
    "src": "...",
    "text": "\n    message\n  "
  }
]

Upvotes: 2

Related Questions