Reputation: 118
I'm trying to get the child elements of a div using Puppeteer.
This is my code:
const puppeteer = require('puppeteer');
const url = 'https://example.com/';
/**
 * Browser initialization: launches a Puppeteer browser in non-headless mode.
 *
 * @returns {Promise<object>} resolves with the browser returned by puppeteer.launch()
 */
async function initBrowser() {
  const launchOptions = { headless: false };
  return await puppeteer.launch(launchOptions);
}
/**
 * Open a new page in the given browser and navigate it to the module-level `url`.
 *
 * @param {object} browser - browser object exposing `newPage()`
 * @returns {Promise<object>} the page, after `goto(url)` has resolved
 */
async function getPage(browser) {
  const tab = await browser.newPage();
  await tab.goto(url);
  return tab;
}
/**
 * For every element matching 'div.class' on the page, log the `src` of the image
 * looked up under its 'div.class2' child, then log the value read from its
 * 'div.class3' child.
 *
 * @param page - Puppeteer page to query (the caller passes the result of getPage())
 */
async function getNewMessage(page) {
let els = [];
els = await page.$$('div.class');
for (let i = 0; i < els.length; i++) {
// NOTE(review): .$() returns a promise and it is not awaited here, so .$eval is
// being chained onto the promise rather than onto the resolved element handle.
const imgSrc = await els[i]
.$('div.class2')
.$eval('img', (el) => el.getAttribute('src'));
console.log(imgSrc);
// NOTE(review): 'texContent' looks like a typo for 'textContent'; as written the
// callback reads a property that DOM elements do not define.
const link = await els[i].$eval('div.class3', (elem) => elem.texContent);
console.log(link);
}
}
/**
 * Entry point: launch the browser, open the target page and log its messages.
 *
 * The browser is closed in a `finally` block so the browser process is not left
 * running when navigation or scraping throws.
 *
 * @returns {Promise<void>} rejects with whatever error the steps above raised
 */
async function loading() {
  const browser = await initBrowser();
  try {
    const page = await getPage(browser);
    await getNewMessage(page);
  } finally {
    await browser.close();
  }
}

// Handle the rejection explicitly instead of leaving the promise floating;
// keep a non-zero exit code so a failed run is still reported as a failure.
loading().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
The page structure looks like this:
<div class="class">
<div class="class2">
<img src="...">
</div>
<div class="class3">
message
</div>
</div>
The error I get is:
Error: Error: failed to find element matching selector "img".
I actually think I found out what the problem is, because in some cases I get the image. I think that when I try to get the image it is still loading and I can't catch it.
Upvotes: 0
Views: 1477
Reputation: 56855
This code is missing an `await`:
const imgSrc = await els[i]
.$('div.class2')
.$eval('img', (el) => el.getAttribute('src'));
It's not possible to chain like this, because `.$` returns a promise that isn't awaited, so you're calling `.$eval` on a plain old JS promise rather than on a Puppeteer element handle. The fix would be to `await` each new Puppeteer call on a separate line and use intermediate variables.
const div = await els[i].$('div.class2');
const imgSrc = await div.$eval('img', (el) => el.getAttribute('src'));
Technically, you could chain by tossing in parentheses and inline `await`s, but it'd be harder to read:
const imgSrc = await (await els[i].$('div.class2'))
.$eval('img', (el) => el.getAttribute('src'));
Better yet is to combine both selectors into one. There's no need for the two-step procedure:
// Query from the outer element handle, so no intermediate 'div.class2' handle is needed.
const imgSrc = await els[i].$eval('div.class2 img', (el) => el.getAttribute('src'));
Following that logic a step further, you might be able to skip all of the intermediate selectors, using one `.$$eval` per array you want to build. You can optionally merge them if you know they'll be the same length:
const puppeteer = require("puppeteer");
// Markup for the demo page; it is passed to page.setContent() below instead of
// navigating to a URL.
const html = `<div class="class">
<div class="class2">
<img src="...">
</div>
<div class="class3">
message
</div>
</div>`.repeat(2); // repeat the structure for demonstration purposes
// Declared outside the async function so the cleanup step can reach it even when
// launching or scraping fails part-way through.
let browser;

/**
 * Load the demo markup, collect every image `src` and every 'div.class3' text
 * with one $$eval call each, pair them up by index and print the result as JSON.
 */
(async () => {
  try {
    browser = await puppeteer.launch({headless: true});
    const openPages = await browser.pages();
    const page = openPages[0];
    await page.setContent(html);

    const imgSelector = "div.class div.class2 img";
    await page.waitForSelector(imgSelector);

    // One $$eval per array: each callback runs in the page and maps all matches at once.
    const imgSrcs = await page.$$eval(imgSelector, (imgs) =>
      imgs.map((img) => img.getAttribute("src"))
    );
    const textContents = await page.$$eval("div.class div.class3", (divs) =>
      divs.map((div) => div.textContent)
    );

    // Merge by index; the result has one entry per image found.
    const data = [];
    for (let i = 0; i < imgSrcs.length; i++) {
      data.push({src: imgSrcs[i], text: textContents[i]});
    }
    console.log(JSON.stringify(data, null, 2));
  } catch (err) {
    console.error(err);
  } finally {
    await browser?.close();
  }
})();
Output:
[
{
"src": "...",
"text": "\n message\n "
},
{
"src": "...",
"text": "\n message\n "
}
]
Upvotes: 2