Reputation: 3
This code retrieves the attribute value of the first element I selected. If I put /html/body/section/div[3]/img[2]
or img[3] in the XPath instead, I can retrieve the data for the subsequent img elements.
However on the site I am scraping the parent element can have any number of img
elements within it and I want to get the attribute values for all of them.
Is there some way for me to retrieve all of them?
const puppeteer = require("puppeteer");
/**
 * Opens `url` in a Puppeteer-launched browser and logs the
 * `data-original-name` attribute of the first <img> matched by the XPath
 * `/html/body/section/div[3]/img`.
 *
 * @param {string} url - Address of the page to scrape.
 * @returns {Promise<void>} Settles after the browser has been closed;
 *   rejects with the underlying error if any Puppeteer call fails.
 */
async function scrapeProduct(url) {
  // Single source of truth for the selector used by both the wait and the query.
  const imgXPath = "/html/body/section/div[3]/img";

  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    await page.goto(url);

    // Wait for the XPath to match before querying it.
    await page.waitForXPath(imgXPath);

    // Only the first matching element handle is used.
    const [el] = await page.$x(imgXPath);
    const attr = await page.evaluate(
      (ele) => ele.getAttribute("data-original-name"),
      el
    );
    console.log({ attr });
  } finally {
    // Close the browser even when a step above throws; previously a failure
    // skipped this call and left the launched browser open.
    await browser.close();
  }
}
Upvotes: 0
Views: 401
Reputation: 13772
You can get all of them in one call by using the browser's XPath Web API (document.evaluate) inside page.evaluate:
'use strict';
// Inline test page: one <div> holding three <img> elements, each carrying a
// distinct data-original-name attribute ("foo", "bar", "baz").
const html = `
<html><body>
<section><div>
<img data-original-name="foo"></img>
<img data-original-name="bar"></img>
<img data-original-name="baz"></img>
</div></section>
</body></html>`;
const puppeteer = require('puppeteer');
/**
 * Demo entry point: loads the inline `html` document as a data: URL, collects
 * the `data-original-name` value of every <img> matched by the XPath
 * `/html/body/section/div/img`, and logs the resulting array.
 *
 * Errors are logged with console.error rather than rethrown.
 */
(async function main() {
  try {
    const browser = await puppeteer.launch();
    try {
      // Reuse the first page reported by browser.pages() instead of opening a new one.
      const [page] = await browser.pages();
      await page.goto(`data:text/html,${html}`);

      // Runs in the page context: only the returned array is passed back to Node.
      const arrayOfAttributes = await page.evaluate(() => {
        const snapshot = document.evaluate(
          '/html/body/section/div/img',
          document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null,
        );
        // A snapshot result is not iterable, so build the array by index.
        return Array.from(
          { length: snapshot.snapshotLength },
          (_, i) => snapshot.snapshotItem(i).dataset.originalName,
        );
      });

      console.log(arrayOfAttributes);
    } finally {
      // Close the browser on failure too; previously an error after launch
      // went straight to the catch below and the browser was never closed.
      await browser.close();
    }
  } catch (err) {
    console.error(err);
  }
})();
Result:
[ 'foo', 'bar', 'baz' ]
Upvotes: 1