Reputation: 23
When I run the code the nameGen page evaluation returns a type error that states: "Cannot read property 'innerHTML' of null". The span tag it is targeting has a number value for price and that is what I am trying to get to. How do I access the number value that is contained in the span tag I am targeting? Any help or insight would be greatly appreciated. The element I am targeting looks like this:
<span id="priceblock_ourprice" class="a-size-medium a-color-price priceBlockBuyingPriceString">
$44.99
</span>
const puppeteer = require('puppeteer');
let nameArr = [];
const rand1 = Math.random().toString(16).substr(2, 8);
nameArr.push({ id: 1, link: `<img src ="${rand1}">` });
//creates a random string to used as the image name and pushes it to an array
amazonScraper = (url) =>{
(async () => {
let imageUrl = url ;
let path = `./scrapers/amazonScrapers/imageScraper/screenshots`;
//assign a name to url and the path for saving images
let browser = await puppeteer.launch({headless: false});
let page = await browser.newPage();
//launch puppeteer
await page.goto(imageUrl), { waitUntil: 'networkidle2' };
//sends puppeteer to the url and waits until everything is rendered
await page.waitForSelector('#landingImage');
let element1 = await page.$('#landingImage');
await element1.screenshot({ path: `${path}/${rand1}.png` });
//screenshot the image
let nameGen =await page.evaluate(() => {
let name = document.getElementById('productTitle').innerHTML;
return name;
});
// grabs name of the item
let priceGen =await page.evaluate(() => {
let price = document.getElementById('priceblock_ourprice').innerHTML;
return price;
});
//Broken: attempts to grab item price
console.log(nameGen);
console.log(priceGen);
await browser.close();
//closes puppeteer
})();
};
amazonScraper ("https://www.amazon.com/TOMLOV-Microscope-50X-1300X-Magnification-Ultra-Precise/dp/B08MVKKSLY/?_encoding=UTF8&pd_rd_w=yqTTn&pf_rd_p=2eed4166-2052-4602-96d1-514e72c433c6&pf_rd_r=8E0WGYYVYE5017ECAJPG&pd_rd_r=03b5a7f9-3f43-4f72-b9c8-d3ec581b450c&pd_rd_wg=jBNiN&ref_=pd_gw_crs_wish");
//calling scraper function
Here is the error:
(node:11276) UnhandledPromiseRejectionWarning: Error: Evaluation failed: TypeError: Cannot read property 'innerHTML' of null
at __puppeteer_evaluation_script__:2:66
at ExecutionContext._evaluateInternal (c:\Users\grung\node_modules\puppeteer\lib\cjs\puppeteer\common\ExecutionContext.js:221:19)
at processTicksAndRejections (internal/process/task_queues.js:93:5)
at async ExecutionContext.evaluate (c:\Users\grung\node_modules\puppeteer\lib\cjs\puppeteer\common\ExecutionContext.js:110:16)
at async c:\Users\grung\javaScriptPractice\jsPractice\scrapers\amazonScrapers\imageScraper\scraper.js:32:21
(Use `node --trace-warnings ...` to show where the warning was created)
(node:11276) UnhandledPromiseRejectionWarning: Unhandled promise rejection. This error originated either by throwing inside of an async function without a catch block, or by rejecting a promise which was not handled with .catch(). To terminate the node process on unhandled promise rejection, use the CLI flag `--unhandled-rejections=strict` (see https://nodejs.org/api/cli.html#cli_unhandled_rejections_mode). (rejection id: 1)
(node:11276) [DEP0018] DeprecationWarning: Unhandled promise rejections are deprecated. In the future, promise rejections that are not handled will terminate the Node.js process with a non-zero exit code.```
Upvotes: 2
Views: 234
Reputation: 7012
You have several problems in your code:
you need to wait for the item to be available on the page. looks like the priceblock_ourprice
is generated after the page is send to the client.
In puppeteer, there's a build in function to wait for a certain selector:
let priceGen =await page
.waitForSelector('#priceblock_ourprice')
.evaluate(() => {
let price = document.getElementById('priceblock_ourprice').innerHTML;
return price;
});
Amazon doesn't use a single id for pricing. There are several in use. some examples:
priceblock_ourprice
priceblock_dealprice
So you probably need to account for those as well. You can wait for multiple items like this:
await page.waitForFunction((priceSelectors) =>
document.querySelectorAll(priceSelectors).length, {}, priceSelectors
)
const puppeteer = require('puppeteer');
(async () => {
let browser = await puppeteer.launch({ headless: false, });
let page = await browser.newPage();
await page.goto('https://www.amazon.com/Insect-Lore-Butterfly-Growing-Kit/dp/B00000ISC5?ref_=Oct_DLandingS_D_a46a25b3_60&smid=ATVPDKIKX0DER');
const priceSelectors = [
'#priceblock_ourprice',
'#priceblock_dealprice' /* more here if you find more selectors */
];
await page.waitForFunction((priceSelectors) =>
document.querySelectorAll(priceSelectors).length,
{},
priceSelectors // pass priceSelectors to wairForFunction
)
const pricer = await page.evaluate((priceSelectors) => {
const priceRegex = /^\D\d+(\.\d+)?$/;
const asSingleSelector = priceSelectors.join(',');
const priceElements = document.querySelectorAll(asSingleSelector);
let price;
priceElements.forEach((item) => {
if (item && // item is not null
item.innerHTML && // innerHTML exists
priceRegex.test(item.innerHTML)) { // make sure string is a price
price = item.innerHTML;
}
});
return price;
}, priceSelectors); // pass priceSelectors to evaluate
console.log(pricer);
await browser.close();
})();
If you don't find the price in a specific page, you probably missed a price selector for that specific scenario
Upvotes: 2