Reputation: 491
Error
Cannot read property 'querySelectorAll' of null
I am scraping this site. When I run the line below in the browser console, it returns the HTML I expect, but when I scrape the same HTML with Puppeteer, I get the error above.
document.querySelectorAll('#stroke-play-container > .stroke-play-leaderboard > .the-leaderboard.with-rolex > table.leaderboard.leaderboard-table.large')[0].nextSibling;
Code
'use strict';
const puppeteer = require('puppeteer');
/**
 * Opens the PGA Tour leaderboard in a visible browser window, scrolls to the
 * bottom of the page, waits five seconds, and then runs a DOM query inside the
 * page.
 *
 * @returns {Promise} Resolves with the handle returned by
 *   `page.evaluateHandle`; rejects with any error thrown along the way.
 */
function run() {
return new Promise(async (resolve, reject) => {
try {
// `headless: false` launches a visible browser window.
const browser = await puppeteer.launch({
headless : false
});
const page = await browser.newPage();
await page.goto("https://www.pgatour.com/leaderboard.html");
// Scroll to the bottom of the document (presumably to trigger lazily loaded
// content -- confirm against the site).
await page.evaluate(`window.scrollTo(0, document.body.scrollHeight)`);
// Fixed 5000 ms pause before querying the DOM.
await page.waitFor(5000);
// Everything inside this callback executes in the page, not in Node.
let urls = await page.evaluateHandle(() => {
// NOTE(review): `results` is never used.
let results = [];
var parser = new DOMParser();
// Takes the first table matching the selector and reads its next sibling node.
// `[0]` is undefined when nothing matches, so `.nextSibling` throws in that case.
var node = document.querySelectorAll('#stroke-play-container > .stroke-play-leaderboard > .the-leaderboard.with-rolex > table.leaderboard.leaderboard-table.large')[0].nextSibling;
if(node){
// NOTE(review): DOMParser.parseFromString expects a string of markup, but
// `node` is a DOM node here, so it is converted to a string first -- confirm
// this is what was intended.
var $ = parser.parseFromString(node, 'text/html');
return {
// NOTE(review): querySelectorAll returns a NodeList, which has no `innerText`
// property, so `name` evaluates to undefined.
name: $.querySelectorAll('table > tbody:nth-child(1) > tr > td.player-name > div > div.player-name-col').innerText
};
}
else{
// String sentinel returned when the table has no next sibling.
return 'error';
}
})
// NOTE(review): close() is not awaited, and it is skipped entirely when an
// earlier step throws (the catch below only rejects).
browser.close();
return resolve(urls);
} catch (e) {
return reject(e);
}
})
}
// Run the scraper and print the resolved value, or the error on failure.
run().then(console.log).catch(console.error);
Upvotes: 2
Views: 1482
Reputation: 54984
Try it like this:
// Collect the text of every `div.player-name-col` found inside the <div> that
// immediately follows the large leaderboard table (`+ div` in the selector).
let names = await page.evaluate(() => {
  const selector = '.the-leaderboard.with-rolex > table.leaderboard.leaderboard-table.large + div div.player-name-col';
  const texts = [];
  for (const cell of document.querySelectorAll(selector)) {
    texts.push(cell.innerText);
  }
  return texts;
});
I'm not sure what you were trying to accomplish with DOMParser; you shouldn't ever need to use it here.
Upvotes: 1
Reputation: 71
EDIT: As pointed out in the comments, please be mindful of the Terms of Service of pgatour.com, which do not allow scraping, crawling, etc. The solution below is only intended to illustrate how to solve the generic technical point behind your question.
I think this might be due to the default viewport size Puppeteer is using. The website is hiding the content you are looking for on smaller resolutions, hence the problem.
What made this work for me was specifying the viewport size explicitly, like so:
// setViewport returns a Promise: await it, and call it before page.goto(),
// so the page is loaded at this size.
await page.setViewport({ width: 1200, height: 1000 });
So your code would become:
'use strict';
const puppeteer = require('puppeteer');
/**
 * Scrape the player names from the PGA Tour leaderboard page.
 *
 * Launches a visible browser window, sets an explicit 1200x1000 viewport
 * (the site appears to hide the leaderboard at smaller sizes), loads the
 * page, scrolls to the bottom, waits, and then reads the player-name cells
 * from the element that follows the large leaderboard table.
 *
 * Compared with the code in the question, this version also:
 *   - queries the sibling element directly instead of passing a DOM node to
 *     DOMParser.parseFromString (which expects a markup string);
 *   - reads `innerText` from each matched cell (a NodeList has no
 *     `innerText` of its own);
 *   - uses `page.evaluate` so plain, serializable data is returned rather
 *     than a JSHandle that becomes useless once the browser is closed;
 *   - always closes the browser, even when a step throws.
 *
 * @returns {Promise<{name: string[]}|string>} An object whose `name` property
 *   lists the text of every matched player-name cell, or the string
 *   `'error'` when the leaderboard table or its following element is missing.
 * @throws Rejects with whatever error Puppeteer raises (launch, navigation,
 *   or evaluation failure).
 */
async function run() {
  const browser = await puppeteer.launch({ headless: false });
  try {
    const page = await browser.newPage();
    // Must be applied before navigation so the page renders at this size.
    await page.setViewport({ width: 1200, height: 1000 });
    await page.goto('https://www.pgatour.com/leaderboard.html');
    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
    // Fixed pause kept from the original code; newer Puppeteer releases
    // deprecate page.waitFor, so swap this call if you upgrade.
    await page.waitFor(5000);

    // Runs inside the page; only serializable values can be returned.
    return await page.evaluate(() => {
      const table = document.querySelector(
        '#stroke-play-container > .stroke-play-leaderboard > .the-leaderboard.with-rolex > table.leaderboard.leaderboard-table.large'
      );
      // nextElementSibling skips whitespace text nodes, which have no
      // querySelectorAll method.
      const node = table ? table.nextElementSibling : null;
      if (!node) {
        return 'error';
      }
      const cells = node.querySelectorAll(
        'table > tbody:nth-child(1) > tr > td.player-name > div > div.player-name-col'
      );
      return { name: [...cells].map((cell) => cell.innerText) };
    });
  } finally {
    // Release the browser whether scraping succeeded or failed.
    await browser.close();
  }
}

// Run the scraper and print the result, or the error on failure.
run().then(console.log).catch(console.error);
Upvotes: 0