Reputation: 531
I am working with Puppeteer to automate some tasks and it is working fine, but when I load the website through it, the page takes a bit longer to load than the website normally does. I tried enabling caching using this:
const puppeteer = require('puppeteer');
// Reference point for the elapsed time logged by test(). It is set once when
// the script is loaded, so the logged figure also includes browser launch.
let time = new Date();

/**
 * Launches headless Chromium, navigates to https://example.com/, then logs
 * the milliseconds elapsed since `time` followed by the navigation response.
 *
 * The browser is closed in a `finally` block so that a failure while opening
 * the page or navigating does not leave the Chromium process running; the
 * original error is still propagated to the caller.
 *
 * @returns {Promise<void>} Settles after the browser has been closed.
 */
async function test() {
  const browser = await puppeteer.launch({
    headless: true,
    executablePath: "D:\\Desktop\\node_modules\\puppeteer\\.local-chromium\\win64-848005\\chrome-win\\chrome.exe",
    args: ['--no-sandbox'],
  });
  try {
    const page = await browser.newPage();
    const response = await page.goto('https://example.com/');
    console.log(`${new Date() - time}`);
    console.log(response);
  } finally {
    // Runs on both success and failure so the browser is always released.
    await browser.close();
  }
}
It worked for example.com: the cache was stored and the page became faster to load, but my target website doesn't seem to allow its content to be cached.
Is there any other way to speed up the process?
Upvotes: 4
Views: 6744
Reputation: 9123
If you just want the site to load faster when scraping and you do not rely on some of its images or JavaScript, you can block those resources.
Blocking by Resource Type
const puppeteer = require('puppeteer');
// Loads https://bbc.com with every image request aborted, then saves a
// full-page screenshot to no-images.png.
(async () => {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();

    // Turn interception on so the handler below decides the fate of each request.
    await page.setRequestInterception(true);
    page.on('request', (req) => {
      if (req.resourceType() === 'image') {
        req.abort();
      } else {
        req.continue();
      }
    });

    await page.goto('https://bbc.com');
    await page.screenshot({path: 'no-images.png', fullPage: true});
  } finally {
    // Close the browser even when navigation or the screenshot fails.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure instead of leaving the promise without a handler.
  console.error(err);
  process.exitCode = 1;
});
Blocking by Domain
const puppeteer = require('puppeteer');
// Loads https://theverge.com twice in the same page: first unmodified, then
// with requests to a list of third-party URL prefixes aborted. After each load
// it saves a full-page screenshot and prints page.metrics() for comparison.
(async () => {
  const browser = await puppeteer.launch({
    headless: true,
  });
  try {
    const page = await browser.newPage();
    const options = {
      waitUntil: 'networkidle2',
      timeout: 30000,
    };

    // Before: normal navigation
    await page.goto('https://theverge.com', options);
    await page.screenshot({path: 'before.png', fullPage: true});
    const metrics = await page.metrics();
    console.info(metrics);

    // After: navigation with some domains blocked
    // Array of third-party domains to block
    const blockedDomains = [
      'https://pagead2.googlesyndication.com',
      'https://creativecdn.com',
      'https://www.googletagmanager.com',
      'https://cdn.krxd.net',
      'https://adservice.google.com',
      'https://cdn.concert.io',
      'https://z.moatads.com',
      'https://cdn.permutive.com',
    ];

    await page.setRequestInterception(true);
    page.on('request', (request) => {
      const url = request.url();
      // A request is blocked when its URL begins with one of the listed prefixes.
      if (blockedDomains.some((d) => url.startsWith(d))) {
        request.abort();
      } else {
        request.continue();
      }
    });

    await page.goto('https://theverge.com', options);
    await page.screenshot({path: 'after.png', fullPage: true});
    const metricsAfter = await page.metrics();
    console.info(metricsAfter);
  } finally {
    // Close the browser even when a navigation times out or a step throws.
    await browser.close();
  }
})().catch((err) => {
  // Report the failure instead of leaving the promise without a handler.
  console.error(err);
  process.exitCode = 1;
});
Source: https://github.com/addyosmani/puppeteer-webperf
Upvotes: 6