Reputation: 68
I am using puppeteer-clustor and imagemagick (convert) / xwd command to take screenshot of complete desktop.
Would need browser with viewable part of page and browser navigation buttons and URL. I could get screenshot most of the times, however it does fail other times.
Error message is the tab is closed screenshot is done. Please suggest what is that I am doing wrong.
Code runs on linux with a X running on DISPLAY:0.3. I can see
Below is the code which I have I tried blockingWait and also
const {
Cluster
} = require('puppeteer-cluster');
const execSync = require('child_process').execSync;
process.env['DISPLAY'] = ':0.3';
let i = 0;
function wait(time) {
return new Promise((resolve) => setTimeout(resolve, time));
}
function blockingWait(seconds) {
//simple blocking technique (wait...)
var waitTill = new Date(new Date().getTime() + seconds * 1000);
while (waitTill > new Date()) {}
}
function getscreenshot(url, page) {
page.bringToFront(); // Get the tab to focus
wait(200);
i = i + 1; // For now get screenshot as number will add image named based on URL
path = i + '.jpg';
var r = execSync('import -window root ' + path);
console.log('Taken screenshot: ' + path);
console.log(url);
blockingWait(1);
}
(async () => {
// Create a cluster with 6 workers or 6 tabs which loads all the url
const cluster = await Cluster.launch({
concurrency: Cluster.CONCURRENCY_PAGE,
maxConcurrency: 6,
timeout: 120000,
puppeteerOptions: {
executablePath: 'google-chrome-stable',
args: [
'--ignore-certificate-errors',
'--no-sandbox',
'--incognito',
'--disable-infobars',
'--disable-setuid-sandbox',
'--window-size=1600,1200',
'--start-maximized',
'--disable-gpu'
],
headless: false, //headless:false so we can watch the browser as it works
},
});
console.log('cluster launched');
// We don't define a task and instead use own functions
const screenshot = async ({
page,
data: url
}) => {
console.log('screenshot entered ');
await page.setExtraHTTPHeaders({
'CUSTOMER-ID': "66840"
}, ); // use same customer id as header
await page.setViewport({
width: 1600,
height: 1200
});
await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3419.0 Safari/537.36');
await page.goto(url, {
waitUntil: 'domcontentloaded'
}, {
waitUntil: 'networkidle0'
}, {
waitUntil: 'load'
});
// Since we wait the page to fully load
await page.waitForResponse(response => response.ok()) // ok page is ready .. will deal here for other HTTP error beside 200, 404,500 etc
await page.waitForNavigation({
waitUntil: 'domcontentloaded'
}, {
waitUntil: 'networkidle0'
}, ); // Wait for page to load before screenshot
await page.bringToFront(); // Get the tab to focus
wait(100); // Blocking wait
console.log('Waiting 5 sec');
blockingWait(5); // different kind of wait
getscreenshot(url, page);
console.log('screenshot exited');
};
const extractTitle = async ({
page,
data: url
}) => {
console.log('scrapelinks entered');
await page.setExtraHTTPHeaders({
'CUSTOMER-ID': "66840"
}, );
await page.setViewport({
width: 1600,
height: 1200
});
await page.goto(url);
const pageTitle = await page.evaluate(() => document.title); // will later used to confirm the page matches with client details.
// get all Links on the page
const hrefs = await page.$$eval('a', hrefs => hrefs.map((a) => {
return {
href: a.href,
text: a.textContent,
};
}));
// get 1st links matching text or link value having bioanalyzer-systems/instrument-2100.xhtml
for (let postUrl of hrefs) {
if (postUrl.text.indexOf("Client-s") > -1) {
cluster.execute(postUrl.href, screenshot); // add this link also to queue
} else if (postUrl.href.indexOf("bioanalyzer-systems/instrument-2100.xhtml") > -1) {
cluster.execute(postUrl.href, screenshot); // add this url to queue
break;
}
}
console.log('scrapelinks exited');
};
// Make screenshots
cluster.execute('http://www.internal-site.int/en/product/66840?product=NEW&CodeList=bio&Id=66840', screenshot);
cluster.execute('http://www.internal-site.int/en/product/66840?product=USED&CodeList=nonbio&Id=66840', screenshot);
// But also do some other stuff
cluster.execute('http://www.internal-site.int/en/product/66840?product=NEW&CodeList=bio&Id=66840', extractTitle);
cluster.execute('http://www.internal-site.int/en/product/66840?product=USED&CodeList=nonbio&Id=66840', extractTitle);
await cluster.idle();
await cluster.close();
})();```
I expect output to take screenshot once the page or tab load is completed.
Upvotes: 0
Views: 1771
Reputation: 25280
The page is being closed as soon as the function is finished executing (or the Promise is resolved). You are not using await
to wait for the asynchronous action to finish.
For example, in your screenshot
function, there is the following code:
wait(100);
console.log('Waiting 5 sec');
blockingWait(5);
getscreenshot(url, page);
console.log('screenshot exited');
The first line calls the wait
function (which is async
), but as you are not await
ing it, the function will be executed in the background and Node.js will continue to execute your script.
The blockingWait
is not the JavaScript-like way to write code. This completely blocks the execution.
The getscreenshot
function should again be async
so that you can await
it. Also, some of the puppeteer function calls should have await
in front of them (e.g. page.bringToFront
) to wait until they are finished.
In general, you should check out the concept of async
/await
and Promises to understand where and why you should be using these keywords.
Upvotes: 1