Reputation: 1078
I want to check for all broken links present in a webpage using CasperJS. I wrote the following code, but it is not working:
// Asker's original attempt: visit every link found on the page and report failures.
casper.then(function() {
// Reads the raw `href` attribute of every anchor that has one.
var urls=casper.getElementsAttribute('a[href]', 'href');
casper.eachThen(urls, function(response) {
// NOTE(review): `link` is assigned here but never used below.
var link=response.data;
// NOTE(review): `demourl` is not defined anywhere in this snippet — presumably `link` was intended; confirm.
this.thenOpen(demourl, function(response) {
// Report when no response object is available or the HTTP status is 400 or above.
if (response == undefined || response.status >= 400) {
this.echo("failed");
}
})
// NOTE(review): this handler is registered inside the eachThen callback, i.e. once per iterated URL.
this.on('http.status.404', function(resource) {
this.echo('wait, this url is 404: ' + resource.url);
})
})
})
My webpage contains more than 400 links. My code doesn't complete its execution: it goes idle after some links and gives no further response. I don't understand why this is happening.
Upvotes: 0
Views: 1093
Reputation: 61922
There is a difference between attributes and properties of DOM elements. Suppose you have a website on the domain http://example.com and you want to get the href of the following link on that page:
<a href="/path/to/stuff">text</a>
If you use aElement.getAttribute("href"), you will get "/path/to/stuff", but if you use aElement.href, you will get the computed URL "http://example.com/path/to/stuff". Only the latter is a URL that CasperJS (actually PhantomJS) understands.
I'm telling you this because casper.getElementsAttribute() internally uses the element.getAttribute() approach, which produces URLs that cannot be opened with casper.thenOpen().
The fix is easy:
// Collect the `href` property (rather than the raw attribute) of every
// anchor on the page. The callback runs inside the page context, so it is
// kept to plain ES5.
var urls = casper.evaluate(function() {
    var anchors = document.querySelectorAll('a[href]');
    return Array.prototype.map.call(anchors, function(anchor) {
        return anchor.href;
    });
});
Also, you might want to move the casper.on() event registration above the casper.eachThen() call. You don't need to register the event in every iteration.
Since you have problems with some URLs not loading (probably because they are broken), you can use casper.options.stepTimeout to set a timeout for steps, so that CasperJS won't freeze on some unretrievable URL. You also need to define the onStepTimeout() callback; otherwise, CasperJS will exit.
// Collects every link on the current page and queues one navigation per
// link, reporting links that time out, produce no response, or return an
// HTTP status of 400 or above.
casper.then(function() {
    // URL whose steps are currently running; read by the timeout handler.
    var currentURL;

    // Bound every step so one unresponsive URL cannot stall the whole run.
    casper.options.stepTimeout = 10000; // 10 seconds
    casper.options.onStepTimeout = function(timeout, stepNum) {
        this.echo('wait, this url timed out: ' + currentURL);
    };

    // Read the `href` property (not the raw attribute) of each anchor.
    // The callback runs in the page context, so it stays plain ES5.
    var urls = this.evaluate(function() {
        return [].map.call(document.querySelectorAll('a[href]'), function(a) {
            return a.href;
        });
    });

    // Registered once, outside the loop, rather than once per link.
    this.on('http.status.404', function(resource) {
        this.echo('wait, this url is 404: ' + resource.url);
    });

    urls.forEach(function(link) {
        // Record the link in its own step so the timeout handler can name it.
        this.then(function() {
            currentURL = link;
        });
        this.thenOpen(link, function(response) {
            if (response == undefined || response.status >= 400) {
                this.echo("failed: " + link);
            }
        });
    }, this); // thisArg: without it `this` inside the callback is not the Casper instance
});
Upvotes: 1