LearningCode

Reputation: 59

How to find broken links using Selenium WebDriver with Java

I want to verify broken links on a website and I am using this code:

    public static int invalidLink;
    String currentLink;
    String temp;

    public static void main(String[] args) throws IOException {
        // Launch the browser
        WebDriver driver = new FirefoxDriver();
        // Enter URL
        driver.get("http://www.applicoinc.com");

        // Get all the link URLs
        List<WebElement> ele = driver.findElements(By.tagName("a"));
        System.out.println("size:" + ele.size());
        boolean isValid = false;
        for (int i = 0; i < ele.size(); i++) {

            isValid = getResponseCode(ele.get(i).getAttribute("href"));
            if (isValid) {
                System.out.println("ValidLinks:" + ele.get(i).getAttribute("href"));
                driver.get(ele.get(i).getAttribute("href"));
                List<WebElement> ele1 = driver.findElements(By.tagName("a"));
                System.out.println("InsideSize:" + ele1.size());
                for (int j = 0; j < ele1.size(); j++) {
                    isValid = getResponseCode(ele1.get(j).getAttribute("href"));
                    if (isValid) {
                        System.out.println("ValidLinks:" + ele1.get(j).getAttribute("href"));
                    } else {
                        System.out.println("InvalidLinks:" + ele1.get(j).getAttribute("href"));
                    }
                }
            } else {
                System.out.println("InvalidLinks:" + ele.get(i).getAttribute("href"));
            }
        }
    }


    public static boolean getResponseCode(String urlString) {
        boolean isValid = false;
        try {
            URL u = new URL(urlString);
            HttpURLConnection h = (HttpURLConnection) u.openConnection();
            h.setRequestMethod("GET");
            h.connect();
            System.out.println(h.getResponseCode());
            if (h.getResponseCode() != 404) {
                isValid = true;
            }
        } catch (Exception e) {
            // malformed URL or connection failure; treat the link as invalid
        }
        return isValid;
    }

}

Upvotes: 4

Views: 17445

Answers (6)

Pankaj Vadade

Reputation: 21

// allHref collects the links that survive the filter below, i.e. the actual active links
List<WebElement> allHref = new ArrayList<WebElement>();
List<WebElement> linklist = driver.findElements(By.tagName("a"));

for (int i = 0; i < linklist.size(); i++) {
    // Check for null first, otherwise a missing href throws a NullPointerException
    if (linklist.get(i).getAttribute("href") != null
            && linklist.get(i).getAttribute("href").contains("https:")) {
        System.out.println(linklist.get(i).getAttribute("href"));

        HttpURLConnection connection = (HttpURLConnection) new URL(linklist.get(i).getAttribute("href"))
                .openConnection();
        connection.connect();
        String response = connection.getResponseMessage();
        connection.disconnect();
        System.out.println(linklist.get(i).getAttribute("href") + " => Response: " + response);
        allHref.add(linklist.get(i));
    }
}
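The leading comment mentions counting the active links, but the snippet never prints that count; assuming that is what you are after, you could add one line after the loop:

    // Number of links that passed the href filter above
    System.out.println("Active link count: " + allHref.size());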

Upvotes: -1

Ripon Al Wasim

Reputation: 37806

Steps:
1. Open a browser and navigate to TestURL
2. Grab all the links from the entire page
3. Check the HTTP status code for every link grabbed in step 2 (status codes below 400 are OK; 400 and above indicate broken links)
Selenium WebDriver Java code:

WebDriver driver = new FirefoxDriver();
driver.get("<TestURL>");
List<WebElement> total_links = driver.findElements(By.tagName("a"));
System.out.println("Total Number of links: " + total_links.size());
for (int i = 0; i < total_links.size(); i++) {
    String url = total_links.get(i).getAttribute("href");
    int resp_Code = 0;
    try {
        HttpResponse urlresp = new DefaultHttpClient().execute(new HttpGet(url));
        resp_Code = urlresp.getStatusLine().getStatusCode();
    } catch (Exception e) {
    }
    if (resp_Code >= 400) {
        System.out.println(url + " is a broken link");
    } else {
        System.out.println(url + " is a valid link");
    }
}
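Note that DefaultHttpClient is deprecated in Apache HttpClient 4.3+. If a newer version of the library is on the classpath, a minimal sketch of the same status check (using the 4.3+ HttpClients.createDefault() factory) could look like this:

    import org.apache.http.client.methods.CloseableHttpResponse;
    import org.apache.http.client.methods.HttpGet;
    import org.apache.http.impl.client.CloseableHttpClient;
    import org.apache.http.impl.client.HttpClients;

    // Returns the HTTP status code for a URL, or -1 if the request fails
    public static int getStatusCode(String url) {
        try (CloseableHttpClient client = HttpClients.createDefault();
             CloseableHttpResponse response = client.execute(new HttpGet(url))) {
            return response.getStatusLine().getStatusCode();
        } catch (Exception e) {
            return -1;
        }
    }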

Upvotes: 0

Java By Kiran

Reputation: 93

In a web application we have to verify that links are not broken, i.e. that clicking a link does not lead to a "page not found" page. Below is the code:

import java.net.HttpURLConnection;
import java.net.URL;
import java.util.List;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver; 
import org.openqa.selenium.WebElement;
import org.openqa.selenium.firefox.FirefoxDriver;

public class VerifyLinks {
    public static void main(String[] args) {
        WebDriver driver = new FirefoxDriver();
        driver.manage().window().maximize();
        driver.get("https://www.google.co.in");
        List<WebElement> allLink = driver.findElements(By.tagName("a"));
        System.out.println("Total links are " + allLink.size());
        for (int i = 0; i < allLink.size(); i++) {
            WebElement ele = allLink.get(i);
            String url = ele.getAttribute("href");
            verifyLinkActive(url);
        }
    }

    public static void verifyLinkActive(String linkurl) {
        try {
            URL url = new URL(linkurl);
            HttpURLConnection httpUrlConnect = (HttpURLConnection) url.openConnection();
            httpUrlConnect.setConnectTimeout(3000);
            httpUrlConnect.connect();
            if (httpUrlConnect.getResponseCode() == 200) {
                System.out.println(linkurl + " - " + httpUrlConnect.getResponseMessage());
            }
            if (httpUrlConnect.getResponseCode() == HttpURLConnection.HTTP_NOT_FOUND) {
                System.out.println(linkurl + " - " + httpUrlConnect.getResponseMessage()
                        + " - " + HttpURLConnection.HTTP_NOT_FOUND);
            }
        } catch (Exception e) {
            // malformed URL or connection failure; nothing to report
        }
    }
}

For more tutorials visit https://www.jbktutorials.com/selenium

Upvotes: 0

Mayur Shah

Reputation: 528

You can try the code below.

public static void main(String[] args) {
    WebDriver driver = new FirefoxDriver();

    List<String> brokenLinks = getBrokenURLs(driver, "http://mayurshah.in", 2, new ArrayList<String>());
    for (String brokenLink : brokenLinks) {
        System.out.println(brokenLink);
    }
}

// Collect link hrefs page by page, up to the given depth, then check them all
public static List<String> getBrokenURLs(WebDriver driver, String appURL, int depth, List<String> links) {
    driver.navigate().to(appURL);
    System.out.println("Depth is: " + depth);
    if (depth > 0) {
        List<WebElement> linkElems = driver.findElements(By.tagName("a"));
        for (WebElement linkElement : linkElems) {
            String href = linkElement.getAttribute("href");
            if (href != null && !links.contains(href)) {
                links.add(href);
            }
        }
        // Iterate over a copy so the recursive calls can keep adding to links
        for (String link : new ArrayList<String>(links)) {
            getBrokenURLs(driver, link, depth - 1, links);
        }
    }
    return getBrokenURLs(driver, links, new ArrayList<String>());
}

// Visit each collected link and keep the ones whose title indicates a 404 page
public static List<String> getBrokenURLs(WebDriver driver, List<String> links, List<String> brokenLinks) {
    for (String link : links) {
        driver.navigate().to(link);
        if (driver.getTitle().contains("404 Page Not Found")) {
            brokenLinks.add(link);
        }
    }
    return brokenLinks;
}

In the above code, I first get the list of URLs from the first page. I then navigate to each of those links in turn and collect its URLs as well, storing every URL by visiting the pages one by one until the given depth is reached.

After collecting all the URLs, I verify the validity of each one and return the list of URLs that lead to a 404 page.
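
Note that matching the page title against "404 Page Not Found" only works on sites that use that exact title. If you would rather rely on the HTTP status code (as the other answers do), the check inside the second getBrokenURLs could be replaced with something along these lines (a sketch, not tested against the site above):

    // Treat any status of 400 or higher as broken instead of matching the page title
    try {
        HttpURLConnection conn = (HttpURLConnection) new URL(link).openConnection();
        conn.setRequestMethod("HEAD");
        if (conn.getResponseCode() >= 400) {
            brokenLinks.add(link);
        }
    } catch (Exception e) {
        brokenLinks.add(link); // malformed or unreachable URLs count as broken too
    }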

Hope that helps!

src: https://softwaretestingboard.com/qna/1380/how-to-find-broken-links-images-from-page-using-webdriver#axzz4wM3UEZtq

Upvotes: 0

StrikerVillain

Reputation: 3776

It seems that some of your href attributes contain expressions which are not valid URLs. What comes to mind immediately is to use a try-catch block to identify such URLs. Try the following piece of code.

package com.automation.test;

import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.List;

import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.firefox.FirefoxDriver;

public class Test {
    public static int invalidLink;
    String currentLink;
    String temp;

    public static void main(String[] args) throws IOException {
        // Launch The Browser
        WebDriver driver = new FirefoxDriver();
        // Enter Url
        driver.get("file:///home/sighil/Desktop/file");

        // Get all the links url
        List<WebElement> ele = driver.findElements(By.tagName("a"));
        System.out.println("size:" + ele.size());
        boolean isValid = false;
        for (int i = 0; i < ele.size(); i++) {
            // System.out.println(ele.get(i).getAttribute("href"));
            isValid = getResponseCode(ele.get(i).getAttribute("href"));
            if (isValid) {
                System.out.println("ValidLinks:"
                        + ele.get(i).getAttribute("href"));
            } else {
                System.out.println("InvalidLinks:"
                        + ele.get(i).getAttribute("href"));
            }
        }

    }

    public static boolean getResponseCode(String urlString) {
        boolean isValid = false;
        try {
            URL u = new URL(urlString);
            HttpURLConnection h = (HttpURLConnection) u.openConnection();
            h.setRequestMethod("GET");
            h.connect();
            System.out.println(h.getResponseCode());
            if (h.getResponseCode() != 404) {
                isValid = true;
            }
        } catch (Exception e) {
            // malformed URL or connection failure; treat the link as invalid
        }
        return isValid;
    }

}

I have modified getResponseCode to return boolean values based on whether the url is valid(true) or invalid(false).
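
As a complement to relying on the catch block, you could also skip hrefs that are obviously not HTTP URLs (null hrefs, mailto: or javascript: links) before calling getResponseCode. A minimal sketch; the helper name isHttpUrl is just for illustration:

    // Returns true only for hrefs that look like HTTP(S) URLs worth checking
    private static boolean isHttpUrl(String href) {
        return href != null && (href.startsWith("http://") || href.startsWith("https://"));
    }

Inside the loop you would then call getResponseCode only when isHttpUrl(ele.get(i).getAttribute("href")) is true, and report everything else as skipped or invalid.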

Hope this helps you.

Upvotes: 0

Steve Weaver Crawford

Reputation: 1059

I would keep it returning an int, and just have the MalformedURLException be a special case, returning -1.

public static int getResponseCode(String urlString) throws IOException {
    try {
        URL u = new URL(urlString);
        HttpURLConnection h = (HttpURLConnection) u.openConnection();
        h.setRequestMethod("GET");
        h.connect();
        return h.getResponseCode();
    } catch (MalformedURLException e) {
        // the href was not a valid URL at all
        return -1;
    }
}
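
With the int-returning version, the calling loop can branch on the status code directly; a rough sketch, assuming anything of 400 or above (or the -1 sentinel for malformed hrefs) counts as broken:

    int code = getResponseCode(ele.get(i).getAttribute("href"));
    if (code == -1 || code >= 400) {
        System.out.println("InvalidLinks:" + ele.get(i).getAttribute("href"));
    } else {
        System.out.println("ValidLinks:" + ele.get(i).getAttribute("href"));
    }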

EDIT: It seems you're sticking with the boolean approach; as I said before, this has its limitations but should work OK for demonstration purposes.

There is no reason to find all elements a second time taking the approach you have. Try this:

// Get all the links
List<WebElement> ele = driver.findElements(By.tagName("a"));
System.out.println("size:" + ele.size());
boolean isValid = false;
for (int i = 0; i < ele.size(); i++) {
String nextHref = ele.get(i).getAttribute("href");
    isValid = getResponseCode(nextHref);
    if (isValid) {
        System.out.println("Valid Link:" + nextHref);

    }
    else {
        System.out.println("INVALID Link:" + nextHref);

    }
}

This is untested code, so if it does not work, please provide more detail than just saying "it doesn't work": include the output and any stack traces or error messages if possible. Cheers

Upvotes: 2
