Reputation: 307
I am trying to scan all the links in a web page, level by level, according to the selector I have given for each level.
Here is my code. At the moment the selectors are read in a fixed, hard-coded way. I would like to read them in a loop, recursively, or by any other means that makes the code more flexible, because in the future I may have more than 2 selector levels.
/**
 * Entry point: crawls http://www.java2s.com/ via {@code website} and prints
 * the collected links to standard output.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    System.out.println(website("http://www.java2s.com/"));
}
/**
 * Collects links from a start page and from every page those links point to.
 *
 * <p>Level 1 links are the anchors matching the first selector on {@code url};
 * level 2 links are the anchors matching the second selector on each level 1
 * page. Every link is reported as an absolute URL followed by a newline, each
 * level 1 link being immediately followed by the level 2 links found under it.
 *
 * @param url address of the start page
 * @return the newline-terminated links that were found; empty if the start
 *         page could not be fetched or nothing matched
 */
private static String website(String url) {
    String[] selectors = {"div.col-md-9 li a", "div#sidebar ul li a"};
    StringBuilder links = new StringBuilder();
    try {
        // Level 1
        Document firstPage = Jsoup.connect(url).get();
        for (Element level1 : firstPage.select(selectors[0])) {
            // "abs:href" resolves a relative link against the page it came
            // from; the raw "href" value cannot be fetched when it is relative.
            String level1Url = level1.attr("abs:href");
            if (level1Url.isEmpty()) {
                // No href, or one that cannot be made absolute: nothing to follow.
                continue;
            }
            links.append(level1Url).append("\n");
            // Level 2
            try {
                Document secondPage = Jsoup.connect(level1Url).get();
                for (Element level2 : secondPage.select(selectors[1])) {
                    String level2Url = level2.attr("abs:href");
                    if (!level2Url.isEmpty()) {
                        links.append(level2Url).append("\n");
                    }
                }
            } catch (IOException ex) {
                // One unreachable page must not abort the rest of the crawl.
                Logger.getLogger(AWebsite.class.getName())
                        .log(Level.SEVERE, "Failed to fetch " + level1Url, ex);
            }
        }
    } catch (IOException ex) {
        Logger.getLogger(AWebsite.class.getName()).log(Level.SEVERE, null, ex);
    }
    return links.toString();
}
Upvotes: 3
Views: 161
Reputation: 1098
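Please check the following recursive version; the number of levels is determined by the length of the selector array.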
static String levels[] = {"div.col-md-9 li a", "div#sidebar ul li a"};
private static String getRecursive(String href, int level) {
String links = "";
if (level > levels.length-1) {
return "";
}
Document doc;
try {
doc = Jsoup.connect(href).get();
Elements elements = doc.select(levels[level]);
level++;
for (Element element : elements) {
if(!element.attr("href").isEmpty())
{
links += element.attr("abs:href") + "\n";
links += getRecursive(element.attr("abs:href"), level);
}
}
} catch (IOException e1) {
e1.printStackTrace();
}
return links;
}
/**
 * Entry point: starts the recursive crawl at http://www.java2s.com/ with the
 * first selector level and prints every collected link to standard output.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    final String startUrl = "http://www.java2s.com/";
    final int firstLevel = 0;
    System.out.println(getRecursive(startUrl, firstLevel));
}
Upvotes: 4