Reputation: 23
<!-- Sample detail row (tr.s) for one contact. The address and the Tél / Fax / Début d'activité
     values are bare text placed after each <strong> label, not wrapped in an element of their own;
     only the e-mail value sits inside an element (the mailto link). -->
<tr class="s">
<td style="text-align: center; width: 100px"></td>
<td colspan="3">
<a target="_blank" href="" title="Localiser avec Google Maps">
<img src="images/gm.gif?v=7" alt="Google Maps" class="gm noauto" align="right">
</a>
10, rue Edmond Rostand - 13006 MARSEILLE
<br>
<strong>Tél :</strong> 04.33.54.03.09<br>
<strong>Fax :</strong> 04.11.54.29.85<br>
<strong>Email :</strong> <a href="mailto:[email protected]" class="icone email">[email protected]</a><br>
<strong>Début d'activité :</strong> 10/06/2013<br>
</td>
</tr>
I am trying to extract the email, phone, name and date from the HTML above. I have managed to get the name and email address, but I am stuck on the rest.
I also want to get the phone, date, and fax, which seem to sit outside any element.
// Question's attempt: for every headline, pair each name row ("tr.t") with the detail cell
// ("tr.s td:eq(1)") at the same index and print one line per contact.
// j and newsHeadlines are declared outside this excerpt.
for (Element headline : newsHeadlines)
{
Elements trs = headline.select("tr.t");
Elements phnDiv = headline.select("tr.s td:eq(1)");
for (int l = 0; l < trs.size(); l++)
{
j++;
Elements name = trs.get(l).select("th");
Elements emailAddress = phnDiv.get(l).select("a");
// These two selectors match the <strong> label elements themselves, not the numbers after them.
Elements phone = phnDiv.get(l).select("strong:contains(Tél :)");
Elements faxx = phnDiv.get(l).select("strong:contains(Fax :)");
//Elements debutdactivite = phnDiv.get(l).select("strong:contains(Début d'activité :)");
String contactName = name.text();
String email = emailAddress.text();
// In the sample HTML the <strong> holds only the label, so removing the label from its text
// leaves nothing: the number is sibling text that follows the element. This is the problem
// the question asks about.
String tel = phone.text().replace("Tél :", "");
String fax = faxx.text().replace("Fax :", "");
//String date = debutdactivite.text();
System.out.println(j + " Name : " + contactName + " Email : " + email + " Phone : " + tel + " Fax : " + fax );
}
}
Upvotes: 0
Views: 185
Reputation: 23
// Prints one line per contact: the name from the "tr.t" header row, and the e-mail, phone, fax
// and start date from the "tr.s" detail cell at the same index.
// j (running counter) and newsHeadlines are declared outside this excerpt.
for (Element headline : newsHeadlines) {
    Elements trs = headline.select("tr.t");
    Elements phnDiv = headline.select("tr.s td:eq(1)");
    // Name rows and detail cells are paired by position; stop at the shorter list so a missing
    // detail cell cannot raise IndexOutOfBoundsException.
    int rows = Math.min(trs.size(), phnDiv.size());
    for (int l = 0; l < rows; l++) {
        j++;
        Element details = phnDiv.get(l);
        String contactName = trs.get(l).select("th").text();
        String email = details.select("a").text();

        // Label fragments to look for inside <strong>. The date label is cut before its
        // apostrophe: NOTE(review) a quote character inside :contains(...) may upset selector
        // parsing in some jsoup versions - presumably why the date line was commented out before.
        String[] labels = {"Tél :", "Fax :", "Début d"};
        String[] values = new String[labels.length];
        for (int k = 0; k < labels.length; k++) {
            // The value is not inside the <strong>; it is the node that directly follows it.
            Element label = details.select("strong:contains(" + labels[k] + ")").first();
            values[k] = "";
            if (label != null && label.nextSibling() != null) {
                values[k] = label.nextSibling().toString().trim();
            }
        }
        String ph = values[0];
        String fx = values[1];
        String date = values[2];

        // One line per contact, even when some of the fields are absent (empty string then).
        System.out.println(j + " Name : " + contactName + " Email : " + email
                + " Phone : " + ph + " Fax : " + fx + " Date : " + date);
    }
}
Now, after I run this, I get all the results. Thanks @Justin for shedding light on this.
Upvotes: 0
Reputation: 2434
You should be able to get those values by using the nextSibling()
method. It returns a Node,
so it works even though the values are not wrapped in an element of their own.
Once you grab the elements you want, the code would look like this:
// phone and faxx are Elements (collections of matches), which have no nextSibling();
// take the first matched <strong> and read the node that follows it.
// first() returns null when nothing matched, so check for that if a label may be absent.
String tel = phone.first().nextSibling().toString();
String fax = faxx.first().nextSibling().toString();
This will grab the Node directly next to the phone and faxx elements that have been selected.
Upvotes: 2
Reputation: 1503
I don't think you can use jsoup
to directly get the parts you want, since they're not Elements ...
What you could do is get the html()
of the <td>
-Element and then use a regular expression to get phone, fax and date, with a regex like this:
"<.*Fax.*> (.*?)<br>"
Should work like this
/**
 * Checks that the fax number can be pulled out of the detail cell's HTML with a regular
 * expression: group 1 is the text between the tag that closes the "Fax" label and the
 * next {@code <br>}.
 */
@Test
public void test() {
    final Pattern p = Pattern.compile("<.*?Fax.*?> (.*?)<br>");
    // Same label/value layout as the sample cell, including an entry after the fax line.
    final Matcher m = p.matcher("<strong>Tél :</strong> 04.33.54.03.09<br> <strong>Fax :</strong> 04.11.54.29.85<br> <strong>Début d'activité :</strong> 10/06/2013<br>");
    // find() rather than matches(): matches() must consume the whole input, which would
    // stretch group 1 across everything that follows the fax number.
    if (!m.find()) {
        throw new AssertionError("pattern did not locate the fax entry");
    }
    assertEquals("04.11.54.29.85", m.group(1));
}
Upvotes: 0