Reputation: 2586
I want the value from pricecell/WebRupee class from this HTML document.
The document snippet looks like this.
<tr prodid="143012" class="tablerow style2">
<td class="pricecell"><span class="WebRupee">Rs.</span> 29 <br><font style="font-size:smaller;font-weight:normal"> 3 days </font></td>
<td class="spacer"></td>
<td class="detailcell"><span><span class="label label-default" style="background-color:#3cb521;color:#fff;border:1px solid #3cb521">FULL TT</span> </span><span><span class="label label-default" style="background-color:#fff;color:#0c7abc;border:1px solid #0c7abc">SMS</span> </span>
<div style="padding-top:5px">
29 Full Talktime
</div>
<div class="detailtext">
5 Local A2A SMS valid for 1 day
</div></td>
</tr>
<tr prodid="127535" class="tablerow style2">
<td class="pricecell"><span class="WebRupee">Rs.</span> 59 <br><font style="font-size:smaller;font-weight:normal"> 7 days </font></td>
<td class="spacer"></td>
<td class="detailcell"><span><span class="label label-default" style="background-color:#3cb521;color:#fff;border:1px solid #3cb521">FULL TT</span> </span><span><span class="label label-default" style="background-color:#fff;color:#0c7abc;border:1px solid #0c7abc">SMS</span> </span>
<div style="padding-top:5px">
59 Full Talktime
</div>
<div class="detailtext">
10 A2A SMS valid for 2 days
</div></td>
</tr>
<tr prodid="143025" class="tablerow style2">
<td class="pricecell"><span class="WebRupee">Rs.</span> 99 <br><font style="font-size:smaller;font-weight:normal"> 12 days </font></td>
<td class="spacer"></td>
<td class="detailcell"><span><span class="label label-default" style="background-color:#3cb521;color:#fff;border:1px solid #3cb521">FULL TT</span> </span>
<div style="padding-top:5px">
99 Full Talktime
</div>
<div class="detailtext">
10 Local A2A SMS for 2 days only
</div></td>
</tr>
I specifically want the values 29, 59 and 99, which are enclosed in the pricecell->webrupee class; I need them to be parsed by jsoup.
The code I have tried:
// AsyncTask subclass that downloads a plans page with jsoup and tries to pull
// values out of the rows found under the element with id "Topup".
// The snippet is truncated by its author (see the "...." lines) and is not closed.
class kp extends AsyncTask<Void,Void,Void> {
ArrayList<HashMap<String, String>> arraylist2 = new ArrayList<>();
@Override
protected void onPreExecute() {
super.onPreExecute();
}
@Override
protected Void doInBackground(Void... voids) {
try {
// Fetch the page; `operator` and `state` are not declared in the visible snippet.
Document doc = Jsoup.connect("http://www.ireff.in/plans/" + operator+"/" + state).userAgent("Mozilla/5.0 " +
"(Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.101 Safari/537.36").get();
int count = 0, j = 0, i = 0;
String TopupTable="";
// First pass: concatenate the HTML of every <tr> under div#Topup into one string.
for (Element table : doc.select("div[id=Topup]")) {
for (Element row : table.select("tr")) {
count++;
TopupTable=TopupTable+row.toString();//has all the values of topup category
// Prints the whole accumulated string on every iteration, not just the current row.
System.out.print(TopupTable+"TopupTable row string here");
}
}
....
....
....
Elements r2;
String temp;
// Re-parse the concatenated row HTML as a separate document using the XML parser.
Document doc2 = Jsoup.parse(TopupTable, "",Parser.xmlParser());//doc2 has the TopupTable string converted to a "Document" type variable
// Second pass iterates the rows of the original `doc` again, not of `doc2`.
for (Element table : doc.select("div[id=Topup]")) {
for (Element row : table.select("tr")) {
i++;
j++;
// NOTE(review): `k` is not declared in the visible snippet; presumably it is
// defined in the elided part -- confirm.
k++;
try {
Elements tds = row.select("td:not([rowspan])");
// NOTE(review): Elements.contains(Object) compares against Element instances,
// so passing a selector String here can never match and the `continue` is never taken.
if(tds.contains("tr[id=download]"))
continue;
// NOTE(review): getElementsByClass expects a bare class name (e.g. "pricecell"),
// not a CSS selector. With "td[class=pricecell]" / "span[class=WebRupee]" nothing
// matches, so the result is empty and `temp` ends up as an empty string.
// Also note that both lookups run against the whole of `doc2`, not the current `row`,
// and that in the sample HTML the number is a text node of td.pricecell that sits
// next to the WebRupee span rather than inside it.
Elements tds2 = doc2.getElementsByClass("td[class=pricecell]");
temp=doc2.getElementsByClass("span[class=WebRupee]").toString();//trying to get those numeric values and store it in temp variable
System.out.print(temp+"temp var");
I am getting a blank value for the temp variable; kindly tell me where I am going wrong.
Thank you for your time :-) If you need more details regarding this question, kindly comment below.
Upvotes: 0
Views: 819
Reputation: 4037
Just use selectors and org.jsoup.nodes.Element.ownText() to extract the cell text without the children text.
Gets the text owned by this element only; does not get the combined text of all children.
// Download the page, identifying ourselves with the given user agent.
Document doc = Jsoup.connect(url).userAgent(userAgent).get();
// Visit every price cell and print only the text the cell owns directly;
// text inside child elements (the "Rs." span, the validity <font>) is left out.
for (Element priceCell : doc.select("td.pricecell")) {
    System.out.println(priceCell.ownText());
}
Output
29
59
99
Upvotes: 1
Reputation: 4509
I have tried it like this, and it is working for me:
/**
 * Small demo that prints the price found in each row's {@code td.pricecell}
 * of a hard-coded HTML table.
 */
public class Test {
    /**
     * Parses the sample table and prints one price per row.
     *
     * <p>The price is the text owned directly by the {@code td.pricecell} element, so it is read
     * with {@code ownText()}. This leaves the parsed document untouched (no need to remove the
     * {@code span}/{@code font} children first) and does not rely on the {@code Whitelist}
     * cleaner, which newer jsoup releases replace with {@code Safelist}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String parseText = "<table><tr prodid=\"143012\" class=\"tablerow style2\">\n" +
                " <td class=\"pricecell\"><span class=\"WebRupee\">Rs.</span> 29 <br><font style=\"font-size:smaller;font-weight:normal\"> 3 days </font></td>\n" +
                " <td class=\"spacer\"></td>\n" +
                " <td class=\"detailcell\"><span><span class=\"label label-default\" style=\"background-color:#3cb521;color:#fff;border:1px solid #3cb521\">FULL TT</span> </span><span><span class=\"label label-default\" style=\"background-color:#fff;color:#0c7abc;border:1px solid #0c7abc\">SMS</span> </span>\n" +
                " <div style=\"padding-top:5px\">\n" +
                " 29 Full Talktime \n" +
                " </div>\n" +
                " <div class=\"detailtext\">\n" +
                " 5 Local A2A SMS valid for 1 day \n" +
                " </div></td>\n" +
                " </tr>\n" +
                " <tr prodid=\"127535\" class=\"tablerow style2\">\n" +
                " <td class=\"pricecell\"><span class=\"WebRupee\">Rs.</span> 59 <br><font style=\"font-size:smaller;font-weight:normal\"> 7 days </font></td>\n" +
                " <td class=\"spacer\"></td>\n" +
                " <td class=\"detailcell\"><span><span class=\"label label-default\" style=\"background-color:#3cb521;color:#fff;border:1px solid #3cb521\">FULL TT</span> </span><span><span class=\"label label-default\" style=\"background-color:#fff;color:#0c7abc;border:1px solid #0c7abc\">SMS</span> </span>\n" +
                " <div style=\"padding-top:5px\">\n" +
                " 59 Full Talktime \n" +
                " </div>\n" +
                " <div class=\"detailtext\">\n" +
                " 10 A2A SMS valid for 2 days \n" +
                " </div></td>\n" +
                " </tr>\n" +
                " <tr prodid=\"143025\" class=\"tablerow style2\">\n" +
                " <td class=\"pricecell\"><span class=\"WebRupee\">Rs.</span> 99 <br><font style=\"font-size:smaller;font-weight:normal\"> 12 days </font></td>\n" +
                " <td class=\"spacer\"></td>\n" +
                " <td class=\"detailcell\"><span><span class=\"label label-default\" style=\"background-color:#3cb521;color:#fff;border:1px solid #3cb521\">FULL TT</span> </span>\n" +
                " <div style=\"padding-top:5px\">\n" +
                " 99 Full Talktime \n" +
                " </div>\n" +
                " <div class=\"detailtext\">\n" +
                " 10 Local A2A SMS for 2 days only \n" +
                " </div></td>\n" +
                " </tr></table>";
        Document doc = Jsoup.parse(parseText);
        for (Element row : doc.select("tr")) {
            // first() yields null when the row has no price cell (e.g. a header or spacer
            // row); skip such rows instead of failing on get(0).
            Element priceCell = row.select("td.pricecell").first();
            if (priceCell == null) {
                continue;
            }
            // ownText() returns only the cell's own text ("29"), ignoring the
            // "Rs." span and the validity <font> child.
            System.out.println(priceCell.ownText());
        }
    }
}
Output:
29
59
99
Upvotes: 1
Reputation: 8738
You can use Node.childNodes() to retrieve the List of Node objects and check the instance of each of them (in your case TextNode):
// Parse the HTML and walk every table row looking for its price cell.
Document doc = Jsoup.parse(html);
Elements trs = doc.select("table tr");
for (Element tr : trs) {
    Element priceCell = tr.select(".pricecell").first();
    if (priceCell == null) {
        // Rows without a .pricecell element would otherwise cause a NullPointerException.
        continue;
    }
    // Only the direct TextNode children carry the price; child elements
    // (the "Rs." span, <br>, the validity <font>) are skipped.
    for (Node child : priceCell.childNodes()) {
        if (child instanceof TextNode) {
            String text = ((TextNode) child).text().trim();
            // Whitespace-only text nodes between tags would print empty lines.
            if (!text.isEmpty()) {
                System.out.println(text);
            }
        }
    }
}
Upvotes: 0