penta
penta

Reputation: 2586

How to get this class's value in jsoup?

I want to extract the value from the pricecell/WebRupee class in this HTML document.

The document snippet looks like this.

<tr prodid="143012" class="tablerow style2">
            <td class="pricecell"><span class="WebRupee">Rs.</span> 29 <br><font style="font-size:smaller;font-weight:normal"> 3 days </font></td>
            <td class="spacer"></td>
            <td class="detailcell"><span><span class="label label-default" style="background-color:#3cb521;color:#fff;border:1px solid #3cb521">FULL TT</span>&nbsp; </span><span><span class="label label-default" style="background-color:#fff;color:#0c7abc;border:1px solid #0c7abc">SMS</span>&nbsp; </span>
             <div style="padding-top:5px">
               29 Full Talktime 
             </div>
             <div class="detailtext">
               5 Local A2A SMS valid for 1 day 
             </div></td>
           </tr>
           <tr prodid="127535" class="tablerow style2">
            <td class="pricecell"><span class="WebRupee">Rs.</span> 59 <br><font style="font-size:smaller;font-weight:normal"> 7 days </font></td>
            <td class="spacer"></td>
            <td class="detailcell"><span><span class="label label-default" style="background-color:#3cb521;color:#fff;border:1px solid #3cb521">FULL TT</span>&nbsp; </span><span><span class="label label-default" style="background-color:#fff;color:#0c7abc;border:1px solid #0c7abc">SMS</span>&nbsp; </span>
             <div style="padding-top:5px">
               59 Full Talktime 
             </div>
             <div class="detailtext">
               10 A2A SMS valid for 2 days 
             </div></td>
           </tr>
           <tr prodid="143025" class="tablerow style2">
            <td class="pricecell"><span class="WebRupee">Rs.</span> 99 <br><font style="font-size:smaller;font-weight:normal"> 12 days </font></td>
            <td class="spacer"></td>
            <td class="detailcell"><span><span class="label label-default" style="background-color:#3cb521;color:#fff;border:1px solid #3cb521">FULL TT</span>&nbsp; </span>
             <div style="padding-top:5px">
               99 Full Talktime 
             </div>
             <div class="detailtext">
               10 Local A2A SMS for 2 days only 
             </div></td>
           </tr>

I specifically want the values 29, 59 and 99, which are enclosed in the pricecell->WebRupee class. I need them to be parsed by jsoup.

The code I have tried:

 class kp extends AsyncTask<Void,Void,Void> {
            ArrayList<HashMap<String, String>> arraylist2 = new ArrayList<>();
            @Override
            protected void onPreExecute() {
                super.onPreExecute();

            }
            @Override
            protected Void doInBackground(Void... voids) {
                try {
                    Document doc = Jsoup.connect("http://www.ireff.in/plans/" + operator+"/" + state).userAgent("Mozilla/5.0 " +
                            "(Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.101 Safari/537.36").get();
                    int count = 0, j = 0, i = 0;    
                    String TopupTable="";

                    for (Element table : doc.select("div[id=Topup]")) {
                        for (Element row : table.select("tr")) {
                            count++;

                            TopupTable=TopupTable+row.toString();//has all the values of topup category

                            System.out.print(TopupTable+"TopupTable row string here");
                        }

                    }

    ....
    ....
    ....
                    Elements r2;
                    String temp;
                    Document doc2 = Jsoup.parse(TopupTable, "",Parser.xmlParser());//doc2 has the TopupTable string converted to a "Document" type variable
                    for (Element table : doc.select("div[id=Topup]")) {
                        for (Element row : table.select("tr")) {
                            i++;
                            j++;
                            k++;


                            try {
                                Elements tds = row.select("td:not([rowspan])");
                                if(tds.contains("tr[id=download]"))
                                    continue;

     Elements tds2 = doc2.getElementsByClass("td[class=pricecell]");
temp=doc2.getElementsByClass("span[class=WebRupee]").toString();//trying to get those numeric values and store it in temp variable
                                        System.out.print(temp+"temp var");

I am getting a blank value for the temp variable. Kindly tell me where I am going wrong.

Thank you for your time :-) If you need more details regarding this question, kindly comment below.

Upvotes: 0

Views: 819

Answers (3)

Zack
Zack

Reputation: 4037

Just use selectors and org.jsoup.nodes.Element.ownText() to extract the cell text without the children text.

Gets the text owned by this element only; does not get the combined text of all children.

     // Fetch the page, identifying as the given user agent.
     Document doc = Jsoup.connect(url).userAgent(userAgent).get();

     // Print each price cell's own text, excluding the text of its child elements.
     for (Element cell : doc.select("td.pricecell")) {
         System.out.println(cell.ownText());
     }

Output

29
59
99

Upvotes: 1

soorapadman
soorapadman

Reputation: 4509

I tried it like this, and it works for me:

/**
 * Self-contained demo: parses a fixed HTML table fragment with jsoup and prints
 * the text left in each row's {@code td.pricecell} once its {@code <span>} and
 * {@code <font>} children have been removed.
 */
public class Test {
    /**
     * Parses the embedded HTML and prints one price per table row to standard output.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String parseText = "<table><tr prodid=\"143012\" class=\"tablerow style2\">\n" +
                "            <td class=\"pricecell\"><span class=\"WebRupee\">Rs.</span> 29 <br><font style=\"font-size:smaller;font-weight:normal\"> 3 days </font></td>\n" +
                "            <td class=\"spacer\"></td>\n" +
                "            <td class=\"detailcell\"><span><span class=\"label label-default\" style=\"background-color:#3cb521;color:#fff;border:1px solid #3cb521\">FULL TT</span>&nbsp; </span><span><span class=\"label label-default\" style=\"background-color:#fff;color:#0c7abc;border:1px solid #0c7abc\">SMS</span>&nbsp; </span>\n" +
                "             <div style=\"padding-top:5px\">\n" +
                "               29 Full Talktime \n" +
                "             </div>\n" +
                "             <div class=\"detailtext\">\n" +
                "               5 Local A2A SMS valid for 1 day \n" +
                "             </div></td>\n" +
                "           </tr>\n" +
                "           <tr prodid=\"127535\" class=\"tablerow style2\">\n" +
                "            <td class=\"pricecell\"><span class=\"WebRupee\">Rs.</span> 59 <br><font style=\"font-size:smaller;font-weight:normal\"> 7 days </font></td>\n" +
                "            <td class=\"spacer\"></td>\n" +
                "            <td class=\"detailcell\"><span><span class=\"label label-default\" style=\"background-color:#3cb521;color:#fff;border:1px solid #3cb521\">FULL TT</span>&nbsp; </span><span><span class=\"label label-default\" style=\"background-color:#fff;color:#0c7abc;border:1px solid #0c7abc\">SMS</span>&nbsp; </span>\n" +
                "             <div style=\"padding-top:5px\">\n" +
                "               59 Full Talktime \n" +
                "             </div>\n" +
                "             <div class=\"detailtext\">\n" +
                "               10 A2A SMS valid for 2 days \n" +
                "             </div></td>\n" +
                "           </tr>\n" +
                "           <tr prodid=\"143025\" class=\"tablerow style2\">\n" +
                "            <td class=\"pricecell\"><span class=\"WebRupee\">Rs.</span> 99 <br><font style=\"font-size:smaller;font-weight:normal\"> 12 days </font></td>\n" +
                "            <td class=\"spacer\"></td>\n" +
                "            <td class=\"detailcell\"><span><span class=\"label label-default\" style=\"background-color:#3cb521;color:#fff;border:1px solid #3cb521\">FULL TT</span>&nbsp; </span>\n" +
                "             <div style=\"padding-top:5px\">\n" +
                "               99 Full Talktime \n" +
                "             </div>\n" +
                "             <div class=\"detailtext\">\n" +
                "               10 Local A2A SMS for 2 days only \n" +
                "             </div></td>\n" +
                "           </tr></table>";

        Document doc = Jsoup.parse(parseText);
        // Remove every <font> and <span> so the only text left in a price cell
        // is the amount itself.
        doc.select("font").remove();
        doc.select("span").remove();
        for (Element row : doc.select("tr")) {
            // first() returns null when the row has no price cell, where get(0)
            // would throw IndexOutOfBoundsException.
            Element priceCell = row.select("td.pricecell").first();
            if (priceCell == null) {
                continue;
            }
            // text() already yields whitespace-normalized plain text, so no
            // Jsoup.clean(...) sanitizer pass is needed for the numeric price.
            System.out.println(priceCell.text());
        }
    }
}

Output:

29
59
99

Upvotes: 1

Davide Pastore
Davide Pastore

Reputation: 8738

You can use Node.childNodes to retrieve the List of Node objects and check the instance of each of them (in your case TextNode):

// Print the text that sits directly inside each row's price cell, ignoring
// text that belongs to child elements.
Document doc = Jsoup.parse(html);
for (Element tr : doc.select("table tr")) {
    Element priceCell = tr.select(".pricecell").first();
    if (priceCell == null) {
        // Row without a .pricecell: nothing to print, and dereferencing the
        // null result of first() would throw NullPointerException.
        continue;
    }
    for (Node child : priceCell.childNodes()) {
        if (child instanceof TextNode) {
            String text = ((TextNode) child).text().trim();
            // Skip whitespace-only text nodes so no blank lines are printed.
            if (!text.isEmpty()) {
                System.out.println(text);
            }
        }
    }
}

Upvotes: 0

Related Questions