Reputation: 364
i am building up a java application to extract the values inside the table tags using xpath.
Please suggest me an efficient way to get all 200 values from the page. my code works perfectly fine for the 100 rows withing the 1st DataTable. However, i have no way to get to the 2nd dataTable.
i am able to extract them using the following java class.
the expected output
http://a.com/ data for a 526735 Z
http://b.com/ data for b 522273 Z
.
.
.
.
http://c.com/ data for c 578335 Z
http://d.com/ data for d 513445 Z
<table>
<tbody>
<tr>
<td style="padding-right>
<table class = dataTabe>
<tbody>
<tr>
<td><a HREF="http://a.com/" target="_parent">data for a</a></td>
<td class="numericalColumn">526735</td>
<td class="numericalColumn">Z</td></tr>
<tr>
<td><a HREF="http://b.com/" target="_parent">data for b</a></td>
<td class="numericalColumn">522273</td>
<td class="numericalColumn">B</td></tr>
.
.
.100 <tr> here
.
</tbody>
</table>
</td>
<td style="padding-right>
<table class = dataTabe>
<tbody>
<tr>
<td><a HREF="http://c.com/" target="_parent">data for c</a></td>
<td class="numericalColumn">526735</td>
<td class="numericalColumn">Z</td></tr>
<tr>
<td><a HREF="http://d.com/" target="_parent">data for d</a></td>
<td class="numericalColumn">522273</td>
<td class="numericalColumn">B</td></tr>
.
.
.100 rows here
.
</tbody>
</table>
</td>
</tr>
</tbody>
</table>
This is the class used to get the data.
import java.io.BufferedReader;
import java.io.InputStream;
import org.w3c.tidy.*;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.w3c.tidy.Node;
import org.w3c.tidy.Tidy;
import org.w3c.tidy.Tidy;
public class CompaniesGetter {
public static void main(String[] args) throws Exception{
String name,link,scripcode,group,s,key;
int a=1;
int count=1;
URL oracle = new URL("http://money.rediff.com/companies");
URLConnection yc = oracle.openConnection();
InputStream is = yc.getInputStream();
is = oracle.openStream();
Tidy tidy = new Tidy();
tidy.setQuiet(true);
tidy.setShowWarnings(false);
Document tidyDOM = tidy.parseDOM(is, null);
XPathFactory xPathFactory = XPathFactory.newInstance();
XPath xPath = xPathFactory.newXPath();
Map<String,String> mLink=new HashMap<String,String>();
Map<String,String> mCode=new HashMap<String,String>();
Map<String,String> mGroup=new HashMap<String,String>();
ArrayList<String> aName=new ArrayList<String>();
//for(int j=0;j<2;j++)
for(int i =1;i<=200;i++)
{if(i==100)
{
a=2;
s=attrib[1];
}
link = "//table[@class='dataTable']/tbody/tr["+i+"]/td/a/@href";
name = "//table[@class='dataTable']/tbody/tr["+i+"]/td/a";
scripcode = "//table[@class='dataTable']/tbody/tr["+i+"]/td[2]";
group = "//table[@class='dataTable']/tbody/tr["+i+"]/td[3]";
String linkValue = (String)xPath.evaluate(link, tidyDOM, XPathConstants.STRING);
String nameValue = (String)xPath.evaluate(name, tidyDOM, XPathConstants.STRING);
String scripValue = (String)xPath.evaluate(scripcode, tidyDOM, XPathConstants.STRING);
String groupValue = (String)xPath.evaluate(group, tidyDOM, XPathConstants.STRING);
aName.add(nameValue);
mLink.put(nameValue, linkValue);
mCode.put(nameValue, scripValue);
mGroup.put(nameValue,groupValue);
}
Iterator<String> itr=aName.iterator();
while (itr.hasNext()){
key=itr.next();
System.out.println("::"+(count++)+" "+key + " "+mLink.get(key)+" "+mCode.get(key)+" "+mGroup.get(key)+" ::");
}
}
}
Upvotes: 0
Views: 2788
Reputation: 6552
Hm. Just a tip: Do you use the variable "a" in the XPaths?
link = "//table[@class='dataTable']/tbody/tr["+i+"]/td/a/@href";
should be
link = "//table[@class='dataTable'][" + a + "]/tbody/tr["+i+"]/td/a/@href";
Upvotes: 1