Reputation: 1374
The HTML table input doesn't have TH for table headers and has TD with bold text instead
It looks like this:
<html>
<body>
<table>
<tr>
<td>
<b>column1</b>
</td>
<td>
<b>column2</b>
</td>
<td>
<b>column3</b>
</td>
</tr><tr>
<td>
value1
</td>
<td>
value2
</td>
<td>
value3
</td>
</tr>
</table>
</html>
So I would like the HTML table headers (the bold TDs in the first row) to be processed as JTable columns, and the TDs in every row after the first to be processed as JTable cells.
Upvotes: 1
Views: 323
Reputation: 324118
If you want to use standard classes of the JDK you could do something like:
import java.io.*;
import java.net.*;
import java.util.*;
import javax.swing.*;
import javax.swing.table.*;
import javax.swing.text.*;
import javax.swing.text.html.*;
import javax.swing.text.html.parser.*;
/**
 * Parser callback that turns the rows of an HTML table into the data of a
 * Swing {@link TableModel}.
 *
 * <p>The first {@code <tr>} that is closed supplies the column names; every
 * later {@code <tr>} becomes one data row. Each {@code <td>}/{@code <th>}
 * produces exactly one value, so empty cells keep their position and cells
 * containing inline markup (for example {@code <b>}) are not split into
 * several values.
 */
public class ParserCallbackTable extends HTMLEditorKit.ParserCallback
{
    /** True while the parser is between {@code <table>} and {@code </table>}. */
    private boolean tableFound = false;
    /** Values of the first completed row; {@code null} until that row is closed. */
    private Vector<Object> columnNames;
    /** All completed rows after the header row. */
    private Vector<Vector<Object>> data = new Vector<Vector<Object>>();
    /** Row currently being filled, or {@code null} when no row is open. */
    private Vector<Object> row;
    /** Text collected for the cell currently open, or {@code null} outside a cell. */
    private StringBuilder cell;

    /**
     * Reads and parses the document immediately.
     *
     * <p>Failures are reported on standard error and leave the model with
     * whatever had been parsed up to that point (best effort).
     *
     * @param uri an {@code http://}/{@code https://} URL or a local file name
     */
    public ParserCallbackTable(String uri)
    {
        // try-with-resources: the original never closed the stream.
        try (Reader reader = getReader( uri ))
        {
            new ParserDelegator().parse(reader, this, true);
        }
        catch(Exception e)
        {
            // Deliberately best effort: report and carry on with an empty/partial model.
            e.printStackTrace();
        }
    }

    /**
     * Tracks the opening of tables, rows and cells.
     */
    @Override
    public void handleStartTag(HTML.Tag tag, MutableAttributeSet a, int pos)
    {
        if (tag == HTML.Tag.TABLE)
        {
            tableFound = true;
        }
        else if (tag == HTML.Tag.TR)
        {
            row = new Vector<Object>();
        }
        else if (tableFound && (tag == HTML.Tag.TD || tag == HTML.Tag.TH))
        {
            if (row == null) // cell without an enclosing <tr>
            {
                row = new Vector<Object>();
            }
            cell = new StringBuilder();
        }
    }

    /**
     * Completes the open cell, row or table.
     */
    @Override
    public void handleEndTag(HTML.Tag tag, int pos)
    {
        if (tag == HTML.Tag.TD || tag == HTML.Tag.TH)
        {
            if (cell != null)
            {
                // Added even when empty so later values stay in their column.
                row.add( cell.toString().trim() );
                cell = null;
            }
        }
        else if (tag == HTML.Tag.TR)
        {
            finishRow();
        }
        else if (tag == HTML.Tag.TABLE)
        {
            finishRow(); // flush a row that was never explicitly closed
            tableFound = false;
        }
    }

    /**
     * Appends text to the open cell; text outside a cell is ignored.
     */
    @Override
    public void handleText(char[] data, int pos)
    {
        if (cell != null)
        {
            cell.append( data );
        }
    }

    /**
     * Builds a table model from everything parsed so far.
     *
     * @return a new {@link DefaultTableModel} backed by the collected rows and column names
     */
    public TableModel getModel()
    {
        return new DefaultTableModel(data, columnNames);
    }

    /** Stores the open row as header (first row) or as data, then closes it. */
    private void finishRow()
    {
        if (row == null)
        {
            return;
        }

        if (columnNames == null) // first row of data is the column names
        {
            columnNames = row;
        }
        else
        {
            data.add( row );
        }

        row = null;
    }

    /**
     * Opens the document for reading.
     *
     * @param uri an {@code http://}/{@code https://} URL or a local file name
     * @return a reader over the document; the caller closes it
     * @throws IOException if the URL or file cannot be opened
     */
    private Reader getReader(String uri) throws IOException
    {
        // Require the full scheme so a local file such as "http_dump.html" is not treated as a URL.
        if (uri.startsWith("http://") || uri.startsWith("https://")) // Retrieve from Internet
        {
            URLConnection conn = new URL(uri).openConnection();
            return new InputStreamReader(conn.getInputStream());
        }
        else // Retrieve from file
        {
            return new FileReader(uri);
        }
    }

    /**
     * Shows the first table data of the given HTML document in a frame.
     *
     * @param args a single element: the URL or file name of the HTML document
     */
    public static void main(String[] args) throws IOException
    {
        if (args.length == 0)
        {
            System.err.println("Usage: java ParserCallbackTable <html-file-or-url>");
            return;
        }

        ParserCallbackTable parser = new ParserCallbackTable( args[0] );
        final TableModel model = parser.getModel();

        // Swing components must be created and shown on the event dispatch thread.
        SwingUtilities.invokeLater(() ->
        {
            JTable table = new JTable( model );
            JFrame frame = new JFrame("HTML to JTable");
            frame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
            frame.add( new JScrollPane(table) );
            frame.pack();
            frame.setLocationByPlatform( true );
            frame.setVisible( true );
        });
    }
}
Just pass the name of your HTML file for a quick test. For example:
java ParserCallbackTable table.html
Upvotes: 1
Reputation: 1374
I chose to use this parser as a maven dependency
<dependency>
<groupId>org.htmlparser</groupId>
<artifactId>htmlparser</artifactId>
<version>2.1</version>
</dependency>
I invoked the SAX parser like this :
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import javax.swing.JFrame;
import javax.swing.JScrollPane;
import javax.swing.JTable;
import javax.swing.SwingUtilities;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
/**
 * Small demo driver: parses {@code test.html} from the user's home directory
 * through the SAX reader named {@code org.htmlparser.sax.XMLReader} and shows
 * the table collected by {@link HTMLTableContentHandler} in a {@link JTable}.
 */
public class TestHTML {

    /**
     * Parses the file, then hands the UI construction to the event dispatch thread.
     *
     * @param args ignored
     * @throws IOException  if the HTML file cannot be read
     * @throws SAXException if the reader cannot be created or parsing fails
     */
    public static void main(String[] args) throws IOException, SAXException {
        org.xml.sax.XMLReader saxReader =
                org.xml.sax.helpers.XMLReaderFactory.createXMLReader("org.htmlparser.sax.XMLReader");
        HTMLTableContentHandler tableHandler = new HTMLTableContentHandler();
        saxReader.setContentHandler(tableHandler);

        // The whole file is loaded into memory and parsed from the byte array.
        byte[] htmlBytes = Files.readAllBytes(Paths.get(System.getProperty("user.home"), "test.html"));
        saxReader.parse(new InputSource(new ByteArrayInputStream(htmlBytes)));

        SwingUtilities.invokeLater(() -> showTable(tableHandler));
    }

    /** Builds and displays the frame holding the parsed table; runs on the event dispatch thread. */
    private static void showTable(HTMLTableContentHandler tableHandler) {
        JFrame window = new JFrame();
        window.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
        JTable table = new JTable(tableHandler.getRowData(), tableHandler.getColumnNames());
        window.add(new JScrollPane(table));
        window.pack();
        window.setVisible(true);
    }
}
The content handler fills the arrays for the JTable as it goes through the SAX events:
import java.util.ArrayList;
import java.util.List;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
 * SAX content handler that collects an HTML table into the arrays expected by
 * {@code JTable(Object[][], Object[])}.
 *
 * <p>Column names are taken from {@code B} elements seen before the first
 * {@code TR} is closed; every {@code TD} closed after that contributes one cell
 * to the current data row. Element names are matched in upper case only.
 */
public class HTMLTableContentHandler extends DefaultHandler {

    private final List<Object> columnNames = new ArrayList<>();
    private final List<Object[]> rowData = new ArrayList<>();
    /** Text gathered since the last {@code endElement}; cleared on every {@code endElement}. */
    private final StringBuilder content = new StringBuilder();
    /** Number of {@code TR} elements closed so far; 0 means the header row is still open. */
    private int rowIndex;
    /** Index of the next cell within the current data row. */
    private int colIndex;
    private boolean readingColumnName;
    private boolean readingCell;
    /** Array of the data row being filled, or {@code null} until its first cell is closed. */
    private Object[] currentRow;

    /**
     * Starts or finishes a column name on {@code B} (header row only) and marks
     * the start of a data cell on {@code TD}.
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        if ("B".equals(localName) && rowIndex == 0) {
            // Toggle: the first B event starts a column name, the next one stores it.
            // NOTE(review): this presumably relies on the parser reporting both <b> and </b>
            // as startElement events with no endElement in between - confirm against the
            // XMLReader in use before switching parsers.
            if (readingColumnName) {
                columnNames.add(content.toString().trim());
                readingColumnName = false;
            } else {
                readingColumnName = true;
            }
        }
        if ("TD".equals(localName) && rowIndex > 0) {
            readingCell = true;
        }
    }

    /**
     * Buffers text while a column name or a data cell is being read.
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        if (readingColumnName || readingCell) {
            content.append(ch, start, length);
        }
    }

    /**
     * Closes rows and cells. The text buffer is cleared after every end event.
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        if ("TR".equals(localName)) {
            rowIndex++;
            colIndex = 0;
            currentRow = null;
        }
        if ("TD".equals(localName) && rowIndex > 0) {
            // The row is tracked by reference rather than looked up via rowIndex, so a TR
            // without any TD no longer shifts the index and breaks the rows that follow.
            if (currentRow == null) {
                currentRow = new Object[columnNames.size()];
                rowData.add(currentRow);
            }
            // Cells beyond the number of column names are ignored instead of overflowing the array.
            if (colIndex < currentRow.length) {
                currentRow[colIndex] = content.toString().trim();
            }
            colIndex++;
            readingCell = false;
        }
        content.setLength(0);
    }

    /**
     * @return the collected column names, in document order
     */
    public Object[] getColumnNames() {
        return columnNames.toArray();
    }

    /**
     * @return one array per data row, each as long as the number of column names
     *         known when the row's first cell was closed
     */
    public Object[][] getRowData() {
        return rowData.toArray(new Object[0][]);
    }
}
Upvotes: 1