Reputation: 1929
When I parse the file using Apace POI, the empty rows are getting skipped and a List of String arrays with non-empty rows are being returned.
How can I tell the API, not to skip reading the empty row or columns ?
The code to read is somehow skipping the rows with no data.
XExcelFileReader fileReader = new XExcelFileReader(excelByteArray, sheetFromWhichToRead);
dataFromFile = fileReader.readRows();
is used to read the data from the class XExcelFileReader. The variable dataFromFile is a List of String array.
This is the code to read the data row:
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.regex.Pattern;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import org.apache.commons.lang.StringUtils;
import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
/**
* This class is responsible for reading the row contents for a given byte array and sheet name
*/
public class XExcelFileReader {
/**
* This is the Row Num of the current row that was read
*/
private int rowNum = 0;
/**
* The OPCPackage is the package used to laod the Input Stream used for only .xlsx Files
*/
private OPCPackage opcPkg;
/**
* These are the String Tables that are read from the Excel File
*/
private ReadOnlySharedStringsTable stringsTable;
/**
* The XML Streaming API will be used
*/
private XMLStreamReader xmlReader;
/** The styles table which has formatting information about cells. */
private StylesTable styles;
/**
* @param excelByteArray the excel byte array
* @param sheetFromWhichToRead the excel sheet from which to read
* @throws Exception the exception
*/
public XExcelFileReader(final byte[] excelByteArray, final String sheetFromWhichToRead) throws Exception {
InputStream excelStream = new ByteArrayInputStream(excelByteArray);
opcPkg = OPCPackage.open(excelStream);
this.stringsTable = new ReadOnlySharedStringsTable(opcPkg);
XSSFReader xssfReader = new XSSFReader(opcPkg);
styles = xssfReader.getStylesTable();
XMLInputFactory factory = XMLInputFactory.newInstance();
XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
InputStream inputStream = null;
while (iter.hasNext()) {
inputStream = iter.next();
String tempSheetName = iter.getSheetName();
if (StringUtils.isNotEmpty(tempSheetName)) {
tempSheetName = tempSheetName.trim();
if (tempSheetName.equals(sheetFromWhichToRead)) {
break;
}
}
}
xmlReader = factory.createXMLStreamReader(inputStream);
while (xmlReader.hasNext()) {
xmlReader.next();
if (xmlReader.isStartElement()) {
if (xmlReader.getLocalName().equals("sheetData")) {
break;
}
}
}
}
/**
* @return rowNum
*/
public final int rowNum() {
return rowNum;
}
/**
* @return List<String[]> List of String array which can hold the content
* @throws XMLStreamException the XMLStreamException
*/
public final List<String[]> readRows() throws XMLStreamException {
String elementName = "row";
List<String[]> dataRows = new ArrayList<String[]>();
while (xmlReader.hasNext()) {
xmlReader.next();
if (xmlReader.isStartElement()) {
if (xmlReader.getLocalName().equals(elementName)) {
rowNum++;
dataRows.add(getDataRow());
// TODO need to see if the batch Size is required
// if (dataRows.size() == batchSize)
// break;
}
}
}
return dataRows;
}
/**
* @return String [] of Row Data
* @throws XMLStreamException the XMLStreamException
*/
private String[] getDataRow() throws XMLStreamException {
List<String> rowValues = new ArrayList<String>();
while (xmlReader.hasNext()) {
xmlReader.next();
if (xmlReader.isStartElement()) {
if (xmlReader.getLocalName().equals("c")) {
CellReference cellReference = new CellReference(xmlReader.getAttributeValue(null, "r"));
// Fill in the possible blank cells!
while (rowValues.size() < cellReference.getCol()) {
rowValues.add("");
}
String cellType = xmlReader.getAttributeValue(null, "t");
String cellStyleStr = xmlReader.getAttributeValue(null, "s");
rowValues.add(getCellValue(cellType, cellStyleStr));
}
} else if (xmlReader.isEndElement() && xmlReader.getLocalName().equals("row")) {
break;
}
}
return rowValues.toArray(new String[rowValues.size()]);
}
/**
* @param cellType the cell type
* @param cellStyleStr the cell style
* @return cell content the cell value
* @throws XMLStreamException the XMLStreamException
*/
private String getCellValue(final String cellType, final String cellStyleStr) throws XMLStreamException {
String value = ""; // by default
while (xmlReader.hasNext()) {
xmlReader.next();
if (xmlReader.isStartElement()) {
if (xmlReader.getLocalName().equals("v")) {
if (cellType != null && cellType.equals("s")) {
int idx = Integer.parseInt(xmlReader.getElementText());
String s = stringsTable.getEntryAt(idx);
return new XSSFRichTextString(s).toString();
}
if (cellStyleStr != null) {
String cellValue = xmlReader.getElementText();
int styleIndex = Integer.parseInt(cellStyleStr);
XSSFCellStyle style = styles.getStyleAt(styleIndex);
short formatIndex = style.getDataFormat();
if (!isValidDouble(cellValue)) {
return cellValue;
}
double doubleVal = Double.valueOf(cellValue);
boolean isValidExcelDate = HSSFDateUtil.isInternalDateFormat(formatIndex);
Date date = null;
if (isValidExcelDate) {
date = HSSFDateUtil.getJavaDate(doubleVal);
String dateStr = dateAsString(date);
return dateStr;
}
if (!(doubleVal == Math.floor(doubleVal))) {
return Double.toString(doubleVal);
}
return cellValue;
}
else {
return xmlReader.getElementText();
}
}
} else if (xmlReader.isEndElement() && xmlReader.getLocalName().equals("c")) {
break;
}
}
return value;
}
/**
* To check whether the incoming value can be used in the Double utility method Double.valueOf() to prevent
* NumberFormatException.
* @param stringVal - String to be validated.
* @return - true if it is a valid String which can be passed into Double.valueOf method. <br/> For more information
* refer- <a>https://docs.oracle.com/javase/7/docs/api/java/lang/Double.html#valueOf(java.lang.String)</a>
*/
private boolean isValidDouble(String stringVal) {
final String Digits = "(\\p{Digit}+)";
final String HexDigits = "(\\p{XDigit}+)";
// an exponent is 'e' or 'E' followed by an optionally
// signed decimal integer.
final String Exp = "[eE][+-]?" + Digits;
final String fpRegex = ("[\\x00-\\x20]*" + // Optional leading "whitespace"
"[+-]?(" + // Optional sign character
"NaN|" + // "NaN" string
"Infinity|" + // "Infinity" string
// A decimal floating-point string representing a finite positive
// number without a leading sign has at most five basic pieces:
// Digits . Digits ExponentPart FloatTypeSuffix
//
// Since this method allows integer-only strings as input
// in addition to strings of floating-point literals, the
// two sub-patterns below are simplifications of the grammar
// productions from the Java Language Specification, 2nd
// edition, section 3.10.2.
// Digits ._opt Digits_opt ExponentPart_opt FloatTypeSuffix_opt
"(((" + Digits + "(\\.)?(" + Digits + "?)(" + Exp + ")?)|" +
// . Digits ExponentPart_opt FloatTypeSuffix_opt
"(\\.(" + Digits + ")(" + Exp + ")?)|" +
// Hexadecimal strings
"((" +
// 0[xX] HexDigits ._opt BinaryExponent FloatTypeSuffix_opt
"(0[xX]" + HexDigits + "(\\.)?)|" +
// 0[xX] HexDigits_opt . HexDigits BinaryExponent FloatTypeSuffix_opt
"(0[xX]" + HexDigits + "?(\\.)" + HexDigits + ")" +
")[pP][+-]?" + Digits + "))" + "[fFdD]?))" + "[\\x00-\\x20]*");// Optional trailing "whitespace"
if (Pattern.matches(fpRegex, stringVal))
return true;
else {
return false;
}
}
/**
* Date as string.
* @param date the date
* @return the string
*/
public static String dateAsString(final Date date) {
String dateAsString = null;
if (date != null) {
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
dateAsString = sdf.format(date);
}
return dateAsString;
}
@Override
protected final void finalize() throws Throwable {
if (opcPkg != null) {
opcPkg.close();
}
super.finalize();
}
}
I want the Empty row also to be part of the List of String array. Just that the String array will be empty.
If it is in the Excel like:
First Row
<< No Data >>
Third Row
then the List should be
dataFromFile
0 ->[First Row]
1 ->[]
2 ->[Third Row]
Upvotes: 2
Views: 11320
Reputation: 11473
Here is what the Busy Developers Guide on the POI site says about reading sheets with missing rows:
for (int rowNum = rowStart; rowNum < rowEnd; rowNum++) {
Row r = sheet.getRow(rowNum);
if (r == null) {
// This whole row is empty
// Handle it as needed
continue;
}
int lastColumn = Math.max(r.getLastCellNum(), MY_MINIMUM_COLUMN_COUNT);
for (int cn = 0; cn < lastColumn; cn++) {
Cell c = r.getCell(cn, Row.RETURN_BLANK_AS_NULL);
if (c == null) {
// The spreadsheet is empty in this cell
} else {
// Do something useful with the cell's contents
}
}
}
You really can get a lot of your answers there. You shouldn't ever have to parse the XML directly. Even things that are missing from the high level usermodel usually have a way to get at it through the CT classes generated by XMLBeans.
Upvotes: 7