Reputation: 765
Actually i am working on a java program that extracts data from an Excel file, and i am using the POI Library, as a matter of fact i must specify the type of every extracted value, but the file contains a huge number of data with different types, So i am asking if there is another way to get all the data as a string.
Thank you. Best regards
package DAO;
import java.io.FileInputStream;
import java.util.Iterator;
import java.util.Vector;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
public class ReadExcelFile {
public static void main(String[] args) {
String fileName = "C:\\Users\\marrah\\Desktop\\TRIAL FILE1.xls";
Vector dataHolder = ReadCSV(fileName);
printCellData(dataHolder);
}
public static Vector ReadCSV(String fileName) {
Vector cellVectorHolder = new Vector();
try {
FileInputStream myInput = new FileInputStream(fileName);
POIFSFileSystem myFileSystem = new POIFSFileSystem(myInput);
HSSFWorkbook myWorkBook = new HSSFWorkbook(myFileSystem);
HSSFSheet mySheet = myWorkBook.getSheetAt(0);
Iterator rowIter = mySheet.rowIterator();
while (rowIter.hasNext()) {
HSSFRow myRow = (HSSFRow) rowIter.next();
Iterator cellIter = myRow.cellIterator();
Vector cellStoreVector = new Vector();
while (cellIter.hasNext()) {
HSSFCell myCell = (HSSFCell) cellIter.next();
cellStoreVector.addElement(myCell);
}
cellVectorHolder.addElement(cellStoreVector);
}
} catch (Exception e) {
e.printStackTrace();
}
return cellVectorHolder;
}
private static void printCellData(Vector dataHolder) {
for (int i = 0; i < dataHolder.size(); i++) {
Vector cellStoreVector = (Vector) dataHolder.elementAt(i);
for (int j = 0; j < cellStoreVector.size(); j++) {
HSSFCell myCell = (HSSFCell) cellStoreVector.elementAt(j);
Object stringCellValue="";
stringCellValue =cellStoreVector.get(j).toString();
System.out.print(stringCellValue.toString()+"\t");
}
}
}
}
Upvotes: 4
Views: 13363
Reputation: 15872
I have a unit-test where I use the following to extract all text from an Excel file without any of the formatting, for some use-cases this might be quicker than iterating over all the elements one-by-one:
private POITextExtractor extractText(File file) throws IOException {
InputStream inp = null;
try {
inp = new PushbackInputStream(
new FileInputStream(file), 8);
if(POIFSFileSystem.hasPOIFSHeader(inp)) {
return createExtractor(new POIFSFileSystem(inp));
}
throw new IllegalArgumentException("Your File was neither an OLE2 file, nor an OOXML file");
} finally {
if(inp != null) inp.close();
}
}
private static POITextExtractor createExtractor(POIFSFileSystem fs) throws IOException {
return createExtractor(fs.getRoot(), fs);
}
private static POITextExtractor createExtractor(DirectoryNode poifsDir, POIFSFileSystem fs) throws IOException {
for(Iterator<Entry> entries = poifsDir.getEntries(); entries.hasNext(); ) {
Entry entry = entries.next();
if(entry.getName().equals("Workbook")) {
{
return new ExcelExtractor(poifsDir, fs);
}
}
}
throw new IllegalArgumentException("No supported documents found in the OLE2 stream");
}
private String assertContains(File file, String... contents) throws IOException {
assertTrue(file.exists());
POITextExtractor extractor = extractText(file);
assertNotNull(extractor);
String str = extractor.getText();
for(String s : contents) {
assertTrue("Did expect to find text '" + s + "' in resulting Excel file, but did not find it in str: " + str, str.contains(s));
}
return str;
}
Upvotes: 2
Reputation: 7569
You can create a common function to use on every cell when you runs thru each row, which validates the data type and then retrieves it in your preferred format. So you move row to row and, for each cell you call something like:
private static String getCellvalue(HSSFRow poiRow, int intColActual) {
if (poiFilaActual != null && poiRowActual.getLastCellNum() >= (short) intColActual) {
HSSFCell cell = poiRowActual.getCell(intColActual);
if (cell != null) {
if (HSSFCell.CELL_TYPE_STRING == cell.getCellType()) {
return cell.getRichStringCellValue().toString();
} else if (HSSFCell.CELL_TYPE_BOOLEAN == cell.getCellType()) {
return new String( (cell.getBooleanCellValue() == true ? "true" : "false") );
} else if (HSSFCell.CELL_TYPE_BLANK == cell.getCellType()) {
return "";
} else if (HSSFCell.CELL_TYPE_NUMERIC == cell.getCellType()) {
if(HSSFDateUtil.isCellDateFormatted(cell)){
return ( new SimpleDateFormat("dd/MM/yyyy").format(cell.getDateCellValue()) );
}else{
return new BigDecimal(cell.getNumericCellValue()).toString();
}
}
}
}
return null;
}
Upvotes: 1