Ehrakis
Ehrakis

Reputation: 327

Java - Filtering rows of an Excel file using POI

I have an Excel file with many rows (more than 60,000) and I want to apply a filter on them in order to read only the rows that I'm looking for.

I'm using the POI library in Java but I didn't find how to filter values.

For example, with the following data in my Excel file:

First name | Last name | Age
-----------+-----------+----
Jhon       | Doe       |  25
Foo        | Bar       |  20
Aaa        | Doe       |  22

How could I select every row with the last name equal to Doe ?

This is my code so far:

public void parseExcelFile(XSSFWorkbook myExcelFile) {
    XSSFSheet worksheet = myExcelFile.getSheetAt(1);

    // Cell range to filter
    CellRangeAddress data = new CellRangeAddress(
            1,
            worksheet.getLastRowNum(),
            0,
            worksheet.getRow(0).getPhysicalNumberOfCells());

    worksheet.setAutoFilter(data);
}

I tried to use the AutoFilter but I don't know how it works.

I'm looking for a feature that would look like this:

Filter filter = new Filter();
filter.setRange(myRange);
filter.addFilter(
    0, // The column index
    "Doe" // The value that I'm searching for
)
filter.apply()

This is purely hypothetical code.

Thank you for your help !

Upvotes: 3

Views: 8097

Answers (2)

Ehrakis
Ehrakis

Reputation: 327

Maybe this could help other people, so this the solution I came up with before this answer.
Take in consideration that I am not very good with Java so the following code can surely be optimized.

I implemented a filter by myself, to do so, I created 3 classes :

  • ExcelWorksheetFilter
  • FilterRule
  • FilterRuleOperation

ExcelWorksheetFilter

import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.util.CellRangeAddress;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;

import java.util.ArrayList;
import java.util.List;

public class ExcelWorksheetFilter {

    private List<FilterRule> ruleList = new ArrayList<>();
    private CellRangeAddress cellRange;
    private XSSFSheet worksheet;
    private XSSFWorkbook workbook;

    public ExcelWorksheetFilter(XSSFWorkbook workbook, int worksheetId) {
        this.workbook = workbook;
        this.worksheet = workbook.getSheetAt(worksheetId);
    }

    /**
     * Apply rules of ruleList to the worksheet.
     * The row is put in the result if at least one rule match.
     */
    public void apply(){

        for(int rowId = cellRange.getFirstRow(); rowId <= cellRange.getLastRow(); rowId++){
            worksheet.getRow(rowId).getCTRow().setHidden(true);
            for(FilterRule rule : ruleList){
                if(rule.match(worksheet.getRow(rowId))){
                    worksheet.getRow(rowId).getCTRow().setHidden(false);
                    break;
                }
            }
        }
    }

    /**
     * Apply rules of ruleList to the worksheet.
     * The row is put in the result if every rules match.
     */
    public void applyStrict(){
        for(int rowId = cellRange.getFirstRow(); rowId <= cellRange.getLastRow(); rowId++){
            worksheet.getRow(rowId).getCTRow().setHidden(false);
            for(FilterRule rule : ruleList){
                if(!rule.match(worksheet.getRow(rowId))){
                    worksheet.getRow(rowId).getCTRow().setHidden(true);
                    break;
                }
            }
        }
    }

    public List<Row> getRowList(){
        List<Row> rowList = new ArrayList<>();

        for(int rowId = cellRange.getFirstRow(); rowId <= cellRange.getLastRow(); rowId++){
            if(!worksheet.getRow(rowId).getCTRow().getHidden()){
                rowList.add(worksheet.getRow(rowId));
            }
        }

        return rowList;
    }

    public void addRule(FilterRule rule) {
        this.ruleList.add(rule);
    }

    // Getters and setters omitted...
}

FilterRule

import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.usermodel.XSSFRow;

public class FilterRule {

    private final static DataFormatter df = new DataFormatter();

    private Integer columnId;
    private String[] values;
    private FilterRuleOperation operator;

    public FilterRule(Integer columnId, FilterRuleOperation operator, String[] values){
        this.columnId = columnId;
        this.operator = operator;
        this.values = values;
    }

    /**
     * If at least one of the value matches return true.
     * @param row The row to match
     * @return a boolean
     */
    public boolean match(XSSFRow row){
        for(String value : values){
            if(operator.match(df.formatCellValue(row.getCell(columnId)), value)){
                return true;
            };
        }
        return false;
    }
}

FilterRuleOperation

public enum FilterRuleOperation {

    DIFFERENT("!="){
        @Override public boolean match(String x, String y){
            return !x.equals(y);
        }
    },
    EQUAL("=="){
        @Override public boolean match(String x, String y){
            return x.equals(y);
        }
    };

    private final String text;

    private FilterRuleOperation(String text) {
        this.text = text;
    }

    public abstract boolean match(String x, String y);

    @Override public String toString() {
        return text;
    }
}

Then you can use it almost like described the OP.
For example with this Excel file:
Excel data screenshot

And this code:

public void parseExcelFile(XSSFWorkbook myExcelFile) {
    XSSFSheet worksheet = myExcelFile.getSheetAt(1);

    // Create the filter
    ExcelWorksheetFilter excelWorksheetFilter = new ExcelWorksheetFilter(myExcelFile, 0);
    excelWorksheetFilter.setCellRange(new CellRangeAddress(
        1, // Exclude the row with columns titles
        worksheet.getLastRowNum(),
        0,
        worksheet.getRow(0).getPhysicalNumberOfCells()-1
    ));

    // Create rules for filtering
    excelWorksheetFilter.addRule(new FilterRule(
            1, // Last name column
            FilterRuleOperation.EQUAL,
            new String[]{"Doe"}
            ));

    excelWorksheetFilter.addRule(new FilterRule(
            0, // First name column
            FilterRuleOperation.EQUAL,
            new String[]{"Jhon"}
    ));

    // Apply with applyStrict function puts a AND condition between rules
    excelWorksheetFilter.applyStrict();
    // You can also use apply function it puts a OR condition between rules
    // excelWorksheetFilter.apply();
    
    excelWorksheetFilter.getRowList().forEach(row -> {
        for(int i = 0; i <3; i++) {
            System.out.print(df.formatCellValue(row.getCell(i)) + '\t');
        }
        System.out.println();
    });

    // Save the file
    FileOutputStream out = new FileOutputStream("filter_test.xlsx");
    excelWorksheetFilter.getWorkbook().write(out);
    out.close();
    excelWorksheetFilter.getWorkbook().close();
}

This will print:

Jhon    Doe 25

enter image description here

And if you use excelWorksheetFilter.apply() it will print:

Jhon    Doe    25   
Aaa     Doe    22   
Jhon    Smith  30

enter image description here

The two main downside are:

  • It doesn't use the Excel filter so it is harder to use the Excel file afterward.
  • Not memory efficient as the ExcelWorksheetFilter.getRowList() function return a list and not an Iterator.

Also it only works with strings but I suppose that it could be adapted to work with other type of data.

Upvotes: 0

Axel Richter
Axel Richter

Reputation: 61945

If your question is how to set AutoFilter criteria "Doe" for last name, then this only can be achieved using underlying low level ooxml-schemas classes. The XSSFAutoFilter is useless until now. It does not provide any methods until now.

Complete example using your example data:

import org.apache.poi.ss.usermodel.*;
import org.apache.poi.ss.util.*;
import org.apache.poi.xssf.usermodel.*;

import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTAutoFilter;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTFilterColumn;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTFilters;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCustomFilters;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCustomFilter;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.STFilterOperator;

import java.io.FileOutputStream;

class AutoFilterSetTest {

 private static void setCellData(Sheet sheet) {

  Object[][] data = new Object[][] {
   new Object[] {"First name", "Last name", "Age"},
   new Object[] {"John", "Doe", 25},
   new Object[] {"Foo", "Bar", 20},
   new Object[] {"Jane", "Doe", 22},
   new Object[] {"Ruth", "Moss", 42},
   new Object[] {"Manuel", "Doe", 32},
   new Object[] {"Axel", "Richter", 56},
  };

  Row row = null;
  Cell cell = null;
  int r = 0;
  int c = 0;
  for (Object[] dataRow : data) {
   row = sheet.createRow(r);
   c = 0;
   for (Object dataValue : dataRow) {
    cell = row.createCell(c);
    if (dataValue instanceof String) {
     cell.setCellValue((String)dataValue);
    } else if (dataValue instanceof Number) {
     cell.setCellValue(((Number)dataValue).doubleValue());
    }
    c++;
   }
   r++;
  }
 }

 private static void setCriteriaFilter(XSSFSheet sheet, int colId, int firstRow, int lastRow, String[] criteria) throws Exception {
  CTAutoFilter ctAutoFilter = sheet.getCTWorksheet().getAutoFilter();
  CTFilterColumn ctFilterColumn = null;
  for (CTFilterColumn filterColumn : ctAutoFilter.getFilterColumnList()) {
   if (filterColumn.getColId() == colId) ctFilterColumn = filterColumn;
  }
  if (ctFilterColumn == null) ctFilterColumn = ctAutoFilter.addNewFilterColumn();
  ctFilterColumn.setColId(colId);
  if (ctFilterColumn.isSetFilters()) ctFilterColumn.unsetFilters();

  CTFilters ctFilters = ctFilterColumn.addNewFilters();
  for (int i = 0; i < criteria.length; i++) {
   ctFilters.addNewFilter().setVal(criteria[i]);
  }

  //hiding the rows not matching the criterias
  DataFormatter dataformatter = new DataFormatter();
  for (int r = firstRow; r <= lastRow; r++) {
   XSSFRow row = sheet.getRow(r);
   boolean hidden = true;
   for (int i = 0; i < criteria.length; i++) {
    String cellValue = dataformatter.formatCellValue(row.getCell(colId));
    if (criteria[i].equals(cellValue)) hidden = false;
   }
   if (hidden) {
    row.getCTRow().setHidden(hidden);
   } else {
    if (row.getCTRow().getHidden()) row.getCTRow().unsetHidden();
   }
  }
 }

 public static void main(String[] args) throws Exception {

  XSSFWorkbook wb = new XSSFWorkbook();
  XSSFSheet sheet = wb.createSheet();

  //create rows of data
  setCellData(sheet);

  for (int c = 0; c < 2; c++) sheet.autoSizeColumn(c);

  int lastRow = sheet.getLastRowNum();
  XSSFAutoFilter autofilter = sheet.setAutoFilter(new CellRangeAddress(0, lastRow, 0, 2));
  //XSSFAutoFilter is useless until now

  //set filter criteria 
  setCriteriaFilter(sheet, 1, 1, lastRow, new String[]{"Doe"});

  //get only visible rows after filtering
  XSSFRow row = null;
  for (int r = 1; r <= lastRow; r++) {
   row = sheet.getRow(r);
   if (row.getCTRow().getHidden()) continue;
   for (int c = 0; c < 3; c++) {
    System.out.print(row.getCell(c) + "\t");
   }
   System.out.println();
  }

  FileOutputStream out = new FileOutputStream("AutoFilterSetTest.xlsx");
  wb.write(out);
  out.close();
  wb.close();
 }
}

It prints:

John    Doe   25.0  
Jane    Doe   22.0  
Manuel  Doe   32.0  

Resulting AutoFilterSetTest.xlsx looks like:

enter image description here

Upvotes: 2

Related Questions