Reputation: 327
I have an Excel file with many rows (more than 60,000) and I want to apply a filter on them in order to read only the rows that I'm looking for.
I'm using the POI library in Java but I didn't find how to filter values.
For example, with the following data in my Excel file:
First name | Last name | Age
-----------+-----------+----
Jhon | Doe | 25
Foo | Bar | 20
Aaa | Doe | 22
How could I select every row with the last name equal to Doe
?
This is my code so far:
public void parseExcelFile(XSSFWorkbook myExcelFile) {
XSSFSheet worksheet = myExcelFile.getSheetAt(1);
// Cell range to filter
CellRangeAddress data = new CellRangeAddress(
1,
worksheet.getLastRowNum(),
0,
worksheet.getRow(0).getPhysicalNumberOfCells());
worksheet.setAutoFilter(data);
}
I tried to use the AutoFilter
but I don't know how it works.
I'm looking for a feature that would look like this:
Filter filter = new Filter();
filter.setRange(myRange);
filter.addFilter(
0, // The column index
"Doe" // The value that I'm searching for
)
filter.apply()
This is purely hypothetical code.
Thank you for your help !
Upvotes: 3
Views: 8097
Reputation: 327
Maybe this could help other people, so this the solution I came up with before this answer.
Take in consideration that I am not very good with Java so the following code can surely be optimized.
I implemented a filter by myself, to do so, I created 3 classes :
ExcelWorksheetFilter
FilterRule
FilterRuleOperation
ExcelWorksheetFilter
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.util.CellRangeAddress;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import java.util.ArrayList;
import java.util.List;
public class ExcelWorksheetFilter {
private List<FilterRule> ruleList = new ArrayList<>();
private CellRangeAddress cellRange;
private XSSFSheet worksheet;
private XSSFWorkbook workbook;
public ExcelWorksheetFilter(XSSFWorkbook workbook, int worksheetId) {
this.workbook = workbook;
this.worksheet = workbook.getSheetAt(worksheetId);
}
/**
* Apply rules of ruleList to the worksheet.
* The row is put in the result if at least one rule match.
*/
public void apply(){
for(int rowId = cellRange.getFirstRow(); rowId <= cellRange.getLastRow(); rowId++){
worksheet.getRow(rowId).getCTRow().setHidden(true);
for(FilterRule rule : ruleList){
if(rule.match(worksheet.getRow(rowId))){
worksheet.getRow(rowId).getCTRow().setHidden(false);
break;
}
}
}
}
/**
* Apply rules of ruleList to the worksheet.
* The row is put in the result if every rules match.
*/
public void applyStrict(){
for(int rowId = cellRange.getFirstRow(); rowId <= cellRange.getLastRow(); rowId++){
worksheet.getRow(rowId).getCTRow().setHidden(false);
for(FilterRule rule : ruleList){
if(!rule.match(worksheet.getRow(rowId))){
worksheet.getRow(rowId).getCTRow().setHidden(true);
break;
}
}
}
}
public List<Row> getRowList(){
List<Row> rowList = new ArrayList<>();
for(int rowId = cellRange.getFirstRow(); rowId <= cellRange.getLastRow(); rowId++){
if(!worksheet.getRow(rowId).getCTRow().getHidden()){
rowList.add(worksheet.getRow(rowId));
}
}
return rowList;
}
public void addRule(FilterRule rule) {
this.ruleList.add(rule);
}
// Getters and setters omitted...
}
FilterRule
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.usermodel.XSSFRow;
public class FilterRule {
private final static DataFormatter df = new DataFormatter();
private Integer columnId;
private String[] values;
private FilterRuleOperation operator;
public FilterRule(Integer columnId, FilterRuleOperation operator, String[] values){
this.columnId = columnId;
this.operator = operator;
this.values = values;
}
/**
* If at least one of the value matches return true.
* @param row The row to match
* @return a boolean
*/
public boolean match(XSSFRow row){
for(String value : values){
if(operator.match(df.formatCellValue(row.getCell(columnId)), value)){
return true;
};
}
return false;
}
}
FilterRuleOperation
public enum FilterRuleOperation {
DIFFERENT("!="){
@Override public boolean match(String x, String y){
return !x.equals(y);
}
},
EQUAL("=="){
@Override public boolean match(String x, String y){
return x.equals(y);
}
};
private final String text;
private FilterRuleOperation(String text) {
this.text = text;
}
public abstract boolean match(String x, String y);
@Override public String toString() {
return text;
}
}
Then you can use it almost like described the OP.
For example with this Excel file:
And this code:
public void parseExcelFile(XSSFWorkbook myExcelFile) {
XSSFSheet worksheet = myExcelFile.getSheetAt(1);
// Create the filter
ExcelWorksheetFilter excelWorksheetFilter = new ExcelWorksheetFilter(myExcelFile, 0);
excelWorksheetFilter.setCellRange(new CellRangeAddress(
1, // Exclude the row with columns titles
worksheet.getLastRowNum(),
0,
worksheet.getRow(0).getPhysicalNumberOfCells()-1
));
// Create rules for filtering
excelWorksheetFilter.addRule(new FilterRule(
1, // Last name column
FilterRuleOperation.EQUAL,
new String[]{"Doe"}
));
excelWorksheetFilter.addRule(new FilterRule(
0, // First name column
FilterRuleOperation.EQUAL,
new String[]{"Jhon"}
));
// Apply with applyStrict function puts a AND condition between rules
excelWorksheetFilter.applyStrict();
// You can also use apply function it puts a OR condition between rules
// excelWorksheetFilter.apply();
excelWorksheetFilter.getRowList().forEach(row -> {
for(int i = 0; i <3; i++) {
System.out.print(df.formatCellValue(row.getCell(i)) + '\t');
}
System.out.println();
});
// Save the file
FileOutputStream out = new FileOutputStream("filter_test.xlsx");
excelWorksheetFilter.getWorkbook().write(out);
out.close();
excelWorksheetFilter.getWorkbook().close();
}
This will print:
Jhon Doe 25
And if you use excelWorksheetFilter.apply()
it will print:
Jhon Doe 25
Aaa Doe 22
Jhon Smith 30
The two main downside are:
ExcelWorksheetFilter.getRowList()
function return a list and not an Iterator.Also it only works with strings but I suppose that it could be adapted to work with other type of data.
Upvotes: 0
Reputation: 61945
If your question is how to set AutoFilter
criteria "Doe" for last name, then this only can be achieved using underlying low level ooxml-schemas
classes. The XSSFAutoFilter
is useless until now. It does not provide any methods until now.
Complete example using your example data:
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.ss.util.*;
import org.apache.poi.xssf.usermodel.*;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTAutoFilter;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTFilterColumn;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTFilters;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCustomFilters;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCustomFilter;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.STFilterOperator;
import java.io.FileOutputStream;
class AutoFilterSetTest {
private static void setCellData(Sheet sheet) {
Object[][] data = new Object[][] {
new Object[] {"First name", "Last name", "Age"},
new Object[] {"John", "Doe", 25},
new Object[] {"Foo", "Bar", 20},
new Object[] {"Jane", "Doe", 22},
new Object[] {"Ruth", "Moss", 42},
new Object[] {"Manuel", "Doe", 32},
new Object[] {"Axel", "Richter", 56},
};
Row row = null;
Cell cell = null;
int r = 0;
int c = 0;
for (Object[] dataRow : data) {
row = sheet.createRow(r);
c = 0;
for (Object dataValue : dataRow) {
cell = row.createCell(c);
if (dataValue instanceof String) {
cell.setCellValue((String)dataValue);
} else if (dataValue instanceof Number) {
cell.setCellValue(((Number)dataValue).doubleValue());
}
c++;
}
r++;
}
}
private static void setCriteriaFilter(XSSFSheet sheet, int colId, int firstRow, int lastRow, String[] criteria) throws Exception {
CTAutoFilter ctAutoFilter = sheet.getCTWorksheet().getAutoFilter();
CTFilterColumn ctFilterColumn = null;
for (CTFilterColumn filterColumn : ctAutoFilter.getFilterColumnList()) {
if (filterColumn.getColId() == colId) ctFilterColumn = filterColumn;
}
if (ctFilterColumn == null) ctFilterColumn = ctAutoFilter.addNewFilterColumn();
ctFilterColumn.setColId(colId);
if (ctFilterColumn.isSetFilters()) ctFilterColumn.unsetFilters();
CTFilters ctFilters = ctFilterColumn.addNewFilters();
for (int i = 0; i < criteria.length; i++) {
ctFilters.addNewFilter().setVal(criteria[i]);
}
//hiding the rows not matching the criterias
DataFormatter dataformatter = new DataFormatter();
for (int r = firstRow; r <= lastRow; r++) {
XSSFRow row = sheet.getRow(r);
boolean hidden = true;
for (int i = 0; i < criteria.length; i++) {
String cellValue = dataformatter.formatCellValue(row.getCell(colId));
if (criteria[i].equals(cellValue)) hidden = false;
}
if (hidden) {
row.getCTRow().setHidden(hidden);
} else {
if (row.getCTRow().getHidden()) row.getCTRow().unsetHidden();
}
}
}
public static void main(String[] args) throws Exception {
XSSFWorkbook wb = new XSSFWorkbook();
XSSFSheet sheet = wb.createSheet();
//create rows of data
setCellData(sheet);
for (int c = 0; c < 2; c++) sheet.autoSizeColumn(c);
int lastRow = sheet.getLastRowNum();
XSSFAutoFilter autofilter = sheet.setAutoFilter(new CellRangeAddress(0, lastRow, 0, 2));
//XSSFAutoFilter is useless until now
//set filter criteria
setCriteriaFilter(sheet, 1, 1, lastRow, new String[]{"Doe"});
//get only visible rows after filtering
XSSFRow row = null;
for (int r = 1; r <= lastRow; r++) {
row = sheet.getRow(r);
if (row.getCTRow().getHidden()) continue;
for (int c = 0; c < 3; c++) {
System.out.print(row.getCell(c) + "\t");
}
System.out.println();
}
FileOutputStream out = new FileOutputStream("AutoFilterSetTest.xlsx");
wb.write(out);
out.close();
wb.close();
}
}
It prints:
John Doe 25.0
Jane Doe 22.0
Manuel Doe 32.0
Resulting AutoFilterSetTest.xlsx
looks like:
Upvotes: 2