Berkay Bingol
Berkay Bingol

Reputation: 88

Apache POI Large Excel Export is slow

I am trying to convert CSV to Excel with Apache POI by using SXSSFWorkbook.

My CSV file has about 230,000 records and 50 columns. Total processing time for both read and write processes around 1 minute.

I am looking around to solve my performance issue to get lower times (max. 30 sec) since the connection time out occurs after 30 sec. from gateway.

What am I doing wrong? Is there anything I could change?

public static byte[] readCsvAndWriteExcel(String filePath, File csvFile) {
    logger.info("Read csv and write excel has been started for {}", csvFile.getName());

    String line;
    BufferedReader bufferedReader = readFile(filePath);

    SXSSFWorkbook workbook = new SXSSFWorkbook(SXSSFWorkbook.DEFAULT_WINDOW_SIZE);
    workbook.setCompressTempFiles(true);
    int rowNumber = 0;
    int cellNumber;
    SXSSFRow row;
    SXSSFSheet sheet = workbook.createSheet(FilenameUtils.getBaseName(csvFile.getName()));

    try {
        while ((line = bufferedReader.readLine()) != null) {
            cellNumber = 0;
            row = sheet.createRow(rowNumber++);
            addCell(cellNumber, row, line.trim().split(SEPERATOR_PATTERN));
        }
    } catch (IOException e) {
        logger.error("File is not found: {}", filePath, e);
        throw new Exception("Exception");
    }

    logger.info("Read csv and write excel has been finished for {}", csvFile.getName());
    return retrieveBytesOfXlsxFile(workbook);
}

And addCell method for saving cell and styling;

private static void addCell(int cellNumber, SXSSFRow row, String[] splitRow) {
    for (String field : splitRow) {
        SXSSFCell cell = row.createCell(cellNumber++);
        cell.setCellValue(field);
    }
}

Fastexcel gives same time with POI streaming.

Upvotes: 0

Views: 4662

Answers (1)

meiMing
meiMing

Reputation: 68

There is no problem with the way you write to Excel. Time is mainly wasted on the method of reading CSV. In addition, fastexcel is the best choice I tested, it is faster than others( SXSSFWorkBook ,easyexcel and myexcel).The following code hopes to be useful to you.

import cn.hutool.core.io.FileUtil;
import cn.hutool.core.lang.Pair;
import cn.hutool.core.text.csv.CsvReader;
import cn.hutool.core.text.csv.CsvUtil;
import cn.hutool.core.text.csv.CsvWriter;
import com.alibaba.excel.EasyExcel;
import com.github.liaochong.myexcel.core.DefaultStreamExcelBuilder;
import org.apache.poi.xssf.streaming.SXSSFCell;
import org.apache.poi.xssf.streaming.SXSSFRow;
import org.apache.poi.xssf.streaming.SXSSFSheet;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.dhatim.fastexcel.Workbook;
import org.dhatim.fastexcel.Worksheet;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.OutputStream;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.time.LocalDateTime;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

public class ExcelTest {

    Logger logger = LoggerFactory.getLogger(ExcelTest.class);

    /**
     * generate test xlsx (time < 15s)
     */
    @Test
    public void Test1() {
        LocalDateTime start = LocalDateTime.now();
        try (OutputStream os = FileUtil.getOutputStream("D:\\tmp\\TEST1.xlsx")) {
            Workbook wb = new Workbook(os, "MyApplication", "1.0");
            Worksheet ws = wb.newWorksheet("Sheet 1");
            HashMap<Pair<Integer, Integer>, String> dataMap = new HashMap<>();
            for (int i = 0; i < 230000; i++) {
                String[] strings = new Random().ints(50, 100, 150).mapToObj(a -> "TEST_" + a).collect(Collectors.toList()).toArray(new String[]{});
                for (int j = 0; j < 50; j++) {
                    ws.value(i, j, strings[j]);
                }
            }
            wb.finish();
        } catch (IOException e) {
            logger.error("Error", e);
        }
        LocalDateTime end = LocalDateTime.now();
        logger.info("Cost time {}", Duration.between(start, end).toMillis() + "ms");
    }

    /**
     * generate test csv  (time < 2s)
     */
    @Test
    public void Test2() {
        LocalDateTime start = LocalDateTime.now();
        try (CsvWriter writer = CsvUtil.getWriter("D:\\tmp\\TEST2.csv", StandardCharsets.UTF_8)) {
            writer.writeHeaderLine(IntStream.range(0, 50).mapToObj(String::valueOf).collect(Collectors.toList()).toArray(new String[]{}));
            for (int i = 0; i < 230000; i++) {
                String[] strings = new Random().ints(50, 100, 150).mapToObj(a -> "TEST_" + a).collect(Collectors.toList()).toArray(new String[]{});
                writer.writeLine(strings);
            }

        } catch (Exception e) {
            logger.error("Error", e);
        }
        LocalDateTime end = LocalDateTime.now();
        logger.info("Cost time {}", Duration.between(start, end).toMillis() + "ms");
    }

    /**
     * convert by fastexcel   (time < 15s)
     */
    @Test
    public void Test3() {
        LocalDateTime start = LocalDateTime.now();
        try (OutputStream os = FileUtil.getOutputStream("D:\\tmp\\TEST3.xlsx"); CsvReader reader = CsvUtil.getReader(FileUtil.getUtf8Reader("D:\\tmp\\TEST2.csv"))) {
            Workbook wb = new Workbook(os, "MyApplication", "1.0");
            Worksheet ws = wb.newWorksheet("Sheet 1");
            reader.stream().forEach(a -> {
                long originalLineNumber = a.getOriginalLineNumber();
                for (int i = 0; i < a.size(); i++) {
                    ws.value((int) originalLineNumber, i, a.get(i));
                }
            });
            wb.finish();
        } catch (IOException e) {
            logger.error("Error", e);
        }
        LocalDateTime end = LocalDateTime.now();
        logger.info("Cost time {}", Duration.between(start, end).toMillis() + "ms");
    }

    /**
     * convert by SXSSFWorkbook  (time < 30s)
     */
    @Test
    public void Test4() {
        LocalDateTime start = LocalDateTime.now();
        try (CsvReader reader = CsvUtil.getReader(FileUtil.getUtf8Reader("D:\\tmp\\TEST2.csv")); OutputStream os = FileUtil.getOutputStream("D:\\tmp\\TEST4.xlsx"); SXSSFWorkbook workbook = new SXSSFWorkbook(SXSSFWorkbook.DEFAULT_WINDOW_SIZE)) {
            SXSSFSheet sheet = workbook.createSheet("Sheet 1");
            reader.stream().forEach(a -> {
                long originalLineNumber = a.getOriginalLineNumber();
                SXSSFRow row = sheet.createRow((int) originalLineNumber);
                for (int i = 0; i < a.size(); i++) {
                    SXSSFCell cell = row.createCell(i);
                    cell.setCellValue(a.get(i));
                }
            });
            workbook.write(os);
        } catch (IOException e) {
            logger.error("Error", e);
        }
        LocalDateTime end = LocalDateTime.now();
        logger.info("Cost time {}", Duration.between(start, end).toMillis() + "ms");
    }

    /**
     * convert by easyexcel  (time < 30s)
     */
    @Test
    public void Test5() {
        LocalDateTime start = LocalDateTime.now();
        try (CsvReader reader = CsvUtil.getReader(FileUtil.getUtf8Reader("D:\\tmp\\TEST2.csv"))) {
            EasyExcel.write("D:\\tmp\\TEST5.xlsx", ExcelTest.class).sheet("Sheet 1").needHead(false).doWrite(() -> reader.stream().collect(Collectors.toList()));
        } catch (IOException e) {
            logger.error("Error", e);
        }
        LocalDateTime end = LocalDateTime.now();
        logger.info("Cost time {}", Duration.between(start, end).toMillis() + "ms");
    }

    private ExecutorService executorService = Executors.newFixedThreadPool(10);

    /**
     * convert by myexcel  (time < 30s)
     */
    @Test
    public void Test6() {
        LocalDateTime start = LocalDateTime.now();
        try (OutputStream os = FileUtil.getOutputStream("D:\\tmp\\TEST6.xlsx");
             Reader reader = FileUtil.getUtf8Reader("D:\\tmp\\TEST2.csv");
             CsvReader csvReader = CsvUtil.getReader();
             DefaultStreamExcelBuilder<Map> defaultExcelBuilder = DefaultStreamExcelBuilder.of(Map.class)
                     .threadPool(executorService)
                     .start()) {
            csvReader.setContainsHeader(true);
            List maps = csvReader.readMapList(reader);
            defaultExcelBuilder.asyncAppend(() -> maps);
            org.apache.poi.ss.usermodel.Workbook workbook = defaultExcelBuilder.build();
            workbook.write(os);
        } catch (IOException e) {
            logger.error("Error", e);
        }
        LocalDateTime end = LocalDateTime.now();
        logger.info("Cost time {}", Duration.between(start, end).toMillis() + "ms");
    }

}

Dependencys:

<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>4.1.2</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>4.1.2</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml-schemas</artifactId>
    <version>4.1.2</version>
</dependency>
<dependency>
    <groupId>org.dhatim</groupId>
    <artifactId>fastexcel</artifactId>
    <version>0.12.12</version>
</dependency>
<dependency>
    <groupId>cn.hutool</groupId>
    <artifactId>hutool-all</artifactId>
    <version>5.7.16</version>
</dependency>
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>easyexcel</artifactId>
    <version>3.0.5</version>
</dependency>
<dependency>
    <groupId>com.github.liaochong</groupId>
    <artifactId>myexcel</artifactId>
    <version>3.11.8</version>
</dependency>

Test result: Test result Image

Upvotes: 1

Related Questions