Reputation: 88
I am trying to convert CSV to Excel with Apache POI by using SXSSFWorkbook.
My CSV file has about 230,000 records and 50 columns. Total processing time for both read and write processes around 1 minute.
I am looking around to solve my performance issue to get lower times (max. 30 sec) since the connection time out occurs after 30 sec. from gateway.
What am I doing wrong? Is there anything I could change?
public static byte[] readCsvAndWriteExcel(String filePath, File csvFile) {
logger.info("Read csv and write excel has been started for {}", csvFile.getName());
String line;
BufferedReader bufferedReader = readFile(filePath);
SXSSFWorkbook workbook = new SXSSFWorkbook(SXSSFWorkbook.DEFAULT_WINDOW_SIZE);
workbook.setCompressTempFiles(true);
int rowNumber = 0;
int cellNumber;
SXSSFRow row;
SXSSFSheet sheet = workbook.createSheet(FilenameUtils.getBaseName(csvFile.getName()));
try {
while ((line = bufferedReader.readLine()) != null) {
cellNumber = 0;
row = sheet.createRow(rowNumber++);
addCell(cellNumber, row, line.trim().split(SEPERATOR_PATTERN));
}
} catch (IOException e) {
logger.error("File is not found: {}", filePath, e);
throw new Exception("Exception");
}
logger.info("Read csv and write excel has been finished for {}", csvFile.getName());
return retrieveBytesOfXlsxFile(workbook);
}
And addCell method for saving cell and styling;
private static void addCell(int cellNumber, SXSSFRow row, String[] splitRow) {
for (String field : splitRow) {
SXSSFCell cell = row.createCell(cellNumber++);
cell.setCellValue(field);
}
}
Fastexcel gives same time with POI streaming.
Upvotes: 0
Views: 4662
Reputation: 68
There is no problem with the way you write to Excel. Time is mainly wasted on the method of reading CSV. In addition, fastexcel is the best choice I tested, it is faster than others( SXSSFWorkBook ,easyexcel and myexcel).The following code hopes to be useful to you.
import cn.hutool.core.io.FileUtil;
import cn.hutool.core.lang.Pair;
import cn.hutool.core.text.csv.CsvReader;
import cn.hutool.core.text.csv.CsvUtil;
import cn.hutool.core.text.csv.CsvWriter;
import com.alibaba.excel.EasyExcel;
import com.github.liaochong.myexcel.core.DefaultStreamExcelBuilder;
import org.apache.poi.xssf.streaming.SXSSFCell;
import org.apache.poi.xssf.streaming.SXSSFRow;
import org.apache.poi.xssf.streaming.SXSSFSheet;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.dhatim.fastexcel.Workbook;
import org.dhatim.fastexcel.Worksheet;
import org.junit.jupiter.api.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.OutputStream;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.time.LocalDateTime;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
public class ExcelTest {
Logger logger = LoggerFactory.getLogger(ExcelTest.class);
/**
* generate test xlsx (time < 15s)
*/
@Test
public void Test1() {
LocalDateTime start = LocalDateTime.now();
try (OutputStream os = FileUtil.getOutputStream("D:\\tmp\\TEST1.xlsx")) {
Workbook wb = new Workbook(os, "MyApplication", "1.0");
Worksheet ws = wb.newWorksheet("Sheet 1");
HashMap<Pair<Integer, Integer>, String> dataMap = new HashMap<>();
for (int i = 0; i < 230000; i++) {
String[] strings = new Random().ints(50, 100, 150).mapToObj(a -> "TEST_" + a).collect(Collectors.toList()).toArray(new String[]{});
for (int j = 0; j < 50; j++) {
ws.value(i, j, strings[j]);
}
}
wb.finish();
} catch (IOException e) {
logger.error("Error", e);
}
LocalDateTime end = LocalDateTime.now();
logger.info("Cost time {}", Duration.between(start, end).toMillis() + "ms");
}
/**
* generate test csv (time < 2s)
*/
@Test
public void Test2() {
LocalDateTime start = LocalDateTime.now();
try (CsvWriter writer = CsvUtil.getWriter("D:\\tmp\\TEST2.csv", StandardCharsets.UTF_8)) {
writer.writeHeaderLine(IntStream.range(0, 50).mapToObj(String::valueOf).collect(Collectors.toList()).toArray(new String[]{}));
for (int i = 0; i < 230000; i++) {
String[] strings = new Random().ints(50, 100, 150).mapToObj(a -> "TEST_" + a).collect(Collectors.toList()).toArray(new String[]{});
writer.writeLine(strings);
}
} catch (Exception e) {
logger.error("Error", e);
}
LocalDateTime end = LocalDateTime.now();
logger.info("Cost time {}", Duration.between(start, end).toMillis() + "ms");
}
/**
* convert by fastexcel (time < 15s)
*/
@Test
public void Test3() {
LocalDateTime start = LocalDateTime.now();
try (OutputStream os = FileUtil.getOutputStream("D:\\tmp\\TEST3.xlsx"); CsvReader reader = CsvUtil.getReader(FileUtil.getUtf8Reader("D:\\tmp\\TEST2.csv"))) {
Workbook wb = new Workbook(os, "MyApplication", "1.0");
Worksheet ws = wb.newWorksheet("Sheet 1");
reader.stream().forEach(a -> {
long originalLineNumber = a.getOriginalLineNumber();
for (int i = 0; i < a.size(); i++) {
ws.value((int) originalLineNumber, i, a.get(i));
}
});
wb.finish();
} catch (IOException e) {
logger.error("Error", e);
}
LocalDateTime end = LocalDateTime.now();
logger.info("Cost time {}", Duration.between(start, end).toMillis() + "ms");
}
/**
* convert by SXSSFWorkbook (time < 30s)
*/
@Test
public void Test4() {
LocalDateTime start = LocalDateTime.now();
try (CsvReader reader = CsvUtil.getReader(FileUtil.getUtf8Reader("D:\\tmp\\TEST2.csv")); OutputStream os = FileUtil.getOutputStream("D:\\tmp\\TEST4.xlsx"); SXSSFWorkbook workbook = new SXSSFWorkbook(SXSSFWorkbook.DEFAULT_WINDOW_SIZE)) {
SXSSFSheet sheet = workbook.createSheet("Sheet 1");
reader.stream().forEach(a -> {
long originalLineNumber = a.getOriginalLineNumber();
SXSSFRow row = sheet.createRow((int) originalLineNumber);
for (int i = 0; i < a.size(); i++) {
SXSSFCell cell = row.createCell(i);
cell.setCellValue(a.get(i));
}
});
workbook.write(os);
} catch (IOException e) {
logger.error("Error", e);
}
LocalDateTime end = LocalDateTime.now();
logger.info("Cost time {}", Duration.between(start, end).toMillis() + "ms");
}
/**
* convert by easyexcel (time < 30s)
*/
@Test
public void Test5() {
LocalDateTime start = LocalDateTime.now();
try (CsvReader reader = CsvUtil.getReader(FileUtil.getUtf8Reader("D:\\tmp\\TEST2.csv"))) {
EasyExcel.write("D:\\tmp\\TEST5.xlsx", ExcelTest.class).sheet("Sheet 1").needHead(false).doWrite(() -> reader.stream().collect(Collectors.toList()));
} catch (IOException e) {
logger.error("Error", e);
}
LocalDateTime end = LocalDateTime.now();
logger.info("Cost time {}", Duration.between(start, end).toMillis() + "ms");
}
private ExecutorService executorService = Executors.newFixedThreadPool(10);
/**
* convert by myexcel (time < 30s)
*/
@Test
public void Test6() {
LocalDateTime start = LocalDateTime.now();
try (OutputStream os = FileUtil.getOutputStream("D:\\tmp\\TEST6.xlsx");
Reader reader = FileUtil.getUtf8Reader("D:\\tmp\\TEST2.csv");
CsvReader csvReader = CsvUtil.getReader();
DefaultStreamExcelBuilder<Map> defaultExcelBuilder = DefaultStreamExcelBuilder.of(Map.class)
.threadPool(executorService)
.start()) {
csvReader.setContainsHeader(true);
List maps = csvReader.readMapList(reader);
defaultExcelBuilder.asyncAppend(() -> maps);
org.apache.poi.ss.usermodel.Workbook workbook = defaultExcelBuilder.build();
workbook.write(os);
} catch (IOException e) {
logger.error("Error", e);
}
LocalDateTime end = LocalDateTime.now();
logger.info("Cost time {}", Duration.between(start, end).toMillis() + "ms");
}
}
Dependencys:
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.1.2</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.2</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>4.1.2</version>
</dependency>
<dependency>
<groupId>org.dhatim</groupId>
<artifactId>fastexcel</artifactId>
<version>0.12.12</version>
</dependency>
<dependency>
<groupId>cn.hutool</groupId>
<artifactId>hutool-all</artifactId>
<version>5.7.16</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>easyexcel</artifactId>
<version>3.0.5</version>
</dependency>
<dependency>
<groupId>com.github.liaochong</groupId>
<artifactId>myexcel</artifactId>
<version>3.11.8</version>
</dependency>
Test result: Test result Image
Upvotes: 1