How to Supercharge Excel-to-Database Imports with Async and Thread‑Pool Tuning in Java
This article walks through the common slow Excel‑to‑DB import pattern, shows why it becomes a bottleneck, and presents three progressive optimizations—including caching, asynchronous multi‑threaded reading, and double‑async processing—along with detailed Spring @Async thread‑pool configuration and EasyExcel alternatives, all illustrated with complete Java code samples.
1. Typical Import Approach
Read the Excel file with POI, use the file name as the table name, column headers as column names, concatenate the data into SQL statements, and insert into the database via JDBC or MyBatis.
Performance Issue
When processing many large files, the import becomes extremely slow; reading a 100,000‑row Excel took 191 seconds.
private void readXls(String filePath, String filename) throws Exception {
@SuppressWarnings("resource")
XSSFWorkbook xssfWorkbook = new XSSFWorkbook(new FileInputStream(filePath));
// read first sheet
XSSFSheet sheet = xssfWorkbook.getSheetAt(0);
int maxRow = sheet.getLastRowNum();
StringBuilder insertBuilder = new StringBuilder();
insertBuilder.append("insert into ").append(filename).append(" ( UUID,");
XSSFRow row = sheet.getRow(0);
for (int i = 0; i < row.getPhysicalNumberOfCells(); i++) {
insertBuilder.append(row.getCell(i)).append(",");
}
insertBuilder.deleteCharAt(insertBuilder.length() - 1);
insertBuilder.append(" ) values ( ");
StringBuilder stringBuilder = new StringBuilder();
for (int i = 1; i <= maxRow; i++) {
XSSFRow xssfRow = sheet.getRow(i);
String id = "";
String name = "";
for (int j = 0; j < row.getPhysicalNumberOfCells(); j++) {
if (j == 0) {
id = xssfRow.getCell(j) + "";
} else if (j == 1) {
name = xssfRow.getCell(j) + "";
}
}
boolean flag = isExisted(id, name);
if (!flag) {
stringBuilder.append(insertBuilder);
stringBuilder.append('\'').append(uuid()).append('\'').append(",");
for (int j = 0; j < row.getPhysicalNumberOfCells(); j++) {
stringBuilder.append('\'').append(value).append('\'').append(",");
}
stringBuilder.deleteCharAt(stringBuilder.length() - 1);
stringBuilder.append(" )
");
}
}
List<String> collect = Arrays.stream(stringBuilder.toString().split("
")).collect(Collectors.toList());
int sum = JdbcUtil.executeDML(collect);
}
private static boolean isExisted(String id, String name) {
String sql = "select count(1) as num from " + static_TABLE + " where ID = '" + id + "' and NAME = '" + name + "'";
String num = JdbcUtil.executeSelect(sql, "num");
return Integer.valueOf(num) > 0;
}
private static String uuid() {
return UUID.randomUUID().toString().replace("-", "");
}2. Optimizations
Optimization 1 – Cache Lookup
Load all existing records into a Map before insertion and check against the cache, dramatically speeding up the process.
Optimization 2 – Async + Multi‑Threaded Reading
If a single Excel file is large, read it asynchronously in chunks and insert in batches.
Optimization 3 – Double Async (File‑Level + Chunk‑Level)
Run one async task per file and within each task process chunks asynchronously, reducing the import time from 191 seconds to about 2 seconds.
Key Async Code
1. readExcelCacheAsync Controller
@RequestMapping(value = "/readExcelCacheAsync", method = RequestMethod.POST)
@ResponseBody
public String readExcelCacheAsync() {
String path = "G:\\测试\\data\\";
try {
// cache all data before reading Excel
USER_INFO_SET = getUserInfo();
File file = new File(path);
String[] xlsxArr = file.list();
for (int i = 0; i < xlsxArr.length; i++) {
File fileTemp = new File(path + "\\" + xlsxArr[i]);
String filename = fileTemp.getName().replace(".xlsx", "");
readExcelCacheAsyncService.readXls(path + filename + ".xlsx", filename);
}
} catch (Exception e) {
logger.error("|#ReadDBCsv|#异常: ", e);
return "error";
}
return "success";
}2. Batch Reading Large Excel
@Async("async-executor")
public void readXls(String filePath, String filename) throws Exception {
@SuppressWarnings("resource")
XSSFWorkbook xssfWorkbook = new XSSFWorkbook(new FileInputStream(filePath));
XSSFSheet sheet = xssfWorkbook.getSheetAt(0);
int maxRow = sheet.getLastRowNum();
logger.info(filename + ".xlsx,一共" + maxRow + "行数据!");
StringBuilder insertBuilder = new StringBuilder();
insertBuilder.append("insert into ").append(filename).append(" ( UUID,");
XSSFRow row = sheet.getRow(0);
for (int i = 0; i < row.getPhysicalNumberOfCells(); i++) {
insertBuilder.append(row.getCell(i)).append(",");
}
insertBuilder.deleteCharAt(insertBuilder.length() - 1);
insertBuilder.append(" ) values ( ");
int times = maxRow / STEP + 1;
for (int time = 0; time < times; time++) {
int start = STEP * time + 1;
int end = STEP * time + STEP;
if (time == times - 1) {
end = maxRow;
}
if (end + 1 - start > 0) {
readExcelDataAsyncService.readXlsCacheAsyncMybatis(sheet, row, start, end, insertBuilder);
}
}
}3. Async Batch Insert
@Async("async-executor")
public void readXlsCacheAsync(XSSFSheet sheet, XSSFRow row, int start, int end, StringBuilder insertBuilder) {
StringBuilder stringBuilder = new StringBuilder();
for (int i = start; i <= end; i++) {
XSSFRow xssfRow = sheet.getRow(i);
String id = "";
String name = "";
for (int j = 0; j < row.getPhysicalNumberOfCells(); j++) {
if (j == 0) {
id = xssfRow.getCell(j) + "";
} else if (j == 1) {
name = xssfRow.getCell(j) + "";
}
}
boolean flag = isExisted(id, name);
if (!flag) {
stringBuilder.append(insertBuilder);
stringBuilder.append('\'').append(uuid()).append('\'').append(",");
for (int j = 0; j < row.getPhysicalNumberOfCells(); j++) {
stringBuilder.append('\'').append(value).append('\'').append(",");
}
stringBuilder.deleteCharAt(stringBuilder.length() - 1);
stringBuilder.append(" )
");
}
}
List<String> collect = Arrays.stream(stringBuilder.toString().split("
")).collect(Collectors.toList());
if (collect != null && collect.size() > 0) {
int sum = JdbcUtil.executeDML(collect);
}
}
private boolean isExisted(String id, String name) {
return ReadExcelCacheAsyncController.USER_INFO_SET.contains(id + "," + name);
}4. Thread‑Pool Configuration
Using Spring’s @Async requires a thread‑pool bean. Example configuration:
spring:
task:
execution:
pool:
max-size: 10
core-size: 5
keep-alive: 3s
queue-capacity: 1000
thread-name-prefix: my-executorCustom executor example:
@EnableAsync
@Configuration
public class AsyncTaskConfig {
@Bean("my-executor")
public Executor firstExecutor() {
ThreadFactory threadFactory = new ThreadFactoryBuilder().setNameFormat("my-executor").build();
int curSystemThreads = Runtime.getRuntime().availableProcessors() * 2;
ThreadPoolExecutor threadPool = new ThreadPoolExecutor(curSystemThreads, 100,
200, TimeUnit.SECONDS, new LinkedBlockingQueue<>(), threadFactory);
threadPool.allowsCoreThreadTimeOut();
return threadPool;
}
@Bean("async-executor")
public Executor asyncExecutor() {
ThreadPoolTaskExecutor taskExecutor = new ThreadPoolTaskExecutor();
taskExecutor.setCorePoolSize(24);
taskExecutor.setMaxPoolSize(200);
taskExecutor.setQueueCapacity(50);
taskExecutor.setKeepAliveSeconds(200);
taskExecutor.setThreadNamePrefix("async-executor-");
taskExecutor.setRejectedExecutionHandler(new ThreadPoolExecutor.CallerRunsPolicy());
taskExecutor.initialize();
return taskExecutor;
}
}Common Async Pitfalls
The @Async method must be public.
Return type must be void or Future.
Static methods annotated with @Async do not work.
The class must be managed by Spring and annotated with @EnableAsync.
The caller and the @Async method cannot be in the same class.
@Transactional on the async method has no effect; it must be on the called method.
3. Thread‑Pool Core Size Tuning
Setting CorePoolSize equal to the number of CPU cores (or CPU × 2 for I/O‑bound workloads) often yields the best performance. Experiments with a 24‑core machine showed that processing 100 k rows with a core size of ~4200 (≈10 k / 24 × 2) gave optimal speed.
4. EasyExcel Alternative
Using EasyExcel simplifies reading and batch insertion. The author advises against re‑implementing the double‑async optimization with EasyExcel, encouraging developers to avoid low‑level “hard‑working” loops.
ReadEasyExcelController
@RequestMapping(value = "/readEasyExcel", method = RequestMethod.POST)
@ResponseBody
public String readEasyExcel() {
try {
String path = "G:\\测试\\data\\";
String[] xlsxArr = new File(path).list();
for (int i = 0; i < xlsxArr.length; i++) {
String filePath = path + xlsxArr[i];
File fileTemp = new File(path + xlsxArr[i]);
String fileName = fileTemp.getName().replace(".xlsx", "");
List<UserInfo> list = new ArrayList<>();
EasyExcel.read(filePath, UserInfo.class, new ReadEasyExeclAsyncListener(readEasyExeclService, fileName, batchCount, list))
.sheet().doRead();
}
} catch (Exception e) {
logger.error("readEasyExcel 异常:", e);
return "error";
}
return "success";
}ReadEasyExeclAsyncListener
public class ReadEasyExeclAsyncListener implements ReadListener<UserInfo> {
private ReadEasyExeclService readEasyExeclService;
private String TABLE_NAME;
private int BATCH_COUNT;
private List<UserInfo> LIST;
public ReadEasyExeclAsyncListener(ReadEasyExeclService service, String tableName, int batchCount, List<UserInfo> list) {
this.readEasyExeclService = service;
this.TABLE_NAME = tableName;
this.BATCH_COUNT = batchCount;
this.LIST = list;
}
@Override
public void invoke(UserInfo data, AnalysisContext context) {
data.setUuid(uuid());
data.setTableName(TABLE_NAME);
LIST.add(data);
if (LIST.size() >= BATCH_COUNT) {
readEasyExeclService.saveDataBatch(LIST);
}
}
@Override
public void doAfterAllAnalysed(AnalysisContext context) {
if (LIST.size() > 0) {
readEasyExeclService.saveDataBatch(LIST);
}
}
public static String uuid() {
return UUID.randomUUID().toString().replace("-", "");
}
}ReadEasyExeclServiceImpl
@Service
public class ReadEasyExeclServiceImpl implements ReadEasyExeclService {
@Resource
private ReadEasyExeclMapper readEasyExeclMapper;
@Override
public void saveDataBatch(List<UserInfo> list) {
// MyBatis batch insert
readEasyExeclMapper.saveDataBatch(list);
list.clear();
}
private void insertByJdbc(List<UserInfo> list) {
List<String> sqlList = new ArrayList<>();
for (UserInfo u : list) {
StringBuilder sqlBuilder = new StringBuilder();
sqlBuilder.append("insert into ").append(u.getTableName())
.append(" ( UUID,ID,NAME,AGE,ADDRESS,PHONE,OP_TIME ) values ( ")
.append("'" + ReadEasyExeclAsyncListener.uuid() + "',")
.append("'" + u.getId() + "',")
.append("'" + u.getName() + "',")
.append("'" + u.getAge() + "',")
.append("'" + u.getAddress() + "',")
.append("'" + u.getPhone() + "', sysdate )");
sqlList.add(sqlBuilder.toString());
}
JdbcUtil.executeDML(sqlList);
}
}UserInfo Entity
@Data
public class UserInfo {
private String tableName;
private String uuid;
@ExcelProperty("ID")
private String id;
@ExcelProperty("NAME")
private String name;
@ExcelProperty("AGE")
private String age;
@ExcelProperty("ADDRESS")
private String address;
@ExcelProperty("PHONE")
private String phone;
}Signed-in readers can open the original source through BestHub's protected redirect.
This article has been distilled and summarized from source material, then republished for learning and reference. If you believe it infringes your rights, please contactand we will review it promptly.
Su San Talks Tech
Su San, former staff at several leading tech companies, is a top creator on Juejin and a premium creator on CSDN, and runs the free coding practice site www.susan.net.cn.
How this landed with the community
Was this worth your time?
0 Comments
Thoughtful readers leave field notes, pushback, and hard-won operational detail here.
