java poi sax方式处理大数据量excel文件
发布时间:2020-05-24 13:36:36 所属栏目:Java 来源:互联网
导读:java poi sax方式处理大数据量excel文件 系统需要用到一个导入excel文件的功能,使用poi组件常规方式读取excel时,内存耗尽,OutOfMemoryError,或者读取非常慢 所以写了一个工具类,使用poi sax方式读取excel,速度快很多,内存消耗可以接受. 测试结果如下: .xlsx文
|
系统需要用到一个导入excel文件的功能,使用poi组件常规方式读取excel时,内存耗尽,OutOfMemoryError,或者读取非常慢 测试结果如下: 总行数:37434 主要代码如下: ExcelUtils.class 主入口
package com.xxx.bi.utils.excel;
import java.util.List;
import java.util.Objects;
import org.apache.commons.lang3.StringUtils;
import com.google.common.collect.Lists;
public class ExcelUtils {
/** logger日志. */
// public static final Logger LOGGER = Logger.getLogger(ExcelUtils.class);
public ExcelUtils() {
}
/**
* 获取excel的表头
*
* @param filePath
* 文件路径
* @param headerNum
* 表头所在行数
* @return
*/
public static List<String> getHeader(String filePath,int headerNum) {
if (StringUtils.isBlank(filePath)) {
throw new IllegalArgumentException("传入文件路径不能为空");
}
if (Objects.isNull(headerNum) || headerNum < 1) {
headerNum = 1;
}
try {
return LargeExcelFileReadUtil.getRowFromSheetOne(filePath,headerNum);
} catch (Exception e) {
// LOGGER.info("获取excel[" + filePath + "]表头失败,原因:",e);
e.printStackTrace();
}
return Lists.newArrayList();
}
/**
* 获取excel的所有数据<br/>
* 所有数据类型都是String<br/>
* 会以第一行数据的列数为总列数,所以第一行的数据必须都不为空,否则可能出java.lang.IndexOutOfBoundsException
*
* @param filePath
* 文件路径
* @param headerNum
* 表头所在行数
* @return
*/
public static List<List<String>> getAllData(String filePath) {
if (StringUtils.isBlank(filePath)) {
throw new IllegalArgumentException("传入文件路径不能为空");
}
try {
return LargeExcelFileReadUtil.getRowsFromSheetOne(filePath);
} catch (Exception e) {
// LOGGER.info("获取excel[" + filePath + "]表头失败,原因:",e);
e.printStackTrace();
}
return Lists.newArrayList();
}
public static void main(String[] args) {
long start = System.currentTimeMillis();
String filepath = "C:/Users/Administrator/Desktop/05-作业调配表 -快递.xlsx";
// List<String> result = ExcelUtils.getHeader(filepath,1);
// for (String col : result) {
// System.out.println(col);
// }
List<List<String>> result = ExcelUtils.getAllData(filepath);
long end = System.currentTimeMillis();
for (List<String> list : result) {
System.out.println(list.toString());
}
long end1 = System.currentTimeMillis();
try {
Thread.sleep(1000l);
} catch (InterruptedException e) {
e.printStackTrace();
}
System.err.println("总行数:" + result.size());
System.err.println(("读取耗时:" + (end - start) / 1000) + "秒");
System.err.println(("打印耗时:" + (end1 - end) / 1000) + "秒");
}
}
LargeExcelFileReadUtil.class 真正的工具类
package com.xxx.bi.utils.excel;
import java.io.InputStream;
import java.util.List;
import java.util.Objects;
import org.apache.log4j.Logger;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.xml.sax.InputSource;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;
public class LargeExcelFileReadUtil {
/** logger日志. */
public static final Logger LOGGER = Logger.getLogger(LargeExcelFileReadUtil.class);
// 处理一个sheet
public static List<String> getRowFromSheetOne(String filename,Integer rowNum) throws Exception {
InputStream inputStream = null;
OPCPackage pkg = null;
SingleRowHandler singleRowHandler = null;
try {
pkg = OPCPackage.open(filename);
XSSFReader r = new XSSFReader(pkg);
SharedStringsTable sst = r.getSharedStringsTable();
singleRowHandler = new SingleRowHandler(sst,rowNum);
XMLReader parser = XMLReaderFactory.createXMLReader("com.sun.org.apache.xerces.internal.parsers.SAXParser");
parser.setContentHandler(singleRowHandler);
inputStream = r.getSheet("rId1");
InputSource sheetSource = new InputSource(inputStream);
parser.parse(sheetSource);
return singleRowHandler.getRow();
} catch (Exception e) {
String message = e.getMessage();
if (Objects.nonNull(rowNum) && Objects.nonNull(singleRowHandler)
&& SingleRowHandler.FINISH_ROW_MESSAGE.equalsIgnoreCase(message)) {
// 获取某一行数据完成 ,暂时不知道怎么能终止excel解析,直接抛出了异常,实际是成功的
return singleRowHandler.getRow();
}
throw e;
} finally {
if (Objects.nonNull(pkg)) {
pkg.close();
}
if (Objects.nonNull(inputStream)) {
inputStream.close();
}
}
}
// 处理一个sheet
public static List<List<String>> getRowsFromSheetOne(String filename) throws Exception {
InputStream inputStream = null;
OPCPackage pkg = null;
MultiRowHandler multiRowHandler = null;
try {
pkg = OPCPackage.open(filename);
XSSFReader r = new XSSFReader(pkg);
SharedStringsTable sst = r.getSharedStringsTable();
multiRowHandler = new MultiRowHandler(sst);
XMLReader parser = XMLReaderFactory.createXMLReader("com.sun.org.apache.xerces.internal.parsers.SAXParser");
parser.setContentHandler(multiRowHandler);
inputStream = r.getSheet("rId1");
InputSource sheetSource = new InputSource(inputStream);
parser.parse(sheetSource);
return multiRowHandler.getRows();
} catch (Exception e) {
throw e;
} finally {
if (Objects.nonNull(pkg)) {
pkg.close();
}
if (Objects.nonNull(inputStream)) {
inputStream.close();
}
}
}
}
SingleRowHandler.class 当行处理类,可以只获取表头或表格中的某一行数据
package com.xxx.bi.utils.excel;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.regex.Pattern;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
public class SingleRowHandler extends DefaultHandler {
public final static String FINISH_ROW_MESSAGE = "row data process finish";
private Integer rowNum = null;// rowNum不为空时则标示只需要获取这一行的数据
private int curRowNum = 1;
private String cellType = "";
private SharedStringsTable sst;
private String lastContents;
private boolean nextIsString;
private String cellPosition;
private List<String> row = new ArrayList<>();
public List<String> getRow() {
return row;
}
public SingleRowHandler(SharedStringsTable sst,Integer rowNum) {
this.sst = sst;
this.rowNum = rowNum;
}
public void startElement(String uri,String localName,String name,Attributes attributes) throws SAXException {
if (name.equals("c")) {
cellPosition = attributes.getValue("r");
// 这是一个新行
if (Pattern.compile("^A[0-9]+$").matcher(cellPosition).find()) {
curRowNum = Integer.valueOf(cellPosition.substring(1));
}
cellType = "";
cellType = attributes.getValue("t");
if ("s".equals(cellType)) {
nextIsString = true;
} else {
nextIsString = false;
}
}
// 清楚缓存内容
lastContents = "";
if (Objects.nonNull(rowNum) && curRowNum > rowNum) {
// 获取某一行数据完成 ,暂时不知道怎么能终止excel解析,直接抛出了异常,实际是成功的
throw new SAXException(FINISH_ROW_MESSAGE);
}
}
public void endElement(String uri,String name) throws SAXException {
if (nextIsString) {
int idx = Integer.parseInt(lastContents);
lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
nextIsString = false;
}
if (name.equals("v")) {
if (Objects.isNull(rowNum) || rowNum == curRowNum) {
row.add(lastContents);
}
}
}
public void characters(char[] ch,int start,int length) throws SAXException {
lastContents += new String(ch,start,length);
}
}
(编辑:安卓应用网) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容! |
