使用Apache POI进行低内存写入/读取

我正在尝试编写一个非常大的XLSX文件(4M +单元),而且我遇到了一些内存问题。

我不能使用SXSSF,因为我还需要读取模板中的现有单元格。

有什么办法可以减少内存占用吗?
也许结合流媒体阅读和流媒体写作?

要处理内存不足的大数据,我认为唯一的选择就是SXSSF api-s。 如果您需要读取现有单元格的一些数据,我假设您不需要同时使用整个4M +。 在这种情况下,根据您的应用程序要求,您可以自己处理窗口大小,并在内存中仅保留特定时间所需的数据量。 您可以从以下示例开始: http : //poi.apache.org/spreadsheet/how-to.html#sxssf

有点像

SXSSFWorkbook wb = new SXSSFWorkbook(-1); // turn off auto-flushing and accumulate all rows in memory // manually control how rows are flushed to disk if(rownum % NOR == 0) { ((SXSSFSheet)sh).flushRows(NOR); // retain NOR last rows and flush all others 

希望这可以帮助。

我使用SAX解析器来处理XML文档表示的事件。 这是

 import com.sun.org.apache.xerces.internal.parsers.SAXParser; import org.apache.poi.openxml4j.opc.PackageAccess; import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.xssf.eventusermodel.XSSFReader; import org.apache.poi.xssf.model.SharedStringsTable; import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.xssf.usermodel.XSSFRichTextString; import org.xml.sax.Attributes; import org.xml.sax.ContentHandler; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.XMLReader; import org.xml.sax.helpers.DefaultHandler; import java.io.BufferedInputStream; import java.io.InputStream; import java.util.Collections; import java.util.Iterator; import java.util.LinkedList; import java.util.List; public class LowMemoryExcelFileReader { private String file; public LowMemoryExcelFileReader(String file) { this.file = file; } public List read() { try { return processFirstSheet(file); } catch (Exception e) { throw new RuntimeException(e); } } private List readSheet(Sheet sheet) { List res = new LinkedList<>(); Iterator rowIterator = sheet.rowIterator(); while (rowIterator.hasNext()) { Row row = rowIterator.next(); int cellsNumber = row.getLastCellNum(); String [] cellsValues = new String[cellsNumber]; Iterator cellIterator = row.cellIterator(); int cellIndex = 0; while (cellIterator.hasNext()) { Cell cell = cellIterator.next(); cellsValues[cellIndex++] = cell.getStringCellValue(); } res.add(cellsValues); } return res; } public String getFile() { return file; } public void setFile(String file) { this.file = file; } private List processFirstSheet(String filename) throws Exception { OPCPackage pkg = OPCPackage.open(filename, PackageAccess.READ); XSSFReader r = new XSSFReader(pkg); SharedStringsTable sst = r.getSharedStringsTable(); SheetHandler handler = new SheetHandler(sst); XMLReader parser = fetchSheetParser(handler); Iterator sheetIterator = r.getSheetsData(); if (!sheetIterator.hasNext()) { return Collections.emptyList(); } InputStream sheetInputStream = sheetIterator.next(); BufferedInputStream bisSheet = new BufferedInputStream(sheetInputStream); InputSource sheetSource = new InputSource(bisSheet); parser.parse(sheetSource); List res = handler.getRowCache(); bisSheet.close(); return res; } public XMLReader fetchSheetParser(ContentHandler handler) throws SAXException { XMLReader parser = new SAXParser(); parser.setContentHandler(handler); return parser; } /** * See org.xml.sax.helpers.DefaultHandler javadocs */ private static class SheetHandler extends DefaultHandler { private static final String ROW_EVENT = "row"; private static final String CELL_EVENT = "c"; private SharedStringsTable sst; private String lastContents; private boolean nextIsString; private List cellCache = new LinkedList<>(); private List rowCache = new LinkedList<>(); private SheetHandler(SharedStringsTable sst) { this.sst = sst; } public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException { // c => cell if (CELL_EVENT.equals(name)) { String cellType = attributes.getValue("t"); if(cellType != null && cellType.equals("s")) { nextIsString = true; } else { nextIsString = false; } } else if (ROW_EVENT.equals(name)) { if (!cellCache.isEmpty()) { rowCache.add(cellCache.toArray(new String[cellCache.size()])); } cellCache.clear(); } // Clear contents cache lastContents = ""; } public void endElement(String uri, String localName, String name) throws SAXException { // Process the last contents as required. // Do now, as characters() may be called more than once if(nextIsString) { int idx = Integer.parseInt(lastContents); lastContents = new XSSFRichTextString(sst.getEntryAt(idx)).toString(); nextIsString = false; } // v => contents of a cell // Output after we've seen the string contents if(name.equals("v")) { cellCache.add(lastContents); } } public void characters(char[] ch, int start, int length) throws SAXException { lastContents += new String(ch, start, length); } public List getRowCache() { return rowCache; } } }