当前位置: 首页 > 工具软件 > EXCEL-UTIL4J > 使用案例 >

java -excel-读取较大的excel文件防止内存溢出(兼容Excel2003和2007)

岳劲
2023-12-01

如果觉得写得可以 或者太差 就 评论一下或者赞一下呗,多谢支持!!


1. 需要的jar
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi (core Excel file support) -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.15</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
<!-- Keep this version identical to the poi artifact above: POI jars from different releases must not be mixed. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.15</version>
</dependency>

2.代码

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.poi.hssf.util.CellReference;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.util.SAXHelper;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;

/**
 *
 * @author lhy
 *
 */
public class ReadExcel2003_2007 {
    private List<String[]> rows = new ArrayList<String[]>();
    private final OPCPackage xlsxPackage;
    private int minColumns;

    private class SheetToCSV implements SheetContentsHandler {
        private String[] record;
        private  int minColumns;
        public SheetToCSV(int minColumns) {
            super();
            this.minColumns = minColumns;
        }

        @Override
        public void startRow(int rowNum) {
            record=new String[this.minColumns];
        }

        @Override
        public void endRow(int rowNum) {
            rows.add(this.record);
        }

        @Override
        public void cell(String cellReference, String formattedValue, XSSFComment comment) {
            int thisCol = (new CellReference(cellReference)).getCol();
            record[thisCol]=formattedValue;

        }

        @Override
        public void headerFooter(String text, boolean isHeader, String tagName) {
            // Skip, no headers or footers in CSV
        }

    }

    public ReadExcel2003_2007(OPCPackage pkg, int minColumns) {
        this.xlsxPackage = pkg;
        this.minColumns = minColumns;
    }

    public void processSheet(StylesTable styles,  ReadOnlySharedStringsTable strings, SheetContentsHandler sheetHandler,InputStream sheetInputStream)
            throws IOException, ParserConfigurationException, SAXException {
        DataFormatter formatter = new DataFormatter();
        InputSource sheetSource = new InputSource(sheetInputStream);
        try {
            XMLReader sheetParser = SAXHelper.newXMLReader();
            ContentHandler handler = new XSSFSheetXMLHandler(styles, null, strings, sheetHandler, formatter, false);
            sheetParser.setContentHandler(handler);
            sheetParser.parse(sheetSource);
        } catch (ParserConfigurationException e) {
            throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage());
        }
    }

    public  List<String[]> process() throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {
        ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage);
        XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);
        StylesTable styles = xssfReader.getStylesTable();
        XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
        int index = 0;
        while (iter.hasNext()) {
            InputStream stream = iter.next();
            String sheetName = iter.getSheetName();
            processSheet(styles, strings, new SheetToCSV(this.minColumns), stream);
            stream.close();
            ++index;
        }
        return this.rows;
    }

    /**
     * 得到excel的记录
     * @param excelPath
     * @param minColumns 输出多少列
     * @return
     * @throws Exception
     */
    public static List<String[]> getRecords(String excelPath,int minColumns) throws Exception{
        File xlsxFile = new File(excelPath);
        if (!xlsxFile.exists()) {
            System.err.println("Not found or not a file: " + xlsxFile.getPath());
            return null;
        }
        OPCPackage p = OPCPackage.open(xlsxFile);
        ReadExcel2003_2007 xlsx2csv = new ReadExcel2003_2007(p,minColumns);
        List<String[]>list=xlsx2csv.process();
        p.close();
        return list;
    }

    public static void main(String[] args) throws Exception {
        //需要的参数 是  excel文件的地址,和 一共有多少列
        List<String[]>list=getRecords("d:/123.xlsx",1);

      //如果需要数组里放数组
        ArrayList<ArrayList<String>> result = new ArrayList<>();
        for(int i=0;i<list.size();i++)
        {
            ArrayList<String> arrayList = new ArrayList<>();
            for(String a:list.get(i))
            {
                arrayList.add(a);
                System.out.println(a);
            }
           result.add(arrayList);
        }
    }
}

对于上面的代码,可以把最后的主函数写成一个工具类:传入 excel 的地址和一共有多少列,然后输出一个 list,这样就可以获取 excel 里的所有数据。

至于里面具体如何实现的 没有特殊要求 可以不做过多研究.  能用就可以.

 类似资料: