文档章节

poi通过事件模式sax解析大excel文件,防止内存溢出

Sheav
 Sheav
发布于 10/12 13:38
字数 1122
阅读 7
收藏 0
POI

1.解析类

package com.cetc.ExcelPoi;

import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.*;
import org.xml.sax.helpers.DefaultHandler;

import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

/**
 * Created by shea on 2018/10/12.
 */
public class BigExcelParse extends DefaultHandler {
    enum xssfDataType {
        BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER,
    }
    /**
     * Table with styles
     */
    private StylesTable stylesTable;

    //取SST 的索引对应的值
    private SharedStringsTable sharedStringsTable;
    /**
     * 最大列数
     */
    private final int minColumnCount;

    //单元格内容是SST 的索引
    private boolean vIsOpen;

    private xssfDataType nextDataType;

    private short formatIndex;
    private String formatString;
    private final DataFormatter formatter;

    private int thisColumn = -1;
    // The last column printed to the output stream
    private int lastColumnNumber = -1;

    // Gathers characters as they are seen.
    private StringBuffer value;
    private String[] record;//封装每一行的结果
    private List<String[]> rows = new ArrayList<String[]>();//封装结果
    private boolean isCellNull = false;
    private SimpleDateFormat sdf = null;
    private static DecimalFormat df = new DecimalFormat("###########");


    //构造器
    public BigExcelParse(StylesTable styles,
                              SharedStringsTable strings, int cols) {
        this.stylesTable = styles;
        this.sharedStringsTable = strings;
        this.minColumnCount = cols;
        this.value = new StringBuffer();
        this.nextDataType = xssfDataType.NUMBER;
        this.formatter = new DataFormatter();
        record = new String[this.minColumnCount];
        rows.clear();// 每次读取都清空行集合
    }

    public void startElement(String uri, String localName, String name,
                             Attributes attributes) throws SAXException {

        if ("inlineStr".equals(name) || "v".equals(name)) {
            vIsOpen = true;
            // Clear contents cache
            value.setLength(0);
        }
        // c => cell
        else if ("c".equals(name)) {
            // Get the cell reference
            String r = attributes.getValue("r");
            int firstDigit = -1;
            for (int c = 0; c < r.length(); ++c) {
                if (Character.isDigit(r.charAt(c))) {
                    firstDigit = c;
                    break;
                }
            }
            thisColumn = nameToColumn(r.substring(0, firstDigit));

            // Set up defaults.
            this.nextDataType = xssfDataType.NUMBER;
            this.formatIndex = -1;
            this.formatString = null;
            String cellType = attributes.getValue("t");
            String cellStyleStr = attributes.getValue("s");
            if ("b".equals(cellType))
                nextDataType = xssfDataType.BOOL;
            else if ("e".equals(cellType))
                nextDataType = xssfDataType.ERROR;
            else if ("inlineStr".equals(cellType))
                nextDataType = xssfDataType.INLINESTR;
            else if ("s".equals(cellType))
                nextDataType = xssfDataType.SSTINDEX;
            else if ("str".equals(cellType))
                nextDataType = xssfDataType.FORMULA;
            else if (cellStyleStr != null) {
                // It's a number, but almost certainly one
                // with a special style or format
                int styleIndex = Integer.parseInt(cellStyleStr);
                XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);
                this.formatIndex = style.getDataFormat();
                this.formatString = style.getDataFormatString();
                if (this.formatString == null)
                    this.formatString = BuiltinFormats
                            .getBuiltinFormat(this.formatIndex);
            }
        }

    }

    public void endElement(String uri, String localName, String name)
            throws SAXException {

        String thisStr = null;

        // v => contents of a cell
        //单元格内容标签结束,characters方法会被调用处理内容
        if ("v".equals(name)) {
            // Process the value contents as required.
            // Do now, as characters() may be called more than once
            switch (nextDataType) {

                case BOOL:
                    char first = value.charAt(0);
                    thisStr = first == '0' ? "FALSE" : "TRUE";
                    break;

                case ERROR:
                    thisStr = "\"ERROR:" + value.toString() + '"';
                    break;

                case FORMULA:
                    // A formula could result in a string value,
                    // so always add double-quote characters.
                    thisStr = value.toString();
                    break;

                case INLINESTR:
                    // TODO: have seen an example of this, so it's untested.
                    XSSFRichTextString rtsi = new XSSFRichTextString(
                            value.toString());
                    thisStr =rtsi.toString();
                    break;

                case SSTINDEX:
                    String sstIndex = value.toString();
                    try {
                        int idx = Integer.parseInt(sstIndex);
                        XSSFRichTextString rtss = new XSSFRichTextString(
                                sharedStringsTable.getEntryAt(idx));
                        thisStr = rtss.toString();
                    } catch (NumberFormatException ex) {
                        System.out.println("Failed to parse SST index '" + sstIndex
                                + "': " + ex.toString());
                    }
                    break;

                case NUMBER:
                    String n = value.toString();
                    // 判断是否是日期格式
                    if (formatIndex == 14 || formatIndex == 31 || formatIndex == 57 || formatIndex == 58
                            || (176<=formatIndex && formatIndex<=178) || (182<=formatIndex && formatIndex<=196)
                            || (210<=formatIndex && formatIndex<=213) || (208==formatIndex ) ) {// 日期
                        sdf = new SimpleDateFormat("yyyy-MM-dd");
                        Date date = org.apache.poi.ss.usermodel.DateUtil.getJavaDate(Double.parseDouble(n));
                        thisStr=sdf.format(date);
                    } else if (formatIndex == 20 || formatIndex == 32 || formatIndex==183 || (200<=formatIndex && formatIndex<=209)) {//时间
                        sdf = new SimpleDateFormat("HH:mm");
                        Date date = org.apache.poi.ss.usermodel.DateUtil.getJavaDate(Double.parseDouble(n));
                        thisStr = sdf.format(date);
                    }
                    else{
                         if(n.contains("E")){//科学计数法
                             String[] split = n.split("\\+");
                             String e = split[0].replaceAll("E|e", "");
                             thisStr=e.replace(".","");
                        }else {
                             thisStr = n;
                         }
                    }
                    break;
                default:
                    thisStr = "(TODO: Unexpected type: " + nextDataType + ")";
                    break;
            }
            if (lastColumnNumber == -1) {
                lastColumnNumber = 0;
            }
            //判断单元格的值是否为空
            if (thisStr == null || "".equals(isCellNull)) {
                isCellNull = true;// 设置单元格是否为空值
            }
            record[thisColumn] = thisStr;
            // Update column
            if (thisColumn > -1)
                lastColumnNumber = thisColumn;
            //行结束,存储一行数据
        } else if ("row".equals(name)) {
            // Print out any missing commas if needed
            if (minColumnCount > 0) {
                // Columns are 0 based
                if (lastColumnNumber == -1) {
                    lastColumnNumber = 0;
                }
                // 判断是否空行
                if(record!=null &&record.length!=0){
                    rows.add(record.clone());
                    isCellNull = false;
                    for (int i = 0; i < record.length; i++) {
                        record[i] = null;
                    }
                }
            }
            lastColumnNumber = -1;
        }

    }

    public List<String[]> getRows() {
        return rows;
    }

    public void setRows(List<String[]> rows) {
        this.rows = rows;
    }

    public void characters(char[] ch, int start, int length)
            throws SAXException {
        if (vIsOpen)
            value.append(ch, start, length);
    }

    private int nameToColumn(String name) {
        int column = -1;
        for (int i = 0; i < name.length(); ++i) {
            int c = name.charAt(i);
            column = (column + 1) * 26 + c - 'A';
        }
        return column;
    }
}

2.调用类

package com.cetc.ExcelPoi;

import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.xml.sax.InputSource;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;
import java.io.*;
import java.util.HashMap;
import java.util.List;

/**
 * Created by shea on 2018/10/12.
 */
public class TestBigExcelParse {
    private OPCPackage xlsxPackage;
    private int minColumns;
    private PrintStream output;
    private String sheetName;
    //构造器
    public TestBigExcelParse(OPCPackage pkg, PrintStream output,
                               String sheetName, int minColumns) {
        this.xlsxPackage = pkg;
        this.output = output;
        this.minColumns = minColumns;
        this.sheetName = sheetName;
    }

    /**
     *
     * @param path----文件路径
     * @param sheetName---解析的sheet名称
     * @param minColumns---excel的最大列数
     * @return
     * @throws Exception
     */
    public static HashMap<String, Object> parseSheet(String path,String sheetName,int minColumns)throws Exception{
        //文件地址
        OPCPackage pkg = OPCPackage.open(path,PackageAccess.READ);
        XSSFReader r = new XSSFReader( pkg );
        //解析的sheet名称
        // InputStream rId1 = r.getSheet("rId1");//第一个sheet表
        XSSFReader.SheetIterator sheetsData = (XSSFReader.SheetIterator)r.getSheetsData();
        SharedStringsTable sst = r.getSharedStringsTable();
        StylesTable styles = r.getStylesTable();
        XMLReader parser = XMLReaderFactory.createXMLReader();
        BigExcelParse handler = new BigExcelParse(styles, sst, minColumns);
        parser.setContentHandler(handler);
        //遍历---获取指定的sheet名称
        HashMap<String, Object> map = new HashMap<String, Object>();
        while (sheetsData.hasNext()){
            InputStream in = sheetsData.next();
            if(sheetName.equals(sheetsData.getSheetName())){
                InputSource inputSource = new InputSource(in);
                parser.parse(inputSource);
                List<String[]> rows = handler.getRows();//返回所有的封装结果
                map.put("success", true);
                map.put("msg","解析完成!");
                map.put("rows",rows);
                in.close();
            }
        }
        if(map.isEmpty()){
            map.put("success", false);
            map.put("msg","解析失败,没有找到相应的sheet表!");
        }
        return map;
    }

    /**
     * 测试方法调用
     * @param args
     * @throws Exception
     */
    public static void main(String[] args) throws Exception{
        HashMap<String,Object> res = parseSheet("C:\\Users\\shea\\Desktop\\测试2.xlsx", "Sheet1", 285);
        System.out.println((String)res.get("msg"));
        List<String[]> rows = (List<String[]>)res.get("rows");
        BufferedWriter out = new BufferedWriter(new FileWriter("C:\\Users\\shea\\Desktop\\测试_bigExcel.txt"));
        for (String[] row:rows
             ) {
            for (String cell:row
                 ) {
                out.write(cell+"\t");
            }
           out.newLine();
        }
        out.flush();
        out.close();
    }
}

 

© 著作权归作者所有

共有 人打赏支持
Sheav
粉丝 1
博文 51
码字总数 30572
作品 0
后端工程师
Java 解析 Excel 工具 - easyexcel

easyexcel —— JAVA 解析 Excel 工具 Java 解析、生成 Excel 比较有名的框架有 Apache poi、jxl 。但他们都存在一个严重的问题就是非常的耗内存,poi 有一套 SAX 模式的 API 可以一定程度的...

匿名
08/13
0
0
Java解析excel工具easyexcel助你快速简单避免OOM[图]

Java解析excel工具easyexcel助你快速简单避免OOM[图] Java解析、生成Excel比较有名的框架有Apache poi、jxl。但他们都存在一个严重的问题就是非常的耗内存,poi有一套SAX模式的API可以一定程...

原创小博客
07/19
0
0
POI框架EXCEL解析性能优化

背景 在做商品EXCEL的时候,线上发现了Full GC,排查得知是商家搞了一个巨大的excel,单商品发布接口平均耗时400ms(调用sell耗时200ms左右,系统自身处理商品同步耗时150ms左右),对于3000个商...

墨少伶
07/23
0
0
POI处理excel2007内存溢出问题

项目中遇到数据导入、导出用excle操作的问题,数据量在W级别,因03版有6W+的限制,系统统一采用07版excel来做,采用POI进行处理,在导入、导出的时候都遇到的内存溢出的问题,导入方面主要参...

sjzmlb
2015/05/13
0
0
XML的4中解析方式分析

基础方法:DOM(平台无关的官方解析方式)、SAX(基于事件驱动的解析方式) 扩展方法:JDOM、DOM4J(在基础的方法上扩展出的,只有在java中能够使用的解析方法) DOM:一次性将整个xml文件加...

Favour
2016/05/26
21
0

没有更多内容

加载失败,请刷新页面

加载更多

HashTable

Hashtable 是一个散列表,它存储的内容是键值对(key-value)映射 Hashtable 继承于Dictionary,实现了Map、Cloneable、java.io.Serializable接口 Hashtable 的函数都是同步的,这意味着它是线...

职业搬砖20年
5分钟前
0
0
Linux系统状态查看命令1

10月23日任务 10.1 使用w查看系统负载 10.2 vmstat命令 10.3 top命令 10.4 sar命令 10.5 nload命令 查看系统负载 w命令 # 第一行:当前系统时间,系统启动时间,登录的用户,系统负载:1分钟...

robertt15
20分钟前
0
0
缓存那些事

前言 一般而言,现在互联网应用(网站或App)的整体流程,可以概括如图1所示,用户请求从界面(浏览器或App界面)到网络转发、应用服务再到存储(数据库或文件系统),然后返回到界面呈现内容...

Skqing
29分钟前
0
0
nginx开启stub_status模块配置方法

nginx开启stub_status模块配置方法 2017年12月13日 15:57:29 ly_dengle 阅读数:3765 标签: stub_statusnginxnginx开启stub_status模块 更多 个人分类: 软件工具php 版权声明:本文为博主原...

linjin200
36分钟前
3
0
挑逗 Java 程序员的那些 Scala 绝技

有个问题一直困扰着 Scala 社区,为什么一些 Java 开发者将 Scala 捧到了天上,认为它是来自上帝之吻的完美语言;而另外一些 Java 开发者却对它望而却步,认为它过于复杂而难以理解。同样是 ...

joymufeng
39分钟前
94
0

没有更多内容

加载失败,请刷新页面

加载更多

返回顶部
顶部