diff --git a/pom.xml b/pom.xml index 7de71665..8c147840 100644 --- a/pom.xml +++ b/pom.xml @@ -11,7 +11,7 @@ com.github.liaochong myexcel - 4.0.0.RC2 + 4.0.0.RC3 jar myexcel @@ -22,7 +22,7 @@ UTF-8 UTF-8 1.8 - 5.1.0 + 5.2.0 1.14.3 1.18.22 3.9.0.RELEASE diff --git a/src/main/java/com/github/liaochong/myexcel/core/AbstractHSSFReadHandler.java b/src/main/java/com/github/liaochong/myexcel/core/AbstractHSSFReadHandler.java new file mode 100644 index 00000000..cb391df8 --- /dev/null +++ b/src/main/java/com/github/liaochong/myexcel/core/AbstractHSSFReadHandler.java @@ -0,0 +1,53 @@ +/* + * Copyright 2019 liaochong + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.github.liaochong.myexcel.core; + +import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder; +import org.apache.poi.hssf.eventusermodel.HSSFEventFactory; +import org.apache.poi.hssf.eventusermodel.HSSFListener; +import org.apache.poi.hssf.eventusermodel.HSSFRequest; +import org.apache.poi.hssf.record.BoundSheetRecord; +import org.apache.poi.poifs.filesystem.POIFSFileSystem; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +/** + * 抽象HSSF读取处理器 + * + * @author liaochong + * @version 1.0 + */ +abstract class AbstractHSSFReadHandler implements HSSFListener { + + protected BoundSheetRecord[] orderedBSRs; + + protected final List boundSheetRecords = new ArrayList<>(); + + protected int sheetIndex = -1; + + protected String sheetName; + + protected SaxExcelReader.ReadConfig readConfig; + + protected POIFSFileSystem fs; + + protected void process() throws IOException { + HSSFRequest request = new HSSFRequest(); + request.addListenerForAllRecords(new EventWorkbookBuilder.SheetRecordCollectingListener(this)); + new HSSFEventFactory().processWorkbookEvents(request, fs); + } +} diff --git a/src/main/java/com/github/liaochong/myexcel/core/CiConsumer.java b/src/main/java/com/github/liaochong/myexcel/core/CiConsumer.java new file mode 100644 index 00000000..1018364f --- /dev/null +++ b/src/main/java/com/github/liaochong/myexcel/core/CiConsumer.java @@ -0,0 +1,11 @@ +package com.github.liaochong.myexcel.core; + +/** + * @author liaochong + * @version 1.0 + */ +@FunctionalInterface +interface CiConsumer { + + void accept(T t, F f, U u); +} diff --git a/src/main/java/com/github/liaochong/myexcel/core/CiFunction.java b/src/main/java/com/github/liaochong/myexcel/core/CiFunction.java new file mode 100644 index 00000000..ce598158 --- /dev/null +++ b/src/main/java/com/github/liaochong/myexcel/core/CiFunction.java @@ -0,0 +1,11 @@ +package com.github.liaochong.myexcel.core; + +/** + * @author liaochong + * @version 1.0 + */ +@FunctionalInterface +interface CiFunction { + + U apply(T t, F f, R r); +} diff --git a/src/main/java/com/github/liaochong/myexcel/core/HSSFMergeReadHandler.java b/src/main/java/com/github/liaochong/myexcel/core/HSSFMergeReadHandler.java new file mode 100644 index 00000000..4b878b07 --- /dev/null +++ b/src/main/java/com/github/liaochong/myexcel/core/HSSFMergeReadHandler.java @@ -0,0 +1,99 @@ +/* + * Copyright 2019 liaochong + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.github.liaochong.myexcel.core; + +import org.apache.poi.hssf.record.BOFRecord; +import org.apache.poi.hssf.record.BoundSheetRecord; +import org.apache.poi.hssf.record.MergeCellsRecord; +import org.apache.poi.hssf.record.Record; +import org.apache.poi.poifs.filesystem.POIFSFileSystem; +import org.apache.poi.ss.util.CellAddress; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; + +/** + * @author liaochong + * @version 1.0 + */ +class HSSFMergeReadHandler extends AbstractHSSFReadHandler { + + private final Map> mergeCellIndexMapping; + + public HSSFMergeReadHandler(File file, + SaxExcelReader.ReadConfig readConfig, + Map> mergeCellIndexMapping) throws IOException { + this.readConfig = readConfig; + this.mergeCellIndexMapping = mergeCellIndexMapping; + this.fs = new POIFSFileSystem(new FileInputStream(file)); + } + + + @Override + public void processRecord(Record record) { + switch (record.getSid()) { + case BoundSheetRecord.sid: + boundSheetRecords.add((BoundSheetRecord) record); + break; + case BOFRecord.sid: + BOFRecord br = (BOFRecord) record; + if (br.getType() == BOFRecord.TYPE_WORKSHEET) { + sheetIndex++; + if (orderedBSRs == null) { + orderedBSRs = BoundSheetRecord.orderByBofPosition(boundSheetRecords); + } + sheetName = orderedBSRs[sheetIndex].getSheetname(); + } + break; + case MergeCellsRecord.sid: + if (!isSelectedSheet()) { + return; + } + MergeCellsRecord mergeCellsRecord = (MergeCellsRecord) record; + int numAreas = mergeCellsRecord.getNumAreas(); + Map mergeCellMapping = new HashMap<>(); + for (int i = 0; i < numAreas; i++) { + Iterator iterator = mergeCellsRecord.getAreaAt(i).iterator(); + CellAddress firstCellAddress = null; + while (iterator.hasNext()) { + CellAddress cellAddress = iterator.next(); + if (firstCellAddress == null) { + firstCellAddress = cellAddress; + } else { + mergeCellMapping.put(cellAddress, firstCellAddress); + } + } + } + mergeCellIndexMapping.put(sheetIndex, mergeCellMapping); + break; + default: + break; + } + } + + private boolean isSelectedSheet() { + if (readConfig.readAllSheet) { + return true; + } + if (!readConfig.sheetNames.isEmpty()) { + return readConfig.sheetNames.contains(sheetName); + } + return readConfig.sheetIndexs.contains(sheetIndex); + } +} diff --git a/src/main/java/com/github/liaochong/myexcel/core/HSSFMetaDataSaxReadHandler.java b/src/main/java/com/github/liaochong/myexcel/core/HSSFMetaDataSaxReadHandler.java index 6d66f537..dc2c7e33 100644 --- a/src/main/java/com/github/liaochong/myexcel/core/HSSFMetaDataSaxReadHandler.java +++ b/src/main/java/com/github/liaochong/myexcel/core/HSSFMetaDataSaxReadHandler.java @@ -14,12 +14,6 @@ */ package com.github.liaochong.myexcel.core; -import org.apache.poi.hssf.eventusermodel.EventWorkbookBuilder; -import org.apache.poi.hssf.eventusermodel.FormatTrackingHSSFListener; -import org.apache.poi.hssf.eventusermodel.HSSFEventFactory; -import org.apache.poi.hssf.eventusermodel.HSSFListener; -import org.apache.poi.hssf.eventusermodel.HSSFRequest; -import org.apache.poi.hssf.eventusermodel.MissingRecordAwareHSSFListener; import org.apache.poi.hssf.eventusermodel.dummyrecord.LastCellOfRowDummyRecord; import org.apache.poi.hssf.record.BOFRecord; import org.apache.poi.hssf.record.BlankRecord; @@ -32,13 +26,11 @@ import org.apache.poi.hssf.record.NumberRecord; import org.apache.poi.hssf.record.RKRecord; import org.apache.poi.hssf.record.Record; -import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.poifs.filesystem.POIFSFileSystem; import java.io.File; import java.io.FileInputStream; import java.io.IOException; -import java.util.ArrayList; import java.util.List; /** @@ -47,27 +39,10 @@ * @author liaochong * @version 1.0 */ -public class HSSFMetaDataSaxReadHandler implements HSSFListener { - - private BoundSheetRecord[] orderedBSRs; - - private final List boundSheetRecords = new ArrayList<>(); - - private final POIFSFileSystem fs; +class HSSFMetaDataSaxReadHandler extends AbstractHSSFReadHandler { private int lastRowNumber = -1; - /** - * For parsing Formulas - */ - private EventWorkbookBuilder.SheetRecordCollectingListener workbookBuildingListener; - private HSSFWorkbook stubWorkbook; - - /** - * So we known which sheet we're on - */ - private int sheetIndex = -1; - private final WorkbookMetaData workbookMetaData; public HSSFMetaDataSaxReadHandler(File file, WorkbookMetaData workbookMetaData) throws IOException { @@ -75,14 +50,9 @@ public HSSFMetaDataSaxReadHandler(File file, WorkbookMetaData workbookMetaData) this.workbookMetaData = workbookMetaData; } + @Override public void process() throws IOException { - MissingRecordAwareHSSFListener listener = new MissingRecordAwareHSSFListener(this); - FormatTrackingHSSFListener formatListener = new FormatTrackingHSSFListener(listener); - - HSSFRequest request = new HSSFRequest(); - workbookBuildingListener = new EventWorkbookBuilder.SheetRecordCollectingListener(formatListener); - request.addListenerForAllRecords(workbookBuildingListener); - new HSSFEventFactory().processWorkbookEvents(request, fs); + super.process(); // 处理最后一个sheet if (lastRowNumber > -1) { workbookMetaData.getSheetMetaDataList().get(sheetIndex).setLastRowNum(lastRowNumber + 1); @@ -99,9 +69,6 @@ public void processRecord(Record record) { case BOFRecord.sid: BOFRecord br = (BOFRecord) record; if (br.getType() == BOFRecord.TYPE_WORKSHEET) { - if (workbookBuildingListener != null && stubWorkbook == null) { - stubWorkbook = workbookBuildingListener.getStubHSSFWorkbook(); - } List sheetMetaDataList = workbookMetaData.getSheetMetaDataList(); if (lastRowNumber > -1) { sheetMetaDataList.get(sheetIndex).setLastRowNum(lastRowNumber + 1); @@ -118,46 +85,36 @@ public void processRecord(Record record) { } break; case BlankRecord.sid: - BlankRecord brec = (BlankRecord) record; - thisRow = brec.getRow(); + thisRow = ((BlankRecord) record).getRow(); break; case BoolErrRecord.sid: - BoolErrRecord berec = (BoolErrRecord) record; - thisRow = berec.getRow(); + thisRow = ((BoolErrRecord) record).getRow(); break; case FormulaRecord.sid: - FormulaRecord frec = (FormulaRecord) record; - thisRow = frec.getRow(); + thisRow = ((FormulaRecord) record).getRow(); break; case LabelRecord.sid: - LabelRecord lrec = (LabelRecord) record; - thisRow = lrec.getRow(); + thisRow = ((LabelRecord) record).getRow(); break; case LabelSSTRecord.sid: - LabelSSTRecord lsrec = (LabelSSTRecord) record; - thisRow = lsrec.getRow(); + thisRow = ((LabelSSTRecord) record).getRow(); break; case NoteRecord.sid: - NoteRecord nrec = (NoteRecord) record; - thisRow = nrec.getRow(); + thisRow = ((NoteRecord) record).getRow(); break; case NumberRecord.sid: - NumberRecord numrec = (NumberRecord) record; - thisRow = numrec.getRow(); + thisRow = ((NumberRecord) record).getRow(); break; case RKRecord.sid: - RKRecord rkrec = (RKRecord) record; - thisRow = rkrec.getRow(); + thisRow = ((RKRecord) record).getRow(); break; default: break; } - if (record instanceof LastCellOfRowDummyRecord) { LastCellOfRowDummyRecord lc = (LastCellOfRowDummyRecord) record; thisRow = lc.getRow(); } - // Handle new row if (thisRow != -1 && thisRow != lastRowNumber) { lastRowNumber = thisRow; diff --git a/src/main/java/com/github/liaochong/myexcel/core/HSSFSaxReadHandler.java b/src/main/java/com/github/liaochong/myexcel/core/HSSFSaxReadHandler.java index f1c085b4..85efe456 100644 --- a/src/main/java/com/github/liaochong/myexcel/core/HSSFSaxReadHandler.java +++ b/src/main/java/com/github/liaochong/myexcel/core/HSSFSaxReadHandler.java @@ -38,16 +38,18 @@ import org.apache.poi.hssf.record.StringRecord; import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.poifs.filesystem.POIFSFileSystem; +import org.apache.poi.ss.util.CellAddress; import org.slf4j.Logger; import java.io.File; import java.io.FileInputStream; import java.io.IOException; -import java.io.InputStream; import java.util.ArrayList; +import java.util.Collections; import java.util.LinkedHashMap; import java.util.List; -import java.util.Set; +import java.util.Map; +import java.util.stream.Collectors; /** * HSSF sax处理 @@ -58,18 +60,17 @@ class HSSFSaxReadHandler extends AbstractReadHandler implements HSSFListener { private static final Logger log = org.slf4j.LoggerFactory.getLogger(HSSFSaxReadHandler.class); - private Set sheetIndexs; private String sheetName; - private POIFSFileSystem fs; + private final POIFSFileSystem fs; private int lastRowNumber = -1; /** * Should we output the formula, or the value it has? */ - private boolean outputFormulaValues = true; + private final boolean outputFormulaValues = true; /** * For parsing Formulas @@ -86,25 +87,24 @@ class HSSFSaxReadHandler extends AbstractReadHandler implements HSSFListen */ private int sheetIndex = -1; private BoundSheetRecord[] orderedBSRs; - private List boundSheetRecords = new ArrayList<>(); + private final List boundSheetRecords = new ArrayList<>(); // For handling formulas with string results private int nextRow; private int nextColumn; private boolean outputNextStringRecord; - public HSSFSaxReadHandler(File file, - List result, - SaxExcelReader.ReadConfig readConfig) throws IOException { - this(new FileInputStream(file), result, readConfig); - } + private final Map> mergeCellIndexMapping; + + private Map mergeFirstCellMapping; - public HSSFSaxReadHandler(InputStream inputStream, + public HSSFSaxReadHandler(File file, List result, - SaxExcelReader.ReadConfig readConfig) throws IOException { + SaxExcelReader.ReadConfig readConfig, + Map> mergeCellIndexMapping) throws IOException { super(false, result, readConfig); - this.fs = new POIFSFileSystem(inputStream); - this.sheetIndexs = readConfig.sheetIndexs; + this.fs = new POIFSFileSystem(new FileInputStream(file)); + this.mergeCellIndexMapping = mergeCellIndexMapping; } public void process() throws IOException { @@ -151,6 +151,7 @@ public void processRecord(Record record) { } sheetName = orderedBSRs[sheetIndex].getSheetname(); readConfig.startSheetConsumer.accept(sheetName, sheetIndex); + mergeFirstCellMapping = mergeCellIndexMapping.getOrDefault(sheetIndex, Collections.emptyMap()).values().stream().distinct().collect(Collectors.toMap(cellAddress -> cellAddress, c -> "")); } break; @@ -163,7 +164,6 @@ public void processRecord(Record record) { thisRow = brec.getRow(); thisColumn = brec.getColumn(); - thisStr = null; break; case BoolErrRecord.sid: BoolErrRecord berec = (BoolErrRecord) record; @@ -215,9 +215,7 @@ public void processRecord(Record record) { thisRow = lsrec.getRow(); thisColumn = lsrec.getColumn(); - if (sstRecord == null) { - thisStr = null; - } else { + if (sstRecord != null) { thisStr = sstRecord.getString(lsrec.getSSTIndex()).toString(); } break; @@ -226,7 +224,6 @@ public void processRecord(Record record) { thisRow = nrec.getRow(); thisColumn = nrec.getColumn(); - thisStr = null; break; case NumberRecord.sid: NumberRecord numrec = (NumberRecord) record; @@ -242,7 +239,6 @@ public void processRecord(Record record) { thisRow = rkrec.getRow(); thisColumn = rkrec.getColumn(); - thisStr = null; break; default: break; @@ -267,7 +263,17 @@ public void processRecord(Record record) { newRow(thisRow); } boolean isSelectedSheet = this.isSelectedSheet(); - if (isSelectedSheet) { + if (isSelectedSheet && thisColumn != -1) { + if (readConfig.detectedMerge) { + CellAddress cellAddress = new CellAddress(thisRow, thisColumn); + String finalThisStr = thisStr; + mergeFirstCellMapping.computeIfPresent(cellAddress, (k, v) -> finalThisStr); + CellAddress firstCellAddress = mergeCellIndexMapping.get(sheetIndex).get(cellAddress); + if (firstCellAddress != null) { + thisStr = mergeFirstCellMapping.get(firstCellAddress); + mergeCellIndexMapping.get(sheetIndex).remove(cellAddress); + } + } handleField(thisColumn, thisStr); } // Handle end of row @@ -287,6 +293,6 @@ private boolean isSelectedSheet() { if (!readConfig.sheetNames.isEmpty()) { return readConfig.sheetNames.contains(sheetName); } - return sheetIndexs.contains(sheetIndex); + return readConfig.sheetIndexs.contains(sheetIndex); } } diff --git a/src/main/java/com/github/liaochong/myexcel/core/HtmlToExcelStreamFactory.java b/src/main/java/com/github/liaochong/myexcel/core/HtmlToExcelStreamFactory.java index 0d208d03..30bf6c3a 100644 --- a/src/main/java/com/github/liaochong/myexcel/core/HtmlToExcelStreamFactory.java +++ b/src/main/java/com/github/liaochong/myexcel/core/HtmlToExcelStreamFactory.java @@ -230,7 +230,7 @@ private void setTdStyle(Tr tr) { private Tr getTrFromQueue() throws InterruptedException { Tr tr = context.trWaitQueue.poll(15, TimeUnit.MINUTES); if (tr == null) { - throw new IllegalStateException("Get tr failure,timeout 1 hour."); + throw new IllegalStateException("Get tr failure,timeout 15 minutes."); } return tr; } diff --git a/src/main/java/com/github/liaochong/myexcel/core/SaxExcelReader.java b/src/main/java/com/github/liaochong/myexcel/core/SaxExcelReader.java index 5616477e..625a4ad6 100644 --- a/src/main/java/com/github/liaochong/myexcel/core/SaxExcelReader.java +++ b/src/main/java/com/github/liaochong/myexcel/core/SaxExcelReader.java @@ -23,10 +23,9 @@ import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.openxml4j.opc.PackageAccess; import org.apache.poi.poifs.filesystem.FileMagic; -import org.apache.poi.ss.usermodel.DataFormatter; +import org.apache.poi.ss.util.CellAddress; import org.apache.poi.util.XMLHelper; import org.apache.poi.xssf.eventusermodel.XSSFReader; -import org.apache.poi.xssf.model.SharedStrings; import org.slf4j.Logger; import org.xml.sax.ContentHandler; import org.xml.sax.InputSource; @@ -41,9 +40,12 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; import java.util.LinkedList; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.function.BiConsumer; import java.util.function.BiFunction; @@ -154,6 +156,11 @@ public SaxExcelReader startSheet(BiConsumer startSheetConsum return this; } + public SaxExcelReader detectedMerge() { + this.readConfig.detectedMerge = true; + return this; + } + public List read(Path path) { doRead(path.toFile()); return result; @@ -296,7 +303,11 @@ private void doReadXls(File file, boolean readMetaData) { workbookMetaData = new WorkbookMetaData(); new HSSFMetaDataSaxReadHandler(file, workbookMetaData).process(); } else { - new HSSFSaxReadHandler<>(file, result, readConfig).process(); + Map> mergeCellIndexMapping = new HashMap<>(); + if (readConfig.detectedMerge) { + new HSSFMergeReadHandler(file, readConfig, mergeCellIndexMapping).process(); + } + new HSSFSaxReadHandler<>(file, result, readConfig, mergeCellIndexMapping).process(); } } catch (StopReadException e) { // do nothing @@ -337,93 +348,96 @@ private void doReadCsv(File file) { */ private void process(OPCPackage xlsxPackage) throws IOException, OpenXML4JException, SAXException { long startTime = System.currentTimeMillis(); + Map> mergeCellIndexMapping = this.processMerge(xlsxPackage); StringsCache stringsCache = new StringsCache(); try { ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(xlsxPackage, stringsCache); - XSSFReader xssfReader = new XSSFReader(xlsxPackage); - XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData(); - int index = -1; - if (readConfig.readAllSheet) { - while (iter.hasNext()) { - ++index; - try (InputStream stream = iter.next()) { - readConfig.startSheetConsumer.accept(iter.getSheetName(), index); - processSheet(strings, new XSSFSaxReadHandler<>(result, readConfig), stream); - } - } - } else if (!readConfig.sheetNames.isEmpty()) { - while (iter.hasNext()) { - ++index; - try (InputStream stream = iter.next()) { - if (readConfig.sheetNames.contains(iter.getSheetName())) { - readConfig.startSheetConsumer.accept(iter.getSheetName(), index); - processSheet(strings, new XSSFSaxReadHandler<>(result, readConfig), stream); - } - } - } - } else { - while (iter.hasNext()) { - ++index; - try (InputStream stream = iter.next()) { - if (readConfig.sheetIndexs.contains(index)) { - readConfig.startSheetConsumer.accept(iter.getSheetName(), index); - processSheet(strings, new XSSFSaxReadHandler<>(result, readConfig), stream); - } - } - } - } + this.doReadSheet(xlsxPackage, (stream, index, sheetName) -> { + readConfig.startSheetConsumer.accept(sheetName, index); + ContentHandler handler = new XSSFSheetXMLHandler( + mergeCellIndexMapping.getOrDefault(index, Collections.emptyMap()), strings, new XSSFSaxReadHandler<>(result, readConfig)); + processSheet(handler, stream); + mergeCellIndexMapping.remove(index); + }); } finally { stringsCache.clearAll(); } log.info("Sax import takes {} ms", System.currentTimeMillis() - startTime); } - private void processMetaData(OPCPackage xlsxPackage) throws IOException, OpenXML4JException, SAXException { - XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) new XSSFReader(xlsxPackage).getSheetsData(); + private Map> processMerge(OPCPackage xlsxPackage) throws IOException, OpenXML4JException { + if (!readConfig.detectedMerge) { + return Collections.emptyMap(); + } + Map> mergeCellIndexMapping = new HashMap<>(); + this.doReadSheet(xlsxPackage, (stream, index, sheetName) -> { + Map mergeCellMapping = new HashMap<>(); + processSheet(new XSSFSheetMergeXMLHandler(mergeCellMapping), stream); + mergeCellIndexMapping.put(index, mergeCellMapping); + }); + return mergeCellIndexMapping; + } + + private void processMetaData(OPCPackage xlsxPackage) throws IOException, OpenXML4JException { workbookMetaData = new WorkbookMetaData(); + readConfig.readAllSheet = true; + int lastIndex = this.doReadSheet(xlsxPackage, (stream, index, sheetName) -> { + SheetMetaData sheetMetaData = new SheetMetaData(sheetName, index); + this.processSheet(new XSSFSheetMetaDataXMLHandler(sheetMetaData), stream); + // 设置元数据信息 + workbookMetaData.getSheetMetaDataList().add(sheetMetaData); + }); + if (lastIndex > -1) { + workbookMetaData.setSheetCount(lastIndex + 1); + } + } + + private int doReadSheet(OPCPackage xlsxPackage, CiConsumer ciConsumer) throws IOException, OpenXML4JException { + XSSFReader.SheetIterator iter = this.getSheetIterator(xlsxPackage); + CiFunction acceptFunction = this.getSheetAcceptFunction(); int index = -1; while (iter.hasNext()) { ++index; try (InputStream stream = iter.next()) { - try { - SheetMetaData sheetMetaData = new SheetMetaData(iter.getSheetName(), index); - XMLReader sheetParser = XMLHelper.newXMLReader(); - sheetParser.setContentHandler(new XSSFSheetMetaDataXMLHandler(sheetMetaData)); - sheetParser.parse(new InputSource(stream)); - // 设置元数据信息 - workbookMetaData.getSheetMetaDataList().add(sheetMetaData); - } catch (ParserConfigurationException e) { - throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage()); + if (acceptFunction.apply(stream, index, iter.getSheetName())) { + ciConsumer.accept(stream, index, iter.getSheetName()); } } } - if (index > -1) { - workbookMetaData.setSheetCount(index + 1); + return index; + } + + private XSSFReader.SheetIterator getSheetIterator(OPCPackage xlsxPackage) throws IOException, OpenXML4JException { + XSSFReader xssfReader = new XSSFReader(xlsxPackage); + return (XSSFReader.SheetIterator) xssfReader.getSheetsData(); + } + + private CiFunction getSheetAcceptFunction() { + CiFunction acceptFunction = (is, index, sheetName) -> true; + if (readConfig.readAllSheet) { + acceptFunction = (is, index, sheetName) -> true; + } else if (!readConfig.sheetNames.isEmpty()) { + acceptFunction = (is, index, sheetName) -> readConfig.sheetNames.contains(sheetName); + } else if (!readConfig.sheetIndexs.isEmpty()) { + acceptFunction = (is, index, sheetName) -> readConfig.sheetIndexs.contains(index); } + return acceptFunction; } /** * Parses and shows the content of one sheet * using the specified styles and shared-strings tables. * - * @param strings The table of strings that may be referenced by cells in the sheet * @param sheetInputStream The stream to read the sheet-data from. - * @throws java.io.IOException An IO exception from the parser, - * possibly from a byte stream or character stream - * supplied by the application. - * @throws SAXException if parsing the XML data fails. */ private void processSheet( - SharedStrings strings, - XSSFSheetXMLHandler.SheetContentsHandler sheetHandler, - InputStream sheetInputStream) throws IOException, SAXException { + ContentHandler handler, + InputStream sheetInputStream) { try { XMLReader sheetParser = XMLHelper.newXMLReader(); - ContentHandler handler = new XSSFSheetXMLHandler( - null, null, strings, sheetHandler, new DataFormatter(), false); sheetParser.setContentHandler(handler); sheetParser.parse(new InputSource(sheetInputStream)); - } catch (ParserConfigurationException e) { + } catch (ParserConfigurationException | IOException | SAXException e) { throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage()); } } @@ -471,6 +485,8 @@ public static final class ReadConfig { */ public boolean stopReadingOnBlankRow = false; + public boolean detectedMerge; + public BiConsumer startSheetConsumer = (sheetName, sheetIndex) -> { log.info("Start read excel, sheet:{},index:{}", sheetName, sheetIndex); }; diff --git a/src/main/java/com/github/liaochong/myexcel/core/XSSFSaxReadHandler.java b/src/main/java/com/github/liaochong/myexcel/core/XSSFSaxReadHandler.java index e8ea2e15..f48753c4 100644 --- a/src/main/java/com/github/liaochong/myexcel/core/XSSFSaxReadHandler.java +++ b/src/main/java/com/github/liaochong/myexcel/core/XSSFSaxReadHandler.java @@ -14,8 +14,7 @@ */ package com.github.liaochong.myexcel.core; -import org.apache.poi.ss.util.CellReference; -import org.apache.poi.xssf.usermodel.XSSFComment; +import org.apache.poi.ss.util.CellAddress; import org.slf4j.Logger; import java.util.List; @@ -49,12 +48,8 @@ public void endRow(int rowNum) { } @Override - public void cell(String cellReference, String formattedValue, - XSSFComment comment) { - if (cellReference == null) { - return; - } - int thisCol = (new CellReference(cellReference)).getCol(); + public void cell(CellAddress cellAddress, String formattedValue) { + int thisCol = cellAddress.getColumn(); handleField(thisCol, formattedValue); } diff --git a/src/main/java/com/github/liaochong/myexcel/core/XSSFSheetMergeXMLHandler.java b/src/main/java/com/github/liaochong/myexcel/core/XSSFSheetMergeXMLHandler.java new file mode 100644 index 00000000..2dc91c6a --- /dev/null +++ b/src/main/java/com/github/liaochong/myexcel/core/XSSFSheetMergeXMLHandler.java @@ -0,0 +1,61 @@ +/* + * Copyright 2019 liaochong + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.github.liaochong.myexcel.core; + +import org.apache.poi.ss.util.CellAddress; +import org.apache.poi.ss.util.CellRangeAddress; +import org.xml.sax.Attributes; +import org.xml.sax.SAXException; +import org.xml.sax.helpers.DefaultHandler; + +import java.util.Iterator; +import java.util.Map; + +import static org.apache.poi.xssf.usermodel.XSSFRelation.NS_SPREADSHEETML; + +/** + * 单元格合并处理器 + * + * @author liaochong + * @version 1.0 + */ +class XSSFSheetMergeXMLHandler extends DefaultHandler { + + private final Map mergeCellMapping; + + public XSSFSheetMergeXMLHandler(Map mergeCellMapping) { + this.mergeCellMapping = mergeCellMapping; + } + + @Override + public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { + if (uri != null && !uri.equals(NS_SPREADSHEETML)) { + return; + } + if ("mergeCell".equals(localName) || "x:mergeCell".equals(localName)) { + String range = attributes.getValue("ref"); + Iterator iterator = CellRangeAddress.valueOf(range).iterator(); + CellAddress firstCellAddress = null; + while (iterator.hasNext()) { + CellAddress cellAddress = iterator.next(); + if (firstCellAddress == null) { + firstCellAddress = cellAddress; + } else { + mergeCellMapping.put(cellAddress, firstCellAddress); + } + } + } + } +} diff --git a/src/main/java/com/github/liaochong/myexcel/core/XSSFSheetXMLHandler.java b/src/main/java/com/github/liaochong/myexcel/core/XSSFSheetXMLHandler.java index b26c68de..86fc71f8 100644 --- a/src/main/java/com/github/liaochong/myexcel/core/XSSFSheetXMLHandler.java +++ b/src/main/java/com/github/liaochong/myexcel/core/XSSFSheetXMLHandler.java @@ -15,15 +15,9 @@ package com.github.liaochong.myexcel.core; import com.github.liaochong.myexcel.core.constant.Constants; -import org.apache.poi.ss.usermodel.BuiltinFormats; -import org.apache.poi.ss.usermodel.DataFormatter; import org.apache.poi.ss.usermodel.RichTextString; import org.apache.poi.ss.util.CellAddress; -import org.apache.poi.xssf.model.Comments; import org.apache.poi.xssf.model.SharedStrings; -import org.apache.poi.xssf.model.Styles; -import org.apache.poi.xssf.usermodel.XSSFCellStyle; -import org.apache.poi.xssf.usermodel.XSSFComment; import org.apache.poi.xssf.usermodel.XSSFRichTextString; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -31,9 +25,8 @@ import org.xml.sax.SAXException; import org.xml.sax.helpers.DefaultHandler; -import java.util.Iterator; -import java.util.LinkedList; -import java.util.Queue; +import java.util.Map; +import java.util.stream.Collectors; import static org.apache.poi.xssf.usermodel.XSSFRelation.NS_SPREADSHEETML; @@ -58,16 +51,6 @@ enum xssfDataType { NUMBER, } - /** - * Table with the styles used for formatting - */ - private final Styles stylesTable; - - /** - * Table with cell comments - */ - private final Comments comments; - /** * Read only access to the shared strings table, for looking * up (most) string cell's contents @@ -81,68 +64,40 @@ enum xssfDataType { // Set when V start element is seen private boolean vIsOpen; - // Set when F start element is seen - private boolean fIsOpen; // Set when an Inline String "is" is seen private boolean isIsOpen; - // Set when a header/footer element is seen - private boolean hfIsOpen; // Set when cell start element is seen; // used when cell close element is seen. private xssfDataType nextDataType; - // Used to format numeric cell values. - private short formatIndex; - private String formatString; - private final DataFormatter formatter; private int rowNum; private int preRowNum = -1; private int nextRowNum; // some sheets do not have rowNums, Excel can read them so we should try to handle them correctly as well private String cellRef; - private final boolean formulasNotResults; // Gathers characters as they are seen. private final StringBuilder value = new StringBuilder(64); - private final StringBuilder formula = new StringBuilder(64); - private final StringBuilder headerFooter = new StringBuilder(64); - private Queue commentCellRefs; + private final Map mergeCellMapping; + + private final Map mergeFirstCellMapping; /** * Accepts objects needed while parsing. * - * @param styles Table of styles * @param strings Table of shared strings - * @param formulasNotResults formulasNotResults * @param sheetContentsHandler sheetContentsHandler - * @param dataFormatter dataFormatter - * @param comments comments */ public XSSFSheetXMLHandler( - Styles styles, - Comments comments, + Map mergeCellMapping, SharedStrings strings, - XSSFSheetXMLHandler.SheetContentsHandler sheetContentsHandler, - DataFormatter dataFormatter, - boolean formulasNotResults) { - this.stylesTable = styles; - this.comments = comments; + XSSFSheetXMLHandler.SheetContentsHandler sheetContentsHandler) { + this.mergeCellMapping = mergeCellMapping; + this.mergeFirstCellMapping = mergeCellMapping.values().stream().distinct().collect(Collectors.toMap(cellAddress -> cellAddress, c -> "")); this.sharedStringsTable = strings; this.output = sheetContentsHandler; - this.formulasNotResults = formulasNotResults; this.nextDataType = xssfDataType.NUMBER; - this.formatter = dataFormatter; - init(comments); - } - - private void init(Comments commentsTable) { - if (commentsTable != null) { - commentCellRefs = new LinkedList<>(); - for (Iterator iter = commentsTable.getCellAddresses(); iter.hasNext(); ) { - commentCellRefs.add(iter.next()); - } - } } private boolean isTextTag(String name) { @@ -172,71 +127,11 @@ public void startElement(String uri, String localName, String qName, vIsOpen = true; // Clear contents cache value.setLength(0); - } else if ("is".equals(localName)) { - // Inline string outer tag - isIsOpen = true; - } else if ("f".equals(localName)) { - // Clear contents cache - formula.setLength(0); - - // Mark us as being a formula if not already - if (nextDataType == xssfDataType.NUMBER) { - nextDataType = xssfDataType.FORMULA; - } - - // Decide where to get the formula string from - String type = attributes.getValue("t"); - if (type != null && type.equals("shared")) { - // Is it the one that defines the shared, or uses it? - String ref = attributes.getValue("ref"); - String si = attributes.getValue("si"); - - if (ref != null) { - // This one defines it - // TODO Save it somewhere - fIsOpen = true; - } else { - // This one uses a shared formula - // TODO Retrieve the shared formula and tweak it to - // match the current cell - if (formulasNotResults) { - logger.warn("shared formulas not yet supported!"); - } /*else { - // It's a shared formula, so we can't get at the formula string yet - // However, they don't care about the formula string, so that's ok! - }*/ - } - } else { - fIsOpen = true; - } - } else if ("oddHeader".equals(localName) || "evenHeader".equals(localName) || - "firstHeader".equals(localName) || "firstFooter".equals(localName) || - "oddFooter".equals(localName) || "evenFooter".equals(localName)) { - hfIsOpen = true; - // Clear contents cache - headerFooter.setLength(0); - } else if ("row".equals(localName)) { - String rowNumStr = attributes.getValue("r"); - if (rowNumStr != null) { - rowNum = Integer.parseInt(rowNumStr) - 1; - } else { - rowNum = nextRowNum; - } - if (rowNum - 1 != preRowNum) { - for (int blankRowNum = preRowNum + 1; blankRowNum < rowNum; blankRowNum++) { - output.startRow(blankRowNum); - output.endRow(blankRowNum); - } - } - output.startRow(rowNum); - this.preRowNum = rowNum; } // c => cell else if ("c".equals(localName)) { // Set up defaults. this.nextDataType = xssfDataType.NUMBER; - this.formatIndex = -1; - this.formatString = null; cellRef = attributes.getValue("r"); String cellType = attributes.getValue("t"); String cellStyleStr = attributes.getValue("s"); @@ -250,24 +145,29 @@ else if ("s".equals(cellType)) nextDataType = xssfDataType.SST_STRING; else if ("str".equals(cellType)) nextDataType = xssfDataType.FORMULA; - else { - // Number, but almost certainly with a special style or format - XSSFCellStyle style = null; - if (stylesTable != null) { - if (cellStyleStr != null) { - int styleIndex = Integer.parseInt(cellStyleStr); - style = stylesTable.getStyleAt(styleIndex); - } else if (stylesTable.getNumCellStyles() > 0) { - style = stylesTable.getStyleAt(0); - } - } - if (style != null) { - this.formatIndex = style.getDataFormat(); - this.formatString = style.getDataFormatString(); - if (this.formatString == null) - this.formatString = BuiltinFormats.getBuiltinFormat(this.formatIndex); + } else if ("row".equals(localName)) { + String rowNumStr = attributes.getValue("r"); + if (rowNumStr != null) { + rowNum = Integer.parseInt(rowNumStr) - 1; + } else { + rowNum = nextRowNum; + } + if (rowNum - 1 != preRowNum) { + for (int blankRowNum = preRowNum + 1; blankRowNum < rowNum; blankRowNum++) { + output.startRow(blankRowNum); + output.endRow(blankRowNum); } } + output.startRow(rowNum); + this.preRowNum = rowNum; + } else if ("is".equals(localName)) { + // Inline string outer tag + isIsOpen = true; + } else if ("f".equals(localName)) { + // Mark us as being a formula if not already + if (nextDataType == xssfDataType.NUMBER) { + nextDataType = xssfDataType.FORMULA; + } } } @@ -297,27 +197,9 @@ public void endElement(String uri, String localName, String qName) break; case FORMULA: - if (formulasNotResults) { - thisStr = formula.toString(); - } else { - String fv = value.toString(); - - if (this.formatString != null) { - try { - // Try to use the value as a formattable number - double d = Double.parseDouble(fv); - thisStr = formatter.formatRawCellContents(d, this.formatIndex, this.formatString); - } catch (NumberFormatException e) { - // Formula is a String result not a Numeric one - thisStr = fv; - } - } else { - // No formatting applied, just do raw value in all cases - thisStr = fv; - } - } + // No formatting applied, just do raw value in all cases + thisStr = value.toString(); break; - case INLINE_STRING: // TODO: Can these ever have formatting on them? XSSFRichTextString rtsi = new XSSFRichTextString(value.toString()); @@ -337,14 +219,10 @@ public void endElement(String uri, String localName, String qName) case NUMBER: String n = value.toString(); - if (this.formatString != null && n.length() > 0) - thisStr = formatter.formatRawCellContents(Double.parseDouble(n), this.formatIndex, this.formatString); - else { - if (n.length() > 0 && n.contains(Constants.SPOT)) { - n = String.valueOf(Double.parseDouble(n)); - } - thisStr = n; + if (n.contains(Constants.SPOT)) { + n = String.valueOf(Double.parseDouble(n)); } + thisStr = n; break; default: @@ -352,39 +230,28 @@ public void endElement(String uri, String localName, String qName) break; } - // Do we have a comment for this cell? - checkForEmptyCellComments(EmptyCellCommentsCheckType.CELL); - XSSFComment comment = comments != null ? comments.findCellComment(new CellAddress(cellRef)) : null; - + CellAddress cellAddress = new CellAddress(cellRef); + String finalThisStr = thisStr; + mergeFirstCellMapping.computeIfPresent(cellAddress, (k, v) -> finalThisStr); // Output - output.cell(cellRef, thisStr, comment); - } else if ("f".equals(localName)) { - fIsOpen = false; - } else if ("is".equals(localName)) { - isIsOpen = false; + output.cell(cellAddress, thisStr); + } else if ("c".equals(localName)) { + CellAddress cellAddress = new CellAddress(cellRef); + CellAddress firstCellAddress = mergeCellMapping.get(cellAddress); + if (firstCellAddress != null) { + output.cell(cellAddress, mergeFirstCellMapping.get(firstCellAddress)); + mergeCellMapping.remove(cellAddress); + } } else if ("row".equals(localName)) { - // Handle any "missing" cells which had comments attached - checkForEmptyCellComments(EmptyCellCommentsCheckType.END_OF_ROW); - // Finish up the row output.endRow(rowNum); - // some sheets do not have rowNum set in the XML, Excel can read them so we should try to read them as well nextRowNum = rowNum + 1; } else if ("sheetData".equals(localName)) { - // Handle any "missing" cells which had comments attached - checkForEmptyCellComments(EmptyCellCommentsCheckType.END_OF_SHEET_DATA); - // indicate that this sheet is now done output.endSheet(); - } else if ("oddHeader".equals(localName) || "evenHeader".equals(localName) || - "firstHeader".equals(localName)) { - hfIsOpen = false; - output.headerFooter(headerFooter.toString(), true, localName); - } else if ("oddFooter".equals(localName) || "evenFooter".equals(localName) || - "firstFooter".equals(localName)) { - hfIsOpen = false; - output.headerFooter(headerFooter.toString(), false, localName); + } else if ("is".equals(localName)) { + isIsOpen = false; } } @@ -398,82 +265,6 @@ public void characters(char[] ch, int start, int length) if (vIsOpen) { value.append(ch, start, length); } - if (fIsOpen) { - formula.append(ch, start, length); - } - if (hfIsOpen) { - headerFooter.append(ch, start, length); - } - } - - /** - * Do a check for, and output, comments in otherwise empty cells. - */ - private void checkForEmptyCellComments(EmptyCellCommentsCheckType type) { - if (commentCellRefs != null && !commentCellRefs.isEmpty()) { - // If we've reached the end of the sheet data, output any - // comments we haven't yet already handled - if (type == EmptyCellCommentsCheckType.END_OF_SHEET_DATA) { - while (!commentCellRefs.isEmpty()) { - outputEmptyCellComment(commentCellRefs.remove()); - } - return; - } - - // At the end of a row, handle any comments for "missing" rows before us - if (this.cellRef == null) { - if (type == EmptyCellCommentsCheckType.END_OF_ROW) { - while (!commentCellRefs.isEmpty()) { - if (commentCellRefs.peek().getRow() == rowNum) { - outputEmptyCellComment(commentCellRefs.remove()); - } else { - return; - } - } - return; - } else { - throw new IllegalStateException("Cell ref should be null only if there are only empty cells in the row; rowNum: " + rowNum); - } - } - - CellAddress nextCommentCellRef; - do { - CellAddress cellRef = new CellAddress(this.cellRef); - CellAddress peekCellRef = commentCellRefs.peek(); - if (type == EmptyCellCommentsCheckType.CELL && cellRef.equals(peekCellRef)) { - // remove the comment cell ref from the list if we're about to handle it alongside the cell content - commentCellRefs.remove(); - return; - } else { - // fill in any gaps if there are empty cells with comment mixed in with non-empty cells - int comparison = peekCellRef.compareTo(cellRef); - if (comparison > 0 && type == EmptyCellCommentsCheckType.END_OF_ROW && peekCellRef.getRow() <= rowNum) { - nextCommentCellRef = commentCellRefs.remove(); - outputEmptyCellComment(nextCommentCellRef); - } else if (comparison < 0 && type == EmptyCellCommentsCheckType.CELL && peekCellRef.getRow() <= rowNum) { - nextCommentCellRef = commentCellRefs.remove(); - outputEmptyCellComment(nextCommentCellRef); - } else { - nextCommentCellRef = null; - } - } - } while (nextCommentCellRef != null && !commentCellRefs.isEmpty()); - } - } - - - /** - * Output an empty-cell comment. - */ - private void outputEmptyCellComment(CellAddress cellRef) { - XSSFComment comment = comments.findCellComment(cellRef); - output.cell(cellRef.formatAsString(), null, comment); - } - - private enum EmptyCellCommentsCheckType { - CELL, - END_OF_ROW, - END_OF_SHEET_DATA } /** @@ -504,21 +295,10 @@ public interface SheetContentsHandler { * src/examples/src/org/apache/poi/xssf/eventusermodel/XLSX2CSV.java * for an example of how to handle this scenario. * - * @param cellReference cellReference - * @param comment comment + * @param cellAddress cellAddress * @param formattedValue formattedValue */ - void cell(String cellReference, String formattedValue, XSSFComment comment); - - /** - * A header or footer has been encountered - * - * @param isHeader isHeader - * @param tagName tagName - * @param text text - */ - default void headerFooter(String text, boolean isHeader, String tagName) { - } + void cell(CellAddress cellAddress, String formattedValue); /** * Signal that the end of a sheet was been reached