Skip to content

Commit

Permalink
Excel Reading (XLSX) using SAX Default Handler (#10877)
Browse files Browse the repository at this point in the history
- Implement Excel reading as a SAXMLParser.
  • Loading branch information
jdunkerley authored Dec 13, 2024
1 parent e6bcd5e commit 63ed629
Show file tree
Hide file tree
Showing 18 changed files with 1,257 additions and 219 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ type Excel_Workbook
- file: The file to load.
- xls_format: Whether to use the old XLS format (default is XLSX).
new : File | Temporary_File -> Boolean -> Excel_Workbook
new file:(File | Temporary_File) xls_format=False =
new file:(File | Temporary_File) xls_format:Boolean=False =
file_for_errors = if file.is_a Temporary_File then Nothing else file

continuation raw_file =
Expand All @@ -73,7 +73,7 @@ type Excel_Workbook
- xls_format: Whether to use the old XLS format (default is XLSX).
- file: Optional file reference.
from_stream : Input_Stream -> Boolean -> File | Nothing -> Excel_Workbook
from_stream stream xls_format=False file=Nothing = Excel_Reader.handle_bad_format file <|
from_stream stream xls_format:Boolean=False file=Nothing = Excel_Reader.handle_bad_format file <|
temp_file = Temporary_File.from_stream_light stream
Excel_Workbook.new temp_file xls_format

Expand All @@ -89,8 +89,8 @@ type Excel_Workbook
## PRIVATE
ICON metadata
Returns the list of databases (or catalogs) for the connection.
databases : Nothing
databases self = Nothing
databases : Vector (Text | Nothing)
databases self = [Nothing]

## PRIVATE
ICON metadata
Expand All @@ -109,7 +109,7 @@ type Excel_Workbook
Arguments:
- database: The target file to open as an Excel_Workbook.
set_database : Text | File -> Excel_Workbook ! Illegal_Argument
set_database self database =
set_database self database:(Text | File) =
if database == self.database then self else
file = File.new database
if file.exists && file.is_directory.not then Excel_Workbook.new file self.xls_format else
Expand Down Expand Up @@ -163,7 +163,7 @@ type Excel_Workbook
Gets the names of all the named ranges.
named_ranges : Vector Text
named_ranges self = self.with_java_workbook java_workbook->
Vector.from_polyglot_array (ExcelReader.readRangeNames java_workbook)
Vector.from_polyglot_array java_workbook.getRangeNames

## PRIVATE
ICON metadata
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.enso.table.excel.xssfreader.XSSFReaderWorkbook;

public class ExcelConnectionPool {
public static final ExcelConnectionPool INSTANCE = new ExcelConnectionPool();
Expand Down Expand Up @@ -64,7 +65,7 @@ public ReadOnlyExcelConnection openReadOnlyConnection(File file, ExcelFileFormat
record.refCount = 1;
record.file = file;
record.format = format;
record.workbook = openWorkbook(file, format, false);
record.reopen(true);
records.put(key, record);
return new ReadOnlyExcelConnection(this, key, record);
}
Expand Down Expand Up @@ -212,10 +213,10 @@ static class ConnectionRecord {
private int refCount;
private File file;
private ExcelFileFormat format;
private Workbook workbook;
private ExcelWorkbook workbook;
private IOException initializationException = null;

<T> T withWorkbook(Function<Workbook, T> action) throws IOException {
<T> T withWorkbook(Function<ExcelWorkbook, T> action) throws IOException {
synchronized (this) {
return action.apply(accessCurrentWorkbook());
}
Expand All @@ -238,7 +239,10 @@ void reopen(boolean throwOnFailure) throws IOException {
}

try {
workbook = openWorkbook(file, format, false);
workbook =
format == ExcelFileFormat.XLSX
? new XSSFReaderWorkbook(file.getAbsolutePath())
: ExcelWorkbook.forPOIUserModel(openWorkbook(file, format, false));
} catch (IOException e) {
initializationException = e;
if (throwOnFailure) {
Expand All @@ -248,7 +252,7 @@ void reopen(boolean throwOnFailure) throws IOException {
}
}

private Workbook accessCurrentWorkbook() throws IOException {
private ExcelWorkbook accessCurrentWorkbook() throws IOException {
synchronized (this) {
if (workbook == null) {
if (initializationException != null) {
Expand Down Expand Up @@ -278,7 +282,7 @@ private static Workbook openWorkbook(File file, ExcelFileFormat format, boolean
throw e;
}
}
case XLSX -> {
case XLSX, XLSX_FALLBACK -> {
try {
PackageAccess access = writeAccess ? PackageAccess.READ_WRITE : PackageAccess.READ;
OPCPackage pkg = OPCPackage.open(file, access);
Expand All @@ -300,7 +304,7 @@ private static Workbook openWorkbook(File file, ExcelFileFormat format, boolean
private static Workbook createEmptyWorkbook(ExcelFileFormat format) {
return switch (format) {
case XLS -> new HSSFWorkbook();
case XLSX -> new XSSFWorkbook();
case XLSX, XLSX_FALLBACK -> new XSSFWorkbook();
};
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,6 @@

public enum ExcelFileFormat {
XLS,
XLSX
XLSX,
XLSX_FALLBACK
}
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ private static String[] readRowAsHeaders(

String[] output = new String[currentEndCol - startCol + 1];
for (int col = startCol; col <= currentEndCol; col++) {
String cellText = row.getFormattedCell(col);
String cellText = row.getCellText(col);
String name = cellText.isEmpty() ? "" : deduplicator.makeUnique(cellText);

output[col - startCol] = name;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ public static ExcelRange expandSingleCell(ExcelRange excelRange, ExcelSheet shee

Context context = Context.getCurrent();
while (currentRow != null && !currentRow.isEmpty(excelRange.getLeftColumn(), rightColumn)) {
rightColumn = currentRow.findEndRight(rightColumn);
rightColumn = findEndRight(currentRow, rightColumn);
bottomRow++;
currentRow = sheet.get(bottomRow);

Expand All @@ -212,6 +212,16 @@ public static ExcelRange expandSingleCell(ExcelRange excelRange, ExcelSheet shee
bottomRow - 1);
}

/**
 * Walks rightwards from {@code start} along {@code row} and returns the last column index reached
 * before {@code row.isEmpty(...)} first reports {@code true} for the following column.
 *
 * <p>If {@code row.isEmpty(start + 1)} is already {@code true}, {@code start} is returned
 * unchanged. The column at {@code start} itself is never tested.
 *
 * @param row the row to scan
 * @param start the column index to begin from
 * @return the index of the last column visited, which is never less than {@code start}
 */
private static int findEndRight(ExcelRow row, int start) {
  final Context ctx = Context.getCurrent();
  int lastFilled = start;
  for (int candidate = start + 1; !row.isEmpty(candidate); candidate++) {
    lastFilled = candidate;
    // Poll the context once per visited column, as the loop length depends on the row's content.
    ctx.safepoint();
  }
  return lastFilled;
}

/**
* @param index The index to the next character after the parsed value
* @param value Parsed integer value or 0 if not valid
Expand Down
Loading

0 comments on commit 63ed629

Please sign in to comment.