-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: introduce eager loading functions (#147)
* chore(deps): Upgrade calamine 0.22.1 -> 0.23.0
  Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
* feat: introduce eager loading functions
  Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
* adapt to recent changes
  Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
* feat: added support for schema_sample_rows
  Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
* solve merge conflicts
  Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
* adapt to recent changes on main
  Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
* adapt to recent changes on main
  Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
* adapt error message
  Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
* adapt to recent changes on main
  Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
* fat refactor, might support non-eager by-ref
  Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
* add iterations to test.py
  Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
* remove unused file
  Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
* adapt to recent changes on main
  Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
* fix: ensure eager=True always returns a RecordBatch
  Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
* remove commented out code
  Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
* simplify lifetime annotations
  Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
* adapt code to recent changes
  Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
* remove dbg!
  Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
* fix typing
  Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
* chore: clippy rust 1.79
  Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
* docs: improve docstrings
  Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
---------
Signed-off-by: Luka Peschke <luka.peschke@toucantoco.com>
- Loading branch information
1 parent
4332278
commit 2147bb5
Showing
13 changed files
with
831 additions
and
361 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
from datetime import date, datetime, timedelta | ||
|
||
import fastexcel | ||
import polars as pl | ||
from pandas.testing import assert_frame_equal as pd_assert_frame_equal | ||
from polars.testing import assert_frame_equal as pl_assert_frame_equal | ||
from pyarrow import RecordBatch | ||
from utils import path_for_fixture | ||
|
||
|
||
def test_load_sheet_eager_single_sheet() -> None:
    """Check that ``load_sheet_eager`` and ``load_sheet`` agree on a single-sheet workbook.

    Sheet 0 of ``fixture-single-sheet.xlsx`` is loaded through both entry
    points and the results are compared twice: once as pandas frames and once
    as polars frames.
    """
    reader = fastexcel.read_excel(path_for_fixture("fixture-single-sheet.xlsx"))

    # pandas: the eager result is converted first, then the ``load_sheet`` one.
    pd_assert_frame_equal(
        reader.load_sheet_eager(0).to_pandas(),
        reader.load_sheet(0).to_pandas(),
    )

    # polars: the eager result is handed to ``pl.from_arrow``; the isinstance
    # check narrows the return type to ``DataFrame`` before the comparison.
    polars_from_eager = pl.from_arrow(data=reader.load_sheet_eager(0))
    assert isinstance(polars_from_eager, pl.DataFrame)
    pl_assert_frame_equal(polars_from_eager, reader.load_sheet(0).to_polars())
|
||
|
||
def test_multiple_sheets_with_unnamed_columns() -> None:
    """Check that eager and regular loading agree on the "With unnamed columns" sheet.

    The sheet is selected by name from ``fixture-multi-sheet.xlsx`` and loaded
    through both ``load_sheet_eager`` and ``load_sheet``; the results must be
    equal as pandas frames and as polars frames.
    """
    # Single source of truth for the sheet name, so the four loads below
    # cannot drift apart.
    sheet_name = "With unnamed columns"
    excel_reader = fastexcel.read_excel(path_for_fixture("fixture-multi-sheet.xlsx"))

    eager_pandas = excel_reader.load_sheet_eager(sheet_name).to_pandas()
    lazy_pandas = excel_reader.load_sheet(sheet_name).to_pandas()
    pd_assert_frame_equal(eager_pandas, lazy_pandas)

    eager_polars = pl.from_arrow(data=excel_reader.load_sheet_eager(sheet_name))
    # Narrow the return type of ``pl.from_arrow`` to ``DataFrame`` before comparing.
    assert isinstance(eager_polars, pl.DataFrame)
    lazy_polars = excel_reader.load_sheet(sheet_name).to_polars()
    pl_assert_frame_equal(eager_polars, lazy_polars)
|
||
|
||
def test_eager_with_an_ods_file_should_return_a_recordbatch() -> None:
    """Check that eagerly loading an ODS sheet yields a pyarrow ``RecordBatch``.

    Sheet 0 of ``dates.ods`` is loaded with ``load_sheet_eager``; the result
    must be a ``RecordBatch`` that converts to a polars ``DataFrame`` holding
    the expected single row of date/time values.
    """
    reader = fastexcel.read_excel(path_for_fixture("dates.ods"))

    batch = reader.load_sheet_eager(0)
    assert isinstance(batch, RecordBatch)

    actual = pl.from_arrow(batch)
    assert isinstance(actual, pl.DataFrame)

    expected = pl.DataFrame(
        {
            "date": [date(2023, 6, 1)],
            "datestr": ["2023-06-01T02:03:04+02:00"],
            "time": [timedelta(hours=1, minutes=2, seconds=3)],
            "datetime": [datetime(2023, 6, 1, 2, 3, 4)],
        }
    ).with_columns(
        # Both temporal columns are cast to millisecond precision -- presumably
        # to match the time unit of the loaded data; confirm against the reader.
        pl.col("datetime").dt.cast_time_unit("ms"),
        pl.col("time").dt.cast_time_unit("ms"),
    )
    pl_assert_frame_equal(actual, expected)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
mod error; | ||
mod types; | ||
mod utils; | ||
|
||
use error::{py_errors, ErrorContext}; | ||
use pyo3::prelude::*; | ||
|
Oops, something went wrong.