Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(python): add "calamine" support to read_excel, using fastexcel (~8-10x speedup) #14000

Merged
merged 2 commits into from
Jan 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 26 additions & 15 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
_check_for_pyarrow,
dataframe_api_compat,
hvplot,
import_optional,
)
from polars.dependencies import numpy as np
from polars.dependencies import pandas as pd
Expand Down Expand Up @@ -3073,15 +3074,8 @@ def write_excel(
... sheet_zoom=125,
... )
""" # noqa: W505
try:
import xlsxwriter
from xlsxwriter.utility import xl_cell_to_rowcol
except ImportError:
msg = (
"Excel export requires xlsxwriter"
"\n\nPlease run: pip install XlsxWriter"
)
raise ImportError(msg) from None
xlsxwriter = import_optional("xlsxwriter", err_prefix="Excel export requires")
from xlsxwriter.utility import xl_cell_to_rowcol

# setup workbook/worksheet
wb, ws, can_close = _xl_setup_workbook(workbook, worksheet)
Expand Down Expand Up @@ -6751,7 +6745,10 @@ def drop_in_place(self, name: str) -> Series:

def cast(
self,
dtypes: Mapping[ColumnNameOrSelector, PolarsDataType] | PolarsDataType,
dtypes: (
Mapping[ColumnNameOrSelector | PolarsDataType, PolarsDataType]
| PolarsDataType
),
*,
strict: bool = True,
) -> DataFrame:
Expand Down Expand Up @@ -6792,12 +6789,19 @@ def cast(
│ 3.0 ┆ 8 ┆ 2022-05-06 │
└─────┴─────┴────────────┘

Cast all frame columns to the specified dtype:
Cast all frame columns matching one dtype (or dtype group) to another dtype:

>>> df.cast(pl.String).to_dict(as_series=False)
{'foo': ['1', '2', '3'],
'bar': ['6.0', '7.0', '8.0'],
'ham': ['2020-01-02', '2021-03-04', '2022-05-06']}
>>> df.cast({pl.Date: pl.Datetime})
shape: (3, 3)
┌─────┬─────┬─────────────────────┐
│ foo ┆ bar ┆ ham │
│ --- ┆ --- ┆ --- │
│ i64 ┆ f64 ┆ datetime[μs] │
╞═════╪═════╪═════════════════════╡
│ 1 ┆ 6.0 ┆ 2020-01-02 00:00:00 │
│ 2 ┆ 7.0 ┆ 2021-03-04 00:00:00 │
│ 3 ┆ 8.0 ┆ 2022-05-06 00:00:00 │
└─────┴─────┴─────────────────────┘

Use selectors to define the columns being cast:

Expand All @@ -6813,6 +6817,13 @@ def cast(
│ 2 ┆ 7 ┆ 2021-03-04 │
│ 3 ┆ 8 ┆ 2022-05-06 │
└─────┴─────┴────────────┘

Cast all frame columns to the specified dtype:

>>> df.cast(pl.String).to_dict(as_series=False)
{'foo': ['1', '2', '3'],
'bar': ['6.0', '7.0', '8.0'],
'ham': ['2020-01-02', '2021-03-04', '2022-05-06']}
"""
return self.lazy().cast(dtypes, strict=strict).collect(_eager=True)

Expand Down
44 changes: 44 additions & 0 deletions py-polars/polars/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,50 @@ def _check_for_pydantic(obj: Any, *, check_type: bool = True) -> bool:
)


def import_optional(
    module_name: str,
    err_prefix: str = "Required package",
    err_suffix: str = "not installed",
    min_version: str | tuple[int, ...] | None = None,
) -> Any:
    """
    Import an optional dependency, returning the module.

    Parameters
    ----------
    module_name : str
        Name of the dependency to import.
    err_prefix : str, optional
        Error prefix to use in the raised exception (appears before the module name).
    err_suffix : str, optional
        Error suffix to use in the raised exception (follows the module name).
    min_version : {str, tuple[int]}, optional
        If a minimum module version is required, specify it here.

    Returns
    -------
    module
        The imported module object.

    Raises
    ------
    ImportError
        If the module cannot be imported; the message is assembled from
        `err_prefix`, the quoted module name and `err_suffix`, followed by a
        `pip install` hint.
    ModuleUpgradeRequired
        If `min_version` is given and the module's `__version__` is lower.
    """
    try:
        module = import_module(module_name)
    except ImportError:
        # Surrounding spaces are stripped so the pieces always join with exactly
        # one space; an empty prefix/suffix is dropped from the message entirely.
        prefix = f"{err_prefix.strip(' ')} " if err_prefix else ""
        suffix = f" {err_suffix.strip(' ')}" if err_suffix else ""
        err_message = (
            f"{prefix}'{module_name}'{suffix}.\n"
            f"Please install it using the command `pip install {module_name}`."
        )
        # `from None` hides the original traceback; the message above is the
        # actionable part for the user.
        raise ImportError(err_message) from None

    if min_version:
        # These helpers are only needed for the version check, so import them
        # lazily rather than on every call.
        from polars.exceptions import ModuleUpgradeRequired
        from polars.utils.various import parse_version

        required = parse_version(min_version)
        installed = parse_version(module.__version__)
        if installed < required:
            msg = f"requires {module_name} {required} or higher, found {installed}"
            raise ModuleUpgradeRequired(msg)

    return module


__all__ = [
# lazy-load rarely-used/heavy builtins (for fast startup)
"dataclasses",
Expand Down
Loading
Loading