Skip to content

Commit

Permalink
refactor(python): minor describe tidy-up, and slight rewording of some Exception docstrings (#13942)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-beedie committed Jan 24, 2024
1 parent aa8d37d commit f93e450
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 36 deletions.
58 changes: 31 additions & 27 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4459,60 +4459,64 @@ def describe(

@lru_cache
def skip_minmax(dt: PolarsDataType) -> bool:
return dt.is_nested() or dt in (Object, Null, Unknown, Categorical, Enum)
return dt.is_nested() or dt in (Categorical, Enum, Null, Object, Unknown)

# determine which columns get std/mean/percentile stats
# determine which columns will produce std/mean/percentile/etc
# statistics in a single pass over the frame schema
has_numeric_result, sort_cols = set(), set()
metric_exprs = []
metric_exprs: list[Expr] = []
null = F.lit(None)

for c, dt in self.schema.items():
is_numeric = dt.is_numeric()
is_temporal = not is_numeric and dt.is_temporal()
for c, dtype in self.schema.items():
is_numeric = dtype.is_numeric()
is_temporal = not is_numeric and dtype.is_temporal()

# counts
count_exprs = [
F.col(c).count().name.prefix("count:"),
F.col(c).null_count().name.prefix("null_count:"),
]
metric_exprs.extend(count_exprs)

# mean
if is_temporal:
mean_expr = F.col(c).to_physical().mean().cast(dt)
else:
mean_expr = F.col(c).mean() if is_numeric or dt == Boolean else null
metric_exprs.append(mean_expr.alias(f"mean:{c}"))
mean_expr = (
F.col(c).to_physical().mean().cast(dtype)
if is_temporal
else (F.col(c).mean() if is_numeric or dtype == Boolean else null)
)

# standard deviation
# standard deviation, min, max
expr_std = F.col(c).std() if is_numeric else null
metric_exprs.append(expr_std.alias(f"std:{c}"))

# min
min_expr = F.col(c).min() if not skip_minmax(dt) else null
metric_exprs.append(min_expr.alias(f"min:{c}"))
min_expr = F.col(c).min() if not skip_minmax(dtype) else null
max_expr = F.col(c).max() if not skip_minmax(dtype) else null

# percentiles
pct_exprs = []
for p in quantiles:
if is_numeric or is_temporal:
pct_expr = (
F.col(c).to_physical().quantile(p, interpolation).cast(dt)
F.col(c).to_physical().quantile(p, interpolation).cast(dtype)
if is_temporal
else F.col(c).quantile(p, interpolation)
)
sort_cols.add(c)
else:
pct_expr = null
metric_exprs.append(pct_expr.alias(f"{p}:{c}"))

# max
metric_exprs.append(
(F.col(c).max() if not skip_minmax(dt) else null).alias(f"max:{c}")
)
pct_exprs.append(pct_expr.alias(f"{p}:{c}"))

if is_numeric or dt.is_nested() or dt in (Null, Boolean):
if is_numeric or dtype.is_nested() or dtype in (Null, Boolean):
has_numeric_result.add(c)

# add column expressions (in end-state 'metrics' list order)
metric_exprs.extend(
[
*count_exprs,
mean_expr.alias(f"mean:{c}"),
expr_std.alias(f"std:{c}"),
min_expr.alias(f"min:{c}"),
*pct_exprs,
max_expr.alias(f"max:{c}"),
]
)

# if more than one quantile requested, sort relevant columns to make them O(1)
# TODO: remove once we have engine support for retrieving multiple quantiles
lf = (
Expand Down
18 changes: 9 additions & 9 deletions py-polars/polars/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,16 @@ class ColumnNotFoundError(PolarsError): # type: ignore[no-redef, misc]
"""Exception raised when a specified column is not found."""

class ComputeError(PolarsError): # type: ignore[no-redef, misc]
"""Exception raised when polars could not finish the computation."""
"""Exception raised when Polars could not perform an underlying computation."""

class DuplicateError(PolarsError): # type: ignore[no-redef, misc]
"""Exception raised when a column name is duplicated."""

class InvalidOperationError(PolarsError): # type: ignore[no-redef, misc]
"""Exception raised when an operation is not allowed on a certain data type."""
"""Exception raised when an operation is not allowed (or possible) against a given object or data structure.""" # noqa: W505

class NoDataError(PolarsError): # type: ignore[no-redef, misc]
"""Exception raised when an operation can not be performed on an empty data structure.""" # noqa: W505
"""Exception raised when an operation cannot be performed on an empty data structure.""" # noqa: W505

class OutOfBoundsError(PolarsError): # type: ignore[no-redef, misc]
"""Exception raised when the given index is out of bounds."""
Expand All @@ -44,19 +44,19 @@ class PolarsPanicError(PolarsError): # type: ignore[no-redef, misc]
"""Exception raised when an unexpected state causes a panic in the underlying Rust library.""" # noqa: W505

class SchemaError(PolarsError): # type: ignore[no-redef, misc]
"""Exception raised when trying to combine data structures with mismatched schemas.""" # noqa: W505
"""Exception raised when an unexpected schema mismatch causes an error."""

class SchemaFieldNotFoundError(PolarsError): # type: ignore[no-redef, misc]
"""Exception raised when a specified schema field is not found."""

class ShapeError(PolarsError): # type: ignore[no-redef, misc]
"""Exception raised when trying to combine data structures with incompatible shapes.""" # noqa: W505
"""Exception raised when trying to perform operations on data structures with incompatible shapes.""" # noqa: W505

class StringCacheMismatchError(PolarsError): # type: ignore[no-redef, misc]
"""Exception raised when string caches come from different sources."""

class StructFieldNotFoundError(PolarsError): # type: ignore[no-redef, misc]
"""Exception raised when a specified schema field is not found."""
"""Exception raised when a specified Struct field is not found."""

class PolarsWarning(Exception): # type: ignore[no-redef]
"""Base class for all Polars warnings."""
Expand All @@ -82,7 +82,7 @@ class TooManyRowsReturnedError(RowsError):


class ModuleUpgradeRequired(ModuleNotFoundError):
"""Exception raised when the module is installed but needs to be upgraded."""
"""Exception raised when a module is installed but needs to be upgraded."""


class ParameterCollisionError(PolarsError): # type: ignore[misc]
Expand All @@ -106,15 +106,15 @@ class ChronoFormatWarning(PolarsWarning): # type: ignore[misc]


class PolarsInefficientMapWarning(PolarsWarning): # type: ignore[misc]
"""Warning raised when a potentially slow `apply` operation is performed."""
"""Warning raised when a potentially slow `map_*` operation is performed."""


class TimeZoneAwareConstructorWarning(PolarsWarning): # type: ignore[misc]
"""Warning raised when constructing Series from non-UTC time-zone-aware inputs."""


class ArrowError(Exception):
"""deprecated will be removed."""
"""Deprecated: will be removed."""


__all__ = [
Expand Down

0 comments on commit f93e450

Please sign in to comment.