From d2ce7c91c24ca58cbfc18c7c9b64058e06a3bd9a Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Thu, 2 Feb 2023 13:07:54 +0000 Subject: [PATCH 1/3] fix(python): fix string_repr of non-nested types --- py-polars/polars/datatypes.py | 29 ++++++++++++++++--- py-polars/polars/internals/dataframe/frame.py | 7 +---- py-polars/tests/unit/test_df.py | 22 +++++++++++++- 3 files changed, 47 insertions(+), 11 deletions(-) diff --git a/py-polars/polars/datatypes.py b/py-polars/polars/datatypes.py index 2399a31c201d..05a64ef6e5ee 100644 --- a/py-polars/polars/datatypes.py +++ b/py-polars/polars/datatypes.py @@ -95,6 +95,25 @@ def get_args(tp: Any) -> Any: DTYPE_TEMPORAL_UNITS: frozenset[TimeUnit] = frozenset(["ns", "us", "ms"]) +class class_or_instance_method: + """ + Make method available to either instantiated or non-instantiated class. + + Source: https://stackoverflow.com/a/29473221/4451315 + """ + + def __init__(self, func: Callable[[Any], object]) -> None: + self.func = func + self.cmdescriptor = classmethod(func) + + def __get__( + self, instance: object, cls: type | None = None + ) -> Callable[[], object]: + if instance is None: + return self.cmdescriptor.__get__(None, cls) + return self.func.__get__(instance, cls) + + def get_idx_type() -> PolarsDataType: """ Get the datatype used for Polars indexing. @@ -126,8 +145,9 @@ class DataTypeClass(type): def __repr__(cls) -> str: return cls.__name__ - def string_repr(cls) -> str: - return dtype_str_repr(cls) + @class_or_instance_method + def string_repr(cls_or_self) -> str: + return dtype_str_repr(cls_or_self) class DataType(metaclass=DataTypeClass): @@ -143,8 +163,9 @@ def __new__(cls, *args: Any, **kwargs: Any) -> PolarsDataType: # type: ignore[m def __reduce__(self) -> Any: return (_custom_reconstruct, (type(self), object, None), self.__dict__) - def string_repr(self) -> str: - return dtype_str_repr(self) + @class_or_instance_method + def string_repr(cls_or_self) -> str: + return dtype_str_repr(cls_or_self) class NumericType(DataType): diff --git a/py-polars/polars/internals/dataframe/frame.py b/py-polars/polars/internals/dataframe/frame.py index 6e3ce1863aad..6a4fe40838ba 100644 --- a/py-polars/polars/internals/dataframe/frame.py +++ b/py-polars/polars/internals/dataframe/frame.py @@ -34,7 +34,6 @@ N_INFER_DEFAULT, Boolean, Categorical, - DataTypeClass, Float64, Int8, Int16, @@ -2785,11 +2784,7 @@ def glimpse(self: DF) -> str: max_num_values = min(10, self.height) def _parse_column(col_name: str, dtype: PolarsDataType) -> tuple[str, str, str]: - dtype_str = ( - f"<{DataTypeClass.string_repr(dtype)}>" - if isinstance(dtype, DataTypeClass) - else f"<{dtype.string_repr()}>" - ) + dtype_str = f"<{dtype.string_repr()}>" val = self[:max_num_values][col_name].to_list() val_str = ", ".join(map(str, val)) return col_name, dtype_str, val_str diff --git a/py-polars/tests/unit/test_df.py b/py-polars/tests/unit/test_df.py index 018412abedcc..040e70becf0c 100644 --- a/py-polars/tests/unit/test_df.py +++ b/py-polars/tests/unit/test_df.py @@ -13,7 +13,7 @@ import pytest import polars as pl -from polars.datatypes import DTYPE_TEMPORAL_UNITS, INTEGER_DTYPES +from polars.datatypes import DTYPE_TEMPORAL_UNITS, INTEGER_DTYPES, PolarsDataType from polars.dependencies import zoneinfo from polars.internals.construction import iterable_to_pydf from polars.testing import ( @@ -2788,6 +2788,26 @@ def test_glimpse() -> None: assert result == expected +@pytest.mark.parametrize( + ("dtype", "expected"), + [ + (pl.Int16, "i16"), + (pl.Float64, "f64"), + (pl.Boolean, "bool"), + (pl.Utf8, "str"), + (pl.Datetime("ms"), "datetime[ms]"), + (pl.Datetime("us"), "datetime[μs]"), + (pl.Datetime("ns"), "datetime[ns]"), + (pl.List(pl.Int64), "list[i64]"), + (pl.List(pl.Float64), "list[f64]"), + (pl.List(pl.Utf8), "list[str]"), + ], +) +def test_string_repr(dtype: PolarsDataType, expected: str) -> None: + result = dtype.string_repr() + assert result == expected + + def test_item() -> None: df = pl.DataFrame({"a": [1]}) assert df.item() == 1 From f4ac13dfe333c1cf049f43d9d24769ad6508cd83 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Thu, 2 Feb 2023 13:22:32 +0000 Subject: [PATCH 2/3] move to utils, refactor to avoid circular import --- py-polars/polars/datatypes.py | 26 +++++----------------- py-polars/polars/utils.py | 41 ++++++++++++++++++++++++++++------- 2 files changed, 38 insertions(+), 29 deletions(-) diff --git a/py-polars/polars/datatypes.py b/py-polars/polars/datatypes.py index 05a64ef6e5ee..4194dac6efa5 100644 --- a/py-polars/polars/datatypes.py +++ b/py-polars/polars/datatypes.py @@ -24,6 +24,11 @@ ) from polars.dependencies import pyarrow as pa +from polars.utils import ( + _datetime_to_pl_timestamp, + _timedelta_to_pl_timedelta, + class_or_instance_method, +) try: from polars.polars import dtype_str_repr @@ -95,25 +100,6 @@ def get_args(tp: Any) -> Any: DTYPE_TEMPORAL_UNITS: frozenset[TimeUnit] = frozenset(["ns", "us", "ms"]) -class class_or_instance_method: - """ - Make method available to either instantiated or non-instantiated class. - - Source: https://stackoverflow.com/a/29473221/4451315 - """ - - def __init__(self, func: Callable[[Any], object]) -> None: - self.func = func - self.cmdescriptor = classmethod(func) - - def __get__( - self, instance: object, cls: type | None = None - ) -> Callable[[], object]: - if instance is None: - return self.cmdescriptor.__get__(None, cls) - return self.func.__get__(instance, cls) - - def get_idx_type() -> PolarsDataType: """ Get the datatype used for Polars indexing. @@ -811,8 +797,6 @@ def maybe_cast( ) -> Any: """Try casting a value to a value that is valid for the given Polars dtype.""" # cast el if it doesn't match - from polars.utils import _datetime_to_pl_timestamp, _timedelta_to_pl_timedelta - if isinstance(el, datetime): return _datetime_to_pl_timestamp(el, time_unit) elif isinstance(el, timedelta): diff --git a/py-polars/polars/utils.py b/py-polars/polars/utils.py index 3b50dec1b358..8db8cd8ea127 100644 --- a/py-polars/polars/utils.py +++ b/py-polars/polars/utils.py @@ -21,13 +21,6 @@ ) import polars.internals as pli -from polars.datatypes import ( - Date, - Datetime, - Int64, - PolarsDataType, - is_polars_dtype, -) from polars.dependencies import _ZONEINFO_AVAILABLE, zoneinfo try: @@ -52,6 +45,10 @@ if TYPE_CHECKING: + from polars.datatypes import ( + PolarsDataType, + is_polars_dtype, + ) from polars.internals.type_aliases import SizeUnit, TimeUnit @@ -177,9 +174,13 @@ def is_str_sequence( def range_to_series( - name: str, rng: range, dtype: PolarsDataType | None = Int64 + name: str, rng: range, dtype: PolarsDataType | None = None ) -> pli.Series: """Fast conversion of the given range to a Series.""" + from polars.datatypes import Int64 + + if dtype is None: + dtype = Int64 return pli.arange( low=rng.start, high=rng.stop, @@ -264,6 +265,11 @@ def _to_python_datetime( tu: TimeUnit | None = "ns", tz: str | None = None, ) -> date | datetime: + from polars.datatypes import ( + Date, + Datetime, + ) + if dtype == Date: # days to seconds # important to create from utc. Not doing this leads @@ -487,3 +493,22 @@ def scale_bytes(sz: int, unit: SizeUnit) -> int | float: raise ValueError( f"unit must be one of {{'b', 'kb', 'mb', 'gb', 'tb'}}, got {unit!r}" ) + + +class class_or_instance_method: + """ + Make method available to either instantiated or non-instantiated class. + + Source: https://stackoverflow.com/a/29473221/4451315 + """ + + def __init__(self, func: Callable[[Any], object]) -> None: + self.func = func + self.cmdescriptor = classmethod(func) + + def __get__( + self, instance: object, cls: type | None = None + ) -> Callable[[], object]: + if instance is None: + return self.cmdescriptor.__get__(None, cls) + return self.func.__get__(instance, cls) From c518135db396806fa7cd28e1674b9dbfc58ffa3e Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Thu, 2 Feb 2023 13:32:13 +0000 Subject: [PATCH 3/3] make string_repr private --- py-polars/polars/datatypes.py | 17 +++----- py-polars/polars/internals/dataframe/frame.py | 7 +++- py-polars/polars/utils.py | 41 ++++--------------- py-polars/tests/unit/test_df.py | 22 +--------- 4 files changed, 21 insertions(+), 66 deletions(-) diff --git a/py-polars/polars/datatypes.py b/py-polars/polars/datatypes.py index 4194dac6efa5..b897a902fbe9 100644 --- a/py-polars/polars/datatypes.py +++ b/py-polars/polars/datatypes.py @@ -24,11 +24,6 @@ ) from polars.dependencies import pyarrow as pa -from polars.utils import ( - _datetime_to_pl_timestamp, - _timedelta_to_pl_timedelta, - class_or_instance_method, -) try: from polars.polars import dtype_str_repr @@ -131,9 +126,8 @@ class DataTypeClass(type): def __repr__(cls) -> str: return cls.__name__ - @class_or_instance_method - def string_repr(cls_or_self) -> str: - return dtype_str_repr(cls_or_self) + def _string_repr(cls) -> str: + return dtype_str_repr(cls) class DataType(metaclass=DataTypeClass): @@ -149,9 +143,8 @@ def __new__(cls, *args: Any, **kwargs: Any) -> PolarsDataType: # type: ignore[m def __reduce__(self) -> Any: return (_custom_reconstruct, (type(self), object, None), self.__dict__) - @class_or_instance_method - def string_repr(cls_or_self) -> str: - return dtype_str_repr(cls_or_self) + def _string_repr(self) -> str: + return dtype_str_repr(self) class NumericType(DataType): @@ -797,6 +790,8 @@ def maybe_cast( ) -> Any: """Try casting a value to a value that is valid for the given Polars dtype.""" # cast el if it doesn't match + from polars.utils import _datetime_to_pl_timestamp, _timedelta_to_pl_timedelta + if isinstance(el, datetime): return _datetime_to_pl_timestamp(el, time_unit) elif isinstance(el, timedelta): diff --git a/py-polars/polars/internals/dataframe/frame.py b/py-polars/polars/internals/dataframe/frame.py index 6a4fe40838ba..103143d01cb0 100644 --- a/py-polars/polars/internals/dataframe/frame.py +++ b/py-polars/polars/internals/dataframe/frame.py @@ -34,6 +34,7 @@ N_INFER_DEFAULT, Boolean, Categorical, + DataTypeClass, Float64, Int8, Int16, @@ -2784,7 +2785,11 @@ def glimpse(self: DF) -> str: max_num_values = min(10, self.height) def _parse_column(col_name: str, dtype: PolarsDataType) -> tuple[str, str, str]: - dtype_str = f"<{dtype.string_repr()}>" + dtype_str = ( + f"<{DataTypeClass._string_repr(dtype)}>" + if isinstance(dtype, DataTypeClass) + else f"<{dtype._string_repr()}>" + ) val = self[:max_num_values][col_name].to_list() val_str = ", ".join(map(str, val)) return col_name, dtype_str, val_str diff --git a/py-polars/polars/utils.py b/py-polars/polars/utils.py index 8db8cd8ea127..3b50dec1b358 100644 --- a/py-polars/polars/utils.py +++ b/py-polars/polars/utils.py @@ -21,6 +21,13 @@ ) import polars.internals as pli +from polars.datatypes import ( + Date, + Datetime, + Int64, + PolarsDataType, + is_polars_dtype, +) from polars.dependencies import _ZONEINFO_AVAILABLE, zoneinfo try: @@ -45,10 +52,6 @@ if TYPE_CHECKING: - from polars.datatypes import ( - PolarsDataType, - is_polars_dtype, - ) from polars.internals.type_aliases import SizeUnit, TimeUnit @@ -174,13 +177,9 @@ def is_str_sequence( def range_to_series( - name: str, rng: range, dtype: PolarsDataType | None = None + name: str, rng: range, dtype: PolarsDataType | None = Int64 ) -> pli.Series: """Fast conversion of the given range to a Series.""" - from polars.datatypes import Int64 - - if dtype is None: - dtype = Int64 return pli.arange( low=rng.start, high=rng.stop, @@ -265,11 +264,6 @@ def _to_python_datetime( tu: TimeUnit | None = "ns", tz: str | None = None, ) -> date | datetime: - from polars.datatypes import ( - Date, - Datetime, - ) - if dtype == Date: # days to seconds # important to create from utc. Not doing this leads @@ -493,22 +487,3 @@ def scale_bytes(sz: int, unit: SizeUnit) -> int | float: raise ValueError( f"unit must be one of {{'b', 'kb', 'mb', 'gb', 'tb'}}, got {unit!r}" ) - - -class class_or_instance_method: - """ - Make method available to either instantiated or non-instantiated class. - - Source: https://stackoverflow.com/a/29473221/4451315 - """ - - def __init__(self, func: Callable[[Any], object]) -> None: - self.func = func - self.cmdescriptor = classmethod(func) - - def __get__( - self, instance: object, cls: type | None = None - ) -> Callable[[], object]: - if instance is None: - return self.cmdescriptor.__get__(None, cls) - return self.func.__get__(instance, cls) diff --git a/py-polars/tests/unit/test_df.py b/py-polars/tests/unit/test_df.py index 040e70becf0c..018412abedcc 100644 --- a/py-polars/tests/unit/test_df.py +++ b/py-polars/tests/unit/test_df.py @@ -13,7 +13,7 @@ import pytest import polars as pl -from polars.datatypes import DTYPE_TEMPORAL_UNITS, INTEGER_DTYPES, PolarsDataType +from polars.datatypes import DTYPE_TEMPORAL_UNITS, INTEGER_DTYPES from polars.dependencies import zoneinfo from polars.internals.construction import iterable_to_pydf from polars.testing import ( @@ -2788,26 +2788,6 @@ def test_glimpse() -> None: assert result == expected -@pytest.mark.parametrize( - ("dtype", "expected"), - [ - (pl.Int16, "i16"), - (pl.Float64, "f64"), - (pl.Boolean, "bool"), - (pl.Utf8, "str"), - (pl.Datetime("ms"), "datetime[ms]"), - (pl.Datetime("us"), "datetime[μs]"), - (pl.Datetime("ns"), "datetime[ns]"), - (pl.List(pl.Int64), "list[i64]"), - (pl.List(pl.Float64), "list[f64]"), - (pl.List(pl.Utf8), "list[str]"), - ], -) -def test_string_repr(dtype: PolarsDataType, expected: str) -> None: - result = dtype.string_repr() - assert result == expected - - def test_item() -> None: df = pl.DataFrame({"a": [1]}) assert df.item() == 1