Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf(python): Add __slots__ to most Polars classes #13236

Merged
merged 5 commits into from
Feb 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion py-polars/polars/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ def __get__(self, instance: NS | None, cls: type[NS]) -> NS | type[NS]:
return self._ns

ns_instance = self._ns(instance) # type: ignore[call-arg]
setattr(instance, self._accessor, ns_instance)
Copy link
Contributor Author

@Object905 Object905 Dec 24, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this was not required even before, but it served as a "cache" on the expression instance.
Now it raises an error because the instance does not have a __dict__, so setting _accessor goes straight to the descriptor, which is read-only.

But since expressions are frequently recreated, this "cache" only worked for chained namespace access such as
pl.col("foo").namespace_foo.func().namespace_foo.other_func()

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd appreciate it if @alexander-beedie could sign off on this change before this gets merged. I don't really see exactly what is going on here.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Turns out this did have user impact:
#14851

For this reason, and some related issues popping up, I will be reverting this PR.

return ns_instance


Expand Down
2 changes: 2 additions & 0 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,8 @@ class DataFrame:
False
"""

__slots__ = ("_df",)
_df: PyDataFrame
_accessors: ClassVar[set[str]] = {"plot"}

def __init__(
Expand Down
61 changes: 61 additions & 0 deletions py-polars/polars/datatypes/classes.py
stinodego marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ def is_nested(self) -> bool: # noqa: D102
class DataType(metaclass=DataTypeClass):
"""Base class for all Polars data types."""

__slots__ = ()

def __reduce__(self) -> Any:
    """
    Return the reduction tuple used to pickle/copy this data type instance.

    The result is `(_custom_reconstruct, (type(self), object, None), state)`.
    `state` is the instance `__dict__` when no slot holds a value, the pair
    `(dict_state, slot_state)` when at least one slot holds a value, and
    `None` when the instance has neither a `__dict__` nor slot values.
    """
    # Instances whose classes all declare `__slots__` have no `__dict__`,
    # so it must not be accessed unconditionally.
    dict_state = getattr(self, "__dict__", None)

    # Collect the values stored in slots declared anywhere in the MRO.
    slot_state = {}
    for klass in type(self).__mro__:
        declared = vars(klass).get("__slots__", ())
        if isinstance(declared, str):
            declared = (declared,)
        for slot in declared:
            # Skip the special slot names and slots that were never assigned.
            if slot in ("__dict__", "__weakref__") or not hasattr(self, slot):
                continue
            slot_state[slot] = getattr(self, slot)

    # NOTE(review): the two-item state form relies on the default state
    # handling of pickle/copy; confirm no custom `__setstate__` is defined
    # on this class hierarchy that expects a plain dict.
    if slot_state:
        state: Any = (dict_state or None, slot_state)
    else:
        state = dict_state
    return (_custom_reconstruct, (type(self), object, None), state)

Expand Down Expand Up @@ -235,6 +237,7 @@ def _custom_reconstruct(
class DataTypeGroup(frozenset): # type: ignore[type-arg]
"""Group of data types."""

__slots__ = ("_match_base_type",)
_match_base_type: bool

def __new__(
Expand Down Expand Up @@ -267,70 +270,104 @@ def __contains__(self, item: Any) -> bool:
class NumericType(DataType):
    """Common parent class for the numeric data types."""

    # Empty slots: this class declares no instance attributes of its own.
    __slots__ = ()


class IntegerType(NumericType):
    """Common parent class for the integer data types."""

    # Empty slots: this class declares no instance attributes of its own.
    __slots__ = ()


class SignedIntegerType(IntegerType):
    """Common parent class for the signed integer data types."""

    # Empty slots: this class declares no instance attributes of its own.
    __slots__ = ()


class UnsignedIntegerType(IntegerType):
    """Common parent class for the unsigned integer data types."""

    # Empty slots: this class declares no instance attributes of its own.
    __slots__ = ()


class FloatType(NumericType):
    """Common parent class for the float data types."""

    # Empty slots: this class declares no instance attributes of its own.
    __slots__ = ()


class TemporalType(DataType):
    """Common parent class for the temporal data types."""

    # Empty slots: this class declares no instance attributes of its own.
    __slots__ = ()


class NestedType(DataType):
    """Common parent class for the nested data types."""

    # Empty slots: this class declares no instance attributes of its own.
    __slots__ = ()


class Int8(SignedIntegerType):
    """Signed integer data type with a width of 8 bits."""

    # Empty slots: this type declares no instance attributes of its own.
    __slots__ = ()


class Int16(SignedIntegerType):
    """Signed integer data type with a width of 16 bits."""

    # Empty slots: this type declares no instance attributes of its own.
    __slots__ = ()


class Int32(SignedIntegerType):
    """Signed integer data type with a width of 32 bits."""

    # Empty slots: this type declares no instance attributes of its own.
    __slots__ = ()


class Int64(SignedIntegerType):
    """Signed integer data type with a width of 64 bits."""

    # Empty slots: this type declares no instance attributes of its own.
    __slots__ = ()


class UInt8(UnsignedIntegerType):
    """Unsigned integer data type with a width of 8 bits."""

    # Empty slots: this type declares no instance attributes of its own.
    __slots__ = ()


class UInt16(UnsignedIntegerType):
    """Unsigned integer data type with a width of 16 bits."""

    # Empty slots: this type declares no instance attributes of its own.
    __slots__ = ()


class UInt32(UnsignedIntegerType):
    """Unsigned integer data type with a width of 32 bits."""

    # Empty slots: this type declares no instance attributes of its own.
    __slots__ = ()


class UInt64(UnsignedIntegerType):
    """Unsigned integer data type with a width of 64 bits."""

    # Empty slots: this type declares no instance attributes of its own.
    __slots__ = ()


class Float32(FloatType):
    """Floating point data type with a width of 32 bits."""

    # Empty slots: this type declares no instance attributes of its own.
    __slots__ = ()


class Float64(FloatType):
    """Floating point data type with a width of 64 bits."""

    # Empty slots: this type declares no instance attributes of its own.
    __slots__ = ()


class Decimal(NumericType):
"""
Expand All @@ -342,6 +379,7 @@ class Decimal(NumericType):
It may be changed at any point without it being considered a breaking change.
"""

__slots__ = ("precision", "scale")
precision: int | None
scale: int

Expand Down Expand Up @@ -383,10 +421,14 @@ def __hash__(self) -> int:
class Boolean(DataType):
    """Data type for boolean values."""

    # Empty slots: this type declares no instance attributes of its own.
    __slots__ = ()


class String(DataType):
    """Data type for UTF-8 encoded strings."""

    # Empty slots: this type declares no instance attributes of its own.
    __slots__ = ()


# Allow Utf8 as an alias for String
Utf8 = String
Expand All @@ -395,14 +437,20 @@ class String(DataType):
class Binary(DataType):
    """Binary data type."""

    # Empty slots: this type declares no instance attributes of its own.
    __slots__ = ()


class Date(TemporalType):
    """Data type for calendar dates."""

    # Empty slots: this type declares no instance attributes of its own.
    __slots__ = ()


class Time(TemporalType):
    """Data type for a time of day."""

    # Empty slots: this type declares no instance attributes of its own.
    __slots__ = ()


class Datetime(TemporalType):
"""Calendar date and time type."""
Expand Down Expand Up @@ -510,6 +558,7 @@ class Categorical(DataType):
or string value (lexical).
"""

__slots__ = ("ordering",)
ordering: CategoricalOrdering | None

def __init__(
Expand Down Expand Up @@ -544,6 +593,7 @@ class Enum(DataType):
It may be changed at any point without it being considered a breaking change.
"""

__slots__ = ("categories",)
categories: Series

def __init__(self, categories: Series | Iterable[str]):
Expand Down Expand Up @@ -606,14 +656,20 @@ def __repr__(self) -> str:
class Object(DataType):
    """Data type that wraps arbitrary Python objects."""

    # Empty slots: this type declares no instance attributes of its own.
    __slots__ = ()


class Null(DataType):
    """Data type that represents Null / None values."""

    # Empty slots: this type declares no instance attributes of its own.
    __slots__ = ()


class Unknown(DataType):
    """Data type for values whose type could not be determined statically."""

    # Empty slots: this type declares no instance attributes of its own.
    __slots__ = ()


class List(NestedType):
"""Variable length list type."""
Expand Down Expand Up @@ -738,6 +794,10 @@ def __repr__(self) -> str:
class Field:
"""Definition of a single field within a `Struct` DataType."""

__slots__ = ("name", "dtype")
name: str
dtype: PolarsDataType

def __init__(self, name: str, dtype: PolarsDataType):
"""
Definition of a single field within a `Struct` DataType.
Expand Down Expand Up @@ -766,6 +826,7 @@ def __repr__(self) -> str:
class Struct(NestedType):
"""Struct composite type."""

__slots__ = ("fields",)
fields: list[Field]

def __init__(self, fields: Sequence[Field] | SchemaDict):
Expand Down
3 changes: 2 additions & 1 deletion py-polars/polars/expr/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,8 @@
class Expr:
"""Expressions that can be used in various contexts."""

_pyexpr: PyExpr = None
__slots__ = ("_pyexpr",)
_pyexpr: PyExpr
_accessors: ClassVar[set[str]] = {
"arr",
"cat",
Expand Down
8 changes: 8 additions & 0 deletions py-polars/polars/expr/whenthen.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ class When:
In this state, `then` must be called to continue to finish the expression.
"""

__slots__ = ("_when",)

def __init__(self, when: Any):
self._when = when

Expand All @@ -49,6 +51,8 @@ class Then(Expr):
Represents the state of the expression after `pl.when(...).then(...)` is called.
"""

__slots__ = ("_then",)

def __init__(self, then: Any):
    """Keep a reference to the given `then` object on this instance."""
    self._then = then

Expand Down Expand Up @@ -106,6 +110,8 @@ class ChainedWhen(Expr):
In this state, `then` must be called to continue to finish the expression.
"""

__slots__ = ("_chained_when",)

def __init__(self, chained_when: Any):
    """Keep a reference to the given `chained_when` object on this instance."""
    self._chained_when = chained_when

Expand All @@ -131,6 +137,8 @@ class ChainedThen(Expr):
Represents the state of the expression after an additional `then` is called.
"""

__slots__ = ("_chained_then",)

def __init__(self, chained_then: Any):
    """Keep a reference to the given `chained_then` object on this instance."""
    self._chained_then = chained_then

Expand Down
1 change: 1 addition & 0 deletions py-polars/polars/lazyframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,7 @@ class LazyFrame:
└─────┴─────┴─────┘
"""

__slots__ = ("_ldf",)
_ldf: PyLazyFrame
_accessors: ClassVar[set[str]] = set()

Expand Down
1 change: 1 addition & 0 deletions py-polars/polars/selectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,7 @@ def _combine_as_selector(
class _selector_proxy_(Expr):
"""Base column selector expression/proxy."""

__slots__ = ("_attrs", "_repr_override")
_attrs: dict[str, Any]
_repr_override: str

Expand Down
3 changes: 2 additions & 1 deletion py-polars/polars/series/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,8 @@ class Series:
]
"""

_s: PySeries = None
__slots__ = ("_s",)
_s: PySeries
_accessors: ClassVar[set[str]] = {
"arr",
"cat",
Expand Down
Loading