Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add details to expectations for scalars #308

Merged
merged 31 commits into `main` from `expand-on-scalars`
Nov 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
751a131
note what may raise
MarcoGorelli Oct 31, 2023
7cb90ea
Merge remote-tracking branch 'upstream/main' into expand-on-scalars
MarcoGorelli Nov 7, 2023
fc65648
list required methods
MarcoGorelli Nov 8, 2023
7c24afd
add scalar class
MarcoGorelli Nov 8, 2023
2714c13
reword
MarcoGorelli Nov 8, 2023
99b91a5
fixup
MarcoGorelli Nov 8, 2023
a867f00
fixup
MarcoGorelli Nov 8, 2023
9e13924
Merge remote-tracking branch 'upstream/main' into expand-on-scalars
MarcoGorelli Nov 8, 2023
f197672
fixup
MarcoGorelli Nov 8, 2023
a417b1c
Merge remote-tracking branch 'upstream/main' into expand-on-scalars
MarcoGorelli Nov 14, 2023
409d8f3
replace Scalar|NullType with Scalar
MarcoGorelli Nov 14, 2023
0db9871
type null as Scalar
MarcoGorelli Nov 14, 2023
6520ac4
add example of working with scalars
MarcoGorelli Nov 14, 2023
46bc08c
use AnyScalar;
MarcoGorelli Nov 15, 2023
b879f31
add Scalar.dtype and Scalar.persist
MarcoGorelli Nov 15, 2023
456c152
Merge remote-tracking branch 'upstream/main' into expand-on-scalars
MarcoGorelli Nov 15, 2023
b8011c7
update shift arg
MarcoGorelli Nov 15, 2023
97d8f9a
use BoolScalar
MarcoGorelli Nov 15, 2023
3b7bcb6
use float scalar in some parts
MarcoGorelli Nov 15, 2023
d598a8d
use float scalar in some parts
MarcoGorelli Nov 15, 2023
a12585b
string scalar for rename
MarcoGorelli Nov 15, 2023
35cd4ed
intscalar for shift
MarcoGorelli Nov 15, 2023
fade164
numeric scalar for correction
MarcoGorelli Nov 15, 2023
29ceed2
simplify
MarcoGorelli Nov 15, 2023
bee402f
update python builtin types desc
MarcoGorelli Nov 15, 2023
d1f4daf
Merge remote-tracking branch 'upstream/main' into expand-on-scalars
MarcoGorelli Nov 15, 2023
15090ac
fixup
MarcoGorelli Nov 15, 2023
24d2ad8
enable extra ruff rule, note AnyScalar
MarcoGorelli Nov 15, 2023
8360d96
remove some unnecessary nitpick ignores
MarcoGorelli Nov 15, 2023
f69679a
return Self from Scalar.persist, add column.persist
MarcoGorelli Nov 17, 2023
216b5e6
fixup
MarcoGorelli Nov 17, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 66 additions & 48 deletions spec/API_specification/dataframe_api/column_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,14 @@
if TYPE_CHECKING:
from typing_extensions import Self

from dataframe_api.dataframe_object import DataFrame

from .typing import DType, Namespace, NullType, Scalar
from .typing import (
AnyScalar,
DataFrame,
DType,
Namespace,
NullType,
Scalar,
)


__all__ = ["Column"]
Expand Down Expand Up @@ -224,7 +229,7 @@ def sorted_indices(
"""
...

def __eq__(self, other: Self | Scalar) -> Self: # type: ignore[override]
def __eq__(self, other: Self | AnyScalar) -> Self: # type: ignore[override]
"""Compare for equality.

Nulls should follow Kleene Logic.
Expand All @@ -247,7 +252,7 @@ def __eq__(self, other: Self | Scalar) -> Self: # type: ignore[override]
"""
...

def __ne__(self, other: Self | Scalar) -> Self: # type: ignore[override]
def __ne__(self, other: Self | AnyScalar) -> Self: # type: ignore[override]
"""Compare for non-equality.

Nulls should follow Kleene Logic.
Expand All @@ -270,7 +275,7 @@ def __ne__(self, other: Self | Scalar) -> Self: # type: ignore[override]
"""
...

def __ge__(self, other: Self | Scalar) -> Self:
def __ge__(self, other: Self | AnyScalar) -> Self:
"""Compare for "greater than or equal to" `other`.

Parameters
Expand All @@ -291,7 +296,7 @@ def __ge__(self, other: Self | Scalar) -> Self:
"""
...

def __gt__(self, other: Self | Scalar) -> Self:
def __gt__(self, other: Self | AnyScalar) -> Self:
"""Compare for "greater than" `other`.

Parameters
Expand All @@ -312,7 +317,7 @@ def __gt__(self, other: Self | Scalar) -> Self:
"""
...

def __le__(self, other: Self | Scalar) -> Self:
def __le__(self, other: Self | AnyScalar) -> Self:
"""Compare for "less than or equal to" `other`.

Parameters
Expand All @@ -333,7 +338,7 @@ def __le__(self, other: Self | Scalar) -> Self:
"""
...

def __lt__(self, other: Self | Scalar) -> Self:
def __lt__(self, other: Self | AnyScalar) -> Self:
"""Compare for "less than" `other`.

Parameters
Expand All @@ -354,7 +359,7 @@ def __lt__(self, other: Self | Scalar) -> Self:
"""
...

def __and__(self, other: Self | bool) -> Self:
def __and__(self, other: Self | bool | Scalar) -> Self:
"""Apply logical 'and' to `other` Column (or scalar) and this Column.

Nulls should follow Kleene Logic.
Expand All @@ -380,7 +385,7 @@ def __and__(self, other: Self | bool) -> Self:
"""
...

def __or__(self, other: Self | bool) -> Self:
def __or__(self, other: Self | bool | Scalar) -> Self:
"""Apply logical 'or' to `other` Column (or scalar) and this column.

Nulls should follow Kleene Logic.
Expand All @@ -406,7 +411,7 @@ def __or__(self, other: Self | bool) -> Self:
"""
...

def __add__(self, other: Self | Scalar) -> Self:
def __add__(self, other: Self | AnyScalar) -> Self:
"""Add `other` column or scalar to this column.

Parameters
Expand All @@ -427,7 +432,7 @@ def __add__(self, other: Self | Scalar) -> Self:
"""
...

def __sub__(self, other: Self | Scalar) -> Self:
def __sub__(self, other: Self | AnyScalar) -> Self:
"""Subtract `other` column or scalar from this column.

Parameters
Expand All @@ -448,7 +453,7 @@ def __sub__(self, other: Self | Scalar) -> Self:
"""
...

def __mul__(self, other: Self | Scalar) -> Self:
def __mul__(self, other: Self | AnyScalar) -> Self:
"""Multiply `other` column or scalar with this column.

Parameters
Expand All @@ -469,7 +474,7 @@ def __mul__(self, other: Self | Scalar) -> Self:
"""
...

def __truediv__(self, other: Self | Scalar) -> Self:
def __truediv__(self, other: Self | AnyScalar) -> Self:
"""Divide this column by `other` column or scalar. True division, returns floats.

Parameters
Expand All @@ -490,7 +495,7 @@ def __truediv__(self, other: Self | Scalar) -> Self:
"""
...

def __floordiv__(self, other: Self | Scalar) -> Self:
def __floordiv__(self, other: Self | AnyScalar) -> Self:
"""Floor-divide `other` column or scalar to this column.

Parameters
Expand All @@ -511,7 +516,7 @@ def __floordiv__(self, other: Self | Scalar) -> Self:
"""
...

def __pow__(self, other: Self | Scalar) -> Self:
def __pow__(self, other: Self | AnyScalar) -> Self:
"""Raise this column to the power of `other`.

Integer dtype to the power of non-negative integer dtype is integer dtype.
Expand All @@ -536,7 +541,7 @@ def __pow__(self, other: Self | Scalar) -> Self:
"""
...

def __mod__(self, other: Self | Scalar) -> Self:
def __mod__(self, other: Self | AnyScalar) -> Self:
"""Return modulus of this column by `other` (`%` operator).

Parameters
Expand All @@ -557,7 +562,7 @@ def __mod__(self, other: Self | Scalar) -> Self:
"""
...

def __divmod__(self, other: Self | Scalar) -> tuple[Column, Column]:
def __divmod__(self, other: Self | AnyScalar) -> tuple[Column, Column]:
"""Return quotient and remainder of integer division. See `divmod` builtin.

Parameters
Expand All @@ -578,16 +583,16 @@ def __divmod__(self, other: Self | Scalar) -> tuple[Column, Column]:
"""
...

def __radd__(self, other: Self | Scalar) -> Self:
def __radd__(self, other: Self | AnyScalar) -> Self:
...

def __rsub__(self, other: Self | Scalar) -> Self:
def __rsub__(self, other: Self | AnyScalar) -> Self:
...

def __rmul__(self, other: Self | Scalar) -> Self:
def __rmul__(self, other: Self | AnyScalar) -> Self:
...

def __rtruediv__(self, other: Self | Scalar) -> Self:
def __rtruediv__(self, other: Self | AnyScalar) -> Self:
...

def __rand__(self, other: Self | bool) -> Self:
Expand All @@ -596,13 +601,13 @@ def __rand__(self, other: Self | bool) -> Self:
def __ror__(self, other: Self | bool) -> Self:
...

def __rfloordiv__(self, other: Self | Scalar) -> Self:
def __rfloordiv__(self, other: Self | AnyScalar) -> Self:
...

def __rpow__(self, other: Self | Scalar) -> Self:
def __rpow__(self, other: Self | AnyScalar) -> Self:
...

def __rmod__(self, other: Self | Scalar) -> Self:
def __rmod__(self, other: Self | AnyScalar) -> Self:
...

def __invert__(self) -> Self:
Expand All @@ -615,7 +620,7 @@ def __invert__(self) -> Self:
"""
...

def any(self, *, skip_nulls: bool = True) -> bool | NullType:
def any(self, *, skip_nulls: bool | Scalar = True) -> Scalar:
"""Reduction returns a bool.

Raises
Expand All @@ -625,7 +630,7 @@ def any(self, *, skip_nulls: bool = True) -> bool | NullType:
"""
...

def all(self, *, skip_nulls: bool = True) -> bool | NullType:
def all(self, *, skip_nulls: bool | Scalar = True) -> Scalar:
"""Reduction returns a bool.

Raises
Expand All @@ -635,23 +640,23 @@ def all(self, *, skip_nulls: bool = True) -> bool | NullType:
"""
...

def min(self, *, skip_nulls: bool = True) -> Scalar | NullType:
def min(self, *, skip_nulls: bool | Scalar = True) -> Scalar:
"""Reduction returns a scalar.

Any data type that supports comparisons
must be supported. The returned value has the same dtype as the column.
"""
...

def max(self, *, skip_nulls: bool = True) -> Scalar | NullType:
def max(self, *, skip_nulls: bool | Scalar = True) -> Scalar:
"""Reduction returns a scalar.

Any data type that supports comparisons
must be supported. The returned value has the same dtype as the column.
"""
...

def sum(self, *, skip_nulls: bool = True) -> Scalar | NullType:
def sum(self, *, skip_nulls: bool | Scalar = True) -> Scalar:
"""Reduction returns a scalar.

Must be supported for numerical and
Expand All @@ -660,15 +665,15 @@ def sum(self, *, skip_nulls: bool = True) -> Scalar | NullType:
"""
...

def prod(self, *, skip_nulls: bool = True) -> Scalar | NullType:
def prod(self, *, skip_nulls: bool | Scalar = True) -> Scalar:
"""Reduction returns a scalar.

Must be supported for numerical data types.
The returned value has the same dtype as the column.
"""
...

def median(self, *, skip_nulls: bool = True) -> Scalar | NullType:
def median(self, *, skip_nulls: bool | Scalar = True) -> Scalar:
"""Reduction returns a scalar.

Must be supported for numerical and
Expand All @@ -678,7 +683,7 @@ def median(self, *, skip_nulls: bool = True) -> Scalar | NullType:
"""
...

def mean(self, *, skip_nulls: bool = True) -> Scalar | NullType:
def mean(self, *, skip_nulls: bool | Scalar = True) -> Scalar:
"""Reduction returns a scalar.

Must be supported for numerical and
Expand All @@ -691,9 +696,9 @@ def mean(self, *, skip_nulls: bool = True) -> Scalar | NullType:
def std(
self,
*,
correction: int | float = 1,
skip_nulls: bool = True,
) -> Scalar | NullType:
correction: float = 1,
skip_nulls: bool | Scalar = True,
) -> Scalar:
"""Reduction returns a scalar.

Must be supported for numerical and
Expand Down Expand Up @@ -724,9 +729,9 @@ def std(
def var(
self,
*,
correction: int | float = 1,
skip_nulls: bool = True,
) -> Scalar | NullType:
correction: float | Scalar = 1,
skip_nulls: bool | Scalar = True,
) -> Scalar:
"""Reduction returns a scalar.

Must be supported for numerical and
Expand Down Expand Up @@ -835,7 +840,7 @@ def is_in(self, values: Self) -> Self:
"""
...

def unique_indices(self, *, skip_nulls: bool = True) -> Self:
def unique_indices(self, *, skip_nulls: bool | Scalar = True) -> Self:
"""Return indices corresponding to unique values in Column.

Returns
Expand All @@ -855,7 +860,7 @@ def unique_indices(self, *, skip_nulls: bool = True) -> Self:
"""
...

def fill_nan(self, value: float | NullType, /) -> Self:
def fill_nan(self, value: float | NullType | Scalar, /) -> Self:
"""Fill floating point ``nan`` values with the given fill value.

Parameters
Expand All @@ -868,7 +873,7 @@ def fill_nan(self, value: float | NullType, /) -> Self:
"""
...

def fill_null(self, value: Scalar, /) -> Self:
def fill_null(self, value: AnyScalar, /) -> Self:
"""Fill null values with the given fill value.

Parameters
Expand Down Expand Up @@ -914,7 +919,7 @@ def to_array(self) -> Any:
"""
...

def rename(self, name: str) -> Self:
def rename(self, name: str | Scalar) -> Self:
"""Rename column.

Parameters
Expand All @@ -929,17 +934,17 @@ def rename(self, name: str) -> Self:
"""
...

def shift(self, offset: int) -> Self:
def shift(self, offset: int | Scalar) -> Self:
"""Shift values by `offset` positions, filling missing values with `null`.

For example, if the original column contains values `[1, 4, 2]`, then:

- `.shift(1)` will return `[null, 1, 4]`,
- `.shift(-1)` will return `[4, 2, null]`.

Parameters
----------
offset
offset : int
How many positions to shift by.
"""
...
Expand Down Expand Up @@ -1020,7 +1025,7 @@ def iso_weekday(self) -> Self:
"""
...

def unix_timestamp(self, *, time_unit: Literal["s", "ms", "us"] = "s") -> Self:
def unix_timestamp(self, *, time_unit: str | Scalar = "s") -> Self:
"""Return number of seconds / milliseconds / microseconds since the Unix epoch.

The Unix epoch is 00:00:00 UTC on 1 January 1970.
Expand All @@ -1039,3 +1044,16 @@ def unix_timestamp(self, *, time_unit: Literal["s", "ms", "us"] = "s") -> Self:
discarded.
"""
...

def persist(self) -> Self:
    """Hint that computation prior to this point should not be repeated.

    This is intended as a hint, rather than as a directive. Implementations
    which do not separate lazy vs eager execution may ignore this method and
    treat it as a no-op.

    Returns
    -------
    Self
        An object of the same type as the caller.

    .. note::
        This method may trigger execution. If necessary, it should be called
        at most once per dataframe, and as late as possible in the pipeline.
    """
    ...
Loading