Skip to content

Commit

Permalink
Add note about parent dataframes and cross-dataframe column compariso…
Browse files Browse the repository at this point in the history
…ns (#310)

* wip

* add notes about parent dataframes

* add note about free-standing columns

* post merge fixup

* 🚚 Column.dataframe -> Column.parent_dataframe, note unsupportedness rather than impossibility
  • Loading branch information
MarcoGorelli authored Nov 16, 2023
1 parent 098e11e commit 25e5a52
Show file tree
Hide file tree
Showing 2 changed files with 131 additions and 3 deletions.
120 changes: 119 additions & 1 deletion spec/API_specification/dataframe_api/column_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
if TYPE_CHECKING:
from typing_extensions import Self

from dataframe_api.dataframe_object import DataFrame

from .typing import DType, Namespace, NullType, Scalar


Expand All @@ -17,9 +19,45 @@ class Column(Protocol):
Note that this column object is not meant to be instantiated directly by
users of the library implementing the dataframe API standard. Rather, use
constructor functions or an already-created dataframe object retrieved via
:meth:`DataFrame.col`.
The parent dataframe (which can be retrieved via the :meth:`parent_dataframe`
property) plays a key role here:
- If two columns were retrieved from the same dataframe,
then they can be combined and compared at will.
- If two columns were retrieved from different dataframes,
then there is no guarantee about how or whether they can be combined and
compared; this may vary across implementations.
- If two columns are both "free-standing" (i.e. not retrieved from a dataframe
but constructed directly from a 1D array or sequence), then they can be
combined and compared with each other. Note, however, that there's no guarantee
about whether they can be compared or combined with columns retrieved from a
different dataframe; this may vary across implementations.
"""

@property
def parent_dataframe(self) -> DataFrame | None:
    """Return the parent DataFrame, if present.

    For example, if we have the following:

    .. code-block:: python

        df: DataFrame
        column = df.col('a')

    then `column.parent_dataframe` should return `df`.

    On the other hand, if we had:

    .. code-block:: python

        column = column_from_1d_array(...)

    then `column.parent_dataframe` should return `None`.

    Returns
    -------
    DataFrame or None
        The dataframe the column was retrieved from, or `None` for a
        "free-standing" column (one constructed directly rather than
        retrieved from a dataframe).
    """

def __column_namespace__(self) -> Namespace:
"""Return an object that has all the Dataframe Standard API functions on it.
Expand Down Expand Up @@ -201,6 +239,11 @@ def __eq__(self, other: Self | Scalar) -> Self: # type: ignore[override]
Returns
-------
Column
Notes
-----
`other`'s parent DataFrame must be the same as `self`'s - else,
the operation is unsupported and may vary across implementations.
"""
...

Expand All @@ -219,6 +262,11 @@ def __ne__(self, other: Self | Scalar) -> Self: # type: ignore[override]
Returns
-------
Column
Notes
-----
`other`'s parent DataFrame must be the same as `self`'s - else,
the operation is unsupported and may vary across implementations.
"""
...

Expand All @@ -235,6 +283,11 @@ def __ge__(self, other: Self | Scalar) -> Self:
Returns
-------
Column
Notes
-----
`other`'s parent DataFrame must be the same as `self`'s - else,
the operation is unsupported and may vary across implementations.
"""
...

Expand All @@ -251,6 +304,11 @@ def __gt__(self, other: Self | Scalar) -> Self:
Returns
-------
Column
Notes
-----
`other`'s parent DataFrame must be the same as `self`'s - else,
the operation is unsupported and may vary across implementations.
"""
...

Expand All @@ -267,6 +325,11 @@ def __le__(self, other: Self | Scalar) -> Self:
Returns
-------
Column
Notes
-----
`other`'s parent DataFrame must be the same as `self`'s - else,
the operation is unsupported and may vary across implementations.
"""
...

Expand All @@ -283,6 +346,11 @@ def __lt__(self, other: Self | Scalar) -> Self:
Returns
-------
Column
Notes
-----
`other`'s parent DataFrame must be the same as `self`'s - else,
the operation is unsupported and may vary across implementations.
"""
...

Expand All @@ -300,6 +368,11 @@ def __and__(self, other: Self | bool) -> Self:
-------
Column
Notes
-----
`other`'s parent DataFrame must be the same as `self`'s - else,
the operation is unsupported and may vary across implementations.
Raises
------
ValueError
Expand All @@ -321,6 +394,11 @@ def __or__(self, other: Self | bool) -> Self:
-------
Column
Notes
-----
`other`'s parent DataFrame must be the same as `self`'s - else,
the operation is unsupported and may vary across implementations.
Raises
------
ValueError
Expand All @@ -338,6 +416,11 @@ def __add__(self, other: Self | Scalar) -> Self:
"Scalar" here is defined implicitly by what scalar types are allowed
for the operation by the underlying dtypes.
Notes
-----
`other`'s parent DataFrame must be the same as `self`'s - else,
the operation is unsupported and may vary across implementations.
Returns
-------
Column
Expand All @@ -357,6 +440,11 @@ def __sub__(self, other: Self | Scalar) -> Self:
Returns
-------
Column
Notes
-----
`other`'s parent DataFrame must be the same as `self`'s - else,
the operation is unsupported and may vary across implementations.
"""
...

Expand All @@ -373,6 +461,11 @@ def __mul__(self, other: Self | Scalar) -> Self:
Returns
-------
Column
Notes
-----
`other`'s parent DataFrame must be the same as `self`'s - else,
the operation is unsupported and may vary across implementations.
"""
...

Expand All @@ -389,6 +482,11 @@ def __truediv__(self, other: Self | Scalar) -> Self:
Returns
-------
Column
Notes
-----
`other`'s parent DataFrame must be the same as `self`'s - else,
the operation is unsupported and may vary across implementations.
"""
...

Expand All @@ -405,6 +503,11 @@ def __floordiv__(self, other: Self | Scalar) -> Self:
Returns
-------
Column
Notes
-----
`other`'s parent DataFrame must be the same as `self`'s - else,
the operation is unsupported and may vary across implementations.
"""
...

Expand All @@ -425,6 +528,11 @@ def __pow__(self, other: Self | Scalar) -> Self:
Returns
-------
Column
Notes
-----
`other`'s parent DataFrame must be the same as `self`'s - else,
the operation is unsupported and may vary across implementations.
"""
...

Expand All @@ -441,6 +549,11 @@ def __mod__(self, other: Self | Scalar) -> Self:
Returns
-------
Column
Notes
-----
`other`'s parent DataFrame must be the same as `self`'s - else,
the operation is unsupported and may vary across implementations.
"""
...

Expand All @@ -457,6 +570,11 @@ def __divmod__(self, other: Self | Scalar) -> tuple[Column, Column]:
Returns
-------
tuple[Column, Column]
Notes
-----
`other`'s parent DataFrame must be the same as `self`'s - else,
the operation is unsupported and may vary across implementations.
"""
...

Expand Down
14 changes: 12 additions & 2 deletions spec/API_specification/dataframe_api/dataframe_object.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,11 @@ def get_rows(self, indices: Column) -> Self:
Returns
-------
DataFrame
Notes
-----
`indices`'s parent DataFrame must be `self` - else,
the operation is unsupported and may vary across implementations.
"""
...

Expand Down Expand Up @@ -172,8 +177,8 @@ def filter(self, mask: Column) -> Self:
Notes
-----
Some participants preferred a weaker type Arraylike[bool] for mask,
where 'Arraylike' denotes an object adhering to the Array API standard.
`mask`'s parent DataFrame must be `self` - else,
the operation is unsupported and may vary across implementations.
"""
...

Expand Down Expand Up @@ -201,6 +206,11 @@ def assign(self, *columns: Column) -> Self:
Returns
-------
DataFrame
Notes
-----
The parent DataFrame of every column in `columns` must be `self` - else,
the operation is unsupported and may vary across implementations.
"""
...

Expand Down

0 comments on commit 25e5a52

Please sign in to comment.