Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(python): add upper_bound and lower_bound methods to Series #6990

Merged
merged 3 commits into from
Feb 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions py-polars/docs/source/reference/series/descriptive.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,10 @@ Descriptive
Series.is_unique
Series.is_utf8
Series.len
Series.lower_bound
Series.n_chunks
Series.n_unique
Series.null_count
Series.unique_counts
Series.upper_bound
Series.value_counts
106 changes: 81 additions & 25 deletions py-polars/polars/internals/series/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1619,30 +1619,25 @@ def n_chunks(self) -> int:
"""
return self._s.n_chunks()

def cumsum(self, reverse: bool = False) -> Series:
def cummax(self, reverse: bool = False) -> Series:
"""
Get an array with the cumulative sum computed at every element.
Get an array with the cumulative max computed at every element.

Parameters
----------
reverse
reverse the operation.

Notes
-----
Dtypes in {Int8, UInt8, Int16, UInt16} are cast to
Int64 before summing to prevent overflow issues.

Examples
--------
>>> s = pl.Series("a", [1, 2, 3])
>>> s.cumsum()
>>> s = pl.Series("s", [3, 5, 1])
>>> s.cummax()
shape: (3,)
Series: 'a' [i64]
Series: 's' [i64]
[
1
3
6
5
5
]

"""
Expand All @@ -1658,10 +1653,10 @@ def cummin(self, reverse: bool = False) -> Series:

Examples
--------
>>> s = pl.Series("a", [1, 2, 3])
>>> s = pl.Series("s", [1, 2, 3])
>>> s.cummin()
shape: (3,)
Series: 'a' [i64]
Series: 's' [i64]
[
1
1
Expand All @@ -1670,32 +1665,37 @@ def cummin(self, reverse: bool = False) -> Series:

"""

def cummax(self, reverse: bool = False) -> Series:
def cumprod(self, reverse: bool = False) -> Series:
"""
Get an array with the cumulative max computed at every element.
Get an array with the cumulative product computed at every element.

Parameters
----------
reverse
reverse the operation.

Notes
-----
Dtypes in {Int8, UInt8, Int16, UInt16} are cast to
Int64 before multiplying to prevent overflow issues.

Examples
--------
>>> s = pl.Series("a", [3, 5, 1])
>>> s.cummax()
>>> s = pl.Series("a", [1, 2, 3])
>>> s.cumprod()
shape: (3,)
Series: 'a' [i64]
[
3
5
5
1
2
6
]

"""

def cumprod(self, reverse: bool = False) -> Series:
def cumsum(self, reverse: bool = False) -> Series:
"""
Get an array with the cumulative product computed at every element.
Get an array with the cumulative sum computed at every element.

Parameters
----------
Expand All @@ -1710,12 +1710,12 @@ def cumprod(self, reverse: bool = False) -> Series:
Examples
--------
>>> s = pl.Series("a", [1, 2, 3])
>>> s.cumprod()
>>> s.cumsum()
shape: (3,)
Series: 'a' [i64]
[
1
2
3
6
]

Expand Down Expand Up @@ -4781,6 +4781,62 @@ def clip_max(self, max_val: int | float) -> Series:

"""

def lower_bound(self) -> Self:
    """
    Return the lower bound of this Series' dtype as a unit Series.

    The bound is the smallest value the dtype can represent; for floating
    point dtypes this is negative infinity.

    Returns
    -------
    Series
        A Series of length one that keeps the name and dtype of this Series
        and holds the lower bound as its single value.

    See Also
    --------
    upper_bound : return the upper bound of the given Series' dtype.

    Examples
    --------
    >>> s = pl.Series("s", [-1, 0, 1], dtype=pl.Int32)
    >>> s.lower_bound()
    shape: (1,)
    Series: 's' [i32]
    [
    -2147483648
    ]

    >>> s = pl.Series("s", [1.0, 2.5, 3.0], dtype=pl.Float32)
    >>> s.lower_bound()
    shape: (1,)
    Series: 's' [f32]
    [
    -inf
    ]

    """

def upper_bound(self) -> Self:
    """
    Return the upper bound of this Series' dtype as a unit Series.

    The bound is the largest value the dtype can represent; for floating
    point dtypes this is positive infinity.

    Returns
    -------
    Series
        A Series of length one that keeps the name and dtype of this Series
        and holds the upper bound as its single value.

    See Also
    --------
    lower_bound : return the lower bound of the given Series' dtype.

    Examples
    --------
    >>> s = pl.Series("s", [-1, 0, 1], dtype=pl.Int8)
    >>> s.upper_bound()
    shape: (1,)
    Series: 's' [i8]
    [
    127
    ]

    >>> s = pl.Series("s", [1.0, 2.5, 3.0], dtype=pl.Float64)
    >>> s.upper_bound()
    shape: (1,)
    Series: 's' [f64]
    [
    inf
    ]

    """

def map_dict(
self,
remapping: dict[Any, Any],
Expand Down
4 changes: 3 additions & 1 deletion py-polars/tests/unit/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,13 @@ def fruits_cars() -> pl.DataFrame:
"fruits": ["banana", "banana", "apple", "apple", "banana"],
"B": [5, 4, 3, 2, 1],
"cars": ["beetle", "audi", "beetle", "beetle", "beetle"],
}
},
schema_overrides={"A": pl.Int64, "B": pl.Int64},
)


ISO8601_FORMATS = []

for T in ["T", " "]:
for hms in (
[
Expand Down
16 changes: 11 additions & 5 deletions py-polars/tests/unit/operations/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,7 +558,7 @@ def test_overflow_mean_partitioned_groupby_5194(dtype: pl.PolarsDataType) -> Non


def test_groupby_dynamic_elementwise_following_mean_agg_6904() -> None:
assert (
df = (
pl.DataFrame(
{
"a": [
Expand All @@ -571,7 +571,13 @@ def test_groupby_dynamic_elementwise_following_mean_agg_6904() -> None:
.groupby_dynamic("a", every="10s", period="100s")
.agg([pl.col("b").mean().sin().alias("c")])
.collect()
).to_dict(False) == {
"a": [datetime(2021, 1, 1, 0, 0), datetime(2021, 1, 1, 0, 0, 10)],
"c": [0.9092974268256817, -0.7568024953079282],
}
)
assert_frame_equal(
df,
pl.DataFrame(
{
"a": [datetime(2021, 1, 1, 0, 0), datetime(2021, 1, 1, 0, 0, 10)],
"c": [0.9092974268256817, -0.7568024953079282],
}
),
)
10 changes: 7 additions & 3 deletions py-polars/tests/unit/test_lazy.py
Original file line number Diff line number Diff line change
Expand Up @@ -1358,9 +1358,13 @@ def test_head_tail(fruits_cars: pl.DataFrame) -> None:

def test_lower_bound_upper_bound(fruits_cars: pl.DataFrame) -> None:
res_expr = fruits_cars.select(pl.col("A").lower_bound())
assert res_expr["A"][0] < -10_000_000
res_expr = fruits_cars.select(pl.col("A").upper_bound())
assert res_expr["A"][0] > 10_000_000
assert res_expr.item() == -9223372036854775808

res_expr = fruits_cars.select(pl.col("B").upper_bound())
assert res_expr.item() == 9223372036854775807

with pytest.raises(pl.ComputeError):
fruits_cars.select(pl.col("fruits").upper_bound())


def test_nested_min_max() -> None:
Expand Down
23 changes: 23 additions & 0 deletions py-polars/tests/unit/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2406,3 +2406,26 @@ def test_map_dict() -> None:
s.map_dict(remap, default=s.cast(pl.Utf8)),
pl.Series("s", ["-1", "two", None, "four", "-5"]),
)


@pytest.mark.parametrize(
    ("dtype", "lower", "upper"),
    [
        (pl.Int8, -128, 127),
        (pl.UInt8, 0, 255),
        (pl.Int16, -32768, 32767),
        (pl.UInt16, 0, 65535),
        (pl.Int32, -2147483648, 2147483647),
        (pl.UInt32, 0, 4294967295),
        (pl.Int64, -9223372036854775808, 9223372036854775807),
        (pl.UInt64, 0, 18446744073709551615),
        (pl.Float32, float("-inf"), float("inf")),
        (pl.Float64, float("-inf"), float("inf")),
    ],
)
def test_upper_lower_bounds(
    dtype: PolarsDataType, lower: int | float, upper: int | float
) -> None:
    """Check ``Series.lower_bound`` / ``Series.upper_bound`` per numeric dtype.

    The parameters are listed in the same order as the parametrize argnames
    (``dtype``, ``lower``, ``upper``) so each case tuple reads left to right
    against the signature.
    """
    # An empty Series is enough here: the expected bounds are determined by
    # the dtype, not by any values stored in the Series.
    s = pl.Series("s", dtype=dtype)
    assert s.lower_bound().item() == lower
    assert s.upper_bound().item() == upper