Skip to content

Commit

Permalink
feat(python): add upper_bound and lower_bound methods to Series
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-beedie authored Feb 18, 2023
1 parent 98f664d commit 8abf200
Show file tree
Hide file tree
Showing 6 changed files with 127 additions and 34 deletions.
2 changes: 2 additions & 0 deletions py-polars/docs/source/reference/series/descriptive.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,10 @@ Descriptive
Series.is_unique
Series.is_utf8
Series.len
Series.lower_bound
Series.n_chunks
Series.n_unique
Series.null_count
Series.unique_counts
Series.upper_bound
Series.value_counts
106 changes: 81 additions & 25 deletions py-polars/polars/internals/series/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1619,30 +1619,25 @@ def n_chunks(self) -> int:
"""
return self._s.n_chunks()

def cumsum(self, reverse: bool = False) -> Series:
def cummax(self, reverse: bool = False) -> Series:
"""
Get an array with the cumulative sum computed at every element.
Get an array with the cumulative max computed at every element.
Parameters
----------
reverse
reverse the operation.
Notes
-----
Dtypes in {Int8, UInt8, Int16, UInt16} are cast to
Int64 before summing to prevent overflow issues.
Examples
--------
>>> s = pl.Series("a", [1, 2, 3])
>>> s.cumsum()
>>> s = pl.Series("s", [3, 5, 1])
>>> s.cummax()
shape: (3,)
Series: 'a' [i64]
Series: 's' [i64]
[
1
3
6
5
5
]
"""
Expand All @@ -1658,10 +1653,10 @@ def cummin(self, reverse: bool = False) -> Series:
Examples
--------
>>> s = pl.Series("a", [1, 2, 3])
>>> s = pl.Series("s", [1, 2, 3])
>>> s.cummin()
shape: (3,)
Series: 'a' [i64]
Series: 's' [i64]
[
1
1
Expand All @@ -1670,32 +1665,37 @@ def cummin(self, reverse: bool = False) -> Series:
"""

def cummax(self, reverse: bool = False) -> Series:
def cumprod(self, reverse: bool = False) -> Series:
"""
Get an array with the cumulative max computed at every element.
Get an array with the cumulative product computed at every element.
Parameters
----------
reverse
reverse the operation.
Notes
-----
Dtypes in {Int8, UInt8, Int16, UInt16} are cast to
Int64 before multiplying to prevent overflow issues.
Examples
--------
>>> s = pl.Series("a", [3, 5, 1])
>>> s.cummax()
>>> s = pl.Series("a", [1, 2, 3])
>>> s.cumprod()
shape: (3,)
Series: 'a' [i64]
[
3
5
5
1
2
6
]
"""

def cumprod(self, reverse: bool = False) -> Series:
def cumsum(self, reverse: bool = False) -> Series:
"""
Get an array with the cumulative product computed at every element.
Get an array with the cumulative sum computed at every element.
Parameters
----------
Expand All @@ -1710,12 +1710,12 @@ def cumprod(self, reverse: bool = False) -> Series:
Examples
--------
>>> s = pl.Series("a", [1, 2, 3])
>>> s.cumprod()
>>> s.cumsum()
shape: (3,)
Series: 'a' [i64]
[
1
2
3
6
]
Expand Down Expand Up @@ -4781,6 +4781,62 @@ def clip_max(self, max_val: int | float) -> Series:
"""

def lower_bound(self) -> Self:
    """
    Return the lower bound of this Series' dtype as a unit Series.

    The result holds a single value (the smallest value representable by the
    dtype) and, as the examples show, keeps the name and dtype of the Series
    it was called on. The values stored in the Series do not influence the
    result; only the dtype does.

    See Also
    --------
    upper_bound : return the upper bound of the given Series' dtype.

    Examples
    --------
    >>> s = pl.Series("s", [-1, 0, 1], dtype=pl.Int32)
    >>> s.lower_bound()
    shape: (1,)
    Series: 's' [i32]
    [
        -2147483648
    ]

    >>> s = pl.Series("s", [1.0, 2.5, 3.0], dtype=pl.Float32)
    >>> s.lower_bound()
    shape: (1,)
    Series: 's' [f32]
    [
        -inf
    ]

    """
    # NOTE(review): the body is docstring-only, like the neighbouring cum*
    # methods; presumably the implementation is supplied elsewhere (e.g. by
    # dispatching to the expression API) - confirm against the class decorator.

def upper_bound(self) -> Self:
    """
    Return the upper bound of this Series' dtype as a unit Series.

    The result holds a single value (the largest value representable by the
    dtype) and, as the examples show, keeps the name and dtype of the Series
    it was called on. The values stored in the Series do not influence the
    result; only the dtype does.

    See Also
    --------
    lower_bound : return the lower bound of the given Series' dtype.

    Examples
    --------
    >>> s = pl.Series("s", [-1, 0, 1], dtype=pl.Int8)
    >>> s.upper_bound()
    shape: (1,)
    Series: 's' [i8]
    [
        127
    ]

    >>> s = pl.Series("s", [1.0, 2.5, 3.0], dtype=pl.Float64)
    >>> s.upper_bound()
    shape: (1,)
    Series: 's' [f64]
    [
        inf
    ]

    """
    # NOTE(review): the body is docstring-only, like the neighbouring cum*
    # methods; presumably the implementation is supplied elsewhere (e.g. by
    # dispatching to the expression API) - confirm against the class decorator.

def map_dict(
self,
remapping: dict[Any, Any],
Expand Down
4 changes: 3 additions & 1 deletion py-polars/tests/unit/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,13 @@ def fruits_cars() -> pl.DataFrame:
"fruits": ["banana", "banana", "apple", "apple", "banana"],
"B": [5, 4, 3, 2, 1],
"cars": ["beetle", "audi", "beetle", "beetle", "beetle"],
}
},
schema_overrides={"A": pl.Int64, "B": pl.Int64},
)


ISO8601_FORMATS = []

for T in ["T", " "]:
for hms in (
[
Expand Down
16 changes: 11 additions & 5 deletions py-polars/tests/unit/operations/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,7 +558,7 @@ def test_overflow_mean_partitioned_groupby_5194(dtype: pl.PolarsDataType) -> Non


def test_groupby_dynamic_elementwise_following_mean_agg_6904() -> None:
assert (
df = (
pl.DataFrame(
{
"a": [
Expand All @@ -571,7 +571,13 @@ def test_groupby_dynamic_elementwise_following_mean_agg_6904() -> None:
.groupby_dynamic("a", every="10s", period="100s")
.agg([pl.col("b").mean().sin().alias("c")])
.collect()
).to_dict(False) == {
"a": [datetime(2021, 1, 1, 0, 0), datetime(2021, 1, 1, 0, 0, 10)],
"c": [0.9092974268256817, -0.7568024953079282],
}
)
assert_frame_equal(
df,
pl.DataFrame(
{
"a": [datetime(2021, 1, 1, 0, 0), datetime(2021, 1, 1, 0, 0, 10)],
"c": [0.9092974268256817, -0.7568024953079282],
}
),
)
10 changes: 7 additions & 3 deletions py-polars/tests/unit/test_lazy.py
Original file line number Diff line number Diff line change
Expand Up @@ -1358,9 +1358,13 @@ def test_head_tail(fruits_cars: pl.DataFrame) -> None:

def test_lower_bound_upper_bound(fruits_cars: pl.DataFrame) -> None:
    """Check expression-level dtype bounds against the ``fruits_cars`` fixture.

    Each ``select`` yields a single-cell frame, so ``item()`` extracts the
    bound directly. Exact limits are asserted rather than loose thresholds so
    that a wrong dtype (or a wrong bound) cannot slip through.
    """
    # "A" and "B" are integer columns; the expected values are the Int64
    # limits, which relies on the fixture declaring both columns as Int64.
    res_expr = fruits_cars.select(pl.col("A").lower_bound())
    assert res_expr.item() == -9223372036854775808

    res_expr = fruits_cars.select(pl.col("B").upper_bound())
    assert res_expr.item() == 9223372036854775807

    # "fruits" holds strings, for which no numeric bound exists.
    with pytest.raises(pl.ComputeError):
        fruits_cars.select(pl.col("fruits").upper_bound())


def test_nested_min_max() -> None:
Expand Down
23 changes: 23 additions & 0 deletions py-polars/tests/unit/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2420,3 +2420,26 @@ def test_map_dict() -> None:
s.map_dict(remap, default=s.cast(pl.Utf8)),
pl.Series("s", ["-1", "two", None, "four", "-5"]),
)


@pytest.mark.parametrize(
    ("dtype", "lower", "upper"),
    [
        (pl.Int8, -128, 127),
        (pl.UInt8, 0, 255),
        (pl.Int16, -32768, 32767),
        (pl.UInt16, 0, 65535),
        (pl.Int32, -2147483648, 2147483647),
        (pl.UInt32, 0, 4294967295),
        (pl.Int64, -9223372036854775808, 9223372036854775807),
        (pl.UInt64, 0, 18446744073709551615),
        (pl.Float32, float("-inf"), float("inf")),
        (pl.Float64, float("-inf"), float("inf")),
    ],
)
def test_upper_lower_bounds(
    dtype: PolarsDataType, lower: int | float, upper: int | float
) -> None:
    """Check ``Series.lower_bound``/``upper_bound`` for every numeric dtype.

    The Series is built without values, so the assertions depend on the dtype
    alone. Integer dtypes must report their exact limits; float dtypes must
    report negative and positive infinity.
    """
    # pytest binds parametrized values to arguments by name, so the signature
    # order is purely cosmetic; it is kept in the same (dtype, lower, upper)
    # order as the parametrize tuple to avoid misreading the cases.
    s = pl.Series("s", dtype=dtype)
    assert s.lower_bound().item() == lower
    assert s.upper_bound().item() == upper

0 comments on commit 8abf200

Please sign in to comment.