feat(python): add upper_bound and lower_bound methods to Series (#6990)
pola-rs · Feb 18, 2023 · 8abf200 · 8abf200
1 parent 98f664d
commit 8abf200
Show file tree

Hide file tree

Showing 6 changed files with 127 additions and 34 deletions.
diff --git a/py-polars/docs/source/reference/series/descriptive.rst b/py-polars/docs/source/reference/series/descriptive.rst
@@ -28,8 +28,10 @@ Descriptive
     Series.is_unique
     Series.is_utf8
     Series.len
+    Series.lower_bound
     Series.n_chunks
     Series.n_unique
     Series.null_count
     Series.unique_counts
+    Series.upper_bound
     Series.value_counts
diff --git a/py-polars/polars/internals/series/series.py b/py-polars/polars/internals/series/series.py
@@ -1619,30 +1619,25 @@ def n_chunks(self) -> int:
         """
         return self._s.n_chunks()
 
-    def cumsum(self, reverse: bool = False) -> Series:
+    def cummax(self, reverse: bool = False) -> Series:
         """
-        Get an array with the cumulative sum computed at every element.
+        Get an array with the cumulative max computed at every element.
 
         Parameters
         ----------
         reverse
             reverse the operation.
 
-        Notes
-        -----
-        Dtypes in {Int8, UInt8, Int16, UInt16} are cast to
-        Int64 before summing to prevent overflow issues.
-
         Examples
         --------
-        >>> s = pl.Series("a", [1, 2, 3])
-        >>> s.cumsum()
+        >>> s = pl.Series("s", [3, 5, 1])
+        >>> s.cummax()
         shape: (3,)
-        Series: 'a' [i64]
+        Series: 's' [i64]
         [
-            1
             3
-            6
+            5
+            5
         ]
 
         """
@@ -1658,10 +1653,10 @@ def cummin(self, reverse: bool = False) -> Series:
 
         Examples
         --------
-        >>> s = pl.Series("a", [1, 2, 3])
+        >>> s = pl.Series("s", [1, 2, 3])
         >>> s.cummin()
         shape: (3,)
-        Series: 'a' [i64]
+        Series: 's' [i64]
         [
             1
             1
@@ -1670,32 +1665,37 @@ def cummin(self, reverse: bool = False) -> Series:
 
         """
 
-    def cummax(self, reverse: bool = False) -> Series:
+    def cumprod(self, reverse: bool = False) -> Series:
         """
-        Get an array with the cumulative max computed at every element.
+        Get an array with the cumulative product computed at every element.
 
         Parameters
         ----------
         reverse
             reverse the operation.
 
+        Notes
+        -----
+        Dtypes in {Int8, UInt8, Int16, UInt16} are cast to
+        Int64 before multiplying to prevent overflow issues.
+
         Examples
         --------
-        >>> s = pl.Series("a", [3, 5, 1])
-        >>> s.cummax()
+        >>> s = pl.Series("a", [1, 2, 3])
+        >>> s.cumprod()
         shape: (3,)
         Series: 'a' [i64]
         [
-            3
-            5
-            5
+            1
+            2
+            6
         ]
 
         """
 
-    def cumprod(self, reverse: bool = False) -> Series:
+    def cumsum(self, reverse: bool = False) -> Series:
         """
-        Get an array with the cumulative product computed at every element.
+        Get an array with the cumulative sum computed at every element.
 
         Parameters
         ----------
@@ -1710,12 +1710,12 @@ def cumprod(self, reverse: bool = False) -> Series:
         Examples
         --------
         >>> s = pl.Series("a", [1, 2, 3])
-        >>> s.cumprod()
+        >>> s.cumsum()
         shape: (3,)
         Series: 'a' [i64]
         [
             1
-            2
+            3
             6
         ]
 
@@ -4781,6 +4781,62 @@ def clip_max(self, max_val: int | float) -> Series:
 
         """
 
+    def lower_bound(self) -> Self:
+        """
+        Return the lower bound of this Series' dtype as a unit Series.
+
+        See Also
+        --------
+        upper_bound : return the upper bound of the given Series' dtype.
+
+        Examples
+        --------
+        >>> s = pl.Series("s", [-1, 0, 1], dtype=pl.Int32)
+        >>> s.lower_bound()
+        shape: (1,)
+        Series: 's' [i32]
+        [
+            -2147483648
+        ]
+
+        >>> s = pl.Series("s", [1.0, 2.5, 3.0], dtype=pl.Float32)
+        >>> s.lower_bound()
+        shape: (1,)
+        Series: 's' [f32]
+        [
+            -inf
+        ]
+
+        """
+
+    def upper_bound(self) -> Self:
+        """
+        Return the upper bound of this Series' dtype as a unit Series.
+
+        See Also
+        --------
+        lower_bound : return the lower bound of the given Series' dtype.
+
+        Examples
+        --------
+        >>> s = pl.Series("s", [-1, 0, 1], dtype=pl.Int8)
+        >>> s.upper_bound()
+        shape: (1,)
+        Series: 's' [i8]
+        [
+            127
+        ]
+
+        >>> s = pl.Series("s", [1.0, 2.5, 3.0], dtype=pl.Float64)
+        >>> s.upper_bound()
+        shape: (1,)
+        Series: 's' [f64]
+        [
+            inf
+        ]
+
+        """
+
     def map_dict(
         self,
         remapping: dict[Any, Any],

diff --git a/py-polars/tests/unit/conftest.py b/py-polars/tests/unit/conftest.py
@@ -53,11 +53,13 @@ def fruits_cars() -> pl.DataFrame:
             "fruits": ["banana", "banana", "apple", "apple", "banana"],
             "B": [5, 4, 3, 2, 1],
             "cars": ["beetle", "audi", "beetle", "beetle", "beetle"],
-        }
+        },
+        schema_overrides={"A": pl.Int64, "B": pl.Int64},
     )
 
 
 ISO8601_FORMATS = []
+
 for T in ["T", " "]:
     for hms in (
         [

diff --git a/py-polars/tests/unit/operations/test_groupby.py b/py-polars/tests/unit/operations/test_groupby.py
@@ -558,7 +558,7 @@ def test_overflow_mean_partitioned_groupby_5194(dtype: pl.PolarsDataType) -> Non
 
 
 def test_groupby_dynamic_elementwise_following_mean_agg_6904() -> None:
-    assert (
+    df = (
         pl.DataFrame(
             {
                 "a": [
@@ -571,7 +571,13 @@ def test_groupby_dynamic_elementwise_following_mean_agg_6904() -> None:
         .groupby_dynamic("a", every="10s", period="100s")
         .agg([pl.col("b").mean().sin().alias("c")])
         .collect()
-    ).to_dict(False) == {
-        "a": [datetime(2021, 1, 1, 0, 0), datetime(2021, 1, 1, 0, 0, 10)],
-        "c": [0.9092974268256817, -0.7568024953079282],
-    }
+    )
+    assert_frame_equal(
+        df,
+        pl.DataFrame(
+            {
+                "a": [datetime(2021, 1, 1, 0, 0), datetime(2021, 1, 1, 0, 0, 10)],
+                "c": [0.9092974268256817, -0.7568024953079282],
+            }
+        ),
+    )
diff --git a/py-polars/tests/unit/test_lazy.py b/py-polars/tests/unit/test_lazy.py
@@ -1358,9 +1358,13 @@ def test_head_tail(fruits_cars: pl.DataFrame) -> None:
 
 def test_lower_bound_upper_bound(fruits_cars: pl.DataFrame) -> None:
     res_expr = fruits_cars.select(pl.col("A").lower_bound())
-    assert res_expr["A"][0] < -10_000_000
-    res_expr = fruits_cars.select(pl.col("A").upper_bound())
-    assert res_expr["A"][0] > 10_000_000
+    assert res_expr.item() == -9223372036854775808
+
+    res_expr = fruits_cars.select(pl.col("B").upper_bound())
+    assert res_expr.item() == 9223372036854775807
+
+    with pytest.raises(pl.ComputeError):
+        fruits_cars.select(pl.col("fruits").upper_bound())
 
 
 def test_nested_min_max() -> None:

diff --git a/py-polars/tests/unit/test_series.py b/py-polars/tests/unit/test_series.py
@@ -2420,3 +2420,26 @@ def test_map_dict() -> None:
         s.map_dict(remap, default=s.cast(pl.Utf8)),
         pl.Series("s", ["-1", "two", None, "four", "-5"]),
     )
+
+
+@pytest.mark.parametrize(
+    ("dtype", "lower", "upper"),
+    [
+        (pl.Int8, -128, 127),
+        (pl.UInt8, 0, 255),
+        (pl.Int16, -32768, 32767),
+        (pl.UInt16, 0, 65535),
+        (pl.Int32, -2147483648, 2147483647),
+        (pl.UInt32, 0, 4294967295),
+        (pl.Int64, -9223372036854775808, 9223372036854775807),
+        (pl.UInt64, 0, 18446744073709551615),
+        (pl.Float32, float("-inf"), float("inf")),
+        (pl.Float64, float("-inf"), float("inf")),
+    ],
+)
+def test_upper_lower_bounds(
+    dtype: PolarsDataType, lower: int | float, upper: int | float
+) -> None:
+    s = pl.Series("s", dtype=dtype)
+    assert s.lower_bound().item() == lower
+    assert s.upper_bound().item() == upper