diff --git a/py-polars/docs/source/reference/series/descriptive.rst b/py-polars/docs/source/reference/series/descriptive.rst index e5e4d89033ae..cecb5c21daa2 100644 --- a/py-polars/docs/source/reference/series/descriptive.rst +++ b/py-polars/docs/source/reference/series/descriptive.rst @@ -28,8 +28,10 @@ Descriptive Series.is_unique Series.is_utf8 Series.len + Series.lower_bound Series.n_chunks Series.n_unique Series.null_count Series.unique_counts + Series.upper_bound Series.value_counts diff --git a/py-polars/polars/internals/series/series.py b/py-polars/polars/internals/series/series.py index 00259e896b21..67e19caa61c4 100644 --- a/py-polars/polars/internals/series/series.py +++ b/py-polars/polars/internals/series/series.py @@ -1619,30 +1619,25 @@ def n_chunks(self) -> int: """ return self._s.n_chunks() - def cumsum(self, reverse: bool = False) -> Series: + def cummax(self, reverse: bool = False) -> Series: """ - Get an array with the cumulative sum computed at every element. + Get an array with the cumulative max computed at every element. Parameters ---------- reverse reverse the operation. - Notes - ----- - Dtypes in {Int8, UInt8, Int16, UInt16} are cast to - Int64 before summing to prevent overflow issues. - Examples -------- - >>> s = pl.Series("a", [1, 2, 3]) - >>> s.cumsum() + >>> s = pl.Series("s", [3, 5, 1]) + >>> s.cummax() shape: (3,) - Series: 'a' [i64] + Series: 's' [i64] [ - 1 3 - 6 + 5 + 5 ] """ @@ -1658,10 +1653,10 @@ def cummin(self, reverse: bool = False) -> Series: Examples -------- - >>> s = pl.Series("a", [1, 2, 3]) + >>> s = pl.Series("s", [1, 2, 3]) >>> s.cummin() shape: (3,) - Series: 'a' [i64] + Series: 's' [i64] [ 1 1 @@ -1670,32 +1665,37 @@ def cummin(self, reverse: bool = False) -> Series: """ - def cummax(self, reverse: bool = False) -> Series: + def cumprod(self, reverse: bool = False) -> Series: """ - Get an array with the cumulative max computed at every element. 
+ Get an array with the cumulative product computed at every element. Parameters ---------- reverse reverse the operation. + Notes + ----- + Dtypes in {Int8, UInt8, Int16, UInt16} are cast to + Int64 before multiplying to prevent overflow issues. + Examples -------- - >>> s = pl.Series("a", [3, 5, 1]) - >>> s.cummax() + >>> s = pl.Series("a", [1, 2, 3]) + >>> s.cumprod() shape: (3,) Series: 'a' [i64] [ - 3 - 5 - 5 + 1 + 2 + 6 ] """ - def cumprod(self, reverse: bool = False) -> Series: + def cumsum(self, reverse: bool = False) -> Series: """ - Get an array with the cumulative product computed at every element. + Get an array with the cumulative sum computed at every element. Parameters ---------- @@ -1710,12 +1710,12 @@ def cumprod(self, reverse: bool = False) -> Series: Examples -------- >>> s = pl.Series("a", [1, 2, 3]) - >>> s.cumprod() + >>> s.cumsum() shape: (3,) Series: 'a' [i64] [ 1 - 2 + 3 6 ] @@ -4781,6 +4781,62 @@ def clip_max(self, max_val: int | float) -> Series: """ + def lower_bound(self) -> Self: + """ + Return the lower bound of this Series' dtype as a unit Series. + + See Also + -------- + upper_bound : return the upper bound of the given Series' dtype. + + Examples + -------- + >>> s = pl.Series("s", [-1, 0, 1], dtype=pl.Int32) + >>> s.lower_bound() + shape: (1,) + Series: 's' [i32] + [ + -2147483648 + ] + + >>> s = pl.Series("s", [1.0, 2.5, 3.0], dtype=pl.Float32) + >>> s.lower_bound() + shape: (1,) + Series: 's' [f32] + [ + -inf + ] + + """ + + def upper_bound(self) -> Self: + """ + Return the upper bound of this Series' dtype as a unit Series. + + See Also + -------- + lower_bound : return the lower bound of the given Series' dtype. 
+ + Examples + -------- + >>> s = pl.Series("s", [-1, 0, 1], dtype=pl.Int8) + >>> s.upper_bound() + shape: (1,) + Series: 's' [i8] + [ + 127 + ] + + >>> s = pl.Series("s", [1.0, 2.5, 3.0], dtype=pl.Float64) + >>> s.upper_bound() + shape: (1,) + Series: 's' [f64] + [ + inf + ] + + """ + def map_dict( self, remapping: dict[Any, Any], diff --git a/py-polars/tests/unit/conftest.py b/py-polars/tests/unit/conftest.py index 373cdbb17867..acd814d5b02e 100644 --- a/py-polars/tests/unit/conftest.py +++ b/py-polars/tests/unit/conftest.py @@ -53,11 +53,13 @@ def fruits_cars() -> pl.DataFrame: "fruits": ["banana", "banana", "apple", "apple", "banana"], "B": [5, 4, 3, 2, 1], "cars": ["beetle", "audi", "beetle", "beetle", "beetle"], - } + }, + schema_overrides={"A": pl.Int64, "B": pl.Int64}, ) ISO8601_FORMATS = [] + for T in ["T", " "]: for hms in ( [ diff --git a/py-polars/tests/unit/operations/test_groupby.py b/py-polars/tests/unit/operations/test_groupby.py index 5bae6300b46d..b8ef46b1f535 100644 --- a/py-polars/tests/unit/operations/test_groupby.py +++ b/py-polars/tests/unit/operations/test_groupby.py @@ -558,7 +558,7 @@ def test_overflow_mean_partitioned_groupby_5194(dtype: pl.PolarsDataType) -> Non def test_groupby_dynamic_elementwise_following_mean_agg_6904() -> None: - assert ( + df = ( pl.DataFrame( { "a": [ @@ -571,7 +571,13 @@ def test_groupby_dynamic_elementwise_following_mean_agg_6904() -> None: .groupby_dynamic("a", every="10s", period="100s") .agg([pl.col("b").mean().sin().alias("c")]) .collect() - ).to_dict(False) == { - "a": [datetime(2021, 1, 1, 0, 0), datetime(2021, 1, 1, 0, 0, 10)], - "c": [0.9092974268256817, -0.7568024953079282], - } + ) + assert_frame_equal( + df, + pl.DataFrame( + { + "a": [datetime(2021, 1, 1, 0, 0), datetime(2021, 1, 1, 0, 0, 10)], + "c": [0.9092974268256817, -0.7568024953079282], + } + ), + ) diff --git a/py-polars/tests/unit/test_lazy.py b/py-polars/tests/unit/test_lazy.py index 92e6d5b9f6a6..ae111bd3ecbe 100644 --- 
a/py-polars/tests/unit/test_lazy.py +++ b/py-polars/tests/unit/test_lazy.py @@ -1358,9 +1358,13 @@ def test_head_tail(fruits_cars: pl.DataFrame) -> None: def test_lower_bound_upper_bound(fruits_cars: pl.DataFrame) -> None: res_expr = fruits_cars.select(pl.col("A").lower_bound()) - assert res_expr["A"][0] < -10_000_000 - res_expr = fruits_cars.select(pl.col("A").upper_bound()) - assert res_expr["A"][0] > 10_000_000 + assert res_expr.item() == -9223372036854775808 + + res_expr = fruits_cars.select(pl.col("B").upper_bound()) + assert res_expr.item() == 9223372036854775807 + + with pytest.raises(pl.ComputeError): + fruits_cars.select(pl.col("fruits").upper_bound()) def test_nested_min_max() -> None: diff --git a/py-polars/tests/unit/test_series.py b/py-polars/tests/unit/test_series.py index e8ef0d2c0a75..eb1ea4408e94 100644 --- a/py-polars/tests/unit/test_series.py +++ b/py-polars/tests/unit/test_series.py @@ -2406,3 +2406,26 @@ def test_map_dict() -> None: s.map_dict(remap, default=s.cast(pl.Utf8)), pl.Series("s", ["-1", "two", None, "four", "-5"]), ) + + +@pytest.mark.parametrize( + ("dtype", "lower", "upper"), + [ + (pl.Int8, -128, 127), + (pl.UInt8, 0, 255), + (pl.Int16, -32768, 32767), + (pl.UInt16, 0, 65535), + (pl.Int32, -2147483648, 2147483647), + (pl.UInt32, 0, 4294967295), + (pl.Int64, -9223372036854775808, 9223372036854775807), + (pl.UInt64, 0, 18446744073709551615), + (pl.Float32, float("-inf"), float("inf")), + (pl.Float64, float("-inf"), float("inf")), + ], +) +def test_upper_lower_bounds( + dtype: PolarsDataType, upper: int | float, lower: int | float +) -> None: + s = pl.Series("s", dtype=dtype) + assert s.lower_bound().item() == lower + assert s.upper_bound().item() == upper