From 1b76dc4df5619edac86a5b210b4e709881721b23 Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Wed, 5 Jun 2024 20:42:18 +0200 Subject: [PATCH 1/3] Update rolling implementation --- py-polars/polars/_utils/deprecation.py | 36 +- py-polars/polars/expr/expr.py | 340 ++---------------- py-polars/polars/series/series.py | 91 ----- py-polars/src/expr/rolling.rs | 27 +- .../unit/operations/rolling/test_rolling.py | 104 ++---- py-polars/tests/unit/test_errors.py | 7 - 6 files changed, 73 insertions(+), 532 deletions(-) diff --git a/py-polars/polars/_utils/deprecation.py b/py-polars/polars/_utils/deprecation.py index 0961c652d5e9..d547da5b711c 100644 --- a/py-polars/polars/_utils/deprecation.py +++ b/py-polars/polars/_utils/deprecation.py @@ -6,13 +6,12 @@ from typing import TYPE_CHECKING, Callable, Sequence, TypeVar from polars._utils.various import find_stacklevel -from polars.exceptions import InvalidOperationError if TYPE_CHECKING: import sys from typing import Mapping - from polars.type_aliases import Ambiguous, ClosedInterval + from polars.type_aliases import Ambiguous if sys.version_info >= (3, 10): from typing import ParamSpec @@ -242,36 +241,3 @@ def _format_argument_list(allowed_args: list[str]) -> str: last = allowed_args[-1] args = ", ".join([f"{x!r}" for x in allowed_args[:-1]]) return f" except for {args} and {last!r}" - - -def validate_rolling_by_aggs_arguments( - weights: list[float] | None, *, center: bool -) -> None: - if weights is not None: - msg = "`weights` is not supported in `rolling_*(..., by=...)` expression" - raise InvalidOperationError(msg) - if center: - msg = "`center=True` is not supported in `rolling_*(..., by=...)` expression" - raise InvalidOperationError(msg) - - -def validate_rolling_aggs_arguments( - window_size: int | str, closed: ClosedInterval | None -) -> int: - if isinstance(window_size, str): - issue_deprecation_warning( - "Passing a str to `rolling_*` is deprecated.\n\n" - "Please, either:\n" - "- pass an integer if you want 
a fixed window size (e.g. `rolling_mean(3)`)\n" - "- pass a string if you are computing the rolling operation based on another column (e.g. `rolling_mean_by('date', '3d'))\n", - version="0.20.26", - ) - try: - window_size = int(window_size.rstrip("i")) - except ValueError: - msg = f"Expected a string of the form 'ni', where `n` is a positive integer, got: {window_size}" - raise InvalidOperationError(msg) from None - if closed is not None: - msg = "`closed` is not supported in `rolling_*(...)` expression" - raise InvalidOperationError(msg) - return window_size diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py index b000300421d1..7bdef405af15 100644 --- a/py-polars/polars/expr/expr.py +++ b/py-polars/polars/expr/expr.py @@ -26,12 +26,7 @@ import polars._reexport as pl from polars import functions as F from polars._utils.convert import negate_duration_string, parse_as_duration_string -from polars._utils.deprecation import ( - deprecate_function, - issue_deprecation_warning, - validate_rolling_aggs_arguments, - validate_rolling_by_aggs_arguments, -) +from polars._utils.deprecation import deprecate_function, issue_deprecation_warning from polars._utils.parse_expr_input import ( parse_as_expression, parse_as_list_of_expressions, @@ -7292,14 +7287,11 @@ def rolling_quantile_by( @unstable() def rolling_min( self, - window_size: int | timedelta | str, + window_size: int, weights: list[float] | None = None, *, min_periods: int | None = None, center: bool = False, - by: str | None = None, - closed: ClosedInterval | None = None, - warn_if_unsorted: bool | None = None, ) -> Self: """ Apply a rolling min (moving min) over the values in this array. @@ -7312,67 +7304,21 @@ def rolling_min( this window will (optionally) be multiplied with the weights given by the `weight` vector. The resulting values will be aggregated to their min. 
- If `by` has not been specified (the default), the window at a given row will - include the row itself, and the `window_size - 1` elements before it. - - If you pass a `by` column ``, then `closed="right"` - (the default) means the windows will be: - - - (t_0 - window_size, t_0] - - (t_1 - window_size, t_1] - - ... - - (t_n - window_size, t_n] + The window at a given row will include the row itself, and the `window_size - 1` + elements before it. Parameters ---------- window_size - The length of the window. Can be a fixed integer size, or a dynamic - temporal size indicated by a timedelta or the following string language: - - - 1ns (1 nanosecond) - - 1us (1 microsecond) - - 1ms (1 millisecond) - - 1s (1 second) - - 1m (1 minute) - - 1h (1 hour) - - 1d (1 calendar day) - - 1w (1 calendar week) - - 1mo (1 calendar month) - - 1q (1 calendar quarter) - - 1y (1 calendar year) - - 1i (1 index count) - - By "calendar day", we mean the corresponding time on the next day - (which may not be 24 hours, due to daylight savings). Similarly for - "calendar week", "calendar month", "calendar quarter", and - "calendar year". - - If a timedelta or the dynamic string language is used, the `by` - and `closed` arguments must also be set. + The length of the window in number of elements. weights An optional slice with the same length as the window that will be multiplied elementwise with the values in the window. min_periods The number of values in the window that should be non-null before computing - a result. If None, it will be set equal to: - - - the window size, if `window_size` is a fixed integer - - 1, if `window_size` is a dynamic temporal size + a result. If set to `None` (default), it will be set equal to `window_size`. center Set the labels at the center of the window - by - If the `window_size` is temporal for instance `"5h"` or `"3s"`, you must - set the column that will be used to determine the windows. This column must - be of dtype Datetime or Date. - - .. 
deprecated:: 0.20.24 - Passing `by` to `rolling_min` is deprecated - please use - :meth:`.rolling_min_by` instead. - closed : {'left', 'right', 'both', 'none'} - Define which sides of the temporal interval are closed (inclusive); only - applicable if `by` has been set (in which case, it defaults to `'right'`). - warn_if_unsorted - Warn if data is not known to be sorted by `by` column (if passed). Notes ----- @@ -7439,98 +7385,24 @@ def rolling_min( │ 5.0 ┆ 4.0 │ │ 6.0 ┆ null │ └─────┴─────────────┘ - - Create a DataFrame with a datetime column and a row number column - - >>> from datetime import timedelta, datetime - >>> start = datetime(2001, 1, 1) - >>> stop = datetime(2001, 1, 2) - >>> df_temporal = pl.DataFrame( - ... {"date": pl.datetime_range(start, stop, "1h", eager=True)} - ... ).with_row_index() - >>> df_temporal - shape: (25, 2) - ┌───────┬─────────────────────┐ - │ index ┆ date │ - │ --- ┆ --- │ - │ u32 ┆ datetime[μs] │ - ╞═══════╪═════════════════════╡ - │ 0 ┆ 2001-01-01 00:00:00 │ - │ 1 ┆ 2001-01-01 01:00:00 │ - │ 2 ┆ 2001-01-01 02:00:00 │ - │ 3 ┆ 2001-01-01 03:00:00 │ - │ 4 ┆ 2001-01-01 04:00:00 │ - │ … ┆ … │ - │ 20 ┆ 2001-01-01 20:00:00 │ - │ 21 ┆ 2001-01-01 21:00:00 │ - │ 22 ┆ 2001-01-01 22:00:00 │ - │ 23 ┆ 2001-01-01 23:00:00 │ - │ 24 ┆ 2001-01-02 00:00:00 │ - └───────┴─────────────────────┘ - - Compute the rolling min with the temporal windows closed on the right (default) - - >>> df_temporal.with_columns( - ... rolling_row_min=pl.col("index").rolling_min(window_size="2h", by="date") - ... 
) # doctest:+SKIP - shape: (25, 3) - ┌───────┬─────────────────────┬─────────────────┐ - │ index ┆ date ┆ rolling_row_min │ - │ --- ┆ --- ┆ --- │ - │ u32 ┆ datetime[μs] ┆ u32 │ - ╞═══════╪═════════════════════╪═════════════════╡ - │ 0 ┆ 2001-01-01 00:00:00 ┆ 0 │ - │ 1 ┆ 2001-01-01 01:00:00 ┆ 0 │ - │ 2 ┆ 2001-01-01 02:00:00 ┆ 1 │ - │ 3 ┆ 2001-01-01 03:00:00 ┆ 2 │ - │ 4 ┆ 2001-01-01 04:00:00 ┆ 3 │ - │ … ┆ … ┆ … │ - │ 20 ┆ 2001-01-01 20:00:00 ┆ 19 │ - │ 21 ┆ 2001-01-01 21:00:00 ┆ 20 │ - │ 22 ┆ 2001-01-01 22:00:00 ┆ 21 │ - │ 23 ┆ 2001-01-01 23:00:00 ┆ 22 │ - │ 24 ┆ 2001-01-02 00:00:00 ┆ 23 │ - └───────┴─────────────────────┴─────────────────┘ """ - window_size, min_periods = _prepare_rolling_window_args( - window_size, min_periods - ) - if by is not None: - issue_deprecation_warning( - "Passing `by` to `rolling_min` is deprecated. Instead of " - "`rolling_min(..., by='foo')`, please use `rolling_min_by('foo', ...)`.", - version="0.20.24", - ) - validate_rolling_by_aggs_arguments(weights, center=center) - return self.rolling_min_by( - by=by, - # integer `window_size` was already not supported when `by` was passed - window_size=window_size, # type: ignore[arg-type] - min_periods=min_periods, - closed=closed or "right", - warn_if_unsorted=warn_if_unsorted, - ) - window_size = validate_rolling_aggs_arguments(window_size, closed) return self._from_pyexpr( self._pyexpr.rolling_min( window_size, weights, min_periods, - center, + center=center, ) ) @unstable() def rolling_max( self, - window_size: int | timedelta | str, + window_size: int, weights: list[float] | None = None, *, min_periods: int | None = None, center: bool = False, - by: str | None = None, - closed: ClosedInterval | None = None, - warn_if_unsorted: bool | None = None, ) -> Self: """ Apply a rolling max (moving max) over the values in this array. 
@@ -7749,25 +7621,6 @@ def rolling_max( │ 24 ┆ 2001-01-02 00:00:00 ┆ 24 │ └───────┴─────────────────────┴─────────────────┘ """ - window_size, min_periods = _prepare_rolling_window_args( - window_size, min_periods - ) - if by is not None: - issue_deprecation_warning( - "Passing `by` to `rolling_max` is deprecated. Instead of " - "`rolling_max(..., by='foo')`, please use `rolling_max_by('foo', ...)`.", - version="0.20.24", - ) - validate_rolling_by_aggs_arguments(weights, center=center) - return self.rolling_max_by( - by=by, - # integer `window_size` was already not supported when `by` was passed - window_size=window_size, # type: ignore[arg-type] - min_periods=min_periods, - closed=closed or "right", - warn_if_unsorted=warn_if_unsorted, - ) - window_size = validate_rolling_aggs_arguments(window_size, closed) return self._from_pyexpr( self._pyexpr.rolling_max( window_size, @@ -7780,14 +7633,11 @@ def rolling_max( @unstable() def rolling_mean( self, - window_size: int | timedelta | str, + window_size: int, weights: list[float] | None = None, *, min_periods: int | None = None, center: bool = False, - by: str | None = None, - closed: ClosedInterval | None = None, - warn_if_unsorted: bool | None = None, ) -> Self: """ Apply a rolling mean (moving mean) over the values in this array. @@ -8008,25 +7858,6 @@ def rolling_mean( │ 24 ┆ 2001-01-02 00:00:00 ┆ 23.0 │ └───────┴─────────────────────┴──────────────────┘ """ - window_size, min_periods = _prepare_rolling_window_args( - window_size, min_periods - ) - if by is not None: - issue_deprecation_warning( - "Passing `by` to `rolling_mean` is deprecated. 
Instead of " - "`rolling_mean(..., by='foo')`, please use `rolling_mean_by('foo', ...)`.", - version="0.20.24", - ) - validate_rolling_by_aggs_arguments(weights, center=center) - return self.rolling_mean_by( - by=by, - # integer `window_size` was already not supported when `by` was passed - window_size=window_size, # type: ignore[arg-type] - min_periods=min_periods, - closed=closed or "right", - warn_if_unsorted=warn_if_unsorted, - ) - window_size = validate_rolling_aggs_arguments(window_size, closed) return self._from_pyexpr( self._pyexpr.rolling_mean( window_size, @@ -8039,14 +7870,11 @@ def rolling_mean( @unstable() def rolling_sum( self, - window_size: int | timedelta | str, + window_size: int | timedelta, weights: list[float] | None = None, *, min_periods: int | None = None, center: bool = False, - by: str | None = None, - closed: ClosedInterval | None = None, - warn_if_unsorted: bool | None = None, ) -> Self: """ Apply a rolling sum (moving sum) over the values in this array. @@ -8265,25 +8093,6 @@ def rolling_sum( │ 24 ┆ 2001-01-02 00:00:00 ┆ 69 │ └───────┴─────────────────────┴─────────────────┘ """ - window_size, min_periods = _prepare_rolling_window_args( - window_size, min_periods - ) - if by is not None: - issue_deprecation_warning( - "Passing `by` to `rolling_sum` is deprecated. 
Instead of " - "`rolling_sum(..., by='foo')`, please use `rolling_sum_by('foo', ...)`.", - version="0.20.24", - ) - validate_rolling_by_aggs_arguments(weights, center=center) - return self.rolling_sum_by( - by=by, - # integer `window_size` was already not supported when `by` was passed - window_size=window_size, # type: ignore[arg-type] - min_periods=min_periods, - closed=closed or "right", - warn_if_unsorted=warn_if_unsorted, - ) - window_size = validate_rolling_aggs_arguments(window_size, closed) return self._from_pyexpr( self._pyexpr.rolling_sum( window_size, @@ -8296,15 +8105,12 @@ def rolling_sum( @unstable() def rolling_std( self, - window_size: int | timedelta | str, + window_size: int | timedelta, weights: list[float] | None = None, *, min_periods: int | None = None, center: bool = False, - by: str | None = None, - closed: ClosedInterval | None = None, ddof: int = 1, - warn_if_unsorted: bool | None = None, ) -> Self: """ Compute a rolling standard deviation. @@ -8521,48 +8327,25 @@ def rolling_std( │ 24 ┆ 2001-01-02 00:00:00 ┆ 1.0 │ └───────┴─────────────────────┴─────────────────┘ """ - window_size, min_periods = _prepare_rolling_window_args( - window_size, min_periods - ) - if by is not None: - issue_deprecation_warning( - "Passing `by` to `rolling_std` is deprecated. 
Instead of " - "`rolling_std(..., by='foo')`, please use `rolling_std_by('foo', ...)`.", - version="0.20.24", - ) - validate_rolling_by_aggs_arguments(weights, center=center) - return self.rolling_std_by( - by=by, - # integer `window_size` was already not supported when `by` was passed - window_size=window_size, # type: ignore[arg-type] - min_periods=min_periods, - closed=closed or "right", - ddof=ddof, - warn_if_unsorted=warn_if_unsorted, - ) - window_size = validate_rolling_aggs_arguments(window_size, closed) return self._from_pyexpr( self._pyexpr.rolling_std( window_size, weights, min_periods, - center, - ddof, + center=center, + ddof=ddof, ) ) @unstable() def rolling_var( self, - window_size: int | timedelta | str, + window_size: int | timedelta, weights: list[float] | None = None, *, min_periods: int | None = None, center: bool = False, - by: str | None = None, - closed: ClosedInterval | None = None, ddof: int = 1, - warn_if_unsorted: bool | None = None, ) -> Self: """ Compute a rolling variance. @@ -8779,47 +8562,24 @@ def rolling_var( │ 24 ┆ 2001-01-02 00:00:00 ┆ 1.0 │ └───────┴─────────────────────┴─────────────────┘ """ - window_size, min_periods = _prepare_rolling_window_args( - window_size, min_periods - ) - if by is not None: - issue_deprecation_warning( - "Passing `by` to `rolling_var` is deprecated. 
Instead of " - "`rolling_var(..., by='foo')`, please use `rolling_var_by('foo', ...)`.", - version="0.20.24", - ) - validate_rolling_by_aggs_arguments(weights, center=center) - return self.rolling_var_by( - by=by, - # integer `window_size` was already not supported when `by` was passed - window_size=window_size, # type: ignore[arg-type] - min_periods=min_periods, - closed=closed or "right", - ddof=ddof, - warn_if_unsorted=warn_if_unsorted, - ) - window_size = validate_rolling_aggs_arguments(window_size, closed) return self._from_pyexpr( self._pyexpr.rolling_var( window_size, weights, min_periods, - center, - ddof, + center=center, + ddof=ddof, ) ) @unstable() def rolling_median( self, - window_size: int | timedelta | str, + window_size: int | timedelta, weights: list[float] | None = None, *, min_periods: int | None = None, center: bool = False, - by: str | None = None, - closed: ClosedInterval | None = None, - warn_if_unsorted: bool | None = None, ) -> Self: """ Compute a rolling median. @@ -8956,31 +8716,12 @@ def rolling_median( │ 6.0 ┆ null │ └─────┴────────────────┘ """ - window_size, min_periods = _prepare_rolling_window_args( - window_size, min_periods - ) - if by is not None: - issue_deprecation_warning( - "Passing `by` to `rolling_median` is deprecated. 
Instead of " - "`rolling_median(..., by='foo')`, please use `rolling_median_by('foo', ...)`.", - version="0.20.24", - ) - validate_rolling_by_aggs_arguments(weights, center=center) - return self.rolling_median_by( - by=by, - # integer `window_size` was already not supported when `by` was passed - window_size=window_size, # type: ignore[arg-type] - min_periods=min_periods, - closed=closed or "right", - warn_if_unsorted=warn_if_unsorted, - ) - window_size = validate_rolling_aggs_arguments(window_size, closed) return self._from_pyexpr( self._pyexpr.rolling_median( window_size, weights, min_periods, - center, + center=center, ) ) @@ -8989,14 +8730,11 @@ def rolling_quantile( self, quantile: float, interpolation: RollingInterpolationMethod = "nearest", - window_size: int | timedelta | str = 2, + window_size: int | timedelta = 2, weights: list[float] | None = None, *, min_periods: int | None = None, center: bool = False, - by: str | None = None, - closed: ClosedInterval | None = None, - warn_if_unsorted: bool | None = None, ) -> Self: """ Compute a rolling quantile. @@ -9165,26 +8903,6 @@ def rolling_quantile( │ 6.0 ┆ null │ └─────┴──────────────────┘ """ - window_size, min_periods = _prepare_rolling_window_args( - window_size, min_periods - ) - if by is not None: - issue_deprecation_warning( - "Passing `by` to `rolling_quantile` is deprecated. 
Instead of " - "`rolling_quantile(..., by='foo')`, please use `rolling_quantile_by('foo', ...)`.", - version="0.20.24", - ) - validate_rolling_by_aggs_arguments(weights, center=center) - return self.rolling_quantile_by( - by=by, - # integer `window_size` was already not supported when `by` was passed - window_size=window_size, # type: ignore[arg-type] - min_periods=min_periods, - closed=closed or "right", - warn_if_unsorted=warn_if_unsorted, - quantile=quantile, - ) - window_size = validate_rolling_aggs_arguments(window_size, closed) return self._from_pyexpr( self._pyexpr.rolling_quantile( quantile, @@ -9192,7 +8910,7 @@ def rolling_quantile( window_size, weights, min_periods, - center, + center=center, ) ) @@ -11534,24 +11252,6 @@ def _prepare_alpha( return alpha -def _prepare_rolling_window_args( - window_size: int | timedelta | str, - min_periods: int | None = None, -) -> tuple[int | str, int]: - if isinstance(window_size, int): - if window_size < 1: - msg = "`window_size` must be positive" - raise ValueError(msg) - - if min_periods is None: - min_periods = window_size - elif isinstance(window_size, timedelta): - window_size = parse_as_duration_string(window_size) - if min_periods is None: - min_periods = 1 - return window_size, min_periods - - def _prepare_rolling_by_window_args( window_size: timedelta | str, ) -> str: diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index e62cfad1679b..4456a2e226bc 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -5635,15 +5635,6 @@ def rolling_min( 300 ] """ - return ( - self.to_frame() - .select( - F.col(self.name).rolling_min( - window_size, weights, min_periods=min_periods, center=center - ) - ) - .to_series() - ) @unstable() def rolling_max( @@ -5698,15 +5689,6 @@ def rolling_max( 500 ] """ - return ( - self.to_frame() - .select( - F.col(self.name).rolling_max( - window_size, weights, min_periods=min_periods, center=center - ) - ) - .to_series() - 
) @unstable() def rolling_mean( @@ -5761,15 +5743,6 @@ def rolling_mean( 450.0 ] """ - return ( - self.to_frame() - .select( - F.col(self.name).rolling_mean( - window_size, weights, min_periods=min_periods, center=center - ) - ) - .to_series() - ) @unstable() def rolling_sum( @@ -5824,15 +5797,6 @@ def rolling_sum( 9 ] """ - return ( - self.to_frame() - .select( - F.col(self.name).rolling_sum( - window_size, weights, min_periods=min_periods, center=center - ) - ) - .to_series() - ) @unstable() def rolling_std( @@ -5891,19 +5855,6 @@ def rolling_std( 2.0 ] """ - return ( - self.to_frame() - .select( - F.col(self.name).rolling_std( - window_size, - weights, - min_periods=min_periods, - center=center, - ddof=ddof, - ) - ) - .to_series() - ) @unstable() def rolling_var( @@ -5962,19 +5913,6 @@ def rolling_var( 4.0 ] """ - return ( - self.to_frame() - .select( - F.col(self.name).rolling_var( - window_size, - weights, - min_periods=min_periods, - center=center, - ddof=ddof, - ) - ) - .to_series() - ) @unstable() def rolling_map( @@ -6083,18 +6021,6 @@ def rolling_median( 6.0 ] """ - if min_periods is None: - min_periods = window_size - - return ( - self.to_frame() - .select( - F.col(self.name).rolling_median( - window_size, weights, min_periods=min_periods, center=center - ) - ) - .to_series() - ) @unstable() def rolling_quantile( @@ -6163,23 +6089,6 @@ def rolling_quantile( 5.32 ] """ - if min_periods is None: - min_periods = window_size - - return ( - self.to_frame() - .select( - F.col(self.name).rolling_quantile( - quantile, - interpolation, - window_size, - weights, - min_periods=min_periods, - center=center, - ) - ) - .to_series() - ) @unstable() def rolling_skew(self, window_size: int, *, bias: bool = True) -> Series: diff --git a/py-polars/src/expr/rolling.rs b/py-polars/src/expr/rolling.rs index d5183c93e9f0..b854cb4bd89b 100644 --- a/py-polars/src/expr/rolling.rs +++ b/py-polars/src/expr/rolling.rs @@ -15,9 +15,10 @@ impl PyExpr { &self, window_size: usize, 
weights: Option>, - min_periods: usize, + min_periods: Option, center: bool, ) -> Self { + let min_periods = min_periods.unwrap_or(window_size); let options = RollingOptionsFixedWindow { window_size, weights, @@ -50,9 +51,10 @@ impl PyExpr { &self, window_size: usize, weights: Option>, - min_periods: usize, + min_periods: Option, center: bool, ) -> Self { + let min_periods = min_periods.unwrap_or(window_size); let options = RollingOptionsFixedWindow { window_size, weights, @@ -85,9 +87,10 @@ impl PyExpr { &self, window_size: usize, weights: Option>, - min_periods: usize, + min_periods: Option, center: bool, ) -> Self { + let min_periods = min_periods.unwrap_or(window_size); let options = RollingOptionsFixedWindow { window_size, weights, @@ -119,9 +122,10 @@ impl PyExpr { &self, window_size: usize, weights: Option>, - min_periods: usize, + min_periods: Option, center: bool, ) -> Self { + let min_periods = min_periods.unwrap_or(window_size); let options = RollingOptionsFixedWindow { window_size, weights, @@ -156,10 +160,11 @@ impl PyExpr { &self, window_size: usize, weights: Option>, - min_periods: usize, + min_periods: Option, center: bool, ddof: u8, ) -> Self { + let min_periods = min_periods.unwrap_or(window_size); let options = RollingOptionsFixedWindow { window_size, weights, @@ -195,10 +200,11 @@ impl PyExpr { &self, window_size: usize, weights: Option>, - min_periods: usize, + min_periods: Option, center: bool, ddof: u8, ) -> Self { + let min_periods = min_periods.unwrap_or(window_size); let options = RollingOptionsFixedWindow { window_size, weights, @@ -234,9 +240,10 @@ impl PyExpr { &self, window_size: usize, weights: Option>, - min_periods: usize, + min_periods: Option, center: bool, ) -> Self { + let min_periods = min_periods.unwrap_or(window_size); let options = RollingOptionsFixedWindow { window_size, min_periods, @@ -274,9 +281,10 @@ impl PyExpr { interpolation: Wrap, window_size: usize, weights: Option>, - min_periods: usize, + min_periods: Option, 
center: bool, ) -> Self { + let min_periods = min_periods.unwrap_or(window_size); let options = RollingOptionsFixedWindow { window_size, weights, @@ -324,9 +332,10 @@ impl PyExpr { lambda: PyObject, window_size: usize, weights: Option>, - min_periods: usize, + min_periods: Option, center: bool, ) -> Self { + let min_periods = min_periods.unwrap_or(window_size); let options = RollingOptionsFixedWindow { window_size, weights, diff --git a/py-polars/tests/unit/operations/rolling/test_rolling.py b/py-polars/tests/unit/operations/rolling/test_rolling.py index 22fdd699ef97..9912d7e30112 100644 --- a/py-polars/tests/unit/operations/rolling/test_rolling.py +++ b/py-polars/tests/unit/operations/rolling/test_rolling.py @@ -201,12 +201,12 @@ def test_rolling_skew() -> None: @pytest.mark.parametrize( ("rolling_fn", "expected_values", "expected_dtype"), [ - ("rolling_mean", [None, 1.0, 2.0, 3.0, 4.0, 5.0], pl.Float64), - ("rolling_sum", [None, 1, 2, 3, 4, 5], pl.Int64), - ("rolling_min", [None, 1, 2, 3, 4, 5], pl.Int64), - ("rolling_max", [None, 1, 2, 3, 4, 5], pl.Int64), - ("rolling_std", [None, None, None, None, None, None], pl.Float64), - ("rolling_var", [None, None, None, None, None, None], pl.Float64), + ("rolling_mean_by", [None, 1.0, 2.0, 3.0, 4.0, 5.0], pl.Float64), + ("rolling_sum_by", [None, 1, 2, 3, 4, 5], pl.Int64), + ("rolling_min_by", [None, 1, 2, 3, 4, 5], pl.Int64), + ("rolling_max_by", [None, 1, 2, 3, 4, 5], pl.Int64), + ("rolling_std_by", [None, None, None, None, None, None], pl.Float64), + ("rolling_var_by", [None, None, None, None, None, None], pl.Float64), ], ) def test_rolling_crossing_dst( @@ -219,10 +219,11 @@ def test_rolling_crossing_dst( datetime(2021, 11, 5), datetime(2021, 11, 10), "1d", time_zone="UTC", eager=True ).dt.replace_time_zone(time_zone) df = pl.DataFrame({"ts": ts, "value": [1, 2, 3, 4, 5, 6]}) - with pytest.deprecated_call(match=f"{rolling_fn}_by"): - result = df.with_columns( - getattr(pl.col("value"), rolling_fn)("1d", by="ts", 
closed="left") - ) + + result = df.with_columns( + getattr(pl.col("value"), rolling_fn)(by="ts", window_size="1d", closed="left") + ) + expected = pl.DataFrame( {"ts": ts, "value": expected_values}, schema_overrides={"value": expected_dtype} ) @@ -247,17 +248,6 @@ def test_rolling_infinity() -> None: assert_series_equal(s, expected) -def test_rolling_invalid_closed_option() -> None: - df = pl.DataFrame( - {"a": [4, 5, 6], "b": [date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 3)]} - ).sort("a", "b") - with pytest.raises( - InvalidOperationError, - match=r"`closed` is not supported in `rolling_\*\(...\)` expression", - ): - df.with_columns(pl.col("a").rolling_sum(2, closed="left")) - - def test_rolling_by_non_temporal_window_size() -> None: df = pl.DataFrame( {"a": [4, 5, 6], "b": [date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 3)]} @@ -267,26 +257,6 @@ def test_rolling_by_non_temporal_window_size() -> None: df.with_columns(pl.col("a").rolling_sum_by("b", "2i", closed="left")) -def test_rolling_by_weights() -> None: - df = pl.DataFrame( - {"a": [4, 5, 6], "b": [date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 3)]} - ).sort("b") - msg = r"`weights` is not supported in `rolling_\*\(..., by=...\)` expression" - with pytest.raises(InvalidOperationError, match=msg): # noqa: SIM117 - with pytest.deprecated_call(match="rolling_sum_by"): - df.with_columns(pl.col("a").rolling_sum("2d", by="b", weights=[1, 2])) - - -def test_rolling_by_center() -> None: - df = pl.DataFrame( - {"a": [4, 5, 6], "b": [date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 3)]} - ).sort("b") - msg = r"`center=True` is not supported in `rolling_\*\(..., by=...\)` expression" - with pytest.raises(InvalidOperationError, match=msg): # noqa: SIM117 - with pytest.deprecated_call(match="rolling_sum_by"): - df.with_columns(pl.col("a").rolling_sum("2d", by="b", center=True)) - - def test_rolling_extrema() -> None: # sorted data and nulls flags trigger different kernels df = ( @@ -742,33 +712,33 @@ def 
test_rolling_aggregations_with_over_11225() -> None: def test_rolling() -> None: - a = pl.Series("a", [1, 2, 3, 2, 1]) - assert_series_equal(a.rolling_min(2), pl.Series("a", [None, 1, 2, 2, 1])) - assert_series_equal(a.rolling_max(2), pl.Series("a", [None, 2, 3, 3, 2])) - assert_series_equal(a.rolling_sum(2), pl.Series("a", [None, 3, 5, 5, 3])) - assert_series_equal(a.rolling_mean(2), pl.Series("a", [None, 1.5, 2.5, 2.5, 1.5])) + s = pl.Series("a", [1, 2, 3, 2, 1]) + assert_series_equal(s.rolling_min(2), pl.Series("a", [None, 1, 2, 2, 1])) + assert_series_equal(s.rolling_max(2), pl.Series("a", [None, 2, 3, 3, 2])) + assert_series_equal(s.rolling_sum(2), pl.Series("a", [None, 3, 5, 5, 3])) + assert_series_equal(s.rolling_mean(2), pl.Series("a", [None, 1.5, 2.5, 2.5, 1.5])) - assert a.rolling_std(2).to_list()[1] == pytest.approx(0.7071067811865476) - assert a.rolling_var(2).to_list()[1] == pytest.approx(0.5) - assert a.rolling_std(2, ddof=0).to_list()[1] == pytest.approx(0.5) - assert a.rolling_var(2, ddof=0).to_list()[1] == pytest.approx(0.25) + assert s.rolling_std(2).to_list()[1] == pytest.approx(0.7071067811865476) + assert s.rolling_var(2).to_list()[1] == pytest.approx(0.5) + assert s.rolling_std(2, ddof=0).to_list()[1] == pytest.approx(0.5) + assert s.rolling_var(2, ddof=0).to_list()[1] == pytest.approx(0.25) assert_series_equal( - a.rolling_median(4), pl.Series("a", [None, None, None, 2, 2], dtype=pl.Float64) + s.rolling_median(4), pl.Series("a", [None, None, None, 2, 2], dtype=pl.Float64) ) assert_series_equal( - a.rolling_quantile(0, "nearest", 3), + s.rolling_quantile(0, "nearest", 3), pl.Series("a", [None, None, 1, 2, 1], dtype=pl.Float64), ) assert_series_equal( - a.rolling_quantile(0, "lower", 3), + s.rolling_quantile(0, "lower", 3), pl.Series("a", [None, None, 1, 2, 1], dtype=pl.Float64), ) assert_series_equal( - a.rolling_quantile(0, "higher", 3), + s.rolling_quantile(0, "higher", 3), pl.Series("a", [None, None, 1, 2, 1], dtype=pl.Float64), ) - assert 
a.rolling_skew(4).null_count() == 3 + assert s.rolling_skew(4).null_count() == 3 # 3099 # test if we maintain proper dtype @@ -802,9 +772,9 @@ def test_rolling() -> None: ) nan = float("nan") - a = pl.Series("a", [11.0, 2.0, 9.0, nan, 8.0]) + s = pl.Series("a", [11.0, 2.0, 9.0, nan, 8.0]) assert_series_equal( - a.rolling_sum(3), + s.rolling_sum(3), pl.Series("a", [None, None, 22.0, nan, nan]), ) @@ -1031,19 +1001,6 @@ def test_rolling_invalid() -> None: ) -def test_temporal_windows_size_without_by_15977() -> None: - df = pl.DataFrame( - {"a": [1, 2, 3], "b": [date(2020, 1, 1), date(2020, 1, 2), date(2020, 1, 3)]} - ) - with pytest.raises( # noqa: SIM117 - InvalidOperationError, match="Expected a string of the form 'ni'" - ): - with pytest.deprecated_call( - match=r"Passing a str to `rolling_\*` is deprecated" - ): - df.select(pl.col("a").rolling_mean("3d")) - - def test_by_different_length() -> None: df = pl.DataFrame({"b": [1]}) with pytest.raises(InvalidOperationError, match="must be the same length"): @@ -1240,3 +1197,10 @@ def test_rolling_by_nulls() -> None: InvalidOperationError, match="not yet supported for series with null values" ): df.select(pl.col("b").rolling_min_by("a", "2i")) + + +def test_window_size_validation() -> None: + df = pl.DataFrame({"x": [1.0]}) + + with pytest.raises(OverflowError, match=r"can't convert negative int to unsigned"): + df.with_columns(trailing_min=pl.col("x").rolling_min(window_size=-3)) diff --git a/py-polars/tests/unit/test_errors.py b/py-polars/tests/unit/test_errors.py index baa691e0ee5e..1e577d169324 100644 --- a/py-polars/tests/unit/test_errors.py +++ b/py-polars/tests/unit/test_errors.py @@ -502,13 +502,6 @@ def test_lit_agg_err() -> None: pl.DataFrame({"y": [1]}).with_columns(pl.lit(1).sum().over("y")) -def test_window_size_validation() -> None: - df = pl.DataFrame({"x": [1.0]}) - - with pytest.raises(ValueError, match=r"`window_size` must be positive"): - 
df.with_columns(trailing_min=pl.col("x").rolling_min(window_size=-3)) - - def test_invalid_group_by_arg() -> None: df = pl.DataFrame({"a": [1]}) with pytest.raises( From 209c6c8952adbf6d45f88e692ce249aa2c892ece Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Wed, 5 Jun 2024 20:49:19 +0200 Subject: [PATCH 2/3] Remove warn_if_unsorted --- py-polars/polars/expr/expr.py | 199 +----------------- .../unit/operations/rolling/test_rolling.py | 18 +- 2 files changed, 9 insertions(+), 208 deletions(-) diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py index 7bdef405af15..e24899419f84 100644 --- a/py-polars/polars/expr/expr.py +++ b/py-polars/polars/expr/expr.py @@ -6048,7 +6048,6 @@ def rolling_min_by( *, min_periods: int = 1, closed: ClosedInterval = "right", - warn_if_unsorted: bool | None = None, ) -> Self: """ Apply a rolling min based on another column. @@ -6095,12 +6094,6 @@ def rolling_min_by( closed : {'left', 'right', 'both', 'none'} Define which sides of the temporal interval are closed (inclusive), defaults to `'right'`. - warn_if_unsorted - Warn if data is not known to be sorted by `by` column. - - .. deprecated:: 0.20.27 - This operation no longer requires sorted data, you can safely remove - the `warn_if_unsorted` argument. 
Notes ----- @@ -6163,12 +6156,6 @@ def rolling_min_by( └───────┴─────────────────────┴─────────────────┘ """ window_size = _prepare_rolling_by_window_args(window_size) - if warn_if_unsorted is not None: - issue_deprecation_warning( - "`warn_if_unsorted` is deprecated in `rolling_min_by` because it " - "no longer requires sorted data - you can safely remove this argument.", - version="0.20.27", - ) by = parse_as_expression(by) return self._from_pyexpr( self._pyexpr.rolling_min_by(by, window_size, min_periods, closed) @@ -6182,7 +6169,6 @@ def rolling_max_by( *, min_periods: int = 1, closed: ClosedInterval = "right", - warn_if_unsorted: bool | None = None, ) -> Self: """ Apply a rolling max based on another column. @@ -6229,12 +6215,6 @@ def rolling_max_by( closed : {'left', 'right', 'both', 'none'} Define which sides of the temporal interval are closed (inclusive), defaults to `'right'`. - warn_if_unsorted - Warn if data is not known to be sorted by `by` column. - - .. deprecated:: 0.20.27 - This operation no longer requires sorted data, you can safely remove - the `warn_if_unsorted` argument. Notes ----- @@ -6323,12 +6303,6 @@ def rolling_max_by( └───────┴─────────────────────┴─────────────────┘ """ window_size = _prepare_rolling_by_window_args(window_size) - if warn_if_unsorted is not None: - issue_deprecation_warning( - "`warn_if_unsorted` is deprecated in `rolling_max_by` because it " - "no longer requires sorted data - you can safely remove this argument.", - version="0.20.27", - ) by = parse_as_expression(by) return self._from_pyexpr( self._pyexpr.rolling_max_by(by, window_size, min_periods, closed) @@ -6342,7 +6316,6 @@ def rolling_mean_by( *, min_periods: int = 1, closed: ClosedInterval = "right", - warn_if_unsorted: bool | None = None, ) -> Self: """ Apply a rolling mean based on another column. 
@@ -6389,12 +6362,6 @@ def rolling_mean_by( closed : {'left', 'right', 'both', 'none'} Define which sides of the temporal interval are closed (inclusive), defaults to `'right'`. - warn_if_unsorted - Warn if data is not known to be sorted by `by` column. - - .. deprecated:: 0.20.27 - This operation no longer requires sorted data, you can safely remove - the `warn_if_unsorted` argument. Notes ----- @@ -6485,12 +6452,6 @@ def rolling_mean_by( └───────┴─────────────────────┴──────────────────┘ """ window_size = _prepare_rolling_by_window_args(window_size) - if warn_if_unsorted is not None: - issue_deprecation_warning( - "`warn_if_unsorted` is deprecated in `rolling_mean_by` because it " - "no longer requires sorted data - you can safely remove this argument.", - version="0.20.27", - ) by = parse_as_expression(by) return self._from_pyexpr( self._pyexpr.rolling_mean_by( @@ -6509,7 +6470,6 @@ def rolling_sum_by( *, min_periods: int = 1, closed: ClosedInterval = "right", - warn_if_unsorted: bool | None = None, ) -> Self: """ Apply a rolling sum based on another column. @@ -6556,12 +6516,6 @@ def rolling_sum_by( closed : {'left', 'right', 'both', 'none'} Define which sides of the temporal interval are closed (inclusive), defaults to `'right'`. - warn_if_unsorted - Warn if data is not known to be sorted by `by` column. - - .. deprecated:: 0.20.27 - This operation no longer requires sorted data, you can safely remove - the `warn_if_unsorted` argument. 
Notes ----- @@ -6650,12 +6604,6 @@ def rolling_sum_by( └───────┴─────────────────────┴─────────────────┘ """ window_size = _prepare_rolling_by_window_args(window_size) - if warn_if_unsorted is not None: - issue_deprecation_warning( - "`warn_if_unsorted` is deprecated in `rolling_sum_by` because it " - "no longer requires sorted data - you can safely remove this argument.", - version="0.20.27", - ) by = parse_as_expression(by) return self._from_pyexpr( self._pyexpr.rolling_sum_by(by, window_size, min_periods, closed) @@ -6670,7 +6618,6 @@ def rolling_std_by( min_periods: int = 1, closed: ClosedInterval = "right", ddof: int = 1, - warn_if_unsorted: bool | None = None, ) -> Self: """ Compute a rolling standard deviation based on another column. @@ -6719,12 +6666,6 @@ def rolling_std_by( defaults to `'right'`. ddof "Delta Degrees of Freedom": The divisor for a length N window is N - ddof - warn_if_unsorted - Warn if data is not known to be sorted by `by` column. - - .. deprecated:: 0.20.27 - This operation no longer requires sorted data, you can safely remove - the `warn_if_unsorted` argument. Notes ----- @@ -6813,12 +6754,6 @@ def rolling_std_by( └───────┴─────────────────────┴─────────────────┘ """ window_size = _prepare_rolling_by_window_args(window_size) - if warn_if_unsorted is not None: - issue_deprecation_warning( - "`warn_if_unsorted` is deprecated in `rolling_std_by` because it " - "no longer requires sorted data - you can safely remove this argument.", - version="0.20.27", - ) by = parse_as_expression(by) return self._from_pyexpr( self._pyexpr.rolling_std_by( @@ -6839,7 +6774,6 @@ def rolling_var_by( min_periods: int = 1, closed: ClosedInterval = "right", ddof: int = 1, - warn_if_unsorted: bool | None = None, ) -> Self: """ Compute a rolling variance based on another column. @@ -6888,12 +6822,6 @@ def rolling_var_by( defaults to `'right'`. 
ddof "Delta Degrees of Freedom": The divisor for a length N window is N - ddof - warn_if_unsorted - Warn if data is not known to be sorted by `by` column. - - .. deprecated:: 0.20.27 - This operation no longer requires sorted data, you can safely remove - the `warn_if_unsorted` argument. Notes ----- @@ -6982,12 +6910,6 @@ def rolling_var_by( └───────┴─────────────────────┴─────────────────┘ """ window_size = _prepare_rolling_by_window_args(window_size) - if warn_if_unsorted is not None: - issue_deprecation_warning( - "`warn_if_unsorted` is deprecated in `rolling_var_by` because it " - "no longer requires sorted data - you can safely remove this argument.", - version="0.20.27", - ) by = parse_as_expression(by) return self._from_pyexpr( self._pyexpr.rolling_var_by( @@ -7007,7 +6929,6 @@ def rolling_median_by( *, min_periods: int = 1, closed: ClosedInterval = "right", - warn_if_unsorted: bool | None = None, ) -> Self: """ Compute a rolling median based on another column. @@ -7054,12 +6975,6 @@ def rolling_median_by( closed : {'left', 'right', 'both', 'none'} Define which sides of the temporal interval are closed (inclusive), defaults to `'right'`. - warn_if_unsorted - Warn if data is not known to be sorted by `by` column. - - .. deprecated:: 0.20.27 - This operation no longer requires sorted data, you can safely remove - the `warn_if_unsorted` argument. 
Notes ----- @@ -7124,12 +7039,6 @@ def rolling_median_by( └───────┴─────────────────────┴────────────────────┘ """ window_size = _prepare_rolling_by_window_args(window_size) - if warn_if_unsorted is not None: - issue_deprecation_warning( - "`warn_if_unsorted` is deprecated in `rolling_median_by` because it " - "no longer requires sorted data - you can safely remove this argument.", - version="0.20.27", - ) by = parse_as_expression(by) return self._from_pyexpr( self._pyexpr.rolling_median_by(by, window_size, min_periods, closed) @@ -7145,7 +7054,6 @@ def rolling_quantile_by( interpolation: RollingInterpolationMethod = "nearest", min_periods: int = 1, closed: ClosedInterval = "right", - warn_if_unsorted: bool | None = None, ) -> Self: """ Compute a rolling quantile based on another column. @@ -7196,12 +7104,6 @@ def rolling_quantile_by( closed : {'left', 'right', 'both', 'none'} Define which sides of the temporal interval are closed (inclusive), defaults to `'right'`. - warn_if_unsorted - Warn if data is not known to be sorted by `by` column. - - .. deprecated:: 0.20.27 - This operation no longer requires sorted data, you can safely remove - the `warn_if_unsorted` argument. Notes ----- @@ -7266,12 +7168,6 @@ def rolling_quantile_by( └───────┴─────────────────────┴──────────────────────┘ """ window_size = _prepare_rolling_by_window_args(window_size) - if warn_if_unsorted is not None: - issue_deprecation_warning( - "`warn_if_unsorted` is deprecated in `rolling_quantile_by` because it " - "no longer requires sorted data - you can safely remove this argument.", - version="0.20.27", - ) by = parse_as_expression(by) return self._from_pyexpr( self._pyexpr.rolling_quantile_by( @@ -7463,19 +7359,6 @@ def rolling_max( - 1, if `window_size` is a dynamic temporal size center Set the labels at the center of the window - by - If the `window_size` is temporal, for instance `"5h"` or `"3s"`, you must - set the column that will be used to determine the windows. 
This column must - be of dtype Datetime or Date. - - .. deprecated:: 0.20.24 - Passing `by` to `rolling_max` is deprecated - please use - :meth:`.rolling_max_by` instead. - closed : {'left', 'right', 'both', 'none'} - Define which sides of the temporal interval are closed (inclusive); only - applicable if `by` has been set (in which case, it defaults to `'right'`). - warn_if_unsorted - Warn if data is not known to be sorted by `by` column (if passed). Notes ----- @@ -7698,19 +7581,6 @@ def rolling_mean( - 1, if `window_size` is a dynamic temporal size center Set the labels at the center of the window - by - If the `window_size` is temporal for instance `"5h"` or `"3s"`, you must - set the column that will be used to determine the windows. This column must - be of dtype Datetime or Date. - - .. deprecated:: 0.20.24 - Passing `by` to `rolling_mean` is deprecated - please use - :meth:`.rolling_mean_by` instead. - closed : {'left', 'right', 'both', 'none'} - Define which sides of the temporal interval are closed (inclusive); only - applicable if `by` has been set (in which case, it defaults to `'right'`). - warn_if_unsorted - Warn if data is not known to be sorted by `by` column (if passed). Notes ----- @@ -7935,19 +7805,6 @@ def rolling_sum( - 1, if `window_size` is a dynamic temporal size center Set the labels at the center of the window - by - If the `window_size` is temporal for instance `"5h"` or `"3s"`, you must - set the column that will be used to determine the windows. This column must - of dtype `{Date, Datetime}` - - .. deprecated:: 0.20.24 - Passing `by` to `rolling_sum` is deprecated - please use - :meth:`.rolling_sum_by` instead. - closed : {'left', 'right', 'both', 'none'} - Define which sides of the temporal interval are closed (inclusive); only - applicable if `by` has been set (in which case, it defaults to `'right'`). - warn_if_unsorted - Warn if data is not known to be sorted by `by` column (if passed). 
Notes ----- @@ -8167,21 +8024,8 @@ def rolling_std( - 1, if `window_size` is a dynamic temporal size center Set the labels at the center of the window - by - If the `window_size` is temporal for instance `"5h"` or `"3s"`, you must - set the column that will be used to determine the windows. This column must - be of dtype Datetime or Date. - - .. deprecated:: 0.20.24 - Passing `by` to `rolling_std` is deprecated - please use - :meth:`.rolling_std_by` instead. - closed : {'left', 'right', 'both', 'none'} - Define which sides of the temporal interval are closed (inclusive); only - applicable if `by` has been set (in which case, it defaults to `'right'`). ddof "Delta Degrees of Freedom": The divisor for a length N window is N - ddof - warn_if_unsorted - Warn if data is not known to be sorted by `by` column (if passed). Notes ----- @@ -8402,21 +8246,8 @@ def rolling_var( - 1, if `window_size` is a dynamic temporal size center Set the labels at the center of the window - by - If the `window_size` is temporal for instance `"5h"` or `"3s"`, you must - set the column that will be used to determine the windows. This column must - be of dtype Datetime or Date. - - .. deprecated:: 0.20.24 - Passing `by` to `rolling_var` is deprecated - please use - :meth:`.rolling_var_by` instead. - closed : {'left', 'right', 'both', 'none'} - Define which sides of the temporal interval are closed (inclusive); only - applicable if `by` has been set (in which case, it defaults to `'right'`). ddof "Delta Degrees of Freedom": The divisor for a length N window is N - ddof - warn_if_unsorted - Warn if data is not known to be sorted by `by` column (if passed). Notes ----- @@ -8636,19 +8467,6 @@ def rolling_median( - 1, if `window_size` is a dynamic temporal size center Set the labels at the center of the window - by - If the `window_size` is temporal for instance `"5h"` or `"3s"`, you must - set the column that will be used to determine the windows. This column must - be of dtype Datetime or Date. 
- - .. deprecated:: 0.20.24 - Passing `by` to `rolling_median` is deprecated - please use - :meth:`.rolling_median_by` instead. - closed : {'left', 'right', 'both', 'none'} - Define which sides of the temporal interval are closed (inclusive); only - applicable if `by` has been set (in which case, it defaults to `'right'`). - warn_if_unsorted - Warn if data is not known to be sorted by `by` column (if passed). Notes ----- @@ -8795,19 +8613,6 @@ def rolling_quantile( - 1, if `window_size` is a dynamic temporal size center Set the labels at the center of the window - by - If the `window_size` is temporal for instance `"5h"` or `"3s"`, you must - set the column that will be used to determine the windows. This column must - be of dtype Datetime or Date. - - .. deprecated:: 0.20.24 - Passing `by` to `rolling_quantile` is deprecated - please use - :meth:`.rolling_quantile_by` instead. - closed : {'left', 'right', 'both', 'none'} - Define which sides of the temporal interval are closed (inclusive); only - applicable if `by` has been set (in which case, it defaults to `'right'`). - warn_if_unsorted - Warn if data is not known to be sorted by `by` column (if passed). 
Notes ----- @@ -11252,9 +11057,7 @@ def _prepare_alpha( return alpha -def _prepare_rolling_by_window_args( - window_size: timedelta | str, -) -> str: +def _prepare_rolling_by_window_args(window_size: timedelta | str) -> str: if isinstance(window_size, timedelta): window_size = parse_as_duration_string(window_size) return window_size diff --git a/py-polars/tests/unit/operations/rolling/test_rolling.py b/py-polars/tests/unit/operations/rolling/test_rolling.py index 9912d7e30112..7e90bb53ccd3 100644 --- a/py-polars/tests/unit/operations/rolling/test_rolling.py +++ b/py-polars/tests/unit/operations/rolling/test_rolling.py @@ -688,17 +688,15 @@ def test_rolling_aggregations_with_over_11225() -> None: df_temporal = df_temporal.sort("group", "date") - with pytest.deprecated_call(match="you can safely remove this argument"): - result = df_temporal.with_columns( - rolling_row_mean=pl.col("index") - .rolling_mean_by( - by="date", - window_size="2d", - closed="left", - warn_if_unsorted=False, - ) - .over("group") + result = df_temporal.with_columns( + rolling_row_mean=pl.col("index") + .rolling_mean_by( + by="date", + window_size="2d", + closed="left", ) + .over("group") + ) expected = pl.DataFrame( { "index": [0, 1, 2, 3, 4], From a7977ce547fa0c8ecd2b923e38a7f7af56ef1c03 Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Wed, 5 Jun 2024 21:07:20 +0200 Subject: [PATCH 3/3] Docs --- py-polars/polars/expr/expr.py | 837 ++++-------------------------- py-polars/polars/series/series.py | 90 ++-- 2 files changed, 144 insertions(+), 783 deletions(-) diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py index e24899419f84..13230f06a307 100644 --- a/py-polars/polars/expr/expr.py +++ b/py-polars/polars/expr/expr.py @@ -7198,7 +7198,7 @@ def rolling_min( A window of length `window_size` will traverse the array. The values that fill this window will (optionally) be multiplied with the weights given by the - `weight` vector. 
The resulting values will be aggregated to their min. + `weights` vector. The resulting values will be aggregated to their min. The window at a given row will include the row itself, and the `window_size - 1` elements before it. @@ -7214,7 +7214,7 @@ def rolling_min( The number of values in the window that should be non-null before computing a result. If set to `None` (default), it will be set equal to `window_size`. center - Set the labels at the center of the window + Set the labels at the center of the window. Notes ----- @@ -7309,56 +7309,23 @@ def rolling_max( A window of length `window_size` will traverse the array. The values that fill this window will (optionally) be multiplied with the weights given by the - `weight` vector. The resulting values will be aggregated to their max. - - If `by` has not been specified (the default), the window at a given row will - include the row itself, and the `window_size - 1` elements before it. - - If you pass a `by` column ``, then `closed="right"` - (the default) means the windows will be: + `weights` vector. The resulting values will be aggregated to their max. - - (t_0 - window_size, t_0] - - (t_1 - window_size, t_1] - - ... - - (t_n - window_size, t_n] + The window at a given row will include the row itself, and the `window_size - 1` + elements before it. Parameters ---------- window_size - The length of the window. Can be a fixed integer size, or a dynamic temporal - size indicated by a timedelta or the following string language: - - - 1ns (1 nanosecond) - - 1us (1 microsecond) - - 1ms (1 millisecond) - - 1s (1 second) - - 1m (1 minute) - - 1h (1 hour) - - 1d (1 calendar day) - - 1w (1 calendar week) - - 1mo (1 calendar month) - - 1q (1 calendar quarter) - - 1y (1 calendar year) - - 1i (1 index count) - - By "calendar day", we mean the corresponding time on the next day - (which may not be 24 hours, due to daylight savings). Similarly for - "calendar week", "calendar month", "calendar quarter", and - "calendar year". 
- - If a timedelta or the dynamic string language is used, the `by` - and `closed` arguments must also be set. + The length of the window in number of elements. weights An optional slice with the same length as the window that will be multiplied elementwise with the values in the window. min_periods The number of values in the window that should be non-null before computing - a result. If None, it will be set equal to: - - - the window size, if `window_size` is a fixed integer - - 1, if `window_size` is a dynamic temporal size + a result. If set to `None` (default), it will be set equal to `window_size`. center - Set the labels at the center of the window + Set the labels at the center of the window. Notes ----- @@ -7425,84 +7392,6 @@ def rolling_max( │ 5.0 ┆ 6.0 │ │ 6.0 ┆ null │ └─────┴─────────────┘ - - Create a DataFrame with a datetime column and a row number column - - >>> from datetime import timedelta, datetime - >>> start = datetime(2001, 1, 1) - >>> stop = datetime(2001, 1, 2) - >>> df_temporal = pl.DataFrame( - ... {"date": pl.datetime_range(start, stop, "1h", eager=True)} - ... ).with_row_index() - >>> df_temporal - shape: (25, 2) - ┌───────┬─────────────────────┐ - │ index ┆ date │ - │ --- ┆ --- │ - │ u32 ┆ datetime[μs] │ - ╞═══════╪═════════════════════╡ - │ 0 ┆ 2001-01-01 00:00:00 │ - │ 1 ┆ 2001-01-01 01:00:00 │ - │ 2 ┆ 2001-01-01 02:00:00 │ - │ 3 ┆ 2001-01-01 03:00:00 │ - │ 4 ┆ 2001-01-01 04:00:00 │ - │ … ┆ … │ - │ 20 ┆ 2001-01-01 20:00:00 │ - │ 21 ┆ 2001-01-01 21:00:00 │ - │ 22 ┆ 2001-01-01 22:00:00 │ - │ 23 ┆ 2001-01-01 23:00:00 │ - │ 24 ┆ 2001-01-02 00:00:00 │ - └───────┴─────────────────────┘ - - Compute the rolling max with the temporal windows closed on the right (default) - - >>> df_temporal.with_columns( - ... rolling_row_max=pl.col("index").rolling_max(window_size="2h", by="date") - ... 
) # doctest:+SKIP - shape: (25, 3) - ┌───────┬─────────────────────┬─────────────────┐ - │ index ┆ date ┆ rolling_row_max │ - │ --- ┆ --- ┆ --- │ - │ u32 ┆ datetime[μs] ┆ u32 │ - ╞═══════╪═════════════════════╪═════════════════╡ - │ 0 ┆ 2001-01-01 00:00:00 ┆ 0 │ - │ 1 ┆ 2001-01-01 01:00:00 ┆ 1 │ - │ 2 ┆ 2001-01-01 02:00:00 ┆ 2 │ - │ 3 ┆ 2001-01-01 03:00:00 ┆ 3 │ - │ 4 ┆ 2001-01-01 04:00:00 ┆ 4 │ - │ … ┆ … ┆ … │ - │ 20 ┆ 2001-01-01 20:00:00 ┆ 20 │ - │ 21 ┆ 2001-01-01 21:00:00 ┆ 21 │ - │ 22 ┆ 2001-01-01 22:00:00 ┆ 22 │ - │ 23 ┆ 2001-01-01 23:00:00 ┆ 23 │ - │ 24 ┆ 2001-01-02 00:00:00 ┆ 24 │ - └───────┴─────────────────────┴─────────────────┘ - - Compute the rolling max with the closure of windows on both sides - - >>> df_temporal.with_columns( - ... rolling_row_max=pl.col("index").rolling_max( - ... window_size="2h", by="date", closed="both" - ... ) - ... ) # doctest:+SKIP - shape: (25, 3) - ┌───────┬─────────────────────┬─────────────────┐ - │ index ┆ date ┆ rolling_row_max │ - │ --- ┆ --- ┆ --- │ - │ u32 ┆ datetime[μs] ┆ u32 │ - ╞═══════╪═════════════════════╪═════════════════╡ - │ 0 ┆ 2001-01-01 00:00:00 ┆ 0 │ - │ 1 ┆ 2001-01-01 01:00:00 ┆ 1 │ - │ 2 ┆ 2001-01-01 02:00:00 ┆ 2 │ - │ 3 ┆ 2001-01-01 03:00:00 ┆ 3 │ - │ 4 ┆ 2001-01-01 04:00:00 ┆ 4 │ - │ … ┆ … ┆ … │ - │ 20 ┆ 2001-01-01 20:00:00 ┆ 20 │ - │ 21 ┆ 2001-01-01 21:00:00 ┆ 21 │ - │ 22 ┆ 2001-01-01 22:00:00 ┆ 22 │ - │ 23 ┆ 2001-01-01 23:00:00 ┆ 23 │ - │ 24 ┆ 2001-01-02 00:00:00 ┆ 24 │ - └───────┴─────────────────────┴─────────────────┘ """ return self._from_pyexpr( self._pyexpr.rolling_max( @@ -7531,56 +7420,23 @@ def rolling_mean( A window of length `window_size` will traverse the array. The values that fill this window will (optionally) be multiplied with the weights given by the - `weight` vector. The resulting values will be aggregated to their mean. + `weights` vector. The resulting values will be aggregated to their mean. 
- If `by` has not been specified (the default), the window at a given row will - include the row itself, and the `window_size - 1` elements before it. - - If you pass a `by` column ``, then `closed="right"` - (the default) means the windows will be: - - - (t_0 - window_size, t_0] - - (t_1 - window_size, t_1] - - ... - - (t_n - window_size, t_n] + The window at a given row will include the row itself, and the `window_size - 1` + elements before it. Parameters ---------- window_size - The length of the window. Can be a fixed integer size, or a dynamic temporal - size indicated by a timedelta or the following string language: - - - 1ns (1 nanosecond) - - 1us (1 microsecond) - - 1ms (1 millisecond) - - 1s (1 second) - - 1m (1 minute) - - 1h (1 hour) - - 1d (1 calendar day) - - 1w (1 calendar week) - - 1mo (1 calendar month) - - 1q (1 calendar quarter) - - 1y (1 calendar year) - - 1i (1 index count) - - By "calendar day", we mean the corresponding time on the next day - (which may not be 24 hours, due to daylight savings). Similarly for - "calendar week", "calendar month", "calendar quarter", and - "calendar year". - - If a timedelta or the dynamic string language is used, the `by` - and `closed` arguments must also be set. + The length of the window in number of elements. weights An optional slice with the same length as the window that will be multiplied elementwise with the values in the window. min_periods The number of values in the window that should be non-null before computing - a result. If None, it will be set equal to: - - - the window size, if `window_size` is a fixed integer - - 1, if `window_size` is a dynamic temporal size + a result. If set to `None` (default), it will be set equal to `window_size`. center - Set the labels at the center of the window + Set the labels at the center of the window. 
Notes ----- @@ -7647,86 +7503,6 @@ def rolling_mean( │ 5.0 ┆ 5.0 │ │ 6.0 ┆ null │ └─────┴──────────────┘ - - Create a DataFrame with a datetime column and a row number column - - >>> from datetime import timedelta, datetime - >>> start = datetime(2001, 1, 1) - >>> stop = datetime(2001, 1, 2) - >>> df_temporal = pl.DataFrame( - ... {"date": pl.datetime_range(start, stop, "1h", eager=True)} - ... ).with_row_index() - >>> df_temporal - shape: (25, 2) - ┌───────┬─────────────────────┐ - │ index ┆ date │ - │ --- ┆ --- │ - │ u32 ┆ datetime[μs] │ - ╞═══════╪═════════════════════╡ - │ 0 ┆ 2001-01-01 00:00:00 │ - │ 1 ┆ 2001-01-01 01:00:00 │ - │ 2 ┆ 2001-01-01 02:00:00 │ - │ 3 ┆ 2001-01-01 03:00:00 │ - │ 4 ┆ 2001-01-01 04:00:00 │ - │ … ┆ … │ - │ 20 ┆ 2001-01-01 20:00:00 │ - │ 21 ┆ 2001-01-01 21:00:00 │ - │ 22 ┆ 2001-01-01 22:00:00 │ - │ 23 ┆ 2001-01-01 23:00:00 │ - │ 24 ┆ 2001-01-02 00:00:00 │ - └───────┴─────────────────────┘ - - Compute the rolling mean with the temporal windows closed on the right (default) - - >>> df_temporal.with_columns( - ... rolling_row_mean=pl.col("index").rolling_mean( - ... window_size="2h", by="date" - ... ) - ... ) # doctest:+SKIP - shape: (25, 3) - ┌───────┬─────────────────────┬──────────────────┐ - │ index ┆ date ┆ rolling_row_mean │ - │ --- ┆ --- ┆ --- │ - │ u32 ┆ datetime[μs] ┆ f64 │ - ╞═══════╪═════════════════════╪══════════════════╡ - │ 0 ┆ 2001-01-01 00:00:00 ┆ 0.0 │ - │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.5 │ - │ 2 ┆ 2001-01-01 02:00:00 ┆ 1.5 │ - │ 3 ┆ 2001-01-01 03:00:00 ┆ 2.5 │ - │ 4 ┆ 2001-01-01 04:00:00 ┆ 3.5 │ - │ … ┆ … ┆ … │ - │ 20 ┆ 2001-01-01 20:00:00 ┆ 19.5 │ - │ 21 ┆ 2001-01-01 21:00:00 ┆ 20.5 │ - │ 22 ┆ 2001-01-01 22:00:00 ┆ 21.5 │ - │ 23 ┆ 2001-01-01 23:00:00 ┆ 22.5 │ - │ 24 ┆ 2001-01-02 00:00:00 ┆ 23.5 │ - └───────┴─────────────────────┴──────────────────┘ - - Compute the rolling mean with the closure of windows on both sides - - >>> df_temporal.with_columns( - ... rolling_row_mean=pl.col("index").rolling_mean( - ... 
window_size="2h", by="date", closed="both" - ... ) - ... ) # doctest:+SKIP - shape: (25, 3) - ┌───────┬─────────────────────┬──────────────────┐ - │ index ┆ date ┆ rolling_row_mean │ - │ --- ┆ --- ┆ --- │ - │ u32 ┆ datetime[μs] ┆ f64 │ - ╞═══════╪═════════════════════╪══════════════════╡ - │ 0 ┆ 2001-01-01 00:00:00 ┆ 0.0 │ - │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.5 │ - │ 2 ┆ 2001-01-01 02:00:00 ┆ 1.0 │ - │ 3 ┆ 2001-01-01 03:00:00 ┆ 2.0 │ - │ 4 ┆ 2001-01-01 04:00:00 ┆ 3.0 │ - │ … ┆ … ┆ … │ - │ 20 ┆ 2001-01-01 20:00:00 ┆ 19.0 │ - │ 21 ┆ 2001-01-01 21:00:00 ┆ 20.0 │ - │ 22 ┆ 2001-01-01 22:00:00 ┆ 21.0 │ - │ 23 ┆ 2001-01-01 23:00:00 ┆ 22.0 │ - │ 24 ┆ 2001-01-02 00:00:00 ┆ 23.0 │ - └───────┴─────────────────────┴──────────────────┘ """ return self._from_pyexpr( self._pyexpr.rolling_mean( @@ -7755,56 +7531,23 @@ def rolling_sum( A window of length `window_size` will traverse the array. The values that fill this window will (optionally) be multiplied with the weights given by the - `weight` vector. The resulting values will be aggregated to their sum. + `weights` vector. The resulting values will be aggregated to their sum. - If `by` has not been specified (the default), the window at a given row will - include the row itself, and the `window_size - 1` elements before it. - - If you pass a `by` column ``, then `closed="right"` - (the default) means the windows will be: - - - (t_0 - window_size, t_0] - - (t_1 - window_size, t_1] - - ... - - (t_n - window_size, t_n] + The window at a given row will include the row itself, and the `window_size - 1` + elements before it. Parameters ---------- window_size - The length of the window. 
Can be a fixed integer size, or a dynamic temporal - size indicated by a timedelta or the following string language: - - - 1ns (1 nanosecond) - - 1us (1 microsecond) - - 1ms (1 millisecond) - - 1s (1 second) - - 1m (1 minute) - - 1h (1 hour) - - 1d (1 calendar day) - - 1w (1 calendar week) - - 1mo (1 calendar month) - - 1q (1 calendar quarter) - - 1y (1 calendar year) - - 1i (1 index count) - - By "calendar day", we mean the corresponding time on the next day - (which may not be 24 hours, due to daylight savings). Similarly for - "calendar week", "calendar month", "calendar quarter", and - "calendar year". - - If a timedelta or the dynamic string language is used, the `by` - and `closed` arguments must also be set. + The length of the window in number of elements. weights An optional slice with the same length as the window that will be multiplied elementwise with the values in the window. min_periods The number of values in the window that should be non-null before computing - a result. If None, it will be set equal to: - - - the window size, if `window_size` is a fixed integer - - 1, if `window_size` is a dynamic temporal size + a result. If set to `None` (default), it will be set equal to `window_size`. center - Set the labels at the center of the window + Set the labels at the center of the window. Notes ----- @@ -7871,84 +7614,6 @@ def rolling_sum( │ 5.0 ┆ 15.0 │ │ 6.0 ┆ null │ └─────┴─────────────┘ - - Create a DataFrame with a datetime column and a row number column - - >>> from datetime import timedelta, datetime - >>> start = datetime(2001, 1, 1) - >>> stop = datetime(2001, 1, 2) - >>> df_temporal = pl.DataFrame( - ... {"date": pl.datetime_range(start, stop, "1h", eager=True)} - ... 
).with_row_index() - >>> df_temporal - shape: (25, 2) - ┌───────┬─────────────────────┐ - │ index ┆ date │ - │ --- ┆ --- │ - │ u32 ┆ datetime[μs] │ - ╞═══════╪═════════════════════╡ - │ 0 ┆ 2001-01-01 00:00:00 │ - │ 1 ┆ 2001-01-01 01:00:00 │ - │ 2 ┆ 2001-01-01 02:00:00 │ - │ 3 ┆ 2001-01-01 03:00:00 │ - │ 4 ┆ 2001-01-01 04:00:00 │ - │ … ┆ … │ - │ 20 ┆ 2001-01-01 20:00:00 │ - │ 21 ┆ 2001-01-01 21:00:00 │ - │ 22 ┆ 2001-01-01 22:00:00 │ - │ 23 ┆ 2001-01-01 23:00:00 │ - │ 24 ┆ 2001-01-02 00:00:00 │ - └───────┴─────────────────────┘ - - Compute the rolling sum with the temporal windows closed on the right (default) - - >>> df_temporal.with_columns( - ... rolling_row_sum=pl.col("index").rolling_sum(window_size="2h", by="date") - ... ) # doctest:+SKIP - shape: (25, 3) - ┌───────┬─────────────────────┬─────────────────┐ - │ index ┆ date ┆ rolling_row_sum │ - │ --- ┆ --- ┆ --- │ - │ u32 ┆ datetime[μs] ┆ u32 │ - ╞═══════╪═════════════════════╪═════════════════╡ - │ 0 ┆ 2001-01-01 00:00:00 ┆ 0 │ - │ 1 ┆ 2001-01-01 01:00:00 ┆ 1 │ - │ 2 ┆ 2001-01-01 02:00:00 ┆ 3 │ - │ 3 ┆ 2001-01-01 03:00:00 ┆ 5 │ - │ 4 ┆ 2001-01-01 04:00:00 ┆ 7 │ - │ … ┆ … ┆ … │ - │ 20 ┆ 2001-01-01 20:00:00 ┆ 39 │ - │ 21 ┆ 2001-01-01 21:00:00 ┆ 41 │ - │ 22 ┆ 2001-01-01 22:00:00 ┆ 43 │ - │ 23 ┆ 2001-01-01 23:00:00 ┆ 45 │ - │ 24 ┆ 2001-01-02 00:00:00 ┆ 47 │ - └───────┴─────────────────────┴─────────────────┘ - - Compute the rolling sum with the closure of windows on both sides - - >>> df_temporal.with_columns( - ... rolling_row_sum=pl.col("index").rolling_sum( - ... window_size="2h", by="date", closed="both" - ... ) - ... 
) # doctest:+SKIP - shape: (25, 3) - ┌───────┬─────────────────────┬─────────────────┐ - │ index ┆ date ┆ rolling_row_sum │ - │ --- ┆ --- ┆ --- │ - │ u32 ┆ datetime[μs] ┆ u32 │ - ╞═══════╪═════════════════════╪═════════════════╡ - │ 0 ┆ 2001-01-01 00:00:00 ┆ 0 │ - │ 1 ┆ 2001-01-01 01:00:00 ┆ 1 │ - │ 2 ┆ 2001-01-01 02:00:00 ┆ 3 │ - │ 3 ┆ 2001-01-01 03:00:00 ┆ 6 │ - │ 4 ┆ 2001-01-01 04:00:00 ┆ 9 │ - │ … ┆ … ┆ … │ - │ 20 ┆ 2001-01-01 20:00:00 ┆ 57 │ - │ 21 ┆ 2001-01-01 21:00:00 ┆ 60 │ - │ 22 ┆ 2001-01-01 22:00:00 ┆ 63 │ - │ 23 ┆ 2001-01-01 23:00:00 ┆ 66 │ - │ 24 ┆ 2001-01-02 00:00:00 ┆ 69 │ - └───────┴─────────────────────┴─────────────────┘ """ return self._from_pyexpr( self._pyexpr.rolling_sum( @@ -7976,54 +7641,25 @@ def rolling_std( This functionality is considered **unstable**. It may be changed at any point without it being considered a breaking change. - If `by` has not been specified (the default), the window at a given row will - include the row itself, and the `window_size - 1` elements before it. - - If you pass a `by` column ``, then `closed="right"` - (the default) means the windows will be: + A window of length `window_size` will traverse the array. The values that fill + this window will (optionally) be multiplied with the weights given by the + `weights` vector. The resulting values will be aggregated to their std. - - (t_0 - window_size, t_0] - - (t_1 - window_size, t_1] - - ... - - (t_n - window_size, t_n] + The window at a given row will include the row itself, and the `window_size - 1` + elements before it. Parameters ---------- window_size - The length of the window. 
Can be a fixed integer size, or a dynamic temporal - size indicated by a timedelta or the following string language: - - - 1ns (1 nanosecond) - - 1us (1 microsecond) - - 1ms (1 millisecond) - - 1s (1 second) - - 1m (1 minute) - - 1h (1 hour) - - 1d (1 calendar day) - - 1w (1 calendar week) - - 1mo (1 calendar month) - - 1q (1 calendar quarter) - - 1y (1 calendar year) - - 1i (1 index count) - - By "calendar day", we mean the corresponding time on the next day - (which may not be 24 hours, due to daylight savings). Similarly for - "calendar week", "calendar month", "calendar quarter", and - "calendar year". - - If a timedelta or the dynamic string language is used, the `by` - and `closed` arguments must also be set. + The length of the window in number of elements. weights - An optional slice with the same length as the window that determines the - relative contribution of each value in a window to the output. + An optional slice with the same length as the window that will be multiplied + elementwise with the values in the window. min_periods The number of values in the window that should be non-null before computing - a result. If None, it will be set equal to: - - - the window size, if `window_size` is a fixed integer - - 1, if `window_size` is a dynamic temporal size + a result. If set to `None` (default), it will be set equal to `window_size`. center - Set the labels at the center of the window + Set the labels at the center of the window. ddof "Delta Degrees of Freedom": The divisor for a length N window is N - ddof @@ -8042,134 +7678,56 @@ def rolling_std( shape: (6, 2) ┌─────┬─────────────┐ │ A ┆ rolling_std │ - │ --- ┆ --- │ - │ f64 ┆ f64 │ - ╞═════╪═════════════╡ - │ 1.0 ┆ null │ - │ 2.0 ┆ 0.707107 │ - │ 3.0 ┆ 0.707107 │ - │ 4.0 ┆ 0.707107 │ - │ 5.0 ┆ 0.707107 │ - │ 6.0 ┆ 0.707107 │ - └─────┴─────────────┘ - - Specify weights to multiply the values in the window with: - - >>> df.with_columns( - ... rolling_std=pl.col("A").rolling_std( - ... 
window_size=2, weights=[0.25, 0.75] - ... ), - ... ) - shape: (6, 2) - ┌─────┬─────────────┐ - │ A ┆ rolling_std │ - │ --- ┆ --- │ - │ f64 ┆ f64 │ - ╞═════╪═════════════╡ - │ 1.0 ┆ null │ - │ 2.0 ┆ 0.433013 │ - │ 3.0 ┆ 0.433013 │ - │ 4.0 ┆ 0.433013 │ - │ 5.0 ┆ 0.433013 │ - │ 6.0 ┆ 0.433013 │ - └─────┴─────────────┘ - - Center the values in the window - - >>> df.with_columns( - ... rolling_std=pl.col("A").rolling_std(window_size=3, center=True), - ... ) - shape: (6, 2) - ┌─────┬─────────────┐ - │ A ┆ rolling_std │ - │ --- ┆ --- │ - │ f64 ┆ f64 │ - ╞═════╪═════════════╡ - │ 1.0 ┆ null │ - │ 2.0 ┆ 1.0 │ - │ 3.0 ┆ 1.0 │ - │ 4.0 ┆ 1.0 │ - │ 5.0 ┆ 1.0 │ - │ 6.0 ┆ null │ - └─────┴─────────────┘ - - Create a DataFrame with a datetime column and a row number column - - >>> from datetime import timedelta, datetime - >>> start = datetime(2001, 1, 1) - >>> stop = datetime(2001, 1, 2) - >>> df_temporal = pl.DataFrame( - ... {"date": pl.datetime_range(start, stop, "1h", eager=True)} - ... ).with_row_index() - >>> df_temporal - shape: (25, 2) - ┌───────┬─────────────────────┐ - │ index ┆ date │ - │ --- ┆ --- │ - │ u32 ┆ datetime[μs] │ - ╞═══════╪═════════════════════╡ - │ 0 ┆ 2001-01-01 00:00:00 │ - │ 1 ┆ 2001-01-01 01:00:00 │ - │ 2 ┆ 2001-01-01 02:00:00 │ - │ 3 ┆ 2001-01-01 03:00:00 │ - │ 4 ┆ 2001-01-01 04:00:00 │ - │ … ┆ … │ - │ 20 ┆ 2001-01-01 20:00:00 │ - │ 21 ┆ 2001-01-01 21:00:00 │ - │ 22 ┆ 2001-01-01 22:00:00 │ - │ 23 ┆ 2001-01-01 23:00:00 │ - │ 24 ┆ 2001-01-02 00:00:00 │ - └───────┴─────────────────────┘ - - Compute the rolling std with the temporal windows closed on the right (default) - - >>> df_temporal.with_columns( - ... rolling_row_std=pl.col("index").rolling_std(window_size="2h", by="date") - ... 
) # doctest:+SKIP - shape: (25, 3) - ┌───────┬─────────────────────┬─────────────────┐ - │ index ┆ date ┆ rolling_row_std │ - │ --- ┆ --- ┆ --- │ - │ u32 ┆ datetime[μs] ┆ f64 │ - ╞═══════╪═════════════════════╪═════════════════╡ - │ 0 ┆ 2001-01-01 00:00:00 ┆ null │ - │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.707107 │ - │ 2 ┆ 2001-01-01 02:00:00 ┆ 0.707107 │ - │ 3 ┆ 2001-01-01 03:00:00 ┆ 0.707107 │ - │ 4 ┆ 2001-01-01 04:00:00 ┆ 0.707107 │ - │ … ┆ … ┆ … │ - │ 20 ┆ 2001-01-01 20:00:00 ┆ 0.707107 │ - │ 21 ┆ 2001-01-01 21:00:00 ┆ 0.707107 │ - │ 22 ┆ 2001-01-01 22:00:00 ┆ 0.707107 │ - │ 23 ┆ 2001-01-01 23:00:00 ┆ 0.707107 │ - │ 24 ┆ 2001-01-02 00:00:00 ┆ 0.707107 │ - └───────┴─────────────────────┴─────────────────┘ + │ --- ┆ --- │ + │ f64 ┆ f64 │ + ╞═════╪═════════════╡ + │ 1.0 ┆ null │ + │ 2.0 ┆ 0.707107 │ + │ 3.0 ┆ 0.707107 │ + │ 4.0 ┆ 0.707107 │ + │ 5.0 ┆ 0.707107 │ + │ 6.0 ┆ 0.707107 │ + └─────┴─────────────┘ - Compute the rolling std with the closure of windows on both sides + Specify weights to multiply the values in the window with: - >>> df_temporal.with_columns( - ... rolling_row_std=pl.col("index").rolling_std( - ... window_size="2h", by="date", closed="both" - ... ) - ... ) # doctest:+SKIP - shape: (25, 3) - ┌───────┬─────────────────────┬─────────────────┐ - │ index ┆ date ┆ rolling_row_std │ - │ --- ┆ --- ┆ --- │ - │ u32 ┆ datetime[μs] ┆ f64 │ - ╞═══════╪═════════════════════╪═════════════════╡ - │ 0 ┆ 2001-01-01 00:00:00 ┆ null │ - │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.707107 │ - │ 2 ┆ 2001-01-01 02:00:00 ┆ 1.0 │ - │ 3 ┆ 2001-01-01 03:00:00 ┆ 1.0 │ - │ 4 ┆ 2001-01-01 04:00:00 ┆ 1.0 │ - │ … ┆ … ┆ … │ - │ 20 ┆ 2001-01-01 20:00:00 ┆ 1.0 │ - │ 21 ┆ 2001-01-01 21:00:00 ┆ 1.0 │ - │ 22 ┆ 2001-01-01 22:00:00 ┆ 1.0 │ - │ 23 ┆ 2001-01-01 23:00:00 ┆ 1.0 │ - │ 24 ┆ 2001-01-02 00:00:00 ┆ 1.0 │ - └───────┴─────────────────────┴─────────────────┘ + >>> df.with_columns( + ... rolling_std=pl.col("A").rolling_std( + ... window_size=2, weights=[0.25, 0.75] + ... ), + ... 
) + shape: (6, 2) + ┌─────┬─────────────┐ + │ A ┆ rolling_std │ + │ --- ┆ --- │ + │ f64 ┆ f64 │ + ╞═════╪═════════════╡ + │ 1.0 ┆ null │ + │ 2.0 ┆ 0.433013 │ + │ 3.0 ┆ 0.433013 │ + │ 4.0 ┆ 0.433013 │ + │ 5.0 ┆ 0.433013 │ + │ 6.0 ┆ 0.433013 │ + └─────┴─────────────┘ + + Center the values in the window + + >>> df.with_columns( + ... rolling_std=pl.col("A").rolling_std(window_size=3, center=True), + ... ) + shape: (6, 2) + ┌─────┬─────────────┐ + │ A ┆ rolling_std │ + │ --- ┆ --- │ + │ f64 ┆ f64 │ + ╞═════╪═════════════╡ + │ 1.0 ┆ null │ + │ 2.0 ┆ 1.0 │ + │ 3.0 ┆ 1.0 │ + │ 4.0 ┆ 1.0 │ + │ 5.0 ┆ 1.0 │ + │ 6.0 ┆ null │ + └─────┴─────────────┘ """ return self._from_pyexpr( self._pyexpr.rolling_std( @@ -8198,54 +7756,25 @@ def rolling_var( This functionality is considered **unstable**. It may be changed at any point without it being considered a breaking change. - If `by` has not been specified (the default), the window at a given row will - include the row itself, and the `window_size - 1` elements before it. - - If you pass a `by` column `<t_0, t_1, ..., t_n>`, then `closed="right"` - (the default) means the windows will be: + A window of length `window_size` will traverse the array. The values that fill + this window will (optionally) be multiplied with the weights given by the + `weights` vector. The resulting values will be aggregated to their variance. - - (t_0 - window_size, t_0] - - (t_1 - window_size, t_1] - - ... - - (t_n - window_size, t_n] + The window at a given row will include the row itself, and the `window_size - 1` + elements before it. Parameters ---------- window_size - The length of the window.
Can be a fixed integer size, or a dynamic temporal - size indicated by a timedelta or the following string language: - - - 1ns (1 nanosecond) - - 1us (1 microsecond) - - 1ms (1 millisecond) - - 1s (1 second) - - 1m (1 minute) - - 1h (1 hour) - - 1d (1 calendar day) - - 1w (1 calendar week) - - 1mo (1 calendar month) - - 1q (1 calendar quarter) - - 1y (1 calendar year) - - 1i (1 index count) - - By "calendar day", we mean the corresponding time on the next day - (which may not be 24 hours, due to daylight savings). Similarly for - "calendar week", "calendar month", "calendar quarter", and - "calendar year". - - If a timedelta or the dynamic string language is used, the `by` - and `closed` arguments must also be set. + The length of the window in number of elements. weights - An optional slice with the same length as the window that determines the - relative contribution of each value in a window to the output. + An optional slice with the same length as the window that will be multiplied + elementwise with the values in the window. min_periods The number of values in the window that should be non-null before computing - a result. If None, it will be set equal to: - - - the window size, if `window_size` is a fixed integer - - 1, if `window_size` is a dynamic temporal size + a result. If set to `None` (default), it will be set equal to `window_size`. center - Set the labels at the center of the window + Set the labels at the center of the window. ddof "Delta Degrees of Freedom": The divisor for a length N window is N - ddof @@ -8314,84 +7843,6 @@ def rolling_var( │ 5.0 ┆ 1.0 │ │ 6.0 ┆ null │ └─────┴─────────────┘ - - Create a DataFrame with a datetime column and a row number column - - >>> from datetime import timedelta, datetime - >>> start = datetime(2001, 1, 1) - >>> stop = datetime(2001, 1, 2) - >>> df_temporal = pl.DataFrame( - ... {"date": pl.datetime_range(start, stop, "1h", eager=True)} - ... 
).with_row_index() - >>> df_temporal - shape: (25, 2) - ┌───────┬─────────────────────┐ - │ index ┆ date │ - │ --- ┆ --- │ - │ u32 ┆ datetime[μs] │ - ╞═══════╪═════════════════════╡ - │ 0 ┆ 2001-01-01 00:00:00 │ - │ 1 ┆ 2001-01-01 01:00:00 │ - │ 2 ┆ 2001-01-01 02:00:00 │ - │ 3 ┆ 2001-01-01 03:00:00 │ - │ 4 ┆ 2001-01-01 04:00:00 │ - │ … ┆ … │ - │ 20 ┆ 2001-01-01 20:00:00 │ - │ 21 ┆ 2001-01-01 21:00:00 │ - │ 22 ┆ 2001-01-01 22:00:00 │ - │ 23 ┆ 2001-01-01 23:00:00 │ - │ 24 ┆ 2001-01-02 00:00:00 │ - └───────┴─────────────────────┘ - - Compute the rolling var with the temporal windows closed on the right (default) - - >>> df_temporal.with_columns( - ... rolling_row_var=pl.col("index").rolling_var(window_size="2h", by="date") - ... ) # doctest:+SKIP - shape: (25, 3) - ┌───────┬─────────────────────┬─────────────────┐ - │ index ┆ date ┆ rolling_row_var │ - │ --- ┆ --- ┆ --- │ - │ u32 ┆ datetime[μs] ┆ f64 │ - ╞═══════╪═════════════════════╪═════════════════╡ - │ 0 ┆ 2001-01-01 00:00:00 ┆ null │ - │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.5 │ - │ 2 ┆ 2001-01-01 02:00:00 ┆ 0.5 │ - │ 3 ┆ 2001-01-01 03:00:00 ┆ 0.5 │ - │ 4 ┆ 2001-01-01 04:00:00 ┆ 0.5 │ - │ … ┆ … ┆ … │ - │ 20 ┆ 2001-01-01 20:00:00 ┆ 0.5 │ - │ 21 ┆ 2001-01-01 21:00:00 ┆ 0.5 │ - │ 22 ┆ 2001-01-01 22:00:00 ┆ 0.5 │ - │ 23 ┆ 2001-01-01 23:00:00 ┆ 0.5 │ - │ 24 ┆ 2001-01-02 00:00:00 ┆ 0.5 │ - └───────┴─────────────────────┴─────────────────┘ - - Compute the rolling var with the closure of windows on both sides - - >>> df_temporal.with_columns( - ... rolling_row_var=pl.col("index").rolling_var( - ... window_size="2h", by="date", closed="both" - ... ) - ... 
) # doctest:+SKIP - shape: (25, 3) - ┌───────┬─────────────────────┬─────────────────┐ - │ index ┆ date ┆ rolling_row_var │ - │ --- ┆ --- ┆ --- │ - │ u32 ┆ datetime[μs] ┆ f64 │ - ╞═══════╪═════════════════════╪═════════════════╡ - │ 0 ┆ 2001-01-01 00:00:00 ┆ null │ - │ 1 ┆ 2001-01-01 01:00:00 ┆ 0.5 │ - │ 2 ┆ 2001-01-01 02:00:00 ┆ 1.0 │ - │ 3 ┆ 2001-01-01 03:00:00 ┆ 1.0 │ - │ 4 ┆ 2001-01-01 04:00:00 ┆ 1.0 │ - │ … ┆ … ┆ … │ - │ 20 ┆ 2001-01-01 20:00:00 ┆ 1.0 │ - │ 21 ┆ 2001-01-01 21:00:00 ┆ 1.0 │ - │ 22 ┆ 2001-01-01 22:00:00 ┆ 1.0 │ - │ 23 ┆ 2001-01-01 23:00:00 ┆ 1.0 │ - │ 24 ┆ 2001-01-02 00:00:00 ┆ 1.0 │ - └───────┴─────────────────────┴─────────────────┘ """ return self._from_pyexpr( self._pyexpr.rolling_var( @@ -8419,54 +7870,25 @@ def rolling_median( This functionality is considered **unstable**. It may be changed at any point without it being considered a breaking change. - If `by` has not been specified (the default), the window at a given row will - include the row itself, and the `window_size - 1` elements before it. - - If you pass a `by` column `<t_0, t_1, ..., t_n>`, then `closed="right"` - (the default) means the windows will be: + A window of length `window_size` will traverse the array. The values that fill + this window will (optionally) be multiplied with the weights given by the + `weights` vector. The resulting values will be aggregated to their median. - - (t_0 - window_size, t_0] - - (t_1 - window_size, t_1] - - ... - - (t_n - window_size, t_n] + The window at a given row will include the row itself, and the `window_size - 1` + elements before it. Parameters ---------- window_size - The length of the window.
Can be a fixed integer size, or a dynamic temporal - size indicated by a timedelta or the following string language: - - - 1ns (1 nanosecond) - - 1us (1 microsecond) - - 1ms (1 millisecond) - - 1s (1 second) - - 1m (1 minute) - - 1h (1 hour) - - 1d (1 calendar day) - - 1w (1 calendar week) - - 1mo (1 calendar month) - - 1q (1 calendar quarter) - - 1y (1 calendar year) - - 1i (1 index count) - - By "calendar day", we mean the corresponding time on the next day - (which may not be 24 hours, due to daylight savings). Similarly for - "calendar week", "calendar month", "calendar quarter", and - "calendar year". - - If a timedelta or the dynamic string language is used, the `by` - and `closed` arguments must also be set. + The length of the window in number of elements. weights - An optional slice with the same length as the window that determines the - relative contribution of each value in a window to the output. + An optional slice with the same length as the window that will be multiplied + elementwise with the values in the window. min_periods The number of values in the window that should be non-null before computing - a result. If None, it will be set equal to: - - - the window size, if `window_size` is a fixed integer - - 1, if `window_size` is a dynamic temporal size + a result. If set to `None` (default), it will be set equal to `window_size`. center - Set the labels at the center of the window + Set the labels at the center of the window. Notes ----- @@ -8561,16 +7983,12 @@ def rolling_quantile( This functionality is considered **unstable**. It may be changed at any point without it being considered a breaking change. - If `by` has not been specified (the default), the window at a given row will - include the row itself, and the `window_size - 1` elements before it. - - If you pass a `by` column `<t_0, t_1, ..., t_n>`, then `closed="right"` - (the default) means the windows will be: + A window of length `window_size` will traverse the array.
The values that fill + this window will (optionally) be multiplied with the weights given by the + `weights` vector. The resulting values will be aggregated to their quantile. - - (t_0 - window_size, t_0] - - (t_1 - window_size, t_1] - - ... - - (t_n - window_size, t_n] + The window at a given row will include the row itself, and the `window_size - 1` + elements before it. Parameters ---------- @@ -8579,40 +7997,15 @@ def rolling_quantile( interpolation : {'nearest', 'higher', 'lower', 'midpoint', 'linear'} Interpolation method. window_size - The length of the window. Can be a fixed integer size, or a dynamic - temporal size indicated by a timedelta or the following string language: - - - 1ns (1 nanosecond) - - 1us (1 microsecond) - - 1ms (1 millisecond) - - 1s (1 second) - - 1m (1 minute) - - 1h (1 hour) - - 1d (1 calendar day) - - 1w (1 calendar week) - - 1mo (1 calendar month) - - 1q (1 calendar quarter) - - 1y (1 calendar year) - - 1i (1 index count) - - By "calendar day", we mean the corresponding time on the next day - (which may not be 24 hours, due to daylight savings). Similarly for - "calendar week", "calendar month", "calendar quarter", and - "calendar year". - - If a timedelta or the dynamic string language is used, the `by` - and `closed` arguments must also be set. + The length of the window in number of elements. weights - An optional slice with the same length as the window that determines the - relative contribution of each value in a window to the output. + An optional slice with the same length as the window that will be multiplied + elementwise with the values in the window. min_periods The number of values in the window that should be non-null before computing - a result. If None, it will be set equal to: - - - the window size, if `window_size` is a fixed integer - - 1, if `window_size` is a dynamic temporal size + a result. If set to `None` (default), it will be set equal to `window_size`. 
center - Set the labels at the center of the window + Set the labels at the center of the window. Notes ----- @@ -8728,8 +8121,8 @@ def rolling_skew(self, window_size: int, *, bias: bool = True) -> Self: This functionality is considered **unstable**. It may be changed at any point without it being considered a breaking change. - The window at a given row includes the row itself and the - `window_size - 1` elements before it. + The window at a given row will include the row itself, and the `window_size - 1` + elements before it. Parameters ---------- @@ -8783,17 +8176,13 @@ def rolling_map( function Custom aggregation function. window_size - Size of the window. The window at a given row will include the row - itself and the `window_size - 1` elements before it. + The length of the window in number of elements. weights - A list of weights with the same length as the window that will be multiplied + An optional slice with the same length as the window that will be multiplied elementwise with the values in the window. min_periods The number of values in the window that should be non-null before computing - a result. If None, it will be set equal to: - - - the window size, if `window_size` is a fixed integer - - 1, if `window_size` is a dynamic temporal size + a result. If set to `None` (default), it will be set equal to `window_size`. center Set the labels at the center of the window. diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index 4456a2e226bc..bc4c4f603c8b 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -5608,18 +5608,15 @@ def rolling_min( Parameters ---------- window_size - The length of the window. + The length of the window in number of elements. weights An optional slice with the same length as the window that will be multiplied elementwise with the values in the window. min_periods The number of values in the window that should be non-null before computing - a result. 
If None, it will be set equal to: - - - the window size, if `window_size` is a fixed integer - - 1, if `window_size` is a dynamic temporal size + a result. If set to `None` (default), it will be set equal to `window_size`. center - Set the labels at the center of the window + Set the labels at the center of the window. Examples -------- @@ -5662,18 +5659,15 @@ def rolling_max( Parameters ---------- window_size - The length of the window. + The length of the window in number of elements. weights An optional slice with the same length as the window that will be multiplied elementwise with the values in the window. min_periods The number of values in the window that should be non-null before computing - a result. If None, it will be set equal to: - - - the window size, if `window_size` is a fixed integer - - 1, if `window_size` is a dynamic temporal size + a result. If set to `None` (default), it will be set equal to `window_size`. center - Set the labels at the center of the window + Set the labels at the center of the window. Examples -------- @@ -5716,18 +5710,15 @@ def rolling_mean( Parameters ---------- window_size - The length of the window. + The length of the window in number of elements. weights An optional slice with the same length as the window that will be multiplied elementwise with the values in the window. min_periods The number of values in the window that should be non-null before computing - a result. If None, it will be set equal to: - - - the window size, if `window_size` is a fixed integer - - 1, if `window_size` is a dynamic temporal size + a result. If set to `None` (default), it will be set equal to `window_size`. center - Set the labels at the center of the window + Set the labels at the center of the window. Examples -------- @@ -5770,18 +5761,15 @@ def rolling_sum( Parameters ---------- window_size - The length of the window. + The length of the window in number of elements. 
weights - An optional slice with the same length of the window that will be multiplied + An optional slice with the same length as the window that will be multiplied elementwise with the values in the window. min_periods The number of values in the window that should be non-null before computing - a result. If None, it will be set equal to: - - - the window size, if `window_size` is a fixed integer - - 1, if `window_size` is a dynamic temporal size + a result. If set to `None` (default), it will be set equal to `window_size`. center - Set the labels at the center of the window + Set the labels at the center of the window. Examples -------- @@ -5825,18 +5813,15 @@ def rolling_std( Parameters ---------- window_size - The length of the window. + The length of the window in number of elements. weights An optional slice with the same length as the window that will be multiplied elementwise with the values in the window. min_periods The number of values in the window that should be non-null before computing - a result. If None, it will be set equal to: - - - the window size, if `window_size` is a fixed integer - - 1, if `window_size` is a dynamic temporal size + a result. If set to `None` (default), it will be set equal to `window_size`. center - Set the labels at the center of the window + Set the labels at the center of the window. ddof "Delta Degrees of Freedom": The divisor for a length N window is N - ddof @@ -5883,18 +5868,15 @@ def rolling_var( Parameters ---------- window_size - The length of the window. + The length of the window in number of elements. weights An optional slice with the same length as the window that will be multiplied elementwise with the values in the window. min_periods The number of values in the window that should be non-null before computing - a result. If None, it will be set equal to: - - - the window size, if `window_size` is a fixed integer - - 1, if `window_size` is a dynamic temporal size + a result. 
If set to `None` (default), it will be set equal to `window_size`. center - Set the labels at the center of the window + Set the labels at the center of the window. ddof "Delta Degrees of Freedom": The divisor for a length N window is N - ddof @@ -5936,17 +5918,13 @@ def rolling_map( function Custom aggregation function. window_size - Size of the window. The window at a given row will include the row - itself and the `window_size - 1` elements before it. + The length of the window in number of elements. weights - A list of weights with the same length as the window that will be multiplied + An optional slice with the same length as the window that will be multiplied elementwise with the values in the window. min_periods The number of values in the window that should be non-null before computing - a result. If None, it will be set equal to: - - - the window size, if `window_size` is a fixed integer - - 1, if `window_size` is a dynamic temporal size + a result. If set to `None` (default), it will be set equal to `window_size`. center Set the labels at the center of the window. @@ -5987,24 +5965,21 @@ def rolling_median( This functionality is considered **unstable**. It may be changed at any point without it being considered a breaking change. + The window at a given row will include the row itself and the `window_size - 1` + elements before it. + Parameters ---------- window_size - The length of the window. + The length of the window in number of elements. weights An optional slice with the same length as the window that will be multiplied elementwise with the values in the window. min_periods The number of values in the window that should be non-null before computing - a result. If None, it will be set equal to: - - - the window size, if `window_size` is a fixed integer - - 1, if `window_size` is a dynamic temporal size + a result. If set to `None` (default), it will be set equal to `window_size`. 
center - Set the labels at the center of the window - - The window at a given row will include the row itself and the `window_size - 1` - elements before it. + Set the labels at the center of the window. Examples -------- @@ -6050,18 +6025,15 @@ def rolling_quantile( interpolation : {'nearest', 'higher', 'lower', 'midpoint', 'linear'} Interpolation method. window_size - The length of the window. + The length of the window in number of elements. weights An optional slice with the same length as the window that will be multiplied elementwise with the values in the window. min_periods The number of values in the window that should be non-null before computing - a result. If None, it will be set equal to: - - - the window size, if `window_size` is a fixed integer - - 1, if `window_size` is a dynamic temporal size + a result. If set to `None` (default), it will be set equal to `window_size`. center - Set the labels at the center of the window + Set the labels at the center of the window. Examples --------