From 75aa4a39e8bc02d0e8a310142c6f4cc2684c53de Mon Sep 17 00:00:00 2001 From: Gijs Burghoorn Date: Mon, 28 Oct 2024 19:43:41 +0100 Subject: [PATCH] fix: Make Duration parsing fallible and not panic (#19490) --- crates/polars-python/src/dataframe/general.rs | 6 +- crates/polars-python/src/expr/general.rs | 18 +++--- crates/polars-python/src/expr/rolling.rs | 55 ++++++++++--------- crates/polars-python/src/functions/range.rs | 46 ++++++++++------ crates/polars-python/src/lazyframe/general.rs | 18 +++--- crates/polars-time/src/windows/duration.rs | 39 ++++++++----- .../unit/functions/range/test_date_range.py | 4 +- .../functions/range/test_datetime_range.py | 4 +- py-polars/tests/unit/test_errors.py | 3 +- 9 files changed, 108 insertions(+), 85 deletions(-) diff --git a/crates/polars-python/src/dataframe/general.rs b/crates/polars-python/src/dataframe/general.rs index d7bbab29177f..ac4febced0f6 100644 --- a/crates/polars-python/src/dataframe/general.rs +++ b/crates/polars-python/src/dataframe/general.rs @@ -591,11 +591,11 @@ impl PyDataFrame { every: &str, stable: bool, ) -> PyResult { + let every = Duration::try_parse(every).map_err(PyPolarsErr::from)?; let out = if stable { - self.df - .upsample_stable(by, index_column, Duration::parse(every)) + self.df.upsample_stable(by, index_column, every) } else { - self.df.upsample(by, index_column, Duration::parse(every)) + self.df.upsample(by, index_column, every) }; let out = out.map_err(PyPolarsErr::from)?; Ok(out.into()) diff --git a/crates/polars-python/src/expr/general.rs b/crates/polars-python/src/expr/general.rs index 604049f62b66..7125388e88cd 100644 --- a/crates/polars-python/src/expr/general.rs +++ b/crates/polars-python/src/expr/general.rs @@ -9,6 +9,7 @@ use pyo3::class::basic::CompareOp; use pyo3::prelude::*; use crate::conversion::{parse_fill_null_strategy, vec_extract_wrapped, Wrap}; +use crate::error::PyPolarsErr; use crate::map::lazy::map_single; use crate::PyExpr; @@ -614,15 +615,15 @@ impl PyExpr { period: &str, offset: &str, closed: Wrap, - ) -> Self { + ) -> PyResult { let options = RollingGroupOptions { index_column: index_column.into(), - period: Duration::parse(period), - offset: Duration::parse(offset), + period: Duration::try_parse(period).map_err(PyPolarsErr::from)?, + offset: Duration::try_parse(offset).map_err(PyPolarsErr::from)?, closed_window: closed.0, }; - self.inner.clone().rolling(options).into() + Ok(self.inner.clone().rolling(options).into()) } fn and_(&self, expr: Self) -> Self { @@ -812,12 +813,13 @@ impl PyExpr { }; self.inner.clone().ewm_mean(options).into() } - fn ewm_mean_by(&self, times: PyExpr, half_life: &str) -> Self { - let half_life = Duration::parse(half_life); - self.inner + fn ewm_mean_by(&self, times: PyExpr, half_life: &str) -> PyResult { + let half_life = Duration::try_parse(half_life).map_err(PyPolarsErr::from)?; + Ok(self + .inner .clone() .ewm_mean_by(times.inner, half_life) - .into() + .into()) } fn ewm_std( diff --git a/crates/polars-python/src/expr/rolling.rs b/crates/polars-python/src/expr/rolling.rs index a5ef9213128f..5ef511902613 100644 --- a/crates/polars-python/src/expr/rolling.rs +++ b/crates/polars-python/src/expr/rolling.rs @@ -3,6 +3,7 @@ use pyo3::prelude::*; use pyo3::types::PyFloat; use crate::conversion::Wrap; +use crate::error::PyPolarsErr; use crate::map::lazy::call_lambda_with_series; use crate::{PyExpr, PySeries}; @@ -34,14 +35,14 @@ impl PyExpr { window_size: &str, min_periods: usize, closed: Wrap, - ) -> Self { + ) -> PyResult { let options = RollingOptionsDynamicWindow { - window_size: Duration::parse(window_size), + window_size: Duration::try_parse(window_size).map_err(PyPolarsErr::from)?, min_periods, closed_window: closed.0, fn_params: None, }; - self.inner.clone().rolling_sum_by(by.inner, options).into() + Ok(self.inner.clone().rolling_sum_by(by.inner, options).into()) } #[pyo3(signature = (window_size, weights, min_periods, center))] @@ -70,14 +71,14 @@ impl PyExpr { window_size: &str, min_periods: usize, closed: Wrap, - ) -> Self { + ) -> PyResult { let options = RollingOptionsDynamicWindow { - window_size: Duration::parse(window_size), + window_size: Duration::try_parse(window_size).map_err(PyPolarsErr::from)?, min_periods, closed_window: closed.0, fn_params: None, }; - self.inner.clone().rolling_min_by(by.inner, options).into() + Ok(self.inner.clone().rolling_min_by(by.inner, options).into()) } #[pyo3(signature = (window_size, weights, min_periods, center))] @@ -105,14 +106,14 @@ impl PyExpr { window_size: &str, min_periods: usize, closed: Wrap, - ) -> Self { + ) -> PyResult { let options = RollingOptionsDynamicWindow { - window_size: Duration::parse(window_size), + window_size: Duration::try_parse(window_size).map_err(PyPolarsErr::from)?, min_periods, closed_window: closed.0, fn_params: None, }; - self.inner.clone().rolling_max_by(by.inner, options).into() + Ok(self.inner.clone().rolling_max_by(by.inner, options).into()) } #[pyo3(signature = (window_size, weights, min_periods, center))] @@ -142,15 +143,15 @@ impl PyExpr { window_size: &str, min_periods: usize, closed: Wrap, - ) -> Self { + ) -> PyResult { let options = RollingOptionsDynamicWindow { - window_size: Duration::parse(window_size), + window_size: Duration::try_parse(window_size).map_err(PyPolarsErr::from)?, min_periods, closed_window: closed.0, fn_params: None, }; - self.inner.clone().rolling_mean_by(by.inner, options).into() + Ok(self.inner.clone().rolling_mean_by(by.inner, options).into()) } #[pyo3(signature = (window_size, weights, min_periods, center, ddof))] @@ -182,15 +183,15 @@ impl PyExpr { min_periods: usize, closed: Wrap, ddof: u8, - ) -> Self { + ) -> PyResult { let options = RollingOptionsDynamicWindow { - window_size: Duration::parse(window_size), + window_size: Duration::try_parse(window_size).map_err(PyPolarsErr::from)?, min_periods, closed_window: closed.0, fn_params: Some(RollingFnParams::Var(RollingVarParams { ddof })), }; - self.inner.clone().rolling_std_by(by.inner, options).into() + Ok(self.inner.clone().rolling_std_by(by.inner, options).into()) } #[pyo3(signature = (window_size, weights, min_periods, center, ddof))] @@ -222,15 +223,15 @@ impl PyExpr { min_periods: usize, closed: Wrap, ddof: u8, - ) -> Self { + ) -> PyResult { let options = RollingOptionsDynamicWindow { - window_size: Duration::parse(window_size), + window_size: Duration::try_parse(window_size).map_err(PyPolarsErr::from)?, min_periods, closed_window: closed.0, fn_params: Some(RollingFnParams::Var(RollingVarParams { ddof })), }; - self.inner.clone().rolling_var_by(by.inner, options).into() + Ok(self.inner.clone().rolling_var_by(by.inner, options).into()) } #[pyo3(signature = (window_size, weights, min_periods, center))] @@ -259,17 +260,18 @@ impl PyExpr { window_size: &str, min_periods: usize, closed: Wrap, - ) -> Self { + ) -> PyResult { let options = RollingOptionsDynamicWindow { - window_size: Duration::parse(window_size), + window_size: Duration::try_parse(window_size).map_err(PyPolarsErr::from)?, min_periods, closed_window: closed.0, fn_params: None, }; - self.inner + Ok(self + .inner .clone() .rolling_median_by(by.inner, options) - .into() + .into()) } #[pyo3(signature = (quantile, interpolation, window_size, weights, min_periods, center))] @@ -306,18 +308,19 @@ impl PyExpr { window_size: &str, min_periods: usize, closed: Wrap, - ) -> Self { + ) -> PyResult { let options = RollingOptionsDynamicWindow { - window_size: Duration::parse(window_size), + window_size: Duration::try_parse(window_size).map_err(PyPolarsErr::from)?, min_periods, closed_window: closed.0, fn_params: None, }; - self.inner + Ok(self + .inner .clone() .rolling_quantile_by(by.inner, interpolation.0, quantile, options) - .into() + .into()) } fn rolling_skew(&self, window_size: usize, bias: bool) -> Self { diff --git a/crates/polars-python/src/functions/range.rs b/crates/polars-python/src/functions/range.rs index e6e421dac84c..b6eae4400dd8 100644 --- a/crates/polars-python/src/functions/range.rs +++ b/crates/polars-python/src/functions/range.rs @@ -71,12 +71,12 @@ pub fn date_range( end: PyExpr, interval: &str, closed: Wrap, -) -> PyExpr { +) -> PyResult { let start = start.inner; let end = end.inner; - let interval = Duration::parse(interval); + let interval = Duration::try_parse(interval).map_err(PyPolarsErr::from)?; let closed = closed.0; - dsl::date_range(start, end, interval, closed).into() + Ok(dsl::date_range(start, end, interval, closed).into()) } #[pyfunction] @@ -85,12 +85,12 @@ pub fn date_ranges( end: PyExpr, interval: &str, closed: Wrap, -) -> PyExpr { +) -> PyResult { let start = start.inner; let end = end.inner; - let interval = Duration::parse(interval); + let interval = Duration::try_parse(interval).map_err(PyPolarsErr::from)?; let closed = closed.0; - dsl::date_ranges(start, end, interval, closed).into() + Ok(dsl::date_ranges(start, end, interval, closed).into()) } #[pyfunction] @@ -102,14 +102,14 @@ pub fn datetime_range( closed: Wrap, time_unit: Option>, time_zone: Option>, -) -> PyExpr { +) -> PyResult { let start = start.inner; let end = end.inner; - let every = Duration::parse(every); + let every = Duration::try_parse(every).map_err(PyPolarsErr::from)?; let closed = closed.0; let time_unit = time_unit.map(|x| x.0); let time_zone = time_zone.map(|x| x.0); - dsl::datetime_range(start, end, every, closed, time_unit, time_zone).into() + Ok(dsl::datetime_range(start, end, every, closed, time_unit, time_zone).into()) } #[pyfunction] @@ -121,30 +121,40 @@ pub fn datetime_ranges( closed: Wrap, time_unit: Option>, time_zone: Option>, -) -> PyExpr { +) -> PyResult { let start = start.inner; let end = end.inner; - let every = Duration::parse(every); + let every = Duration::try_parse(every).map_err(PyPolarsErr::from)?; let closed = closed.0; let time_unit = time_unit.map(|x| x.0); let time_zone = time_zone.map(|x| x.0); - dsl::datetime_ranges(start, end, every, closed, time_unit, time_zone).into() + Ok(dsl::datetime_ranges(start, end, every, closed, time_unit, time_zone).into()) } #[pyfunction] -pub fn time_range(start: PyExpr, end: PyExpr, every: &str, closed: Wrap) -> PyExpr { +pub fn time_range( + start: PyExpr, + end: PyExpr, + every: &str, + closed: Wrap, +) -> PyResult { let start = start.inner; let end = end.inner; - let every = Duration::parse(every); + let every = Duration::try_parse(every).map_err(PyPolarsErr::from)?; let closed = closed.0; - dsl::time_range(start, end, every, closed).into() + Ok(dsl::time_range(start, end, every, closed).into()) } #[pyfunction] -pub fn time_ranges(start: PyExpr, end: PyExpr, every: &str, closed: Wrap) -> PyExpr { +pub fn time_ranges( + start: PyExpr, + end: PyExpr, + every: &str, + closed: Wrap, +) -> PyResult { let start = start.inner; let end = end.inner; - let every = Duration::parse(every); + let every = Duration::try_parse(every).map_err(PyPolarsErr::from)?; let closed = closed.0; - dsl::time_ranges(start, end, every, closed).into() + Ok(dsl::time_ranges(start, end, every, closed).into()) } diff --git a/crates/polars-python/src/lazyframe/general.rs b/crates/polars-python/src/lazyframe/general.rs index c08b838404ba..da0b597418eb 100644 --- a/crates/polars-python/src/lazyframe/general.rs +++ b/crates/polars-python/src/lazyframe/general.rs @@ -842,7 +842,7 @@ impl PyLazyFrame { offset: &str, closed: Wrap, by: Vec, - ) -> PyLazyGroupBy { + ) -> PyResult { let closed_window = closed.0; let ldf = self.ldf.clone(); let by = by @@ -854,13 +854,13 @@ impl PyLazyFrame { by, RollingGroupOptions { index_column: "".into(), - period: Duration::parse(period), - offset: Duration::parse(offset), + period: Duration::try_parse(period).map_err(PyPolarsErr::from)?, + offset: Duration::try_parse(offset).map_err(PyPolarsErr::from)?, closed_window, }, ); - PyLazyGroupBy { lgb: Some(lazy_gb) } + Ok(PyLazyGroupBy { lgb: Some(lazy_gb) }) } fn group_by_dynamic( @@ -874,7 +874,7 @@ impl PyLazyFrame { closed: Wrap, group_by: Vec, start_by: Wrap, - ) -> PyLazyGroupBy { + ) -> PyResult { let closed_window = closed.0; let group_by = group_by .into_iter() @@ -885,9 +885,9 @@ impl PyLazyFrame { index_column.inner, group_by, DynamicGroupOptions { - every: Duration::parse(every), - period: Duration::parse(period), - offset: Duration::parse(offset), + every: Duration::try_parse(every).map_err(PyPolarsErr::from)?, + period: Duration::try_parse(period).map_err(PyPolarsErr::from)?, + offset: Duration::try_parse(offset).map_err(PyPolarsErr::from)?, label: label.0, include_boundaries, closed_window, @@ -896,7 +896,7 @@ impl PyLazyFrame { }, ); - PyLazyGroupBy { lgb: Some(lazy_gb) } + Ok(PyLazyGroupBy { lgb: Some(lazy_gb) }) } fn with_context(&self, contexts: Vec) -> Self { diff --git a/crates/polars-time/src/windows/duration.rs b/crates/polars-time/src/windows/duration.rs index 4f300f733100..56ce3e4bdcd5 100644 --- a/crates/polars-time/src/windows/duration.rs +++ b/crates/polars-time/src/windows/duration.rs @@ -153,7 +153,7 @@ impl Duration { /// # Panics /// If the given str is invalid for any reason. pub fn parse(duration: &str) -> Self { - Self::_parse(duration, false) + Self::try_parse(duration).unwrap() } #[doc(hidden)] @@ -161,23 +161,31 @@ impl Duration { /// units (such as 'year', 'minutes', etc.) and whitespace, as /// well as being case-insensitive. pub fn parse_interval(interval: &str) -> Self { + Self::try_parse_interval(interval).unwrap() + } + + pub fn try_parse(duration: &str) -> PolarsResult { + Self::_parse(duration, false) + } + + pub fn try_parse_interval(interval: &str) -> PolarsResult { Self::_parse(&interval.to_ascii_lowercase(), true) } - fn _parse(s: &str, as_interval: bool) -> Self { + fn _parse(s: &str, as_interval: bool) -> PolarsResult { let s = if as_interval { s.trim_start() } else { s }; let parse_type = if as_interval { "interval" } else { "duration" }; let num_minus_signs = s.matches('-').count(); if num_minus_signs > 1 { - panic!("{} string can only have a single minus sign", parse_type) + polars_bail!(InvalidOperation: "{} string can only have a single minus sign", parse_type); } if num_minus_signs > 0 { if as_interval { // TODO: intervals need to support per-element minus signs - panic!("minus signs are not currently supported in interval strings") + polars_bail!(InvalidOperation: "minus signs are not currently supported in interval strings"); } else if !s.starts_with('-') { - panic!("only a single minus sign is allowed, at the front of the string") + polars_bail!(InvalidOperation: "only a single minus sign is allowed, at the front of the string"); } } let mut months = 0; @@ -211,12 +219,12 @@ impl Duration { while let Some((i, mut ch)) = iter.next() { if !ch.is_ascii_digit() { - let n = s[start..i].parse::().unwrap_or_else(|_| { - panic!( + let Ok(n) = s[start..i].parse::() else { + polars_bail!(InvalidOperation: "expected leading integer in the {} string, found {}", parse_type, ch - ) - }); + ); + }; loop { match ch { @@ -233,10 +241,10 @@ impl Duration { } } if unit.is_empty() { - panic!( + polars_bail!(InvalidOperation: "expected a unit to follow integer in the {} string '{}'", parse_type, s - ) + ); } match &*unit { // matches that are allowed for both duration/interval @@ -270,24 +278,25 @@ impl Duration { "year" | "years" => months += n * 12, _ => { let valid_units = "'year', 'month', 'quarter', 'week', 'day', 'hour', 'minute', 'second', 'millisecond', 'microsecond', 'nanosecond'"; - panic!("unit: '{unit}' not supported; available units include: {} (and their plurals)", valid_units) + polars_bail!(InvalidOperation: "unit: '{unit}' not supported; available units include: {} (and their plurals)", valid_units); }, }, _ => { - panic!("unit: '{unit}' not supported; available units are: 'y', 'mo', 'q', 'w', 'd', 'h', 'm', 's', 'ms', 'us', 'ns'") + polars_bail!(InvalidOperation: "unit: '{unit}' not supported; available units are: 'y', 'mo', 'q', 'w', 'd', 'h', 'm', 's', 'ms', 'us', 'ns'"); }, } unit.clear(); } } - Duration { + + Ok(Duration { nsecs: nsecs.abs(), days: days.abs(), weeks: weeks.abs(), months: months.abs(), negative, parsed_int, - } + }) } fn to_positive(v: i64) -> (bool, i64) { diff --git a/py-polars/tests/unit/functions/range/test_date_range.py b/py-polars/tests/unit/functions/range/test_date_range.py index 0531ab1878d5..a88287bedf41 100644 --- a/py-polars/tests/unit/functions/range/test_date_range.py +++ b/py-polars/tests/unit/functions/range/test_date_range.py @@ -7,7 +7,7 @@ import pytest import polars as pl -from polars.exceptions import ComputeError, PanicException +from polars.exceptions import ComputeError, InvalidOperationError from polars.testing import assert_frame_equal, assert_series_equal if TYPE_CHECKING: @@ -21,7 +21,7 @@ def test_date_range() -> None: def test_date_range_invalid_time_unit() -> None: - with pytest.raises(PanicException, match="'x' not supported"): + with pytest.raises(InvalidOperationError, match="'x' not supported"): pl.date_range( start=date(2021, 12, 16), end=date(2021, 12, 18), diff --git a/py-polars/tests/unit/functions/range/test_datetime_range.py b/py-polars/tests/unit/functions/range/test_datetime_range.py index 8dbc9da15c29..ce99cd27b802 100644 --- a/py-polars/tests/unit/functions/range/test_datetime_range.py +++ b/py-polars/tests/unit/functions/range/test_datetime_range.py @@ -9,7 +9,7 @@ import polars as pl from polars.datatypes import DTYPE_TEMPORAL_UNITS -from polars.exceptions import ComputeError, PanicException, SchemaError +from polars.exceptions import ComputeError, InvalidOperationError, SchemaError from polars.testing import assert_frame_equal, assert_series_equal if TYPE_CHECKING: @@ -96,7 +96,7 @@ def test_datetime_range_precision( def test_datetime_range_invalid_time_unit() -> None: - with pytest.raises(PanicException, match="'x' not supported"): + with pytest.raises(InvalidOperationError, match="'x' not supported"): pl.datetime_range( start=datetime(2021, 12, 16), end=datetime(2021, 12, 16, 3), diff --git a/py-polars/tests/unit/test_errors.py b/py-polars/tests/unit/test_errors.py index 9154d19a7029..0ca504d20e20 100644 --- a/py-polars/tests/unit/test_errors.py +++ b/py-polars/tests/unit/test_errors.py @@ -16,7 +16,6 @@ ComputeError, InvalidOperationError, OutOfBoundsError, - PanicException, SchemaError, SchemaFieldNotFoundError, ShapeError, @@ -116,7 +115,7 @@ def test_string_numeric_comp_err() -> None: def test_panic_error() -> None: with pytest.raises( - PanicException, + InvalidOperationError, match="unit: 'k' not supported", ): pl.datetime_range(