From af165fe766c6a256389aed4ae47484c188917eee Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Wed, 19 Oct 2022 14:13:48 +0200 Subject: [PATCH] =?UTF-8?q?feat(rust,=20python):=20show=20expression=20whe?= =?UTF-8?q?re=20error=20originated=20if=20raised=20=E2=80=A6=20(#5263)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../polars-lazy/src/physical_plan/errors.rs | 11 +++ .../src/physical_plan/expressions/apply.rs | 29 ++++---- .../src/physical_plan/expressions/binary.rs | 8 +-- .../src/physical_plan/expressions/slice.rs | 69 ++++++++++--------- .../src/physical_plan/expressions/take.rs | 18 ++--- .../src/physical_plan/expressions/ternary.rs | 4 +- .../src/physical_plan/expressions/window.rs | 28 +++----- polars/polars-lazy/src/physical_plan/mod.rs | 2 + 8 files changed, 94 insertions(+), 75 deletions(-) create mode 100644 polars/polars-lazy/src/physical_plan/errors.rs diff --git a/polars/polars-lazy/src/physical_plan/errors.rs b/polars/polars-lazy/src/physical_plan/errors.rs new file mode 100644 index 0000000000000..0b8eaf5b8e99f --- /dev/null +++ b/polars/polars-lazy/src/physical_plan/errors.rs @@ -0,0 +1,11 @@ +macro_rules! expression_err { + ($msg:expr, $source:expr, $error:ident) => {{ + let msg = format!( + "{}\n\n> Error originated in expression: '{:?}'", + $msg, $source + ); + PolarsError::$error(msg.into()) + }}; +} + +pub(super) use expression_err; diff --git a/polars/polars-lazy/src/physical_plan/expressions/apply.rs b/polars/polars-lazy/src/physical_plan/expressions/apply.rs index 009e43e29a46f..8e8085c7f6af8 100644 --- a/polars/polars-lazy/src/physical_plan/expressions/apply.rs +++ b/polars/polars-lazy/src/physical_plan/expressions/apply.rs @@ -12,6 +12,7 @@ use polars_io::predicates::StatsEvaluator; use polars_plan::dsl::FunctionExpr; use rayon::prelude::*; +use crate::physical_plan::expression_err; use crate::physical_plan::state::ExecutionState; use crate::prelude::*; @@ -80,9 +81,10 @@ fn all_unit_length(ca: &ListChunked) -> bool { (offset[offset.len() - 1] as usize) == list_arr.len() as usize } -fn check_map_output_len(input_len: usize, output_len: usize) -> PolarsResult<()> { +fn check_map_output_len(input_len: usize, output_len: usize, expr: &Expr) -> PolarsResult<()> { if input_len != output_len { - Err(PolarsError::ComputeError("A 'map' functions output length must be equal to that of the input length. Consider using 'apply' in favor of 'map'.".into())) + let msg = "A 'map' functions output length must be equal to that of the input length. Consider using 'apply' in favor of 'map'."; + Err(expression_err!(msg, expr, ComputeError)) } else { Ok(()) } @@ -131,13 +133,11 @@ impl PhysicalExpr for ApplyExpr { let s = ac.series(); if matches!(ac.agg_state(), AggState::AggregatedFlat(_)) { - return Err(PolarsError::ComputeError( - format!( - "Cannot aggregate {:?}. The column is already aggregated.", - self.expr - ) - .into(), - )); + let msg = format!( + "Cannot aggregate {:?}. The column is already aggregated.", + self.expr + ); + return Err(expression_err!(msg, self.expr, ComputeError)); } // collection of empty list leads to a null dtype @@ -194,7 +194,7 @@ impl PhysicalExpr for ApplyExpr { let input_len = input.len(); let s = self.function.call_udf(&mut [input])?; - check_map_output_len(input_len, s.len())?; + check_map_output_len(input_len, s.len(), &self.expr)?; ac.with_series(s, false); if set_update_groups { @@ -236,7 +236,7 @@ impl PhysicalExpr for ApplyExpr { self.collect_groups, acs[0].agg_state(), ) { - apply_multiple_flat(acs, self.function.as_ref()) + apply_multiple_flat(acs, self.function.as_ref(), &self.expr) } else { let mut container = vec![Default::default(); acs.len()]; let name = acs[0].series().name().to_string(); @@ -272,7 +272,9 @@ impl PhysicalExpr for ApplyExpr { Ok(ac) } } - (_, ApplyOptions::ApplyFlat) => apply_multiple_flat(acs, self.function.as_ref()), + (_, ApplyOptions::ApplyFlat) => { + apply_multiple_flat(acs, self.function.as_ref(), &self.expr) + } } } } @@ -308,6 +310,7 @@ impl PhysicalExpr for ApplyExpr { fn apply_multiple_flat<'a>( mut acs: Vec>, function: &dyn SeriesUdf, + expr: &Expr, ) -> PolarsResult> { let mut s = acs .iter_mut() @@ -324,7 +327,7 @@ fn apply_multiple_flat<'a>( let input_len = s[0].len(); let s = function.call_udf(&mut s)?; - check_map_output_len(input_len, s.len())?; + check_map_output_len(input_len, s.len(), expr)?; // take the first aggregation context that as that is the input series let mut ac = acs.swap_remove(0); diff --git a/polars/polars-lazy/src/physical_plan/expressions/binary.rs b/polars/polars-lazy/src/physical_plan/expressions/binary.rs index 2b96c2a0e4f9f..b3f3cfe6325cd 100644 --- a/polars/polars-lazy/src/physical_plan/expressions/binary.rs +++ b/polars/polars-lazy/src/physical_plan/expressions/binary.rs @@ -7,6 +7,7 @@ use polars_core::series::unstable::UnstableSeries; use polars_core::POOL; use rayon::prelude::*; +use crate::physical_plan::expression_err; use crate::physical_plan::state::{ExecutionState, StateFlags}; use crate::prelude::*; @@ -124,11 +125,8 @@ impl PhysicalExpr for BinaryExpr { let mut ac_r = result_b?; if !ac_l.can_combine(&ac_r) { - return Err(PolarsError::InvalidOperation( - "\ - cannot combine this binary expression, the groups do not match" - .into(), - )); + let msg = "Cannot combine this binary expression, the groups do not match."; + return Err(expression_err!(msg, self.expr, InvalidOperation)); } match ( diff --git a/polars/polars-lazy/src/physical_plan/expressions/slice.rs b/polars/polars-lazy/src/physical_plan/expressions/slice.rs index bb1b853776d1a..84b208daef0ab 100644 --- a/polars/polars-lazy/src/physical_plan/expressions/slice.rs +++ b/polars/polars-lazy/src/physical_plan/expressions/slice.rs @@ -7,6 +7,7 @@ use polars_core::POOL; use rayon::prelude::*; use AnyValue::Null; +use crate::physical_plan::expression_err; use crate::physical_plan::state::ExecutionState; use crate::prelude::*; @@ -17,50 +18,56 @@ pub struct SliceExpr { pub(crate) expr: Expr, } -fn extract_offset(offset: &Series) -> PolarsResult { +fn extract_offset(offset: &Series, expr: &Expr) -> PolarsResult { if offset.len() > 1 { - return Err(PolarsError::ComputeError(format!("Invalid argument to slice; expected an offset literal but got a Series of length {}", offset.len()).into())); + let msg = format!( + "Invalid argument to slice; expected an offset literal but got a Series of length {}.", + offset.len() + ); + return Err(expression_err!(msg, expr, ComputeError)); } offset.get(0).extract::().ok_or_else(|| { PolarsError::ComputeError(format!("could not get an offset from {:?}", offset).into()) }) } -fn extract_length(length: &Series) -> PolarsResult { +fn extract_length(length: &Series, expr: &Expr) -> PolarsResult { if length.len() > 1 { - return Err(PolarsError::ComputeError(format!("Invalid argument to slice; expected a length literal but got a Series of length {}", length.len()).into())); + let msg = format!( + "Invalid argument to slice; expected a length literal but got a Series of length {}.", + length.len() + ); + return Err(expression_err!(msg, expr, ComputeError)); } match length.get(0) { Null => Ok(usize::MAX), v => v.extract::().ok_or_else(|| { - PolarsError::ComputeError(format!("could not get a length from {:?}", length).into()) + let msg = format!("Could not get a length from {:?}.", length); + expression_err!(msg, expr, ComputeError) }), } } -fn extract_args(offset: &Series, length: &Series) -> PolarsResult<(i64, usize)> { - Ok((extract_offset(offset)?, extract_length(length)?)) +fn extract_args(offset: &Series, length: &Series, expr: &Expr) -> PolarsResult<(i64, usize)> { + Ok((extract_offset(offset, expr)?, extract_length(length, expr)?)) } -fn check_argument(arg: &Series, groups: &GroupsProxy, name: &str) -> PolarsResult<()> { +fn check_argument(arg: &Series, groups: &GroupsProxy, name: &str, expr: &Expr) -> PolarsResult<()> { if let DataType::List(_) = arg.dtype() { - Err(PolarsError::ComputeError( - format!( - "Invalid slice argument: cannot use an array as {} argument", - name - ) - .into(), - )) + let msg = format!( + "Invalid slice argument: cannot use an array as {} argument.", + name + ); + Err(expression_err!(msg, expr, ComputeError)) } else if arg.len() != groups.len() { - Err(PolarsError::ComputeError(format!("Invalid slice argument: the evaluated length expression was of different {} than the number of groups", name).into())) + let msg = format!("Invalid slice argument: the evaluated length expression was of different {} than the number of groups.", name); + Err(expression_err!(msg, expr, ComputeError)) } else if arg.null_count() > 0 { - Err(PolarsError::ComputeError( - format!( - "Invalid slice argument: the {} expression should not have null values", - name - ) - .into(), - )) + let msg = format!( + "Invalid slice argument: the {} expression should not have null values.", + name + ); + Err(expression_err!(msg, expr, ComputeError)) } else { Ok(()) } @@ -94,7 +101,7 @@ impl PhysicalExpr for SliceExpr { let offset = &results[0]; let length = &results[1]; let series = &results[2]; - let (offset, length) = extract_args(offset, length)?; + let (offset, length) = extract_args(offset, length, &self.expr)?; Ok(series.slice(offset, length)) } @@ -120,7 +127,7 @@ impl PhysicalExpr for SliceExpr { use AggState::*; let groups = match (&ac_offset.state, &ac_length.state) { (Literal(offset), Literal(length)) => { - let (offset, length) = extract_args(offset, length)?; + let (offset, length) = extract_args(offset, length, &self.expr)?; match groups.as_ref() { GroupsProxy::Idx(groups) => { @@ -143,9 +150,9 @@ impl PhysicalExpr for SliceExpr { } } (Literal(offset), _) => { - let offset = extract_offset(offset)?; + let offset = extract_offset(offset, &self.expr)?; let length = ac_length.aggregated(); - check_argument(&length, groups, "length")?; + check_argument(&length, groups, "length", &self.expr)?; let length = length.cast(&IDX_DTYPE)?; let length = length.idx().unwrap(); @@ -177,9 +184,9 @@ impl PhysicalExpr for SliceExpr { } } (_, Literal(length)) => { - let length = extract_length(length)?; + let length = extract_length(length, &self.expr)?; let offset = ac_offset.aggregated(); - check_argument(&offset, groups, "offset")?; + check_argument(&offset, groups, "offset", &self.expr)?; let offset = offset.cast(&DataType::Int64)?; let offset = offset.i64().unwrap(); @@ -213,8 +220,8 @@ impl PhysicalExpr for SliceExpr { _ => { let length = ac_length.aggregated(); let offset = ac_offset.aggregated(); - check_argument(&length, groups, "length")?; - check_argument(&offset, groups, "offset")?; + check_argument(&length, groups, "length", &self.expr)?; + check_argument(&offset, groups, "offset", &self.expr)?; let offset = offset.cast(&DataType::Int64)?; let offset = offset.i64().unwrap(); diff --git a/polars/polars-lazy/src/physical_plan/expressions/take.rs b/polars/polars-lazy/src/physical_plan/expressions/take.rs index d9a32954e8606..c116cded9e6c2 100644 --- a/polars/polars-lazy/src/physical_plan/expressions/take.rs +++ b/polars/polars-lazy/src/physical_plan/expressions/take.rs @@ -5,6 +5,7 @@ use polars_core::frame::groupby::GroupsProxy; use polars_core::prelude::*; use polars_core::utils::NoNull; +use crate::physical_plan::expression_err; use crate::physical_plan::state::ExecutionState; use crate::prelude::*; @@ -47,6 +48,11 @@ impl PhysicalExpr for TakeExpr { let mut ac = self.phys_expr.evaluate_on_groups(df, groups, state)?; let mut idx = self.idx.evaluate_on_groups(df, groups, state)?; + let oob_err = || { + let msg = "Out of bounds."; + Err(expression_err!(msg, self.expr, ComputeError)) + }; + let idx = match idx.state { AggState::AggregatedFlat(s) => { @@ -74,7 +80,7 @@ impl PhysicalExpr for TakeExpr { Some(idx) => idx >= g.len() as IdxSize, }, ) { - return Err(PolarsError::ComputeError("out of bounds".into())); + return oob_err(); } idx.into_iter() @@ -91,7 +97,7 @@ impl PhysicalExpr for TakeExpr { Some(idx) => idx >= g[1], }) { - return Err(PolarsError::ComputeError("out of bounds".into())); + return oob_err(); } idx.into_iter() @@ -130,18 +136,14 @@ impl PhysicalExpr for TakeExpr { let idx: NoNull = match groups.as_ref() { GroupsProxy::Idx(groups) => { if groups.all().iter().any(|g| idx >= g.len() as IdxSize) { - return Err(PolarsError::ComputeError( - "out of bounds".into(), - )); + return oob_err(); } groups.first().iter().map(|f| *f + idx).collect_trusted() } GroupsProxy::Slice { groups, .. } => { if groups.iter().any(|g| idx >= g[1]) { - return Err(PolarsError::ComputeError( - "out of bounds".into(), - )); + return oob_err(); } groups.iter().map(|g| g[0] + idx).collect_trusted() diff --git a/polars/polars-lazy/src/physical_plan/expressions/ternary.rs b/polars/polars-lazy/src/physical_plan/expressions/ternary.rs index 65721b4b52452..39c0ecc3f48ba 100644 --- a/polars/polars-lazy/src/physical_plan/expressions/ternary.rs +++ b/polars/polars-lazy/src/physical_plan/expressions/ternary.rs @@ -5,6 +5,7 @@ use polars_core::frame::groupby::GroupsProxy; use polars_core::prelude::*; use polars_core::POOL; +use crate::physical_plan::expression_err; use crate::physical_plan::state::{ExecutionState, StateFlags}; use crate::prelude::*; @@ -182,7 +183,8 @@ impl PhysicalExpr for TernaryExpr { let mask = mask_s.bool()?; let check_length = |ca: &ListChunked, mask: &BooleanChunked| { if ca.len() != mask.len() { - Err(PolarsError::ComputeError(format!("the predicates length: '{}' does not match the length of the groups: {}", mask.len(), ca.len()).into())) + let msg = format!("The predicates length: '{}' does not match the length of the groups: {}.", mask.len(), ca.len()); + Err(expression_err!(msg, self.expr, ComputeError)) } else { Ok(()) } diff --git a/polars/polars-lazy/src/physical_plan/expressions/window.rs b/polars/polars-lazy/src/physical_plan/expressions/window.rs index deb7dd959c8c2..b6768574acb01 100644 --- a/polars/polars-lazy/src/physical_plan/expressions/window.rs +++ b/polars/polars-lazy/src/physical_plan/expressions/window.rs @@ -11,6 +11,7 @@ use polars_core::POOL; use polars_utils::sort::perfect_sort; use super::*; +use crate::physical_plan::expression_err; use crate::physical_plan::state::ExecutionState; use crate::prelude::*; @@ -136,22 +137,19 @@ impl WindowExpr { (true, true, _) => Ok(MapStrategy::ExplodeLater), // Explode all the aggregated lists. Maybe add later? (true, false, _) => { - Err(PolarsError::ComputeError("This operation is likely not what you want (you may need '.list()'). Please open an issue if you really want to do this".into())) + let msg = "This operation is likely not what you want (you may need '.list()'). Please open an issue if you really want to do this"; + Err(expression_err!(msg, self.expr, ComputeError)) } // explicit list // `(col("x").sum() * col("y")).list().over("groups")` - (false, true, _) => { - Ok(MapStrategy::Join) - } + (false, true, _) => Ok(MapStrategy::Join), // aggregations //`sum("foo").over("groups")` - (false, false, AggState::AggregatedFlat(_)) => { - Ok(MapStrategy::Join) - } + (false, false, AggState::AggregatedFlat(_)) => Ok(MapStrategy::Join), // no explicit aggregations, map over the groups //`(col("x").sum() * col("y")).over("groups")` (false, false, AggState::AggregatedList(_)) => { - if sorted_keys { + if sorted_keys { if let GroupsProxy::Idx(g) = gb.get_groups() { debug_assert!(g.is_sorted()) } @@ -175,12 +173,9 @@ impl WindowExpr { } else { Ok(MapStrategy::Map) } - } // literals, do nothing and let broadcast - (false, false, AggState::Literal(_)) => { - Ok(MapStrategy::Nothing) - } + (false, false, AggState::Literal(_)) => Ok(MapStrategy::Nothing), } } } @@ -385,12 +380,11 @@ impl PhysicalExpr for WindowExpr { group.len(), output.unwrap() ); - Err(PolarsError::ComputeError(err_msg.into())) + Err(expression_err!(err_msg, self.expr, ComputeError)) } else { - Err(PolarsError::ComputeError( - "The length of the window expression did not match that of the group." - .into(), - )) + let msg = + "The length of the window expression did not match that of the group."; + Err(expression_err!(msg, self.expr, ComputeError)) }; } diff --git a/polars/polars-lazy/src/physical_plan/mod.rs b/polars/polars-lazy/src/physical_plan/mod.rs index 78ad591abe5b3..5db06afd8a387 100644 --- a/polars/polars-lazy/src/physical_plan/mod.rs +++ b/polars/polars-lazy/src/physical_plan/mod.rs @@ -1,3 +1,4 @@ +mod errors; pub mod executors; #[cfg(any(feature = "list_eval", feature = "pivot"))] pub(crate) mod exotic; @@ -9,6 +10,7 @@ pub mod planner; pub(crate) mod state; pub(crate) mod streaming; +use errors::expression_err; use polars_core::prelude::*; use polars_io::predicates::PhysicalIoExpr;