From aee5967619819828fc821f24b2b3a58122f441cc Mon Sep 17 00:00:00 2001 From: sorhawell Date: Sun, 3 Dec 2023 23:51:52 +0100 Subject: [PATCH] pl$Struct + $join_asof --- R/dataframe__frame.R | 2 +- R/datatype.R | 16 +++------- R/lazyframe__lazy.R | 2 +- man/DataFrame_join_asof.Rd | 2 +- man/LazyFrame_join_asof.Rd | 2 +- src/rust/src/lazy/dataframe.rs | 34 ++++++++------------- src/rust/src/rdatatype.rs | 56 ++++++++++++++++------------------ src/rust/src/utils/mod.rs | 4 +++ tests/testthat/test-datatype.R | 7 ++--- 9 files changed, 54 insertions(+), 71 deletions(-) diff --git a/R/dataframe__frame.R b/R/dataframe__frame.R index 2c8c655b9..36038e34f 100644 --- a/R/dataframe__frame.R +++ b/R/dataframe__frame.R @@ -1298,7 +1298,7 @@ DataFrame_join_asof = function( by_left = NULL, by_right = NULL, by = NULL, - strategy = "backward", + strategy = c("backward", "forward", "nearest"), suffix = "_right", tolerance = NULL, allow_parallel = TRUE, diff --git a/R/datatype.R b/R/datatype.R index 24a6736cd..6a8543092 100644 --- a/R/datatype.R +++ b/R/datatype.R @@ -184,13 +184,7 @@ DataType_constructors = list( # doc below pl_Struct Struct = function(...) { result({ - largs = list2(...) - if (length(largs) >= 1 && is.list(largs[[1]])) { - largs = largs[[1]] - element_name = "list element" - } else { - element_name = "positional argument" - } + largs = unpack_list(...) mapply( names(largs) %||% character(length(largs)), largs, @@ -202,10 +196,10 @@ DataType_constructors = list( if (inherits(arg, "RPolarsRField")) { return(arg) } - stop( - "%s [%s] {name:'%s', value:%s} must either be a Field (pl$Field) or a named %s", - element_name, i, name, arg, "DataType see (pl$dtypes), see examples for pl$Struct()" - ) + stop(sprintf( + "element [%s] {name:'%s', value:%s} must either be a Field (pl$Field) or a named %s", + i, name, arg, "DataType see (pl$dtypes), see examples for pl$Struct()" + )) }, SIMPLIFY = FALSE ) }) |> diff --git a/R/lazyframe__lazy.R b/R/lazyframe__lazy.R index 81bcb1f4e..e96e018f1 100644 --- a/R/lazyframe__lazy.R +++ b/R/lazyframe__lazy.R @@ -1143,7 +1143,7 @@ LazyFrame_join_asof = function( by_left = NULL, by_right = NULL, by = NULL, - strategy = "backward", + strategy = c("backward", "forward", "nearest"), suffix = "_right", tolerance = NULL, allow_parallel = TRUE, diff --git a/man/DataFrame_join_asof.Rd b/man/DataFrame_join_asof.Rd index 0338e41eb..35940ce97 100644 --- a/man/DataFrame_join_asof.Rd +++ b/man/DataFrame_join_asof.Rd @@ -13,7 +13,7 @@ DataFrame_join_asof( by_left = NULL, by_right = NULL, by = NULL, - strategy = "backward", + strategy = c("backward", "forward", "nearest"), suffix = "_right", tolerance = NULL, allow_parallel = TRUE, diff --git a/man/LazyFrame_join_asof.Rd b/man/LazyFrame_join_asof.Rd index 9281fa324..d05da4044 100644 --- a/man/LazyFrame_join_asof.Rd +++ b/man/LazyFrame_join_asof.Rd @@ -13,7 +13,7 @@ LazyFrame_join_asof( by_left = NULL, by_right = NULL, by = NULL, - strategy = "backward", + strategy = c("backward", "forward", "nearest"), suffix = "_right", tolerance = NULL, allow_parallel = TRUE, diff --git a/src/rust/src/lazy/dataframe.rs b/src/rust/src/lazy/dataframe.rs index 8e6a7a6e5..183a19d34 100644 --- a/src/rust/src/lazy/dataframe.rs +++ b/src/rust/src/lazy/dataframe.rs @@ -8,12 +8,11 @@ use crate::lazy::dsl::*; use crate::rdataframe::RPolarsDataFrame as RDF; use crate::rdatatype::{ - new_asof_strategy, new_ipc_compression, new_parquet_compression, new_unique_keep_strategy, - RPolarsDataType, + new_ipc_compression, new_parquet_compression, new_unique_keep_strategy, RPolarsDataType, }; use crate::robj_to; -use crate::rpolarserr::{polars_to_rpolars_err, RResult, Rctx, WithRctx}; -use crate::utils::{r_result_list, try_f64_into_usize, wrappers::null_to_opt}; +use crate::rpolarserr::{polars_to_rpolars_err, RPolarsErr, RResult, WithRctx}; +use crate::utils::{r_result_list, try_f64_into_usize}; use extendr_api::prelude::*; use polars::frame::explode::MeltArgs; use polars::prelude as pl; @@ -362,24 +361,17 @@ impl RPolarsLazyFrame { other: Robj, left_on: Robj, right_on: Robj, - left_by: Nullable, - right_by: Nullable, + left_by: Robj, + right_by: Robj, allow_parallel: Robj, force_parallel: Robj, suffix: Robj, strategy: Robj, tolerance: Robj, tolerance_str: Robj, - ) -> Result { - //TODO upgrade robj_to to handle variadic composed types, as - // robj_to!(Option, Vec, left_by), instead of this ad-hoc conversion - // using Nullable to handle outer Option and robj_to! for inner Vec - let left_by = null_to_opt(left_by) - .map(|left_by| robj_to!(Vec, String, left_by)) - .transpose()?; - let right_by = null_to_opt(right_by) - .map(|right_by| robj_to!(Vec, String, right_by)) - .transpose()?; + ) -> RResult { + let left_by = robj_to!(Option, Vec, String, left_by)?; + let right_by = robj_to!(Option, Vec, String, right_by)?; // polars AnyValue<&str> is not self owned, therefore rust-polars // chose to handle tolerance_str isolated as a String. Only one, if any, @@ -388,8 +380,10 @@ impl RPolarsLazyFrame { // like tolerance = pl$lit(42)$cast(pl$UInt64). let tolerance = robj_to!(Option, Expr, tolerance)? + //TODO expr_to_any_value should return RResult .map(|e| crate::rdatatype::expr_to_any_value(e.0)) - .transpose()?; + .transpose() + .map_err(|err| RPolarsErr::new().plain(err))?; let tolerance_str = robj_to!(Option, String, tolerance_str)?; Ok(self @@ -402,11 +396,7 @@ impl RPolarsLazyFrame { .allow_parallel(robj_to!(bool, allow_parallel)?) .force_parallel(robj_to!(bool, force_parallel)?) .how(pl::JoinType::AsOf(AsOfOptions { - strategy: robj_to!(str, strategy).and_then(|s| { - new_asof_strategy(s) - .map_err(Rctx::Plain) - .bad_arg("stragegy") - })?, + strategy: robj_to!(AsOfStrategy, strategy)?, left_by: left_by.map(|opt_vec_s| opt_vec_s.into_iter().map(|s| s.into()).collect()), right_by: right_by .map(|opt_vec_s| opt_vec_s.into_iter().map(|s| s.into()).collect()), diff --git a/src/rust/src/rdatatype.rs b/src/rust/src/rdatatype.rs index 1b48d00f9..bc6403450 100644 --- a/src/rust/src/rdatatype.rs +++ b/src/rust/src/rdatatype.rs @@ -1,11 +1,12 @@ use crate::robj_to; + use crate::utils::wrappers::Wrap; use crate::utils::{r_result_list, robj_to_string}; use extendr_api::prelude::*; use polars::prelude as pl; use polars_core::prelude::QuantileInterpolOptions; //expose polars DateType in R -use crate::rpolarserr::{polars_to_rpolars_err, rerr, RResult, WithRctx}; +use crate::rpolarserr::{polars_to_rpolars_err, rerr, RPolarsErr, RResult, WithRctx}; use crate::utils::collect_hinted_result; use crate::utils::wrappers::null_to_opt; use std::result::Result; @@ -103,33 +104,30 @@ impl RPolarsDataType { todo!("object not implemented") } - pub fn new_struct(l: Robj) -> List { - let res = || -> std::result::Result { - let len = l.len(); - - //iterate over R list and collect Fields and place in a Struct-datatype - l.as_list() - .ok_or_else(|| "argument [l] is not a list".to_string()) - .map(|l| { - l.into_iter().enumerate().map(|(i, (name, robj))| { - let res: extendr_api::Result> = robj.try_into(); - res.map_err(|err| { - format!( - "list element [[{}]] named {} is not a Field: {:?}", - i + 1, - name, - err - ) - }) - .map(|ok| ok.0.clone()) + pub fn new_struct(l: Robj) -> RResult { + let len = l.len(); + //iterate over R list and collect Fields and place in a Struct-datatype + l.as_list() + .ok_or_else(|| { + RPolarsErr::new() + .bad_arg("l".into()) + .plain("is not a list".into()) + }) + .map(|l| { + l.into_iter().enumerate().map(|(i, (name, robj))| { + let res: extendr_api::Result> = robj.try_into(); + res.map_err(|extendr_err| { + RPolarsErr::from(extendr_err).plain(format!( + "list element [[{}]] named {} is not a Field.", + i + 1, + name, + )) }) + .map(|ok| ok.0.clone()) }) - .and_then(|iter| collect_hinted_result(len, iter)) - .map_err(|err| format!("in pl$Struct: {}", err)) - .map(|v_field| RPolarsDataType(pl::DataType::Struct(v_field))) - }(); - - r_result_list(res) + }) + .and_then(|iter| collect_hinted_result(len, iter)) + .map(|v_field| RPolarsDataType(pl::DataType::Struct(v_field))) } pub fn get_all_simple_type_names() -> Vec { @@ -247,11 +245,11 @@ impl RPolarsDataTypeVector { } } -pub fn new_asof_strategy(s: &str) -> Result { - match s { +pub fn robj_to_asof_strategy(robj: Robj) -> RResult { + match robj_to_rchoice(robj)?.as_str() { "forward" => Ok(AsofStrategy::Forward), "backward" => Ok(AsofStrategy::Backward), - _ => Err(format!( + s => rerr().bad_val(format!( "asof strategy choice: [{}] is not any of 'forward' or 'backward'", s )), diff --git a/src/rust/src/utils/mod.rs b/src/rust/src/utils/mod.rs index 9da0308aa..d93b03924 100644 --- a/src/rust/src/utils/mod.rs +++ b/src/rust/src/utils/mod.rs @@ -1043,6 +1043,10 @@ macro_rules! robj_to_inner { $crate::rdatatype::robj_to_join_type($a) }; + (AsOfStrategy, $a:ident) => { + $crate::rdatatype::robj_to_asof_strategy($a) + }; + (ParallelStrategy, $a:ident) => { $crate::rdatatype::robj_to_parallel_strategy($a) }; diff --git a/tests/testthat/test-datatype.R b/tests/testthat/test-datatype.R index b524cedfb..eab4671ca 100644 --- a/tests/testthat/test-datatype.R +++ b/tests/testthat/test-datatype.R @@ -25,10 +25,7 @@ test_that("plStruct", { expect_no_error(pl$Struct(bin = pl$Binary, pl$Boolean, pl$Boolean)) # wrong uses - err_state = result(pl$Struct(bin = pl$Binary, pl$Boolean, "abc")) - expect_grepl_error(unwrap(err_state), "must either be a Field") - expect_grepl_error(unwrap(err_state), "positional argument") - expect_grepl_error(unwrap(err_state), "in pl\\$Struct\\:") + ctx = pl$Struct(bin = pl$Binary, pl$Boolean, "abc") |> get_err_ctx() + expect_true(startsWith(ctx$PlainErrorMessage,"element [3] {name:'', value:abc}")) - err_state = result(pl$Struct(bin = pl$Binary, pl$Boolean, bob = 42)) })