From ca9d03430e796dc462176e397fd44bfed6b2fd07 Mon Sep 17 00:00:00 2001 From: ritchie Date: Tue, 10 Sep 2024 09:48:54 +0200 Subject: [PATCH 1/3] chore(rust): Feature gate iejoin --- crates/polars-lazy/Cargo.toml | 1 + crates/polars-ops/Cargo.toml | 1 + crates/polars-ops/src/frame/join/args.rs | 3 +++ crates/polars-ops/src/frame/join/mod.rs | 5 +++++ crates/polars-plan/Cargo.toml | 1 + .../polars-plan/src/plans/conversion/join.rs | 22 ++++++++++++------- crates/polars-python/Cargo.toml | 1 + .../src/lazyframe/visitor/nodes.rs | 1 + crates/polars/Cargo.toml | 1 + 9 files changed, 28 insertions(+), 8 deletions(-) diff --git a/crates/polars-lazy/Cargo.toml b/crates/polars-lazy/Cargo.toml index 03fcc0d8b2c8..a3efd659a77f 100644 --- a/crates/polars-lazy/Cargo.toml +++ b/crates/polars-lazy/Cargo.toml @@ -169,6 +169,7 @@ is_between = ["polars-plan/is_between", "polars-expr/is_between"] is_unique = ["polars-plan/is_unique"] cross_join = ["polars-plan/cross_join", "polars-pipe?/cross_join", "polars-ops/cross_join"] asof_join = ["polars-plan/asof_join", "polars-time", "polars-ops/asof_join", "polars-mem-engine/asof_join"] +iejoin = ["polars-plan/iejoin"] business = ["polars-plan/business"] concat_str = ["polars-plan/concat_str"] range = ["polars-plan/range"] diff --git a/crates/polars-ops/Cargo.toml b/crates/polars-ops/Cargo.toml index 0782f188b1df..2f37857c9cd2 100644 --- a/crates/polars-ops/Cargo.toml +++ b/crates/polars-ops/Cargo.toml @@ -117,6 +117,7 @@ pivot = ["polars-core/reinterpret", "polars-core/dtype-struct"] cross_join = [] chunked_ids = [] asof_join = [] +iejoin = [] semi_anti_join = [] array_any_all = ["dtype-array"] array_count = ["dtype-array"] diff --git a/crates/polars-ops/src/frame/join/args.rs b/crates/polars-ops/src/frame/join/args.rs index 2f5d6504eba7..10eee5d765df 100644 --- a/crates/polars-ops/src/frame/join/args.rs +++ b/crates/polars-ops/src/frame/join/args.rs @@ -58,6 +58,7 @@ impl JoinCoalesce { }, #[cfg(feature = "asof_join")] AsOf(_) => 
matches!(self, JoinSpecific | CoalesceColumns), + #[cfg(feature = "iejoin")] IEJoin(_) => false, Cross => false, #[cfg(feature = "semi_anti_join")] @@ -121,6 +122,7 @@ pub enum JoinType { Semi, #[cfg(feature = "semi_anti_join")] Anti, + #[cfg(feature = "iejoin")] IEJoin(IEJoinOptions), } @@ -140,6 +142,7 @@ impl Display for JoinType { Full { .. } => "FULL", #[cfg(feature = "asof_join")] AsOf(_) => "ASOF", + #[cfg(feature = "iejoin")] IEJoin(_) => "IEJOIN", Cross => "CROSS", #[cfg(feature = "semi_anti_join")] diff --git a/crates/polars-ops/src/frame/join/mod.rs b/crates/polars-ops/src/frame/join/mod.rs index 433bffd232dd..89507ac216c5 100644 --- a/crates/polars-ops/src/frame/join/mod.rs +++ b/crates/polars-ops/src/frame/join/mod.rs @@ -7,6 +7,7 @@ mod cross_join; mod dispatch_left_right; mod general; mod hash_join; +#[cfg(feature = "iejoin")] mod iejoin; #[cfg(feature = "merge_sorted")] mod merge_sorted; @@ -29,6 +30,7 @@ use general::create_chunked_index_mapping; pub use general::{_coalesce_full_join, _finish_join, _join_suffix_name}; pub use hash_join::*; use hashbrown::hash_map::{Entry, RawEntryMut}; +#[cfg(feature = "iejoin")] pub use iejoin::{IEJoinOptions, InequalityOperator}; #[cfg(feature = "merge_sorted")] pub use merge_sorted::_merge_sorted_dfs; @@ -199,6 +201,7 @@ pub trait DataFrameJoinOps: IntoDf { } } + #[cfg(feature = "iejoin")] if let JoinType::IEJoin(options) = args.how { let func = if POOL.current_num_threads() > 1 && !left_df.is_empty() && !other.is_empty() { @@ -289,6 +292,7 @@ pub trait DataFrameJoinOps: IntoDf { panic!("expected by arguments on both sides") }, }, + #[cfg(feature = "iejoin")] JoinType::IEJoin(_) => { unreachable!() }, @@ -316,6 +320,7 @@ pub trait DataFrameJoinOps: IntoDf { JoinType::AsOf(_) => polars_bail!( ComputeError: "asof join not supported for join on multiple keys" ), + #[cfg(feature = "iejoin")] JoinType::IEJoin(_) => { unreachable!() }, diff --git a/crates/polars-plan/Cargo.toml b/crates/polars-plan/Cargo.toml index 
dd33428c8398..7edc15ea8616 100644 --- a/crates/polars-plan/Cargo.toml +++ b/crates/polars-plan/Cargo.toml @@ -116,6 +116,7 @@ is_unique = ["polars-ops/is_unique"] is_between = ["polars-ops/is_between"] cross_join = ["polars-ops/cross_join"] asof_join = ["polars-time", "polars-ops/asof_join"] +iejoin = ["polars-ops/iejoin"] concat_str = [] business = ["polars-ops/business"] range = [] diff --git a/crates/polars-plan/src/plans/conversion/join.rs b/crates/polars-plan/src/plans/conversion/join.rs index 36701f9ab5a7..53a7bbd2274a 100644 --- a/crates/polars-plan/src/plans/conversion/join.rs +++ b/crates/polars-plan/src/plans/conversion/join.rs @@ -1,8 +1,10 @@ use arrow::legacy::error::PolarsResult; use either::Either; +use polars_core::error::feature_gated; use super::*; use crate::dsl::Expr; +#[cfg(feature = "iejoin")] use crate::plans::AExpr; fn check_join_keys(keys: &[Expr]) -> PolarsResult<()> { @@ -26,14 +28,16 @@ pub fn resolve_join( ctxt: &mut DslConversionContext, ) -> PolarsResult { if !predicates.is_empty() { - debug_assert!(left_on.is_empty() && right_on.is_empty()); - return resolve_join_where( - input_left.unwrap_left(), - input_right.unwrap_left(), - predicates, - options, - ctxt, - ); + feature_gated!("iejoin", { + debug_assert!(left_on.is_empty() && right_on.is_empty()); + return resolve_join_where( + input_left.unwrap_left(), + input_right.unwrap_left(), + predicates, + options, + ctxt, + ); + }) } let owned = Arc::unwrap_or_clone; @@ -119,6 +123,7 @@ pub fn resolve_join( run_conversion(lp, ctxt, "join") } +#[cfg(feature = "iejoin")] impl From<InequalityOperator> for Operator { fn from(value: InequalityOperator) -> Self { match value { @@ -130,6 +135,7 @@ impl From<InequalityOperator> for Operator { } } +#[cfg(feature = "iejoin")] fn resolve_join_where( input_left: Arc, input_right: Arc, diff --git a/crates/polars-python/Cargo.toml b/crates/polars-python/Cargo.toml index b93d34a678e5..9ed35648c89f 100644 --- a/crates/polars-python/Cargo.toml +++ b/crates/polars-python/Cargo.toml @@ -122,6
+122,7 @@ json = ["polars/serde", "serde_json", "polars/json", "polars-utils/serde"] trigonometry = ["polars/trigonometry"] sign = ["polars/sign"] asof_join = ["polars/asof_join"] +iejoin = ["polars/iejoin"] cross_join = ["polars/cross_join"] pct_change = ["polars/pct_change"] repeat_by = ["polars/repeat_by"] diff --git a/crates/polars-python/src/lazyframe/visitor/nodes.rs b/crates/polars-python/src/lazyframe/visitor/nodes.rs index 4e9344a61d15..d8dbb71281bc 100644 --- a/crates/polars-python/src/lazyframe/visitor/nodes.rs +++ b/crates/polars-python/src/lazyframe/visitor/nodes.rs @@ -481,6 +481,7 @@ pub(crate) fn into_py(py: Python<'_>, plan: &IR) -> PyResult { JoinType::Cross => "cross", JoinType::Semi => "leftsemi", JoinType::Anti => "leftanti", + #[cfg(feature = "iejoin")] JoinType::IEJoin(_) => return Err(PyNotImplementedError::new_err("IEJoin")), }, options.args.join_nulls, diff --git a/crates/polars/Cargo.toml b/crates/polars/Cargo.toml index a27907484369..b858dbc36678 100644 --- a/crates/polars/Cargo.toml +++ b/crates/polars/Cargo.toml @@ -130,6 +130,7 @@ approx_unique = ["polars-lazy?/approx_unique", "polars-ops/approx_unique"] arg_where = ["polars-lazy?/arg_where"] array_any_all = ["polars-lazy?/array_any_all", "dtype-array"] asof_join = ["polars-lazy?/asof_join", "polars-ops/asof_join"] +iejoin = ["polars-lazy?/iejoin"] binary_encoding = ["polars-ops/binary_encoding", "polars-lazy?/binary_encoding", "polars-sql?/binary_encoding"] business = ["polars-lazy?/business", "polars-ops/business"] checked_arithmetic = ["polars-core/checked_arithmetic"] From 92624a7bd880d68c4997312dfc5258d9bc8af000 Mon Sep 17 00:00:00 2001 From: ritchie Date: Tue, 10 Sep 2024 10:14:27 +0200 Subject: [PATCH 2/3] feature --- py-polars/Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/py-polars/Cargo.toml b/py-polars/Cargo.toml index a42c643516ea..05f5d6152449 100644 --- a/py-polars/Cargo.toml +++ b/py-polars/Cargo.toml @@ -45,6 +45,7 @@ is_in = ["polars-python/is_in"] json 
= ["polars-python/json"] sign = ["polars-python/sign"] asof_join = ["polars-python/asof_join"] +iejoin = ["polars-python/iejoin"] cross_join = ["polars-python/cross_join"] pct_change = ["polars-python/pct_change"] repeat_by = ["polars-python/repeat_by"] From dc8a1e2d02f733fb27b82f7c4467d7da991e067a Mon Sep 17 00:00:00 2001 From: ritchie Date: Tue, 10 Sep 2024 10:36:52 +0200 Subject: [PATCH 3/3] activate --- py-polars/Cargo.toml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/py-polars/Cargo.toml b/py-polars/Cargo.toml index 05f5d6152449..1147cbdde89a 100644 --- a/py-polars/Cargo.toml +++ b/py-polars/Cargo.toml @@ -9,7 +9,7 @@ crate-type = ["cdylib"] [dependencies] libc = { workspace = true } -polars-python = { workspace = true, features = ["pymethods"] } +polars-python = { workspace = true, features = ["pymethods", "iejoin"] } pyo3 = { workspace = true, features = ["abi3-py38", "chrono", "extension-module", "multiple-pymethods"] } [build-dependencies] @@ -45,7 +45,6 @@ is_in = ["polars-python/is_in"] json = ["polars-python/json"] sign = ["polars-python/sign"] asof_join = ["polars-python/asof_join"] -iejoin = ["polars-python/iejoin"] cross_join = ["polars-python/cross_join"] pct_change = ["polars-python/pct_change"] repeat_by = ["polars-python/repeat_by"]