diff --git a/datafusion/core/src/physical_optimizer/mod.rs b/datafusion/core/src/physical_optimizer/mod.rs index 0e68a05d855c..1a4e4095e65e 100644 --- a/datafusion/core/src/physical_optimizer/mod.rs +++ b/datafusion/core/src/physical_optimizer/mod.rs @@ -29,7 +29,6 @@ pub mod join_selection; pub mod limited_distinct_aggregation; pub mod optimizer; pub mod projection_pushdown; -pub mod pruning; pub mod replace_with_order_preserving_variants; pub mod sanity_checker; #[cfg(test)] diff --git a/datafusion/expr-common/Cargo.toml b/datafusion/expr-common/Cargo.toml index 7e477efc4ebc..37e694e761b0 100644 --- a/datafusion/expr-common/Cargo.toml +++ b/datafusion/expr-common/Cargo.toml @@ -17,7 +17,7 @@ [package] name = "datafusion-expr-common" -description = "Logical plan and expression representation for DataFusion query engine" +description = "Common types and traits for plan and expression representation for DataFusion query engine" keywords = ["datafusion", "logical", "plan", "expressions"] readme = "README.md" version = { workspace = true } diff --git a/datafusion/expr-common/src/lib.rs b/datafusion/expr-common/src/lib.rs index 179dd75ace85..93186adaa877 100644 --- a/datafusion/expr-common/src/lib.rs +++ b/datafusion/expr-common/src/lib.rs @@ -17,9 +17,13 @@ //! Logical Expr types and traits for [DataFusion] //! -//! This crate contains types and traits that are used by both Logical and Physical expressions. -//! They are kept in their own crate to avoid physical expressions depending on logical expressions. -//! +//! This crate contains types and traits that are used by both Logical and +//! Physical expressions. They are kept in their own crate to avoid physical +//! expressions depending on logical expressions. +//! +//! Note this crate is not intended to have substantial logic itself, but rather +//! to provide a common set of types and traits that can be used by both logical +//! and physical expressions. //! //! 
[DataFusion]: diff --git a/datafusion/physical-optimizer/Cargo.toml b/datafusion/physical-optimizer/Cargo.toml index 125ea6acc77f..afa5c25e533a 100644 --- a/datafusion/physical-optimizer/Cargo.toml +++ b/datafusion/physical-optimizer/Cargo.toml @@ -32,7 +32,10 @@ rust-version = { workspace = true } workspace = true [dependencies] +arrow = { workspace = true } datafusion-common = { workspace = true, default-features = true } datafusion-execution = { workspace = true } +datafusion-expr-common = { workspace = true } datafusion-physical-expr = { workspace = true } datafusion-physical-plan = { workspace = true } +log = { workspace = true } diff --git a/datafusion/physical-optimizer/src/lib.rs b/datafusion/physical-optimizer/src/lib.rs index d54e6dbcab8f..4b81b35ef9cb 100644 --- a/datafusion/physical-optimizer/src/lib.rs +++ b/datafusion/physical-optimizer/src/lib.rs @@ -23,3 +23,5 @@ mod optimizer; pub mod output_requirements; pub use optimizer::PhysicalOptimizerRule; + +pub mod pruning; diff --git a/datafusion/core/src/physical_optimizer/pruning.rs b/datafusion/physical-optimizer/src/pruning.rs similarity index 99% rename from datafusion/core/src/physical_optimizer/pruning.rs rename to datafusion/physical-optimizer/src/pruning.rs index 0ef390fff45c..8229db592c80 100644 --- a/datafusion/core/src/physical_optimizer/pruning.rs +++ b/datafusion/physical-optimizer/src/pruning.rs @@ -22,28 +22,24 @@ use std::collections::HashSet; use std::sync::Arc; -use crate::{ - common::{Column, DFSchema}, - error::{DataFusionError, Result}, - logical_expr::Operator, - physical_plan::{ColumnarValue, PhysicalExpr}, -}; - +use arrow::array::AsArray; use arrow::{ array::{new_null_array, ArrayRef, BooleanArray}, datatypes::{DataType, Field, Schema, SchemaRef}, record_batch::{RecordBatch, RecordBatchOptions}, }; -use arrow_array::cast::AsArray; use datafusion_common::tree_node::TransformedResult; use datafusion_common::{ internal_err, plan_datafusion_err, plan_err, tree_node::{Transformed, 
TreeNode}, - ScalarValue, + Column, DFSchema, ScalarValue, }; +use datafusion_common::{DataFusionError, Result}; use datafusion_physical_expr::utils::{collect_columns, Guarantee, LiteralGuarantee}; -use datafusion_physical_expr::{expressions as phys_expr, PhysicalExprRef}; +use datafusion_physical_expr::{expressions as phys_expr, PhysicalExpr, PhysicalExprRef}; +use datafusion_expr_common::operator::Operator; +use datafusion_physical_plan::ColumnarValue; use log::trace; /// A source of runtime statistical information to [`PruningPredicate`]s. @@ -615,7 +611,8 @@ impl PruningPredicate { is_always_true(&self.predicate_expr) && self.literal_guarantees.is_empty() } - pub(crate) fn required_columns(&self) -> &RequiredColumns { + /// Return the columns that are required to evaluate the pruning predicate + pub fn required_columns(&self) -> &RequiredColumns { &self.required_columns } @@ -724,7 +721,7 @@ fn is_always_true(expr: &Arc<dyn PhysicalExpr>) -> bool { /// Handles creating references to the min/max statistics /// for columns as well as recording which statistics are needed #[derive(Debug, Default, Clone)] -pub(crate) struct RequiredColumns { +pub struct RequiredColumns { /// The statistics required to evaluate this predicate: /// * The unqualified column in the input schema /// * Statistics type (e.g. Min or Max or Null_Count) @@ -746,7 +743,7 @@ impl RequiredColumns { /// * `a > 5 OR a < 10` returns `Some(a)` /// * `a > 5 OR b < 10` returns `None` /// * `true` returns None - pub(crate) fn single_column(&self) -> Option<&phys_expr::Column> { + pub fn single_column(&self) -> Option<&phys_expr::Column> { if self.columns.windows(2).all(|w| { // check if all columns are the same (ignoring statistics and field) let c1 = &w[0].0;