Skip to content

Commit

Permalink
Move PruningPredicate to physical-optimizer crate
Browse files Browse the repository at this point in the history
  • Loading branch information
alamb committed Aug 16, 2024
1 parent 300a08c commit 5a5fa91
Show file tree
Hide file tree
Showing 6 changed files with 25 additions and 18 deletions.
1 change: 0 additions & 1 deletion datafusion/core/src/physical_optimizer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ pub mod join_selection;
pub mod limited_distinct_aggregation;
pub mod optimizer;
pub mod projection_pushdown;
pub mod pruning;
pub mod replace_with_order_preserving_variants;
pub mod sanity_checker;
#[cfg(test)]
Expand Down
2 changes: 1 addition & 1 deletion datafusion/expr-common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

[package]
name = "datafusion-expr-common"
description = "Logical plan and expression representation for DataFusion query engine"
description = "Common types and traits for plan and expression representation for DataFusion query engine"
keywords = ["datafusion", "logical", "plan", "expressions"]
readme = "README.md"
version = { workspace = true }
Expand Down
12 changes: 9 additions & 3 deletions datafusion/expr-common/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,13 @@

//! Logical Expr types and traits for [DataFusion]
//!
//! This crate contains types and traits that are used by both Logical and Physical expressions.
//! They are kept in their own crate to avoid physical expressions depending on logical expressions.
//!
//! This crate contains types and traits that are used by both Logical and
//! Physical expressions. They are kept in their own crate to avoid physical
//! expressions depending on logical expressions.
//!
//! Note this crate is not intended to have substantial logic itself, but rather
//! to provide a common set of types and traits that can be used by both logical
//! and physical expressions.
//!
//! [DataFusion]: <https://crates.io/crates/datafusion>
Expand All @@ -34,3 +38,5 @@ pub mod operator;
pub mod signature;
pub mod sort_properties;
pub mod type_coercion;

pub use operator::Operator;
3 changes: 3 additions & 0 deletions datafusion/physical-optimizer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,10 @@ rust-version = { workspace = true }
workspace = true

[dependencies]
arrow = { workspace = true }
datafusion-common = { workspace = true, default-features = true }
datafusion-execution = { workspace = true }
datafusion-expr-common = { workspace = true }
datafusion-physical-expr = { workspace = true }
datafusion-physical-plan = { workspace = true }
log = { workspace = true }
2 changes: 2 additions & 0 deletions datafusion/physical-optimizer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,5 @@ mod optimizer;
pub mod output_requirements;

pub use optimizer::PhysicalOptimizerRule;

pub mod pruning;
Original file line number Diff line number Diff line change
Expand Up @@ -22,28 +22,24 @@
use std::collections::HashSet;
use std::sync::Arc;

use crate::{
common::{Column, DFSchema},
error::{DataFusionError, Result},
logical_expr::Operator,
physical_plan::{ColumnarValue, PhysicalExpr},
};

use arrow::array::AsArray;
use arrow::{
array::{new_null_array, ArrayRef, BooleanArray},
datatypes::{DataType, Field, Schema, SchemaRef},
record_batch::{RecordBatch, RecordBatchOptions},
};
use arrow_array::cast::AsArray;
use datafusion_common::tree_node::TransformedResult;
use datafusion_common::{
internal_err, plan_datafusion_err, plan_err,
tree_node::{Transformed, TreeNode},
ScalarValue,
Column, DFSchema, ScalarValue,
};
use datafusion_common::{DataFusionError, Result};
use datafusion_physical_expr::utils::{collect_columns, Guarantee, LiteralGuarantee};
use datafusion_physical_expr::{expressions as phys_expr, PhysicalExprRef};
use datafusion_physical_expr::{expressions as phys_expr, PhysicalExpr, PhysicalExprRef};

use datafusion_expr_common::operator::Operator;
use datafusion_physical_plan::ColumnarValue;
use log::trace;

/// A source of runtime statistical information to [`PruningPredicate`]s.
Expand Down Expand Up @@ -615,7 +611,8 @@ impl PruningPredicate {
is_always_true(&self.predicate_expr) && self.literal_guarantees.is_empty()
}

pub(crate) fn required_columns(&self) -> &RequiredColumns {
/// Return the columns that are required to evaluate the pruning predicate
pub fn required_columns(&self) -> &RequiredColumns {
&self.required_columns
}

Expand Down Expand Up @@ -724,7 +721,7 @@ fn is_always_true(expr: &Arc<dyn PhysicalExpr>) -> bool {
/// Handles creating references to the min/max statistics
/// for columns as well as recording which statistics are needed
#[derive(Debug, Default, Clone)]
pub(crate) struct RequiredColumns {
pub struct RequiredColumns {
/// The statistics required to evaluate this predicate:
/// * The unqualified column in the input schema
/// * Statistics type (e.g. Min or Max or Null_Count)
Expand All @@ -746,7 +743,7 @@ impl RequiredColumns {
/// * `a > 5 OR a < 10` returns `Some(a)`
/// * `a > 5 OR b < 10` returns `None`
/// * `true` returns None
pub(crate) fn single_column(&self) -> Option<&phys_expr::Column> {
pub fn single_column(&self) -> Option<&phys_expr::Column> {
if self.columns.windows(2).all(|w| {
// check if all columns are the same (ignoring statistics and field)
let c1 = &w[0].0;
Expand Down

0 comments on commit 5a5fa91

Please sign in to comment.