From f3257f5b5ccf981dd1dba1c17cc411a24049cce6 Mon Sep 17 00:00:00 2001 From: Ben Chambers Date: Fri, 2 Jun 2023 11:26:21 -0700 Subject: [PATCH] ref: create an Exprs --- crates/sparrow-physical/src/expr.rs | 29 +++++++++++++++++++++++++++++ crates/sparrow-physical/src/step.rs | 22 +++++++++++----------- 2 files changed, 40 insertions(+), 11 deletions(-) diff --git a/crates/sparrow-physical/src/expr.rs b/crates/sparrow-physical/src/expr.rs index bb1592571..1e6a8c6f2 100644 --- a/crates/sparrow-physical/src/expr.rs +++ b/crates/sparrow-physical/src/expr.rs @@ -2,6 +2,35 @@ use std::borrow::Cow; use arrow_schema::DataType; +/// Represents 1 or more values computed by expressions. +#[derive(Debug, serde::Serialize, serde::Deserialize)] +pub struct Exprs { + /// The expressions computing the intermediate values. + pub exprs: Vec, + /// The indices of columns to output. + pub outputs: Vec, +} + +impl Exprs { + /// Create expressions computing the value of the last expression. + pub fn singleton(exprs: Vec) -> Self { + let output = exprs.len() - 1; + Self { + exprs, + outputs: vec![output.into()], + } + } + + pub fn is_singleton(&self) -> bool { + self.outputs.len() == 1 + } + + /// Return the number of outputs produced by these expressions. + pub fn output_len(&self) -> usize { + self.outputs.len() + } +} + /// The identifier (index) of an expression. #[derive(Debug, serde::Serialize, serde::Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)] #[repr(transparent)] diff --git a/crates/sparrow-physical/src/step.rs b/crates/sparrow-physical/src/step.rs index d240644d9..233acbb4e 100644 --- a/crates/sparrow-physical/src/step.rs +++ b/crates/sparrow-physical/src/step.rs @@ -1,5 +1,7 @@ use arrow_schema::SchemaRef; +use crate::Exprs; + /// The identifier (index) of a step. #[derive(Debug, serde::Serialize, serde::Deserialize, PartialEq, Eq, PartialOrd, Ord, Hash)] @@ -41,27 +43,25 @@ pub enum StepKind { /// The output includes the same rows as the input, but with columns /// projected as configured. Project { - /// Expressions to apply to compute additional input columns. - exprs: Vec, - /// Indices of expressions to use for the output. + /// Expressions to compute the projection. /// - /// The length should be the same as the number of fields in the schema. - outputs: Vec, + /// The length of the outputs should be the same as the fields in the schema. + exprs: Exprs, }, /// Filter the results based on a boolean predicate. Filter { /// Expressions to apply to compute the predicate. /// - /// The last expression should be the boolean predicate. - exprs: Vec, + /// There should be a single output producing a boolean value. + exprs: Exprs, }, /// A step that repartitions the output. Repartition { num_partitions: usize, - /// Expressions to apply to compute columns which may be referenced by `keys`. - exprs: Vec, - /// Indices of expression columns representing the keys. - keys: Vec, + /// Expressions to compute the keys. + /// + /// Each output corresponds to a part of the key. + keys: Exprs, }, Error, }