Fix the schema mismatch between logical and physical for aggregate function, add AggregateUDFImpl::is_null #11989

Merged · 23 commits · Aug 21, 2024

Changes from 4 commits
9 changes: 6 additions & 3 deletions datafusion/core/src/physical_planner.rs
@@ -670,6 +670,10 @@ impl DefaultPhysicalPlanner {
let input_exec = children.one()?;
let physical_input_schema = input_exec.schema();
let logical_input_schema = input.as_ref().schema();
let physical_input_schema_from_logical: Arc<Schema> =
logical_input_schema.as_ref().clone().into();

debug_assert_eq!(physical_input_schema_from_logical, physical_input_schema, "Physical input schema should be the same as the one converted from logical input schema. Please file an issue or send the PR");
jayzhan211 (Contributor, Author) commented on Aug 14, 2024:

The main goal of this change is to ensure the two schemas are the same, and we then pass physical_input_schema through to the functions that require the input's schema.

Contributor commented:

Nice!

Did you consider making this function return an internal_error rather than using a debug_assert?

If we are concerned about breaking existing tests, we could add a config setting like datafusion.optimizer.skip_failed_rules to let users bypass the check.

jayzhan211 (Contributor, Author) commented on Aug 17, 2024:

The objective here is to ensure that the logical schema from ExprSchemable and the physical schema from ExecutionPlan.schema() are equivalent. If they are not, it indicates a potential schema mismatch issue. This is also why most of the code changes in this PR are schema-related fixes, and they are all required, so I don't think we should let users bypass the check 🤔

If we encounter inconsistent schemas, it raises an important question: which schema should we use?

> Did you consider making this function return an internal_error rather than using a debug_assert?

That looks good to me.
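
A sketch of what returning a hard error instead of the debug_assert might look like (illustrative only, assuming the internal_err! macro from datafusion_common; not the code merged in this PR):

    let physical_input_schema_from_logical: Arc<Schema> =
        logical_input_schema.as_ref().clone().into();

    // Fail planning instead of only asserting in debug builds.
    if physical_input_schema_from_logical != physical_input_schema {
        return internal_err!(
            "Physical input schema should be the same as the one converted from \
             logical input schema"
        );
    }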


let groups = self.create_grouping_physical_expr(
group_expr,
@@ -1548,7 +1552,7 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter(
e: &Expr,
name: Option<String>,
logical_input_schema: &DFSchema,
_physical_input_schema: &Schema,
physical_input_schema: &Schema,
execution_props: &ExecutionProps,
) -> Result<AggregateExprWithOptionalArgs> {
match e {
@@ -1599,11 +1603,10 @@ pub fn create_aggregate_expr_with_name_and_maybe_filter(
let ordering_reqs: Vec<PhysicalSortExpr> =
physical_sort_exprs.clone().unwrap_or(vec![]);

let schema: Schema = logical_input_schema.clone().into();
jayzhan211 (Contributor, Author) commented:

Workaround cleanup.

let agg_expr =
AggregateExprBuilder::new(func.to_owned(), physical_args.to_vec())
.order_by(ordering_reqs.to_vec())
.schema(Arc::new(schema))
.schema(Arc::new(physical_input_schema.to_owned()))
.alias(name)
.with_ignore_nulls(ignore_nulls)
.with_distinct(*distinct)
39 changes: 37 additions & 2 deletions datafusion/expr/src/expr_schema.rs
@@ -328,10 +328,45 @@ impl ExprSchemable for Expr {
Ok(true)
}
}
Expr::WindowFunction(WindowFunction { fun, .. }) => {
Contributor commented:

Is this change required for this PR, or is it a "drive-by" improvement?

jayzhan211 (Contributor, Author) commented:

Required.

match fun {
WindowFunctionDefinition::BuiltInWindowFunction(func) => {
if func.name() == "ROW_NUMBER"
|| func.name() == "RANK"
|| func.name() == "NTILE"
|| func.name() == "CUME_DIST"
{
Ok(false)
} else {
Ok(true)
}
}
WindowFunctionDefinition::AggregateUDF(func) => {
// TODO: UDF should be able to customize nullability
if func.name() == "count" {
// TODO: there is issue unsolved for count with window, should return false
jayzhan211 (Contributor, Author) commented:

I'm not so familiar with window functions yet, so I left this as a TODO.

Contributor commented:

Perhaps we can file a ticket to track this -- ideally it would eventually be part of the window function definition itself rather than relying on names.

Ok(true)
} else {
Ok(true)
}
}
_ => Ok(true),
}
}
Expr::ScalarFunction(ScalarFunction { func, args }) => {
// If all the element in coalesce is non-null, the result is non-null
Contributor commented:

We should probably add an API to ScalarUDFImpl to signal its null/non-nullness (as a follow-on PR) instead of hard-coding this function name:

     func.is_nullable(args)
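
A rough sketch of such a trait method (the name, signature, and default are assumptions for illustration; this API is not added in this PR):

    /// Hypothetical addition to ScalarUDFImpl: let the UDF itself report
    /// whether its result can be null for the given arguments.
    fn is_nullable(&self, _args: &[Expr], _schema: &dyn ExprSchema) -> bool {
        // Conservative default: assume the result may be null.
        true
    }

A coalesce implementation could then override it with the same all-arguments-non-null check that is hard-coded below.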

if func.name() == "coalesce"
&& args
.iter()
.all(|e| !e.nullable(input_schema).ok().unwrap_or(true))
{
return Ok(false);
}

Ok(true)
}
Expr::ScalarVariable(_, _)
| Expr::TryCast { .. }
| Expr::ScalarFunction(..)
| Expr::WindowFunction { .. }
| Expr::Unnest(_)
| Expr::Placeholder(_) => Ok(true),
Expr::IsNull(_)
12 changes: 7 additions & 5 deletions datafusion/expr/src/logical_plan/plan.rs
@@ -2015,10 +2015,9 @@ impl Projection {
/// produced by the projection operation. If the schema computation is successful,
/// the `Result` will contain the schema; otherwise, it will contain an error.
pub fn projection_schema(input: &LogicalPlan, exprs: &[Expr]) -> Result<Arc<DFSchema>> {
let mut schema = DFSchema::new_with_metadata(
exprlist_to_fields(exprs, input)?,
input.schema().metadata().clone(),
)?;
let metadata = input.schema().metadata().clone();
let mut schema =
DFSchema::new_with_metadata(exprlist_to_fields(exprs, input)?, metadata)?;
schema = schema.with_functional_dependencies(calc_func_dependencies_for_project(
exprs, input,
)?)?;
@@ -2659,7 +2658,10 @@ impl Aggregate {

qualified_fields.extend(exprlist_to_fields(aggr_expr.as_slice(), &input)?);

let schema = DFSchema::new_with_metadata(qualified_fields, HashMap::new())?;
let schema = DFSchema::new_with_metadata(
qualified_fields,
input.schema().metadata().clone(),
)?;

Self::try_new_with_schema(input, group_expr, aggr_expr, Arc::new(schema))
}
8 changes: 8 additions & 0 deletions datafusion/expr/src/udaf.rs
@@ -196,6 +196,10 @@ impl AggregateUDF {
self.inner.state_fields(args)
}

pub fn fields(&self, args: StateFieldsArgs) -> Result<Field> {
Contributor commented:

Could we document this function and what it is for (also in AggregateUDFImpl)?

Also, the name is strange to me -- it is fields but it returns a single Field, and the corresponding function on AggregateUDFImpl is called field (no s) 🤔

self.inner.field(args)
}
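
A documented version might read something like this (the doc wording is a sketch, not taken from the PR):

    /// Return the `Field` (name, data type, and nullability) of this
    /// aggregate's output column, as reported by the UDF implementation.
    ///
    /// See [`AggregateUDFImpl::field`] for more details.
    pub fn fields(&self, args: StateFieldsArgs) -> Result<Field> {
        self.inner.field(args)
    }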

/// See [`AggregateUDFImpl::groups_accumulator_supported`] for more details.
pub fn groups_accumulator_supported(&self, args: AccumulatorArgs) -> bool {
self.inner.groups_accumulator_supported(args)
@@ -383,6 +387,10 @@ pub trait AggregateUDFImpl: Debug + Send + Sync {
.collect())
}

fn field(&self, args: StateFieldsArgs) -> Result<Field> {
Ok(Field::new(args.name, args.return_type.clone(), true))
}

/// If the aggregate expression has a specialized
/// [`GroupsAccumulator`] implementation. If this returns true,
/// `[Self::create_groups_accumulator]` will be called.
3 changes: 3 additions & 0 deletions datafusion/functions-aggregate-common/src/aggregate.rs
@@ -171,6 +171,9 @@ pub trait AggregateExpr: Send + Sync + Debug + PartialEq<dyn Any> {
fn get_minmax_desc(&self) -> Option<(Field, bool)> {
None
}

/// Get function's name, for example `count(x)` returns `count`
fn func_name(&self) -> &str;
Contributor commented:

Is there a reason this isn't name()? func_name is fine, it just seems inconsistent with the rest of the code.

jayzhan211 (Contributor, Author) commented on Aug 17, 2024:

This is to identify the function (i.e. count); there is already name(), but it includes the arguments (i.e. count(x)), which is not what I want.
An alternative is to introduce nullable() for AggregateUDF, so we don't need name checking. Maybe I should have done that before this PR.
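
A self-contained sketch of that alternative (names and shapes are assumptions; the real AggregateUDFImpl trait has many more methods):

    use std::fmt::Debug;

    /// Stand-in for a nullability hook that could live on AggregateUDFImpl.
    trait AggregateNullability: Debug {
        /// Whether the aggregate's output column may contain nulls. Most
        /// aggregates can return null (e.g. min/max over an empty group),
        /// so default to true.
        fn is_nullable(&self) -> bool {
            true
        }
    }

    #[derive(Debug)]
    struct Count;

    impl AggregateNullability for Count {
        /// count always produces a value, so its output is never null.
        fn is_nullable(&self) -> bool {
            false
        }
    }

    fn main() {
        // The physical layer could consult this instead of matching on "count".
        assert!(!Count.is_nullable());
    }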

}

/// Stores the physical expressions used inside the `AggregateExpr`.
5 changes: 5 additions & 0 deletions datafusion/functions-aggregate/src/count.rs
@@ -138,6 +138,11 @@ impl AggregateUDFImpl for Count {
}
}

fn field(&self, args: StateFieldsArgs) -> Result<Field> {
// count always return non-null value
Ok(Field::new(args.name, args.return_type.clone(), false))
}

fn accumulator(&self, acc_args: AccumulatorArgs) -> Result<Box<dyn Accumulator>> {
if !acc_args.is_distinct {
return Ok(Box::new(CountAccumulator::new()));
68 changes: 42 additions & 26 deletions datafusion/optimizer/src/analyzer/type_coercion.rs
@@ -17,7 +17,6 @@

//! Optimizer rule for type validation and coercion

use std::collections::HashMap;
use std::sync::Arc;

use itertools::izip;
@@ -821,9 +820,18 @@ fn coerce_union_schema(inputs: &[Arc<LogicalPlan>]) -> Result<DFSchema> {
.iter()
.map(|f| f.is_nullable())
.collect::<Vec<_>>();
let mut union_field_meta = base_schema
.fields()
.iter()
.map(|f| f.metadata().clone())
.collect::<Vec<_>>();

let mut metadata = base_schema.metadata().clone();

for (i, plan) in inputs.iter().enumerate().skip(1) {
let plan_schema = plan.schema();
metadata.extend(plan_schema.metadata().clone());

if plan_schema.fields().len() != base_schema.fields().len() {
return plan_err!(
"Union schemas have different number of fields: \
@@ -833,39 +841,47 @@
plan_schema.fields().len()
);
}
// coerce data type and nullablity for each field
for (union_datatype, union_nullable, plan_field) in izip!(
union_datatypes.iter_mut(),
union_nullabilities.iter_mut(),
plan_schema.fields()
) {
let coerced_type =
comparison_coercion(union_datatype, plan_field.data_type()).ok_or_else(
|| {
plan_datafusion_err!(
"Incompatible inputs for Union: Previous inputs were \
of type {}, but got incompatible type {} on column '{}'",
union_datatype,
plan_field.data_type(),
plan_field.name()
)
},
)?;
*union_datatype = coerced_type;
*union_nullable = *union_nullable || plan_field.is_nullable();

// Safety: Length is checked
unsafe {
Contributor commented:

I think this unsafe block is unnecessary -- this isn't a performance-critical piece of code. I think izip or just manually zipping three times would be better.
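
A safe version along those lines might look roughly like this (a sketch of the reviewer's suggestion, not necessarily the code that was finally merged):

    // Zip the three per-field accumulators with the plan's fields instead of
    // indexing with get_unchecked_mut inside an unsafe block.
    for (union_datatype, union_nullable, union_field_map, plan_field) in izip!(
        union_datatypes.iter_mut(),
        union_nullabilities.iter_mut(),
        union_field_meta.iter_mut(),
        plan_schema.fields()
    ) {
        let coerced_type = comparison_coercion(union_datatype, plan_field.data_type())
            .ok_or_else(|| {
                plan_datafusion_err!(
                    "Incompatible inputs for Union: Previous inputs were \
                     of type {}, but got incompatible type {} on column '{}'",
                    union_datatype,
                    plan_field.data_type(),
                    plan_field.name()
                )
            })?;

        *union_datatype = coerced_type;
        *union_nullable = *union_nullable || plan_field.is_nullable();
        union_field_map.extend(plan_field.metadata().clone());
    }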

// coerce data type and nullablity for each field
for (i, plan_field) in plan_schema.fields().iter().enumerate() {
let union_datatype = union_datatypes.get_unchecked_mut(i);
let union_nullable = union_nullabilities.get_unchecked_mut(i);
let union_field_map = union_field_meta.get_unchecked_mut(i);

let coerced_type =
comparison_coercion(union_datatype, plan_field.data_type())
.ok_or_else(|| {
plan_datafusion_err!(
"Incompatible inputs for Union: Previous inputs were \
of type {}, but got incompatible type {} on column '{}'",
union_datatype,
plan_field.data_type(),
plan_field.name()
)
})?;

*union_datatype = coerced_type;
*union_nullable = *union_nullable || plan_field.is_nullable();
union_field_map.extend(plan_field.metadata().clone());
}
}
}
let union_qualified_fields = izip!(
base_schema.iter(),
union_datatypes.into_iter(),
union_nullabilities
union_nullabilities,
union_field_meta.into_iter()
)
.map(|((qualifier, field), datatype, nullable)| {
let field = Arc::new(Field::new(field.name().clone(), datatype, nullable));
(qualifier.cloned(), field)
.map(|((qualifier, field), datatype, nullable, metadata)| {
let mut field = Field::new(field.name().clone(), datatype, nullable);
field.set_metadata(metadata);
(qualifier.cloned(), field.into())
})
.collect::<Vec<_>>();
DFSchema::new_with_metadata(union_qualified_fields, HashMap::new())

DFSchema::new_with_metadata(union_qualified_fields, metadata)
}

/// See `<https://github.com/apache/datafusion/pull/2108>`
14 changes: 13 additions & 1 deletion datafusion/physical-expr-functions-aggregate/src/aggregate.rs
@@ -241,7 +241,15 @@ impl AggregateExpr for AggregateFunctionExpr {
}

fn field(&self) -> Result<Field> {
Ok(Field::new(&self.name, self.data_type.clone(), true))
let args = StateFieldsArgs {
name: &self.name,
input_types: &self.input_types,
return_type: &self.data_type,
ordering_fields: &self.ordering_fields,
is_distinct: self.is_distinct,
};

self.fun.fields(args)
}

fn create_accumulator(&self) -> Result<Box<dyn Accumulator>> {
@@ -435,6 +443,10 @@ impl AggregateExpr for AggregateFunctionExpr {
.is_descending()
.and_then(|flag| self.field().ok().map(|f| (f, flag)))
}

fn func_name(&self) -> &str {
self.fun.name()
}
}

impl PartialEq<dyn Any> for AggregateFunctionExpr {
12 changes: 12 additions & 0 deletions datafusion/physical-expr/src/window/aggregate.rs
@@ -80,6 +80,14 @@ impl WindowExpr for PlainAggregateWindowExpr {
}

fn field(&self) -> Result<Field> {
// TODO: Fix window function to always return non-null for count
Contributor commented:

I don't understand this comment -- can we please file a ticket to track it (and add the ticket reference to the comments)?

if let Ok(name) = self.func_name() {
if name == "count" {
let field = self.aggregate.field()?;
return Ok(field.with_nullable(true));
}
}

self.aggregate.field()
}

@@ -157,6 +165,10 @@ impl WindowExpr for PlainAggregateWindowExpr {
fn uses_bounded_memory(&self) -> bool {
!self.window_frame.end_bound.is_unbounded()
}

fn func_name(&self) -> Result<&str> {
Ok(self.aggregate.func_name())
}
}

impl AggregateWindowExpr for PlainAggregateWindowExpr {
6 changes: 5 additions & 1 deletion datafusion/physical-expr/src/window/built_in.rs
@@ -32,7 +32,7 @@ use arrow::compute::SortOptions;
use arrow::datatypes::Field;
use arrow::record_batch::RecordBatch;
use datafusion_common::utils::evaluate_partition_ranges;
use datafusion_common::{Result, ScalarValue};
use datafusion_common::{not_impl_err, Result, ScalarValue};
use datafusion_expr::window_state::{WindowAggState, WindowFrameContext};
use datafusion_expr::WindowFrame;

Contributor commented:

Can you move them back to the top?

@@ -97,6 +97,10 @@ impl BuiltInWindowExpr {
}

impl WindowExpr for BuiltInWindowExpr {
fn func_name(&self) -> Result<&str> {
not_impl_err!("function name not determined")
Contributor commented:

Why wouldn't we implement func_name for a built-in window function? 🤔

jayzhan211 (Contributor, Author) commented:

The reason is that I don't need it -- func_name is only used for the name check in nullable.

}

/// Return a reference to Any that can be used for downcasting
fn as_any(&self) -> &dyn Any {
self
12 changes: 12 additions & 0 deletions datafusion/physical-expr/src/window/sliding_aggregate.rs
@@ -82,6 +82,14 @@ impl WindowExpr for SlidingAggregateWindowExpr {
}

fn field(&self) -> Result<Field> {
// TODO: Fix window function to always return non-null for count
if let Ok(name) = self.func_name() {
if name == "count" {
let field = self.aggregate.field()?;
return Ok(field.with_nullable(true));
}
}

self.aggregate.field()
}

@@ -166,6 +174,10 @@ impl WindowExpr for SlidingAggregateWindowExpr {
window_frame: Arc::clone(&self.window_frame),
}))
}

fn func_name(&self) -> Result<&str> {
Ok(self.aggregate.func_name())
}
}

impl AggregateWindowExpr for SlidingAggregateWindowExpr {
2 changes: 2 additions & 0 deletions datafusion/physical-expr/src/window/window_expr.rs
@@ -157,6 +157,8 @@ pub trait WindowExpr: Send + Sync + Debug {
) -> Option<Arc<dyn WindowExpr>> {
None
}

fn func_name(&self) -> Result<&str>;
}

/// Stores the physical expressions used inside the `WindowExpr`.