From f5023248b0e353075a85902d93fdf1d7f25cc061 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 23 Aug 2021 09:30:48 -0400 Subject: [PATCH] EXPLAIN ANALYZE should run all Optimizer passes --- datafusion/src/optimizer/utils.rs | 14 ++++++++-- datafusion/tests/sql.rs | 45 +++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 2 deletions(-) diff --git a/datafusion/src/optimizer/utils.rs b/datafusion/src/optimizer/utils.rs index 8ce6fe5b557b..07e758d6005c 100644 --- a/datafusion/src/optimizer/utils.rs +++ b/datafusion/src/optimizer/utils.rs @@ -195,11 +195,21 @@ pub fn from_plan( schema: schema.clone(), alias: alias.clone(), }), + LogicalPlan::Analyze { + verbose, schema, .. + } => { + assert!(expr.is_empty()); + assert_eq!(inputs.len(), 1); + Ok(LogicalPlan::Analyze { + verbose: *verbose, + schema: schema.clone(), + input: Arc::new(inputs[0].clone()), + }) + } LogicalPlan::EmptyRelation { .. } | LogicalPlan::TableScan { .. } | LogicalPlan::CreateExternalTable { .. } - | LogicalPlan::Explain { .. } - | LogicalPlan::Analyze { .. } => Ok(plan.clone()), } } diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs index 0f385680deed..285a3a9e1d85 100644 --- a/datafusion/tests/sql.rs +++ b/datafusion/tests/sql.rs @@ -52,6 +52,28 @@ use datafusion::{ }; use datafusion::{execution::context::ExecutionContext, physical_plan::displayable}; +/// A macro to assert that one string is contained within another with +/// a nice error message if they are not. +/// +/// Usage: `assert_contains!(actual, expected)` +/// +/// Is a macro so test error +/// messages are on the same line as the failure; +/// +/// Both arguments must be convertible into Strings (Into<String>) +macro_rules! 
assert_contains { + ($ACTUAL: expr, $EXPECTED: expr) => { + let actual_value: String = $ACTUAL.into(); + let expected_value: String = $EXPECTED.into(); + assert!( + actual_value.contains(&expected_value), + "Can not find expected in actual.\n\nExpected:\n{}\n\nActual:\n{}", + expected_value, + actual_value + ); + }; +} + #[tokio::test] async fn nyc() -> Result<()> { // schema for nyxtaxi csv files @@ -2589,6 +2611,29 @@ async fn csv_explain_verbose_plans() { ); } +#[tokio::test] +async fn explain_analyze_runs_optimizers() { + // repro for https://github.com/apache/arrow-datafusion/issues/917 + // where EXPLAIN ANALYZE was not correctly running the optimizer + let mut ctx = ExecutionContext::new(); + register_alltypes_parquet(&mut ctx); + + // This happens as an optimization pass where count(*) can be + // answered using statistics only. + let expected = "EmptyExec: produce_one_row=true"; + + let sql = "EXPLAIN SELECT count(*) from alltypes_plain"; + let actual = execute_to_batches(&mut ctx, sql).await; + let actual = arrow::util::pretty::pretty_format_batches(&actual).unwrap(); + assert_contains!(actual, expected); + + // EXPLAIN ANALYZE should work the same + let sql = "EXPLAIN ANALYZE SELECT count(*) from alltypes_plain"; + let actual = execute_to_batches(&mut ctx, sql).await; + let actual = arrow::util::pretty::pretty_format_batches(&actual).unwrap(); + assert_contains!(actual, expected); +} + fn aggr_test_schema() -> SchemaRef { Arc::new(Schema::new(vec![ Field::new("c1", DataType::Utf8, false),