diff --git a/crates/core/src/delta_datafusion/mod.rs b/crates/core/src/delta_datafusion/mod.rs index 723c2bea03..8d422dc8b5 100644 --- a/crates/core/src/delta_datafusion/mod.rs +++ b/crates/core/src/delta_datafusion/mod.rs @@ -190,34 +190,35 @@ impl DataFusionMixins for DeltaTableState { fn _arrow_schema(snapshot: &Snapshot, wrap_partitions: bool) -> DeltaResult { let meta = snapshot.metadata(); - let fields = meta - .schema()? + + let schema = meta.schema()?; + let fields = schema .fields() .filter(|f| !meta.partition_columns.contains(&f.name().to_string())) .map(|f| f.try_into()) .chain( - meta.schema()? - .fields() - .filter(|f| meta.partition_columns.contains(&f.name().to_string())) - .map(|f| { - let field = Field::try_from(f)?; - let corrected = if wrap_partitions { - match field.data_type() { - // Only dictionary-encode types that may be large - // // https://github.com/apache/arrow-datafusion/pull/5545 - DataType::Utf8 - | DataType::LargeUtf8 - | DataType::Binary - | DataType::LargeBinary => { - wrap_partition_type_in_dict(field.data_type().clone()) - } - _ => field.data_type().clone(), + // We need stable order between logical and physical schemas, but the order of + // partitioning columns is not always the same in the json schema and the array + meta.partition_columns.iter().map(|partition_col| { + let f = schema.field(partition_col).unwrap(); + let field = Field::try_from(f)?; + let corrected = if wrap_partitions { + match field.data_type() { + // Only dictionary-encode types that may be large + // // https://github.com/apache/arrow-datafusion/pull/5545 + DataType::Utf8 + | DataType::LargeUtf8 + | DataType::Binary + | DataType::LargeBinary => { + wrap_partition_type_in_dict(field.data_type().clone()) } - } else { - field.data_type().clone() - }; - Ok(field.with_data_type(corrected)) - }), + _ => field.data_type().clone(), + } + } else { + field.data_type().clone() + }; + Ok(field.with_data_type(corrected)) + }) ) .collect::, _>>()?;