Skip to content

Commit

Permalink
Fix inconsistent order of partitioning columns (#2494)
Browse files Browse the repository at this point in the history
  • Loading branch information
aditanase committed Jun 27, 2024
1 parent d9605ea commit ca9abe5
Showing 1 changed file with 24 additions and 23 deletions.
47 changes: 24 additions & 23 deletions crates/core/src/delta_datafusion/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,34 +190,35 @@ impl DataFusionMixins for DeltaTableState {

fn _arrow_schema(snapshot: &Snapshot, wrap_partitions: bool) -> DeltaResult<ArrowSchemaRef> {
let meta = snapshot.metadata();
let fields = meta
.schema()?

let schema = meta.schema()?;
let fields = schema
.fields()
.filter(|f| !meta.partition_columns.contains(&f.name().to_string()))
.map(|f| f.try_into())
.chain(
meta.schema()?
.fields()
.filter(|f| meta.partition_columns.contains(&f.name().to_string()))
.map(|f| {
let field = Field::try_from(f)?;
let corrected = if wrap_partitions {
match field.data_type() {
// Only dictionary-encode types that may be large
// // https://github.com/apache/arrow-datafusion/pull/5545
DataType::Utf8
| DataType::LargeUtf8
| DataType::Binary
| DataType::LargeBinary => {
wrap_partition_type_in_dict(field.data_type().clone())
}
_ => field.data_type().clone(),
// We need a stable order between the logical and physical schemas, but the partition
// columns do not always appear in the same order in the JSON schema as in the `partition_columns` array
meta.partition_columns.iter().map(|partition_col| {
let f = schema.field(partition_col).unwrap();
let field = Field::try_from(f)?;
let corrected = if wrap_partitions {
match field.data_type() {
// Only dictionary-encode types that may be large
// // https://github.com/apache/arrow-datafusion/pull/5545
DataType::Utf8
| DataType::LargeUtf8
| DataType::Binary
| DataType::LargeBinary => {
wrap_partition_type_in_dict(field.data_type().clone())
}
} else {
field.data_type().clone()
};
Ok(field.with_data_type(corrected))
}),
_ => field.data_type().clone(),
}
} else {
field.data_type().clone()
};
Ok(field.with_data_type(corrected))
})
)
.collect::<Result<Vec<Field>, _>>()?;

Expand Down

0 comments on commit ca9abe5

Please sign in to comment.