Skip to content

Commit

Permalink
Less-strict schema equality in Table and ChunkedArray constructors (#98)
Browse files Browse the repository at this point in the history
  • Loading branch information
kylebarron authored Aug 1, 2024
1 parent 2a243ed commit b819f0c
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 3 deletions.
2 changes: 1 addition & 1 deletion pyo3-arrow/src/chunked.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ impl PyChunkedArray {
assert!(
chunks
.iter()
.all(|chunk| chunk.data_type() == field.data_type()),
.all(|chunk| chunk.data_type().equals_datatype(field.data_type())),
"All chunks must have same data type"
);
Self { chunks, field }
Expand Down
1 change: 1 addition & 0 deletions pyo3-arrow/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ mod record_batch;
mod record_batch_reader;
mod schema;
mod table;
mod utils;

pub use array::PyArray;
pub use array_reader::PyArrayReader;
Expand Down
6 changes: 4 additions & 2 deletions pyo3-arrow/src/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use crate::input::{
AnyArray, AnyRecordBatch, FieldIndexInput, MetadataInput, NameOrField, SelectIndices,
};
use crate::schema::display_schema;
use crate::utils::schema_equals;
use crate::{PyChunkedArray, PyField, PyRecordBatch, PyRecordBatchReader, PySchema};

/// A Python-facing Arrow table.
Expand All @@ -35,9 +36,10 @@ pub struct PyTable {

impl PyTable {
pub fn new(batches: Vec<RecordBatch>, schema: SchemaRef) -> Self {
// TODO: allow batches to have different schema metadata?
assert!(
batches.iter().all(|rb| rb.schema_ref() == &schema),
batches
.iter()
.all(|rb| schema_equals(rb.schema_ref(), &schema)),
"All batches must have same schema"
);
Self { schema, batches }
Expand Down
17 changes: 17 additions & 0 deletions pyo3-arrow/src/utils.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
use arrow_schema::Schema;

/// Check whether two schemas are equal, using a relaxed notion of equality.
///
/// Two schemas are considered equal when they contain the same number of fields and every pair
/// of fields at the same position has the same name and a matching data type, as decided by
/// `DataType::equals_datatype`.
///
/// This allows schemas to have different top-level metadata, as well as different nested field
/// names and keys. Only the name and data type of each top-level field are inspected here; other
/// field attributes are not compared.
pub(crate) fn schema_equals(left: &Schema, right: &Schema) -> bool {
    // `zip` stops at the shorter of the two iterators, so without this length check a schema
    // whose fields are a strict prefix of the other's would incorrectly compare as equal.
    left.fields.len() == right.fields.len()
        && left
            .fields
            .iter()
            .zip(right.fields.iter())
            .all(|(left_field, right_field)| {
                left_field.name() == right_field.name()
                    && left_field
                        .data_type()
                        .equals_datatype(right_field.data_type())
            })
}

0 comments on commit b819f0c

Please sign in to comment.