Skip to content

Commit

Permalink
Validate Schema/Field when constructing new Array/ChunkedArray/Table (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
kylebarron authored Jul 26, 2024
1 parent dede702 commit 6bcd421
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 1 deletion.
13 changes: 12 additions & 1 deletion pyo3-arrow/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::fmt::Display;
use std::sync::Arc;

use arrow_array::{make_array, Array, ArrayRef};
use arrow_schema::{Field, FieldRef};
use arrow_schema::{ArrowError, Field, FieldRef};
use pyo3::intern;
use pyo3::prelude::*;
use pyo3::types::{PyCapsule, PyTuple, PyType};
Expand All @@ -26,9 +26,20 @@ pub struct PyArray {
impl PyArray {
/// Build a `PyArray` by pairing an [ArrayRef] with the [FieldRef] that describes it.
///
/// # Panics
///
/// Panics if the array's data type is not equal to the field's data type. Use
/// [`Self::try_new`] to get an error instead of a panic.
pub fn new(array: ArrayRef, field: FieldRef) -> Self {
    // Scope the borrows so they end before `array` and `field` are moved into `Self`.
    {
        let array_type = array.data_type();
        let field_type = field.data_type();
        assert_eq!(array_type, field_type);
    }
    Self { array, field }
}

/// Fallibly create a new Python Array from an [ArrayRef] and a [FieldRef].
///
/// This is the non-panicking counterpart of [`Self::new`]: the array and field are only
/// accepted when they describe the same data type.
///
/// # Errors
///
/// Returns [`ArrowError::SchemaError`] if the array's data type differs from the field's
/// data type. The error message includes both data types to make the mismatch easy to
/// diagnose.
pub fn try_new(array: ArrayRef, field: FieldRef) -> Result<Self, ArrowError> {
    if array.data_type() != field.data_type() {
        // Keep the original message as a prefix so existing substring checks still match,
        // and append the two offending types.
        return Err(ArrowError::SchemaError(format!(
            "Array DataType must match Field DataType. Array DataType is {}; field DataType is {}",
            array.data_type(),
            field.data_type()
        )));
    }
    Ok(Self { array, field })
}

pub fn from_array<A: Array>(array: A) -> Self {
let array = make_array(array.into_data());
Self::from_array_ref(array)
Expand Down
6 changes: 6 additions & 0 deletions pyo3-arrow/src/chunked.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ pub struct PyChunkedArray {

impl PyChunkedArray {
/// Build a `PyChunkedArray` from a list of chunks and the field describing them.
///
/// An empty `chunks` vector is accepted, since there is nothing to mismatch.
///
/// # Panics
///
/// Panics if any chunk's data type is not equal to the field's data type.
pub fn new(chunks: Vec<ArrayRef>, field: FieldRef) -> Self {
    // Scope the borrow of `field` so it ends before `field` is moved into `Self`.
    let has_mismatch = {
        let expected = field.data_type();
        chunks.iter().any(|chunk| chunk.data_type() != expected)
    };
    assert!(!has_mismatch, "All chunks must have same data type");
    Self { chunks, field }
}

Expand Down
5 changes: 5 additions & 0 deletions pyo3-arrow/src/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@ pub struct PyTable {

impl PyTable {
/// Build a `PyTable` from a list of record batches and the schema they share.
///
/// An empty `batches` vector is accepted, since there is nothing to mismatch.
///
/// # Panics
///
/// Panics if any batch's schema does not compare equal (`==`) to `schema`.
pub fn new(batches: Vec<RecordBatch>, schema: SchemaRef) -> Self {
    // TODO: allow batches to have different schema metadata?
    let has_mismatch = batches.iter().any(|batch| batch.schema_ref() != &schema);
    assert!(!has_mismatch, "All batches must have same schema");
    Self { schema, batches }
}

Expand Down

0 comments on commit 6bcd421

Please sign in to comment.