Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clean up API #195

Merged
merged 5 commits into from
Jun 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions Changes.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
# Change log

## 0.12

- Remove `serde_arrow::schema::Schema`
- Remove `serde_arrow::ArrowBuilder` and `serde_arrow::Arrow2Builder`
- Use `impl serde::Serialize` instead of `&(impl serde::Serialize + ?Sized)`
- Use `&[FieldRef]` instead of `&[Field]` in arrow APIs
- Remove `from_arrow_fields` / `to_arrow_fields` for `SerdeArrowSchema`; use the
`TryFrom` conversions to convert between fields and `SerdeArrowSchema` instead
- Remove `SerdeArrowSchema::new()`, `Overwrites::new()`

## 0.11.6

- Add `arrow=52` support
Expand Down
13 changes: 7 additions & 6 deletions serde_arrow/benches/groups/impls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ macro_rules! define_benchmark {
) => {
pub fn benchmark_serialize(c: &mut criterion::Criterion) {
use serde_arrow::schema::{SerdeArrowSchema, SchemaLike};
use serde_arrow::_impl::{arrow::datatypes::FieldRef, arrow2::datatypes::Field as Arrow2Field};

for n in [$($n),*] {
let mut group = c.benchmark_group(format!("{}_serialize({})", stringify!($name), n));
Expand All @@ -33,8 +34,8 @@ macro_rules! define_benchmark {
.map(|_| <$ty>::random(&mut rng))
.collect::<Vec<_>>();
let schema = SerdeArrowSchema::from_samples(&items, Default::default()).unwrap();
let arrow_fields = schema.to_arrow_fields().unwrap();
let arrow2_fields = schema.to_arrow2_fields().unwrap();
let arrow_fields = Vec::<FieldRef>::try_from(&schema).unwrap();
let arrow2_fields = Vec::<Arrow2Field>::try_from(&schema).unwrap();

#[allow(unused)]
let bench_serde_arrow = true;
Expand Down Expand Up @@ -93,10 +94,10 @@ pub mod serde_arrow_arrow {
use serde::Serialize;
use serde_arrow::{
Result,
_impl::arrow::{array::ArrayRef, datatypes::Field},
_impl::arrow::{array::ArrayRef, datatypes::FieldRef},
};

pub fn serialize<T>(fields: &[Field], items: &T) -> Result<Vec<ArrayRef>>
pub fn serialize<T>(fields: &[FieldRef], items: &T) -> Result<Vec<ArrayRef>>
where
T: Serialize + ?Sized,
{
Expand Down Expand Up @@ -132,10 +133,10 @@ pub mod arrow {

use serde_arrow::{
Error, Result,
_impl::arrow::{array::ArrayRef, datatypes::Field},
_impl::arrow::{array::ArrayRef, datatypes::FieldRef},
};

pub fn serialize<T>(fields: &[Field], items: &[T]) -> Result<Vec<ArrayRef>>
pub fn serialize<T>(fields: &[FieldRef], items: &[T]) -> Result<Vec<ArrayRef>>
where
T: Serialize,
{
Expand Down
4 changes: 2 additions & 2 deletions serde_arrow/src/_impl/docs/defs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,12 @@ pub fn example_record_batch() -> crate::_impl::arrow::array::RecordBatch {
}

#[cfg(has_arrow)]
pub fn example_arrow_arrays() -> (Vec<crate::_impl::arrow::datatypes::Field>, Vec<crate::_impl::arrow::array::ArrayRef>) {
pub fn example_arrow_arrays() -> (Vec<crate::_impl::arrow::datatypes::FieldRef>, Vec<crate::_impl::arrow::array::ArrayRef>) {
use crate::schema::{SchemaLike, TracingOptions};

let items = example_records();

let fields = Vec::<crate::_impl::arrow::datatypes::Field>::from_type::<Record>(TracingOptions::default()).unwrap();
let fields = Vec::<crate::_impl::arrow::datatypes::FieldRef>::from_type::<Record>(TracingOptions::default()).unwrap();
let arrays = crate::to_arrow(&fields, &items).unwrap();

(fields, arrays)
Expand Down
17 changes: 9 additions & 8 deletions serde_arrow/src/_impl/docs/quickstart.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
//! # #[cfg(has_arrow)]
//! # fn main() {
//! # use serde_arrow::_impl::arrow as arrow;
//! use arrow::datatypes::{DataType, Field};
//! use arrow::datatypes::{DataType, FieldRef};
//! use serde_arrow::{
//! schema::{SchemaLike, Strategy, TracingOptions},
//! utils::{Item, Items},
Expand All @@ -37,7 +37,7 @@
//! ```rust
//! # #[cfg(has_arrow)]
//! # fn main() -> serde_arrow::_impl::PanicOnError<()> {
//! # use serde_arrow::_impl::arrow::datatypes::{DataType, Field};
//! # use serde_arrow::_impl::arrow::datatypes::{DataType, FieldRef};
//! # use serde_arrow::{schema::{SchemaLike, TracingOptions}, utils::Item};
//! use chrono::NaiveDateTime;
//!
Expand All @@ -46,7 +46,7 @@
//! // ...
//! ];
//!
//! let fields = Vec::<Field>::from_samples(items, TracingOptions::default())?;
//! let fields = Vec::<FieldRef>::from_samples(items, TracingOptions::default())?;
//! assert_eq!(fields[0].data_type(), &DataType::LargeUtf8);
//! # Ok(())
//! # }
Expand Down Expand Up @@ -74,14 +74,15 @@
//! ```rust
//! # #[cfg(has_arrow)]
//! # fn main() -> serde_arrow::_impl::PanicOnError<()> {
//! # use std::sync::Arc;
//! # use serde_arrow::_impl::arrow::datatypes::{DataType, Field};
//! # use serde_arrow::utils::Item;
//! let records: &[Item<i64>] = &[
//! Item(12 * 60 * 60 * 24 * 1000),
//! Item(9 * 60 * 60 * 24 * 1000),
//! ];
//!
//! let fields = vec![Field::new("item", DataType::Date64, false)];
//! let fields = vec![Arc::new(Field::new("item", DataType::Date64, false))];
//! let arrays = serde_arrow::to_arrow(&fields, records)?;
//! # Ok(())
//! # }
Expand All @@ -95,7 +96,7 @@
//! ```rust
//! # #[cfg(has_arrow)]
//! # fn main() -> serde_arrow::_impl::PanicOnError<()> {
//! # use serde_arrow::_impl::arrow::datatypes::Field;
//! # use serde_arrow::_impl::arrow::datatypes::FieldRef;
//! # use serde_arrow::{schema::SchemaLike, utils::Item};
//! use std::str::FromStr;
//!
Expand All @@ -107,7 +108,7 @@
//! Item(BigDecimal::from_str("4.56").unwrap()),
//! ];
//!
//! let fields = Vec::<Field>::from_value(&json!([
//! let fields = Vec::<FieldRef>::from_value(&json!([
//! {"name": "item", "data_type": "Decimal128(5, 2)"},
//! ]))?;
//!
Expand Down Expand Up @@ -145,10 +146,10 @@
//! ```rust
//! # #[cfg(has_arrow)]
//! # fn main() -> serde_arrow::_impl::PanicOnError<()> {
//! # use serde_arrow::_impl::arrow::datatypes::Field;
//! # use serde_arrow::_impl::arrow::datatypes::FieldRef;
//! # use serde_arrow::{schema::{SchemaLike, TracingOptions}, utils::Item};
//! let items = &[Item("foo"), Item("bar")];
//! let fields = Vec::<Field>::from_samples(
//! let fields = Vec::<FieldRef>::from_samples(
//! items,
//! TracingOptions::default().string_dictionary_encoding(true),
//! )?;
Expand Down
91 changes: 2 additions & 89 deletions serde_arrow/src/arrow2_impl/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,105 +8,18 @@ use crate::{
_impl::arrow2::{array::Array, datatypes::Field},
internal::{
array_builder::ArrayBuilder, deserializer::Deserializer, error::Result,
schema::SerdeArrowSchema, serializer::Serializer,
serializer::Serializer,
},
};

/// Build arrow2 arrays record by record (*requires one of the `arrow2-*`
/// features*)
///
/// The given items should be records (e.g., structs). To serialize items
/// encoding single values consider the [`Items`][crate::utils::Items] and
/// [`Item`][crate::utils::Item] wrappers.
///
/// Example:
///
/// ```rust
/// # fn main() -> serde_arrow::Result<()> {
/// # use serde_arrow::_impl::arrow2 as arrow2;
/// use arrow2::datatypes::{DataType, Field};
/// use serde::Serialize;
/// use serde_arrow::Arrow2Builder;
///
/// ##[derive(Serialize)]
/// struct Record {
/// a: Option<f32>,
/// b: u64,
/// }
///
/// let mut builder = Arrow2Builder::new(&[
/// Field::new("a", DataType::Float32, true),
/// Field::new("b", DataType::UInt64, false),
/// ])?;
///
/// builder.push(&Record { a: Some(1.0), b: 2})?;
/// builder.push(&Record { a: Some(3.0), b: 4})?;
/// builder.push(&Record { a: Some(5.0), b: 5})?;
///
/// builder.extend(&[
/// Record { a: Some(6.0), b: 7},
/// Record { a: Some(8.0), b: 9},
/// Record { a: Some(10.0), b: 11},
/// ])?;
///
/// let arrays = builder.build_arrays()?;
/// #
/// # assert_eq!(arrays.len(), 2);
/// # assert_eq!(arrays[0].len(), 6);
/// # Ok(())
/// # }
/// ```
// Thin newtype around the generic `ArrayBuilder`; kept only as a deprecated
// alias-like wrapper for the arrow2-specific API.
#[deprecated = "`Arrow2Builder` is deprecated. Use `ArrayBuilder` instead"]
pub struct Arrow2Builder(ArrayBuilder);

// `allow(deprecated)` is needed because this impl names the deprecated type.
#[allow(deprecated)]
impl std::fmt::Debug for Arrow2Builder {
// Writes the fixed placeholder `Arrow2Builder<...>`; the wrapped builder's
// contents are not printed.
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "Arrow2Builder<...>")
}
}

// `allow(deprecated)` is needed because this impl names the deprecated type.
#[allow(deprecated)]
impl Arrow2Builder {
/// Build a new Arrow2Builder for the given fields
///
/// The fields are first converted into a `SerdeArrowSchema`, which is then
/// used to construct the wrapped `ArrayBuilder`. An error from either step
/// is returned to the caller.
///
/// This method may fail when unsupported data types are encountered in the
/// given fields.
///
pub fn new(fields: &[Field]) -> Result<Self> {
let schema = SerdeArrowSchema::from_arrow2_fields(fields)?;
Ok(Self(ArrayBuilder::new(schema)?))
}

/// Add a single record to the arrays
///
/// Forwards to `push` on the wrapped `ArrayBuilder`.
///
pub fn push<T: Serialize + ?Sized>(&mut self, item: &T) -> Result<()> {
self.0.push(item)
}

/// Add multiple records to the arrays
///
/// Forwards to `extend` on the wrapped `ArrayBuilder`.
///
pub fn extend<T: Serialize + ?Sized>(&mut self, items: &T) -> Result<()> {
self.0.extend(items)
}

/// Build the arrays from the rows pushed so far.
///
/// This operation will reset the underlying buffers and start a new batch.
///
/// Forwards to `to_arrow2` on the wrapped `ArrayBuilder`.
///
pub fn build_arrays(&mut self) -> Result<Vec<Box<dyn Array>>> {
self.0.to_arrow2()
}
}

/// Build arrow2 arrays from the given items (*requires one of the `arrow2-*`
/// features*)
///
/// `items` should be given in the form of a list of records (e.g., a vector of
/// structs). To serialize items encoding single values consider the
/// [`Items`][crate::utils::Items] wrapper.
///
/// To build arrays record by record use [`Arrow2Builder`].
/// To build arrays record by record use [`ArrayBuilder`].
///
/// ```rust
/// # fn main() -> serde_arrow::Result<()> {
Expand Down
39 changes: 3 additions & 36 deletions serde_arrow/src/arrow2_impl/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,39 +11,6 @@ use crate::{
},
};

/// Support for arrow2 types (*requires one of the `arrow2-*` features*)
impl SerdeArrowSchema {
/// Build a new Schema object from fields
///
/// Convenience wrapper that delegates to `Self::try_from(fields)`.
pub fn from_arrow2_fields(fields: &[Field]) -> Result<Self> {
Self::try_from(fields)
}

/// This method is deprecated. Use
/// [`to_arrow2_fields`][SerdeArrowSchema::to_arrow2_fields] instead:
///
/// ```rust
/// # fn main() -> serde_arrow::_impl::PanicOnError<()> {
/// # use serde_arrow::schema::{SerdeArrowSchema, SchemaLike, TracingOptions};
/// # #[derive(serde::Deserialize)]
/// # struct Item { a: u32 }
/// # let schema = SerdeArrowSchema::from_type::<Item>(TracingOptions::default()).unwrap();
/// # let fields =
/// schema.to_arrow2_fields()?
/// # ;
/// # Ok(())
/// # }
/// ```
///
/// Both methods perform the same `Vec::<Field>::try_from(self)` conversion.
#[deprecated = "The method `get_arrow2_fields` is deprecated. Use `to_arrow2_fields` instead"]
pub fn get_arrow2_fields(&self) -> Result<Vec<Field>> {
Vec::<Field>::try_from(self)
}

/// Build a vec of fields from a Schema object
///
/// Convenience wrapper that delegates to `Vec::<Field>::try_from(self)`.
pub fn to_arrow2_fields(&self) -> Result<Vec<Field>> {
Vec::<Field>::try_from(self)
}
}

impl TryFrom<SerdeArrowSchema> for Vec<Field> {
type Error = Error;

Expand Down Expand Up @@ -79,20 +46,20 @@ impl Sealed for Vec<Field> {}
/// `arrow2-*` features*)
impl SchemaLike for Vec<Field> {
fn from_value<T: serde::Serialize + ?Sized>(value: &T) -> Result<Self> {
SerdeArrowSchema::from_value(value)?.to_arrow2_fields()
SerdeArrowSchema::from_value(value)?.try_into()
}

fn from_type<'de, T: serde::Deserialize<'de> + ?Sized>(
options: crate::schema::TracingOptions,
) -> Result<Self> {
SerdeArrowSchema::from_type::<T>(options)?.to_arrow2_fields()
SerdeArrowSchema::from_type::<T>(options)?.try_into()
}

fn from_samples<T: serde::Serialize + ?Sized>(
samples: &T,
options: crate::schema::TracingOptions,
) -> Result<Self> {
SerdeArrowSchema::from_samples(samples, options)?.to_arrow2_fields()
SerdeArrowSchema::from_samples(samples, options)?.try_into()
}
}

Expand Down
Loading