Skip to content

Commit

Permalink
Merge pull request #195 from chmp/feature/remove-deprecated-functions
Browse files Browse the repository at this point in the history
Clean up API
  • Loading branch information
chmp committed Jun 19, 2024
2 parents 553b629 + 01b496f commit 1f658a6
Show file tree
Hide file tree
Showing 20 changed files with 114 additions and 429 deletions.
10 changes: 10 additions & 0 deletions Changes.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,15 @@
# Change log

## 0.12

- Remove `serde_arrow::schema::Schema`
- Remove `serde_arrow::ArrowBuilder` and `serde_arrow::Arrow2Builder`
- Use `impl serde::Serialize` instead of `&(impl serde::Serialize + ?Sized)`
- Use `&[FieldRef]` instead of `&[Field]` in arrow APIs
- Remove `from_arrow_fields` / `to_arrow_fields` for `SerdeArrowSchema`; use the
`TryFrom` conversions to convert between fields and `SerdeArrowSchema` instead
- Remove `SerdeArrowSchema::new()`, `Overwrites::new()`

## 0.11.6

- Add `arrow=52` support
Expand Down
13 changes: 7 additions & 6 deletions serde_arrow/benches/groups/impls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ macro_rules! define_benchmark {
) => {
pub fn benchmark_serialize(c: &mut criterion::Criterion) {
use serde_arrow::schema::{SerdeArrowSchema, SchemaLike};
use serde_arrow::_impl::{arrow::datatypes::FieldRef, arrow2::datatypes::Field as Arrow2Field};

for n in [$($n),*] {
let mut group = c.benchmark_group(format!("{}_serialize({})", stringify!($name), n));
Expand All @@ -33,8 +34,8 @@ macro_rules! define_benchmark {
.map(|_| <$ty>::random(&mut rng))
.collect::<Vec<_>>();
let schema = SerdeArrowSchema::from_samples(&items, Default::default()).unwrap();
let arrow_fields = schema.to_arrow_fields().unwrap();
let arrow2_fields = schema.to_arrow2_fields().unwrap();
let arrow_fields = Vec::<FieldRef>::try_from(&schema).unwrap();
let arrow2_fields = Vec::<Arrow2Field>::try_from(&schema).unwrap();

#[allow(unused)]
let bench_serde_arrow = true;
Expand Down Expand Up @@ -93,10 +94,10 @@ pub mod serde_arrow_arrow {
use serde::Serialize;
use serde_arrow::{
Result,
_impl::arrow::{array::ArrayRef, datatypes::Field},
_impl::arrow::{array::ArrayRef, datatypes::FieldRef},
};

pub fn serialize<T>(fields: &[Field], items: &T) -> Result<Vec<ArrayRef>>
pub fn serialize<T>(fields: &[FieldRef], items: &T) -> Result<Vec<ArrayRef>>
where
T: Serialize + ?Sized,
{
Expand Down Expand Up @@ -132,10 +133,10 @@ pub mod arrow {

use serde_arrow::{
Error, Result,
_impl::arrow::{array::ArrayRef, datatypes::Field},
_impl::arrow::{array::ArrayRef, datatypes::FieldRef},
};

pub fn serialize<T>(fields: &[Field], items: &[T]) -> Result<Vec<ArrayRef>>
pub fn serialize<T>(fields: &[FieldRef], items: &[T]) -> Result<Vec<ArrayRef>>
where
T: Serialize,
{
Expand Down
4 changes: 2 additions & 2 deletions serde_arrow/src/_impl/docs/defs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,12 @@ pub fn example_record_batch() -> crate::_impl::arrow::array::RecordBatch {
}

#[cfg(has_arrow)]
pub fn example_arrow_arrays() -> (Vec<crate::_impl::arrow::datatypes::Field>, Vec<crate::_impl::arrow::array::ArrayRef>) {
pub fn example_arrow_arrays() -> (Vec<crate::_impl::arrow::datatypes::FieldRef>, Vec<crate::_impl::arrow::array::ArrayRef>) {
use crate::schema::{SchemaLike, TracingOptions};

let items = example_records();

let fields = Vec::<crate::_impl::arrow::datatypes::Field>::from_type::<Record>(TracingOptions::default()).unwrap();
let fields = Vec::<crate::_impl::arrow::datatypes::FieldRef>::from_type::<Record>(TracingOptions::default()).unwrap();
let arrays = crate::to_arrow(&fields, &items).unwrap();

(fields, arrays)
Expand Down
17 changes: 9 additions & 8 deletions serde_arrow/src/_impl/docs/quickstart.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
//! # #[cfg(has_arrow)]
//! # fn main() {
//! # use serde_arrow::_impl::arrow as arrow;
//! use arrow::datatypes::{DataType, Field};
//! use arrow::datatypes::{DataType, FieldRef};
//! use serde_arrow::{
//! schema::{SchemaLike, Strategy, TracingOptions},
//! utils::{Item, Items},
Expand All @@ -37,7 +37,7 @@
//! ```rust
//! # #[cfg(has_arrow)]
//! # fn main() -> serde_arrow::_impl::PanicOnError<()> {
//! # use serde_arrow::_impl::arrow::datatypes::{DataType, Field};
//! # use serde_arrow::_impl::arrow::datatypes::{DataType, FieldRef};
//! # use serde_arrow::{schema::{SchemaLike, TracingOptions}, utils::Item};
//! use chrono::NaiveDateTime;
//!
Expand All @@ -46,7 +46,7 @@
//! // ...
//! ];
//!
//! let fields = Vec::<Field>::from_samples(items, TracingOptions::default())?;
//! let fields = Vec::<FieldRef>::from_samples(items, TracingOptions::default())?;
//! assert_eq!(fields[0].data_type(), &DataType::LargeUtf8);
//! # Ok(())
//! # }
Expand Down Expand Up @@ -74,14 +74,15 @@
//! ```rust
//! # #[cfg(has_arrow)]
//! # fn main() -> serde_arrow::_impl::PanicOnError<()> {
//! # use std::sync::Arc;
//! # use serde_arrow::_impl::arrow::datatypes::{DataType, Field};
//! # use serde_arrow::utils::Item;
//! let records: &[Item<i64>] = &[
//! Item(12 * 60 * 60 * 24 * 1000),
//! Item(9 * 60 * 60 * 24 * 1000),
//! ];
//!
//! let fields = vec![Field::new("item", DataType::Date64, false)];
//! let fields = vec![Arc::new(Field::new("item", DataType::Date64, false))];
//! let arrays = serde_arrow::to_arrow(&fields, records)?;
//! # Ok(())
//! # }
Expand All @@ -95,7 +96,7 @@
//! ```rust
//! # #[cfg(has_arrow)]
//! # fn main() -> serde_arrow::_impl::PanicOnError<()> {
//! # use serde_arrow::_impl::arrow::datatypes::Field;
//! # use serde_arrow::_impl::arrow::datatypes::FieldRef;
//! # use serde_arrow::{schema::SchemaLike, utils::Item};
//! use std::str::FromStr;
//!
Expand All @@ -107,7 +108,7 @@
//! Item(BigDecimal::from_str("4.56").unwrap()),
//! ];
//!
//! let fields = Vec::<Field>::from_value(&json!([
//! let fields = Vec::<FieldRef>::from_value(&json!([
//! {"name": "item", "data_type": "Decimal128(5, 2)"},
//! ]))?;
//!
Expand Down Expand Up @@ -145,10 +146,10 @@
//! ```rust
//! # #[cfg(has_arrow)]
//! # fn main() -> serde_arrow::_impl::PanicOnError<()> {
//! # use serde_arrow::_impl::arrow::datatypes::Field;
//! # use serde_arrow::_impl::arrow::datatypes::FieldRef;
//! # use serde_arrow::{schema::{SchemaLike, TracingOptions}, utils::Item};
//! let items = &[Item("foo"), Item("bar")];
//! let fields = Vec::<Field>::from_samples(
//! let fields = Vec::<FieldRef>::from_samples(
//! items,
//! TracingOptions::default().string_dictionary_encoding(true),
//! )?;
Expand Down
91 changes: 2 additions & 89 deletions serde_arrow/src/arrow2_impl/api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,105 +8,18 @@ use crate::{
_impl::arrow2::{array::Array, datatypes::Field},
internal::{
array_builder::ArrayBuilder, deserializer::Deserializer, error::Result,
schema::SerdeArrowSchema, serializer::Serializer,
serializer::Serializer,
},
};

/// Build arrow2 arrays record by record (*requires one of the `arrow2-*`
/// features*)
///
/// **Deprecated:** use `ArrayBuilder` instead. This type is a newtype wrapper
/// around an `ArrayBuilder`.
///
/// The given items should be records (e.g., structs). To serialize items
/// encoding single values consider the [`Items`][crate::utils::Items] and
/// [`Item`][crate::utils::Item] wrappers.
///
/// Example:
///
/// ```rust
/// # fn main() -> serde_arrow::Result<()> {
/// # use serde_arrow::_impl::arrow2 as arrow2;
/// use arrow2::datatypes::{DataType, Field};
/// use serde::Serialize;
/// use serde_arrow::Arrow2Builder;
///
/// ##[derive(Serialize)]
/// struct Record {
/// a: Option<f32>,
/// b: u64,
/// }
///
/// let mut builder = Arrow2Builder::new(&[
/// Field::new("a", DataType::Float32, true),
/// Field::new("b", DataType::UInt64, false),
/// ])?;
///
/// builder.push(&Record { a: Some(1.0), b: 2})?;
/// builder.push(&Record { a: Some(3.0), b: 4})?;
/// builder.push(&Record { a: Some(5.0), b: 5})?;
///
/// builder.extend(&[
/// Record { a: Some(6.0), b: 7},
/// Record { a: Some(8.0), b: 9},
/// Record { a: Some(10.0), b: 11},
/// ])?;
///
/// let arrays = builder.build_arrays()?;
/// #
/// # assert_eq!(arrays.len(), 2);
/// # assert_eq!(arrays[0].len(), 6);
/// # Ok(())
/// # }
/// ```
#[deprecated = "`Arrow2Builder` is deprecated. Use `ArrayBuilder` instead"]
pub struct Arrow2Builder(ArrayBuilder);

#[allow(deprecated)]
impl std::fmt::Debug for Arrow2Builder {
    /// Writes the fixed placeholder `Arrow2Builder<...>`; the state of the
    /// wrapped builder is not printed.
    fn fmt(&self, out: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        out.write_str("Arrow2Builder<...>")
    }
}

#[allow(deprecated)]
impl Arrow2Builder {
    /// Create a new `Arrow2Builder` for the given arrow2 fields.
    ///
    /// The fields are first converted into a `SerdeArrowSchema`, which is then
    /// used to construct the wrapped `ArrayBuilder`.
    ///
    /// # Errors
    ///
    /// This method may fail when unsupported data types are encountered in the
    /// given fields.
    pub fn new(fields: &[Field]) -> Result<Self> {
        let inner = ArrayBuilder::new(SerdeArrowSchema::from_arrow2_fields(fields)?)?;
        Ok(Self(inner))
    }

    /// Add a single record to the arrays.
    pub fn push<T>(&mut self, item: &T) -> Result<()>
    where
        T: Serialize + ?Sized,
    {
        let Self(inner) = self;
        inner.push(item)
    }

    /// Add multiple records to the arrays.
    pub fn extend<T>(&mut self, items: &T) -> Result<()>
    where
        T: Serialize + ?Sized,
    {
        let Self(inner) = self;
        inner.extend(items)
    }

    /// Build the arrays from the rows pushed so far.
    ///
    /// This operation will reset the underlying buffers and start a new batch.
    pub fn build_arrays(&mut self) -> Result<Vec<Box<dyn Array>>> {
        let Self(inner) = self;
        inner.to_arrow2()
    }
}

/// Build arrow2 arrays from the given items (*requires one of the `arrow2-*`
/// features*)
///
/// `items` should be given in the form of a list of records (e.g., a vector of
/// structs). To serialize items encoding single values consider the
/// [`Items`][crate::utils::Items] wrapper.
///
/// To build arrays record by record use [`Arrow2Builder`].
/// To build arrays record by record use [`ArrayBuilder`].
///
/// ```rust
/// # fn main() -> serde_arrow::Result<()> {
Expand Down
39 changes: 3 additions & 36 deletions serde_arrow/src/arrow2_impl/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,39 +11,6 @@ use crate::{
},
};

/// Support for arrow2 types (*requires one of the `arrow2-*` features*)
impl SerdeArrowSchema {
    /// Build a new schema object from a slice of arrow2 fields.
    ///
    /// Delegates to the `TryFrom<&[Field]>` conversion.
    pub fn from_arrow2_fields(fields: &[Field]) -> Result<Self> {
        TryFrom::try_from(fields)
    }

    /// This method is deprecated. Use
    /// [`to_arrow2_fields`][SerdeArrowSchema::to_arrow2_fields] instead:
    ///
    /// ```rust
    /// # fn main() -> serde_arrow::_impl::PanicOnError<()> {
    /// # use serde_arrow::schema::{SerdeArrowSchema, SchemaLike, TracingOptions};
    /// # #[derive(serde::Deserialize)]
    /// # struct Item { a: u32 }
    /// # let schema = SerdeArrowSchema::from_type::<Item>(TracingOptions::default()).unwrap();
    /// # let fields =
    /// schema.to_arrow2_fields()?
    /// # ;
    /// # Ok(())
    /// # }
    /// ```
    #[deprecated = "The method `get_arrow2_fields` is deprecated. Use `to_arrow2_fields` instead"]
    pub fn get_arrow2_fields(&self) -> Result<Vec<Field>> {
        // Deprecated alias: same conversion as `to_arrow2_fields`.
        self.to_arrow2_fields()
    }

    /// Build a vec of arrow2 fields from this schema object.
    ///
    /// Delegates to the `TryFrom<&SerdeArrowSchema>` conversion for
    /// `Vec<Field>`.
    pub fn to_arrow2_fields(&self) -> Result<Vec<Field>> {
        TryFrom::try_from(self)
    }
}

impl TryFrom<SerdeArrowSchema> for Vec<Field> {
type Error = Error;

Expand Down Expand Up @@ -79,20 +46,20 @@ impl Sealed for Vec<Field> {}
/// `arrow2-*` features*)
impl SchemaLike for Vec<Field> {
fn from_value<T: serde::Serialize + ?Sized>(value: &T) -> Result<Self> {
SerdeArrowSchema::from_value(value)?.to_arrow2_fields()
SerdeArrowSchema::from_value(value)?.try_into()
}

fn from_type<'de, T: serde::Deserialize<'de> + ?Sized>(
options: crate::schema::TracingOptions,
) -> Result<Self> {
SerdeArrowSchema::from_type::<T>(options)?.to_arrow2_fields()
SerdeArrowSchema::from_type::<T>(options)?.try_into()
}

fn from_samples<T: serde::Serialize + ?Sized>(
samples: &T,
options: crate::schema::TracingOptions,
) -> Result<Self> {
SerdeArrowSchema::from_samples(samples, options)?.to_arrow2_fields()
SerdeArrowSchema::from_samples(samples, options)?.try_into()
}
}

Expand Down
Loading

0 comments on commit 1f658a6

Please sign in to comment.