Skip to content

Commit

Permalink
Merge pull request #229 from chmp/feature/add-arrow-53
Browse files Browse the repository at this point in the history
Feature/add arrow 53
  • Loading branch information
chmp committed Sep 11, 2024
2 parents 095ed90 + 2f57a6f commit 69c0609
Show file tree
Hide file tree
Showing 12 changed files with 183 additions and 105 deletions.
205 changes: 128 additions & 77 deletions Cargo.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions Changes.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ Refactor the underlying implementation to prepare for further development

New features

- Add `arrow=53` support
- Add `Binary` / `LargeBinary` support for `arrow2`
- Add support to serialize / deserialize `bool` from integer arrays
- Add a helper to construct `Bool8` arrays
Expand Down
6 changes: 3 additions & 3 deletions example/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
# arrow-version:replace: arrow = {{ version = "52.0", features = [{version}] }}
arrow = {version = "52.0", features = ["ipc"] }
# arrow-version:replace: arrow = {{ version = "{version}", features = ["ipc"] }}
arrow = {version = "53.0", features = ["ipc"] }

chrono = { version = "0.4", features = ["serde"] }
serde = { version = "1.0", features = ["derive"] }

# arrow-version:replace: serde_arrow = {{ path = "../serde_arrow", features = ["arrow-{version}"] }}
serde_arrow = { path = "../serde_arrow", features = ["arrow-52"] }
serde_arrow = { path = "../serde_arrow", features = ["arrow-53"] }
6 changes: 3 additions & 3 deletions integration_tests/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@ version = "0.1.0"
edition = "2021"

[dependencies]
# arrow-version:replace: arrow = {{ version = "52.0", features = [{version}] }}
arrow = {version = "52.0", features = ["ipc"] }
# arrow-version:replace: arrow = {{ version = "{version}", features = ["ipc"] }}
arrow = {version = "53.0", features = ["ipc"] }

chrono = { version = "0.4", features = ["serde"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1"

# arrow-version:replace: serde_arrow = {{ path = "../serde_arrow", features = ["arrow-{version}"] }}
serde_arrow = { path = "../serde_arrow", features = ["arrow-52"] }
serde_arrow = { path = "../serde_arrow", features = ["arrow-53"] }
15 changes: 11 additions & 4 deletions serde_arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,18 @@ bench = false
[[bench]]
name = "serde_arrow_bench"
# arrow-version:replace: required-features = ["arrow2-0-17", "arrow-{version}"]
required-features = ["arrow2-0-17", "arrow-52"]
required-features = ["arrow2-0-17", "arrow-53"]
harness = false

[package.metadata.docs.rs]
# arrow-version:replace: features = ["arrow2-0-17", "arrow-{version}"]
features = ["arrow2-0-17", "arrow-52"]
features = ["arrow2-0-17", "arrow-53"]

[features]
default = []

# arrow-version:insert: arrow-{version} = ["dep:arrow-array-{version}", "dep:arrow-schema-{version}", "dep:arrow-data-{version}", "dep:arrow-buffer-{version}"]
arrow-53 = ["dep:arrow-array-53", "dep:arrow-schema-53", "dep:arrow-data-53", "dep:arrow-buffer-53"]
arrow-52 = ["dep:arrow-array-52", "dep:arrow-schema-52", "dep:arrow-data-52", "dep:arrow-buffer-52"]
arrow-51 = ["dep:arrow-array-51", "dep:arrow-schema-51", "dep:arrow-data-51", "dep:arrow-buffer-51"]
arrow-50 = ["dep:arrow-array-50", "dep:arrow-schema-50", "dep:arrow-data-50", "dep:arrow-buffer-50"]
Expand All @@ -50,6 +51,7 @@ half = { version = "2", features = ["bytemuck"], default-features = false }
serde = { version = "1.0", features = ["derive", "std"], default-features = false }

# arrow-version:insert: arrow-array-{version} = {{ package = "arrow-array", version = "{version}", optional = true, default-features = false }}
arrow-array-53 = { package = "arrow-array", version = "53", optional = true, default-features = false }
arrow-array-52 = { package = "arrow-array", version = "52", optional = true, default-features = false }
arrow-array-51 = { package = "arrow-array", version = "51", optional = true, default-features = false }
arrow-array-50 = { package = "arrow-array", version = "50", optional = true, default-features = false }
Expand All @@ -68,6 +70,7 @@ arrow-array-38 = { package = "arrow-array", version = "38", optional = true, def
arrow-array-37 = { package = "arrow-array", version = "37", optional = true, default-features = false }

# arrow-version:insert: arrow-buffer-{version} = {{ package = "arrow-buffer", version = "{version}", optional = true, default-features = false }}
arrow-buffer-53 = { package = "arrow-buffer", version = "53", optional = true, default-features = false }
arrow-buffer-52 = { package = "arrow-buffer", version = "52", optional = true, default-features = false }
arrow-buffer-51 = { package = "arrow-buffer", version = "51", optional = true, default-features = false }
arrow-buffer-50 = { package = "arrow-buffer", version = "50", optional = true, default-features = false }
Expand All @@ -86,6 +89,7 @@ arrow-buffer-38 = { package = "arrow-buffer", version = "38", optional = true, d
arrow-buffer-37 = { package = "arrow-buffer", version = "37", optional = true, default-features = false }

# arrow-version:insert: arrow-data-{version} = {{ package = "arrow-data", version="{version}", optional = true, default-features = false }}
arrow-data-53 = { package = "arrow-data", version="53", optional = true, default-features = false }
arrow-data-52 = { package = "arrow-data", version="52", optional = true, default-features = false }
arrow-data-51 = { package = "arrow-data", version="51", optional = true, default-features = false }
arrow-data-50 = { package = "arrow-data", version="50", optional = true, default-features = false }
Expand All @@ -104,6 +108,7 @@ arrow-data-38 = { package = "arrow-data", version="38", optional = true, default
arrow-data-37 = { package = "arrow-data", version="37", optional = true, default-features = false }

# arrow-version:insert: arrow-schema-{version} = {{ package = "arrow-schema", version = "{version}", optional = true, default-features = false }}
arrow-schema-53 = { package = "arrow-schema", version = "53", optional = true, default-features = false }
arrow-schema-52 = { package = "arrow-schema", version = "52", optional = true, default-features = false }
arrow-schema-51 = { package = "arrow-schema", version = "51", optional = true, default-features = false }
arrow-schema-50 = { package = "arrow-schema", version = "50", optional = true, default-features = false }
Expand Down Expand Up @@ -136,13 +141,14 @@ uuid = { version = "1.10.0", features = ["serde", "v4"] }

# for benchmarks
# arrow-version:replace: arrow-json-{version} = {{ package = "arrow-json", version = "{version}" }}
arrow-json-52 = { package = "arrow-json", version = "52" }
arrow-json-53 = { package = "arrow-json", version = "53" }
criterion = "0.5"
arrow2_convert = "0.5.0"
serde-transcode = "1"
simd-json = "0.13.8"

# arrow-version:insert: arrow-schema-{version} = {{ package = "arrow-schema", version = "{version}", default-features = false, features = ["serde"] }}
arrow-schema-53 = { package = "arrow-schema", version = "53", default-features = false, features = ["serde"] }
arrow-schema-52 = { package = "arrow-schema", version = "52", default-features = false, features = ["serde"] }
arrow-schema-51 = { package = "arrow-schema", version = "51", default-features = false, features = ["serde"] }
arrow-schema-50 = { package = "arrow-schema", version = "50", default-features = false, features = ["serde"] }
Expand Down Expand Up @@ -178,6 +184,7 @@ check-cfg = [
'cfg(has_arrow)',
'cfg(has_arrow_fixed_binary_support)',
# arrow-version:insert: 'cfg(has_arrow_{version})',
'cfg(has_arrow_53)',
'cfg(has_arrow_52)',
'cfg(has_arrow_51)',
'cfg(has_arrow_50)',
Expand All @@ -194,4 +201,4 @@ check-cfg = [
'cfg(has_arrow_39)',
'cfg(has_arrow_38)',
'cfg(has_arrow_37)',
]
]
4 changes: 2 additions & 2 deletions serde_arrow/benches/groups/impls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,9 +125,9 @@ pub mod arrow {
use std::sync::Arc;

// arrow-version:replace: use arrow_json_{version}::ReaderBuilder;
use arrow_json_52::ReaderBuilder;
use arrow_json_53::ReaderBuilder;
// arrow-version:replace: use arrow_schema_{version}::Schema;
use arrow_schema_52::Schema;
use arrow_schema_53::Schema;

use serde::Serialize;

Expand Down
6 changes: 3 additions & 3 deletions serde_arrow/benches/groups/json_to_arrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ use {
};

// arrow-version:replace: use arrow_json_{version}::ReaderBuilder;
use arrow_json_52::ReaderBuilder;
use arrow_json_53::ReaderBuilder;

// arrow-version:replace: use arrow_schema_{version}::{{FieldRef, Schema as ArrowSchema}};
use arrow_schema_52::{FieldRef, Schema as ArrowSchema};
use arrow_schema_53::{FieldRef, Schema as ArrowSchema};

// arrow-version:replace: use arrow_array_{version}::RecordBatch;
use arrow_array_52::RecordBatch;
use arrow_array_53::RecordBatch;
use serde_json::Value;

fn benchmark_json_to_arrow(c: &mut criterion::Criterion) {
Expand Down
2 changes: 2 additions & 0 deletions serde_arrow/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ fn main() {

let max_arrow_version: Option<usize> = [
// arrow-version:insert: #[cfg(feature = "arrow-{version}")]{\n}{version},
#[cfg(feature = "arrow-53")]
53,
#[cfg(feature = "arrow-52")]
52,
#[cfg(feature = "arrow-51")]
Expand Down
16 changes: 8 additions & 8 deletions serde_arrow/src/arrow_impl/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ impl TryFrom<crate::internal::arrow::Array> for ArrayData {
T::Boolean,
// NOTE: use the explicit len
arr.len,
arr.validity.map(Buffer::from),
arr.validity.map(Buffer::from_vec),
0,
vec![ScalarBuffer::from(arr.values).into_inner()],
vec![],
Expand Down Expand Up @@ -114,7 +114,7 @@ impl TryFrom<crate::internal::arrow::Array> for ArrayData {

Ok(ArrayData::builder(data_type)
.len(arr.len)
.null_bit_buffer(arr.validity.map(Buffer::from))
.null_bit_buffer(arr.validity.map(Buffer::from_vec))
.child_data(data)
.build()?)
}
Expand Down Expand Up @@ -153,7 +153,7 @@ impl TryFrom<crate::internal::arrow::Array> for ArrayData {
Ok(ArrayData::try_new(
T::FixedSizeList(Arc::new(field), arr.n),
child.len() / usize::try_from(arr.n)?,
arr.validity.map(Buffer::from),
arr.validity.map(Buffer::from_vec),
0,
vec![],
vec![child],
Expand All @@ -170,7 +170,7 @@ impl TryFrom<crate::internal::arrow::Array> for ArrayData {
Ok(ArrayData::try_new(
T::FixedSizeBinary(arr.n),
arr.data.len() / usize::try_from(arr.n)?,
arr.validity.map(Buffer::from),
arr.validity.map(Buffer::from_vec),
0,
vec![ScalarBuffer::from(arr.data).into_inner()],
vec![],
Expand All @@ -196,7 +196,7 @@ impl TryFrom<crate::internal::arrow::Array> for ArrayData {
Ok(ArrayData::try_new(
T::Map(Arc::new(field), false),
arr.offsets.len().saturating_sub(1),
arr.validity.map(Buffer::from),
arr.validity.map(Buffer::from_vec),
0,
vec![ScalarBuffer::from(arr.offsets).into_inner()],
vec![child],
Expand Down Expand Up @@ -543,7 +543,7 @@ fn primitive_into_data<T: ArrowNativeType>(
Ok(ArrayData::try_new(
data_type,
values.len(),
validity.map(Buffer::from),
validity.map(Buffer::from_vec),
0,
vec![ScalarBuffer::from(values).into_inner()],
vec![],
Expand All @@ -559,7 +559,7 @@ fn bytes_into_data<O: ArrowNativeType>(
Ok(ArrayData::try_new(
data_type,
offsets.len().saturating_sub(1),
validity.map(Buffer::from),
validity.map(Buffer::from_vec),
0,
vec![
ScalarBuffer::from(offsets).into_inner(),
Expand All @@ -579,7 +579,7 @@ fn list_into_data<O: ArrowNativeType>(
Ok(ArrayData::try_new(
data_type,
len,
validity.map(Buffer::from),
validity.map(Buffer::from_vec),
0,
vec![ScalarBuffer::from(offsets).into_inner()],
vec![child_data],
Expand Down
20 changes: 17 additions & 3 deletions serde_arrow/src/arrow_impl/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,13 +141,27 @@ impl TryFrom<&ArrowDataType> for DataType {
AT::Date32 => Ok(T::Date32),
AT::Date64 => Ok(T::Date64),
AT::Decimal128(precision, scale) => Ok(T::Decimal128(*precision, *scale)),
AT::Time32(unit) => Ok(T::Time32(unit.clone().into())),
AT::Time64(unit) => Ok(T::Time64(unit.clone().into())),
AT::Time32(unit) => Ok(T::Time32(
// only some arrow versions implement Copy for unit
#[allow(clippy::clone_on_copy)]
unit.clone().into(),
)),
AT::Time64(unit) => Ok(T::Time64(
// only some arrow versions implement Copy for unit
#[allow(clippy::clone_on_copy)]
unit.clone().into(),
)),
AT::Timestamp(unit, tz) => Ok(T::Timestamp(
// only some arrow versions implement Copy for unit
#[allow(clippy::clone_on_copy)]
unit.clone().into(),
tz.as_ref().map(|s| s.to_string()),
)),
AT::Duration(unit) => Ok(T::Duration(unit.clone().into())),
AT::Duration(unit) => Ok(T::Duration(
// only some arrow versions implement Copy for unit
#[allow(clippy::clone_on_copy)]
unit.clone().into(),
)),
AT::Binary => Ok(T::Binary),
AT::LargeBinary => Ok(T::LargeBinary),
AT::FixedSizeBinary(n) => Ok(T::FixedSizeBinary(*n)),
Expand Down
2 changes: 2 additions & 0 deletions serde_arrow/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@
//! | Arrow Feature | Arrow Version |
//! |---------------|---------------|
// arrow-version:insert: //! | `arrow-{version}` | `arrow={version}` |
//! | `arrow-53` | `arrow=53` |
//! | `arrow-52` | `arrow=52` |
//! | `arrow-51` | `arrow=51` |
//! | `arrow-50` | `arrow=50` |
Expand Down Expand Up @@ -268,6 +269,7 @@ pub mod _impl {
}

// arrow-version:insert: #[cfg(has_arrow_{version})] build_arrow_crate!(arrow_array_{version}, arrow_buffer_{version}, arrow_data_{version}, arrow_schema_{version});
#[cfg(has_arrow_53)] build_arrow_crate!(arrow_array_53, arrow_buffer_53, arrow_data_53, arrow_schema_53);
#[cfg(has_arrow_52)] build_arrow_crate!(arrow_array_52, arrow_buffer_52, arrow_data_52, arrow_schema_52);
#[cfg(has_arrow_51)] build_arrow_crate!(arrow_array_51, arrow_buffer_51, arrow_data_51, arrow_schema_51);
#[cfg(has_arrow_50)] build_arrow_crate!(arrow_array_50, arrow_buffer_50, arrow_data_50, arrow_schema_50);
Expand Down
5 changes: 3 additions & 2 deletions x.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

all_arrow_features = [
# arrow-version:insert: "arrow-{version}",
"arrow-53",
"arrow-52",
"arrow-51",
"arrow-50",
Expand Down Expand Up @@ -538,8 +539,8 @@ def add_arrow_version(version):

for p in [
self_path / "x.py",
*self_path.glob("serde_arrow/**/*.rs"),
*self_path.glob("serde_arrow/**/*.toml"),
*self_path.glob("*/**/*.rs"),
*self_path.glob("*/**/*.toml"),
]:
content = p.read_text()
if "arrow-version" not in content:
Expand Down

0 comments on commit 69c0609

Please sign in to comment.