Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/add arrow 53 #229

Merged
merged 5 commits into from
Sep 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
205 changes: 128 additions & 77 deletions Cargo.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions Changes.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ Refactor the underlying implementation to prepare for further development

New features

- Add `arrow=53` support
- Add `Binary` / `LargeBinary` support for `arrow2`
- Add support to serialize / deserialize `bool` from integer arrays
- Add a helper to construct `Bool8` arrays
Expand Down
6 changes: 3 additions & 3 deletions example/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
# arrow-version:replace: arrow = {{ version = "52.0", features = [{version}] }}
arrow = {version = "52.0", features = ["ipc"] }
# arrow-version:replace: arrow = {{ version = "{version}", features = ["ipc"] }}
arrow = {version = "53.0", features = ["ipc"] }

chrono = { version = "0.4", features = ["serde"] }
serde = { version = "1.0", features = ["derive"] }

# arrow-version:replace: serde_arrow = {{ path = "../serde_arrow", features = ["arrow-{version}"] }}
serde_arrow = { path = "../serde_arrow", features = ["arrow-52"] }
serde_arrow = { path = "../serde_arrow", features = ["arrow-53"] }
6 changes: 3 additions & 3 deletions integration_tests/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@ version = "0.1.0"
edition = "2021"

[dependencies]
# arrow-version:replace: arrow = {{ version = "52.0", features = [{version}] }}
arrow = {version = "52.0", features = ["ipc"] }
# arrow-version:replace: arrow = {{ version = "{version}", features = ["ipc"] }}
arrow = {version = "53.0", features = ["ipc"] }

chrono = { version = "0.4", features = ["serde"] }
serde = { version = "1.0", features = ["derive"] }
serde_json = "1"

# arrow-version:replace: serde_arrow = {{ path = "../serde_arrow", features = ["arrow-{version}"] }}
serde_arrow = { path = "../serde_arrow", features = ["arrow-52"] }
serde_arrow = { path = "../serde_arrow", features = ["arrow-53"] }
15 changes: 11 additions & 4 deletions serde_arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,18 @@ bench = false
[[bench]]
name = "serde_arrow_bench"
# arrow-version:replace: required-features = ["arrow2-0-17", "arrow-{version}"]
required-features = ["arrow2-0-17", "arrow-52"]
required-features = ["arrow2-0-17", "arrow-53"]
harness = false

[package.metadata.docs.rs]
# arrow-version:replace: features = ["arrow2-0-17", "arrow-{version}"]
features = ["arrow2-0-17", "arrow-52"]
features = ["arrow2-0-17", "arrow-53"]

[features]
default = []

# arrow-version:insert: arrow-{version} = ["dep:arrow-array-{version}", "dep:arrow-schema-{version}", "dep:arrow-data-{version}", "dep:arrow-buffer-{version}"]
arrow-53 = ["dep:arrow-array-53", "dep:arrow-schema-53", "dep:arrow-data-53", "dep:arrow-buffer-53"]
arrow-52 = ["dep:arrow-array-52", "dep:arrow-schema-52", "dep:arrow-data-52", "dep:arrow-buffer-52"]
arrow-51 = ["dep:arrow-array-51", "dep:arrow-schema-51", "dep:arrow-data-51", "dep:arrow-buffer-51"]
arrow-50 = ["dep:arrow-array-50", "dep:arrow-schema-50", "dep:arrow-data-50", "dep:arrow-buffer-50"]
Expand All @@ -50,6 +51,7 @@ half = { version = "2", features = ["bytemuck"], default-features = false }
serde = { version = "1.0", features = ["derive", "std"], default-features = false }

# arrow-version:insert: arrow-array-{version} = {{ package = "arrow-array", version = "{version}", optional = true, default-features = false }}
arrow-array-53 = { package = "arrow-array", version = "53", optional = true, default-features = false }
arrow-array-52 = { package = "arrow-array", version = "52", optional = true, default-features = false }
arrow-array-51 = { package = "arrow-array", version = "51", optional = true, default-features = false }
arrow-array-50 = { package = "arrow-array", version = "50", optional = true, default-features = false }
Expand All @@ -68,6 +70,7 @@ arrow-array-38 = { package = "arrow-array", version = "38", optional = true, def
arrow-array-37 = { package = "arrow-array", version = "37", optional = true, default-features = false }

# arrow-version:insert: arrow-buffer-{version} = {{ package = "arrow-buffer", version = "{version}", optional = true, default-features = false }}
arrow-buffer-53 = { package = "arrow-buffer", version = "53", optional = true, default-features = false }
arrow-buffer-52 = { package = "arrow-buffer", version = "52", optional = true, default-features = false }
arrow-buffer-51 = { package = "arrow-buffer", version = "51", optional = true, default-features = false }
arrow-buffer-50 = { package = "arrow-buffer", version = "50", optional = true, default-features = false }
Expand All @@ -86,6 +89,7 @@ arrow-buffer-38 = { package = "arrow-buffer", version = "38", optional = true, d
arrow-buffer-37 = { package = "arrow-buffer", version = "37", optional = true, default-features = false }

# arrow-version:insert: arrow-data-{version} = {{ package = "arrow-data", version="{version}", optional = true, default-features = false }}
arrow-data-53 = { package = "arrow-data", version="53", optional = true, default-features = false }
arrow-data-52 = { package = "arrow-data", version="52", optional = true, default-features = false }
arrow-data-51 = { package = "arrow-data", version="51", optional = true, default-features = false }
arrow-data-50 = { package = "arrow-data", version="50", optional = true, default-features = false }
Expand All @@ -104,6 +108,7 @@ arrow-data-38 = { package = "arrow-data", version="38", optional = true, default
arrow-data-37 = { package = "arrow-data", version="37", optional = true, default-features = false }

# arrow-version:insert: arrow-schema-{version} = {{ package = "arrow-schema", version = "{version}", optional = true, default-features = false }}
arrow-schema-53 = { package = "arrow-schema", version = "53", optional = true, default-features = false }
arrow-schema-52 = { package = "arrow-schema", version = "52", optional = true, default-features = false }
arrow-schema-51 = { package = "arrow-schema", version = "51", optional = true, default-features = false }
arrow-schema-50 = { package = "arrow-schema", version = "50", optional = true, default-features = false }
Expand Down Expand Up @@ -136,13 +141,14 @@ uuid = { version = "1.10.0", features = ["serde", "v4"] }

# for benchmarks
# arrow-version:replace: arrow-json-{version} = {{ package = "arrow-json", version = "{version}" }}
arrow-json-52 = { package = "arrow-json", version = "52" }
arrow-json-53 = { package = "arrow-json", version = "53" }
criterion = "0.5"
arrow2_convert = "0.5.0"
serde-transcode = "1"
simd-json = "0.13.8"

# arrow-version:insert: arrow-schema-{version} = {{ package = "arrow-schema", version = "{version}", default-features = false, features = ["serde"] }}
arrow-schema-53 = { package = "arrow-schema", version = "53", default-features = false, features = ["serde"] }
arrow-schema-52 = { package = "arrow-schema", version = "52", default-features = false, features = ["serde"] }
arrow-schema-51 = { package = "arrow-schema", version = "51", default-features = false, features = ["serde"] }
arrow-schema-50 = { package = "arrow-schema", version = "50", default-features = false, features = ["serde"] }
Expand Down Expand Up @@ -178,6 +184,7 @@ check-cfg = [
'cfg(has_arrow)',
'cfg(has_arrow_fixed_binary_support)',
# arrow-version:insert: 'cfg(has_arrow_{version})',
'cfg(has_arrow_53)',
'cfg(has_arrow_52)',
'cfg(has_arrow_51)',
'cfg(has_arrow_50)',
Expand All @@ -194,4 +201,4 @@ check-cfg = [
'cfg(has_arrow_39)',
'cfg(has_arrow_38)',
'cfg(has_arrow_37)',
]
]
4 changes: 2 additions & 2 deletions serde_arrow/benches/groups/impls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -125,9 +125,9 @@ pub mod arrow {
use std::sync::Arc;

// arrow-version:replace: use arrow_json_{version}::ReaderBuilder;
use arrow_json_52::ReaderBuilder;
use arrow_json_53::ReaderBuilder;
// arrow-version:replace: use arrow_schema_{version}::Schema;
use arrow_schema_52::Schema;
use arrow_schema_53::Schema;

use serde::Serialize;

Expand Down
6 changes: 3 additions & 3 deletions serde_arrow/benches/groups/json_to_arrow.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ use {
};

// arrow-version:replace: use arrow_json_{version}::ReaderBuilder;
use arrow_json_52::ReaderBuilder;
use arrow_json_53::ReaderBuilder;

// arrow-version:replace: use arrow_schema_{version}::{{FieldRef, Schema as ArrowSchema}};
use arrow_schema_52::{FieldRef, Schema as ArrowSchema};
use arrow_schema_53::{FieldRef, Schema as ArrowSchema};

// arrow-version:replace: use arrow_array_{version}::RecordBatch;
use arrow_array_52::RecordBatch;
use arrow_array_53::RecordBatch;
use serde_json::Value;

fn benchmark_json_to_arrow(c: &mut criterion::Criterion) {
Expand Down
2 changes: 2 additions & 0 deletions serde_arrow/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ fn main() {

let max_arrow_version: Option<usize> = [
// arrow-version:insert: #[cfg(feature = "arrow-{version}")]{\n}{version},
#[cfg(feature = "arrow-53")]
53,
#[cfg(feature = "arrow-52")]
52,
#[cfg(feature = "arrow-51")]
Expand Down
16 changes: 8 additions & 8 deletions serde_arrow/src/arrow_impl/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ impl TryFrom<crate::internal::arrow::Array> for ArrayData {
T::Boolean,
// NOTE: use the explicit len
arr.len,
arr.validity.map(Buffer::from),
arr.validity.map(Buffer::from_vec),
0,
vec![ScalarBuffer::from(arr.values).into_inner()],
vec![],
Expand Down Expand Up @@ -114,7 +114,7 @@ impl TryFrom<crate::internal::arrow::Array> for ArrayData {

Ok(ArrayData::builder(data_type)
.len(arr.len)
.null_bit_buffer(arr.validity.map(Buffer::from))
.null_bit_buffer(arr.validity.map(Buffer::from_vec))
.child_data(data)
.build()?)
}
Expand Down Expand Up @@ -153,7 +153,7 @@ impl TryFrom<crate::internal::arrow::Array> for ArrayData {
Ok(ArrayData::try_new(
T::FixedSizeList(Arc::new(field), arr.n),
child.len() / usize::try_from(arr.n)?,
arr.validity.map(Buffer::from),
arr.validity.map(Buffer::from_vec),
0,
vec![],
vec![child],
Expand All @@ -170,7 +170,7 @@ impl TryFrom<crate::internal::arrow::Array> for ArrayData {
Ok(ArrayData::try_new(
T::FixedSizeBinary(arr.n),
arr.data.len() / usize::try_from(arr.n)?,
arr.validity.map(Buffer::from),
arr.validity.map(Buffer::from_vec),
0,
vec![ScalarBuffer::from(arr.data).into_inner()],
vec![],
Expand All @@ -196,7 +196,7 @@ impl TryFrom<crate::internal::arrow::Array> for ArrayData {
Ok(ArrayData::try_new(
T::Map(Arc::new(field), false),
arr.offsets.len().saturating_sub(1),
arr.validity.map(Buffer::from),
arr.validity.map(Buffer::from_vec),
0,
vec![ScalarBuffer::from(arr.offsets).into_inner()],
vec![child],
Expand Down Expand Up @@ -543,7 +543,7 @@ fn primitive_into_data<T: ArrowNativeType>(
Ok(ArrayData::try_new(
data_type,
values.len(),
validity.map(Buffer::from),
validity.map(Buffer::from_vec),
0,
vec![ScalarBuffer::from(values).into_inner()],
vec![],
Expand All @@ -559,7 +559,7 @@ fn bytes_into_data<O: ArrowNativeType>(
Ok(ArrayData::try_new(
data_type,
offsets.len().saturating_sub(1),
validity.map(Buffer::from),
validity.map(Buffer::from_vec),
0,
vec![
ScalarBuffer::from(offsets).into_inner(),
Expand All @@ -579,7 +579,7 @@ fn list_into_data<O: ArrowNativeType>(
Ok(ArrayData::try_new(
data_type,
len,
validity.map(Buffer::from),
validity.map(Buffer::from_vec),
0,
vec![ScalarBuffer::from(offsets).into_inner()],
vec![child_data],
Expand Down
20 changes: 17 additions & 3 deletions serde_arrow/src/arrow_impl/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,13 +141,27 @@ impl TryFrom<&ArrowDataType> for DataType {
AT::Date32 => Ok(T::Date32),
AT::Date64 => Ok(T::Date64),
AT::Decimal128(precision, scale) => Ok(T::Decimal128(*precision, *scale)),
AT::Time32(unit) => Ok(T::Time32(unit.clone().into())),
AT::Time64(unit) => Ok(T::Time64(unit.clone().into())),
AT::Time32(unit) => Ok(T::Time32(
// only some arrow versions implement Copy for unit
#[allow(clippy::clone_on_copy)]
unit.clone().into(),
)),
AT::Time64(unit) => Ok(T::Time64(
// only some arrow versions implement Copy for unit
#[allow(clippy::clone_on_copy)]
unit.clone().into(),
)),
AT::Timestamp(unit, tz) => Ok(T::Timestamp(
// only some arrow versions implement Copy for unit
#[allow(clippy::clone_on_copy)]
unit.clone().into(),
tz.as_ref().map(|s| s.to_string()),
)),
AT::Duration(unit) => Ok(T::Duration(unit.clone().into())),
AT::Duration(unit) => Ok(T::Duration(
// only some arrow versions implement Copy for unit
#[allow(clippy::clone_on_copy)]
unit.clone().into(),
)),
AT::Binary => Ok(T::Binary),
AT::LargeBinary => Ok(T::LargeBinary),
AT::FixedSizeBinary(n) => Ok(T::FixedSizeBinary(*n)),
Expand Down
2 changes: 2 additions & 0 deletions serde_arrow/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@
//! | Arrow Feature | Arrow Version |
//! |---------------|---------------|
// arrow-version:insert: //! | `arrow-{version}` | `arrow={version}` |
//! | `arrow-53` | `arrow=53` |
//! | `arrow-52` | `arrow=52` |
//! | `arrow-51` | `arrow=51` |
//! | `arrow-50` | `arrow=50` |
Expand Down Expand Up @@ -268,6 +269,7 @@ pub mod _impl {
}

// arrow-version:insert: #[cfg(has_arrow_{version})] build_arrow_crate!(arrow_array_{version}, arrow_buffer_{version}, arrow_data_{version}, arrow_schema_{version});
#[cfg(has_arrow_53)] build_arrow_crate!(arrow_array_53, arrow_buffer_53, arrow_data_53, arrow_schema_53);
#[cfg(has_arrow_52)] build_arrow_crate!(arrow_array_52, arrow_buffer_52, arrow_data_52, arrow_schema_52);
#[cfg(has_arrow_51)] build_arrow_crate!(arrow_array_51, arrow_buffer_51, arrow_data_51, arrow_schema_51);
#[cfg(has_arrow_50)] build_arrow_crate!(arrow_array_50, arrow_buffer_50, arrow_data_50, arrow_schema_50);
Expand Down
5 changes: 3 additions & 2 deletions x.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

all_arrow_features = [
# arrow-version:insert: "arrow-{version}",
"arrow-53",
"arrow-52",
"arrow-51",
"arrow-50",
Expand Down Expand Up @@ -538,8 +539,8 @@ def add_arrow_version(version):

for p in [
self_path / "x.py",
*self_path.glob("serde_arrow/**/*.rs"),
*self_path.glob("serde_arrow/**/*.toml"),
*self_path.glob("*/**/*.rs"),
*self_path.glob("*/**/*.toml"),
]:
content = p.read_text()
if "arrow-version" not in content:
Expand Down