Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

FixedSizeBinaryArray and COW #1601

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 26 additions & 12 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
[package]
name = "arrow2"
version = "0.17.4"
version = "0.17.5"
license = "Apache-2.0"
description = "Unofficial implementation of Apache Arrow spec in safe Rust"
homepage = "https://github.com/jorgecarleitao/arrow2"
repository = "https://github.com/jorgecarleitao/arrow2"
authors = ["Jorge C. Leitao <jorgecarleitao@gmail.com>", "Apache Arrow <dev@arrow.apache.org>"]
authors = [
"Jorge C. Leitao <jorgecarleitao@gmail.com>",
"Apache Arrow <dev@arrow.apache.org>",
]
keywords = ["arrow", "analytics"]
edition = "2021"
exclude = ["testing/"]
Expand Down Expand Up @@ -51,7 +54,9 @@ regex-syntax = { version = "0.7", optional = true }
streaming-iterator = { version = "0.1", optional = true }
fallible-streaming-iterator = { version = "0.1", optional = true }

json-deserializer = { version = "0.4.4", optional = true, features = ["preserve_order"] }
json-deserializer = { version = "0.4.4", optional = true, features = [
"preserve_order",
] }
indexmap = { version = "^1.6", optional = true }

# used to print columns in a nice columnar format
Expand Down Expand Up @@ -86,7 +91,9 @@ orc-format = { version = "0.3.0", optional = true }
# Arrow integration tests support
serde = { version = "^1.0", features = ["rc"], optional = true }
serde_derive = { version = "^1.0", optional = true }
serde_json = { version = "^1.0", features = ["preserve_order"], optional = true }
serde_json = { version = "^1.0", features = [
"preserve_order",
], optional = true }

# for division/remainder optimization at runtime
strength_reduce = { version = "0.2", optional = true }
Expand Down Expand Up @@ -180,23 +187,33 @@ io_csv_read_async = ["csv-async", "lexical-core", "futures"]
io_csv_write = ["csv-core", "streaming-iterator", "lexical-core"]
io_json = ["io_json_read", "io_json_write"]
io_json_read = ["json-deserializer", "indexmap", "lexical-core"]
io_json_write = ["streaming-iterator", "fallible-streaming-iterator", "lexical-core"]
io_json_write = [
"streaming-iterator",
"fallible-streaming-iterator",
"lexical-core",
]
io_ipc = ["arrow-format"]
io_ipc_write_async = ["io_ipc", "futures"]
io_ipc_read_async = ["io_ipc", "futures", "async-stream"]
io_ipc_compression = ["lz4", "zstd"]
io_flight = ["io_ipc", "arrow-format/flight-data"]

# base64 + io_ipc because arrow schemas are stored as base64-encoded ipc format.
io_parquet = ["parquet2", "io_ipc", "base64", "streaming-iterator", "fallible-streaming-iterator"]
io_parquet = [
"parquet2",
"io_ipc",
"base64",
"streaming-iterator",
"fallible-streaming-iterator",
]
io_parquet_async = ["futures", "io_parquet", "parquet2/async"]

io_parquet_compression = [
"io_parquet_zstd",
"io_parquet_gzip",
"io_parquet_snappy",
"io_parquet_lz4",
"io_parquet_brotli"
"io_parquet_brotli",
]

# sample testing of generated arrow data
Expand All @@ -214,9 +231,7 @@ io_parquet_brotli = ["parquet2/brotli"]
io_parquet_bloom_filter = ["parquet2/bloom_filter"]

io_avro = ["avro-schema", "streaming-iterator"]
io_avro_compression = [
"avro-schema/compression",
]
io_avro_compression = ["avro-schema/compression"]
io_avro_async = ["avro-schema/async"]

io_orc = ["orc-format"]
Expand Down Expand Up @@ -277,7 +292,7 @@ compute = [
"compute_take",
"compute_temporal",
"compute_utf8",
"compute_window"
"compute_window",
]
benchmarks = ["rand"]
serde_types = ["serde", "serde_derive"]
Expand Down Expand Up @@ -401,4 +416,3 @@ harness = false
[[bench]]
name = "like_kernels"
harness = false

10 changes: 10 additions & 0 deletions src/array/binary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,16 @@ impl<O: Offset> BinaryArray<O> {
impl_mut_validity!();
impl_into_array!();

/// Gives mutable access to the value bytes of this [`BinaryArray`], when available.
///
/// This forwards to `get_mut_slice` on the values buffer and hands back its result
/// unchanged, so `None` means the buffer declined to lend itself mutably.
pub fn get_mut_values(&mut self) -> Option<&mut [u8]> {
    // NOTE(review): presumably `None` occurs when the buffer is shared or FFI-owned —
    // confirm against the buffer's own `get_mut_slice` documentation.
    let values = &mut self.values;
    values.get_mut_slice()
}

/// Gives mutable access to the offsets of this [`BinaryArray`], when available.
///
/// This forwards to `get_mut_slice` on the offsets buffer and hands back its result
/// unchanged, so `None` means the offsets could not be borrowed mutably.
pub fn get_mut_offsets(&mut self) -> Option<&mut [O]> {
    // NOTE(review): writes through the returned slice are not re-validated here;
    // presumably the array expects offsets to stay ordered and within the values
    // buffer — confirm whether callers must uphold that themselves.
    let offsets = &mut self.offsets;
    offsets.get_mut_slice()
}

/// Returns its internal representation
#[must_use]
pub fn into_inner(self) -> (DataType, OffsetsBuffer<O>, Buffer<u8>, Option<Bitmap>) {
Expand Down
5 changes: 5 additions & 0 deletions src/array/fixed_size_binary/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,11 @@ impl FixedSizeBinaryArray {
}
}

/// Gives mutable access to the value bytes of this [`FixedSizeBinaryArray`], when available.
///
/// This forwards to `get_mut_slice` on the values buffer and hands back its result
/// unchanged, so `None` means the buffer declined to lend itself mutably.
pub fn get_mut_values(&mut self) -> Option<&mut [u8]> {
    // NOTE(review): presumably `None` occurs when the buffer is shared or FFI-owned —
    // confirm against the buffer's own `get_mut_slice` documentation.
    let values = &mut self.values;
    values.get_mut_slice()
}

/// Returns a new [`FixedSizeBinaryArray`] with a different logical type.
/// This is `O(1)`.
/// # Panics
Expand Down
7 changes: 6 additions & 1 deletion src/array/fixed_size_list/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -148,11 +148,16 @@ impl FixedSizeListArray {
self.validity.as_ref()
}

/// Returns the inner array.
/// Returns the inner array reference.
///
/// This is a plain shared borrow of the boxed child array held in `self.values`;
/// nothing is cloned or copied.
pub fn values(&self) -> &Box<dyn Array> {
&self.values
}

/// Returns the inner array mutable reference.
///
/// The borrow points at the `Box` itself, so a caller can mutate the child array in
/// place or replace the box with a different array altogether.
// NOTE(review): nothing in this method re-checks a replaced child against this array's
// data type, size or length — confirm that callers are expected to uphold that.
pub fn mut_values(&mut self) -> &mut Box<dyn Array> {
&mut self.values
}

/// Returns the `Vec<T>` at position `i`.
/// # Panic:
/// panics iff `i >= self.len()`
Expand Down
10 changes: 10 additions & 0 deletions src/array/utf8/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,16 @@ impl<O: Offset> Utf8Array<O> {
impl_mut_validity!();
impl_into_array!();

/// Gives mutable access to the value bytes of this [`Utf8Array`], when available.
///
/// This forwards to `get_mut_slice` on the values buffer and hands back its result
/// unchanged, so `None` means the buffer declined to lend itself mutably.
pub fn get_mut_values(&mut self) -> Option<&mut [u8]> {
    // NOTE(review): bytes written through the returned slice are not validated here;
    // if this array assumes its values are valid UTF-8, a caller could break that —
    // confirm whether this accessor should be `unsafe` or documented accordingly.
    let values = &mut self.values;
    values.get_mut_slice()
}

/// Gives mutable access to the offsets of this [`Utf8Array`], when available.
///
/// This forwards to `get_mut_slice` on the offsets buffer and hands back its result
/// unchanged, so `None` means the offsets could not be borrowed mutably.
pub fn get_mut_offsets(&mut self) -> Option<&mut [O]> {
    // NOTE(review): writes through the returned slice are not re-validated here;
    // presumably the array expects offsets to stay ordered, within the values buffer
    // and on character boundaries — confirm whether callers must uphold that.
    let offsets = &mut self.offsets;
    offsets.get_mut_slice()
}

/// Returns its internal representation
#[must_use]
pub fn into_inner(self) -> (DataType, OffsetsBuffer<O>, Buffer<u8>, Option<Bitmap>) {
Expand Down
10 changes: 10 additions & 0 deletions src/offset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,16 @@ impl<O: Offset> OffsetsBuffer<O> {
.map_left(Self)
}

/// Borrows the offsets as a mutable slice, when that is possible.
///
/// The result is [`Some`] iff this [`OffsetsBuffer`]:
/// * has not been cloned (i.e. [`Arc`]`::get_mut` yields [`Some`])
/// * has not been imported from the c data interface (FFI)
///
/// The call is forwarded to the wrapped buffer and its answer is returned as-is.
#[inline]
pub fn get_mut_slice(&mut self) -> Option<&mut [O]> {
    // NOTE(review): the slice allows arbitrary writes and nothing here re-checks them;
    // presumably this type expects its offsets to remain ordered — confirm whether
    // callers are responsible for preserving that.
    let inner = &mut self.0;
    inner.get_mut_slice()
}

/// Returns a reference to its internal [`Buffer`].
#[inline]
pub fn buffer(&self) -> &Buffer<O> {
Expand Down