From 59f4d1b1ee80d97a327653af32d969d6440b432a Mon Sep 17 00:00:00 2001 From: kikkon Date: Sat, 12 Oct 2024 18:44:41 +0800 Subject: [PATCH 1/9] feat: add GenericListViewBuilder --- .../src/builder/generic_list_view_builder.rs | 707 ++++++++++++++++++ arrow-array/src/builder/mod.rs | 8 + arrow-array/src/builder/struct_builder.rs | 10 + arrow-array/src/cast.rs | 16 + 4 files changed, 741 insertions(+) create mode 100644 arrow-array/src/builder/generic_list_view_builder.rs diff --git a/arrow-array/src/builder/generic_list_view_builder.rs b/arrow-array/src/builder/generic_list_view_builder.rs new file mode 100644 index 000000000000..0eb77985f40a --- /dev/null +++ b/arrow-array/src/builder/generic_list_view_builder.rs @@ -0,0 +1,707 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::builder::ArrayBuilder; +use crate::{ArrayRef, GenericListViewArray, OffsetSizeTrait}; +use arrow_buffer::{Buffer, BufferBuilder, NullBufferBuilder, ScalarBuffer}; +use arrow_schema::{Field, FieldRef}; +use std::any::Any; +use std::sync::Arc; + +/// Builder for [`GenericListViewArray`] +/// +#[derive(Debug)] +pub struct GenericListViewBuilder { + offsets_builder: BufferBuilder, + sizes_builder: BufferBuilder, + null_buffer_builder: NullBufferBuilder, + values_builder: T, + field: Option, +} + +impl Default for GenericListViewBuilder { + fn default() -> Self { + Self::new(T::default()) + } +} + +impl ArrayBuilder + for GenericListViewBuilder +where + T: 'static, +{ + /// Returns the builder as a non-mutable `Any` reference. + fn as_any(&self) -> &dyn Any { + self + } + + /// Returns the builder as a mutable `Any` reference. + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + + /// Returns the boxed builder as a box of `Any`. + fn into_box_any(self: Box) -> Box { + self + } + + /// Returns the number of array slots in the builder + fn len(&self) -> usize { + self.null_buffer_builder.len() + } + + /// Builds the array and reset this builder. + fn finish(&mut self) -> ArrayRef { + Arc::new(self.finish()) + } + + /// Builds the array without resetting the builder. + fn finish_cloned(&self) -> ArrayRef { + Arc::new(self.finish_cloned()) + } +} + +impl GenericListViewBuilder { + /// Creates a new [`GenericListViewBuilder`] from a given values array builder + pub fn new(values_builder: T) -> Self { + let capacity = values_builder.len(); + Self::with_capacity(values_builder, capacity) + } + + /// Creates a new [`GenericListViewBuilder`] from a given values array builder + /// `capacity` is the number of items to pre-allocate space for in this builder + pub fn with_capacity(values_builder: T, capacity: usize) -> Self { + let offsets_builder = BufferBuilder::::new(capacity); + let sizes_builder = BufferBuilder::::new(capacity); + Self { + offsets_builder, + null_buffer_builder: NullBufferBuilder::new(capacity), + values_builder, + sizes_builder, + field: None, + } + } + + /// Override the field passed to [`GenericListViewArray::new`] + /// + /// By default a nullable field is created with the name `item` + /// + /// Note: [`Self::finish`] and [`Self::finish_cloned`] will panic if the + /// field's data type does not match that of `T` + pub fn with_field(self, field: impl Into) -> Self { + Self { + field: Some(field.into()), + ..self + } + } +} + +impl GenericListViewBuilder +where + T: 'static, +{ + /// Returns the child array builder as a mutable reference. + /// + /// This mutable reference can be used to append values into the child array builder, + /// but you must call [`append`](#method.append) to delimit each distinct list value. + pub fn values(&mut self) -> &mut T { + &mut self.values_builder + } + + /// Returns the child array builder as an immutable reference + pub fn values_ref(&self) -> &T { + &self.values_builder + } + + /// Finish the current variable-length list array slot + /// + /// # Panics + /// + /// Panics if the length of [`Self::values`] exceeds `OffsetSize::MAX` + #[inline] + pub fn append(&mut self, is_valid: bool, size: usize) { + self.offsets_builder + .append(OffsetSize::from_usize(self.values_builder.len() - size).unwrap()); + self.sizes_builder + .append(OffsetSize::from_usize(self.values_builder.len()).unwrap()); + self.null_buffer_builder.append(is_valid); + } + + /// Append value into this [`GenericListViewBuilder`] + #[inline] + pub fn append_value(&mut self, i: I) + where + T: Extend>, + I: IntoIterator>, + { + self.extend(std::iter::once(Some(i))) + } + + /// Append a null to this [`GenericListViewBuilder`] + /// + /// See [`Self::append_value`] for an example use. + #[inline] + pub fn append_null(&mut self) { + self.offsets_builder + .append(OffsetSize::from_usize(self.values_builder.len()).unwrap()); + self.sizes_builder + .append(OffsetSize::from_usize(0).unwrap()); + self.null_buffer_builder.append_null(); + } + + /// Appends an optional value into this [`GenericListViewBuilder`] + /// + /// If `Some` calls [`Self::append_value`] otherwise calls [`Self::append_null`] + #[inline] + pub fn append_option(&mut self, i: Option) + where + T: Extend>, + I: IntoIterator>, + { + match i { + Some(i) => self.append_value(i), + None => self.append_null(), + } + } + + /// Builds the [`GenericListViewArray`] and reset this builder. + pub fn finish(&mut self) -> GenericListViewArray { + let values = self.values_builder.finish(); + let nulls = self.null_buffer_builder.finish(); + + let offsets = self.offsets_builder.finish(); + // Safety: Safe by construction + let offsets = ScalarBuffer::from(offsets); + + let sizes = self.sizes_builder.finish(); + let sizes = ScalarBuffer::from(sizes); + + let field = match &self.field { + Some(f) => f.clone(), + None => Arc::new(Field::new("item", values.data_type().clone(), true)), + }; + GenericListViewArray::new(field, offsets, sizes, values, nulls) + } + + /// Builds the [`GenericListViewArray`] without resetting the builder. + pub fn finish_cloned(&self) -> GenericListViewArray { + let values = self.values_builder.finish_cloned(); + let nulls = self.null_buffer_builder.finish_cloned(); + + let offsets = Buffer::from_slice_ref(self.offsets_builder.as_slice()); + // Safety: safe by construction + let offsets = ScalarBuffer::from(offsets); + + let sizes = Buffer::from_slice_ref(self.sizes_builder.as_slice()); + let sizes = ScalarBuffer::from(sizes); + + let field = match &self.field { + Some(f) => f.clone(), + None => Arc::new(Field::new("item", values.data_type().clone(), true)), + }; + + GenericListViewArray::new(field, offsets, sizes, values, nulls) + } + + /// Returns the current offsets buffer as a slice + pub fn offsets_slice(&self) -> &[OffsetSize] { + self.offsets_builder.as_slice() + } +} + +impl Extend> for GenericListViewBuilder +where + O: OffsetSizeTrait, + B: ArrayBuilder + Extend, + V: IntoIterator, +{ + #[inline] + fn extend>>(&mut self, iter: T) { + for v in iter { + match v { + Some(elements) => { + let origin_size = self.values_builder.len(); + self.values_builder.extend(elements); + let size = self.values_builder.len() - origin_size; + self.append(true, size); + } + None => self.append(false, 0), + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::builder::{make_builder, Int32Builder, ListViewBuilder}; + use crate::cast::AsArray; + use crate::types::Int32Type; + use crate::{Array, Int32Array}; + use arrow_schema::DataType; + + fn _test_generic_list_view_array_builder() { + let values_builder = Int32Builder::with_capacity(10); + let mut builder = GenericListViewBuilder::::new(values_builder); + + // [[0, 1, 2], [3, 4, 5], [6, 7]] + builder.values().append_value(0); + builder.values().append_value(1); + builder.values().append_value(2); + builder.append(true, 3); + builder.values().append_value(3); + builder.values().append_value(4); + builder.values().append_value(5); + builder.append(true, 3); + builder.values().append_value(6); + builder.values().append_value(7); + builder.append(true, 2); + let list_array = builder.finish(); + + let list_values = list_array.values().as_primitive::(); + assert_eq!(list_values.values(), &[0, 1, 2, 3, 4, 5, 6, 7]); + assert_eq!(list_array.value_offsets(), [0, 3, 6].map(O::usize_as)); + assert_eq!(list_array.value_sizes(), [3, 3, 2].map(O::usize_as)); + assert_eq!(DataType::Int32, list_array.value_type()); + assert_eq!(3, list_array.len()); + assert_eq!(0, list_array.null_count()); + assert_eq!(O::from_usize(6).unwrap(), list_array.value_offsets()[2]); + assert_eq!(O::from_usize(2).unwrap(), list_array.value_sizes()[2]); + for i in 0..2 { + assert!(list_array.is_valid(i)); + assert!(!list_array.is_null(i)); + } + } + + #[test] + fn test_list_view_array_builder() { + _test_generic_list_view_array_builder::() + } + + #[test] + fn test_large_list_view_array_builder() { + _test_generic_list_view_array_builder::() + } + + fn _test_generic_list_view_array_builder_nulls() { + let values_builder = Int32Builder::with_capacity(10); + let mut builder = GenericListViewBuilder::::new(values_builder); + + // [[0, 1, 2], null, [3, null, 5], [6, 7]] + builder.values().append_value(0); + builder.values().append_value(1); + builder.values().append_value(2); + builder.append(true, 3); + builder.append(false, 0); + builder.values().append_value(3); + builder.values().append_null(); + builder.values().append_value(5); + builder.append(true, 3); + builder.values().append_value(6); + builder.values().append_value(7); + builder.append(true, 2); + + let list_array = builder.finish(); + + assert_eq!(DataType::Int32, list_array.value_type()); + assert_eq!(4, list_array.len()); + assert_eq!(1, list_array.null_count()); + assert_eq!(O::from_usize(3).unwrap(), list_array.value_offsets()[2]); + assert_eq!(O::from_usize(3).unwrap(), list_array.value_sizes()[2]); + } + + #[test] + fn test_list_view_array_builder_nulls() { + _test_generic_list_view_array_builder_nulls::() + } + + #[test] + fn test_large_list_view_array_builder_nulls() { + _test_generic_list_view_array_builder_nulls::() + } + + #[test] + fn test_list_view_array_builder_finish() { + let values_builder = Int32Array::builder(5); + let mut builder = ListViewBuilder::new(values_builder); + + builder.values().append_slice(&[1, 2, 3]); + builder.append(true, 1); + builder.values().append_slice(&[4, 5, 6]); + builder.append(true, 1); + + let mut arr = builder.finish(); + assert_eq!(2, arr.len()); + assert!(builder.is_empty()); + + builder.values().append_slice(&[7, 8, 9]); + builder.append(true, 1); + arr = builder.finish(); + assert_eq!(1, arr.len()); + assert!(builder.is_empty()); + } + + #[test] + fn test_list_view_array_builder_finish_cloned() { + let values_builder = Int32Array::builder(5); + let mut builder = ListViewBuilder::new(values_builder); + + builder.values().append_slice(&[1, 2, 3]); + builder.append(true, 1); + builder.values().append_slice(&[4, 5, 6]); + builder.append(true, 1); + + let mut arr = builder.finish_cloned(); + assert_eq!(2, arr.len()); + assert!(!builder.is_empty()); + + builder.values().append_slice(&[7, 8, 9]); + builder.append(true, 1); + arr = builder.finish(); + assert_eq!(3, arr.len()); + assert!(builder.is_empty()); + } + + #[test] + fn test_list_view_list_view_array_builder() { + let primitive_builder = Int32Builder::with_capacity(10); + let values_builder = ListViewBuilder::new(primitive_builder); + let mut builder = ListViewBuilder::new(values_builder); + + // [[[1, 2], [3, 4]], [[5, 6, 7], null, [8]], null, [[9, 10]]] + builder.values().values().append_value(1); + builder.values().values().append_value(2); + builder.values().append(true, 2); + builder.values().values().append_value(3); + builder.values().values().append_value(4); + builder.values().append(true, 2); + builder.append(true, 2); + + builder.values().values().append_value(5); + builder.values().values().append_value(6); + builder.values().values().append_value(7); + builder.values().append(true, 3); + builder.values().append(false, 0); + builder.values().values().append_value(8); + builder.values().append(true, 1); + builder.append(true, 3); + + builder.append(false, 0); + + builder.values().values().append_value(9); + builder.values().values().append_value(10); + builder.values().append(true, 2); + builder.append(true, 1); + + let l1 = builder.finish(); + + assert_eq!(4, l1.len()); + assert_eq!(1, l1.null_count()); + + assert_eq!(l1.value_offsets(), &[0, 2, 5, 5]); + assert_eq!(l1.value_sizes(), &[2, 3, 0, 1]); + + let l2 = l1.values().as_list_view::(); + + assert_eq!(6, l2.len()); + assert_eq!(1, l2.null_count()); + assert_eq!(l2.value_offsets(), &[0, 2, 4, 7, 7, 8]); + assert_eq!(l2.value_sizes(), &[2, 2, 3, 0, 1, 2]); + + let i1 = l2.values().as_primitive::(); + assert_eq!(10, i1.len()); + assert_eq!(0, i1.null_count()); + assert_eq!(i1.values(), &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); + } + + #[test] + fn test_extend() { + let mut builder = ListViewBuilder::new(Int32Builder::new()); + builder.extend([ + Some(vec![Some(1), Some(2), Some(7), None]), + Some(vec![]), + Some(vec![Some(4), Some(5)]), + None, + ]); + + let array = builder.finish(); + assert_eq!(array.value_offsets(), [0, 4, 4, 6]); + assert_eq!(array.value_sizes(), [4, 0, 2, 0]); + assert_eq!(array.null_count(), 1); + assert!(array.is_null(3)); + let elements = array.values().as_primitive::(); + assert_eq!(elements.values(), &[1, 2, 7, 0, 4, 5]); + assert_eq!(elements.null_count(), 1); + assert!(elements.is_null(3)); + } + + #[test] + fn test_boxed_primitive_array_builder() { + let values_builder = make_builder(&DataType::Int32, 5); + let mut builder = ListViewBuilder::new(values_builder); + + builder + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_slice(&[1, 2, 3]); + builder.append(true, 1); + + builder + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_slice(&[4, 5, 6]); + builder.append(true, 1); + + let arr = builder.finish(); + assert_eq!(2, arr.len()); + + let elements = arr.values().as_primitive::(); + assert_eq!(elements.values(), &[1, 2, 3, 4, 5, 6]); + } + + #[test] + fn test_boxed_list_view_list_view_array_builder() { + // This test is same as `test_list_list_array_builder` but uses boxed builders. + let values_builder = make_builder( + &DataType::ListView(Arc::new(Field::new("item", DataType::Int32, true))), + 10, + ); + test_boxed_generic_list_view_generic_list_view_array_builder::(values_builder); + } + + #[test] + fn test_boxed_large_list_view_large_list_view_array_builder() { + // This test is same as `test_list_list_array_builder` but uses boxed builders. + let values_builder = make_builder( + &DataType::LargeListView(Arc::new(Field::new("item", DataType::Int32, true))), + 10, + ); + test_boxed_generic_list_view_generic_list_view_array_builder::(values_builder); + } + + fn test_boxed_generic_list_view_generic_list_view_array_builder< + O: OffsetSizeTrait + PartialEq, + >( + values_builder: Box, + ) { + let mut builder: GenericListViewBuilder> = + GenericListViewBuilder::>::new(values_builder); + + // [[[1, 2], [3, 4]], [[5, 6, 7], null, [8]], null, [[9, 10]]] + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_value(1); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_value(2); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .append(true, 2); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_value(3); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_value(4); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .append(true, 2); + builder.append(true, 2); + + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_value(5); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_value(6); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an (Large)ListViewBuilder") + .append_value(7); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .append(true, 3); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .append(false, 0); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_value(8); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .append(true, 1); + builder.append(true, 3); + + builder.append(false, 0); + + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_value(9); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .values() + .as_any_mut() + .downcast_mut::() + .expect("should be an Int32Builder") + .append_value(10); + builder + .values() + .as_any_mut() + .downcast_mut::>>() + .expect("should be an (Large)ListViewBuilder") + .append(true, 2); + builder.append(true, 1); + + let l1 = builder.finish(); + assert_eq!(4, l1.len()); + assert_eq!(1, l1.null_count()); + assert_eq!(l1.value_offsets(), &[0, 2, 5, 5].map(O::usize_as)); + assert_eq!(l1.value_sizes(), &[2, 3, 0, 1].map(O::usize_as)); + + let l2 = l1.values().as_list_view::(); + assert_eq!(6, l2.len()); + assert_eq!(1, l2.null_count()); + assert_eq!(l2.value_offsets(), &[0, 2, 4, 7, 7, 8].map(O::usize_as)); + assert_eq!(l2.value_sizes(), &[2, 2, 3, 0, 1, 2].map(O::usize_as)); + + let i1 = l2.values().as_primitive::(); + assert_eq!(10, i1.len()); + assert_eq!(0, i1.null_count()); + assert_eq!(i1.values(), &[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); + } + + #[test] + fn test_with_field() { + let field = Arc::new(Field::new("bar", DataType::Int32, false)); + let mut builder = ListViewBuilder::new(Int32Builder::new()).with_field(field.clone()); + builder.append_value([Some(1), Some(2), Some(3)]); + builder.append_null(); // This is fine as nullability refers to nullability of values + builder.append_value([Some(4)]); + let array = builder.finish(); + assert_eq!(array.len(), 3); + assert_eq!(array.data_type(), &DataType::ListView(field.clone())); + + builder.append_value([Some(4), Some(5)]); + let array = builder.finish(); + assert_eq!(array.data_type(), &DataType::ListView(field)); + assert_eq!(array.len(), 1); + } + + #[test] + #[should_panic( + expected = "Non-nullable field of ListViewArray \\\"item\\\" cannot contain nulls" + )] + fn test_checks_nullability() { + let field = Arc::new(Field::new("item", DataType::Int32, false)); + let mut builder = ListViewBuilder::new(Int32Builder::new()).with_field(field.clone()); + builder.append_value([Some(1), None]); + builder.finish(); + } + + #[test] + #[should_panic(expected = "ListViewArray expected data type Int64 got Int32")] + fn test_checks_data_type() { + let field = Arc::new(Field::new("item", DataType::Int64, false)); + let mut builder = ListViewBuilder::new(Int32Builder::new()).with_field(field.clone()); + builder.append_value([Some(1)]); + builder.finish(); + } +} diff --git a/arrow-array/src/builder/mod.rs b/arrow-array/src/builder/mod.rs index dd1a5c3ae722..2167e36dc437 100644 --- a/arrow-array/src/builder/mod.rs +++ b/arrow-array/src/builder/mod.rs @@ -180,6 +180,8 @@ mod generic_byte_run_builder; pub use generic_byte_run_builder::*; mod generic_bytes_view_builder; pub use generic_bytes_view_builder::*; +mod generic_list_view_builder; +pub use generic_list_view_builder::*; mod union_builder; pub use union_builder::*; @@ -304,6 +306,12 @@ pub type ListBuilder = GenericListBuilder; /// Builder for [`LargeListArray`](crate::array::LargeListArray) pub type LargeListBuilder = GenericListBuilder; +/// Builder for [`ListViewArray`](crate::array::ListViewArray) +pub type ListViewBuilder = GenericListViewBuilder; + +/// Builder for [`LargeListViewArray`](crate::array::LargeListViewArray) +pub type LargeListViewBuilder = GenericListViewBuilder; + /// Builder for [`BinaryArray`](crate::array::BinaryArray) /// /// See examples on [`GenericBinaryBuilder`] diff --git a/arrow-array/src/builder/struct_builder.rs b/arrow-array/src/builder/struct_builder.rs index 396ab2fed851..8c576200d89f 100644 --- a/arrow-array/src/builder/struct_builder.rs +++ b/arrow-array/src/builder/struct_builder.rs @@ -269,6 +269,16 @@ pub fn make_builder(datatype: &DataType, capacity: usize) -> Box { + let builder = make_builder(field.data_type(), capacity); + Box::new(ListViewBuilder::with_capacity(builder, capacity).with_field(field.clone())) + } + DataType::LargeListView(field) => { + let builder = make_builder(field.data_type(), capacity); + Box::new( + LargeListViewBuilder::with_capacity(builder, capacity).with_field(field.clone()), + ) + } DataType::Map(field, _) => match field.data_type() { DataType::Struct(fields) => { let map_field_names = MapFieldNames { diff --git a/arrow-array/src/cast.rs b/arrow-array/src/cast.rs index 8c7cc2a12f62..da0d35fefb2a 100644 --- a/arrow-array/src/cast.rs +++ b/arrow-array/src/cast.rs @@ -830,6 +830,14 @@ pub trait AsArray: private::Sealed { self.as_list_opt().expect("list array") } + /// Downcast this to a [`GenericListViewArray`] returning `None` if not possible + fn as_list_view_opt(&self) -> Option<&GenericListViewArray>; + + /// Downcast this to a [`GenericListViewArray`] panicking if not possible + fn as_list_view(&self) -> &GenericListViewArray { + self.as_list_view_opt().expect("list view array") + } + /// Downcast this to a [`FixedSizeBinaryArray`] returning `None` if not possible fn as_fixed_size_binary_opt(&self) -> Option<&FixedSizeBinaryArray>; @@ -903,6 +911,10 @@ impl AsArray for dyn Array + '_ { self.as_any().downcast_ref() } + fn as_list_view_opt(&self) -> Option<&GenericListViewArray> { + self.as_any().downcast_ref() + } + fn as_fixed_size_binary_opt(&self) -> Option<&FixedSizeBinaryArray> { self.as_any().downcast_ref() } @@ -958,6 +970,10 @@ impl AsArray for ArrayRef { self.as_ref().as_list_opt() } + fn as_list_view_opt(&self) -> Option<&GenericListViewArray> { + self.as_ref().as_list_view_opt() + } + fn as_fixed_size_binary_opt(&self) -> Option<&FixedSizeBinaryArray> { self.as_ref().as_fixed_size_binary_opt() } From 2a02a3ac2d2790066339a90c5f7b2adcb843f349 Mon Sep 17 00:00:00 2001 From: kikkon Date: Sat, 12 Oct 2024 23:31:10 +0800 Subject: [PATCH 2/9] remove uszie --- .../src/builder/generic_list_view_builder.rs | 98 ++++++++++--------- 1 file changed, 52 insertions(+), 46 deletions(-) diff --git a/arrow-array/src/builder/generic_list_view_builder.rs b/arrow-array/src/builder/generic_list_view_builder.rs index 0eb77985f40a..a8ac913b045e 100644 --- a/arrow-array/src/builder/generic_list_view_builder.rs +++ b/arrow-array/src/builder/generic_list_view_builder.rs @@ -31,6 +31,8 @@ pub struct GenericListViewBuilder null_buffer_builder: NullBufferBuilder, values_builder: T, field: Option, + current_offset: OffsetSize, + current_size: OffsetSize, } impl Default for GenericListViewBuilder { @@ -93,10 +95,12 @@ impl GenericListViewBuilder GenericListViewArray { let values = self.values_builder.finish(); let nulls = self.null_buffer_builder.finish(); - let offsets = self.offsets_builder.finish(); + // Safety: Safe by construction let offsets = ScalarBuffer::from(offsets); - let sizes = self.sizes_builder.finish(); let sizes = ScalarBuffer::from(sizes); - + dbg!(&offsets); + dbg!(&sizes); let field = match &self.field { Some(f) => f.clone(), None => Arc::new(Field::new("item", values.data_type().clone(), true)), @@ -236,10 +243,9 @@ where Some(elements) => { let origin_size = self.values_builder.len(); self.values_builder.extend(elements); - let size = self.values_builder.len() - origin_size; - self.append(true, size); + self.append(true); } - None => self.append(false, 0), + None => self.append(false), } } } @@ -262,14 +268,14 @@ mod tests { builder.values().append_value(0); builder.values().append_value(1); builder.values().append_value(2); - builder.append(true, 3); + builder.append(true); builder.values().append_value(3); builder.values().append_value(4); builder.values().append_value(5); - builder.append(true, 3); + builder.append(true); builder.values().append_value(6); builder.values().append_value(7); - builder.append(true, 2); + builder.append(true); let list_array = builder.finish(); let list_values = list_array.values().as_primitive::(); @@ -305,15 +311,15 @@ mod tests { builder.values().append_value(0); builder.values().append_value(1); builder.values().append_value(2); - builder.append(true, 3); - builder.append(false, 0); + builder.append(true); + builder.append(false); builder.values().append_value(3); builder.values().append_null(); builder.values().append_value(5); - builder.append(true, 3); + builder.append(true); builder.values().append_value(6); builder.values().append_value(7); - builder.append(true, 2); + builder.append(true); let list_array = builder.finish(); @@ -340,16 +346,16 @@ mod tests { let mut builder = ListViewBuilder::new(values_builder); builder.values().append_slice(&[1, 2, 3]); - builder.append(true, 1); + builder.append(true); builder.values().append_slice(&[4, 5, 6]); - builder.append(true, 1); + builder.append(true); let mut arr = builder.finish(); assert_eq!(2, arr.len()); assert!(builder.is_empty()); builder.values().append_slice(&[7, 8, 9]); - builder.append(true, 1); + builder.append(true); arr = builder.finish(); assert_eq!(1, arr.len()); assert!(builder.is_empty()); @@ -361,16 +367,16 @@ mod tests { let mut builder = ListViewBuilder::new(values_builder); builder.values().append_slice(&[1, 2, 3]); - builder.append(true, 1); + builder.append(true); builder.values().append_slice(&[4, 5, 6]); - builder.append(true, 1); + builder.append(true); let mut arr = builder.finish_cloned(); assert_eq!(2, arr.len()); assert!(!builder.is_empty()); builder.values().append_slice(&[7, 8, 9]); - builder.append(true, 1); + builder.append(true); arr = builder.finish(); assert_eq!(3, arr.len()); assert!(builder.is_empty()); @@ -385,27 +391,27 @@ mod tests { // [[[1, 2], [3, 4]], [[5, 6, 7], null, [8]], null, [[9, 10]]] builder.values().values().append_value(1); builder.values().values().append_value(2); - builder.values().append(true, 2); + builder.values().append(true); builder.values().values().append_value(3); builder.values().values().append_value(4); - builder.values().append(true, 2); - builder.append(true, 2); + builder.values().append(true); + builder.append(true); builder.values().values().append_value(5); builder.values().values().append_value(6); builder.values().values().append_value(7); - builder.values().append(true, 3); - builder.values().append(false, 0); + builder.values().append(true); + builder.values().append(false); builder.values().values().append_value(8); - builder.values().append(true, 1); - builder.append(true, 3); + builder.values().append(true); + builder.append(true); - builder.append(false, 0); + builder.append(false); builder.values().values().append_value(9); builder.values().values().append_value(10); - builder.values().append(true, 2); - builder.append(true, 1); + builder.values().append(true); + builder.append(true); let l1 = builder.finish(); @@ -460,7 +466,7 @@ mod tests { .downcast_mut::() .expect("should be an Int32Builder") .append_slice(&[1, 2, 3]); - builder.append(true, 1); + builder.append(true); builder .values() @@ -468,7 +474,7 @@ mod tests { .downcast_mut::() .expect("should be an Int32Builder") .append_slice(&[4, 5, 6]); - builder.append(true, 1); + builder.append(true); let arr = builder.finish(); assert_eq!(2, arr.len()); @@ -531,7 +537,7 @@ mod tests { .as_any_mut() .downcast_mut::>>() .expect("should be an (Large)ListViewBuilder") - .append(true, 2); + .append(true); builder .values() .as_any_mut() @@ -557,8 +563,8 @@ mod tests { .as_any_mut() .downcast_mut::>>() .expect("should be an (Large)ListViewBuilder") - .append(true, 2); - builder.append(true, 2); + .append(true); + builder.append(true); builder .values() @@ -595,13 +601,13 @@ mod tests { .as_any_mut() .downcast_mut::>>() .expect("should be an (Large)ListViewBuilder") - .append(true, 3); + .append(true); builder .values() .as_any_mut() .downcast_mut::>>() .expect("should be an (Large)ListViewBuilder") - .append(false, 0); + .append(false); builder .values() .as_any_mut() @@ -617,10 +623,10 @@ mod tests { .as_any_mut() .downcast_mut::>>() .expect("should be an (Large)ListViewBuilder") - .append(true, 1); - builder.append(true, 3); + .append(true); + builder.append(true); - builder.append(false, 0); + builder.append(false); builder .values() @@ -647,8 +653,8 @@ mod tests { .as_any_mut() .downcast_mut::>>() .expect("should be an (Large)ListViewBuilder") - .append(true, 2); - builder.append(true, 1); + .append(true); + builder.append(true); let l1 = builder.finish(); assert_eq!(4, l1.len()); @@ -704,4 +710,4 @@ mod tests { builder.append_value([Some(1)]); builder.finish(); } -} +} \ No newline at end of file From e3a16912a8ce10cabf142ff1f3a053e3fdd2b60e Mon Sep 17 00:00:00 2001 From: kikkon Date: Sun, 13 Oct 2024 10:09:38 +0800 Subject: [PATCH 3/9] fix tests --- .../src/builder/generic_list_view_builder.rs | 21 ++++++------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/arrow-array/src/builder/generic_list_view_builder.rs b/arrow-array/src/builder/generic_list_view_builder.rs index a8ac913b045e..fb076b896d93 100644 --- a/arrow-array/src/builder/generic_list_view_builder.rs +++ b/arrow-array/src/builder/generic_list_view_builder.rs @@ -23,7 +23,6 @@ use std::any::Any; use std::sync::Arc; /// Builder for [`GenericListViewArray`] -/// #[derive(Debug)] pub struct GenericListViewBuilder { offsets_builder: BufferBuilder, @@ -32,7 +31,6 @@ pub struct GenericListViewBuilder values_builder: T, field: Option, current_offset: OffsetSize, - current_size: OffsetSize, } impl Default for GenericListViewBuilder { @@ -96,11 +94,9 @@ impl GenericListViewBuilder f.clone(), None => Arc::new(Field::new("item", values.data_type().clone(), true)), @@ -241,7 +233,6 @@ where for v in iter { match v { Some(elements) => { - let origin_size = self.values_builder.len(); self.values_builder.extend(elements); self.append(true); } @@ -710,4 +701,4 @@ mod tests { builder.append_value([Some(1)]); builder.finish(); } -} \ No newline at end of file +} From ec84f1549ee6e5a4f59bc73ac03106838952dbe7 Mon Sep 17 00:00:00 2001 From: kikkon Date: Sun, 13 Oct 2024 10:23:00 +0800 Subject: [PATCH 4/9] remove static --- arrow-array/src/builder/generic_list_view_builder.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/arrow-array/src/builder/generic_list_view_builder.rs b/arrow-array/src/builder/generic_list_view_builder.rs index fb076b896d93..6219c660b8f6 100644 --- a/arrow-array/src/builder/generic_list_view_builder.rs +++ b/arrow-array/src/builder/generic_list_view_builder.rs @@ -41,8 +41,6 @@ impl Default for GenericListViewB impl ArrayBuilder for GenericListViewBuilder -where - T: 'static, { /// Returns the builder as a non-mutable `Any` reference. fn as_any(&self) -> &dyn Any { From 8befef6e462fc67ea89d612b4815caf39a53f8cd Mon Sep 17 00:00:00 2001 From: kikkon Date: Sun, 13 Oct 2024 10:46:27 +0800 Subject: [PATCH 5/9] lint --- arrow-array/src/builder/generic_list_view_builder.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arrow-array/src/builder/generic_list_view_builder.rs b/arrow-array/src/builder/generic_list_view_builder.rs index 6219c660b8f6..0b0a4a0f38e9 100644 --- a/arrow-array/src/builder/generic_list_view_builder.rs +++ b/arrow-array/src/builder/generic_list_view_builder.rs @@ -133,8 +133,12 @@ where #[inline] pub fn append(&mut self, is_valid: bool) { self.offsets_builder.append(self.current_offset); - self.sizes_builder - .append(OffsetSize::from_usize(self.values_builder.len() - self.current_offset.to_usize().unwrap()).unwrap()); + self.sizes_builder.append( + OffsetSize::from_usize( + self.values_builder.len() - self.current_offset.to_usize().unwrap(), + ) + .unwrap(), + ); self.null_buffer_builder.append(is_valid); self.current_offset = OffsetSize::from_usize(self.values_builder.len()).unwrap(); } @@ -154,8 +158,7 @@ where /// See [`Self::append_value`] for an example use. #[inline] pub fn append_null(&mut self) { - self.offsets_builder - .append(self.current_offset); + self.offsets_builder.append(self.current_offset); self.sizes_builder .append(OffsetSize::from_usize(0).unwrap()); self.null_buffer_builder.append_null(); From f0d9886f3430f48b45129f9a6c228125289abffc Mon Sep 17 00:00:00 2001 From: kikkon Date: Sat, 26 Oct 2024 22:06:24 +0800 Subject: [PATCH 6/9] chore: add comment for should fail test --- arrow-array/src/builder/generic_list_view_builder.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arrow-array/src/builder/generic_list_view_builder.rs b/arrow-array/src/builder/generic_list_view_builder.rs index 0b0a4a0f38e9..7c8eb7228616 100644 --- a/arrow-array/src/builder/generic_list_view_builder.rs +++ b/arrow-array/src/builder/generic_list_view_builder.rs @@ -687,6 +687,7 @@ mod tests { #[should_panic( expected = "Non-nullable field of ListViewArray \\\"item\\\" cannot contain nulls" )] + // If a non-nullable type is declared but a null value is used, it will be intercepted by the null check. fn test_checks_nullability() { let field = Arc::new(Field::new("item", DataType::Int32, false)); let mut builder = ListViewBuilder::new(Int32Builder::new()).with_field(field.clone()); @@ -696,6 +697,7 @@ mod tests { #[test] #[should_panic(expected = "ListViewArray expected data type Int64 got Int32")] + // If the declared type does not match the actual appended type, it will be intercepted by type checking in the finish function. fn test_checks_data_type() { let field = Arc::new(Field::new("item", DataType::Int64, false)); let mut builder = ListViewBuilder::new(Int32Builder::new()).with_field(field.clone()); From 3cc4aa245ff6d69cabdacbf9d3e7bebcaf38a139 Mon Sep 17 00:00:00 2001 From: Kikkon <19528375+Kikkon@users.noreply.github.com> Date: Sat, 28 Dec 2024 21:04:20 +0800 Subject: [PATCH 7/9] Update arrow-array/src/builder/generic_list_view_builder.rs Co-authored-by: Marco Neumann --- arrow-array/src/builder/generic_list_view_builder.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/arrow-array/src/builder/generic_list_view_builder.rs b/arrow-array/src/builder/generic_list_view_builder.rs index 7c8eb7228616..386d641e52da 100644 --- a/arrow-array/src/builder/generic_list_view_builder.rs +++ b/arrow-array/src/builder/generic_list_view_builder.rs @@ -495,11 +495,10 @@ mod tests { test_boxed_generic_list_view_generic_list_view_array_builder::(values_builder); } - fn test_boxed_generic_list_view_generic_list_view_array_builder< - O: OffsetSizeTrait + PartialEq, - >( + fn test_boxed_generic_list_view_generic_list_view_array_builder( values_builder: Box, - ) { + ) where O: OffsetSizeTrait + PartialEq + { let mut builder: GenericListViewBuilder> = GenericListViewBuilder::>::new(values_builder); From 43701f6b0863858f0b27ae9ee1cbed601e9cab0e Mon Sep 17 00:00:00 2001 From: Kikkon <19528375+Kikkon@users.noreply.github.com> Date: Sat, 28 Dec 2024 21:04:35 +0800 Subject: [PATCH 8/9] Update arrow-array/src/builder/generic_list_view_builder.rs Co-authored-by: Marco Neumann --- arrow-array/src/builder/generic_list_view_builder.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-array/src/builder/generic_list_view_builder.rs b/arrow-array/src/builder/generic_list_view_builder.rs index 386d641e52da..ecabcaa1ee4e 100644 --- a/arrow-array/src/builder/generic_list_view_builder.rs +++ b/arrow-array/src/builder/generic_list_view_builder.rs @@ -684,7 +684,7 @@ mod tests { #[test] #[should_panic( - expected = "Non-nullable field of ListViewArray \\\"item\\\" cannot contain nulls" + expected = r#"Non-nullable field of ListViewArray \"item\" cannot contain nulls"# )] // If a non-nullable type is declared but a null value is used, it will be intercepted by the null check. fn test_checks_nullability() { From d43632ad126319275941f6f83c6b2a4d283ae1a3 Mon Sep 17 00:00:00 2001 From: kikkon Date: Sat, 28 Dec 2024 21:18:21 +0800 Subject: [PATCH 9/9] fix name & lint --- .../src/builder/generic_list_view_builder.rs | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arrow-array/src/builder/generic_list_view_builder.rs b/arrow-array/src/builder/generic_list_view_builder.rs index ecabcaa1ee4e..5aaf9efefe24 100644 --- a/arrow-array/src/builder/generic_list_view_builder.rs +++ b/arrow-array/src/builder/generic_list_view_builder.rs @@ -252,7 +252,7 @@ mod tests { use crate::{Array, Int32Array}; use arrow_schema::DataType; - fn _test_generic_list_view_array_builder() { + fn test_generic_list_view_array_builder_impl() { let values_builder = Int32Builder::with_capacity(10); let mut builder = GenericListViewBuilder::::new(values_builder); @@ -287,15 +287,15 @@ mod tests { #[test] fn test_list_view_array_builder() { - _test_generic_list_view_array_builder::() + test_generic_list_view_array_builder_impl::() } #[test] fn test_large_list_view_array_builder() { - _test_generic_list_view_array_builder::() + test_generic_list_view_array_builder_impl::() } - fn _test_generic_list_view_array_builder_nulls() { + fn test_generic_list_view_array_builder_nulls_impl() { let values_builder = Int32Builder::with_capacity(10); let mut builder = GenericListViewBuilder::::new(values_builder); @@ -324,12 +324,12 @@ mod tests { #[test] fn test_list_view_array_builder_nulls() { - _test_generic_list_view_array_builder_nulls::() + test_generic_list_view_array_builder_nulls_impl::() } #[test] fn test_large_list_view_array_builder_nulls() { - _test_generic_list_view_array_builder_nulls::() + test_generic_list_view_array_builder_nulls_impl::() } #[test] @@ -497,7 +497,8 @@ mod tests { fn test_boxed_generic_list_view_generic_list_view_array_builder( values_builder: Box, - ) where O: OffsetSizeTrait + PartialEq + ) where + O: OffsetSizeTrait + PartialEq, { let mut builder: GenericListViewBuilder> = GenericListViewBuilder::>::new(values_builder);