From f980493ce1ca54e6e8dd77b7a47fe9fe0ed6e7aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=9E=97=E4=BC=9F?= Date: Thu, 4 Jul 2024 09:49:29 +0800 Subject: [PATCH 1/4] Rename Schema::all_fields to flattened_fields --- arrow-flight/tests/flight_sql_client_cli.rs | 2 +- arrow-ipc/src/writer.rs | 2 +- arrow-json/src/reader/mod.rs | 2 +- arrow-schema/src/schema.rs | 9 ++++++++- parquet/src/arrow/async_reader/mod.rs | 2 +- 5 files changed, 12 insertions(+), 5 deletions(-) diff --git a/arrow-flight/tests/flight_sql_client_cli.rs b/arrow-flight/tests/flight_sql_client_cli.rs index 50a4ec0d8c66..317eb3900456 100644 --- a/arrow-flight/tests/flight_sql_client_cli.rs +++ b/arrow-flight/tests/flight_sql_client_cli.rs @@ -568,7 +568,7 @@ impl FlightSqlService for FlightSqlServiceImpl { .try_collect::>() .await?; - for (left, right) in parameters[0].schema().all_fields().iter().zip(vec![ + for (left, right) in parameters[0].schema().flattened_fields().iter().zip(vec![ Field::new("$1", DataType::Utf8, false), Field::new("$2", DataType::Int64, true), ]) { diff --git a/arrow-ipc/src/writer.rs b/arrow-ipc/src/writer.rs index c0782195999d..d0a78ca2702e 100644 --- a/arrow-ipc/src/writer.rs +++ b/arrow-ipc/src/writer.rs @@ -411,7 +411,7 @@ impl IpcDataGenerator { write_options: &IpcWriteOptions, ) -> Result<(Vec, EncodedData), ArrowError> { let schema = batch.schema(); - let mut encoded_dictionaries = Vec::with_capacity(schema.all_fields().len()); + let mut encoded_dictionaries = Vec::with_capacity(schema.flattened_fields().len()); let mut dict_id = dictionary_tracker.dict_ids.clone().into_iter(); diff --git a/arrow-json/src/reader/mod.rs b/arrow-json/src/reader/mod.rs index 0a50cfac65f0..3e1c5d2fc896 100644 --- a/arrow-json/src/reader/mod.rs +++ b/arrow-json/src/reader/mod.rs @@ -289,7 +289,7 @@ impl ReaderBuilder { let decoder = make_decoder(data_type, self.coerce_primitive, self.strict_mode, nullable)?; - let num_fields = self.schema.all_fields().len(); + let num_fields = self.schema.flattened_fields().len(); Ok(Decoder { decoder, diff --git a/arrow-schema/src/schema.rs b/arrow-schema/src/schema.rs index 3bb076aa54b0..d964ed0227f5 100644 --- a/arrow-schema/src/schema.rs +++ b/arrow-schema/src/schema.rs @@ -325,10 +325,17 @@ impl Schema { /// Returns a vector with references to all fields (including nested fields) #[inline] - pub fn all_fields(&self) -> Vec<&Field> { + pub fn flattened_fields(&self) -> Vec<&Field> { self.fields.iter().flat_map(|f| f.fields()).collect() } + /// Returns a vector with references to all fields (including nested fields) + #[deprecated(since = "52.1.0", note = "Use `flattened_fields` instead")] + #[inline] + pub fn all_fields(&self) -> Vec<&Field> { + self.flattened_fields() + } + /// Returns an immutable reference of a specific [`Field`] instance selected using an /// offset within the internal `fields` vector. /// diff --git a/parquet/src/arrow/async_reader/mod.rs b/parquet/src/arrow/async_reader/mod.rs index 0a72583b90d0..e4205b7ef2ce 100644 --- a/parquet/src/arrow/async_reader/mod.rs +++ b/parquet/src/arrow/async_reader/mod.rs @@ -1650,7 +1650,7 @@ mod tests { #[tokio::test] async fn test_parquet_record_batch_stream_schema() { fn get_all_field_names(schema: &Schema) -> Vec<&String> { - schema.all_fields().iter().map(|f| f.name()).collect() + schema.flattened_fields().iter().map(|f| f.name()).collect() } // ParquetRecordBatchReaderBuilder::schema differs from From b746d6a686e0fa7a5490e5cc3d152848b12e67de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=9E=97=E4=BC=9F?= Date: Mon, 8 Jul 2024 11:01:19 +0800 Subject: [PATCH 2/4] Add doc example for Schema::flattened_fields --- arrow-schema/src/schema.rs | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/arrow-schema/src/schema.rs b/arrow-schema/src/schema.rs index d964ed0227f5..61d3a939e2dd 100644 --- a/arrow-schema/src/schema.rs +++ b/arrow-schema/src/schema.rs @@ -324,6 +324,27 @@ impl Schema { } /// Returns a vector with references to all fields (including nested fields) + /// + /// # Example + /// + /// ``` + /// use std::sync::Arc; + /// use arrow_schema::{DataType, Field, Fields, Schema}; + /// + /// let f1 = Arc::new(Field::new("a", DataType::Boolean, false)); + /// + /// let f2_inner = Arc::new(Field::new("b_inner", DataType::Int8, false)); + /// let f2 = Arc::new(Field::new("b", DataType::List(f2_inner.clone()), false)); + /// + /// let f3_inner1 = Arc::new(Field::new("c_inner1", DataType::Int8, false)); + /// let f3_inner2 = Arc::new(Field::new("c_inner2", DataType::Int8, false)); + /// let f3 = Arc::new(Field::new("c", DataType::Struct(vec![f3_inner1.clone(), f3_inner2.clone()].into()), false)); + /// + /// let mut schema = Schema::new(vec![ + /// f1.clone(), f2.clone(), f3.clone() + /// ]); + /// assert_eq!(schema.flattened_fields(), vec![f1.as_ref(), f2.as_ref(), f2_inner.as_ref(), f3.as_ref(), f3_inner1.as_ref(), f3_inner2.as_ref()]); + /// ``` #[inline] pub fn flattened_fields(&self) -> Vec<&Field> { self.fields.iter().flat_map(|f| f.fields()).collect() From 75a4f1582a8aa6bf68a1a686fc614827d5fe3916 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=9E=97=E4=BC=9F?= Date: Mon, 8 Jul 2024 11:12:35 +0800 Subject: [PATCH 3/4] fmt doc example --- arrow-schema/src/schema.rs | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/arrow-schema/src/schema.rs b/arrow-schema/src/schema.rs index 61d3a939e2dd..0bcb94149f66 100644 --- a/arrow-schema/src/schema.rs +++ b/arrow-schema/src/schema.rs @@ -338,12 +338,26 @@ impl Schema { /// /// let f3_inner1 = Arc::new(Field::new("c_inner1", DataType::Int8, false)); /// let f3_inner2 = Arc::new(Field::new("c_inner2", DataType::Int8, false)); - /// let f3 = Arc::new(Field::new("c", DataType::Struct(vec![f3_inner1.clone(), f3_inner2.clone()].into()), false)); + /// let f3 = Arc::new(Field::new( + /// "c", + /// DataType::Struct(vec![f3_inner1.clone(), f3_inner2.clone()].into()), + /// false + /// )); /// /// let mut schema = Schema::new(vec![ /// f1.clone(), f2.clone(), f3.clone() /// ]); - /// assert_eq!(schema.flattened_fields(), vec![f1.as_ref(), f2.as_ref(), f2_inner.as_ref(), f3.as_ref(), f3_inner1.as_ref(), f3_inner2.as_ref()]); + /// assert_eq!( + /// schema.flattened_fields(), + /// vec![ + /// f1.as_ref(), + /// f2.as_ref(), + /// f2_inner.as_ref(), + /// f3.as_ref(), + /// f3_inner1.as_ref(), + /// f3_inner2.as_ref() + /// ] + /// ); /// ``` #[inline] pub fn flattened_fields(&self) -> Vec<&Field> { From ebf21b103f66f5f9fddcfe8d9e8869bf0c2cc659 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Mon, 8 Jul 2024 14:42:50 -0400 Subject: [PATCH 4/4] Update arrow-schema/src/schema.rs --- arrow-schema/src/schema.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arrow-schema/src/schema.rs b/arrow-schema/src/schema.rs index 0bcb94149f66..9a9ef45d8b24 100644 --- a/arrow-schema/src/schema.rs +++ b/arrow-schema/src/schema.rs @@ -365,7 +365,7 @@ impl Schema { } /// Returns a vector with references to all fields (including nested fields) - #[deprecated(since = "52.1.0", note = "Use `flattened_fields` instead")] + #[deprecated(since = "52.2.0", note = "Use `flattened_fields` instead")] #[inline] pub fn all_fields(&self) -> Vec<&Field> { self.flattened_fields()