diff --git a/crates/polars-parquet/src/arrow/read/deserialize/binary/basic.rs b/crates/polars-parquet/src/arrow/read/deserialize/binary/basic.rs index f63f2c19146e..de0bfc71672d 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/binary/basic.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/binary/basic.rs @@ -63,12 +63,12 @@ impl<'a, O: Offset> StateTranslation<'a, BinaryDecoder> for BinaryStateTransl use BinaryStateTranslation as T; match (self, page_validity) { - (T::Unit(page_values), None) => { + (T::Plain(page_values), None) => { for x in page_values.by_ref().take(additional) { values.push(x) } }, - (T::Unit(page_values), Some(page_validity)) => extend_from_decoder( + (T::Plain(page_values), Some(page_validity)) => extend_from_decoder( validity, page_validity, Some(additional), diff --git a/crates/polars-parquet/src/arrow/read/deserialize/binary/decoders.rs b/crates/polars-parquet/src/arrow/read/deserialize/binary/decoders.rs index b7bf3d66b65e..22df9e65f002 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/binary/decoders.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/binary/decoders.rs @@ -139,7 +139,7 @@ impl<'a> ValuesDictionary<'a> { #[derive(Debug)] pub(crate) enum BinaryStateTranslation<'a> { - Unit(BinaryIter<'a>), + Plain(BinaryIter<'a>), Dictionary(ValuesDictionary<'a>, Option>), Delta(Delta<'a>), DeltaBytes(DeltaBytes<'a>), @@ -167,7 +167,7 @@ impl<'a> BinaryStateTranslation<'a> { let values = split_buffer(page)?.values; let values = BinaryIter::new(values, page.num_values()); - Ok(BinaryStateTranslation::Unit(values)) + Ok(BinaryStateTranslation::Plain(values)) }, (Encoding::DeltaLengthByteArray, _) => { Ok(BinaryStateTranslation::Delta(Delta::try_new(page)?)) @@ -180,7 +180,7 @@ impl<'a> BinaryStateTranslation<'a> { } pub(crate) fn len_when_not_nullable(&self) -> usize { match self { - Self::Unit(v) => v.len_when_not_nullable(), + Self::Plain(v) => v.len_when_not_nullable(), Self::Dictionary(v, _) => v.len(), Self::Delta(v) => v.len(), Self::DeltaBytes(v) => v.size_hint().0, @@ -193,7 +193,7 @@ impl<'a> BinaryStateTranslation<'a> { } match self { - Self::Unit(t) => _ = t.by_ref().nth(n - 1), + Self::Plain(t) => _ = t.by_ref().nth(n - 1), Self::Dictionary(t, _) => t.values.skip_in_place(n)?, Self::Delta(t) => _ = t.by_ref().nth(n - 1), Self::DeltaBytes(t) => _ = t.by_ref().nth(n - 1), diff --git a/crates/polars-parquet/src/arrow/read/deserialize/binary/nested.rs b/crates/polars-parquet/src/arrow/read/deserialize/binary/nested.rs index 95f48e493b8b..6023ab4d7532 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/binary/nested.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/binary/nested.rs @@ -29,14 +29,14 @@ pub struct State<'a> { #[derive(Debug)] pub enum StateTranslation<'a> { - Unit(BinaryIter<'a>), + Plain(BinaryIter<'a>), Dictionary(ValuesDictionary<'a>, Option>), } impl<'a> PageState<'a> for State<'a> { fn len(&self) -> usize { match &self.translation { - StateTranslation::Unit(iter) => iter.size_hint().0, + StateTranslation::Plain(iter) => iter.size_hint().0, StateTranslation::Dictionary(values, _) => values.len(), } } @@ -69,7 +69,7 @@ impl<'a, O: Offset> NestedDecoder<'a> for BinaryDecoder { (Encoding::Plain, _) => { let values = split_buffer(page)?.values; let values = BinaryIter::new(values, page.num_values()); - StateTranslation::Unit(values) + StateTranslation::Plain(values) }, _ => return Err(not_implemented(page)), }; @@ -96,7 +96,7 @@ impl<'a, O: Offset> NestedDecoder<'a> for BinaryDecoder { let (values, validity) = decoded; match &mut state.translation { - StateTranslation::Unit(page) => { + StateTranslation::Plain(page) => { // @TODO: This can be optimized to not be a constantly polling for value in page.by_ref().take(n) { values.push(value); diff --git a/crates/polars-parquet/src/arrow/read/deserialize/binview/basic.rs b/crates/polars-parquet/src/arrow/read/deserialize/binview/basic.rs index e8e7afc78429..34a8a21db59c 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/binview/basic.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/binview/basic.rs @@ -57,12 +57,12 @@ impl<'a> StateTranslation<'a, BinViewDecoder> for BinaryStateTranslation<'a> { let mut validate_utf8 = decoder.check_utf8.take(); match (self, page_validity) { - (Self::Unit(page_values), None) => { + (Self::Plain(page_values), None) => { for x in page_values.by_ref().take(additional) { values.push_value_ignore_validity(x) } }, - (Self::Unit(page_values), Some(page_validity)) => extend_from_decoder( + (Self::Plain(page_values), Some(page_validity)) => extend_from_decoder( validity, page_validity, Some(additional), diff --git a/crates/polars-parquet/src/arrow/read/deserialize/binview/nested.rs b/crates/polars-parquet/src/arrow/read/deserialize/binview/nested.rs index 064709f1c80e..5f938e2cf6d9 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/binview/nested.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/binview/nested.rs @@ -26,14 +26,14 @@ pub(crate) struct State<'a> { #[derive(Debug)] pub(crate) enum StateTranslation<'a> { - Unit(BinaryIter<'a>), + Plain(BinaryIter<'a>), Dictionary(ValuesDictionary<'a>, Option>), } impl<'a> PageState<'a> for State<'a> { fn len(&self) -> usize { match &self.translation { - StateTranslation::Unit(iter) => iter.size_hint().0, + StateTranslation::Plain(iter) => iter.size_hint().0, StateTranslation::Dictionary(values, _) => values.len(), } } @@ -67,7 +67,7 @@ impl<'a> NestedDecoder<'a> for BinViewDecoder { (Encoding::Plain, _) => { let values = split_buffer(page)?.values; let values = BinaryIter::new(values, page.num_values()); - StateTranslation::Unit(values) + StateTranslation::Plain(values) }, _ => return Err(not_implemented(page)), }; @@ -93,7 +93,7 @@ impl<'a> NestedDecoder<'a> for BinViewDecoder { ) -> ParquetResult<()> { let (values, validity) = decoded; match &mut state.translation { - StateTranslation::Unit(page) => { + StateTranslation::Plain(page) => { // @TODO: This should probably be optimized to a better loop for value in page.by_ref().take(n) { values.push_value_ignore_validity(value); diff --git a/crates/polars-parquet/src/arrow/read/deserialize/boolean/basic.rs b/crates/polars-parquet/src/arrow/read/deserialize/boolean/basic.rs index 8220a22abe92..7e35f8484e5a 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/boolean/basic.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/boolean/basic.rs @@ -19,7 +19,7 @@ use crate::read::deserialize::utils::{BatchableCollector, PageValidity}; #[allow(clippy::large_enum_variant)] #[derive(Debug)] enum StateTranslation<'a> { - Unit(BitmapIter<'a>), + Plain(BitmapIter<'a>), Rle(HybridRleDecoder<'a>), } @@ -43,7 +43,7 @@ impl<'a> utils::StateTranslation<'a, BooleanDecoder> for StateTranslation<'a> { } else { page.num_values() }; - Ok(Self::Unit(BitmapIter::new(values, 0, num_values))) + Ok(Self::Plain(BitmapIter::new(values, 0, num_values))) }, Encoding::Rle => { // @NOTE: For a nullable list, we might very well overestimate the amount of @@ -65,7 +65,7 @@ impl<'a> utils::StateTranslation<'a, BooleanDecoder> for StateTranslation<'a> { fn len_when_not_nullable(&self) -> usize { match self { - Self::Unit(v) => v.len(), + Self::Plain(v) => v.len(), Self::Rle(v) => v.len(), } } @@ -77,7 +77,7 @@ impl<'a> utils::StateTranslation<'a, BooleanDecoder> for StateTranslation<'a> { // @TODO: Add a skip_in_place on BitmapIter match self { - Self::Unit(t) => _ = t.nth(n - 1), + Self::Plain(t) => _ = t.nth(n - 1), Self::Rle(t) => t.skip_in_place(n)?, } @@ -94,8 +94,8 @@ impl<'a> utils::StateTranslation<'a, BooleanDecoder> for StateTranslation<'a> { let (values, validity) = decoded; match (self, page_validity) { - (Self::Unit(page), None) => page.collect_n_into(values, additional), - (Self::Unit(page_values), Some(page_validity)) => extend_from_decoder( + (Self::Plain(page), None) => page.collect_n_into(values, additional), + (Self::Plain(page_values), Some(page_validity)) => extend_from_decoder( validity, page_validity, Some(additional), diff --git a/crates/polars-parquet/src/arrow/read/deserialize/fixed_size_binary/basic.rs b/crates/polars-parquet/src/arrow/read/deserialize/fixed_size_binary/basic.rs index 70c0d925636e..67a00d50d76f 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/fixed_size_binary/basic.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/fixed_size_binary/basic.rs @@ -23,7 +23,7 @@ use crate::read::deserialize::utils::{self, PageValidity}; #[allow(clippy::large_enum_variant)] #[derive(Debug)] enum StateTranslation<'a> { - Unit(std::slice::ChunksExact<'a, u8>), + Plain(std::slice::ChunksExact<'a, u8>), Dictionary(HybridRleDecoder<'a>, &'a [u8]), } @@ -47,7 +47,7 @@ impl<'a> utils::StateTranslation<'a, BinaryDecoder> for StateTranslation<'a> { .into()); } let values = values.chunks_exact(decoder.size); - Ok(Self::Unit(values)) + Ok(Self::Plain(values)) }, (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict)) => { let values = dict_indices_decoder(page)?; @@ -59,7 +59,7 @@ impl<'a> utils::StateTranslation<'a, BinaryDecoder> for StateTranslation<'a> { fn len_when_not_nullable(&self) -> usize { match self { - Self::Unit(v) => v.len(), + Self::Plain(v) => v.len(), Self::Dictionary(v, _) => v.len(), } } @@ -70,7 +70,7 @@ impl<'a> utils::StateTranslation<'a, BinaryDecoder> for StateTranslation<'a> { } match self { - Self::Unit(v) => _ = v.nth(n - 1), + Self::Plain(v) => _ = v.nth(n - 1), Self::Dictionary(v, _) => v.skip_in_place(n)?, } @@ -88,13 +88,13 @@ impl<'a> utils::StateTranslation<'a, BinaryDecoder> for StateTranslation<'a> { use StateTranslation as T; match (self, page_validity) { - (T::Unit(page_values), None) => { + (T::Plain(page_values), None) => { // @TODO: This can be done through a extend for x in page_values.by_ref().take(additional) { values.push(x) } }, - (T::Unit(page_values), Some(page_validity)) => extend_from_decoder( + (T::Plain(page_values), Some(page_validity)) => extend_from_decoder( validity, page_validity, Some(additional), diff --git a/crates/polars-parquet/src/arrow/read/deserialize/fixed_size_binary/nested.rs b/crates/polars-parquet/src/arrow/read/deserialize/fixed_size_binary/nested.rs index b43cb266c960..69b88366a4ab 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/fixed_size_binary/nested.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/fixed_size_binary/nested.rs @@ -27,7 +27,7 @@ struct State<'a> { #[allow(clippy::large_enum_variant)] #[derive(Debug)] enum StateTranslation<'a> { - Unit(std::slice::ChunksExact<'a, u8>), + Plain(std::slice::ChunksExact<'a, u8>), Dictionary { values: HybridRleDecoder<'a>, dict: &'a [u8], @@ -37,7 +37,7 @@ enum StateTranslation<'a> { impl<'a> PageState<'a> for State<'a> { fn len(&self) -> usize { match &self.translation { - StateTranslation::Unit(chunks) => chunks.len(), + StateTranslation::Plain(chunks) => chunks.len(), StateTranslation::Dictionary { values: decoder, .. } => decoder.len(), @@ -69,7 +69,7 @@ impl<'a> NestedDecoder<'a> for BinaryDecoder { let values = page.buffer(); assert_eq!(values.len() % self.size, 0); let values = values.chunks_exact(self.size); - StateTranslation::Unit(values) + StateTranslation::Plain(values) }, (Encoding::PlainDictionary | Encoding::RleDictionary, Some(&dict), false) => { let values = dict_indices_decoder(page)?; @@ -104,7 +104,7 @@ impl<'a> NestedDecoder<'a> for BinaryDecoder { } match &mut state.translation { - StateTranslation::Unit(page_values) => { + StateTranslation::Plain(page_values) => { for value in page_values.by_ref().take(n) { values.push(value); } diff --git a/crates/polars-parquet/src/arrow/read/deserialize/primitive/basic.rs b/crates/polars-parquet/src/arrow/read/deserialize/primitive/basic.rs index f5ed5d492e20..b12d0f73025c 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/primitive/basic.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/primitive/basic.rs @@ -143,7 +143,7 @@ where #[allow(clippy::large_enum_variant)] #[derive(Debug)] pub(super) enum StateTranslation<'a, P: ParquetNativeType, T: NativeType> { - Unit(ArrayChunks<'a, P>), + Plain(ArrayChunks<'a, P>), Dictionary(ValuesDictionary<'a, T>), ByteStreamSplit(byte_stream_split::Decoder<'a>), } @@ -168,7 +168,7 @@ where }, (Encoding::Plain, _) => { let values = split_buffer(page)?.values; - Ok(Self::Unit(ArrayChunks::new(values).unwrap())) + Ok(Self::Plain(ArrayChunks::new(values).unwrap())) }, (Encoding::ByteStreamSplit, _) => { let values = split_buffer(page)?.values; @@ -183,7 +183,7 @@ where fn len_when_not_nullable(&self) -> usize { match self { - Self::Unit(n) => n.len(), + Self::Plain(n) => n.len(), Self::Dictionary(n) => n.len(), Self::ByteStreamSplit(n) => n.len(), } @@ -195,7 +195,7 @@ where } match self { - Self::Unit(t) => _ = t.nth(n - 1), + Self::Plain(t) => _ = t.nth(n - 1), Self::Dictionary(t) => t.values.skip_in_place(n)?, Self::ByteStreamSplit(t) => _ = t.iter_converted(|_| ()).nth(n - 1), } @@ -213,7 +213,7 @@ where let (values, validity) = decoded; match (self, page_validity) { - (Self::Unit(page), None) => { + (Self::Plain(page), None) => { PlainDecoderFnCollector { chunks: page, decoder: decoder.decoder, @@ -221,7 +221,7 @@ where } .push_n(values, additional)?; }, - (Self::Unit(page), Some(page_validity)) => { + (Self::Plain(page), Some(page_validity)) => { let collector = PlainDecoderFnCollector { chunks: page, decoder: decoder.decoder, diff --git a/crates/polars-parquet/src/arrow/read/deserialize/primitive/integer.rs b/crates/polars-parquet/src/arrow/read/deserialize/primitive/integer.rs index 4baa0d36be0b..eae4d1ac410f 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/primitive/integer.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/primitive/integer.rs @@ -24,7 +24,7 @@ use crate::read::deserialize::utils::{BatchableCollector, PageValidity, Translat #[allow(clippy::large_enum_variant)] #[derive(Debug)] pub(super) enum StateTranslation<'a, P: ParquetNativeType, T: NativeType> { - Unit(ArrayChunks<'a, P>), + Plain(ArrayChunks<'a, P>), Dictionary(ValuesDictionary<'a, T>), ByteStreamSplit(byte_stream_split::Decoder<'a>), DeltaBinaryPacked(delta_bitpacked::Decoder<'a>), @@ -50,7 +50,7 @@ where }, (Encoding::Plain, _) => { let values = split_buffer(page)?.values; - Ok(Self::Unit(ArrayChunks::new(values).unwrap())) + Ok(Self::Plain(ArrayChunks::new(values).unwrap())) }, (Encoding::ByteStreamSplit, _) => { let values = split_buffer(page)?.values; @@ -71,7 +71,7 @@ where fn len_when_not_nullable(&self) -> usize { match self { - Self::Unit(v) => v.len(), + Self::Plain(v) => v.len(), Self::Dictionary(v) => v.len(), Self::ByteStreamSplit(v) => v.len(), Self::DeltaBinaryPacked(v) => v.size_hint().0, @@ -84,7 +84,7 @@ where } match self { - Self::Unit(v) => _ = v.nth(n - 1), + Self::Plain(v) => _ = v.nth(n - 1), Self::Dictionary(v) => v.values.skip_in_place(n)?, Self::ByteStreamSplit(v) => _ = v.iter_converted(|_| ()).nth(n - 1), Self::DeltaBinaryPacked(v) => _ = v.nth(n - 1), @@ -102,7 +102,7 @@ where ) -> ParquetResult<()> { let (values, validity) = decoded; match (self, page_validity) { - (Self::Unit(page), Some(page_validity)) => { + (Self::Plain(page), Some(page_validity)) => { let collector = PlainDecoderFnCollector { chunks: page, decoder: decoder.0.decoder, @@ -117,7 +117,7 @@ where collector, )?; }, - (Self::Unit(page), None) => { + (Self::Plain(page), None) => { PlainDecoderFnCollector { chunks: page, decoder: decoder.0.decoder, diff --git a/crates/polars-parquet/src/arrow/read/deserialize/primitive/nested.rs b/crates/polars-parquet/src/arrow/read/deserialize/primitive/nested.rs index a1621a0fe8b3..4e361b32cbc5 100644 --- a/crates/polars-parquet/src/arrow/read/deserialize/primitive/nested.rs +++ b/crates/polars-parquet/src/arrow/read/deserialize/primitive/nested.rs @@ -28,7 +28,7 @@ struct State<'a, P: ParquetNativeType, T: NativeType> { #[allow(clippy::large_enum_variant)] #[derive(Debug)] enum StateTranslation<'a, P: ParquetNativeType, T: NativeType> { - Unit(ArrayChunks<'a, P>), + Plain(ArrayChunks<'a, P>), Dictionary(ValuesDictionary<'a, T>), ByteStreamSplit(byte_stream_split::Decoder<'a>), } @@ -36,7 +36,7 @@ enum StateTranslation<'a, P: ParquetNativeType, T: NativeType> { impl<'a, P: ParquetNativeType, T: NativeType> utils::PageState<'a> for State<'a, P, T> { fn len(&self) -> usize { match &self.translation { - StateTranslation::Unit(values) => values.len(), + StateTranslation::Plain(values) => values.len(), StateTranslation::Dictionary(values) => values.len(), StateTranslation::ByteStreamSplit(decoder) => decoder.len(), } @@ -97,7 +97,7 @@ where let translation = match (page.encoding(), dict) { (Encoding::Plain, _) => { let values = split_buffer(page)?.values; - StateTranslation::Unit(ArrayChunks::new(values).unwrap()) + StateTranslation::Plain(ArrayChunks::new(values).unwrap()) }, (Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict)) => { StateTranslation::Dictionary(ValuesDictionary::try_new(page, dict)?) @@ -138,7 +138,7 @@ where } match &mut state.translation { - StateTranslation::Unit(page_values) => { + StateTranslation::Plain(page_values) => { for value in page_values.by_ref().take(n) { values.push(self.decoder.decode(P::from_le_bytes(*value))); }