Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: Rename Unit to Plain in Parquet reader #17751

Merged
merged 1 commit into from
Jul 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -63,12 +63,12 @@ impl<'a, O: Offset> StateTranslation<'a, BinaryDecoder<O>> for BinaryStateTransl

use BinaryStateTranslation as T;
match (self, page_validity) {
(T::Unit(page_values), None) => {
(T::Plain(page_values), None) => {
for x in page_values.by_ref().take(additional) {
values.push(x)
}
},
(T::Unit(page_values), Some(page_validity)) => extend_from_decoder(
(T::Plain(page_values), Some(page_validity)) => extend_from_decoder(
validity,
page_validity,
Some(additional),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ impl<'a> ValuesDictionary<'a> {

#[derive(Debug)]
pub(crate) enum BinaryStateTranslation<'a> {
Unit(BinaryIter<'a>),
Plain(BinaryIter<'a>),
Dictionary(ValuesDictionary<'a>, Option<Vec<View>>),
Delta(Delta<'a>),
DeltaBytes(DeltaBytes<'a>),
Expand Down Expand Up @@ -167,7 +167,7 @@ impl<'a> BinaryStateTranslation<'a> {
let values = split_buffer(page)?.values;
let values = BinaryIter::new(values, page.num_values());

Ok(BinaryStateTranslation::Unit(values))
Ok(BinaryStateTranslation::Plain(values))
},
(Encoding::DeltaLengthByteArray, _) => {
Ok(BinaryStateTranslation::Delta(Delta::try_new(page)?))
Expand All @@ -180,7 +180,7 @@ impl<'a> BinaryStateTranslation<'a> {
}
pub(crate) fn len_when_not_nullable(&self) -> usize {
match self {
Self::Unit(v) => v.len_when_not_nullable(),
Self::Plain(v) => v.len_when_not_nullable(),
Self::Dictionary(v, _) => v.len(),
Self::Delta(v) => v.len(),
Self::DeltaBytes(v) => v.size_hint().0,
Expand All @@ -193,7 +193,7 @@ impl<'a> BinaryStateTranslation<'a> {
}

match self {
Self::Unit(t) => _ = t.by_ref().nth(n - 1),
Self::Plain(t) => _ = t.by_ref().nth(n - 1),
Self::Dictionary(t, _) => t.values.skip_in_place(n)?,
Self::Delta(t) => _ = t.by_ref().nth(n - 1),
Self::DeltaBytes(t) => _ = t.by_ref().nth(n - 1),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,14 @@ pub struct State<'a> {

#[derive(Debug)]
pub enum StateTranslation<'a> {
Unit(BinaryIter<'a>),
Plain(BinaryIter<'a>),
Dictionary(ValuesDictionary<'a>, Option<Vec<&'a [u8]>>),
}

impl<'a> PageState<'a> for State<'a> {
fn len(&self) -> usize {
match &self.translation {
StateTranslation::Unit(iter) => iter.size_hint().0,
StateTranslation::Plain(iter) => iter.size_hint().0,
StateTranslation::Dictionary(values, _) => values.len(),
}
}
Expand Down Expand Up @@ -69,7 +69,7 @@ impl<'a, O: Offset> NestedDecoder<'a> for BinaryDecoder<O> {
(Encoding::Plain, _) => {
let values = split_buffer(page)?.values;
let values = BinaryIter::new(values, page.num_values());
StateTranslation::Unit(values)
StateTranslation::Plain(values)
},
_ => return Err(not_implemented(page)),
};
Expand All @@ -96,7 +96,7 @@ impl<'a, O: Offset> NestedDecoder<'a> for BinaryDecoder<O> {
let (values, validity) = decoded;

match &mut state.translation {
StateTranslation::Unit(page) => {
StateTranslation::Plain(page) => {
// @TODO: This can be optimized to not be constantly polling
for value in page.by_ref().take(n) {
values.push(value);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,12 @@ impl<'a> StateTranslation<'a, BinViewDecoder> for BinaryStateTranslation<'a> {
let mut validate_utf8 = decoder.check_utf8.take();

match (self, page_validity) {
(Self::Unit(page_values), None) => {
(Self::Plain(page_values), None) => {
for x in page_values.by_ref().take(additional) {
values.push_value_ignore_validity(x)
}
},
(Self::Unit(page_values), Some(page_validity)) => extend_from_decoder(
(Self::Plain(page_values), Some(page_validity)) => extend_from_decoder(
validity,
page_validity,
Some(additional),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,14 @@ pub(crate) struct State<'a> {

#[derive(Debug)]
pub(crate) enum StateTranslation<'a> {
Unit(BinaryIter<'a>),
Plain(BinaryIter<'a>),
Dictionary(ValuesDictionary<'a>, Option<Vec<View>>),
}

impl<'a> PageState<'a> for State<'a> {
fn len(&self) -> usize {
match &self.translation {
StateTranslation::Unit(iter) => iter.size_hint().0,
StateTranslation::Plain(iter) => iter.size_hint().0,
StateTranslation::Dictionary(values, _) => values.len(),
}
}
Expand Down Expand Up @@ -67,7 +67,7 @@ impl<'a> NestedDecoder<'a> for BinViewDecoder {
(Encoding::Plain, _) => {
let values = split_buffer(page)?.values;
let values = BinaryIter::new(values, page.num_values());
StateTranslation::Unit(values)
StateTranslation::Plain(values)
},
_ => return Err(not_implemented(page)),
};
Expand All @@ -93,7 +93,7 @@ impl<'a> NestedDecoder<'a> for BinViewDecoder {
) -> ParquetResult<()> {
let (values, validity) = decoded;
match &mut state.translation {
StateTranslation::Unit(page) => {
StateTranslation::Plain(page) => {
// @TODO: This should probably be optimized to a better loop
for value in page.by_ref().take(n) {
values.push_value_ignore_validity(value);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use crate::read::deserialize::utils::{BatchableCollector, PageValidity};
#[allow(clippy::large_enum_variant)]
#[derive(Debug)]
enum StateTranslation<'a> {
Unit(BitmapIter<'a>),
Plain(BitmapIter<'a>),
Rle(HybridRleDecoder<'a>),
}

Expand All @@ -43,7 +43,7 @@ impl<'a> utils::StateTranslation<'a, BooleanDecoder> for StateTranslation<'a> {
} else {
page.num_values()
};
Ok(Self::Unit(BitmapIter::new(values, 0, num_values)))
Ok(Self::Plain(BitmapIter::new(values, 0, num_values)))
},
Encoding::Rle => {
// @NOTE: For a nullable list, we might very well overestimate the amount of
Expand All @@ -65,7 +65,7 @@ impl<'a> utils::StateTranslation<'a, BooleanDecoder> for StateTranslation<'a> {

fn len_when_not_nullable(&self) -> usize {
match self {
Self::Unit(v) => v.len(),
Self::Plain(v) => v.len(),
Self::Rle(v) => v.len(),
}
}
Expand All @@ -77,7 +77,7 @@ impl<'a> utils::StateTranslation<'a, BooleanDecoder> for StateTranslation<'a> {

// @TODO: Add a skip_in_place on BitmapIter
match self {
Self::Unit(t) => _ = t.nth(n - 1),
Self::Plain(t) => _ = t.nth(n - 1),
Self::Rle(t) => t.skip_in_place(n)?,
}

Expand All @@ -94,8 +94,8 @@ impl<'a> utils::StateTranslation<'a, BooleanDecoder> for StateTranslation<'a> {
let (values, validity) = decoded;

match (self, page_validity) {
(Self::Unit(page), None) => page.collect_n_into(values, additional),
(Self::Unit(page_values), Some(page_validity)) => extend_from_decoder(
(Self::Plain(page), None) => page.collect_n_into(values, additional),
(Self::Plain(page_values), Some(page_validity)) => extend_from_decoder(
validity,
page_validity,
Some(additional),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ use crate::read::deserialize::utils::{self, PageValidity};
#[allow(clippy::large_enum_variant)]
#[derive(Debug)]
enum StateTranslation<'a> {
Unit(std::slice::ChunksExact<'a, u8>),
Plain(std::slice::ChunksExact<'a, u8>),
Dictionary(HybridRleDecoder<'a>, &'a [u8]),
}

Expand All @@ -47,7 +47,7 @@ impl<'a> utils::StateTranslation<'a, BinaryDecoder> for StateTranslation<'a> {
.into());
}
let values = values.chunks_exact(decoder.size);
Ok(Self::Unit(values))
Ok(Self::Plain(values))
},
(Encoding::PlainDictionary | Encoding::RleDictionary, Some(dict)) => {
let values = dict_indices_decoder(page)?;
Expand All @@ -59,7 +59,7 @@ impl<'a> utils::StateTranslation<'a, BinaryDecoder> for StateTranslation<'a> {

fn len_when_not_nullable(&self) -> usize {
match self {
Self::Unit(v) => v.len(),
Self::Plain(v) => v.len(),
Self::Dictionary(v, _) => v.len(),
}
}
Expand All @@ -70,7 +70,7 @@ impl<'a> utils::StateTranslation<'a, BinaryDecoder> for StateTranslation<'a> {
}

match self {
Self::Unit(v) => _ = v.nth(n - 1),
Self::Plain(v) => _ = v.nth(n - 1),
Self::Dictionary(v, _) => v.skip_in_place(n)?,
}

Expand All @@ -88,13 +88,13 @@ impl<'a> utils::StateTranslation<'a, BinaryDecoder> for StateTranslation<'a> {

use StateTranslation as T;
match (self, page_validity) {
(T::Unit(page_values), None) => {
(T::Plain(page_values), None) => {
// @TODO: This can be done through an extend
for x in page_values.by_ref().take(additional) {
values.push(x)
}
},
(T::Unit(page_values), Some(page_validity)) => extend_from_decoder(
(T::Plain(page_values), Some(page_validity)) => extend_from_decoder(
validity,
page_validity,
Some(additional),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ struct State<'a> {
#[allow(clippy::large_enum_variant)]
#[derive(Debug)]
enum StateTranslation<'a> {
Unit(std::slice::ChunksExact<'a, u8>),
Plain(std::slice::ChunksExact<'a, u8>),
Dictionary {
values: HybridRleDecoder<'a>,
dict: &'a [u8],
Expand All @@ -37,7 +37,7 @@ enum StateTranslation<'a> {
impl<'a> PageState<'a> for State<'a> {
fn len(&self) -> usize {
match &self.translation {
StateTranslation::Unit(chunks) => chunks.len(),
StateTranslation::Plain(chunks) => chunks.len(),
StateTranslation::Dictionary {
values: decoder, ..
} => decoder.len(),
Expand Down Expand Up @@ -69,7 +69,7 @@ impl<'a> NestedDecoder<'a> for BinaryDecoder {
let values = page.buffer();
assert_eq!(values.len() % self.size, 0);
let values = values.chunks_exact(self.size);
StateTranslation::Unit(values)
StateTranslation::Plain(values)
},
(Encoding::PlainDictionary | Encoding::RleDictionary, Some(&dict), false) => {
let values = dict_indices_decoder(page)?;
Expand Down Expand Up @@ -104,7 +104,7 @@ impl<'a> NestedDecoder<'a> for BinaryDecoder {
}

match &mut state.translation {
StateTranslation::Unit(page_values) => {
StateTranslation::Plain(page_values) => {
for value in page_values.by_ref().take(n) {
values.push(value);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ where
#[allow(clippy::large_enum_variant)]
#[derive(Debug)]
pub(super) enum StateTranslation<'a, P: ParquetNativeType, T: NativeType> {
Unit(ArrayChunks<'a, P>),
Plain(ArrayChunks<'a, P>),
Dictionary(ValuesDictionary<'a, T>),
ByteStreamSplit(byte_stream_split::Decoder<'a>),
}
Expand All @@ -168,7 +168,7 @@ where
},
(Encoding::Plain, _) => {
let values = split_buffer(page)?.values;
Ok(Self::Unit(ArrayChunks::new(values).unwrap()))
Ok(Self::Plain(ArrayChunks::new(values).unwrap()))
},
(Encoding::ByteStreamSplit, _) => {
let values = split_buffer(page)?.values;
Expand All @@ -183,7 +183,7 @@ where

fn len_when_not_nullable(&self) -> usize {
match self {
Self::Unit(n) => n.len(),
Self::Plain(n) => n.len(),
Self::Dictionary(n) => n.len(),
Self::ByteStreamSplit(n) => n.len(),
}
Expand All @@ -195,7 +195,7 @@ where
}

match self {
Self::Unit(t) => _ = t.nth(n - 1),
Self::Plain(t) => _ = t.nth(n - 1),
Self::Dictionary(t) => t.values.skip_in_place(n)?,
Self::ByteStreamSplit(t) => _ = t.iter_converted(|_| ()).nth(n - 1),
}
Expand All @@ -213,15 +213,15 @@ where
let (values, validity) = decoded;

match (self, page_validity) {
(Self::Unit(page), None) => {
(Self::Plain(page), None) => {
PlainDecoderFnCollector {
chunks: page,
decoder: decoder.decoder,
_pd: std::marker::PhantomData,
}
.push_n(values, additional)?;
},
(Self::Unit(page), Some(page_validity)) => {
(Self::Plain(page), Some(page_validity)) => {
let collector = PlainDecoderFnCollector {
chunks: page,
decoder: decoder.decoder,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ use crate::read::deserialize::utils::{BatchableCollector, PageValidity, Translat
#[allow(clippy::large_enum_variant)]
#[derive(Debug)]
pub(super) enum StateTranslation<'a, P: ParquetNativeType, T: NativeType> {
Unit(ArrayChunks<'a, P>),
Plain(ArrayChunks<'a, P>),
Dictionary(ValuesDictionary<'a, T>),
ByteStreamSplit(byte_stream_split::Decoder<'a>),
DeltaBinaryPacked(delta_bitpacked::Decoder<'a>),
Expand All @@ -50,7 +50,7 @@ where
},
(Encoding::Plain, _) => {
let values = split_buffer(page)?.values;
Ok(Self::Unit(ArrayChunks::new(values).unwrap()))
Ok(Self::Plain(ArrayChunks::new(values).unwrap()))
},
(Encoding::ByteStreamSplit, _) => {
let values = split_buffer(page)?.values;
Expand All @@ -71,7 +71,7 @@ where

fn len_when_not_nullable(&self) -> usize {
match self {
Self::Unit(v) => v.len(),
Self::Plain(v) => v.len(),
Self::Dictionary(v) => v.len(),
Self::ByteStreamSplit(v) => v.len(),
Self::DeltaBinaryPacked(v) => v.size_hint().0,
Expand All @@ -84,7 +84,7 @@ where
}

match self {
Self::Unit(v) => _ = v.nth(n - 1),
Self::Plain(v) => _ = v.nth(n - 1),
Self::Dictionary(v) => v.values.skip_in_place(n)?,
Self::ByteStreamSplit(v) => _ = v.iter_converted(|_| ()).nth(n - 1),
Self::DeltaBinaryPacked(v) => _ = v.nth(n - 1),
Expand All @@ -102,7 +102,7 @@ where
) -> ParquetResult<()> {
let (values, validity) = decoded;
match (self, page_validity) {
(Self::Unit(page), Some(page_validity)) => {
(Self::Plain(page), Some(page_validity)) => {
let collector = PlainDecoderFnCollector {
chunks: page,
decoder: decoder.0.decoder,
Expand All @@ -117,7 +117,7 @@ where
collector,
)?;
},
(Self::Unit(page), None) => {
(Self::Plain(page), None) => {
PlainDecoderFnCollector {
chunks: page,
decoder: decoder.0.decoder,
Expand Down
Loading