From c2abd27781a81c7a7e0c4942e9222c53cabed6da Mon Sep 17 00:00:00 2001
From: Misaki Kasumi
Date: Thu, 4 Jul 2024 15:55:32 +0800
Subject: [PATCH 01/14] feat: Add flavor versioning

---
 .../src/chunked_array/array/mod.rs | 10 +++--
 .../chunked_array/builder/fixed_size_list.rs | 2 +-
 .../chunked_array/builder/list/anonymous.rs | 4 +-
 .../chunked_array/builder/list/primitive.rs | 2 +-
 .../src/chunked_array/builder/mod.rs | 2 +-
 .../src/chunked_array/builder/primitive.rs | 2 +-
 crates/polars-core/src/chunked_array/cast.rs | 2 +-
 .../polars-core/src/chunked_array/collect.rs | 19 +++++----
 crates/polars-core/src/chunked_array/from.rs | 7 +++-
 .../chunked_array/logical/categorical/from.rs | 29 +++++++-------
 .../chunked_array/logical/categorical/mod.rs | 2 +-
 .../src/chunked_array/logical/decimal.rs | 2 +-
 .../src/chunked_array/logical/struct_/mod.rs | 8 ++--
 crates/polars-core/src/chunked_array/mod.rs | 6 ++-
 .../src/chunked_array/ops/apply.rs | 6 +--
 .../src/chunked_array/ops/arity.rs | 30 +++++++-----
 .../src/chunked_array/ops/bit_repr.rs | 2 +-
 .../src/chunked_array/ops/explode.rs | 6 ++-
 .../src/chunked_array/ops/fill_null.rs | 4 +-
 .../src/chunked_array/ops/filter.rs | 8 +++-
 .../polars-core/src/chunked_array/ops/full.rs | 25 ++++++++----
 .../src/chunked_array/ops/gather.rs | 4 +-
 .../src/chunked_array/ops/rolling_window.rs | 2 +-
 .../polars-core/src/chunked_array/ops/set.rs | 11 +++++-
 .../ops/sort/arg_sort_multiple.rs | 4 +-
 .../src/chunked_array/ops/sort/mod.rs | 2 +-
 .../src/chunked_array/ops/unique/mod.rs | 12 +++++-
 .../src/chunked_array/trusted_len.rs | 5 ++-
 crates/polars-core/src/datatypes/dtype.rs | 39 ++++++++++++-------
 crates/polars-core/src/datatypes/field.rs | 4 +-
 .../frame/group_by/aggregations/agg_list.rs | 12 ++++--
 crates/polars-core/src/frame/mod.rs | 12 ++++--
 crates/polars-core/src/frame/row/transpose.rs | 2 +-
 crates/polars-core/src/schema.rs | 2 +-
 crates/polars-core/src/series/from.rs | 9 +++--
 .../src/series/implementations/decimal.rs | 3 +-
 crates/polars-core/src/series/into.rs | 20 +++++-----
 crates/polars-core/src/series/mod.rs | 3 +-
 crates/polars-core/src/series/ops/reshape.rs | 3 +-
 crates/polars-expr/src/expressions/window.rs | 2 +-
 crates/polars-ffi/src/version_0.rs | 6 ++-
 crates/polars-io/src/avro/write.rs | 4 +-
 crates/polars-io/src/ipc/ipc_stream.rs | 6 +--
 crates/polars-io/src/ipc/write.rs | 8 ++--
 crates/polars-io/src/ipc/write_async.rs | 6 +--
 crates/polars-io/src/json/mod.rs | 14 +++----
 crates/polars-io/src/ndjson/mod.rs | 2 +-
 .../src/parquet/write/batched_writer.rs | 4 +-
 crates/polars-io/src/parquet/write/writer.rs | 2 +-
 crates/polars-io/src/shared.rs | 2 +-
 .../src/chunked_array/gather/chunked.rs | 4 +-
 .../src/chunked_array/gather_skip_nulls.rs | 5 ++-
 .../polars-ops/src/chunked_array/repeat_by.rs | 2 +-
 .../src/chunked_array/strings/extract.rs | 2 +-
 .../src/chunked_array/strings/json_path.rs | 2 +-
 crates/polars-ops/src/series/ops/ewm_by.rs | 4 +-
 .../series/ops/interpolation/interpolate.rs | 2 +-
 .../ops/interpolation/interpolate_by.rs | 4 +-
 .../sinks/group_by/aggregates/mean.rs | 2 +-
 .../sinks/group_by/aggregates/sum.rs | 2 +-
 .../executors/sinks/group_by/generic/eval.rs | 2 +-
 .../sinks/group_by/generic/hash_table.rs | 2 +-
 crates/polars-pipe/src/executors/sinks/io.rs | 8 ++--
 .../src/executors/sinks/sort/sink_multiple.rs | 2 +-
 .../src/dsl/function_expr/plugin.rs | 2 +-
 .../src/dsl/function_expr/struct_.rs | 2 +-
 .../polars-time/src/chunkedarray/datetime.rs | 2 +-
 crates/polars/tests/it/io/ipc.rs | 4 +-
py-polars/polars/dataframe/frame.py | 29 +++++++++----- py-polars/polars/series/series.py | 12 +++++- py-polars/src/conversion/mod.rs | 25 ++++++++++++ py-polars/src/dataframe/export.rs | 7 ++-- py-polars/src/dataframe/io.rs | 9 +++-- py-polars/src/dataframe/serde.rs | 2 +- py-polars/src/series/export.rs | 4 +- py-polars/src/series/mod.rs | 2 +- 76 files changed, 325 insertions(+), 201 deletions(-) diff --git a/crates/polars-core/src/chunked_array/array/mod.rs b/crates/polars-core/src/chunked_array/array/mod.rs index 96fee06ff3b2..bfb28c7e83f3 100644 --- a/crates/polars-core/src/chunked_array/array/mod.rs +++ b/crates/polars-core/src/chunked_array/array/mod.rs @@ -44,7 +44,9 @@ impl ArrayChunked { ) -> PolarsResult { // Rechunk or the generated Series will have wrong length. let ca = self.rechunk(); - let field = self.inner_dtype().to_arrow_field("item", true); + let field = self + .inner_dtype() + .to_arrow_field("item", PlFlavor::highest()); let chunks = ca.downcast_iter().map(|arr| { let elements = unsafe { @@ -66,8 +68,10 @@ impl ArrayChunked { let out = out.rechunk(); let values = out.chunks()[0].clone(); - let inner_dtype = - FixedSizeListArray::default_datatype(out.dtype().to_arrow(true), ca.width()); + let inner_dtype = FixedSizeListArray::default_datatype( + out.dtype().to_arrow(PlFlavor::highest()), + ca.width(), + ); let arr = FixedSizeListArray::new(inner_dtype, values, arr.validity().cloned()); Ok(arr) }); diff --git a/crates/polars-core/src/chunked_array/builder/fixed_size_list.rs b/crates/polars-core/src/chunked_array/builder/fixed_size_list.rs index a419ee930401..2d6a939767ff 100644 --- a/crates/polars-core/src/chunked_array/builder/fixed_size_list.rs +++ b/crates/polars-core/src/chunked_array/builder/fixed_size_list.rs @@ -124,7 +124,7 @@ impl FixedSizeListBuilder for AnonymousOwnedFixedSizeListBuilder { .finish( self.inner_dtype .as_ref() - .map(|dt| dt.to_arrow(true)) + .map(|dt| dt.to_arrow(PlFlavor::highest())) .as_ref(), ) .unwrap(); diff --git a/crates/polars-core/src/chunked_array/builder/list/anonymous.rs b/crates/polars-core/src/chunked_array/builder/list/anonymous.rs index 1fb5393db1df..b3a64dc83ff0 100644 --- a/crates/polars-core/src/chunked_array/builder/list/anonymous.rs +++ b/crates/polars-core/src/chunked_array/builder/list/anonymous.rs @@ -89,7 +89,7 @@ impl<'a> AnonymousListBuilder<'a> { let inner_dtype_physical = inner_dtype .as_ref() - .map(|dt| dt.to_physical().to_arrow(true)); + .map(|dt| dt.to_physical().to_arrow(PlFlavor::highest())); let arr = slf.builder.finish(inner_dtype_physical.as_ref()).unwrap(); let list_dtype_logical = match inner_dtype { @@ -157,7 +157,7 @@ impl ListBuilderTrait for AnonymousOwnedListBuilder { let slf = std::mem::take(self); let inner_dtype_physical = inner_dtype .as_ref() - .map(|dt| dt.to_physical().to_arrow(true)); + .map(|dt| dt.to_physical().to_arrow(PlFlavor::highest())); let arr = slf.builder.finish(inner_dtype_physical.as_ref()).unwrap(); let list_dtype_logical = match inner_dtype { diff --git a/crates/polars-core/src/chunked_array/builder/list/primitive.rs b/crates/polars-core/src/chunked_array/builder/list/primitive.rs index ce9caff3a116..1ccca95a6ebf 100644 --- a/crates/polars-core/src/chunked_array/builder/list/primitive.rs +++ b/crates/polars-core/src/chunked_array/builder/list/primitive.rs @@ -39,7 +39,7 @@ where ) -> Self { let values = MutablePrimitiveArray::::with_capacity_from( values_capacity, - values_type.to_arrow(true), + values_type.to_arrow(PlFlavor::highest()), ); let builder = 
LargePrimitiveBuilder::::new_with_capacity(values, capacity); let field = Field::new(name, DataType::List(Box::new(logical_type))); diff --git a/crates/polars-core/src/chunked_array/builder/mod.rs b/crates/polars-core/src/chunked_array/builder/mod.rs index 3c1060016101..5a10e8db1edc 100644 --- a/crates/polars-core/src/chunked_array/builder/mod.rs +++ b/crates/polars-core/src/chunked_array/builder/mod.rs @@ -66,7 +66,7 @@ where T: PolarsNumericType, { fn from_slice(name: &str, v: &[T::Native]) -> Self { - let arr = PrimitiveArray::from_slice(v).to(T::get_dtype().to_arrow(true)); + let arr = PrimitiveArray::from_slice(v).to(T::get_dtype().to_arrow(PlFlavor::highest())); ChunkedArray::with_chunk(name, arr) } diff --git a/crates/polars-core/src/chunked_array/builder/primitive.rs b/crates/polars-core/src/chunked_array/builder/primitive.rs index eaedc93a5a80..0ae99d5951d2 100644 --- a/crates/polars-core/src/chunked_array/builder/primitive.rs +++ b/crates/polars-core/src/chunked_array/builder/primitive.rs @@ -41,7 +41,7 @@ where { pub fn new(name: &str, capacity: usize) -> Self { let array_builder = MutablePrimitiveArray::::with_capacity(capacity) - .to(T::get_dtype().to_arrow(true)); + .to(T::get_dtype().to_arrow(PlFlavor::highest())); PrimitiveChunkedBuilder { array_builder, diff --git a/crates/polars-core/src/chunked_array/cast.rs b/crates/polars-core/src/chunked_array/cast.rs index ceb73d492d0b..bfe1768e490a 100644 --- a/crates/polars-core/src/chunked_array/cast.rs +++ b/crates/polars-core/src/chunked_array/cast.rs @@ -51,7 +51,7 @@ pub(crate) fn cast_chunks( let check_nulls = matches!(options, CastOptions::Strict); let options = options.into(); - let arrow_dtype = dtype.try_to_arrow(true)?; + let arrow_dtype = dtype.try_to_arrow(PlFlavor::highest())?; chunks .iter() .map(|arr| { diff --git a/crates/polars-core/src/chunked_array/collect.rs b/crates/polars-core/src/chunked_array/collect.rs index 6131d741eac6..f14a0cde3d9a 100644 --- a/crates/polars-core/src/chunked_array/collect.rs +++ b/crates/polars-core/src/chunked_array/collect.rs @@ -18,6 +18,7 @@ use crate::chunked_array::ChunkedArray; use crate::datatypes::{ ArrayCollectIterExt, ArrayFromIter, ArrayFromIterDtype, DataType, Field, PolarsDataType, }; +use crate::prelude::PlFlavor; pub trait ChunkedCollectIterExt: Iterator + Sized { #[inline] @@ -26,7 +27,7 @@ pub trait ChunkedCollectIterExt: Iterator + Sized { T::Array: ArrayFromIterDtype, { let field = Arc::new(Field::new(name, dtype.clone())); - let arr = self.collect_arr_with_dtype(field.dtype.to_arrow(true)); + let arr = self.collect_arr_with_dtype(field.dtype.to_arrow(PlFlavor::highest())); ChunkedArray::from_chunk_iter_and_field(field, [arr]) } @@ -36,7 +37,7 @@ pub trait ChunkedCollectIterExt: Iterator + Sized { T::Array: ArrayFromIterDtype, { let field = Arc::clone(&name_dtype_src.field); - let arr = self.collect_arr_with_dtype(field.dtype.to_arrow(true)); + let arr = self.collect_arr_with_dtype(field.dtype.to_arrow(PlFlavor::highest())); ChunkedArray::from_chunk_iter_and_field(field, [arr]) } @@ -47,7 +48,7 @@ pub trait ChunkedCollectIterExt: Iterator + Sized { Self: TrustedLen, { let field = Arc::new(Field::new(name, dtype.clone())); - let arr = self.collect_arr_trusted_with_dtype(field.dtype.to_arrow(true)); + let arr = self.collect_arr_trusted_with_dtype(field.dtype.to_arrow(PlFlavor::highest())); ChunkedArray::from_chunk_iter_and_field(field, [arr]) } @@ -58,7 +59,7 @@ pub trait ChunkedCollectIterExt: Iterator + Sized { Self: TrustedLen, { let field = 
Arc::clone(&name_dtype_src.field); - let arr = self.collect_arr_trusted_with_dtype(field.dtype.to_arrow(true)); + let arr = self.collect_arr_trusted_with_dtype(field.dtype.to_arrow(PlFlavor::highest())); ChunkedArray::from_chunk_iter_and_field(field, [arr]) } @@ -73,7 +74,7 @@ pub trait ChunkedCollectIterExt: Iterator + Sized { Self: Iterator>, { let field = Arc::new(Field::new(name, dtype.clone())); - let arr = self.try_collect_arr_with_dtype(field.dtype.to_arrow(true))?; + let arr = self.try_collect_arr_with_dtype(field.dtype.to_arrow(PlFlavor::highest()))?; Ok(ChunkedArray::from_chunk_iter_and_field(field, [arr])) } @@ -87,7 +88,7 @@ pub trait ChunkedCollectIterExt: Iterator + Sized { Self: Iterator>, { let field = Arc::clone(&name_dtype_src.field); - let arr = self.try_collect_arr_with_dtype(field.dtype.to_arrow(true))?; + let arr = self.try_collect_arr_with_dtype(field.dtype.to_arrow(PlFlavor::highest()))?; Ok(ChunkedArray::from_chunk_iter_and_field(field, [arr])) } @@ -102,7 +103,8 @@ pub trait ChunkedCollectIterExt: Iterator + Sized { Self: Iterator> + TrustedLen, { let field = Arc::new(Field::new(name, dtype.clone())); - let arr = self.try_collect_arr_trusted_with_dtype(field.dtype.to_arrow(true))?; + let arr = + self.try_collect_arr_trusted_with_dtype(field.dtype.to_arrow(PlFlavor::highest()))?; Ok(ChunkedArray::from_chunk_iter_and_field(field, [arr])) } @@ -116,7 +118,8 @@ pub trait ChunkedCollectIterExt: Iterator + Sized { Self: Iterator> + TrustedLen, { let field = Arc::clone(&name_dtype_src.field); - let arr = self.try_collect_arr_trusted_with_dtype(field.dtype.to_arrow(true))?; + let arr = + self.try_collect_arr_trusted_with_dtype(field.dtype.to_arrow(PlFlavor::highest()))?; Ok(ChunkedArray::from_chunk_iter_and_field(field, [arr])) } } diff --git a/crates/polars-core/src/chunked_array/from.rs b/crates/polars-core/src/chunked_array/from.rs index af4892890336..b6d26d536422 100644 --- a/crates/polars-core/src/chunked_array/from.rs +++ b/crates/polars-core/src/chunked_array/from.rs @@ -217,7 +217,7 @@ where #[cfg(debug_assertions)] { if !chunks.is_empty() && !chunks[0].is_empty() && dtype.is_primitive() { - assert_eq!(chunks[0].data_type(), &dtype.to_arrow(true)) + assert_eq!(chunks[0].data_type(), &dtype.to_arrow(PlFlavor::highest())) } } let field = Arc::new(Field::new(name, dtype)); @@ -234,7 +234,10 @@ where } pub fn full_null_like(ca: &Self, length: usize) -> Self { - let chunks = std::iter::once(T::Array::full_null(length, ca.dtype().to_arrow(true))); + let chunks = std::iter::once(T::Array::full_null( + length, + ca.dtype().to_arrow(PlFlavor::highest()), + )); Self::from_chunk_iter_like(ca, chunks) } } diff --git a/crates/polars-core/src/chunked_array/logical/categorical/from.rs b/crates/polars-core/src/chunked_array/logical/categorical/from.rs index 568d5650ba8e..d01d2e16c634 100644 --- a/crates/polars-core/src/chunked_array/logical/categorical/from.rs +++ b/crates/polars-core/src/chunked_array/logical/categorical/from.rs @@ -3,16 +3,15 @@ use arrow::datatypes::IntegerType; use super::*; -fn convert_values(arr: &Utf8ViewArray, pl_flavor: bool) -> ArrayRef { - if pl_flavor { - arr.clone().boxed() - } else { - utf8view_to_utf8::(arr).boxed() +fn convert_values(arr: &Utf8ViewArray, pl_flavor: PlFlavor) -> ArrayRef { + match pl_flavor { + PlFlavor::Future1 => arr.clone().boxed(), + PlFlavor::Compatible => utf8view_to_utf8::(arr).boxed(), } } impl CategoricalChunked { - pub fn to_arrow(&self, pl_flavor: bool, as_i64: bool) -> ArrayRef { + pub fn to_arrow(&self, pl_flavor: 
PlFlavor, as_i64: bool) -> ArrayRef { if as_i64 { self.to_i64(pl_flavor).boxed() } else { @@ -20,11 +19,10 @@ impl CategoricalChunked { } } - fn to_u32(&self, pl_flavor: bool) -> DictionaryArray { - let values_dtype = if pl_flavor { - ArrowDataType::Utf8View - } else { - ArrowDataType::LargeUtf8 + fn to_u32(&self, pl_flavor: PlFlavor) -> DictionaryArray { + let values_dtype = match pl_flavor { + PlFlavor::Future1 => ArrowDataType::Utf8View, + PlFlavor::Compatible => ArrowDataType::LargeUtf8, }; let keys = self.physical().rechunk(); let keys = keys.downcast_iter().next().unwrap(); @@ -53,11 +51,10 @@ impl CategoricalChunked { } } - fn to_i64(&self, pl_flavor: bool) -> DictionaryArray { - let values_dtype = if pl_flavor { - ArrowDataType::Utf8View - } else { - ArrowDataType::LargeUtf8 + fn to_i64(&self, pl_flavor: PlFlavor) -> DictionaryArray { + let values_dtype = match pl_flavor { + PlFlavor::Future1 => ArrowDataType::Utf8View, + PlFlavor::Compatible => ArrowDataType::LargeUtf8, }; let keys = self.physical().rechunk(); let keys = keys.downcast_iter().next().unwrap(); diff --git a/crates/polars-core/src/chunked_array/logical/categorical/mod.rs b/crates/polars-core/src/chunked_array/logical/categorical/mod.rs index 30ab29b5c78a..972de59381be 100644 --- a/crates/polars-core/src/chunked_array/logical/categorical/mod.rs +++ b/crates/polars-core/src/chunked_array/logical/categorical/mod.rs @@ -464,7 +464,7 @@ mod test { let ca = ca.cast(&DataType::Categorical(None, Default::default()))?; let ca = ca.categorical().unwrap(); - let arr = ca.to_arrow(true, false); + let arr = ca.to_arrow(PlFlavor::highest(), false); let s = Series::try_from(("foo", arr))?; assert!(matches!(s.dtype(), &DataType::Categorical(_, _))); assert_eq!(s.null_count(), 1); diff --git a/crates/polars-core/src/chunked_array/logical/decimal.rs b/crates/polars-core/src/chunked_array/logical/decimal.rs index 4526f0d63e99..a7303867c98f 100644 --- a/crates/polars-core/src/chunked_array/logical/decimal.rs +++ b/crates/polars-core/src/chunked_array/logical/decimal.rs @@ -20,7 +20,7 @@ impl Int128Chunked { let (_, values, validity) = default.into_inner(); *arr = PrimitiveArray::new( - DataType::Decimal(precision, Some(scale)).to_arrow(true), + DataType::Decimal(precision, Some(scale)).to_arrow(PlFlavor::highest()), values, validity, ); diff --git a/crates/polars-core/src/chunked_array/logical/struct_/mod.rs b/crates/polars-core/src/chunked_array/logical/struct_/mod.rs index 07bc343aee21..a6a28acd5554 100644 --- a/crates/polars-core/src/chunked_array/logical/struct_/mod.rs +++ b/crates/polars-core/src/chunked_array/logical/struct_/mod.rs @@ -48,12 +48,12 @@ fn fields_to_struct_array(fields: &[Series], physical: bool) -> (ArrayRef, Vec s.to_arrow(0, true), + DataType::Object(_, _) => s.to_arrow(0, PlFlavor::highest()), _ => { if physical { s.chunks()[0].clone() } else { - s.to_arrow(0, true) + s.to_arrow(0, PlFlavor::highest()) } }, } @@ -145,7 +145,7 @@ impl StructChunked { .iter() .map(|s| match s.dtype() { #[cfg(feature = "object")] - DataType::Object(_, _) => s.to_arrow(i, true), + DataType::Object(_, _) => s.to_arrow(i, PlFlavor::highest()), _ => s.chunks()[i].clone(), }) .collect::>(); @@ -295,7 +295,7 @@ impl StructChunked { self.into() } - pub(crate) fn to_arrow(&self, i: usize, pl_flavor: bool) -> ArrayRef { + pub(crate) fn to_arrow(&self, i: usize, pl_flavor: PlFlavor) -> ArrayRef { let values = self .fields .iter() diff --git a/crates/polars-core/src/chunked_array/mod.rs b/crates/polars-core/src/chunked_array/mod.rs index 
3cc4f06b798e..b9d4b509218b 100644 --- a/crates/polars-core/src/chunked_array/mod.rs +++ b/crates/polars-core/src/chunked_array/mod.rs @@ -933,7 +933,11 @@ pub(crate) fn to_primitive( values: Vec, validity: Option, ) -> PrimitiveArray { - PrimitiveArray::new(T::get_dtype().to_arrow(true), values.into(), validity) + PrimitiveArray::new( + T::get_dtype().to_arrow(PlFlavor::highest()), + values.into(), + validity, + ) } pub(crate) fn to_array( diff --git a/crates/polars-core/src/chunked_array/ops/apply.rs b/crates/polars-core/src/chunked_array/ops/apply.rs index f6d7db426cd1..24529325498d 100644 --- a/crates/polars-core/src/chunked_array/ops/apply.rs +++ b/crates/polars-core/src/chunked_array/ops/apply.rs @@ -42,13 +42,13 @@ where let out: U::Array = arr .values_iter() .map(&mut op) - .collect_arr_with_dtype(dtype.to_arrow(true)); + .collect_arr_with_dtype(dtype.to_arrow(PlFlavor::highest())); out.with_validity_typed(arr.validity().cloned()) } else { let out: U::Array = arr .iter() .map(|opt| opt.map(&mut op)) - .collect_arr_with_dtype(dtype.to_arrow(true)); + .collect_arr_with_dtype(dtype.to_arrow(PlFlavor::highest())); out.with_validity_typed(arr.validity().cloned()) } }); @@ -133,7 +133,7 @@ where drop(arr); let compute_immutable = |arr: &PrimitiveArray| { - arrow::compute::arity::unary(arr, f, S::get_dtype().to_arrow(true)) + arrow::compute::arity::unary(arr, f, S::get_dtype().to_arrow(PlFlavor::highest())) }; if owned_arr.values().is_sliced() { diff --git a/crates/polars-core/src/chunked_array/ops/arity.rs b/crates/polars-core/src/chunked_array/ops/arity.rs index 0f3cf8cce4ca..a6ced1639da6 100644 --- a/crates/polars-core/src/chunked_array/ops/arity.rs +++ b/crates/polars-core/src/chunked_array/ops/arity.rs @@ -6,7 +6,7 @@ use polars_error::PolarsResult; use crate::chunked_array::metadata::MetadataProperties; use crate::datatypes::{ArrayCollectIterExt, ArrayFromIter}; -use crate::prelude::{ChunkedArray, PolarsDataType, Series}; +use crate::prelude::{ChunkedArray, PlFlavor, PolarsDataType, Series}; use crate::utils::{align_chunks_binary, align_chunks_binary_owned, align_chunks_ternary}; // We need this helper because for<'a> notation can't yet be applied properly @@ -106,7 +106,7 @@ where V::Array: ArrayFromIter<>>::Ret>, { if ca.null_count() == ca.len() { - let arr = V::Array::full_null(ca.len(), V::get_dtype().to_arrow(true)); + let arr = V::Array::full_null(ca.len(), V::get_dtype().to_arrow(PlFlavor::highest())); return ChunkedArray::with_chunk(ca.name(), arr); } @@ -130,7 +130,7 @@ where V::Array: ArrayFromIter, { if ca.null_count() == ca.len() { - let arr = V::Array::full_null(ca.len(), V::get_dtype().to_arrow(true)); + let arr = V::Array::full_null(ca.len(), V::get_dtype().to_arrow(PlFlavor::highest())); return Ok(ChunkedArray::with_chunk(ca.name(), arr)); } @@ -308,7 +308,7 @@ where { if lhs.null_count() == lhs.len() || rhs.null_count() == rhs.len() { let len = lhs.len().min(rhs.len()); - let arr = V::Array::full_null(len, V::get_dtype().to_arrow(true)); + let arr = V::Array::full_null(len, V::get_dtype().to_arrow(PlFlavor::highest())); return ChunkedArray::with_chunk(lhs.name(), arr); } @@ -704,7 +704,7 @@ where let min = lhs.len().min(rhs.len()); let max = lhs.len().max(rhs.len()); let len = if min == 1 { max } else { min }; - let arr = V::Array::full_null(len, V::get_dtype().to_arrow(true)); + let arr = V::Array::full_null(len, V::get_dtype().to_arrow(PlFlavor::highest())); return ChunkedArray::with_chunk(lhs.name(), arr); } @@ -745,7 +745,10 @@ where let opt_rhs = rhs.get(0); 
match opt_rhs { None => { - let arr = O::Array::full_null(lhs.len(), O::get_dtype().to_arrow(true)); + let arr = O::Array::full_null( + lhs.len(), + O::get_dtype().to_arrow(PlFlavor::highest()), + ); ChunkedArray::::with_chunk(lhs.name(), arr) }, Some(rhs) => unary_kernel(lhs, |arr| rhs_broadcast_kernel(arr, rhs.clone())), @@ -755,7 +758,10 @@ where let opt_lhs = lhs.get(0); match opt_lhs { None => { - let arr = O::Array::full_null(rhs.len(), O::get_dtype().to_arrow(true)); + let arr = O::Array::full_null( + rhs.len(), + O::get_dtype().to_arrow(PlFlavor::highest()), + ); ChunkedArray::::with_chunk(lhs.name(), arr) }, Some(lhs) => unary_kernel(rhs, |arr| lhs_broadcast_kernel(lhs.clone(), arr)), @@ -789,7 +795,10 @@ where let opt_rhs = rhs.get(0); match opt_rhs { None => { - let arr = O::Array::full_null(lhs.len(), O::get_dtype().to_arrow(true)); + let arr = O::Array::full_null( + lhs.len(), + O::get_dtype().to_arrow(PlFlavor::highest()), + ); ChunkedArray::::with_chunk(lhs.name(), arr) }, Some(rhs) => unary_kernel_owned(lhs, |arr| rhs_broadcast_kernel(arr, rhs.clone())), @@ -799,7 +808,10 @@ where let opt_lhs = lhs.get(0); match opt_lhs { None => { - let arr = O::Array::full_null(rhs.len(), O::get_dtype().to_arrow(true)); + let arr = O::Array::full_null( + rhs.len(), + O::get_dtype().to_arrow(PlFlavor::highest()), + ); ChunkedArray::::with_chunk(lhs.name(), arr) }, Some(lhs) => unary_kernel_owned(rhs, |arr| lhs_broadcast_kernel(lhs.clone(), arr)), diff --git a/crates/polars-core/src/chunked_array/ops/bit_repr.rs b/crates/polars-core/src/chunked_array/ops/bit_repr.rs index 37617a1d43ea..3771d4a66d76 100644 --- a/crates/polars-core/src/chunked_array/ops/bit_repr.rs +++ b/crates/polars-core/src/chunked_array/ops/bit_repr.rs @@ -46,7 +46,7 @@ fn reinterpret_list_chunked( let pa = PrimitiveArray::from_data_default(reinterpreted_buf, inner_arr.validity().cloned()); LargeListArray::new( - DataType::List(Box::new(U::get_dtype())).to_arrow(true), + DataType::List(Box::new(U::get_dtype())).to_arrow(PlFlavor::highest()), array.offsets().clone(), pa.to_boxed(), array.validity().cloned(), diff --git a/crates/polars-core/src/chunked_array/ops/explode.rs b/crates/polars-core/src/chunked_array/ops/explode.rs index 910d2941b28d..4d9d1112e569 100644 --- a/crates/polars-core/src/chunked_array/ops/explode.rs +++ b/crates/polars-core/src/chunked_array/ops/explode.rs @@ -150,7 +150,7 @@ where unsafe { set_bit_unchecked(validity_slice, i, false) } } let arr = PrimitiveArray::new( - T::get_dtype().to_arrow(true), + T::get_dtype().to_arrow(PlFlavor::highest()), new_values.into(), Some(validity.into()), ); @@ -269,7 +269,9 @@ impl ExplodeByOffsets for ListChunked { last = o; } process_range(start, last, &mut builder); - let arr = builder.finish(Some(&inner_type.to_arrow(true))).unwrap(); + let arr = builder + .finish(Some(&inner_type.to_arrow(PlFlavor::highest()))) + .unwrap(); let mut ca = unsafe { self.copy_with_chunks(vec![Box::new(arr)]) }; use MetadataProperties as P; diff --git a/crates/polars-core/src/chunked_array/ops/fill_null.rs b/crates/polars-core/src/chunked_array/ops/fill_null.rs index f09852f94f5e..c0d042854e02 100644 --- a/crates/polars-core/src/chunked_array/ops/fill_null.rs +++ b/crates/polars-core/src/chunked_array/ops/fill_null.rs @@ -306,7 +306,7 @@ where ChunkedArray::from_chunk_iter_like( ca, [ - T::Array::from_zeroable_vec(values, ca.dtype().to_arrow(true)) + T::Array::from_zeroable_vec(values, ca.dtype().to_arrow(PlFlavor::highest())) .with_validity_typed(Some(bm.into())), ], ) @@ -340,7 
+340,7 @@ where ChunkedArray::from_chunk_iter_like( ca, [ - T::Array::from_zeroable_vec(values, ca.dtype().to_arrow(true)) + T::Array::from_zeroable_vec(values, ca.dtype().to_arrow(PlFlavor::highest())) .with_validity_typed(Some(bm.into())), ], ) diff --git a/crates/polars-core/src/chunked_array/ops/filter.rs b/crates/polars-core/src/chunked_array/ops/filter.rs index a6cf1ca98288..954adcb53d7a 100644 --- a/crates/polars-core/src/chunked_array/ops/filter.rs +++ b/crates/polars-core/src/chunked_array/ops/filter.rs @@ -120,7 +120,9 @@ impl ChunkFilter for ListChunked { Some(true) => Ok(self.clone()), _ => Ok(ListChunked::from_chunk_iter( self.name(), - [ListArray::new_empty(self.dtype().to_arrow(true))], + [ListArray::new_empty( + self.dtype().to_arrow(PlFlavor::highest()), + )], )), }; } @@ -146,7 +148,9 @@ impl ChunkFilter for ArrayChunked { Some(true) => Ok(self.clone()), _ => Ok(ArrayChunked::from_chunk_iter( self.name(), - [FixedSizeListArray::new_empty(self.dtype().to_arrow(true))], + [FixedSizeListArray::new_empty( + self.dtype().to_arrow(PlFlavor::highest()), + )], )), }; } diff --git a/crates/polars-core/src/chunked_array/ops/full.rs b/crates/polars-core/src/chunked_array/ops/full.rs index c04d68cce776..da70407052f7 100644 --- a/crates/polars-core/src/chunked_array/ops/full.rs +++ b/crates/polars-core/src/chunked_array/ops/full.rs @@ -21,7 +21,7 @@ where T: PolarsNumericType, { fn full_null(name: &str, length: usize) -> Self { - let arr = PrimitiveArray::new_null(T::get_dtype().to_arrow(true), length); + let arr = PrimitiveArray::new_null(T::get_dtype().to_arrow(PlFlavor::highest()), length); ChunkedArray::with_chunk(name, arr) } } @@ -55,7 +55,7 @@ impl<'a> ChunkFull<&'a str> for StringChunked { impl ChunkFullNull for StringChunked { fn full_null(name: &str, length: usize) -> Self { - let arr = Utf8ViewArray::new_null(DataType::String.to_arrow(true), length); + let arr = Utf8ViewArray::new_null(DataType::String.to_arrow(PlFlavor::highest()), length); ChunkedArray::with_chunk(name, arr) } } @@ -72,7 +72,7 @@ impl<'a> ChunkFull<&'a [u8]> for BinaryChunked { impl ChunkFullNull for BinaryChunked { fn full_null(name: &str, length: usize) -> Self { - let arr = BinaryViewArray::new_null(DataType::Binary.to_arrow(true), length); + let arr = BinaryViewArray::new_null(DataType::Binary.to_arrow(PlFlavor::highest()), length); ChunkedArray::with_chunk(name, arr) } } @@ -90,7 +90,10 @@ impl<'a> ChunkFull<&'a [u8]> for BinaryOffsetChunked { impl ChunkFullNull for BinaryOffsetChunked { fn full_null(name: &str, length: usize) -> Self { - let arr = BinaryArray::::new_null(DataType::BinaryOffset.to_arrow(true), length); + let arr = BinaryArray::::new_null( + DataType::BinaryOffset.to_arrow(PlFlavor::highest()), + length, + ); ChunkedArray::with_chunk(name, arr) } } @@ -122,7 +125,11 @@ impl ArrayChunked { ) -> ArrayChunked { let arr = FixedSizeListArray::new_null( ArrowDataType::FixedSizeList( - Box::new(ArrowField::new("item", inner_dtype.to_arrow(true), true)), + Box::new(ArrowField::new( + "item", + inner_dtype.to_arrow(PlFlavor::highest()), + true, + )), width, ), length, @@ -137,7 +144,11 @@ impl ChunkFull<&Series> for ArrayChunked { let width = value.len(); let dtype = value.dtype(); let arrow_dtype = ArrowDataType::FixedSizeList( - Box::new(ArrowField::new("item", dtype.to_arrow(true), true)), + Box::new(ArrowField::new( + "item", + dtype.to_arrow(PlFlavor::highest()), + true, + )), width, ); let value = value.rechunk().chunks()[0].clone(); @@ -158,7 +169,7 @@ impl ListChunked { let arr: 
ListArray = ListArray::new_null( ArrowDataType::LargeList(Box::new(ArrowField::new( "item", - inner_dtype.to_physical().to_arrow(true), + inner_dtype.to_physical().to_arrow(PlFlavor::highest()), true, ))), length, diff --git a/crates/polars-core/src/chunked_array/ops/gather.rs b/crates/polars-core/src/chunked_array/ops/gather.rs index 67c3b980d516..72c882c9aaad 100644 --- a/crates/polars-core/src/chunked_array/ops/gather.rs +++ b/crates/polars-core/src/chunked_array/ops/gather.rs @@ -155,7 +155,7 @@ where } let targets: Vec<_> = ca.downcast_iter().collect(); let arr = gather_idx_array_unchecked( - ca.dtype().to_arrow(true), + ca.dtype().to_arrow(PlFlavor::highest()), &targets, ca.null_count() > 0, indices.as_ref(), @@ -192,7 +192,7 @@ where let targets: Vec<_> = ca.downcast_iter().collect(); let chunks = indices.downcast_iter().map(|idx_arr| { - let dtype = ca.dtype().to_arrow(true); + let dtype = ca.dtype().to_arrow(PlFlavor::highest()); if idx_arr.null_count() == 0 { gather_idx_array_unchecked(dtype, &targets, targets_have_nulls, idx_arr.values()) } else if targets.len() == 1 { diff --git a/crates/polars-core/src/chunked_array/ops/rolling_window.rs b/crates/polars-core/src/chunked_array/ops/rolling_window.rs index ef3c07529504..3b0b2d1c4c6d 100644 --- a/crates/polars-core/src/chunked_array/ops/rolling_window.rs +++ b/crates/polars-core/src/chunked_array/ops/rolling_window.rs @@ -270,7 +270,7 @@ mod inner_mod { } } let arr = PrimitiveArray::new( - T::get_dtype().to_arrow(true), + T::get_dtype().to_arrow(PlFlavor::highest()), values.into(), Some(validity.into()), ); diff --git a/crates/polars-core/src/chunked_array/ops/set.rs b/crates/polars-core/src/chunked_array/ops/set.rs index 52646925a05c..c50046d05a1f 100644 --- a/crates/polars-core/src/chunked_array/ops/set.rs +++ b/crates/polars-core/src/chunked_array/ops/set.rs @@ -55,7 +55,7 @@ where self.downcast_iter().next().unwrap(), idx, value, - T::get_dtype().to_arrow(true), + T::get_dtype().to_arrow(PlFlavor::highest()), )?; return Ok(Self::with_chunk(self.name(), arr)); } @@ -101,7 +101,14 @@ where let chunks = left .downcast_iter() .zip(mask.downcast_iter()) - .map(|(arr, mask)| set_with_mask(arr, mask, value, T::get_dtype().to_arrow(true))); + .map(|(arr, mask)| { + set_with_mask( + arr, + mask, + value, + T::get_dtype().to_arrow(PlFlavor::highest()), + ) + }); Ok(ChunkedArray::from_chunk_iter(self.name(), chunks)) } else { // slow path, could be optimized. 
diff --git a/crates/polars-core/src/chunked_array/ops/sort/arg_sort_multiple.rs b/crates/polars-core/src/chunked_array/ops/sort/arg_sort_multiple.rs index 8b909f09930a..521861db515c 100644 --- a/crates/polars-core/src/chunked_array/ops/sort/arg_sort_multiple.rs +++ b/crates/polars-core/src/chunked_array/ops/sort/arg_sort_multiple.rs @@ -94,12 +94,12 @@ pub fn _get_rows_encoded_compat_array(by: &Series) -> PolarsResult { DataType::Categorical(_, _) | DataType::Enum(_, _) => { let ca = by.categorical().unwrap(); if ca.uses_lexical_ordering() { - by.to_arrow(0, true) + by.to_arrow(0, PlFlavor::highest()) } else { ca.physical().chunks[0].clone() } }, - _ => by.to_arrow(0, true), + _ => by.to_arrow(0, PlFlavor::highest()), }; Ok(out) } diff --git a/crates/polars-core/src/chunked_array/ops/sort/mod.rs b/crates/polars-core/src/chunked_array/ops/sort/mod.rs index 907aeefa33d6..03d5f8060376 100644 --- a/crates/polars-core/src/chunked_array/ops/sort/mod.rs +++ b/crates/polars-core/src/chunked_array/ops/sort/mod.rs @@ -201,7 +201,7 @@ where } let arr = PrimitiveArray::new( - T::get_dtype().to_arrow(true), + T::get_dtype().to_arrow(PlFlavor::highest()), vals.into(), Some(create_validity(len, null_count, options.nulls_last)), ); diff --git a/crates/polars-core/src/chunked_array/ops/unique/mod.rs b/crates/polars-core/src/chunked_array/ops/unique/mod.rs index 2f4cc86d192f..c257a9668fca 100644 --- a/crates/polars-core/src/chunked_array/ops/unique/mod.rs +++ b/crates/polars-core/src/chunked_array/ops/unique/mod.rs @@ -126,7 +126,11 @@ where if !T::Native::is_float() && MetadataEnv::experimental_enabled() { let md = self.metadata(); if let (Some(min), Some(max)) = (md.get_min_value(), md.get_max_value()) { - let data_type = self.field.as_ref().data_type().to_arrow(false); + let data_type = self + .field + .as_ref() + .data_type() + .to_arrow(PlFlavor::Compatible); if let Some(mut state) = PrimitiveRangedUniqueState::new( *min, *max, @@ -268,7 +272,11 @@ impl ChunkUnique for BooleanChunked { fn unique(&self) -> PolarsResult { use polars_compute::unique::RangedUniqueKernel; - let data_type = self.field.as_ref().data_type().to_arrow(false); + let data_type = self + .field + .as_ref() + .data_type() + .to_arrow(PlFlavor::Compatible); let has_null = self.null_count() > 0; let mut state = BooleanUniqueKernelState::new(has_null, data_type); diff --git a/crates/polars-core/src/chunked_array/trusted_len.rs b/crates/polars-core/src/chunked_array/trusted_len.rs index af6a6d3f3c51..e7030c665d88 100644 --- a/crates/polars-core/src/chunked_array/trusted_len.rs +++ b/crates/polars-core/src/chunked_array/trusted_len.rs @@ -17,7 +17,8 @@ where // SAFETY: iter is TrustedLen. let iter = iter.into_iter(); let arr = unsafe { - PrimitiveArray::from_trusted_len_iter_unchecked(iter).to(T::get_dtype().to_arrow(true)) + PrimitiveArray::from_trusted_len_iter_unchecked(iter) + .to(T::get_dtype().to_arrow(PlFlavor::highest())) }; arr.into() } @@ -37,7 +38,7 @@ where // SAFETY: iter is TrustedLen. 
let iter = iter.into_iter(); let values = unsafe { Vec::from_trusted_len_iter_unchecked(iter) }.into(); - let arr = PrimitiveArray::new(T::get_dtype().to_arrow(true), values, None); + let arr = PrimitiveArray::new(T::get_dtype().to_arrow(PlFlavor::highest()), values, None); NoNull::new(arr.into()) } } diff --git a/crates/polars-core/src/datatypes/dtype.rs b/crates/polars-core/src/datatypes/dtype.rs index 44a2a82d94c8..4dcc22ec6fc4 100644 --- a/crates/polars-core/src/datatypes/dtype.rs +++ b/crates/polars-core/src/datatypes/dtype.rs @@ -498,7 +498,7 @@ impl DataType { } /// Convert to an Arrow Field - pub fn to_arrow_field(&self, name: &str, pl_flavor: bool) -> ArrowField { + pub fn to_arrow_field(&self, name: &str, pl_flavor: PlFlavor) -> ArrowField { let metadata = match self { #[cfg(feature = "dtype-categorical")] DataType::Enum(_, _) => Some(BTreeMap::from([( @@ -523,12 +523,12 @@ impl DataType { /// Convert to an Arrow data type. #[inline] - pub fn to_arrow(&self, pl_flavor: bool) -> ArrowDataType { + pub fn to_arrow(&self, pl_flavor: PlFlavor) -> ArrowDataType { self.try_to_arrow(pl_flavor).unwrap() } #[inline] - pub fn try_to_arrow(&self, pl_flavor: bool) -> PolarsResult { + pub fn try_to_arrow(&self, pl_flavor: PlFlavor) -> PolarsResult { use DataType::*; match self { Boolean => Ok(ArrowDataType::Boolean), @@ -553,18 +553,16 @@ impl DataType { )) }, String => { - let dt = if pl_flavor { - ArrowDataType::Utf8View - } else { - ArrowDataType::LargeUtf8 + let dt = match pl_flavor { + PlFlavor::Future1 => ArrowDataType::Utf8View, + PlFlavor::Compatible => ArrowDataType::LargeUtf8, }; Ok(dt) }, Binary => { - let dt = if pl_flavor { - ArrowDataType::BinaryView - } else { - ArrowDataType::LargeBinary + let dt = match pl_flavor { + PlFlavor::Future1 => ArrowDataType::BinaryView, + PlFlavor::Compatible => ArrowDataType::LargeBinary, }; Ok(dt) }, @@ -592,10 +590,9 @@ impl DataType { }, #[cfg(feature = "dtype-categorical")] Categorical(_, _) | Enum(_, _) => { - let values = if pl_flavor { - ArrowDataType::Utf8View - } else { - ArrowDataType::LargeUtf8 + let values = match pl_flavor { + PlFlavor::Future1 => ArrowDataType::Utf8View, + PlFlavor::Compatible => ArrowDataType::LargeUtf8, }; Ok(ArrowDataType::Dictionary( IntegerType::UInt32, @@ -789,3 +786,15 @@ pub fn create_enum_data_type(categories: Utf8ViewArray) -> DataType { let rev_map = RevMapping::build_local(categories); DataType::Enum(Some(Arc::new(rev_map)), Default::default()) } + +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum PlFlavor { + Compatible, + Future1, +} + +impl PlFlavor { + pub const fn highest() -> PlFlavor { + PlFlavor::Future1 + } +} diff --git a/crates/polars-core/src/datatypes/field.rs b/crates/polars-core/src/datatypes/field.rs index 63a3bafe33fe..003711a4d027 100644 --- a/crates/polars-core/src/datatypes/field.rs +++ b/crates/polars-core/src/datatypes/field.rs @@ -107,9 +107,9 @@ impl Field { /// let f = Field::new("Value", DataType::Int64); /// let af = arrow::datatypes::Field::new("Value", arrow::datatypes::ArrowDataType::Int64, true); /// - /// assert_eq!(f.to_arrow(true), af); + /// assert_eq!(f.to_arrow(PlFlavor::highest()), af); /// ``` - pub fn to_arrow(&self, pl_flavor: bool) -> ArrowField { + pub fn to_arrow(&self, pl_flavor: PlFlavor) -> ArrowField { self.dtype.to_arrow_field(self.name.as_str(), pl_flavor) } } diff --git a/crates/polars-core/src/frame/group_by/aggregations/agg_list.rs b/crates/polars-core/src/frame/group_by/aggregations/agg_list.rs index 1915797ee41f..da91b1a2642d 100644 --- 
a/crates/polars-core/src/frame/group_by/aggregations/agg_list.rs +++ b/crates/polars-core/src/frame/group_by/aggregations/agg_list.rs @@ -70,11 +70,13 @@ where }; let array = PrimitiveArray::new( - T::get_dtype().to_arrow(true), + T::get_dtype().to_arrow(PlFlavor::highest()), list_values.into(), validity, ); - let data_type = ListArray::::default_datatype(T::get_dtype().to_arrow(true)); + let data_type = ListArray::::default_datatype( + T::get_dtype().to_arrow(PlFlavor::highest()), + ); // SAFETY: // offsets are monotonically increasing let arr = ListArray::::new( @@ -133,11 +135,13 @@ where }; let array = PrimitiveArray::new( - T::get_dtype().to_arrow(true), + T::get_dtype().to_arrow(PlFlavor::highest()), list_values.into(), validity, ); - let data_type = ListArray::::default_datatype(T::get_dtype().to_arrow(true)); + let data_type = ListArray::::default_datatype( + T::get_dtype().to_arrow(PlFlavor::highest()), + ); let arr = ListArray::::new( data_type, Offsets::new_unchecked(offsets).into(), diff --git a/crates/polars-core/src/frame/mod.rs b/crates/polars-core/src/frame/mod.rs index 34e129e8808f..cca380bdcb09 100644 --- a/crates/polars-core/src/frame/mod.rs +++ b/crates/polars-core/src/frame/mod.rs @@ -2382,10 +2382,14 @@ impl DataFrame { /// This responsibility is left to the caller as we don't want to take mutable references here, /// but we also don't want to rechunk here, as this operation is costly and would benefit the caller /// as well. - pub fn iter_chunks(&self, pl_flavor: bool, parallel: bool) -> RecordBatchIter { + pub fn iter_chunks(&self, pl_flavor: PlFlavor, parallel: bool) -> RecordBatchIter { // If any of the columns is binview and we don't convert `pl_flavor` we allow parallelism // as we must allocate arrow strings/binaries. - let parallel = if parallel && !pl_flavor { + let parallel = if parallel + && match pl_flavor { + PlFlavor::Future1 => true, + PlFlavor::Compatible => false, + } { self.columns.len() > 1 && self .columns @@ -3018,7 +3022,7 @@ pub struct RecordBatchIter<'a> { columns: &'a Vec, idx: usize, n_chunks: usize, - pl_flavor: bool, + pl_flavor: PlFlavor, parallel: bool, } @@ -3117,7 +3121,7 @@ mod test { "foo" => &[1, 2, 3, 4, 5] ) .unwrap(); - let mut iter = df.iter_chunks(true, false); + let mut iter = df.iter_chunks(PlFlavor::highest(), false); assert_eq!(5, iter.next().unwrap().len()); assert!(iter.next().is_none()); } diff --git a/crates/polars-core/src/frame/row/transpose.rs b/crates/polars-core/src/frame/row/transpose.rs index 0fdc15c9c6f6..05de7a63c9c5 100644 --- a/crates/polars-core/src/frame/row/transpose.rs +++ b/crates/polars-core/src/frame/row/transpose.rs @@ -247,7 +247,7 @@ where }; let arr = PrimitiveArray::::new( - T::get_dtype().to_arrow(true), + T::get_dtype().to_arrow(PlFlavor::highest()), values.into(), validity, ); diff --git a/crates/polars-core/src/schema.rs b/crates/polars-core/src/schema.rs index 981615499dfb..52a67820ca2e 100644 --- a/crates/polars-core/src/schema.rs +++ b/crates/polars-core/src/schema.rs @@ -370,7 +370,7 @@ impl Schema { } /// Convert self to `ArrowSchema` by cloning the fields - pub fn to_arrow(&self, pl_flavor: bool) -> ArrowSchema { + pub fn to_arrow(&self, pl_flavor: PlFlavor) -> ArrowSchema { let fields: Vec<_> = self .inner .iter() diff --git a/crates/polars-core/src/series/from.rs b/crates/polars-core/src/series/from.rs index 9a82ed0e2506..1474bbf3d38d 100644 --- a/crates/polars-core/src/series/from.rs +++ b/crates/polars-core/src/series/from.rs @@ -102,9 +102,12 @@ impl Series { Float64 => 
Float64Chunked::from_chunks(name, chunks).into_series(), BinaryOffset => BinaryOffsetChunked::from_chunks(name, chunks).into_series(), #[cfg(feature = "dtype-struct")] - Struct(_) => { - Series::_try_from_arrow_unchecked(name, chunks, &dtype.to_arrow(true)).unwrap() - }, + Struct(_) => Series::_try_from_arrow_unchecked( + name, + chunks, + &dtype.to_arrow(PlFlavor::highest()), + ) + .unwrap(), #[cfg(feature = "object")] Object(_, _) => { assert_eq!(chunks.len(), 1); diff --git a/crates/polars-core/src/series/implementations/decimal.rs b/crates/polars-core/src/series/implementations/decimal.rs index 9f5c382d94c7..04875a3d46a8 100644 --- a/crates/polars-core/src/series/implementations/decimal.rs +++ b/crates/polars-core/src/series/implementations/decimal.rs @@ -43,7 +43,8 @@ impl SeriesWrap { Series::from_chunks_and_dtype_unchecked("", vec![arr.values().clone()], dtype) }; let new_values = s.array_ref(0).clone(); - let data_type = ListArray::::default_datatype(dtype.to_arrow(true)); + let data_type = + ListArray::::default_datatype(dtype.to_arrow(PlFlavor::highest())); let new_arr = ListArray::::new( data_type, arr.offsets().clone(), diff --git a/crates/polars-core/src/series/into.rs b/crates/polars-core/src/series/into.rs index f1fbd6143f0a..969f2352b904 100644 --- a/crates/polars-core/src/series/into.rs +++ b/crates/polars-core/src/series/into.rs @@ -19,7 +19,7 @@ impl Series { /// Convert a chunk in the Series to the correct Arrow type. /// This conversion is needed because polars doesn't use a /// 1 on 1 mapping for logical/ categoricals, etc. - pub fn to_arrow(&self, chunk_idx: usize, pl_flavor: bool) -> ArrayRef { + pub fn to_arrow(&self, chunk_idx: usize, pl_flavor: PlFlavor) -> ArrayRef { match self.dtype() { // make sure that we recursively apply all logical types. 
#[cfg(feature = "dtype-struct")] @@ -116,21 +116,19 @@ impl Series { object_series_to_arrow_array(&s) } }, - DataType::String => { - if pl_flavor { - self.array_ref(chunk_idx).clone() - } else { + DataType::String => match pl_flavor { + PlFlavor::Future1 => self.array_ref(chunk_idx).clone(), + PlFlavor::Compatible => { let arr = self.array_ref(chunk_idx); cast_unchecked(arr.as_ref(), &ArrowDataType::LargeUtf8).unwrap() - } + }, }, - DataType::Binary => { - if pl_flavor { - self.array_ref(chunk_idx).clone() - } else { + DataType::Binary => match pl_flavor { + PlFlavor::Future1 => self.array_ref(chunk_idx).clone(), + PlFlavor::Compatible => { let arr = self.array_ref(chunk_idx); cast_unchecked(arr.as_ref(), &ArrowDataType::LargeBinary).unwrap() - } + }, }, _ => self.array_ref(chunk_idx).clone(), } diff --git a/crates/polars-core/src/series/mod.rs b/crates/polars-core/src/series/mod.rs index 567f7e66f9d7..134e69e7b153 100644 --- a/crates/polars-core/src/series/mod.rs +++ b/crates/polars-core/src/series/mod.rs @@ -906,7 +906,8 @@ impl Series { let offsets = (0i64..(s.len() as i64 + 1)).collect::>(); let offsets = unsafe { Offsets::new_unchecked(offsets) }; - let data_type = LargeListArray::default_datatype(s.dtype().to_physical().to_arrow(true)); + let data_type = + LargeListArray::default_datatype(s.dtype().to_physical().to_arrow(PlFlavor::highest())); let new_arr = LargeListArray::new(data_type, offsets.into(), values, None); let mut out = ListChunked::with_chunk(s.name(), new_arr); out.set_inner_dtype(s.dtype().clone()); diff --git a/crates/polars-core/src/series/ops/reshape.rs b/crates/polars-core/src/series/ops/reshape.rs index 87e4a643d8e5..3304bb63689e 100644 --- a/crates/polars-core/src/series/ops/reshape.rs +++ b/crates/polars-core/src/series/ops/reshape.rs @@ -157,7 +157,8 @@ impl Series { prev_dtype = DataType::Array(Box::new(prev_dtype), dim as usize); prev_array = - FixedSizeListArray::new(prev_dtype.to_arrow(true), prev_array, None).boxed(); + FixedSizeListArray::new(prev_dtype.to_arrow(PlFlavor::highest()), prev_array, None) + .boxed(); } Ok(unsafe { Series::from_chunks_and_dtype_unchecked( diff --git a/crates/polars-expr/src/expressions/window.rs b/crates/polars-expr/src/expressions/window.rs index 97f25abc4196..66dce9eb4888 100644 --- a/crates/polars-expr/src/expressions/window.rs +++ b/crates/polars-expr/src/expressions/window.rs @@ -821,7 +821,7 @@ where unsafe { values.set_len(len) } let validity = Bitmap::from(validity); let arr = PrimitiveArray::new( - T::get_dtype().to_physical().to_arrow(true), + T::get_dtype().to_physical().to_arrow(PlFlavor::highest()), values.into(), Some(validity), ); diff --git a/crates/polars-ffi/src/version_0.rs b/crates/polars-ffi/src/version_0.rs index 43fec994c4d4..d97f54d5adbc 100644 --- a/crates/polars-ffi/src/version_0.rs +++ b/crates/polars-ffi/src/version_0.rs @@ -1,3 +1,5 @@ +use polars_core::prelude::PlFlavor; + use super::*; /// An FFI exported `Series`. @@ -52,13 +54,13 @@ unsafe extern "C" fn c_release_series_export(e: *mut SeriesExport) { } pub fn export_series(s: &Series) -> SeriesExport { - let field = ArrowField::new(s.name(), s.dtype().to_arrow(true), true); + let field = ArrowField::new(s.name(), s.dtype().to_arrow(PlFlavor::highest()), true); let schema = Box::new(ffi::export_field_to_c(&field)); let mut arrays = (0..s.chunks().len()) .map(|i| { // Make sure we export the logical type. 
- let arr = s.to_arrow(i, true); + let arr = s.to_arrow(i, PlFlavor::highest()); Box::into_raw(Box::new(ffi::export_array_to_c(arr.clone()))) }) .collect::>(); diff --git a/crates/polars-io/src/avro/write.rs b/crates/polars-io/src/avro/write.rs index b12cd358da16..d797b3a4aed9 100644 --- a/crates/polars-io/src/avro/write.rs +++ b/crates/polars-io/src/avro/write.rs @@ -64,12 +64,12 @@ where } fn finish(&mut self, df: &mut DataFrame) -> PolarsResult<()> { - let schema = schema_to_arrow_checked(&df.schema(), false, "avro")?; + let schema = schema_to_arrow_checked(&df.schema(), PlFlavor::Compatible, "avro")?; let record = write::to_record(&schema, self.name.clone())?; let mut data = vec![]; let mut compressed_block = avro_schema::file::CompressedBlock::default(); - for chunk in df.iter_chunks(false, true) { + for chunk in df.iter_chunks(PlFlavor::Compatible, true) { let mut serializers = chunk .iter() .zip(record.fields.iter()) diff --git a/crates/polars-io/src/ipc/ipc_stream.rs b/crates/polars-io/src/ipc/ipc_stream.rs index ed0872dc1ea0..f9b5b4b0bc28 100644 --- a/crates/polars-io/src/ipc/ipc_stream.rs +++ b/crates/polars-io/src/ipc/ipc_stream.rs @@ -207,7 +207,7 @@ where pub struct IpcStreamWriter { writer: W, compression: Option, - pl_flavor: bool, + pl_flavor: PlFlavor, } use arrow::record_batch::RecordBatch; @@ -221,7 +221,7 @@ impl IpcStreamWriter { self } - pub fn with_pl_flavor(mut self, pl_flavor: bool) -> Self { + pub fn with_pl_flavor(mut self, pl_flavor: PlFlavor) -> Self { self.pl_flavor = pl_flavor; self } @@ -235,7 +235,7 @@ where IpcStreamWriter { writer, compression: None, - pl_flavor: false, + pl_flavor: PlFlavor::Compatible, } } diff --git a/crates/polars-io/src/ipc/write.rs b/crates/polars-io/src/ipc/write.rs index b2c08846dbbc..bc637c0b0876 100644 --- a/crates/polars-io/src/ipc/write.rs +++ b/crates/polars-io/src/ipc/write.rs @@ -42,7 +42,7 @@ pub struct IpcWriter { pub(super) writer: W, pub(super) compression: Option, /// Polars' flavor of arrow. This might be temporary. - pub(super) pl_flavor: bool, + pub(super) pl_flavor: PlFlavor, } impl IpcWriter { @@ -52,7 +52,7 @@ impl IpcWriter { self } - pub fn with_pl_flavor(mut self, pl_flavor: bool) -> Self { + pub fn with_pl_flavor(mut self, pl_flavor: PlFlavor) -> Self { self.pl_flavor = pl_flavor; self } @@ -84,7 +84,7 @@ where IpcWriter { writer, compression: None, - pl_flavor: true, + pl_flavor: PlFlavor::highest(), } } @@ -111,7 +111,7 @@ where pub struct BatchedWriter { writer: write::FileWriter, - pl_flavor: bool, + pl_flavor: PlFlavor, } impl BatchedWriter { diff --git a/crates/polars-io/src/ipc/write_async.rs b/crates/polars-io/src/ipc/write_async.rs index 7a5b3240cbb5..25805117ec7e 100644 --- a/crates/polars-io/src/ipc/write_async.rs +++ b/crates/polars-io/src/ipc/write_async.rs @@ -10,14 +10,14 @@ impl IpcWriter { IpcWriter { writer, compression: None, - pl_flavor: false, + pl_flavor: PlFlavor::Compatible, } } pub fn batched_async(self, schema: &Schema) -> PolarsResult> { let writer = FileSink::new( self.writer, - schema.to_arrow(false), + schema.to_arrow(PlFlavor::Compatible), None, WriteOptions { compression: self.compression.map(|c| c.into()), @@ -44,7 +44,7 @@ where /// # Panics /// The caller must ensure the chunks in the given [`DataFrame`] are aligned. 
pub async fn write_batch(&mut self, df: &DataFrame) -> PolarsResult<()> { - let iter = df.iter_chunks(false, true); + let iter = df.iter_chunks(PlFlavor::Compatible, true); for batch in iter { self.writer.feed(batch.into()).await?; } diff --git a/crates/polars-io/src/json/mod.rs b/crates/polars-io/src/json/mod.rs index c51e238da2f9..e56e226ee52f 100644 --- a/crates/polars-io/src/json/mod.rs +++ b/crates/polars-io/src/json/mod.rs @@ -146,11 +146,11 @@ where .map(|s| { #[cfg(feature = "object")] polars_ensure!(!matches!(s.dtype(), DataType::Object(_, _)), ComputeError: "cannot write 'Object' datatype to json"); - Ok(s.field().to_arrow(true)) + Ok(s.field().to_arrow(PlFlavor::highest())) }) .collect::>>()?; let batches = df - .iter_chunks(true, false) + .iter_chunks(PlFlavor::highest(), false) .map(|chunk| Ok(Box::new(chunk_to_struct(chunk, fields.clone())) as ArrayRef)); match self.json_format { @@ -191,10 +191,10 @@ where .map(|s| { #[cfg(feature = "object")] polars_ensure!(!matches!(s.dtype(), DataType::Object(_, _)), ComputeError: "cannot write 'Object' datatype to json"); - Ok(s.field().to_arrow(true)) + Ok(s.field().to_arrow(PlFlavor::highest())) }) .collect::>>()?; - let chunks = df.iter_chunks(true, false); + let chunks = df.iter_chunks(PlFlavor::highest(), false); let batches = chunks.map(|chunk| Ok(Box::new(chunk_to_struct(chunk, fields.clone())) as ArrayRef)); let mut serializer = polars_json::ndjson::write::Serializer::new(batches, vec![]); @@ -267,7 +267,7 @@ where overwrite_schema(mut_schema, overwrite)?; } - DataType::Struct(schema.iter_fields().collect()).to_arrow(true) + DataType::Struct(schema.iter_fields().collect()).to_arrow(PlFlavor::highest()) } else { // infer let inner_dtype = if let BorrowedValue::Array(values) = &json_value { @@ -276,7 +276,7 @@ where self.infer_schema_len .unwrap_or(NonZeroUsize::new(usize::MAX).unwrap()), )? - .to_arrow(true) + .to_arrow(PlFlavor::highest()) } else { polars_json::json::infer(&json_value)? 
}; @@ -295,7 +295,7 @@ where .map(|(name, dt)| Field::new(&name, dt)) .collect(), ) - .to_arrow(true) + .to_arrow(PlFlavor::highest()) } else { inner_dtype } diff --git a/crates/polars-io/src/ndjson/mod.rs b/crates/polars-io/src/ndjson/mod.rs index 8d6ad6c2d680..d79bfc4ae93b 100644 --- a/crates/polars-io/src/ndjson/mod.rs +++ b/crates/polars-io/src/ndjson/mod.rs @@ -13,7 +13,7 @@ pub fn infer_schema( let data_types = polars_json::ndjson::iter_unique_dtypes(reader, infer_schema_len)?; let data_type = crate::json::infer::data_types_to_supertype(data_types.map(|dt| DataType::from(&dt)))?; - let schema = StructArray::get_fields(&data_type.to_arrow(true)) + let schema = StructArray::get_fields(&data_type.to_arrow(PlFlavor::highest())) .iter() .collect(); Ok(schema) diff --git a/crates/polars-io/src/parquet/write/batched_writer.rs b/crates/polars-io/src/parquet/write/batched_writer.rs index 5f363e0eb1a3..48d068f49796 100644 --- a/crates/polars-io/src/parquet/write/batched_writer.rs +++ b/crates/polars-io/src/parquet/write/batched_writer.rs @@ -27,7 +27,7 @@ impl BatchedWriter { &'a self, df: &'a DataFrame, ) -> impl Iterator>> + 'a { - let rb_iter = df.iter_chunks(true, false); + let rb_iter = df.iter_chunks(PlFlavor::highest(), false); rb_iter.filter_map(move |batch| match batch.len() { 0 => None, _ => { @@ -95,7 +95,7 @@ fn prepare_rg_iter<'a>( options: WriteOptions, parallel: bool, ) -> impl Iterator>> + 'a { - let rb_iter = df.iter_chunks(true, false); + let rb_iter = df.iter_chunks(PlFlavor::highest(), false); rb_iter.filter_map(move |batch| match batch.len() { 0 => None, _ => { diff --git a/crates/polars-io/src/parquet/write/writer.rs b/crates/polars-io/src/parquet/write/writer.rs index cd03a77cfed8..7c7e75c24abf 100644 --- a/crates/polars-io/src/parquet/write/writer.rs +++ b/crates/polars-io/src/parquet/write/writer.rs @@ -83,7 +83,7 @@ where } pub fn batched(self, schema: &Schema) -> PolarsResult> { - let schema = schema_to_arrow_checked(schema, true, "parquet")?; + let schema = schema_to_arrow_checked(schema, PlFlavor::highest(), "parquet")?; let parquet_schema = to_parquet_schema(&schema)?; let encodings = get_encodings(&schema); let options = self.materialize_options(); diff --git a/crates/polars-io/src/shared.rs b/crates/polars-io/src/shared.rs index bb18f8cba8d2..a0f3bcdb1948 100644 --- a/crates/polars-io/src/shared.rs +++ b/crates/polars-io/src/shared.rs @@ -120,7 +120,7 @@ pub(crate) fn finish_reader( pub(crate) fn schema_to_arrow_checked( schema: &Schema, - pl_flavor: bool, + pl_flavor: PlFlavor, _file_name: &str, ) -> PolarsResult { let fields = schema.iter_fields().map(|field| { diff --git a/crates/polars-ops/src/chunked_array/gather/chunked.rs b/crates/polars-ops/src/chunked_array/gather/chunked.rs index 44a7ac951ba7..c1516da874e8 100644 --- a/crates/polars-ops/src/chunked_array/gather/chunked.rs +++ b/crates/polars-ops/src/chunked_array/gather/chunked.rs @@ -208,7 +208,7 @@ where T::Array: Debug, { unsafe fn take_chunked_unchecked(&self, by: &[ChunkId], sorted: IsSorted) -> Self { - let arrow_dtype = self.dtype().to_arrow(true); + let arrow_dtype = self.dtype().to_arrow(PlFlavor::highest()); let mut out = if let Some(iter) = self.downcast_slices() { let targets = iter.collect::>(); @@ -245,7 +245,7 @@ where // Take function that checks of null state in `ChunkIdx`. 
unsafe fn take_opt_chunked_unchecked(&self, by: &[NullableChunkId]) -> Self { - let arrow_dtype = self.dtype().to_arrow(true); + let arrow_dtype = self.dtype().to_arrow(PlFlavor::highest()); if let Some(iter) = self.downcast_slices() { let targets = iter.collect::>(); diff --git a/crates/polars-ops/src/chunked_array/gather_skip_nulls.rs b/crates/polars-ops/src/chunked_array/gather_skip_nulls.rs index ba1427e6f6fd..cca5272349ce 100644 --- a/crates/polars-ops/src/chunked_array/gather_skip_nulls.rs +++ b/crates/polars-ops/src/chunked_array/gather_skip_nulls.rs @@ -94,7 +94,7 @@ where .collect(); let gathered = unsafe { gather_skip_nulls_idx_pairs_unchecked(self, index_pairs, indices.len()) }; - let arr = T::Array::from_zeroable_vec(gathered, self.dtype().to_arrow(true)); + let arr = T::Array::from_zeroable_vec(gathered, self.dtype().to_arrow(PlFlavor::highest())); Ok(ChunkedArray::from_chunk_iter_like(self, [arr])) } } @@ -140,7 +140,8 @@ where gather_skip_nulls_idx_pairs_unchecked(self, index_pairs, indices.as_ref().len()) }; - let mut arr = T::Array::from_zeroable_vec(gathered, self.dtype().to_arrow(true)); + let mut arr = + T::Array::from_zeroable_vec(gathered, self.dtype().to_arrow(PlFlavor::highest())); if indices.null_count() > 0 { let array_refs: Vec<&dyn Array> = indices.chunks().iter().map(|x| &**x).collect(); arr = arr.with_validity_typed(concatenate_validities(&array_refs)); diff --git a/crates/polars-ops/src/chunked_array/repeat_by.rs b/crates/polars-ops/src/chunked_array/repeat_by.rs index bdba858d5719..ea5c0222974a 100644 --- a/crates/polars-ops/src/chunked_array/repeat_by.rs +++ b/crates/polars-ops/src/chunked_array/repeat_by.rs @@ -39,7 +39,7 @@ where unsafe { LargeListArray::from_iter_primitive_trusted_len( iter, - T::get_dtype().to_arrow(true), + T::get_dtype().to_arrow(PlFlavor::highest()), ) } })) diff --git a/crates/polars-ops/src/chunked_array/strings/extract.rs b/crates/polars-ops/src/chunked_array/strings/extract.rs index b56e1251c840..ed7a87345245 100644 --- a/crates/polars-ops/src/chunked_array/strings/extract.rs +++ b/crates/polars-ops/src/chunked_array/strings/extract.rs @@ -52,7 +52,7 @@ pub(super) fn extract_groups( .map(|ca| ca.into_series()); } - let data_type = dtype.try_to_arrow(true)?; + let data_type = dtype.try_to_arrow(PlFlavor::highest())?; let DataType::Struct(fields) = dtype else { unreachable!() // Implementation error if it isn't a struct. 
}; diff --git a/crates/polars-ops/src/chunked_array/strings/json_path.rs b/crates/polars-ops/src/chunked_array/strings/json_path.rs index 6c54de338676..264d8163e712 100644 --- a/crates/polars-ops/src/chunked_array/strings/json_path.rs +++ b/crates/polars-ops/src/chunked_array/strings/json_path.rs @@ -107,7 +107,7 @@ pub trait Utf8JsonPathImpl: AsString { let array = polars_json::ndjson::deserialize::deserialize_iter( iter, - dtype.to_arrow(true), + dtype.to_arrow(PlFlavor::highest()), buf_size, ca.len(), ) diff --git a/crates/polars-ops/src/series/ops/ewm_by.rs b/crates/polars-ops/src/series/ops/ewm_by.rs index 1bc3630d6604..734e417c27d4 100644 --- a/crates/polars-ops/src/series/ops/ewm_by.rs +++ b/crates/polars-ops/src/series/ops/ewm_by.rs @@ -130,7 +130,7 @@ where } }; }); - let mut arr = T::Array::from_zeroable_vec(out, values.dtype().to_arrow(true)); + let mut arr = T::Array::from_zeroable_vec(out, values.dtype().to_arrow(PlFlavor::highest())); if (times.null_count() > 0) || (values.null_count() > 0) { let validity = binary_concatenate_validities(times, values); arr = arr.with_validity_typed(validity); @@ -179,7 +179,7 @@ where } }; }); - let mut arr = T::Array::from_zeroable_vec(out, values.dtype().to_arrow(true)); + let mut arr = T::Array::from_zeroable_vec(out, values.dtype().to_arrow(PlFlavor::highest())); if (times.null_count() > 0) || (values.null_count() > 0) { let validity = binary_concatenate_validities(times, values); arr = arr.with_validity_typed(validity); diff --git a/crates/polars-ops/src/series/ops/interpolation/interpolate.rs b/crates/polars-ops/src/series/ops/interpolation/interpolate.rs index 0263b506920d..4b1eb7507d6e 100644 --- a/crates/polars-ops/src/series/ops/interpolation/interpolate.rs +++ b/crates/polars-ops/src/series/ops/interpolation/interpolate.rs @@ -99,7 +99,7 @@ where } let array = PrimitiveArray::new( - T::get_dtype().to_arrow(true), + T::get_dtype().to_arrow(PlFlavor::highest()), out.into(), Some(validity.into()), ); diff --git a/crates/polars-ops/src/series/ops/interpolation/interpolate_by.rs b/crates/polars-ops/src/series/ops/interpolation/interpolate_by.rs index f425ffaac7e7..5a6a73be4b2b 100644 --- a/crates/polars-ops/src/series/ops/interpolation/interpolate_by.rs +++ b/crates/polars-ops/src/series/ops/interpolation/interpolate_by.rs @@ -151,7 +151,7 @@ where } let array = PrimitiveArray::new( - T::get_dtype().to_arrow(true), + T::get_dtype().to_arrow(PlFlavor::highest()), out.into(), Some(validity.into()), ); @@ -253,7 +253,7 @@ where } let array = PrimitiveArray::new( - T::get_dtype().to_arrow(true), + T::get_dtype().to_arrow(PlFlavor::highest()), out.into(), Some(validity.into()), ); diff --git a/crates/polars-pipe/src/executors/sinks/group_by/aggregates/mean.rs b/crates/polars-pipe/src/executors/sinks/group_by/aggregates/mean.rs index 82afe6a0b40c..353733ace4f3 100644 --- a/crates/polars-pipe/src/executors/sinks/group_by/aggregates/mean.rs +++ b/crates/polars-pipe/src/executors/sinks/group_by/aggregates/mean.rs @@ -77,7 +77,7 @@ where let arr = values.chunks().get_unchecked(0); arr.sliced_unchecked(offset as usize, length as usize) }; - let dtype = K::PolarsType::get_dtype().to_arrow(true); + let dtype = K::PolarsType::get_dtype().to_arrow(PlFlavor::highest()); let arr = arrow::compute::cast::cast_unchecked(arr.as_ref(), &dtype).unwrap(); let arr = unsafe { arr.as_any() diff --git a/crates/polars-pipe/src/executors/sinks/group_by/aggregates/sum.rs b/crates/polars-pipe/src/executors/sinks/group_by/aggregates/sum.rs index 
b256ca41720f..62808623d4bf 100644 --- a/crates/polars-pipe/src/executors/sinks/group_by/aggregates/sum.rs +++ b/crates/polars-pipe/src/executors/sinks/group_by/aggregates/sum.rs @@ -55,7 +55,7 @@ where let arr = values.chunks().get_unchecked(0); arr.sliced_unchecked(offset as usize, length as usize) }; - let dtype = K::PolarsType::get_dtype().to_arrow(true); + let dtype = K::PolarsType::get_dtype().to_arrow(PlFlavor::highest()); let arr = arrow::compute::cast::cast_unchecked(arr.as_ref(), &dtype).unwrap(); let arr = unsafe { arr.as_any() diff --git a/crates/polars-pipe/src/executors/sinks/group_by/generic/eval.rs b/crates/polars-pipe/src/executors/sinks/group_by/generic/eval.rs index ccfd390bcf62..f3490aa93c75 100644 --- a/crates/polars-pipe/src/executors/sinks/group_by/generic/eval.rs +++ b/crates/polars-pipe/src/executors/sinks/group_by/generic/eval.rs @@ -82,7 +82,7 @@ impl Eval { _ => s.to_physical_repr().into_owned(), }; let s = prepare_key(&s, chunk); - keys_columns.push(s.to_arrow(0, true)); + keys_columns.push(s.to_arrow(0, PlFlavor::highest())); } polars_row::convert_columns_amortized( diff --git a/crates/polars-pipe/src/executors/sinks/group_by/generic/hash_table.rs b/crates/polars-pipe/src/executors/sinks/group_by/generic/hash_table.rs index 3a1ca17a183a..1de8ba0810f5 100644 --- a/crates/polars-pipe/src/executors/sinks/group_by/generic/hash_table.rs +++ b/crates/polars-pipe/src/executors/sinks/group_by/generic/hash_table.rs @@ -261,7 +261,7 @@ impl AggHashTable { .output_schema .iter_dtypes() .take(self.num_keys) - .map(|dtype| dtype.to_physical().to_arrow(true)) + .map(|dtype| dtype.to_physical().to_arrow(PlFlavor::highest())) .collect::>(); let fields = vec![Default::default(); self.num_keys]; let key_columns = diff --git a/crates/polars-pipe/src/executors/sinks/io.rs b/crates/polars-pipe/src/executors/sinks/io.rs index ac2b27717a75..e5d279c8eca5 100644 --- a/crates/polars-pipe/src/executors/sinks/io.rs +++ b/crates/polars-pipe/src/executors/sinks/io.rs @@ -177,7 +177,7 @@ impl IOThread { path.push(format!("{count}.ipc")); let file = File::create(path).unwrap(); - let writer = IpcWriter::new(file).with_pl_flavor(true); + let writer = IpcWriter::new(file).with_pl_flavor(PlFlavor::highest()); let mut writer = writer.batched(&schema).unwrap(); writer.write_batch(&df).unwrap(); writer.finish().unwrap(); @@ -188,7 +188,7 @@ impl IOThread { path.push(format!("{count}_0_pass.ipc")); let file = File::create(path).unwrap(); - let writer = IpcWriter::new(file).with_pl_flavor(true); + let writer = IpcWriter::new(file).with_pl_flavor(PlFlavor::highest()); let mut writer = writer.batched(&schema).unwrap(); for mut df in iter { @@ -227,7 +227,7 @@ impl IOThread { path.push(format!("_{count}_full.ipc")); let file = File::create(path).unwrap(); - let mut writer = IpcWriter::new(file).with_pl_flavor(true); + let mut writer = IpcWriter::new(file).with_pl_flavor(PlFlavor::highest()); writer.finish(&mut df).unwrap(); } else { let iter = Box::new(std::iter::once(df)); @@ -260,7 +260,7 @@ impl IOThread { // duplicates path.push(format!("_{count}.ipc")); let file = File::create(path).unwrap(); - let writer = IpcWriter::new(file).with_pl_flavor(true); + let writer = IpcWriter::new(file).with_pl_flavor(PlFlavor::highest()); let mut writer = writer.batched(&self.schema).unwrap(); writer.write_batch(&df).unwrap(); writer.finish().unwrap(); diff --git a/crates/polars-pipe/src/executors/sinks/sort/sink_multiple.rs b/crates/polars-pipe/src/executors/sinks/sort/sink_multiple.rs index 
1e7976afc431..f14fe7295254 100644 --- a/crates/polars-pipe/src/executors/sinks/sort/sink_multiple.rs +++ b/crates/polars-pipe/src/executors/sinks/sort/sink_multiple.rs @@ -275,7 +275,7 @@ impl Sink for SortSinkMultiple { let sort_dtypes = self.sort_dtypes.take().map(|arr| { arr.iter() - .map(|dt| dt.to_physical().to_arrow(true)) + .map(|dt| dt.to_physical().to_arrow(PlFlavor::highest())) .collect::>() }); diff --git a/crates/polars-plan/src/dsl/function_expr/plugin.rs b/crates/polars-plan/src/dsl/function_expr/plugin.rs index 83c429e738c3..adb8b6c1ca4b 100644 --- a/crates/polars-plan/src/dsl/function_expr/plugin.rs +++ b/crates/polars-plan/src/dsl/function_expr/plugin.rs @@ -130,7 +130,7 @@ pub(super) unsafe fn plugin_field( // we deallocate the fields buffer let ffi_fields = fields .iter() - .map(|field| arrow::ffi::export_field_to_c(&field.to_arrow(true))) + .map(|field| arrow::ffi::export_field_to_c(&field.to_arrow(PlFlavor::highest()))) .collect::>() .into_boxed_slice(); let n_args = ffi_fields.len(); diff --git a/crates/polars-plan/src/dsl/function_expr/struct_.rs b/crates/polars-plan/src/dsl/function_expr/struct_.rs index d0c6ddb0a223..a8584edd82de 100644 --- a/crates/polars-plan/src/dsl/function_expr/struct_.rs +++ b/crates/polars-plan/src/dsl/function_expr/struct_.rs @@ -211,7 +211,7 @@ pub(super) fn suffix_fields(s: &Series, suffix: Arc) -> PolarsResult PolarsResult { let ca = s.struct_()?; - let dtype = ca.dtype().to_arrow(true); + let dtype = ca.dtype().to_arrow(PlFlavor::highest()); let iter = ca.chunks().iter().map(|arr| { let arr = arrow::compute::cast::cast_unchecked(arr.as_ref(), &dtype).unwrap(); diff --git a/crates/polars-time/src/chunkedarray/datetime.rs b/crates/polars-time/src/chunkedarray/datetime.rs index 7a2bf97d88e7..17a417ef6fe8 100644 --- a/crates/polars-time/src/chunkedarray/datetime.rs +++ b/crates/polars-time/src/chunkedarray/datetime.rs @@ -12,7 +12,7 @@ fn cast_and_apply< ca: &DatetimeChunked, func: F, ) -> ChunkedArray { - let dtype = ca.dtype().to_arrow(true); + let dtype = ca.dtype().to_arrow(PlFlavor::highest()); let chunks = ca.downcast_iter().map(|arr| { let arr = cast( arr, diff --git a/crates/polars/tests/it/io/ipc.rs b/crates/polars/tests/it/io/ipc.rs index f69bf78602da..386e161db419 100644 --- a/crates/polars/tests/it/io/ipc.rs +++ b/crates/polars/tests/it/io/ipc.rs @@ -12,7 +12,7 @@ fn test_ipc_compression_variadic_buffers() { let mut file = std::io::Cursor::new(vec![]); IpcWriter::new(&mut file) .with_compression(Some(IpcCompression::LZ4)) - .with_pl_flavor(true) + .with_pl_flavor(PlFlavor::highest()) .finish(&mut df) .unwrap(); @@ -82,7 +82,7 @@ fn test_read_ipc_with_columns() { .unwrap(); df_read.equals(&expected); - for pl_flavor in [false, true] { + for pl_flavor in [PlFlavor::Compatible, PlFlavor::Future1] { let mut buf: Cursor> = Cursor::new(Vec::new()); let mut df = df![ "letters" => ["x", "y", "z"], diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index e1ad0acb5d52..c209bcc19919 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -1384,7 +1384,7 @@ def item(self, row: int | None = None, column: int | str | None = None) -> Any: ) return s.get_index_signed(row) - def to_arrow(self, *, future: bool = False) -> pa.Table: + def to_arrow(self, *, future: bool | int = False) -> pa.Table: """ Collect the underlying arrow arrays in an Arrow Table. 
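A minimal usage sketch of the Python API as this first patch leaves it, assuming a Polars build that contains this commit: `future` accepts a bool or a raw flavor version integer, and the newer layout exports String columns as Arrow Utf8View rather than LargeUtf8. The keyword is reworked again later in this series.

```python
import io

import polars as pl

df = pl.DataFrame({"a": [1, 2, 3], "s": ["x", "y", "z"]})

# Default: the Arrow-compatible layout (e.g. LargeUtf8 for strings), no warning.
compat_table = df.to_arrow()

# future=True selects Polars' newest internal layout (Utf8View, BinaryView, ...)
# and flags the option as unstable.
future_table = df.to_arrow(future=True)

# A raw integer selects a specific flavor version: 0 = compatible, 1 = the new layout.
buf = io.BytesIO()
df.write_ipc(buf, future=1)
```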
@@ -1403,6 +1403,9 @@ def to_arrow(self, *, future: bool = False) -> pa.Table: This functionality is considered **unstable**. It may be changed at any point without it being considered a breaking change. + Setting this to an integer will use a specific version + of Polars' internal data structures. + Examples -------- >>> df = pl.DataFrame( @@ -1419,7 +1422,7 @@ def to_arrow(self, *, future: bool = False) -> pa.Table: if not self.width: # 0x0 dataframe, cannot infer schema from batches return pa.table({}) - if future: + if future is True: issue_unstable_warning( "The `future` parameter of `DataFrame.to_arrow` is considered unstable." ) @@ -3297,7 +3300,7 @@ def write_ipc( file: None, *, compression: IpcCompression = "uncompressed", - future: bool | None = None, + future: bool | int | None = None, ) -> BytesIO: ... @overload @@ -3306,7 +3309,7 @@ def write_ipc( file: str | Path | IO[bytes], *, compression: IpcCompression = "uncompressed", - future: bool | None = None, + future: bool | int | None = None, ) -> None: ... def write_ipc( @@ -3314,7 +3317,7 @@ def write_ipc( file: str | Path | IO[bytes] | None, *, compression: IpcCompression = "uncompressed", - future: bool | None = None, + future: bool | int | None = None, ) -> BytesIO | None: """ Write to Arrow IPC binary stream or Feather file. @@ -3336,6 +3339,9 @@ def write_ipc( This functionality is considered **unstable**. It may be changed at any point without it being considered a breaking change. + Setting this to an integer will use a specific version + of Polars' internal data structures. + Examples -------- >>> import pathlib @@ -3359,7 +3365,7 @@ def write_ipc( if compression is None: compression = "uncompressed" - if future: + if future is True: issue_unstable_warning( "The `future` parameter of `DataFrame.write_ipc` is considered unstable." ) @@ -3375,7 +3381,7 @@ def write_ipc_stream( file: None, *, compression: IpcCompression = "uncompressed", - future: bool | None = None, + future: bool | int | None = None, ) -> BytesIO: ... @overload @@ -3384,7 +3390,7 @@ def write_ipc_stream( file: str | Path | IO[bytes], *, compression: IpcCompression = "uncompressed", - future: bool | None = None, + future: bool | int | None = None, ) -> None: ... def write_ipc_stream( @@ -3392,7 +3398,7 @@ def write_ipc_stream( file: str | Path | IO[bytes] | None, *, compression: IpcCompression = "uncompressed", - future: bool | None = None, + future: bool | int | None = None, ) -> BytesIO | None: """ Write to Arrow IPC record batch stream. @@ -3414,6 +3420,9 @@ def write_ipc_stream( This functionality is considered **unstable**. It may be changed at any point without it being considered a breaking change. + Setting this to an integer will use a specific version + of Polars' internal data structures. + Examples -------- >>> import pathlib @@ -3437,7 +3446,7 @@ def write_ipc_stream( if compression is None: compression = "uncompressed" - if future: + if future is True: issue_unstable_warning( "The `future` parameter of `DataFrame.write_ipc` is considered unstable." 
) diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index 75a17453613e..bb6ad82eaf75 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -45,7 +45,7 @@ issue_deprecation_warning, ) from polars._utils.getitem import get_series_item_by_key -from polars._utils.unstable import unstable +from polars._utils.unstable import issue_unstable_warning, unstable from polars._utils.various import ( BUILDING_SPHINX_DOCS, _is_generator, @@ -4342,7 +4342,7 @@ def to_torch(self) -> torch.Tensor: # tensor.rename(self.name) return tensor - def to_arrow(self, *, future: bool = False) -> pa.Array: + def to_arrow(self, *, future: bool | int = False) -> pa.Array: """ Return the underlying Arrow array. @@ -4358,6 +4358,9 @@ def to_arrow(self, *, future: bool = False) -> pa.Array: This functionality is considered **unstable**. It may be changed at any point without it being considered a breaking change. + Setting this to an integer will use a specific version + of Polars' internal data structures. + Examples -------- >>> s = pl.Series("a", [1, 2, 3]) @@ -4370,6 +4373,11 @@ def to_arrow(self, *, future: bool = False) -> pa.Array: 3 ] """ + if future is True: + issue_unstable_warning( + "The `future` parameter of `DataFrame.to_arrow` is considered unstable." + ) + return self._s.to_arrow(future) def to_pandas( diff --git a/py-polars/src/conversion/mod.rs b/py-polars/src/conversion/mod.rs index d4d6f4e6b525..e0a18ead85fc 100644 --- a/py-polars/src/conversion/mod.rs +++ b/py-polars/src/conversion/mod.rs @@ -1182,3 +1182,28 @@ where { container.into_iter().map(|s| s.as_ref().into()).collect() } + +#[derive(Debug, Copy, Clone)] +pub struct PyPlFlavor(pub PlFlavor); + +impl<'a> FromPyObject<'a> for PyPlFlavor { + fn extract_bound(ob: &Bound<'a, PyAny>) -> PyResult { + Ok(PyPlFlavor(if let Ok(version) = ob.extract::() { + match version { + 0 => PlFlavor::Compatible, + 1 => PlFlavor::Future1, + _ => return Err(PyValueError::new_err("invalid flavor version")), + } + } else if let Ok(future) = ob.extract::() { + if future { + PlFlavor::highest() + } else { + PlFlavor::Compatible + } + } else { + return Err(PyTypeError::new_err( + "'future' argument accepts int or bool", + )); + })) + } +} diff --git a/py-polars/src/dataframe/export.rs b/py-polars/src/dataframe/export.rs index c22d974407f4..f66eff570108 100644 --- a/py-polars/src/dataframe/export.rs +++ b/py-polars/src/dataframe/export.rs @@ -7,6 +7,7 @@ use pyo3::types::{PyList, PyTuple}; use super::*; use crate::conversion::{ObjectValue, Wrap}; use crate::interop; +use crate::prelude::PyPlFlavor; #[pymethods] impl PyDataFrame { @@ -63,7 +64,7 @@ impl PyDataFrame { } #[allow(clippy::wrong_self_convention)] - pub fn to_arrow(&mut self, future: bool) -> PyResult> { + pub fn to_arrow(&mut self, future: PyPlFlavor) -> PyResult> { self.df.align_chunks(); Python::with_gil(|py| { let pyarrow = py.import_bound("pyarrow")?; @@ -71,7 +72,7 @@ impl PyDataFrame { let rbs = self .df - .iter_chunks(future, true) + .iter_chunks(future.0, true) .map(|rb| interop::arrow::to_py::to_py_rb(&rb, &names, py, &pyarrow)) .collect::>()?; Ok(rbs) @@ -104,7 +105,7 @@ impl PyDataFrame { .collect::>(); let rbs = self .df - .iter_chunks(false, true) + .iter_chunks(PlFlavor::Compatible, true) .map(|rb| { let mut rb = rb.into_arrays(); for i in &cat_columns { diff --git a/py-polars/src/dataframe/io.rs b/py-polars/src/dataframe/io.rs index b363def33d14..77813ad8de60 100644 --- a/py-polars/src/dataframe/io.rs +++ 
b/py-polars/src/dataframe/io.rs @@ -18,6 +18,7 @@ use crate::file::{ get_either_file, get_file_like, get_mmap_bytes_reader, get_mmap_bytes_reader_and_path, read_if_bytesio, EitherRustPythonFile, }; +use crate::prelude::PyPlFlavor; #[pymethods] impl PyDataFrame { @@ -431,7 +432,7 @@ impl PyDataFrame { py: Python, py_f: PyObject, compression: Wrap>, - future: bool, + future: PyPlFlavor, ) -> PyResult<()> { let either = get_either_file(py_f, true)?; if let EitherRustPythonFile::Rust(ref f) = either { @@ -441,7 +442,7 @@ impl PyDataFrame { py.allow_threads(|| { IpcWriter::new(&mut buf) .with_compression(compression.0) - .with_pl_flavor(future) + .with_pl_flavor(future.0) .finish(&mut self.df) .map_err(PyPolarsErr::from) })?; @@ -454,13 +455,13 @@ impl PyDataFrame { py: Python, py_f: PyObject, compression: Wrap>, - future: bool, + future: PyPlFlavor, ) -> PyResult<()> { let mut buf = get_file_like(py_f, true)?; py.allow_threads(|| { IpcStreamWriter::new(&mut buf) .with_compression(compression.0) - .with_pl_flavor(future) + .with_pl_flavor(future.0) .finish(&mut self.df) .map_err(PyPolarsErr::from) })?; diff --git a/py-polars/src/dataframe/serde.rs b/py-polars/src/dataframe/serde.rs index 131262acf9c8..b9a9c50a4e06 100644 --- a/py-polars/src/dataframe/serde.rs +++ b/py-polars/src/dataframe/serde.rs @@ -18,7 +18,7 @@ impl PyDataFrame { // Used in pickle/pickling let mut buf: Vec = vec![]; IpcStreamWriter::new(&mut buf) - .with_pl_flavor(true) + .with_pl_flavor(PlFlavor::highest()) .finish(&mut self.df.clone()) .expect("ipc writer"); Ok(PyBytes::new_bound(py, &buf).to_object(py)) diff --git a/py-polars/src/series/export.rs b/py-polars/src/series/export.rs index d49f3358cc14..f8c677b92f5c 100644 --- a/py-polars/src/series/export.rs +++ b/py-polars/src/series/export.rs @@ -145,12 +145,12 @@ impl PySeries { /// Return the underlying Arrow array. 
#[allow(clippy::wrong_self_convention)] - fn to_arrow(&mut self, future: bool) -> PyResult { + fn to_arrow(&mut self, future: PyPlFlavor) -> PyResult { self.rechunk(true); Python::with_gil(|py| { let pyarrow = py.import_bound("pyarrow")?; - interop::arrow::to_py::to_py_array(self.series.to_arrow(0, future), py, &pyarrow) + interop::arrow::to_py::to_py_array(self.series.to_arrow(0, future.0), py, &pyarrow) }) } } diff --git a/py-polars/src/series/mod.rs b/py-polars/src/series/mod.rs index 74d2c7ae8422..c14933fd8eaa 100644 --- a/py-polars/src/series/mod.rs +++ b/py-polars/src/series/mod.rs @@ -665,7 +665,7 @@ impl PySeries { // IPC only support DataFrames so we need to convert it let mut df = self.series.clone().into_frame(); IpcStreamWriter::new(&mut buf) - .with_pl_flavor(true) + .with_pl_flavor(PlFlavor::highest()) .finish(&mut df) .expect("ipc writer"); Ok(PyBytes::new_bound(py, &buf).to_object(py)) From ab5331b35887ddc4c4f1526e3aa68644ceca0449 Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Thu, 4 Jul 2024 17:20:14 +0800 Subject: [PATCH 02/14] feat(python): Introduce Flavor --- py-polars/polars/dataframe/frame.py | 76 ++++++------------------ py-polars/polars/interchange/protocol.py | 23 ++++++- py-polars/polars/series/series.py | 24 ++------ py-polars/tests/unit/io/test_ipc.py | 4 +- 4 files changed, 47 insertions(+), 80 deletions(-) diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index c209bcc19919..2d4b0242b7ac 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -103,6 +103,7 @@ TooManyRowsReturnedError, ) from polars.functions import col, lit +from polars.interchange.protocol import Flavor from polars.schema import Schema from polars.selectors import _expand_selector_dicts, _expand_selectors @@ -1384,7 +1385,7 @@ def item(self, row: int | None = None, column: int | str | None = None) -> Any: ) return s.get_index_signed(row) - def to_arrow(self, *, future: bool | int = False) -> pa.Table: + def to_arrow(self, *, flavor: Flavor = Flavor.Compatible) -> pa.Table: """ Collect the underlying arrow arrays in an Arrow Table. @@ -1395,16 +1396,8 @@ def to_arrow(self, *, future: bool | int = False) -> pa.Table: Parameters ---------- - future - Setting this to `True` will write Polars' internal data structures that - might not be available by other Arrow implementations. - - .. warning:: - This functionality is considered **unstable**. It may be changed - at any point without it being considered a breaking change. - - Setting this to an integer will use a specific version - of Polars' internal data structures. + flavor + Use a specific version of Polars' internal data structures. Examples -------- @@ -1422,12 +1415,7 @@ def to_arrow(self, *, future: bool | int = False) -> pa.Table: if not self.width: # 0x0 dataframe, cannot infer schema from batches return pa.table({}) - if future is True: - issue_unstable_warning( - "The `future` parameter of `DataFrame.to_arrow` is considered unstable." - ) - - record_batches = self._df.to_arrow(future) + record_batches = self._df.to_arrow(flavor.value) return pa.Table.from_batches(record_batches) @overload @@ -3300,7 +3288,7 @@ def write_ipc( file: None, *, compression: IpcCompression = "uncompressed", - future: bool | int | None = None, + flavor: Flavor = Flavor.Future1, ) -> BytesIO: ... 
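As a hedged sketch of the enum-based API this second patch introduces (and later patches in the series revise), usage looks roughly like this, assuming `Flavor` is imported from `polars.interchange.protocol` as the diff does:

```python
from io import BytesIO

import polars as pl
from polars.interchange.protocol import Flavor

df = pl.DataFrame({"x": ["a", "bb", "ccc"]})

# Flavor.Compatible (value 0) keeps the older Arrow layout; Flavor.Future1
# (value 1) selects the newer Polars-native layout.
table = df.to_arrow(flavor=Flavor.Compatible)

# Future1 becomes the default for the IPC writers.
buf = BytesIO()
df.write_ipc(buf, flavor=Flavor.Future1)
```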
@overload @@ -3309,7 +3297,7 @@ def write_ipc( file: str | Path | IO[bytes], *, compression: IpcCompression = "uncompressed", - future: bool | int | None = None, + flavor: Flavor = Flavor.Future1, ) -> None: ... def write_ipc( @@ -3317,7 +3305,7 @@ def write_ipc( file: str | Path | IO[bytes] | None, *, compression: IpcCompression = "uncompressed", - future: bool | int | None = None, + flavor: Flavor = Flavor.Future1, ) -> BytesIO | None: """ Write to Arrow IPC binary stream or Feather file. @@ -3331,16 +3319,8 @@ def write_ipc( written. If set to `None`, the output is returned as a BytesIO object. compression : {'uncompressed', 'lz4', 'zstd'} Compression method. Defaults to "uncompressed". - future - Setting this to `True` will write Polars' internal data structures that - might not be available by other Arrow implementations. - - .. warning:: - This functionality is considered **unstable**. It may be changed - at any point without it being considered a breaking change. - - Setting this to an integer will use a specific version - of Polars' internal data structures. + flavor + Use a specific version of Polars' internal data structures. Examples -------- @@ -3365,14 +3345,7 @@ def write_ipc( if compression is None: compression = "uncompressed" - if future is True: - issue_unstable_warning( - "The `future` parameter of `DataFrame.write_ipc` is considered unstable." - ) - if future is None: - future = True - - self._df.write_ipc(file, compression, future) + self._df.write_ipc(file, compression, flavor.value) return file if return_bytes else None # type: ignore[return-value] @overload @@ -3381,7 +3354,7 @@ def write_ipc_stream( file: None, *, compression: IpcCompression = "uncompressed", - future: bool | int | None = None, + flavor: Flavor = Flavor.Future1, ) -> BytesIO: ... @overload @@ -3390,7 +3363,7 @@ def write_ipc_stream( file: str | Path | IO[bytes], *, compression: IpcCompression = "uncompressed", - future: bool | int | None = None, + flavor: Flavor = Flavor.Future1, ) -> None: ... def write_ipc_stream( @@ -3398,7 +3371,7 @@ def write_ipc_stream( file: str | Path | IO[bytes] | None, *, compression: IpcCompression = "uncompressed", - future: bool | int | None = None, + flavor: Flavor = Flavor.Future1, ) -> BytesIO | None: """ Write to Arrow IPC record batch stream. @@ -3412,16 +3385,8 @@ def write_ipc_stream( be written. If set to `None`, the output is returned as a BytesIO object. compression : {'uncompressed', 'lz4', 'zstd'} Compression method. Defaults to "uncompressed". - future - Setting this to `True` will write Polars' internal data structures that - might not be available by other Arrow implementations. - - .. warning:: - This functionality is considered **unstable**. It may be changed - at any point without it being considered a breaking change. - - Setting this to an integer will use a specific version - of Polars' internal data structures. + flavor + Use a specific version of Polars' internal data structures. Examples -------- @@ -3446,14 +3411,7 @@ def write_ipc_stream( if compression is None: compression = "uncompressed" - if future is True: - issue_unstable_warning( - "The `future` parameter of `DataFrame.write_ipc` is considered unstable." 
- ) - if future is None: - future = True - - self._df.write_ipc_stream(file, compression, future=future) + self._df.write_ipc_stream(file, compression, flavor.value) return file if return_bytes else None # type: ignore[return-value] def write_parquet( diff --git a/py-polars/polars/interchange/protocol.py b/py-polars/polars/interchange/protocol.py index 2daca4b3cb19..5ea892d61729 100644 --- a/py-polars/polars/interchange/protocol.py +++ b/py-polars/polars/interchange/protocol.py @@ -1,6 +1,6 @@ from __future__ import annotations -from enum import IntEnum +from enum import Enum, IntEnum from typing import ( TYPE_CHECKING, Any, @@ -13,6 +13,8 @@ TypedDict, ) +from polars._utils.unstable import issue_unstable_warning + if TYPE_CHECKING: import sys @@ -255,3 +257,22 @@ class Endianness: class CopyNotAllowedError(RuntimeError): """Exception raised when a copy is required, but `allow_copy` is set to `False`.""" + + +class Flavor(Enum): + """Data structure versioning.""" + + Compatible = 0 + Future1 = 1 + + @staticmethod + def highest() -> Flavor: + """ + Get the highest supported flavor. + + .. warning:: + Highest flavor is considered **unstable**. It may be changed + at any point without it being considered a breaking change. + """ + issue_unstable_warning("Using the highest flavor is considered unstable.") + return Flavor.Future1 diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index bb6ad82eaf75..ea36c0304fe4 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -45,7 +45,7 @@ issue_deprecation_warning, ) from polars._utils.getitem import get_series_item_by_key -from polars._utils.unstable import issue_unstable_warning, unstable +from polars._utils.unstable import unstable from polars._utils.various import ( BUILDING_SPHINX_DOCS, _is_generator, @@ -98,6 +98,7 @@ from polars.dependencies import pandas as pd from polars.dependencies import pyarrow as pa from polars.exceptions import ComputeError, ModuleUpgradeRequiredError, ShapeError +from polars.interchange.protocol import Flavor from polars.series.array import ArrayNameSpace from polars.series.binary import BinaryNameSpace from polars.series.categorical import CatNameSpace @@ -4342,7 +4343,7 @@ def to_torch(self) -> torch.Tensor: # tensor.rename(self.name) return tensor - def to_arrow(self, *, future: bool | int = False) -> pa.Array: + def to_arrow(self, *, flavor: Flavor = Flavor.Compatible) -> pa.Array: """ Return the underlying Arrow array. @@ -4350,16 +4351,8 @@ def to_arrow(self, *, future: bool | int = False) -> pa.Array: Parameters ---------- - future - Setting this to `True` will write Polars' internal data structures that - might not be available by other Arrow implementations. - - .. warning:: - This functionality is considered **unstable**. It may be changed - at any point without it being considered a breaking change. - - Setting this to an integer will use a specific version - of Polars' internal data structures. + flavor + Use a specific version of Polars' internal data structures. Examples -------- @@ -4373,12 +4366,7 @@ def to_arrow(self, *, future: bool | int = False) -> pa.Array: 3 ] """ - if future is True: - issue_unstable_warning( - "The `future` parameter of `DataFrame.to_arrow` is considered unstable." 
- ) - - return self._s.to_arrow(future) + return self._s.to_arrow(flavor.value) def to_pandas( self, *, use_pyarrow_extension_array: bool = False, **kwargs: Any diff --git a/py-polars/tests/unit/io/test_ipc.py b/py-polars/tests/unit/io/test_ipc.py index 1741ff116b63..0a180a8b668b 100644 --- a/py-polars/tests/unit/io/test_ipc.py +++ b/py-polars/tests/unit/io/test_ipc.py @@ -234,7 +234,7 @@ def test_from_float16() -> None: def test_binview_ipc_mmap(tmp_path: Path) -> None: df = pl.DataFrame({"foo": ["aa" * 10, "bb", None, "small", "big" * 20]}) file_path = tmp_path / "dump.ipc" - df.write_ipc(file_path, future=True) + df.write_ipc(file_path) read = pl.read_ipc(file_path, memory_map=True) assert_frame_equal(df, read) @@ -268,7 +268,7 @@ def test_ipc_view_gc_14448() -> None: df = pl.DataFrame( pl.Series(["small"] * 10 + ["looooooong string......."] * 750).slice(20, 20) ) - df.write_ipc(f, future=True) + df.write_ipc(f) f.seek(0) assert_frame_equal(pl.read_ipc(f), df) From 4641a277e882e647d6da7c7a17399cb5869eda6a Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Thu, 4 Jul 2024 17:39:09 +0800 Subject: [PATCH 03/14] feat(python): allow integer value for flavor arg This is undocumented. This allows easy calling `to_arrow()` from pyo3-polars --- py-polars/polars/dataframe/frame.py | 15 ++++++++++++--- py-polars/polars/series/series.py | 4 +++- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 2d4b0242b7ac..29d701b5c99d 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -1415,7 +1415,10 @@ def to_arrow(self, *, flavor: Flavor = Flavor.Compatible) -> pa.Table: if not self.width: # 0x0 dataframe, cannot infer schema from batches return pa.table({}) - record_batches = self._df.to_arrow(flavor.value) + if isinstance(flavor, Flavor): + flavor = flavor.value # type: ignore[assignment] + + record_batches = self._df.to_arrow(flavor) return pa.Table.from_batches(record_batches) @overload @@ -3342,10 +3345,13 @@ def write_ipc( elif isinstance(file, (str, Path)): file = normalize_filepath(file) + if isinstance(flavor, Flavor): + flavor = flavor.value # type: ignore[assignment] + if compression is None: compression = "uncompressed" - self._df.write_ipc(file, compression, flavor.value) + self._df.write_ipc(file, compression, flavor) return file if return_bytes else None # type: ignore[return-value] @overload @@ -3408,10 +3414,13 @@ def write_ipc_stream( elif isinstance(file, (str, Path)): file = normalize_filepath(file) + if isinstance(flavor, Flavor): + flavor = flavor.value # type: ignore[assignment] + if compression is None: compression = "uncompressed" - self._df.write_ipc_stream(file, compression, flavor.value) + self._df.write_ipc_stream(file, compression, flavor) return file if return_bytes else None # type: ignore[return-value] def write_parquet( diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index ea36c0304fe4..d77e8d36ce12 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -4366,7 +4366,9 @@ def to_arrow(self, *, flavor: Flavor = Flavor.Compatible) -> pa.Array: 3 ] """ - return self._s.to_arrow(flavor.value) + if isinstance(flavor, Flavor): + flavor = flavor.value # type: ignore[assignment] + return self._s.to_arrow(flavor) def to_pandas( self, *, use_pyarrow_extension_array: bool = False, **kwargs: Any From 953627a4d784f45e1ef8d15b3c374f7770cf7301 Mon Sep 17 00:00:00 2001 From: Misaki Kasumi 
Date: Fri, 5 Jul 2024 11:06:59 +0800 Subject: [PATCH 04/14] feat(python): rename flavor arg back to future --- py-polars/polars/dataframe/frame.py | 38 ++++++++++++++--------------- py-polars/polars/series/series.py | 10 ++++---- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 29d701b5c99d..8c89e19f315c 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -1385,7 +1385,7 @@ def item(self, row: int | None = None, column: int | str | None = None) -> Any: ) return s.get_index_signed(row) - def to_arrow(self, *, flavor: Flavor = Flavor.Compatible) -> pa.Table: + def to_arrow(self, *, future: Flavor = Flavor.Compatible) -> pa.Table: """ Collect the underlying arrow arrays in an Arrow Table. @@ -1396,7 +1396,7 @@ def to_arrow(self, *, flavor: Flavor = Flavor.Compatible) -> pa.Table: Parameters ---------- - flavor + future Use a specific version of Polars' internal data structures. Examples @@ -1415,10 +1415,10 @@ def to_arrow(self, *, flavor: Flavor = Flavor.Compatible) -> pa.Table: if not self.width: # 0x0 dataframe, cannot infer schema from batches return pa.table({}) - if isinstance(flavor, Flavor): - flavor = flavor.value # type: ignore[assignment] + if isinstance(future, Flavor): + future = future.value # type: ignore[assignment] - record_batches = self._df.to_arrow(flavor) + record_batches = self._df.to_arrow(future) return pa.Table.from_batches(record_batches) @overload @@ -3291,7 +3291,7 @@ def write_ipc( file: None, *, compression: IpcCompression = "uncompressed", - flavor: Flavor = Flavor.Future1, + future: Flavor = Flavor.Future1, ) -> BytesIO: ... @overload @@ -3300,7 +3300,7 @@ def write_ipc( file: str | Path | IO[bytes], *, compression: IpcCompression = "uncompressed", - flavor: Flavor = Flavor.Future1, + future: Flavor = Flavor.Future1, ) -> None: ... def write_ipc( @@ -3308,7 +3308,7 @@ def write_ipc( file: str | Path | IO[bytes] | None, *, compression: IpcCompression = "uncompressed", - flavor: Flavor = Flavor.Future1, + future: Flavor = Flavor.Future1, ) -> BytesIO | None: """ Write to Arrow IPC binary stream or Feather file. @@ -3322,7 +3322,7 @@ def write_ipc( written. If set to `None`, the output is returned as a BytesIO object. compression : {'uncompressed', 'lz4', 'zstd'} Compression method. Defaults to "uncompressed". - flavor + future Use a specific version of Polars' internal data structures. Examples @@ -3345,13 +3345,13 @@ def write_ipc( elif isinstance(file, (str, Path)): file = normalize_filepath(file) - if isinstance(flavor, Flavor): - flavor = flavor.value # type: ignore[assignment] + if isinstance(future, Flavor): + future = future.value # type: ignore[assignment] if compression is None: compression = "uncompressed" - self._df.write_ipc(file, compression, flavor) + self._df.write_ipc(file, compression, future) return file if return_bytes else None # type: ignore[return-value] @overload @@ -3360,7 +3360,7 @@ def write_ipc_stream( file: None, *, compression: IpcCompression = "uncompressed", - flavor: Flavor = Flavor.Future1, + future: Flavor = Flavor.Future1, ) -> BytesIO: ... @overload @@ -3369,7 +3369,7 @@ def write_ipc_stream( file: str | Path | IO[bytes], *, compression: IpcCompression = "uncompressed", - flavor: Flavor = Flavor.Future1, + future: Flavor = Flavor.Future1, ) -> None: ... 
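A short sketch of the API after this rename, assuming a build with this commit applied: the keyword is `future` again, but it now carries a `Flavor` value rather than a bool.

```python
from io import BytesIO

import polars as pl
from polars.interchange.protocol import Flavor

df = pl.DataFrame({"a": [1, 2, 3]})
s = pl.Series("a", [1, 2, 3])

# DataFrame IPC writing still defaults to the new layout (Flavor.Future1).
buf = BytesIO()
df.write_ipc(buf, future=Flavor.Future1)

# Series.to_arrow mirrors the DataFrame API and defaults to the compatible layout.
arr = s.to_arrow(future=Flavor.Compatible)
```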
def write_ipc_stream( @@ -3377,7 +3377,7 @@ def write_ipc_stream( file: str | Path | IO[bytes] | None, *, compression: IpcCompression = "uncompressed", - flavor: Flavor = Flavor.Future1, + future: Flavor = Flavor.Future1, ) -> BytesIO | None: """ Write to Arrow IPC record batch stream. @@ -3391,7 +3391,7 @@ def write_ipc_stream( be written. If set to `None`, the output is returned as a BytesIO object. compression : {'uncompressed', 'lz4', 'zstd'} Compression method. Defaults to "uncompressed". - flavor + future Use a specific version of Polars' internal data structures. Examples @@ -3414,13 +3414,13 @@ def write_ipc_stream( elif isinstance(file, (str, Path)): file = normalize_filepath(file) - if isinstance(flavor, Flavor): - flavor = flavor.value # type: ignore[assignment] + if isinstance(future, Flavor): + future = future.value # type: ignore[assignment] if compression is None: compression = "uncompressed" - self._df.write_ipc_stream(file, compression, flavor) + self._df.write_ipc_stream(file, compression, future) return file if return_bytes else None # type: ignore[return-value] def write_parquet( diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index d77e8d36ce12..cd70a53c6ebc 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -4343,7 +4343,7 @@ def to_torch(self) -> torch.Tensor: # tensor.rename(self.name) return tensor - def to_arrow(self, *, flavor: Flavor = Flavor.Compatible) -> pa.Array: + def to_arrow(self, *, future: Flavor = Flavor.Compatible) -> pa.Array: """ Return the underlying Arrow array. @@ -4351,7 +4351,7 @@ def to_arrow(self, *, flavor: Flavor = Flavor.Compatible) -> pa.Array: Parameters ---------- - flavor + future Use a specific version of Polars' internal data structures. 
Examples @@ -4366,9 +4366,9 @@ def to_arrow(self, *, flavor: Flavor = Flavor.Compatible) -> pa.Array: 3 ] """ - if isinstance(flavor, Flavor): - flavor = flavor.value # type: ignore[assignment] - return self._s.to_arrow(flavor) + if isinstance(future, Flavor): + future = future.value # type: ignore[assignment] + return self._s.to_arrow(future) def to_pandas( self, *, use_pyarrow_extension_array: bool = False, **kwargs: Any From 099196d57b4085237b422c78f39acc596fee490f Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Fri, 5 Jul 2024 11:16:18 +0800 Subject: [PATCH 05/14] feat(python): ensure backward compatibility for future=None --- py-polars/polars/dataframe/frame.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 8c89e19f315c..38f5d9c9b48c 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -3347,6 +3347,9 @@ def write_ipc( if isinstance(future, Flavor): future = future.value # type: ignore[assignment] + elif future is None: + # this is for backward compatibility + future = True if compression is None: compression = "uncompressed" @@ -3416,6 +3419,9 @@ def write_ipc_stream( if isinstance(future, Flavor): future = future.value # type: ignore[assignment] + elif future is None: + # this is for backward compatibility + future = True if compression is None: compression = "uncompressed" From 1fe3059b23a53e2c6fd4beddb69cfa2b028a191d Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Sat, 6 Jul 2024 19:07:59 +0800 Subject: [PATCH 06/14] feat(rust): make PlFlavor a struct with hidden version --- .../chunked_array/logical/categorical/from.rs | 21 +++++----- .../src/chunked_array/ops/unique/mod.rs | 4 +- crates/polars-core/src/datatypes/dtype.rs | 40 +++++++++++++------ crates/polars-core/src/frame/mod.rs | 6 +-- crates/polars-core/src/series/into.rs | 18 +++++---- crates/polars-io/src/avro/write.rs | 4 +- crates/polars-io/src/ipc/ipc_stream.rs | 2 +- crates/polars-io/src/ipc/write_async.rs | 6 +-- crates/polars/tests/it/io/ipc.rs | 2 +- py-polars/src/conversion/mod.rs | 12 +++--- py-polars/src/dataframe/export.rs | 2 +- 11 files changed, 66 insertions(+), 51 deletions(-) diff --git a/crates/polars-core/src/chunked_array/logical/categorical/from.rs b/crates/polars-core/src/chunked_array/logical/categorical/from.rs index d01d2e16c634..9ab016f4d635 100644 --- a/crates/polars-core/src/chunked_array/logical/categorical/from.rs +++ b/crates/polars-core/src/chunked_array/logical/categorical/from.rs @@ -4,9 +4,10 @@ use arrow::datatypes::IntegerType; use super::*; fn convert_values(arr: &Utf8ViewArray, pl_flavor: PlFlavor) -> ArrayRef { - match pl_flavor { - PlFlavor::Future1 => arr.clone().boxed(), - PlFlavor::Compatible => utf8view_to_utf8::(arr).boxed(), + if pl_flavor.version >= 1 { + arr.clone().boxed() + } else { + utf8view_to_utf8::(arr).boxed() } } @@ -20,9 +21,10 @@ impl CategoricalChunked { } fn to_u32(&self, pl_flavor: PlFlavor) -> DictionaryArray { - let values_dtype = match pl_flavor { - PlFlavor::Future1 => ArrowDataType::Utf8View, - PlFlavor::Compatible => ArrowDataType::LargeUtf8, + let values_dtype = if pl_flavor.version >= 1 { + ArrowDataType::Utf8View + } else { + ArrowDataType::LargeUtf8 }; let keys = self.physical().rechunk(); let keys = keys.downcast_iter().next().unwrap(); @@ -52,9 +54,10 @@ impl CategoricalChunked { } fn to_i64(&self, pl_flavor: PlFlavor) -> DictionaryArray { - let values_dtype = match pl_flavor { - PlFlavor::Future1 => 
ArrowDataType::Utf8View, - PlFlavor::Compatible => ArrowDataType::LargeUtf8, + let values_dtype = if pl_flavor.version >= 1 { + ArrowDataType::Utf8View + } else { + ArrowDataType::LargeUtf8 }; let keys = self.physical().rechunk(); let keys = keys.downcast_iter().next().unwrap(); diff --git a/crates/polars-core/src/chunked_array/ops/unique/mod.rs b/crates/polars-core/src/chunked_array/ops/unique/mod.rs index c257a9668fca..fa958a151ba9 100644 --- a/crates/polars-core/src/chunked_array/ops/unique/mod.rs +++ b/crates/polars-core/src/chunked_array/ops/unique/mod.rs @@ -130,7 +130,7 @@ where .field .as_ref() .data_type() - .to_arrow(PlFlavor::Compatible); + .to_arrow(PlFlavor::compatible()); if let Some(mut state) = PrimitiveRangedUniqueState::new( *min, *max, @@ -276,7 +276,7 @@ impl ChunkUnique for BooleanChunked { .field .as_ref() .data_type() - .to_arrow(PlFlavor::Compatible); + .to_arrow(PlFlavor::compatible()); let has_null = self.null_count() > 0; let mut state = BooleanUniqueKernelState::new(has_null, data_type); diff --git a/crates/polars-core/src/datatypes/dtype.rs b/crates/polars-core/src/datatypes/dtype.rs index 4dcc22ec6fc4..fc9af73a2406 100644 --- a/crates/polars-core/src/datatypes/dtype.rs +++ b/crates/polars-core/src/datatypes/dtype.rs @@ -553,16 +553,18 @@ impl DataType { )) }, String => { - let dt = match pl_flavor { - PlFlavor::Future1 => ArrowDataType::Utf8View, - PlFlavor::Compatible => ArrowDataType::LargeUtf8, + let dt = if pl_flavor.version >= 1 { + ArrowDataType::Utf8View + } else { + ArrowDataType::LargeUtf8 }; Ok(dt) }, Binary => { - let dt = match pl_flavor { - PlFlavor::Future1 => ArrowDataType::BinaryView, - PlFlavor::Compatible => ArrowDataType::LargeBinary, + let dt = if pl_flavor.version >= 1 { + ArrowDataType::BinaryView + } else { + ArrowDataType::LargeBinary }; Ok(dt) }, @@ -590,9 +592,10 @@ impl DataType { }, #[cfg(feature = "dtype-categorical")] Categorical(_, _) | Enum(_, _) => { - let values = match pl_flavor { - PlFlavor::Future1 => ArrowDataType::Utf8View, - PlFlavor::Compatible => ArrowDataType::LargeUtf8, + let values = if pl_flavor.version >= 1 { + ArrowDataType::Utf8View + } else { + ArrowDataType::LargeUtf8 }; Ok(ArrowDataType::Dictionary( IntegerType::UInt32, @@ -788,13 +791,24 @@ pub fn create_enum_data_type(categories: Utf8ViewArray) -> DataType { } #[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub enum PlFlavor { - Compatible, - Future1, +pub struct PlFlavor { + pub(crate) version: u16, } impl PlFlavor { pub const fn highest() -> PlFlavor { - PlFlavor::Future1 + PlFlavor { version: 1 } + } + + pub const fn compatible() -> PlFlavor { + PlFlavor { version: 0 } + } + + /// DO NO USE! This is only for py-polars. + pub fn with_version(version: u16) -> PolarsResult { + if version > PlFlavor::highest().version { + polars_bail!(InvalidOperation: "invalid flavor version"); + } + Ok(PlFlavor { version }) } } diff --git a/crates/polars-core/src/frame/mod.rs b/crates/polars-core/src/frame/mod.rs index cca380bdcb09..73ab23166e0d 100644 --- a/crates/polars-core/src/frame/mod.rs +++ b/crates/polars-core/src/frame/mod.rs @@ -2385,11 +2385,7 @@ impl DataFrame { pub fn iter_chunks(&self, pl_flavor: PlFlavor, parallel: bool) -> RecordBatchIter { // If any of the columns is binview and we don't convert `pl_flavor` we allow parallelism // as we must allocate arrow strings/binaries. 
- let parallel = if parallel - && match pl_flavor { - PlFlavor::Future1 => true, - PlFlavor::Compatible => false, - } { + let parallel = if parallel && pl_flavor.version >= 1 { self.columns.len() > 1 && self .columns diff --git a/crates/polars-core/src/series/into.rs b/crates/polars-core/src/series/into.rs index 969f2352b904..6c0ef64cefb5 100644 --- a/crates/polars-core/src/series/into.rs +++ b/crates/polars-core/src/series/into.rs @@ -116,19 +116,21 @@ impl Series { object_series_to_arrow_array(&s) } }, - DataType::String => match pl_flavor { - PlFlavor::Future1 => self.array_ref(chunk_idx).clone(), - PlFlavor::Compatible => { + DataType::String => { + if pl_flavor.version >= 1 { + self.array_ref(chunk_idx).clone() + } else { let arr = self.array_ref(chunk_idx); cast_unchecked(arr.as_ref(), &ArrowDataType::LargeUtf8).unwrap() - }, + } }, - DataType::Binary => match pl_flavor { - PlFlavor::Future1 => self.array_ref(chunk_idx).clone(), - PlFlavor::Compatible => { + DataType::Binary => { + if pl_flavor.version >= 1 { + self.array_ref(chunk_idx).clone() + } else { let arr = self.array_ref(chunk_idx); cast_unchecked(arr.as_ref(), &ArrowDataType::LargeBinary).unwrap() - }, + } }, _ => self.array_ref(chunk_idx).clone(), } diff --git a/crates/polars-io/src/avro/write.rs b/crates/polars-io/src/avro/write.rs index d797b3a4aed9..6631e37c8db5 100644 --- a/crates/polars-io/src/avro/write.rs +++ b/crates/polars-io/src/avro/write.rs @@ -64,12 +64,12 @@ where } fn finish(&mut self, df: &mut DataFrame) -> PolarsResult<()> { - let schema = schema_to_arrow_checked(&df.schema(), PlFlavor::Compatible, "avro")?; + let schema = schema_to_arrow_checked(&df.schema(), PlFlavor::compatible(), "avro")?; let record = write::to_record(&schema, self.name.clone())?; let mut data = vec![]; let mut compressed_block = avro_schema::file::CompressedBlock::default(); - for chunk in df.iter_chunks(PlFlavor::Compatible, true) { + for chunk in df.iter_chunks(PlFlavor::compatible(), true) { let mut serializers = chunk .iter() .zip(record.fields.iter()) diff --git a/crates/polars-io/src/ipc/ipc_stream.rs b/crates/polars-io/src/ipc/ipc_stream.rs index f9b5b4b0bc28..03ae12efdb16 100644 --- a/crates/polars-io/src/ipc/ipc_stream.rs +++ b/crates/polars-io/src/ipc/ipc_stream.rs @@ -235,7 +235,7 @@ where IpcStreamWriter { writer, compression: None, - pl_flavor: PlFlavor::Compatible, + pl_flavor: PlFlavor::compatible(), } } diff --git a/crates/polars-io/src/ipc/write_async.rs b/crates/polars-io/src/ipc/write_async.rs index 25805117ec7e..63402cd94880 100644 --- a/crates/polars-io/src/ipc/write_async.rs +++ b/crates/polars-io/src/ipc/write_async.rs @@ -10,14 +10,14 @@ impl IpcWriter { IpcWriter { writer, compression: None, - pl_flavor: PlFlavor::Compatible, + pl_flavor: PlFlavor::compatible(), } } pub fn batched_async(self, schema: &Schema) -> PolarsResult> { let writer = FileSink::new( self.writer, - schema.to_arrow(PlFlavor::Compatible), + schema.to_arrow(PlFlavor::compatible()), None, WriteOptions { compression: self.compression.map(|c| c.into()), @@ -44,7 +44,7 @@ where /// # Panics /// The caller must ensure the chunks in the given [`DataFrame`] are aligned. 
pub async fn write_batch(&mut self, df: &DataFrame) -> PolarsResult<()> { - let iter = df.iter_chunks(PlFlavor::Compatible, true); + let iter = df.iter_chunks(PlFlavor::compatible(), true); for batch in iter { self.writer.feed(batch.into()).await?; } diff --git a/crates/polars/tests/it/io/ipc.rs b/crates/polars/tests/it/io/ipc.rs index 386e161db419..1d606777600d 100644 --- a/crates/polars/tests/it/io/ipc.rs +++ b/crates/polars/tests/it/io/ipc.rs @@ -82,7 +82,7 @@ fn test_read_ipc_with_columns() { .unwrap(); df_read.equals(&expected); - for pl_flavor in [PlFlavor::Compatible, PlFlavor::Future1] { + for pl_flavor in [0, 1].map(|version| PlFlavor::with_version(version).unwrap()) { let mut buf: Cursor> = Cursor::new(Vec::new()); let mut df = df![ "letters" => ["x", "y", "z"], diff --git a/py-polars/src/conversion/mod.rs b/py-polars/src/conversion/mod.rs index e0a18ead85fc..5ec7aca61cd8 100644 --- a/py-polars/src/conversion/mod.rs +++ b/py-polars/src/conversion/mod.rs @@ -1188,17 +1188,17 @@ pub struct PyPlFlavor(pub PlFlavor); impl<'a> FromPyObject<'a> for PyPlFlavor { fn extract_bound(ob: &Bound<'a, PyAny>) -> PyResult { - Ok(PyPlFlavor(if let Ok(version) = ob.extract::() { - match version { - 0 => PlFlavor::Compatible, - 1 => PlFlavor::Future1, - _ => return Err(PyValueError::new_err("invalid flavor version")), + Ok(PyPlFlavor(if let Ok(version) = ob.extract::() { + if let Ok(flavor) = PlFlavor::with_version(version) { + flavor + } else { + return Err(PyValueError::new_err("invalid flavor version")); } } else if let Ok(future) = ob.extract::() { if future { PlFlavor::highest() } else { - PlFlavor::Compatible + PlFlavor::compatible() } } else { return Err(PyTypeError::new_err( diff --git a/py-polars/src/dataframe/export.rs b/py-polars/src/dataframe/export.rs index f66eff570108..49d52b292565 100644 --- a/py-polars/src/dataframe/export.rs +++ b/py-polars/src/dataframe/export.rs @@ -105,7 +105,7 @@ impl PyDataFrame { .collect::>(); let rbs = self .df - .iter_chunks(PlFlavor::Compatible, true) + .iter_chunks(PlFlavor::compatible(), true) .map(|rb| { let mut rb = rb.into_arrays(); for i in &cat_columns { From e10cc944d5ba2bac8a3542a3902da49ab966b551 Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Sat, 6 Jul 2024 19:55:34 +0800 Subject: [PATCH 07/14] feat(python): make PlFlavor singletons with hidden version --- py-polars/polars/dataframe/frame.py | 38 ++++++++++---------- py-polars/polars/interchange/protocol.py | 35 ++++++++++++++---- py-polars/polars/series/series.py | 17 +++++++-- py-polars/tests/unit/interop/test_interop.py | 12 ++++++- 4 files changed, 73 insertions(+), 29 deletions(-) diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 38f5d9c9b48c..844a1f8b03b2 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -1385,7 +1385,7 @@ def item(self, row: int | None = None, column: int | str | None = None) -> Any: ) return s.get_index_signed(row) - def to_arrow(self, *, future: Flavor = Flavor.Compatible) -> pa.Table: + def to_arrow(self, *, future: Flavor | None = None) -> pa.Table: """ Collect the underlying arrow arrays in an Arrow Table. 
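A hedged usage sketch of the singleton-based API this patch introduces, assuming a build with this commit: `future=None` keeps each method's default, and `Flavor.highest()` goes through the unstable-feature machinery (it warns when `POLARS_WARN_UNSTABLE` is set).

```python
import polars as pl
from polars.interchange.protocol import Flavor

df = pl.DataFrame({"a": [1, 2, 3]})

# future=None keeps the per-method default: the compatible layout for
# to_arrow, the newest layout for the IPC writers.
default_table = df.to_arrow()
compat_table = df.to_arrow(future=Flavor.compatible())

# Flavor.highest() resolves to the newest supported flavor and is flagged
# as unstable.
newest_table = df.to_arrow(future=Flavor.highest())
```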
@@ -1415,8 +1415,10 @@ def to_arrow(self, *, future: Flavor = Flavor.Compatible) -> pa.Table: if not self.width: # 0x0 dataframe, cannot infer schema from batches return pa.table({}) - if isinstance(future, Flavor): - future = future.value # type: ignore[assignment] + if future is None: + future = False # type: ignore[assignment] + elif isinstance(future, Flavor): + future = future._version # type: ignore[attr-defined] record_batches = self._df.to_arrow(future) return pa.Table.from_batches(record_batches) @@ -3291,7 +3293,7 @@ def write_ipc( file: None, *, compression: IpcCompression = "uncompressed", - future: Flavor = Flavor.Future1, + future: Flavor | None = None, ) -> BytesIO: ... @overload @@ -3300,7 +3302,7 @@ def write_ipc( file: str | Path | IO[bytes], *, compression: IpcCompression = "uncompressed", - future: Flavor = Flavor.Future1, + future: Flavor | None = None, ) -> None: ... def write_ipc( @@ -3308,7 +3310,7 @@ def write_ipc( file: str | Path | IO[bytes] | None, *, compression: IpcCompression = "uncompressed", - future: Flavor = Flavor.Future1, + future: Flavor | None = None, ) -> BytesIO | None: """ Write to Arrow IPC binary stream or Feather file. @@ -3345,11 +3347,10 @@ def write_ipc( elif isinstance(file, (str, Path)): file = normalize_filepath(file) - if isinstance(future, Flavor): - future = future.value # type: ignore[assignment] - elif future is None: - # this is for backward compatibility - future = True + if future is None: + future = True # type: ignore[assignment] + elif isinstance(future, Flavor): + future = future._version # type: ignore[attr-defined] if compression is None: compression = "uncompressed" @@ -3363,7 +3364,7 @@ def write_ipc_stream( file: None, *, compression: IpcCompression = "uncompressed", - future: Flavor = Flavor.Future1, + future: Flavor | None = None, ) -> BytesIO: ... @overload @@ -3372,7 +3373,7 @@ def write_ipc_stream( file: str | Path | IO[bytes], *, compression: IpcCompression = "uncompressed", - future: Flavor = Flavor.Future1, + future: Flavor | None = None, ) -> None: ... def write_ipc_stream( @@ -3380,7 +3381,7 @@ def write_ipc_stream( file: str | Path | IO[bytes] | None, *, compression: IpcCompression = "uncompressed", - future: Flavor = Flavor.Future1, + future: Flavor | None = None, ) -> BytesIO | None: """ Write to Arrow IPC record batch stream. 
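For the IPC writers, omitting the argument keeps the previous behaviour of writing the newest layout, which is why the earlier test updates in this series simply drop `future=True`. A round-trip sketch under that assumption:

```python
from io import BytesIO

import polars as pl
from polars.testing import assert_frame_equal

df = pl.DataFrame({"foo": ["aa" * 10, "bb", None, "small", "big" * 20]})

# Omitting `future` writes the newest IPC layout; reading it back is unchanged.
buf = BytesIO()
df.write_ipc(buf)
buf.seek(0)
assert_frame_equal(pl.read_ipc(buf), df)
```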
@@ -3417,11 +3418,10 @@ def write_ipc_stream( elif isinstance(file, (str, Path)): file = normalize_filepath(file) - if isinstance(future, Flavor): - future = future.value # type: ignore[assignment] - elif future is None: - # this is for backward compatibility - future = True + if future is None: + future = True # type: ignore[assignment] + elif isinstance(future, Flavor): + future = future._version # type: ignore[attr-defined] if compression is None: compression = "uncompressed" diff --git a/py-polars/polars/interchange/protocol.py b/py-polars/polars/interchange/protocol.py index 5ea892d61729..938ddb827b6c 100644 --- a/py-polars/polars/interchange/protocol.py +++ b/py-polars/polars/interchange/protocol.py @@ -1,6 +1,6 @@ from __future__ import annotations -from enum import Enum, IntEnum +from enum import IntEnum from typing import ( TYPE_CHECKING, Any, @@ -259,11 +259,22 @@ class CopyNotAllowedError(RuntimeError): """Exception raised when a copy is required, but `allow_copy` is set to `False`.""" -class Flavor(Enum): - """Data structure versioning.""" +class Flavor: + """Data structure flavor.""" - Compatible = 0 - Future1 = 1 + def __init__(self) -> None: + msg = "it is not allowed to create a Flavor object" + raise TypeError(msg) + + @staticmethod + def _with_version(version: int) -> Flavor: + flavor = Flavor.__new__(Flavor) + flavor._version = version # type: ignore[attr-defined] + return flavor + + @staticmethod + def _highest() -> Flavor: + return Flavor._future1 # type: ignore[attr-defined] @staticmethod def highest() -> Flavor: @@ -275,4 +286,16 @@ def highest() -> Flavor: at any point without it being considered a breaking change. """ issue_unstable_warning("Using the highest flavor is considered unstable.") - return Flavor.Future1 + return Flavor._highest() + + @staticmethod + def compatible() -> Flavor: + """Get the flavor that is compatible with older arrow implementation.""" + return Flavor._compatible # type: ignore[attr-defined] + + def __repr__(self) -> str: + return f"<{self.__class__.__module__}.{self.__class__.__qualname__}: {self._version}>" # type: ignore[attr-defined] + + +Flavor._compatible = Flavor._with_version(0) # type: ignore[attr-defined] +Flavor._future1 = Flavor._with_version(1) # type: ignore[attr-defined] diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index cd70a53c6ebc..d0d05bb6d283 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -503,6 +503,15 @@ def _from_buffers( validity = validity._s return cls._from_pyseries(PySeries._from_buffers(dtype, data, validity)) + def _highest_flavor(self) -> int: + """ + Get the highest supported flavor version. + + This is only used by pyo3-polars, + and it is simpler not to make it a static method. + """ + return Flavor._highest()._version # type: ignore[attr-defined] + @property def dtype(self) -> DataType: """ @@ -4343,7 +4352,7 @@ def to_torch(self) -> torch.Tensor: # tensor.rename(self.name) return tensor - def to_arrow(self, *, future: Flavor = Flavor.Compatible) -> pa.Array: + def to_arrow(self, *, future: Flavor | None = None) -> pa.Array: """ Return the underlying Arrow array. 
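The `Flavor` class in the hunk above only hands out prebuilt singletons; a hedged sketch of that surface, assuming a build with this commit (the printed repr and error message follow from the code in the hunk):

```python
import polars as pl
from polars.interchange.protocol import Flavor

# Flavor instances are prebuilt singletons; direct construction is blocked.
compat = Flavor.compatible()
print(compat)  # <polars.interchange.protocol.Flavor: 0>

try:
    Flavor()
except TypeError as exc:
    print(exc)  # it is not allowed to create a Flavor object

# pyo3-polars can query the newest supported version via this private helper.
print(pl.Series([1])._highest_flavor())  # 1 at the time of this patch
```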
@@ -4366,8 +4375,10 @@ def to_arrow(self, *, future: Flavor = Flavor.Compatible) -> pa.Array: 3 ] """ - if isinstance(future, Flavor): - future = future.value # type: ignore[assignment] + if future is None: + future = False # type: ignore[assignment] + elif isinstance(future, Flavor): + future = future._version # type: ignore[attr-defined] return self._s.to_arrow(future) def to_pandas( diff --git a/py-polars/tests/unit/interop/test_interop.py b/py-polars/tests/unit/interop/test_interop.py index bef1c1bb694c..bfbf13893d32 100644 --- a/py-polars/tests/unit/interop/test_interop.py +++ b/py-polars/tests/unit/interop/test_interop.py @@ -9,7 +9,8 @@ import pytest import polars as pl -from polars.exceptions import ComputeError +from polars.exceptions import ComputeError, UnstableWarning +from polars.interchange.protocol import Flavor from polars.testing import assert_frame_equal, assert_series_equal @@ -702,3 +703,12 @@ def test_from_numpy_different_resolution_invalid() -> None: pl.Series( np.array(["2020-01-01"], dtype="datetime64[s]"), dtype=pl.Datetime("us") ) + + +def test_highest_flavor(monkeypatch: pytest.MonkeyPatch) -> None: + # change these if flavor version bumped + monkeypatch.setenv("POLARS_WARN_UNSTABLE", "1") + assert Flavor.compatible()._version == 0 # type: ignore[attr-defined] + with pytest.warns(UnstableWarning): + assert Flavor.highest()._version == 1 # type: ignore[attr-defined] + assert pl.Series([1])._highest_flavor() == 1 From 06b57e65cae5a56d0a7b52ddbfbcb75612d962cf Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Sun, 7 Jul 2024 00:52:14 +0800 Subject: [PATCH 08/14] refactor(rust): change PlFlavor to CompatLevel --- .../src/chunked_array/array/mod.rs | 4 +- .../chunked_array/builder/fixed_size_list.rs | 2 +- .../chunked_array/builder/list/anonymous.rs | 4 +- .../chunked_array/builder/list/primitive.rs | 2 +- .../src/chunked_array/builder/mod.rs | 2 +- .../src/chunked_array/builder/primitive.rs | 2 +- crates/polars-core/src/chunked_array/cast.rs | 2 +- .../polars-core/src/chunked_array/collect.rs | 18 +++---- crates/polars-core/src/chunked_array/from.rs | 7 ++- .../chunked_array/logical/categorical/from.rs | 26 +++++----- .../chunked_array/logical/categorical/mod.rs | 2 +- .../src/chunked_array/logical/decimal.rs | 2 +- .../src/chunked_array/logical/struct_/mod.rs | 10 ++-- crates/polars-core/src/chunked_array/mod.rs | 2 +- .../src/chunked_array/ops/apply.rs | 6 +-- .../src/chunked_array/ops/arity.rs | 18 +++---- .../src/chunked_array/ops/bit_repr.rs | 2 +- .../src/chunked_array/ops/explode.rs | 4 +- .../src/chunked_array/ops/fill_null.rs | 4 +- .../src/chunked_array/ops/filter.rs | 4 +- .../polars-core/src/chunked_array/ops/full.rs | 15 +++--- .../src/chunked_array/ops/gather.rs | 4 +- .../src/chunked_array/ops/rolling_window.rs | 2 +- .../polars-core/src/chunked_array/ops/set.rs | 4 +- .../ops/sort/arg_sort_multiple.rs | 4 +- .../src/chunked_array/ops/sort/mod.rs | 2 +- .../src/chunked_array/ops/unique/mod.rs | 4 +- .../src/chunked_array/trusted_len.rs | 4 +- crates/polars-core/src/datatypes/dtype.rs | 51 ++++++++++--------- crates/polars-core/src/datatypes/field.rs | 6 +-- .../frame/group_by/aggregations/agg_list.rs | 8 +-- crates/polars-core/src/frame/mod.rs | 16 +++--- crates/polars-core/src/frame/row/transpose.rs | 2 +- crates/polars-core/src/schema.rs | 4 +- crates/polars-core/src/series/from.rs | 2 +- .../src/series/implementations/decimal.rs | 2 +- crates/polars-core/src/series/into.rs | 22 ++++---- crates/polars-core/src/series/mod.rs | 5 +- 
crates/polars-core/src/series/ops/reshape.rs | 9 ++-- crates/polars-expr/src/expressions/window.rs | 2 +- crates/polars-ffi/src/version_0.rs | 6 +-- crates/polars-io/src/avro/write.rs | 4 +- crates/polars-io/src/ipc/ipc_stream.rs | 12 ++--- crates/polars-io/src/ipc/write.rs | 20 ++++---- crates/polars-io/src/ipc/write_async.rs | 6 +-- crates/polars-io/src/json/mod.rs | 14 ++--- crates/polars-io/src/ndjson/mod.rs | 2 +- .../src/parquet/write/batched_writer.rs | 4 +- crates/polars-io/src/parquet/write/writer.rs | 2 +- crates/polars-io/src/shared.rs | 4 +- .../src/chunked_array/gather/chunked.rs | 4 +- .../src/chunked_array/gather_skip_nulls.rs | 5 +- .../polars-ops/src/chunked_array/repeat_by.rs | 2 +- .../src/chunked_array/strings/extract.rs | 2 +- .../src/chunked_array/strings/json_path.rs | 2 +- crates/polars-ops/src/series/ops/ewm_by.rs | 4 +- .../series/ops/interpolation/interpolate.rs | 2 +- .../ops/interpolation/interpolate_by.rs | 4 +- .../sinks/group_by/aggregates/mean.rs | 2 +- .../sinks/group_by/aggregates/sum.rs | 2 +- .../executors/sinks/group_by/generic/eval.rs | 2 +- .../sinks/group_by/generic/hash_table.rs | 2 +- crates/polars-pipe/src/executors/sinks/io.rs | 8 +-- .../src/executors/sinks/sort/sink_multiple.rs | 2 +- .../src/dsl/function_expr/plugin.rs | 2 +- .../src/dsl/function_expr/struct_.rs | 2 +- .../polars-time/src/chunkedarray/datetime.rs | 2 +- crates/polars/tests/it/io/ipc.rs | 6 +-- py-polars/src/conversion/mod.rs | 18 +++---- py-polars/src/dataframe/export.rs | 8 +-- py-polars/src/dataframe/io.rs | 10 ++-- py-polars/src/dataframe/serde.rs | 2 +- py-polars/src/series/export.rs | 8 ++- py-polars/src/series/mod.rs | 2 +- 74 files changed, 241 insertions(+), 227 deletions(-) diff --git a/crates/polars-core/src/chunked_array/array/mod.rs b/crates/polars-core/src/chunked_array/array/mod.rs index bfb28c7e83f3..a3b7a1a1f339 100644 --- a/crates/polars-core/src/chunked_array/array/mod.rs +++ b/crates/polars-core/src/chunked_array/array/mod.rs @@ -46,7 +46,7 @@ impl ArrayChunked { let ca = self.rechunk(); let field = self .inner_dtype() - .to_arrow_field("item", PlFlavor::highest()); + .to_arrow_field("item", CompatLevel::newest()); let chunks = ca.downcast_iter().map(|arr| { let elements = unsafe { @@ -69,7 +69,7 @@ impl ArrayChunked { let values = out.chunks()[0].clone(); let inner_dtype = FixedSizeListArray::default_datatype( - out.dtype().to_arrow(PlFlavor::highest()), + out.dtype().to_arrow(CompatLevel::newest()), ca.width(), ); let arr = FixedSizeListArray::new(inner_dtype, values, arr.validity().cloned()); diff --git a/crates/polars-core/src/chunked_array/builder/fixed_size_list.rs b/crates/polars-core/src/chunked_array/builder/fixed_size_list.rs index 2d6a939767ff..e235d08ffbd6 100644 --- a/crates/polars-core/src/chunked_array/builder/fixed_size_list.rs +++ b/crates/polars-core/src/chunked_array/builder/fixed_size_list.rs @@ -124,7 +124,7 @@ impl FixedSizeListBuilder for AnonymousOwnedFixedSizeListBuilder { .finish( self.inner_dtype .as_ref() - .map(|dt| dt.to_arrow(PlFlavor::highest())) + .map(|dt| dt.to_arrow(CompatLevel::newest())) .as_ref(), ) .unwrap(); diff --git a/crates/polars-core/src/chunked_array/builder/list/anonymous.rs b/crates/polars-core/src/chunked_array/builder/list/anonymous.rs index b3a64dc83ff0..99b566320fbf 100644 --- a/crates/polars-core/src/chunked_array/builder/list/anonymous.rs +++ b/crates/polars-core/src/chunked_array/builder/list/anonymous.rs @@ -89,7 +89,7 @@ impl<'a> AnonymousListBuilder<'a> { let inner_dtype_physical = inner_dtype 
.as_ref() - .map(|dt| dt.to_physical().to_arrow(PlFlavor::highest())); + .map(|dt| dt.to_physical().to_arrow(CompatLevel::newest())); let arr = slf.builder.finish(inner_dtype_physical.as_ref()).unwrap(); let list_dtype_logical = match inner_dtype { @@ -157,7 +157,7 @@ impl ListBuilderTrait for AnonymousOwnedListBuilder { let slf = std::mem::take(self); let inner_dtype_physical = inner_dtype .as_ref() - .map(|dt| dt.to_physical().to_arrow(PlFlavor::highest())); + .map(|dt| dt.to_physical().to_arrow(CompatLevel::newest())); let arr = slf.builder.finish(inner_dtype_physical.as_ref()).unwrap(); let list_dtype_logical = match inner_dtype { diff --git a/crates/polars-core/src/chunked_array/builder/list/primitive.rs b/crates/polars-core/src/chunked_array/builder/list/primitive.rs index 1ccca95a6ebf..34e12433db7a 100644 --- a/crates/polars-core/src/chunked_array/builder/list/primitive.rs +++ b/crates/polars-core/src/chunked_array/builder/list/primitive.rs @@ -39,7 +39,7 @@ where ) -> Self { let values = MutablePrimitiveArray::::with_capacity_from( values_capacity, - values_type.to_arrow(PlFlavor::highest()), + values_type.to_arrow(CompatLevel::newest()), ); let builder = LargePrimitiveBuilder::::new_with_capacity(values, capacity); let field = Field::new(name, DataType::List(Box::new(logical_type))); diff --git a/crates/polars-core/src/chunked_array/builder/mod.rs b/crates/polars-core/src/chunked_array/builder/mod.rs index 5a10e8db1edc..bac88f5a1ea5 100644 --- a/crates/polars-core/src/chunked_array/builder/mod.rs +++ b/crates/polars-core/src/chunked_array/builder/mod.rs @@ -66,7 +66,7 @@ where T: PolarsNumericType, { fn from_slice(name: &str, v: &[T::Native]) -> Self { - let arr = PrimitiveArray::from_slice(v).to(T::get_dtype().to_arrow(PlFlavor::highest())); + let arr = PrimitiveArray::from_slice(v).to(T::get_dtype().to_arrow(CompatLevel::newest())); ChunkedArray::with_chunk(name, arr) } diff --git a/crates/polars-core/src/chunked_array/builder/primitive.rs b/crates/polars-core/src/chunked_array/builder/primitive.rs index 0ae99d5951d2..14eb2c1f4f46 100644 --- a/crates/polars-core/src/chunked_array/builder/primitive.rs +++ b/crates/polars-core/src/chunked_array/builder/primitive.rs @@ -41,7 +41,7 @@ where { pub fn new(name: &str, capacity: usize) -> Self { let array_builder = MutablePrimitiveArray::::with_capacity(capacity) - .to(T::get_dtype().to_arrow(PlFlavor::highest())); + .to(T::get_dtype().to_arrow(CompatLevel::newest())); PrimitiveChunkedBuilder { array_builder, diff --git a/crates/polars-core/src/chunked_array/cast.rs b/crates/polars-core/src/chunked_array/cast.rs index bfe1768e490a..892d28203e55 100644 --- a/crates/polars-core/src/chunked_array/cast.rs +++ b/crates/polars-core/src/chunked_array/cast.rs @@ -51,7 +51,7 @@ pub(crate) fn cast_chunks( let check_nulls = matches!(options, CastOptions::Strict); let options = options.into(); - let arrow_dtype = dtype.try_to_arrow(PlFlavor::highest())?; + let arrow_dtype = dtype.try_to_arrow(CompatLevel::newest())?; chunks .iter() .map(|arr| { diff --git a/crates/polars-core/src/chunked_array/collect.rs b/crates/polars-core/src/chunked_array/collect.rs index f14a0cde3d9a..054f59de8958 100644 --- a/crates/polars-core/src/chunked_array/collect.rs +++ b/crates/polars-core/src/chunked_array/collect.rs @@ -18,7 +18,7 @@ use crate::chunked_array::ChunkedArray; use crate::datatypes::{ ArrayCollectIterExt, ArrayFromIter, ArrayFromIterDtype, DataType, Field, PolarsDataType, }; -use crate::prelude::PlFlavor; +use crate::prelude::CompatLevel; pub trait 
ChunkedCollectIterExt: Iterator + Sized { #[inline] @@ -27,7 +27,7 @@ pub trait ChunkedCollectIterExt: Iterator + Sized { T::Array: ArrayFromIterDtype, { let field = Arc::new(Field::new(name, dtype.clone())); - let arr = self.collect_arr_with_dtype(field.dtype.to_arrow(PlFlavor::highest())); + let arr = self.collect_arr_with_dtype(field.dtype.to_arrow(CompatLevel::newest())); ChunkedArray::from_chunk_iter_and_field(field, [arr]) } @@ -37,7 +37,7 @@ pub trait ChunkedCollectIterExt: Iterator + Sized { T::Array: ArrayFromIterDtype, { let field = Arc::clone(&name_dtype_src.field); - let arr = self.collect_arr_with_dtype(field.dtype.to_arrow(PlFlavor::highest())); + let arr = self.collect_arr_with_dtype(field.dtype.to_arrow(CompatLevel::newest())); ChunkedArray::from_chunk_iter_and_field(field, [arr]) } @@ -48,7 +48,7 @@ pub trait ChunkedCollectIterExt: Iterator + Sized { Self: TrustedLen, { let field = Arc::new(Field::new(name, dtype.clone())); - let arr = self.collect_arr_trusted_with_dtype(field.dtype.to_arrow(PlFlavor::highest())); + let arr = self.collect_arr_trusted_with_dtype(field.dtype.to_arrow(CompatLevel::newest())); ChunkedArray::from_chunk_iter_and_field(field, [arr]) } @@ -59,7 +59,7 @@ pub trait ChunkedCollectIterExt: Iterator + Sized { Self: TrustedLen, { let field = Arc::clone(&name_dtype_src.field); - let arr = self.collect_arr_trusted_with_dtype(field.dtype.to_arrow(PlFlavor::highest())); + let arr = self.collect_arr_trusted_with_dtype(field.dtype.to_arrow(CompatLevel::newest())); ChunkedArray::from_chunk_iter_and_field(field, [arr]) } @@ -74,7 +74,7 @@ pub trait ChunkedCollectIterExt: Iterator + Sized { Self: Iterator>, { let field = Arc::new(Field::new(name, dtype.clone())); - let arr = self.try_collect_arr_with_dtype(field.dtype.to_arrow(PlFlavor::highest()))?; + let arr = self.try_collect_arr_with_dtype(field.dtype.to_arrow(CompatLevel::newest()))?; Ok(ChunkedArray::from_chunk_iter_and_field(field, [arr])) } @@ -88,7 +88,7 @@ pub trait ChunkedCollectIterExt: Iterator + Sized { Self: Iterator>, { let field = Arc::clone(&name_dtype_src.field); - let arr = self.try_collect_arr_with_dtype(field.dtype.to_arrow(PlFlavor::highest()))?; + let arr = self.try_collect_arr_with_dtype(field.dtype.to_arrow(CompatLevel::newest()))?; Ok(ChunkedArray::from_chunk_iter_and_field(field, [arr])) } @@ -104,7 +104,7 @@ pub trait ChunkedCollectIterExt: Iterator + Sized { { let field = Arc::new(Field::new(name, dtype.clone())); let arr = - self.try_collect_arr_trusted_with_dtype(field.dtype.to_arrow(PlFlavor::highest()))?; + self.try_collect_arr_trusted_with_dtype(field.dtype.to_arrow(CompatLevel::newest()))?; Ok(ChunkedArray::from_chunk_iter_and_field(field, [arr])) } @@ -119,7 +119,7 @@ pub trait ChunkedCollectIterExt: Iterator + Sized { { let field = Arc::clone(&name_dtype_src.field); let arr = - self.try_collect_arr_trusted_with_dtype(field.dtype.to_arrow(PlFlavor::highest()))?; + self.try_collect_arr_trusted_with_dtype(field.dtype.to_arrow(CompatLevel::newest()))?; Ok(ChunkedArray::from_chunk_iter_and_field(field, [arr])) } } diff --git a/crates/polars-core/src/chunked_array/from.rs b/crates/polars-core/src/chunked_array/from.rs index b6d26d536422..56ac8cb90604 100644 --- a/crates/polars-core/src/chunked_array/from.rs +++ b/crates/polars-core/src/chunked_array/from.rs @@ -217,7 +217,10 @@ where #[cfg(debug_assertions)] { if !chunks.is_empty() && !chunks[0].is_empty() && dtype.is_primitive() { - assert_eq!(chunks[0].data_type(), &dtype.to_arrow(PlFlavor::highest())) + assert_eq!( + 
chunks[0].data_type(), + &dtype.to_arrow(CompatLevel::newest()) + ) } } let field = Arc::new(Field::new(name, dtype)); @@ -236,7 +239,7 @@ where pub fn full_null_like(ca: &Self, length: usize) -> Self { let chunks = std::iter::once(T::Array::full_null( length, - ca.dtype().to_arrow(PlFlavor::highest()), + ca.dtype().to_arrow(CompatLevel::newest()), )); Self::from_chunk_iter_like(ca, chunks) } diff --git a/crates/polars-core/src/chunked_array/logical/categorical/from.rs b/crates/polars-core/src/chunked_array/logical/categorical/from.rs index 9ab016f4d635..5df0a2691583 100644 --- a/crates/polars-core/src/chunked_array/logical/categorical/from.rs +++ b/crates/polars-core/src/chunked_array/logical/categorical/from.rs @@ -3,8 +3,8 @@ use arrow::datatypes::IntegerType; use super::*; -fn convert_values(arr: &Utf8ViewArray, pl_flavor: PlFlavor) -> ArrayRef { - if pl_flavor.version >= 1 { +fn convert_values(arr: &Utf8ViewArray, compat_level: CompatLevel) -> ArrayRef { + if compat_level.0 >= 1 { arr.clone().boxed() } else { utf8view_to_utf8::(arr).boxed() @@ -12,16 +12,16 @@ fn convert_values(arr: &Utf8ViewArray, pl_flavor: PlFlavor) -> ArrayRef { } impl CategoricalChunked { - pub fn to_arrow(&self, pl_flavor: PlFlavor, as_i64: bool) -> ArrayRef { + pub fn to_arrow(&self, compat_level: CompatLevel, as_i64: bool) -> ArrayRef { if as_i64 { - self.to_i64(pl_flavor).boxed() + self.to_i64(compat_level).boxed() } else { - self.to_u32(pl_flavor).boxed() + self.to_u32(compat_level).boxed() } } - fn to_u32(&self, pl_flavor: PlFlavor) -> DictionaryArray { - let values_dtype = if pl_flavor.version >= 1 { + fn to_u32(&self, compat_level: CompatLevel) -> DictionaryArray { + let values_dtype = if compat_level.0 >= 1 { ArrowDataType::Utf8View } else { ArrowDataType::LargeUtf8 @@ -32,7 +32,7 @@ impl CategoricalChunked { let dtype = ArrowDataType::Dictionary(IntegerType::UInt32, Box::new(values_dtype), false); match map { RevMapping::Local(arr, _) => { - let values = convert_values(arr, pl_flavor); + let values = convert_values(arr, compat_level); // SAFETY: // the keys are in bounds @@ -44,7 +44,7 @@ impl CategoricalChunked { .map(|opt_k| opt_k.map(|k| *reverse_map.get(k).unwrap())); let keys = PrimitiveArray::from_trusted_len_iter(iter); - let values = convert_values(values, pl_flavor); + let values = convert_values(values, compat_level); // SAFETY: // the keys are in bounds @@ -53,8 +53,8 @@ impl CategoricalChunked { } } - fn to_i64(&self, pl_flavor: PlFlavor) -> DictionaryArray { - let values_dtype = if pl_flavor.version >= 1 { + fn to_i64(&self, compat_level: CompatLevel) -> DictionaryArray { + let values_dtype = if compat_level.0 >= 1 { ArrowDataType::Utf8View } else { ArrowDataType::LargeUtf8 @@ -65,7 +65,7 @@ impl CategoricalChunked { let dtype = ArrowDataType::Dictionary(IntegerType::Int64, Box::new(values_dtype), false); match map { RevMapping::Local(arr, _) => { - let values = convert_values(arr, pl_flavor); + let values = convert_values(arr, compat_level); // SAFETY: // the keys are in bounds @@ -89,7 +89,7 @@ impl CategoricalChunked { .map(|opt_k| opt_k.map(|k| *reverse_map.get(k).unwrap() as i64)); let keys = PrimitiveArray::from_trusted_len_iter(iter); - let values = convert_values(values, pl_flavor); + let values = convert_values(values, compat_level); // SAFETY: // the keys are in bounds diff --git a/crates/polars-core/src/chunked_array/logical/categorical/mod.rs b/crates/polars-core/src/chunked_array/logical/categorical/mod.rs index 972de59381be..1b2fa0d06a7f 100644 --- 
a/crates/polars-core/src/chunked_array/logical/categorical/mod.rs +++ b/crates/polars-core/src/chunked_array/logical/categorical/mod.rs @@ -464,7 +464,7 @@ mod test { let ca = ca.cast(&DataType::Categorical(None, Default::default()))?; let ca = ca.categorical().unwrap(); - let arr = ca.to_arrow(PlFlavor::highest(), false); + let arr = ca.to_arrow(CompatLevel::newest(), false); let s = Series::try_from(("foo", arr))?; assert!(matches!(s.dtype(), &DataType::Categorical(_, _))); assert_eq!(s.null_count(), 1); diff --git a/crates/polars-core/src/chunked_array/logical/decimal.rs b/crates/polars-core/src/chunked_array/logical/decimal.rs index a7303867c98f..acc19a522e46 100644 --- a/crates/polars-core/src/chunked_array/logical/decimal.rs +++ b/crates/polars-core/src/chunked_array/logical/decimal.rs @@ -20,7 +20,7 @@ impl Int128Chunked { let (_, values, validity) = default.into_inner(); *arr = PrimitiveArray::new( - DataType::Decimal(precision, Some(scale)).to_arrow(PlFlavor::highest()), + DataType::Decimal(precision, Some(scale)).to_arrow(CompatLevel::newest()), values, validity, ); diff --git a/crates/polars-core/src/chunked_array/logical/struct_/mod.rs b/crates/polars-core/src/chunked_array/logical/struct_/mod.rs index a6a28acd5554..3fc4baa6a706 100644 --- a/crates/polars-core/src/chunked_array/logical/struct_/mod.rs +++ b/crates/polars-core/src/chunked_array/logical/struct_/mod.rs @@ -48,12 +48,12 @@ fn fields_to_struct_array(fields: &[Series], physical: bool) -> (ArrayRef, Vec s.to_arrow(0, PlFlavor::highest()), + DataType::Object(_, _) => s.to_arrow(0, CompatLevel::newest()), _ => { if physical { s.chunks()[0].clone() } else { - s.to_arrow(0, PlFlavor::highest()) + s.to_arrow(0, CompatLevel::newest()) } }, } @@ -145,7 +145,7 @@ impl StructChunked { .iter() .map(|s| match s.dtype() { #[cfg(feature = "object")] - DataType::Object(_, _) => s.to_arrow(i, PlFlavor::highest()), + DataType::Object(_, _) => s.to_arrow(i, CompatLevel::newest()), _ => s.chunks()[i].clone(), }) .collect::>(); @@ -295,11 +295,11 @@ impl StructChunked { self.into() } - pub(crate) fn to_arrow(&self, i: usize, pl_flavor: PlFlavor) -> ArrayRef { + pub(crate) fn to_arrow(&self, i: usize, compat_level: CompatLevel) -> ArrayRef { let values = self .fields .iter() - .map(|s| s.to_arrow(i, pl_flavor)) + .map(|s| s.to_arrow(i, compat_level)) .collect::>(); // we determine fields from arrays as there might be object arrays diff --git a/crates/polars-core/src/chunked_array/mod.rs b/crates/polars-core/src/chunked_array/mod.rs index b9d4b509218b..176bbd11fc90 100644 --- a/crates/polars-core/src/chunked_array/mod.rs +++ b/crates/polars-core/src/chunked_array/mod.rs @@ -934,7 +934,7 @@ pub(crate) fn to_primitive( validity: Option, ) -> PrimitiveArray { PrimitiveArray::new( - T::get_dtype().to_arrow(PlFlavor::highest()), + T::get_dtype().to_arrow(CompatLevel::newest()), values.into(), validity, ) diff --git a/crates/polars-core/src/chunked_array/ops/apply.rs b/crates/polars-core/src/chunked_array/ops/apply.rs index 24529325498d..632afe52d889 100644 --- a/crates/polars-core/src/chunked_array/ops/apply.rs +++ b/crates/polars-core/src/chunked_array/ops/apply.rs @@ -42,13 +42,13 @@ where let out: U::Array = arr .values_iter() .map(&mut op) - .collect_arr_with_dtype(dtype.to_arrow(PlFlavor::highest())); + .collect_arr_with_dtype(dtype.to_arrow(CompatLevel::newest())); out.with_validity_typed(arr.validity().cloned()) } else { let out: U::Array = arr .iter() .map(|opt| opt.map(&mut op)) - 
.collect_arr_with_dtype(dtype.to_arrow(PlFlavor::highest())); + .collect_arr_with_dtype(dtype.to_arrow(CompatLevel::newest())); out.with_validity_typed(arr.validity().cloned()) } }); @@ -133,7 +133,7 @@ where drop(arr); let compute_immutable = |arr: &PrimitiveArray| { - arrow::compute::arity::unary(arr, f, S::get_dtype().to_arrow(PlFlavor::highest())) + arrow::compute::arity::unary(arr, f, S::get_dtype().to_arrow(CompatLevel::newest())) }; if owned_arr.values().is_sliced() { diff --git a/crates/polars-core/src/chunked_array/ops/arity.rs b/crates/polars-core/src/chunked_array/ops/arity.rs index a6ced1639da6..ed9e6dab79ec 100644 --- a/crates/polars-core/src/chunked_array/ops/arity.rs +++ b/crates/polars-core/src/chunked_array/ops/arity.rs @@ -6,7 +6,7 @@ use polars_error::PolarsResult; use crate::chunked_array::metadata::MetadataProperties; use crate::datatypes::{ArrayCollectIterExt, ArrayFromIter}; -use crate::prelude::{ChunkedArray, PlFlavor, PolarsDataType, Series}; +use crate::prelude::{ChunkedArray, CompatLevel, PolarsDataType, Series}; use crate::utils::{align_chunks_binary, align_chunks_binary_owned, align_chunks_ternary}; // We need this helper because for<'a> notation can't yet be applied properly @@ -106,7 +106,7 @@ where V::Array: ArrayFromIter<>>::Ret>, { if ca.null_count() == ca.len() { - let arr = V::Array::full_null(ca.len(), V::get_dtype().to_arrow(PlFlavor::highest())); + let arr = V::Array::full_null(ca.len(), V::get_dtype().to_arrow(CompatLevel::newest())); return ChunkedArray::with_chunk(ca.name(), arr); } @@ -130,7 +130,7 @@ where V::Array: ArrayFromIter, { if ca.null_count() == ca.len() { - let arr = V::Array::full_null(ca.len(), V::get_dtype().to_arrow(PlFlavor::highest())); + let arr = V::Array::full_null(ca.len(), V::get_dtype().to_arrow(CompatLevel::newest())); return Ok(ChunkedArray::with_chunk(ca.name(), arr)); } @@ -308,7 +308,7 @@ where { if lhs.null_count() == lhs.len() || rhs.null_count() == rhs.len() { let len = lhs.len().min(rhs.len()); - let arr = V::Array::full_null(len, V::get_dtype().to_arrow(PlFlavor::highest())); + let arr = V::Array::full_null(len, V::get_dtype().to_arrow(CompatLevel::newest())); return ChunkedArray::with_chunk(lhs.name(), arr); } @@ -704,7 +704,7 @@ where let min = lhs.len().min(rhs.len()); let max = lhs.len().max(rhs.len()); let len = if min == 1 { max } else { min }; - let arr = V::Array::full_null(len, V::get_dtype().to_arrow(PlFlavor::highest())); + let arr = V::Array::full_null(len, V::get_dtype().to_arrow(CompatLevel::newest())); return ChunkedArray::with_chunk(lhs.name(), arr); } @@ -747,7 +747,7 @@ where None => { let arr = O::Array::full_null( lhs.len(), - O::get_dtype().to_arrow(PlFlavor::highest()), + O::get_dtype().to_arrow(CompatLevel::newest()), ); ChunkedArray::::with_chunk(lhs.name(), arr) }, @@ -760,7 +760,7 @@ where None => { let arr = O::Array::full_null( rhs.len(), - O::get_dtype().to_arrow(PlFlavor::highest()), + O::get_dtype().to_arrow(CompatLevel::newest()), ); ChunkedArray::::with_chunk(lhs.name(), arr) }, @@ -797,7 +797,7 @@ where None => { let arr = O::Array::full_null( lhs.len(), - O::get_dtype().to_arrow(PlFlavor::highest()), + O::get_dtype().to_arrow(CompatLevel::newest()), ); ChunkedArray::::with_chunk(lhs.name(), arr) }, @@ -810,7 +810,7 @@ where None => { let arr = O::Array::full_null( rhs.len(), - O::get_dtype().to_arrow(PlFlavor::highest()), + O::get_dtype().to_arrow(CompatLevel::newest()), ); ChunkedArray::::with_chunk(lhs.name(), arr) }, diff --git 
a/crates/polars-core/src/chunked_array/ops/bit_repr.rs b/crates/polars-core/src/chunked_array/ops/bit_repr.rs index 3771d4a66d76..9a2f1c33594a 100644 --- a/crates/polars-core/src/chunked_array/ops/bit_repr.rs +++ b/crates/polars-core/src/chunked_array/ops/bit_repr.rs @@ -46,7 +46,7 @@ fn reinterpret_list_chunked( let pa = PrimitiveArray::from_data_default(reinterpreted_buf, inner_arr.validity().cloned()); LargeListArray::new( - DataType::List(Box::new(U::get_dtype())).to_arrow(PlFlavor::highest()), + DataType::List(Box::new(U::get_dtype())).to_arrow(CompatLevel::newest()), array.offsets().clone(), pa.to_boxed(), array.validity().cloned(), diff --git a/crates/polars-core/src/chunked_array/ops/explode.rs b/crates/polars-core/src/chunked_array/ops/explode.rs index 4d9d1112e569..f469209558b6 100644 --- a/crates/polars-core/src/chunked_array/ops/explode.rs +++ b/crates/polars-core/src/chunked_array/ops/explode.rs @@ -150,7 +150,7 @@ where unsafe { set_bit_unchecked(validity_slice, i, false) } } let arr = PrimitiveArray::new( - T::get_dtype().to_arrow(PlFlavor::highest()), + T::get_dtype().to_arrow(CompatLevel::newest()), new_values.into(), Some(validity.into()), ); @@ -270,7 +270,7 @@ impl ExplodeByOffsets for ListChunked { } process_range(start, last, &mut builder); let arr = builder - .finish(Some(&inner_type.to_arrow(PlFlavor::highest()))) + .finish(Some(&inner_type.to_arrow(CompatLevel::newest()))) .unwrap(); let mut ca = unsafe { self.copy_with_chunks(vec![Box::new(arr)]) }; diff --git a/crates/polars-core/src/chunked_array/ops/fill_null.rs b/crates/polars-core/src/chunked_array/ops/fill_null.rs index c0d042854e02..1a20cadc7175 100644 --- a/crates/polars-core/src/chunked_array/ops/fill_null.rs +++ b/crates/polars-core/src/chunked_array/ops/fill_null.rs @@ -306,7 +306,7 @@ where ChunkedArray::from_chunk_iter_like( ca, [ - T::Array::from_zeroable_vec(values, ca.dtype().to_arrow(PlFlavor::highest())) + T::Array::from_zeroable_vec(values, ca.dtype().to_arrow(CompatLevel::newest())) .with_validity_typed(Some(bm.into())), ], ) @@ -340,7 +340,7 @@ where ChunkedArray::from_chunk_iter_like( ca, [ - T::Array::from_zeroable_vec(values, ca.dtype().to_arrow(PlFlavor::highest())) + T::Array::from_zeroable_vec(values, ca.dtype().to_arrow(CompatLevel::newest())) .with_validity_typed(Some(bm.into())), ], ) diff --git a/crates/polars-core/src/chunked_array/ops/filter.rs b/crates/polars-core/src/chunked_array/ops/filter.rs index 954adcb53d7a..d0e6fe59c7e6 100644 --- a/crates/polars-core/src/chunked_array/ops/filter.rs +++ b/crates/polars-core/src/chunked_array/ops/filter.rs @@ -121,7 +121,7 @@ impl ChunkFilter for ListChunked { _ => Ok(ListChunked::from_chunk_iter( self.name(), [ListArray::new_empty( - self.dtype().to_arrow(PlFlavor::highest()), + self.dtype().to_arrow(CompatLevel::newest()), )], )), }; @@ -149,7 +149,7 @@ impl ChunkFilter for ArrayChunked { _ => Ok(ArrayChunked::from_chunk_iter( self.name(), [FixedSizeListArray::new_empty( - self.dtype().to_arrow(PlFlavor::highest()), + self.dtype().to_arrow(CompatLevel::newest()), )], )), }; diff --git a/crates/polars-core/src/chunked_array/ops/full.rs b/crates/polars-core/src/chunked_array/ops/full.rs index da70407052f7..71d80749e618 100644 --- a/crates/polars-core/src/chunked_array/ops/full.rs +++ b/crates/polars-core/src/chunked_array/ops/full.rs @@ -21,7 +21,7 @@ where T: PolarsNumericType, { fn full_null(name: &str, length: usize) -> Self { - let arr = PrimitiveArray::new_null(T::get_dtype().to_arrow(PlFlavor::highest()), length); + let arr = 
PrimitiveArray::new_null(T::get_dtype().to_arrow(CompatLevel::newest()), length); ChunkedArray::with_chunk(name, arr) } } @@ -55,7 +55,7 @@ impl<'a> ChunkFull<&'a str> for StringChunked { impl ChunkFullNull for StringChunked { fn full_null(name: &str, length: usize) -> Self { - let arr = Utf8ViewArray::new_null(DataType::String.to_arrow(PlFlavor::highest()), length); + let arr = Utf8ViewArray::new_null(DataType::String.to_arrow(CompatLevel::newest()), length); ChunkedArray::with_chunk(name, arr) } } @@ -72,7 +72,8 @@ impl<'a> ChunkFull<&'a [u8]> for BinaryChunked { impl ChunkFullNull for BinaryChunked { fn full_null(name: &str, length: usize) -> Self { - let arr = BinaryViewArray::new_null(DataType::Binary.to_arrow(PlFlavor::highest()), length); + let arr = + BinaryViewArray::new_null(DataType::Binary.to_arrow(CompatLevel::newest()), length); ChunkedArray::with_chunk(name, arr) } } @@ -91,7 +92,7 @@ impl<'a> ChunkFull<&'a [u8]> for BinaryOffsetChunked { impl ChunkFullNull for BinaryOffsetChunked { fn full_null(name: &str, length: usize) -> Self { let arr = BinaryArray::::new_null( - DataType::BinaryOffset.to_arrow(PlFlavor::highest()), + DataType::BinaryOffset.to_arrow(CompatLevel::newest()), length, ); ChunkedArray::with_chunk(name, arr) @@ -127,7 +128,7 @@ impl ArrayChunked { ArrowDataType::FixedSizeList( Box::new(ArrowField::new( "item", - inner_dtype.to_arrow(PlFlavor::highest()), + inner_dtype.to_arrow(CompatLevel::newest()), true, )), width, @@ -146,7 +147,7 @@ impl ChunkFull<&Series> for ArrayChunked { let arrow_dtype = ArrowDataType::FixedSizeList( Box::new(ArrowField::new( "item", - dtype.to_arrow(PlFlavor::highest()), + dtype.to_arrow(CompatLevel::newest()), true, )), width, @@ -169,7 +170,7 @@ impl ListChunked { let arr: ListArray = ListArray::new_null( ArrowDataType::LargeList(Box::new(ArrowField::new( "item", - inner_dtype.to_physical().to_arrow(PlFlavor::highest()), + inner_dtype.to_physical().to_arrow(CompatLevel::newest()), true, ))), length, diff --git a/crates/polars-core/src/chunked_array/ops/gather.rs b/crates/polars-core/src/chunked_array/ops/gather.rs index 72c882c9aaad..21bf6479dfc4 100644 --- a/crates/polars-core/src/chunked_array/ops/gather.rs +++ b/crates/polars-core/src/chunked_array/ops/gather.rs @@ -155,7 +155,7 @@ where } let targets: Vec<_> = ca.downcast_iter().collect(); let arr = gather_idx_array_unchecked( - ca.dtype().to_arrow(PlFlavor::highest()), + ca.dtype().to_arrow(CompatLevel::newest()), &targets, ca.null_count() > 0, indices.as_ref(), @@ -192,7 +192,7 @@ where let targets: Vec<_> = ca.downcast_iter().collect(); let chunks = indices.downcast_iter().map(|idx_arr| { - let dtype = ca.dtype().to_arrow(PlFlavor::highest()); + let dtype = ca.dtype().to_arrow(CompatLevel::newest()); if idx_arr.null_count() == 0 { gather_idx_array_unchecked(dtype, &targets, targets_have_nulls, idx_arr.values()) } else if targets.len() == 1 { diff --git a/crates/polars-core/src/chunked_array/ops/rolling_window.rs b/crates/polars-core/src/chunked_array/ops/rolling_window.rs index 3b0b2d1c4c6d..c5898edb4df1 100644 --- a/crates/polars-core/src/chunked_array/ops/rolling_window.rs +++ b/crates/polars-core/src/chunked_array/ops/rolling_window.rs @@ -270,7 +270,7 @@ mod inner_mod { } } let arr = PrimitiveArray::new( - T::get_dtype().to_arrow(PlFlavor::highest()), + T::get_dtype().to_arrow(CompatLevel::newest()), values.into(), Some(validity.into()), ); diff --git a/crates/polars-core/src/chunked_array/ops/set.rs b/crates/polars-core/src/chunked_array/ops/set.rs index 
c50046d05a1f..abafab312c6a 100644 --- a/crates/polars-core/src/chunked_array/ops/set.rs +++ b/crates/polars-core/src/chunked_array/ops/set.rs @@ -55,7 +55,7 @@ where self.downcast_iter().next().unwrap(), idx, value, - T::get_dtype().to_arrow(PlFlavor::highest()), + T::get_dtype().to_arrow(CompatLevel::newest()), )?; return Ok(Self::with_chunk(self.name(), arr)); } @@ -106,7 +106,7 @@ where arr, mask, value, - T::get_dtype().to_arrow(PlFlavor::highest()), + T::get_dtype().to_arrow(CompatLevel::newest()), ) }); Ok(ChunkedArray::from_chunk_iter(self.name(), chunks)) diff --git a/crates/polars-core/src/chunked_array/ops/sort/arg_sort_multiple.rs b/crates/polars-core/src/chunked_array/ops/sort/arg_sort_multiple.rs index 521861db515c..1e86cda3e33b 100644 --- a/crates/polars-core/src/chunked_array/ops/sort/arg_sort_multiple.rs +++ b/crates/polars-core/src/chunked_array/ops/sort/arg_sort_multiple.rs @@ -94,12 +94,12 @@ pub fn _get_rows_encoded_compat_array(by: &Series) -> PolarsResult { DataType::Categorical(_, _) | DataType::Enum(_, _) => { let ca = by.categorical().unwrap(); if ca.uses_lexical_ordering() { - by.to_arrow(0, PlFlavor::highest()) + by.to_arrow(0, CompatLevel::newest()) } else { ca.physical().chunks[0].clone() } }, - _ => by.to_arrow(0, PlFlavor::highest()), + _ => by.to_arrow(0, CompatLevel::newest()), }; Ok(out) } diff --git a/crates/polars-core/src/chunked_array/ops/sort/mod.rs b/crates/polars-core/src/chunked_array/ops/sort/mod.rs index 03d5f8060376..c2ef58a23c26 100644 --- a/crates/polars-core/src/chunked_array/ops/sort/mod.rs +++ b/crates/polars-core/src/chunked_array/ops/sort/mod.rs @@ -201,7 +201,7 @@ where } let arr = PrimitiveArray::new( - T::get_dtype().to_arrow(PlFlavor::highest()), + T::get_dtype().to_arrow(CompatLevel::newest()), vals.into(), Some(create_validity(len, null_count, options.nulls_last)), ); diff --git a/crates/polars-core/src/chunked_array/ops/unique/mod.rs b/crates/polars-core/src/chunked_array/ops/unique/mod.rs index fa958a151ba9..989e30061478 100644 --- a/crates/polars-core/src/chunked_array/ops/unique/mod.rs +++ b/crates/polars-core/src/chunked_array/ops/unique/mod.rs @@ -130,7 +130,7 @@ where .field .as_ref() .data_type() - .to_arrow(PlFlavor::compatible()); + .to_arrow(CompatLevel::oldest()); if let Some(mut state) = PrimitiveRangedUniqueState::new( *min, *max, @@ -276,7 +276,7 @@ impl ChunkUnique for BooleanChunked { .field .as_ref() .data_type() - .to_arrow(PlFlavor::compatible()); + .to_arrow(CompatLevel::oldest()); let has_null = self.null_count() > 0; let mut state = BooleanUniqueKernelState::new(has_null, data_type); diff --git a/crates/polars-core/src/chunked_array/trusted_len.rs b/crates/polars-core/src/chunked_array/trusted_len.rs index e7030c665d88..84ff13cb906d 100644 --- a/crates/polars-core/src/chunked_array/trusted_len.rs +++ b/crates/polars-core/src/chunked_array/trusted_len.rs @@ -18,7 +18,7 @@ where let iter = iter.into_iter(); let arr = unsafe { PrimitiveArray::from_trusted_len_iter_unchecked(iter) - .to(T::get_dtype().to_arrow(PlFlavor::highest())) + .to(T::get_dtype().to_arrow(CompatLevel::newest())) }; arr.into() } @@ -38,7 +38,7 @@ where // SAFETY: iter is TrustedLen. 
let iter = iter.into_iter(); let values = unsafe { Vec::from_trusted_len_iter_unchecked(iter) }.into(); - let arr = PrimitiveArray::new(T::get_dtype().to_arrow(PlFlavor::highest()), values, None); + let arr = PrimitiveArray::new(T::get_dtype().to_arrow(CompatLevel::newest()), values, None); NoNull::new(arr.into()) } } diff --git a/crates/polars-core/src/datatypes/dtype.rs b/crates/polars-core/src/datatypes/dtype.rs index fc9af73a2406..53c444be5061 100644 --- a/crates/polars-core/src/datatypes/dtype.rs +++ b/crates/polars-core/src/datatypes/dtype.rs @@ -498,7 +498,7 @@ impl DataType { } /// Convert to an Arrow Field - pub fn to_arrow_field(&self, name: &str, pl_flavor: PlFlavor) -> ArrowField { + pub fn to_arrow_field(&self, name: &str, compat_level: CompatLevel) -> ArrowField { let metadata = match self { #[cfg(feature = "dtype-categorical")] DataType::Enum(_, _) => Some(BTreeMap::from([( @@ -512,7 +512,7 @@ impl DataType { _ => None, }; - let field = ArrowField::new(name, self.to_arrow(pl_flavor), true); + let field = ArrowField::new(name, self.to_arrow(compat_level), true); if let Some(metadata) = metadata { field.with_metadata(metadata) @@ -523,12 +523,12 @@ impl DataType { /// Convert to an Arrow data type. #[inline] - pub fn to_arrow(&self, pl_flavor: PlFlavor) -> ArrowDataType { - self.try_to_arrow(pl_flavor).unwrap() + pub fn to_arrow(&self, compat_level: CompatLevel) -> ArrowDataType { + self.try_to_arrow(compat_level).unwrap() } #[inline] - pub fn try_to_arrow(&self, pl_flavor: PlFlavor) -> PolarsResult { + pub fn try_to_arrow(&self, compat_level: CompatLevel) -> PolarsResult { use DataType::*; match self { Boolean => Ok(ArrowDataType::Boolean), @@ -553,7 +553,7 @@ impl DataType { )) }, String => { - let dt = if pl_flavor.version >= 1 { + let dt = if compat_level.0 >= 1 { ArrowDataType::Utf8View } else { ArrowDataType::LargeUtf8 @@ -561,7 +561,7 @@ impl DataType { Ok(dt) }, Binary => { - let dt = if pl_flavor.version >= 1 { + let dt = if compat_level.0 >= 1 { ArrowDataType::BinaryView } else { ArrowDataType::LargeBinary @@ -574,11 +574,11 @@ impl DataType { Time => Ok(ArrowDataType::Time64(ArrowTimeUnit::Nanosecond)), #[cfg(feature = "dtype-array")] Array(dt, size) => Ok(ArrowDataType::FixedSizeList( - Box::new(dt.to_arrow_field("item", pl_flavor)), + Box::new(dt.to_arrow_field("item", compat_level)), *size, )), List(dt) => Ok(ArrowDataType::LargeList(Box::new( - dt.to_arrow_field("item", pl_flavor), + dt.to_arrow_field("item", compat_level), ))), Null => Ok(ArrowDataType::Null), #[cfg(feature = "object")] @@ -592,7 +592,7 @@ impl DataType { }, #[cfg(feature = "dtype-categorical")] Categorical(_, _) | Enum(_, _) => { - let values = if pl_flavor.version >= 1 { + let values = if compat_level.0 >= 1 { ArrowDataType::Utf8View } else { ArrowDataType::LargeUtf8 @@ -605,7 +605,10 @@ impl DataType { }, #[cfg(feature = "dtype-struct")] Struct(fields) => { - let fields = fields.iter().map(|fld| fld.to_arrow(pl_flavor)).collect(); + let fields = fields + .iter() + .map(|fld| fld.to_arrow(compat_level)) + .collect(); Ok(ArrowDataType::Struct(fields)) }, BinaryOffset => Ok(ArrowDataType::LargeBinary), @@ -615,7 +618,7 @@ impl DataType { UnknownKind::Float => ArrowDataType::Float64, UnknownKind::Str => ArrowDataType::Utf8View, UnknownKind::Int(v) => { - return materialize_dyn_int(*v).dtype().try_to_arrow(pl_flavor) + return materialize_dyn_int(*v).dtype().try_to_arrow(compat_level) }, }; Ok(dt) @@ -791,24 +794,22 @@ pub fn create_enum_data_type(categories: Utf8ViewArray) -> DataType { } 
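For orientation before the rename that follows (`PlFlavor::highest`/`compatible` becoming `CompatLevel::newest`/`oldest`) — a minimal Rust sketch of the mapping encoded in `try_to_arrow` above; it is not part of the patch, and it assumes the `CompatLevel` prelude re-export used throughout this series plus the crate-internal `arrow` path convention seen in the `Field::to_arrow` doc-example below.

    use polars_core::prelude::*;
    use arrow::datatypes::ArrowDataType;

    fn main() {
        // Sketch only: the same logical dtype maps to a different Arrow physical
        // type per compat level — the newest level keeps the string-view layout,
        // the oldest falls back to large-utf8 for older Arrow consumers.
        assert_eq!(
            DataType::String.to_arrow(CompatLevel::newest()),
            ArrowDataType::Utf8View
        );
        assert_eq!(
            DataType::String.to_arrow(CompatLevel::oldest()),
            ArrowDataType::LargeUtf8
        );
    }
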
#[derive(Debug, Copy, Clone, Eq, PartialEq)] -pub struct PlFlavor { - pub(crate) version: u16, -} +pub struct CompatLevel(pub(crate) u16); -impl PlFlavor { - pub const fn highest() -> PlFlavor { - PlFlavor { version: 1 } +impl CompatLevel { + pub const fn newest() -> CompatLevel { + CompatLevel(1) } - pub const fn compatible() -> PlFlavor { - PlFlavor { version: 0 } + pub const fn oldest() -> CompatLevel { + CompatLevel(0) } - /// DO NO USE! This is only for py-polars. - pub fn with_version(version: u16) -> PolarsResult { - if version > PlFlavor::highest().version { - polars_bail!(InvalidOperation: "invalid flavor version"); + #[doc(hidden)] + pub fn with_level(level: u16) -> PolarsResult { + if level > CompatLevel::newest().0 { + polars_bail!(InvalidOperation: "invalid compat level"); } - Ok(PlFlavor { version }) + Ok(CompatLevel(level)) } } diff --git a/crates/polars-core/src/datatypes/field.rs b/crates/polars-core/src/datatypes/field.rs index 003711a4d027..aea148546ef0 100644 --- a/crates/polars-core/src/datatypes/field.rs +++ b/crates/polars-core/src/datatypes/field.rs @@ -107,10 +107,10 @@ impl Field { /// let f = Field::new("Value", DataType::Int64); /// let af = arrow::datatypes::Field::new("Value", arrow::datatypes::ArrowDataType::Int64, true); /// - /// assert_eq!(f.to_arrow(PlFlavor::highest()), af); + /// assert_eq!(f.to_arrow(CompatLevel::newest()), af); /// ``` - pub fn to_arrow(&self, pl_flavor: PlFlavor) -> ArrowField { - self.dtype.to_arrow_field(self.name.as_str(), pl_flavor) + pub fn to_arrow(&self, compat_level: CompatLevel) -> ArrowField { + self.dtype.to_arrow_field(self.name.as_str(), compat_level) } } diff --git a/crates/polars-core/src/frame/group_by/aggregations/agg_list.rs b/crates/polars-core/src/frame/group_by/aggregations/agg_list.rs index da91b1a2642d..812d89f0d240 100644 --- a/crates/polars-core/src/frame/group_by/aggregations/agg_list.rs +++ b/crates/polars-core/src/frame/group_by/aggregations/agg_list.rs @@ -70,12 +70,12 @@ where }; let array = PrimitiveArray::new( - T::get_dtype().to_arrow(PlFlavor::highest()), + T::get_dtype().to_arrow(CompatLevel::newest()), list_values.into(), validity, ); let data_type = ListArray::::default_datatype( - T::get_dtype().to_arrow(PlFlavor::highest()), + T::get_dtype().to_arrow(CompatLevel::newest()), ); // SAFETY: // offsets are monotonically increasing @@ -135,12 +135,12 @@ where }; let array = PrimitiveArray::new( - T::get_dtype().to_arrow(PlFlavor::highest()), + T::get_dtype().to_arrow(CompatLevel::newest()), list_values.into(), validity, ); let data_type = ListArray::::default_datatype( - T::get_dtype().to_arrow(PlFlavor::highest()), + T::get_dtype().to_arrow(CompatLevel::newest()), ); let arr = ListArray::::new( data_type, diff --git a/crates/polars-core/src/frame/mod.rs b/crates/polars-core/src/frame/mod.rs index 73ab23166e0d..0515d030a569 100644 --- a/crates/polars-core/src/frame/mod.rs +++ b/crates/polars-core/src/frame/mod.rs @@ -2382,10 +2382,10 @@ impl DataFrame { /// This responsibility is left to the caller as we don't want to take mutable references here, /// but we also don't want to rechunk here, as this operation is costly and would benefit the caller /// as well. 
- pub fn iter_chunks(&self, pl_flavor: PlFlavor, parallel: bool) -> RecordBatchIter { - // If any of the columns is binview and we don't convert `pl_flavor` we allow parallelism + pub fn iter_chunks(&self, compat_level: CompatLevel, parallel: bool) -> RecordBatchIter { + // If any of the columns is binview and we don't convert `compat_level` we allow parallelism // as we must allocate arrow strings/binaries. - let parallel = if parallel && pl_flavor.version >= 1 { + let parallel = if parallel && compat_level.0 >= 1 { self.columns.len() > 1 && self .columns @@ -2399,7 +2399,7 @@ impl DataFrame { columns: &self.columns, idx: 0, n_chunks: self.n_chunks(), - pl_flavor, + compat_level, parallel, } } @@ -3018,7 +3018,7 @@ pub struct RecordBatchIter<'a> { columns: &'a Vec, idx: usize, n_chunks: usize, - pl_flavor: PlFlavor, + compat_level: CompatLevel, parallel: bool, } @@ -3034,12 +3034,12 @@ impl<'a> Iterator for RecordBatchIter<'a> { let iter = self .columns .par_iter() - .map(|s| s.to_arrow(self.idx, self.pl_flavor)); + .map(|s| s.to_arrow(self.idx, self.compat_level)); POOL.install(|| iter.collect()) } else { self.columns .iter() - .map(|s| s.to_arrow(self.idx, self.pl_flavor)) + .map(|s| s.to_arrow(self.idx, self.compat_level)) .collect() }; self.idx += 1; @@ -3117,7 +3117,7 @@ mod test { "foo" => &[1, 2, 3, 4, 5] ) .unwrap(); - let mut iter = df.iter_chunks(PlFlavor::highest(), false); + let mut iter = df.iter_chunks(CompatLevel::newest(), false); assert_eq!(5, iter.next().unwrap().len()); assert!(iter.next().is_none()); } diff --git a/crates/polars-core/src/frame/row/transpose.rs b/crates/polars-core/src/frame/row/transpose.rs index 05de7a63c9c5..7ad4bc4f1fef 100644 --- a/crates/polars-core/src/frame/row/transpose.rs +++ b/crates/polars-core/src/frame/row/transpose.rs @@ -247,7 +247,7 @@ where }; let arr = PrimitiveArray::::new( - T::get_dtype().to_arrow(PlFlavor::highest()), + T::get_dtype().to_arrow(CompatLevel::newest()), values.into(), validity, ); diff --git a/crates/polars-core/src/schema.rs b/crates/polars-core/src/schema.rs index 52a67820ca2e..dfb581135a0b 100644 --- a/crates/polars-core/src/schema.rs +++ b/crates/polars-core/src/schema.rs @@ -370,11 +370,11 @@ impl Schema { } /// Convert self to `ArrowSchema` by cloning the fields - pub fn to_arrow(&self, pl_flavor: PlFlavor) -> ArrowSchema { + pub fn to_arrow(&self, compat_level: CompatLevel) -> ArrowSchema { let fields: Vec<_> = self .inner .iter() - .map(|(name, dtype)| dtype.to_arrow_field(name.as_str(), pl_flavor)) + .map(|(name, dtype)| dtype.to_arrow_field(name.as_str(), compat_level)) .collect(); ArrowSchema::from(fields) } diff --git a/crates/polars-core/src/series/from.rs b/crates/polars-core/src/series/from.rs index 1474bbf3d38d..6a4c61cd7f37 100644 --- a/crates/polars-core/src/series/from.rs +++ b/crates/polars-core/src/series/from.rs @@ -105,7 +105,7 @@ impl Series { Struct(_) => Series::_try_from_arrow_unchecked( name, chunks, - &dtype.to_arrow(PlFlavor::highest()), + &dtype.to_arrow(CompatLevel::newest()), ) .unwrap(), #[cfg(feature = "object")] diff --git a/crates/polars-core/src/series/implementations/decimal.rs b/crates/polars-core/src/series/implementations/decimal.rs index 04875a3d46a8..2437a43fc3f4 100644 --- a/crates/polars-core/src/series/implementations/decimal.rs +++ b/crates/polars-core/src/series/implementations/decimal.rs @@ -44,7 +44,7 @@ impl SeriesWrap { }; let new_values = s.array_ref(0).clone(); let data_type = - ListArray::::default_datatype(dtype.to_arrow(PlFlavor::highest())); + 
ListArray::::default_datatype(dtype.to_arrow(CompatLevel::newest())); let new_arr = ListArray::::new( data_type, arr.offsets().clone(), diff --git a/crates/polars-core/src/series/into.rs b/crates/polars-core/src/series/into.rs index 6c0ef64cefb5..c0ac905666cc 100644 --- a/crates/polars-core/src/series/into.rs +++ b/crates/polars-core/src/series/into.rs @@ -19,11 +19,11 @@ impl Series { /// Convert a chunk in the Series to the correct Arrow type. /// This conversion is needed because polars doesn't use a /// 1 on 1 mapping for logical/ categoricals, etc. - pub fn to_arrow(&self, chunk_idx: usize, pl_flavor: PlFlavor) -> ArrayRef { + pub fn to_arrow(&self, chunk_idx: usize, compat_level: CompatLevel) -> ArrayRef { match self.dtype() { // make sure that we recursively apply all logical types. #[cfg(feature = "dtype-struct")] - DataType::Struct(_) => self.struct_().unwrap().to_arrow(chunk_idx, pl_flavor), + DataType::Struct(_) => self.struct_().unwrap().to_arrow(chunk_idx, compat_level), // special list branch to // make sure that we recursively apply all logical types. DataType::List(inner) => { @@ -45,10 +45,10 @@ impl Series { .unwrap() }; - s.to_arrow(0, pl_flavor) + s.to_arrow(0, compat_level) }; - let data_type = ListArray::::default_datatype(inner.to_arrow(pl_flavor)); + let data_type = ListArray::::default_datatype(inner.to_arrow(compat_level)); let arr = ListArray::::new( data_type, arr.offsets().clone(), @@ -74,30 +74,30 @@ impl Series { ) }; - new.to_arrow(pl_flavor, false) + new.to_arrow(compat_level, false) }, #[cfg(feature = "dtype-date")] DataType::Date => cast( &*self.chunks()[chunk_idx], - &DataType::Date.to_arrow(pl_flavor), + &DataType::Date.to_arrow(compat_level), ) .unwrap(), #[cfg(feature = "dtype-datetime")] DataType::Datetime(_, _) => cast( &*self.chunks()[chunk_idx], - &self.dtype().to_arrow(pl_flavor), + &self.dtype().to_arrow(compat_level), ) .unwrap(), #[cfg(feature = "dtype-duration")] DataType::Duration(_) => cast( &*self.chunks()[chunk_idx], - &self.dtype().to_arrow(pl_flavor), + &self.dtype().to_arrow(compat_level), ) .unwrap(), #[cfg(feature = "dtype-time")] DataType::Time => cast( &*self.chunks()[chunk_idx], - &DataType::Time.to_arrow(pl_flavor), + &DataType::Time.to_arrow(compat_level), ) .unwrap(), #[cfg(feature = "object")] @@ -117,7 +117,7 @@ impl Series { } }, DataType::String => { - if pl_flavor.version >= 1 { + if compat_level.0 >= 1 { self.array_ref(chunk_idx).clone() } else { let arr = self.array_ref(chunk_idx); @@ -125,7 +125,7 @@ impl Series { } }, DataType::Binary => { - if pl_flavor.version >= 1 { + if compat_level.0 >= 1 { self.array_ref(chunk_idx).clone() } else { let arr = self.array_ref(chunk_idx); diff --git a/crates/polars-core/src/series/mod.rs b/crates/polars-core/src/series/mod.rs index 134e69e7b153..05fdfcbc2171 100644 --- a/crates/polars-core/src/series/mod.rs +++ b/crates/polars-core/src/series/mod.rs @@ -906,8 +906,9 @@ impl Series { let offsets = (0i64..(s.len() as i64 + 1)).collect::>(); let offsets = unsafe { Offsets::new_unchecked(offsets) }; - let data_type = - LargeListArray::default_datatype(s.dtype().to_physical().to_arrow(PlFlavor::highest())); + let data_type = LargeListArray::default_datatype( + s.dtype().to_physical().to_arrow(CompatLevel::newest()), + ); let new_arr = LargeListArray::new(data_type, offsets.into(), values, None); let mut out = ListChunked::with_chunk(s.name(), new_arr); out.set_inner_dtype(s.dtype().clone()); diff --git a/crates/polars-core/src/series/ops/reshape.rs 
b/crates/polars-core/src/series/ops/reshape.rs index 3304bb63689e..ca824f6a4104 100644 --- a/crates/polars-core/src/series/ops/reshape.rs +++ b/crates/polars-core/src/series/ops/reshape.rs @@ -156,9 +156,12 @@ impl Series { while let Some(dim) = dims.pop_back() { prev_dtype = DataType::Array(Box::new(prev_dtype), dim as usize); - prev_array = - FixedSizeListArray::new(prev_dtype.to_arrow(PlFlavor::highest()), prev_array, None) - .boxed(); + prev_array = FixedSizeListArray::new( + prev_dtype.to_arrow(CompatLevel::newest()), + prev_array, + None, + ) + .boxed(); } Ok(unsafe { Series::from_chunks_and_dtype_unchecked( diff --git a/crates/polars-expr/src/expressions/window.rs b/crates/polars-expr/src/expressions/window.rs index 66dce9eb4888..5a71230a9d09 100644 --- a/crates/polars-expr/src/expressions/window.rs +++ b/crates/polars-expr/src/expressions/window.rs @@ -821,7 +821,7 @@ where unsafe { values.set_len(len) } let validity = Bitmap::from(validity); let arr = PrimitiveArray::new( - T::get_dtype().to_physical().to_arrow(PlFlavor::highest()), + T::get_dtype().to_physical().to_arrow(CompatLevel::newest()), values.into(), Some(validity), ); diff --git a/crates/polars-ffi/src/version_0.rs b/crates/polars-ffi/src/version_0.rs index d97f54d5adbc..eb24542f0733 100644 --- a/crates/polars-ffi/src/version_0.rs +++ b/crates/polars-ffi/src/version_0.rs @@ -1,4 +1,4 @@ -use polars_core::prelude::PlFlavor; +use polars_core::prelude::CompatLevel; use super::*; @@ -54,13 +54,13 @@ unsafe extern "C" fn c_release_series_export(e: *mut SeriesExport) { } pub fn export_series(s: &Series) -> SeriesExport { - let field = ArrowField::new(s.name(), s.dtype().to_arrow(PlFlavor::highest()), true); + let field = ArrowField::new(s.name(), s.dtype().to_arrow(CompatLevel::newest()), true); let schema = Box::new(ffi::export_field_to_c(&field)); let mut arrays = (0..s.chunks().len()) .map(|i| { // Make sure we export the logical type. 
- let arr = s.to_arrow(i, PlFlavor::highest()); + let arr = s.to_arrow(i, CompatLevel::newest()); Box::into_raw(Box::new(ffi::export_array_to_c(arr.clone()))) }) .collect::>(); diff --git a/crates/polars-io/src/avro/write.rs b/crates/polars-io/src/avro/write.rs index 6631e37c8db5..2954de97d964 100644 --- a/crates/polars-io/src/avro/write.rs +++ b/crates/polars-io/src/avro/write.rs @@ -64,12 +64,12 @@ where } fn finish(&mut self, df: &mut DataFrame) -> PolarsResult<()> { - let schema = schema_to_arrow_checked(&df.schema(), PlFlavor::compatible(), "avro")?; + let schema = schema_to_arrow_checked(&df.schema(), CompatLevel::oldest(), "avro")?; let record = write::to_record(&schema, self.name.clone())?; let mut data = vec![]; let mut compressed_block = avro_schema::file::CompressedBlock::default(); - for chunk in df.iter_chunks(PlFlavor::compatible(), true) { + for chunk in df.iter_chunks(CompatLevel::oldest(), true) { let mut serializers = chunk .iter() .zip(record.fields.iter()) diff --git a/crates/polars-io/src/ipc/ipc_stream.rs b/crates/polars-io/src/ipc/ipc_stream.rs index 03ae12efdb16..35bd22dab5c3 100644 --- a/crates/polars-io/src/ipc/ipc_stream.rs +++ b/crates/polars-io/src/ipc/ipc_stream.rs @@ -207,7 +207,7 @@ where pub struct IpcStreamWriter { writer: W, compression: Option, - pl_flavor: PlFlavor, + compat_level: CompatLevel, } use arrow::record_batch::RecordBatch; @@ -221,8 +221,8 @@ impl IpcStreamWriter { self } - pub fn with_pl_flavor(mut self, pl_flavor: PlFlavor) -> Self { - self.pl_flavor = pl_flavor; + pub fn with_compat_level(mut self, compat_level: CompatLevel) -> Self { + self.compat_level = compat_level; self } } @@ -235,7 +235,7 @@ where IpcStreamWriter { writer, compression: None, - pl_flavor: PlFlavor::compatible(), + compat_level: CompatLevel::oldest(), } } @@ -247,9 +247,9 @@ where }, ); - ipc_stream_writer.start(&df.schema().to_arrow(self.pl_flavor), None)?; + ipc_stream_writer.start(&df.schema().to_arrow(self.compat_level), None)?; let df = chunk_df_for_writing(df, 512 * 512)?; - let iter = df.iter_chunks(self.pl_flavor, true); + let iter = df.iter_chunks(self.compat_level, true); for batch in iter { ipc_stream_writer.write(&batch, None)? diff --git a/crates/polars-io/src/ipc/write.rs b/crates/polars-io/src/ipc/write.rs index bc637c0b0876..b187ff8edc07 100644 --- a/crates/polars-io/src/ipc/write.rs +++ b/crates/polars-io/src/ipc/write.rs @@ -42,7 +42,7 @@ pub struct IpcWriter { pub(super) writer: W, pub(super) compression: Option, /// Polars' flavor of arrow. This might be temporary. 
- pub(super) pl_flavor: PlFlavor, + pub(super) compat_level: CompatLevel, } impl IpcWriter { @@ -52,13 +52,13 @@ impl IpcWriter { self } - pub fn with_pl_flavor(mut self, pl_flavor: PlFlavor) -> Self { - self.pl_flavor = pl_flavor; + pub fn with_compat_level(mut self, compat_level: CompatLevel) -> Self { + self.compat_level = compat_level; self } pub fn batched(self, schema: &Schema) -> PolarsResult> { - let schema = schema_to_arrow_checked(schema, self.pl_flavor, "ipc")?; + let schema = schema_to_arrow_checked(schema, self.compat_level, "ipc")?; let mut writer = write::FileWriter::new( self.writer, Arc::new(schema), @@ -71,7 +71,7 @@ impl IpcWriter { Ok(BatchedWriter { writer, - pl_flavor: self.pl_flavor, + compat_level: self.compat_level, }) } } @@ -84,12 +84,12 @@ where IpcWriter { writer, compression: None, - pl_flavor: PlFlavor::highest(), + compat_level: CompatLevel::newest(), } } fn finish(&mut self, df: &mut DataFrame) -> PolarsResult<()> { - let schema = schema_to_arrow_checked(&df.schema(), self.pl_flavor, "ipc")?; + let schema = schema_to_arrow_checked(&df.schema(), self.compat_level, "ipc")?; let mut ipc_writer = write::FileWriter::try_new( &mut self.writer, Arc::new(schema), @@ -99,7 +99,7 @@ where }, )?; df.align_chunks(); - let iter = df.iter_chunks(self.pl_flavor, true); + let iter = df.iter_chunks(self.compat_level, true); for batch in iter { ipc_writer.write(&batch, None)? @@ -111,7 +111,7 @@ where pub struct BatchedWriter { writer: write::FileWriter, - pl_flavor: PlFlavor, + compat_level: CompatLevel, } impl BatchedWriter { @@ -120,7 +120,7 @@ impl BatchedWriter { /// # Panics /// The caller must ensure the chunks in the given [`DataFrame`] are aligned. pub fn write_batch(&mut self, df: &DataFrame) -> PolarsResult<()> { - let iter = df.iter_chunks(self.pl_flavor, true); + let iter = df.iter_chunks(self.compat_level, true); for batch in iter { self.writer.write(&batch, None)? } diff --git a/crates/polars-io/src/ipc/write_async.rs b/crates/polars-io/src/ipc/write_async.rs index 63402cd94880..5ed459a715d2 100644 --- a/crates/polars-io/src/ipc/write_async.rs +++ b/crates/polars-io/src/ipc/write_async.rs @@ -10,14 +10,14 @@ impl IpcWriter { IpcWriter { writer, compression: None, - pl_flavor: PlFlavor::compatible(), + compat_level: CompatLevel::oldest(), } } pub fn batched_async(self, schema: &Schema) -> PolarsResult> { let writer = FileSink::new( self.writer, - schema.to_arrow(PlFlavor::compatible()), + schema.to_arrow(CompatLevel::oldest()), None, WriteOptions { compression: self.compression.map(|c| c.into()), @@ -44,7 +44,7 @@ where /// # Panics /// The caller must ensure the chunks in the given [`DataFrame`] are aligned. 
pub async fn write_batch(&mut self, df: &DataFrame) -> PolarsResult<()> { - let iter = df.iter_chunks(PlFlavor::compatible(), true); + let iter = df.iter_chunks(CompatLevel::oldest(), true); for batch in iter { self.writer.feed(batch.into()).await?; } diff --git a/crates/polars-io/src/json/mod.rs b/crates/polars-io/src/json/mod.rs index e56e226ee52f..99dbd53ffa5d 100644 --- a/crates/polars-io/src/json/mod.rs +++ b/crates/polars-io/src/json/mod.rs @@ -146,11 +146,11 @@ where .map(|s| { #[cfg(feature = "object")] polars_ensure!(!matches!(s.dtype(), DataType::Object(_, _)), ComputeError: "cannot write 'Object' datatype to json"); - Ok(s.field().to_arrow(PlFlavor::highest())) + Ok(s.field().to_arrow(CompatLevel::newest())) }) .collect::>>()?; let batches = df - .iter_chunks(PlFlavor::highest(), false) + .iter_chunks(CompatLevel::newest(), false) .map(|chunk| Ok(Box::new(chunk_to_struct(chunk, fields.clone())) as ArrayRef)); match self.json_format { @@ -191,10 +191,10 @@ where .map(|s| { #[cfg(feature = "object")] polars_ensure!(!matches!(s.dtype(), DataType::Object(_, _)), ComputeError: "cannot write 'Object' datatype to json"); - Ok(s.field().to_arrow(PlFlavor::highest())) + Ok(s.field().to_arrow(CompatLevel::newest())) }) .collect::>>()?; - let chunks = df.iter_chunks(PlFlavor::highest(), false); + let chunks = df.iter_chunks(CompatLevel::newest(), false); let batches = chunks.map(|chunk| Ok(Box::new(chunk_to_struct(chunk, fields.clone())) as ArrayRef)); let mut serializer = polars_json::ndjson::write::Serializer::new(batches, vec![]); @@ -267,7 +267,7 @@ where overwrite_schema(mut_schema, overwrite)?; } - DataType::Struct(schema.iter_fields().collect()).to_arrow(PlFlavor::highest()) + DataType::Struct(schema.iter_fields().collect()).to_arrow(CompatLevel::newest()) } else { // infer let inner_dtype = if let BorrowedValue::Array(values) = &json_value { @@ -276,7 +276,7 @@ where self.infer_schema_len .unwrap_or(NonZeroUsize::new(usize::MAX).unwrap()), )? - .to_arrow(PlFlavor::highest()) + .to_arrow(CompatLevel::newest()) } else { polars_json::json::infer(&json_value)? 
}; @@ -295,7 +295,7 @@ where .map(|(name, dt)| Field::new(&name, dt)) .collect(), ) - .to_arrow(PlFlavor::highest()) + .to_arrow(CompatLevel::newest()) } else { inner_dtype } diff --git a/crates/polars-io/src/ndjson/mod.rs b/crates/polars-io/src/ndjson/mod.rs index d79bfc4ae93b..4ec6ffa7a1da 100644 --- a/crates/polars-io/src/ndjson/mod.rs +++ b/crates/polars-io/src/ndjson/mod.rs @@ -13,7 +13,7 @@ pub fn infer_schema( let data_types = polars_json::ndjson::iter_unique_dtypes(reader, infer_schema_len)?; let data_type = crate::json::infer::data_types_to_supertype(data_types.map(|dt| DataType::from(&dt)))?; - let schema = StructArray::get_fields(&data_type.to_arrow(PlFlavor::highest())) + let schema = StructArray::get_fields(&data_type.to_arrow(CompatLevel::newest())) .iter() .collect(); Ok(schema) diff --git a/crates/polars-io/src/parquet/write/batched_writer.rs b/crates/polars-io/src/parquet/write/batched_writer.rs index 48d068f49796..86b95bc36f85 100644 --- a/crates/polars-io/src/parquet/write/batched_writer.rs +++ b/crates/polars-io/src/parquet/write/batched_writer.rs @@ -27,7 +27,7 @@ impl BatchedWriter { &'a self, df: &'a DataFrame, ) -> impl Iterator>> + 'a { - let rb_iter = df.iter_chunks(PlFlavor::highest(), false); + let rb_iter = df.iter_chunks(CompatLevel::newest(), false); rb_iter.filter_map(move |batch| match batch.len() { 0 => None, _ => { @@ -95,7 +95,7 @@ fn prepare_rg_iter<'a>( options: WriteOptions, parallel: bool, ) -> impl Iterator>> + 'a { - let rb_iter = df.iter_chunks(PlFlavor::highest(), false); + let rb_iter = df.iter_chunks(CompatLevel::newest(), false); rb_iter.filter_map(move |batch| match batch.len() { 0 => None, _ => { diff --git a/crates/polars-io/src/parquet/write/writer.rs b/crates/polars-io/src/parquet/write/writer.rs index 7c7e75c24abf..2060144acc26 100644 --- a/crates/polars-io/src/parquet/write/writer.rs +++ b/crates/polars-io/src/parquet/write/writer.rs @@ -83,7 +83,7 @@ where } pub fn batched(self, schema: &Schema) -> PolarsResult> { - let schema = schema_to_arrow_checked(schema, PlFlavor::highest(), "parquet")?; + let schema = schema_to_arrow_checked(schema, CompatLevel::newest(), "parquet")?; let parquet_schema = to_parquet_schema(&schema)?; let encodings = get_encodings(&schema); let options = self.materialize_options(); diff --git a/crates/polars-io/src/shared.rs b/crates/polars-io/src/shared.rs index a0f3bcdb1948..2788d10a54fd 100644 --- a/crates/polars-io/src/shared.rs +++ b/crates/polars-io/src/shared.rs @@ -120,13 +120,13 @@ pub(crate) fn finish_reader( pub(crate) fn schema_to_arrow_checked( schema: &Schema, - pl_flavor: PlFlavor, + compat_level: CompatLevel, _file_name: &str, ) -> PolarsResult { let fields = schema.iter_fields().map(|field| { #[cfg(feature = "object")] polars_ensure!(!matches!(field.data_type(), DataType::Object(_, _)), ComputeError: "cannot write 'Object' datatype to {}", _file_name); - Ok(field.data_type().to_arrow_field(field.name().as_str(), pl_flavor)) + Ok(field.data_type().to_arrow_field(field.name().as_str(), compat_level)) }).collect::>>()?; Ok(ArrowSchema::from(fields)) } diff --git a/crates/polars-ops/src/chunked_array/gather/chunked.rs b/crates/polars-ops/src/chunked_array/gather/chunked.rs index c1516da874e8..4b4aed6f7f87 100644 --- a/crates/polars-ops/src/chunked_array/gather/chunked.rs +++ b/crates/polars-ops/src/chunked_array/gather/chunked.rs @@ -208,7 +208,7 @@ where T::Array: Debug, { unsafe fn take_chunked_unchecked(&self, by: &[ChunkId], sorted: IsSorted) -> Self { - let arrow_dtype = 
self.dtype().to_arrow(PlFlavor::highest()); + let arrow_dtype = self.dtype().to_arrow(CompatLevel::newest()); let mut out = if let Some(iter) = self.downcast_slices() { let targets = iter.collect::>(); @@ -245,7 +245,7 @@ where // Take function that checks of null state in `ChunkIdx`. unsafe fn take_opt_chunked_unchecked(&self, by: &[NullableChunkId]) -> Self { - let arrow_dtype = self.dtype().to_arrow(PlFlavor::highest()); + let arrow_dtype = self.dtype().to_arrow(CompatLevel::newest()); if let Some(iter) = self.downcast_slices() { let targets = iter.collect::>(); diff --git a/crates/polars-ops/src/chunked_array/gather_skip_nulls.rs b/crates/polars-ops/src/chunked_array/gather_skip_nulls.rs index cca5272349ce..ff52a6601589 100644 --- a/crates/polars-ops/src/chunked_array/gather_skip_nulls.rs +++ b/crates/polars-ops/src/chunked_array/gather_skip_nulls.rs @@ -94,7 +94,8 @@ where .collect(); let gathered = unsafe { gather_skip_nulls_idx_pairs_unchecked(self, index_pairs, indices.len()) }; - let arr = T::Array::from_zeroable_vec(gathered, self.dtype().to_arrow(PlFlavor::highest())); + let arr = + T::Array::from_zeroable_vec(gathered, self.dtype().to_arrow(CompatLevel::newest())); Ok(ChunkedArray::from_chunk_iter_like(self, [arr])) } } @@ -141,7 +142,7 @@ where }; let mut arr = - T::Array::from_zeroable_vec(gathered, self.dtype().to_arrow(PlFlavor::highest())); + T::Array::from_zeroable_vec(gathered, self.dtype().to_arrow(CompatLevel::newest())); if indices.null_count() > 0 { let array_refs: Vec<&dyn Array> = indices.chunks().iter().map(|x| &**x).collect(); arr = arr.with_validity_typed(concatenate_validities(&array_refs)); diff --git a/crates/polars-ops/src/chunked_array/repeat_by.rs b/crates/polars-ops/src/chunked_array/repeat_by.rs index ea5c0222974a..8ccf9ae58141 100644 --- a/crates/polars-ops/src/chunked_array/repeat_by.rs +++ b/crates/polars-ops/src/chunked_array/repeat_by.rs @@ -39,7 +39,7 @@ where unsafe { LargeListArray::from_iter_primitive_trusted_len( iter, - T::get_dtype().to_arrow(PlFlavor::highest()), + T::get_dtype().to_arrow(CompatLevel::newest()), ) } })) diff --git a/crates/polars-ops/src/chunked_array/strings/extract.rs b/crates/polars-ops/src/chunked_array/strings/extract.rs index ed7a87345245..4c65b2ce8c3d 100644 --- a/crates/polars-ops/src/chunked_array/strings/extract.rs +++ b/crates/polars-ops/src/chunked_array/strings/extract.rs @@ -52,7 +52,7 @@ pub(super) fn extract_groups( .map(|ca| ca.into_series()); } - let data_type = dtype.try_to_arrow(PlFlavor::highest())?; + let data_type = dtype.try_to_arrow(CompatLevel::newest())?; let DataType::Struct(fields) = dtype else { unreachable!() // Implementation error if it isn't a struct. 
}; diff --git a/crates/polars-ops/src/chunked_array/strings/json_path.rs b/crates/polars-ops/src/chunked_array/strings/json_path.rs index 264d8163e712..a585e9837e39 100644 --- a/crates/polars-ops/src/chunked_array/strings/json_path.rs +++ b/crates/polars-ops/src/chunked_array/strings/json_path.rs @@ -107,7 +107,7 @@ pub trait Utf8JsonPathImpl: AsString { let array = polars_json::ndjson::deserialize::deserialize_iter( iter, - dtype.to_arrow(PlFlavor::highest()), + dtype.to_arrow(CompatLevel::newest()), buf_size, ca.len(), ) diff --git a/crates/polars-ops/src/series/ops/ewm_by.rs b/crates/polars-ops/src/series/ops/ewm_by.rs index 734e417c27d4..9ae0db056ae5 100644 --- a/crates/polars-ops/src/series/ops/ewm_by.rs +++ b/crates/polars-ops/src/series/ops/ewm_by.rs @@ -130,7 +130,7 @@ where } }; }); - let mut arr = T::Array::from_zeroable_vec(out, values.dtype().to_arrow(PlFlavor::highest())); + let mut arr = T::Array::from_zeroable_vec(out, values.dtype().to_arrow(CompatLevel::newest())); if (times.null_count() > 0) || (values.null_count() > 0) { let validity = binary_concatenate_validities(times, values); arr = arr.with_validity_typed(validity); @@ -179,7 +179,7 @@ where } }; }); - let mut arr = T::Array::from_zeroable_vec(out, values.dtype().to_arrow(PlFlavor::highest())); + let mut arr = T::Array::from_zeroable_vec(out, values.dtype().to_arrow(CompatLevel::newest())); if (times.null_count() > 0) || (values.null_count() > 0) { let validity = binary_concatenate_validities(times, values); arr = arr.with_validity_typed(validity); diff --git a/crates/polars-ops/src/series/ops/interpolation/interpolate.rs b/crates/polars-ops/src/series/ops/interpolation/interpolate.rs index 4b1eb7507d6e..cffbe59f5f05 100644 --- a/crates/polars-ops/src/series/ops/interpolation/interpolate.rs +++ b/crates/polars-ops/src/series/ops/interpolation/interpolate.rs @@ -99,7 +99,7 @@ where } let array = PrimitiveArray::new( - T::get_dtype().to_arrow(PlFlavor::highest()), + T::get_dtype().to_arrow(CompatLevel::newest()), out.into(), Some(validity.into()), ); diff --git a/crates/polars-ops/src/series/ops/interpolation/interpolate_by.rs b/crates/polars-ops/src/series/ops/interpolation/interpolate_by.rs index 5a6a73be4b2b..7d76f7073cd5 100644 --- a/crates/polars-ops/src/series/ops/interpolation/interpolate_by.rs +++ b/crates/polars-ops/src/series/ops/interpolation/interpolate_by.rs @@ -151,7 +151,7 @@ where } let array = PrimitiveArray::new( - T::get_dtype().to_arrow(PlFlavor::highest()), + T::get_dtype().to_arrow(CompatLevel::newest()), out.into(), Some(validity.into()), ); @@ -253,7 +253,7 @@ where } let array = PrimitiveArray::new( - T::get_dtype().to_arrow(PlFlavor::highest()), + T::get_dtype().to_arrow(CompatLevel::newest()), out.into(), Some(validity.into()), ); diff --git a/crates/polars-pipe/src/executors/sinks/group_by/aggregates/mean.rs b/crates/polars-pipe/src/executors/sinks/group_by/aggregates/mean.rs index 353733ace4f3..c82c7678ff85 100644 --- a/crates/polars-pipe/src/executors/sinks/group_by/aggregates/mean.rs +++ b/crates/polars-pipe/src/executors/sinks/group_by/aggregates/mean.rs @@ -77,7 +77,7 @@ where let arr = values.chunks().get_unchecked(0); arr.sliced_unchecked(offset as usize, length as usize) }; - let dtype = K::PolarsType::get_dtype().to_arrow(PlFlavor::highest()); + let dtype = K::PolarsType::get_dtype().to_arrow(CompatLevel::newest()); let arr = arrow::compute::cast::cast_unchecked(arr.as_ref(), &dtype).unwrap(); let arr = unsafe { arr.as_any() diff --git 
a/crates/polars-pipe/src/executors/sinks/group_by/aggregates/sum.rs b/crates/polars-pipe/src/executors/sinks/group_by/aggregates/sum.rs index 62808623d4bf..e58cc09d132a 100644 --- a/crates/polars-pipe/src/executors/sinks/group_by/aggregates/sum.rs +++ b/crates/polars-pipe/src/executors/sinks/group_by/aggregates/sum.rs @@ -55,7 +55,7 @@ where let arr = values.chunks().get_unchecked(0); arr.sliced_unchecked(offset as usize, length as usize) }; - let dtype = K::PolarsType::get_dtype().to_arrow(PlFlavor::highest()); + let dtype = K::PolarsType::get_dtype().to_arrow(CompatLevel::newest()); let arr = arrow::compute::cast::cast_unchecked(arr.as_ref(), &dtype).unwrap(); let arr = unsafe { arr.as_any() diff --git a/crates/polars-pipe/src/executors/sinks/group_by/generic/eval.rs b/crates/polars-pipe/src/executors/sinks/group_by/generic/eval.rs index f3490aa93c75..84e504816daa 100644 --- a/crates/polars-pipe/src/executors/sinks/group_by/generic/eval.rs +++ b/crates/polars-pipe/src/executors/sinks/group_by/generic/eval.rs @@ -82,7 +82,7 @@ impl Eval { _ => s.to_physical_repr().into_owned(), }; let s = prepare_key(&s, chunk); - keys_columns.push(s.to_arrow(0, PlFlavor::highest())); + keys_columns.push(s.to_arrow(0, CompatLevel::newest())); } polars_row::convert_columns_amortized( diff --git a/crates/polars-pipe/src/executors/sinks/group_by/generic/hash_table.rs b/crates/polars-pipe/src/executors/sinks/group_by/generic/hash_table.rs index 1de8ba0810f5..2bb4f57b46a1 100644 --- a/crates/polars-pipe/src/executors/sinks/group_by/generic/hash_table.rs +++ b/crates/polars-pipe/src/executors/sinks/group_by/generic/hash_table.rs @@ -261,7 +261,7 @@ impl AggHashTable { .output_schema .iter_dtypes() .take(self.num_keys) - .map(|dtype| dtype.to_physical().to_arrow(PlFlavor::highest())) + .map(|dtype| dtype.to_physical().to_arrow(CompatLevel::newest())) .collect::>(); let fields = vec![Default::default(); self.num_keys]; let key_columns = diff --git a/crates/polars-pipe/src/executors/sinks/io.rs b/crates/polars-pipe/src/executors/sinks/io.rs index e5d279c8eca5..cabf560dadb2 100644 --- a/crates/polars-pipe/src/executors/sinks/io.rs +++ b/crates/polars-pipe/src/executors/sinks/io.rs @@ -177,7 +177,7 @@ impl IOThread { path.push(format!("{count}.ipc")); let file = File::create(path).unwrap(); - let writer = IpcWriter::new(file).with_pl_flavor(PlFlavor::highest()); + let writer = IpcWriter::new(file).with_compat_level(CompatLevel::newest()); let mut writer = writer.batched(&schema).unwrap(); writer.write_batch(&df).unwrap(); writer.finish().unwrap(); @@ -188,7 +188,7 @@ impl IOThread { path.push(format!("{count}_0_pass.ipc")); let file = File::create(path).unwrap(); - let writer = IpcWriter::new(file).with_pl_flavor(PlFlavor::highest()); + let writer = IpcWriter::new(file).with_compat_level(CompatLevel::newest()); let mut writer = writer.batched(&schema).unwrap(); for mut df in iter { @@ -227,7 +227,7 @@ impl IOThread { path.push(format!("_{count}_full.ipc")); let file = File::create(path).unwrap(); - let mut writer = IpcWriter::new(file).with_pl_flavor(PlFlavor::highest()); + let mut writer = IpcWriter::new(file).with_compat_level(CompatLevel::newest()); writer.finish(&mut df).unwrap(); } else { let iter = Box::new(std::iter::once(df)); @@ -260,7 +260,7 @@ impl IOThread { // duplicates path.push(format!("_{count}.ipc")); let file = File::create(path).unwrap(); - let writer = IpcWriter::new(file).with_pl_flavor(PlFlavor::highest()); + let writer = IpcWriter::new(file).with_compat_level(CompatLevel::newest()); let mut 
writer = writer.batched(&self.schema).unwrap(); writer.write_batch(&df).unwrap(); writer.finish().unwrap(); diff --git a/crates/polars-pipe/src/executors/sinks/sort/sink_multiple.rs b/crates/polars-pipe/src/executors/sinks/sort/sink_multiple.rs index f14fe7295254..c7256f084aeb 100644 --- a/crates/polars-pipe/src/executors/sinks/sort/sink_multiple.rs +++ b/crates/polars-pipe/src/executors/sinks/sort/sink_multiple.rs @@ -275,7 +275,7 @@ impl Sink for SortSinkMultiple { let sort_dtypes = self.sort_dtypes.take().map(|arr| { arr.iter() - .map(|dt| dt.to_physical().to_arrow(PlFlavor::highest())) + .map(|dt| dt.to_physical().to_arrow(CompatLevel::newest())) .collect::>() }); diff --git a/crates/polars-plan/src/dsl/function_expr/plugin.rs b/crates/polars-plan/src/dsl/function_expr/plugin.rs index adb8b6c1ca4b..5ce4875fe68d 100644 --- a/crates/polars-plan/src/dsl/function_expr/plugin.rs +++ b/crates/polars-plan/src/dsl/function_expr/plugin.rs @@ -130,7 +130,7 @@ pub(super) unsafe fn plugin_field( // we deallocate the fields buffer let ffi_fields = fields .iter() - .map(|field| arrow::ffi::export_field_to_c(&field.to_arrow(PlFlavor::highest()))) + .map(|field| arrow::ffi::export_field_to_c(&field.to_arrow(CompatLevel::newest()))) .collect::>() .into_boxed_slice(); let n_args = ffi_fields.len(); diff --git a/crates/polars-plan/src/dsl/function_expr/struct_.rs b/crates/polars-plan/src/dsl/function_expr/struct_.rs index a8584edd82de..22fe7092b4f9 100644 --- a/crates/polars-plan/src/dsl/function_expr/struct_.rs +++ b/crates/polars-plan/src/dsl/function_expr/struct_.rs @@ -211,7 +211,7 @@ pub(super) fn suffix_fields(s: &Series, suffix: Arc) -> PolarsResult PolarsResult { let ca = s.struct_()?; - let dtype = ca.dtype().to_arrow(PlFlavor::highest()); + let dtype = ca.dtype().to_arrow(CompatLevel::newest()); let iter = ca.chunks().iter().map(|arr| { let arr = arrow::compute::cast::cast_unchecked(arr.as_ref(), &dtype).unwrap(); diff --git a/crates/polars-time/src/chunkedarray/datetime.rs b/crates/polars-time/src/chunkedarray/datetime.rs index 17a417ef6fe8..de14c83c6e72 100644 --- a/crates/polars-time/src/chunkedarray/datetime.rs +++ b/crates/polars-time/src/chunkedarray/datetime.rs @@ -12,7 +12,7 @@ fn cast_and_apply< ca: &DatetimeChunked, func: F, ) -> ChunkedArray { - let dtype = ca.dtype().to_arrow(PlFlavor::highest()); + let dtype = ca.dtype().to_arrow(CompatLevel::newest()); let chunks = ca.downcast_iter().map(|arr| { let arr = cast( arr, diff --git a/crates/polars/tests/it/io/ipc.rs b/crates/polars/tests/it/io/ipc.rs index 1d606777600d..6b5e2a83ba41 100644 --- a/crates/polars/tests/it/io/ipc.rs +++ b/crates/polars/tests/it/io/ipc.rs @@ -12,7 +12,7 @@ fn test_ipc_compression_variadic_buffers() { let mut file = std::io::Cursor::new(vec![]); IpcWriter::new(&mut file) .with_compression(Some(IpcCompression::LZ4)) - .with_pl_flavor(PlFlavor::highest()) + .with_compat_level(CompatLevel::newest()) .finish(&mut df) .unwrap(); @@ -82,7 +82,7 @@ fn test_read_ipc_with_columns() { .unwrap(); df_read.equals(&expected); - for pl_flavor in [0, 1].map(|version| PlFlavor::with_version(version).unwrap()) { + for compat_level in [0, 1].map(|level| CompatLevel::with_level(level).unwrap()) { let mut buf: Cursor> = Cursor::new(Vec::new()); let mut df = df![ "letters" => ["x", "y", "z"], @@ -92,7 +92,7 @@ fn test_read_ipc_with_columns() { ] .unwrap(); IpcWriter::new(&mut buf) - .with_pl_flavor(pl_flavor) + .with_compat_level(compat_level) .finish(&mut df) .expect("ipc writer"); buf.set_position(0); diff --git 
a/py-polars/src/conversion/mod.rs b/py-polars/src/conversion/mod.rs index 5ec7aca61cd8..0451c9f8eddf 100644 --- a/py-polars/src/conversion/mod.rs +++ b/py-polars/src/conversion/mod.rs @@ -1184,25 +1184,25 @@ where } #[derive(Debug, Copy, Clone)] -pub struct PyPlFlavor(pub PlFlavor); +pub struct PyCompatLevel(pub CompatLevel); -impl<'a> FromPyObject<'a> for PyPlFlavor { +impl<'a> FromPyObject<'a> for PyCompatLevel { fn extract_bound(ob: &Bound<'a, PyAny>) -> PyResult { - Ok(PyPlFlavor(if let Ok(version) = ob.extract::() { - if let Ok(flavor) = PlFlavor::with_version(version) { - flavor + Ok(PyCompatLevel(if let Ok(level) = ob.extract::() { + if let Ok(compat_level) = CompatLevel::with_level(level) { + compat_level } else { - return Err(PyValueError::new_err("invalid flavor version")); + return Err(PyValueError::new_err("invalid compat level")); } } else if let Ok(future) = ob.extract::() { if future { - PlFlavor::highest() + CompatLevel::newest() } else { - PlFlavor::compatible() + CompatLevel::oldest() } } else { return Err(PyTypeError::new_err( - "'future' argument accepts int or bool", + "'compat_level' argument accepts int or bool", )); })) } diff --git a/py-polars/src/dataframe/export.rs b/py-polars/src/dataframe/export.rs index 49d52b292565..0b0d0a4f9020 100644 --- a/py-polars/src/dataframe/export.rs +++ b/py-polars/src/dataframe/export.rs @@ -7,7 +7,7 @@ use pyo3::types::{PyList, PyTuple}; use super::*; use crate::conversion::{ObjectValue, Wrap}; use crate::interop; -use crate::prelude::PyPlFlavor; +use crate::prelude::PyCompatLevel; #[pymethods] impl PyDataFrame { @@ -64,7 +64,7 @@ impl PyDataFrame { } #[allow(clippy::wrong_self_convention)] - pub fn to_arrow(&mut self, future: PyPlFlavor) -> PyResult> { + pub fn to_arrow(&mut self, compat_level: PyCompatLevel) -> PyResult> { self.df.align_chunks(); Python::with_gil(|py| { let pyarrow = py.import_bound("pyarrow")?; @@ -72,7 +72,7 @@ impl PyDataFrame { let rbs = self .df - .iter_chunks(future.0, true) + .iter_chunks(compat_level.0, true) .map(|rb| interop::arrow::to_py::to_py_rb(&rb, &names, py, &pyarrow)) .collect::>()?; Ok(rbs) @@ -105,7 +105,7 @@ impl PyDataFrame { .collect::>(); let rbs = self .df - .iter_chunks(PlFlavor::compatible(), true) + .iter_chunks(CompatLevel::oldest(), true) .map(|rb| { let mut rb = rb.into_arrays(); for i in &cat_columns { diff --git a/py-polars/src/dataframe/io.rs b/py-polars/src/dataframe/io.rs index 77813ad8de60..f03aa3df35b3 100644 --- a/py-polars/src/dataframe/io.rs +++ b/py-polars/src/dataframe/io.rs @@ -18,7 +18,7 @@ use crate::file::{ get_either_file, get_file_like, get_mmap_bytes_reader, get_mmap_bytes_reader_and_path, read_if_bytesio, EitherRustPythonFile, }; -use crate::prelude::PyPlFlavor; +use crate::prelude::PyCompatLevel; #[pymethods] impl PyDataFrame { @@ -432,7 +432,7 @@ impl PyDataFrame { py: Python, py_f: PyObject, compression: Wrap>, - future: PyPlFlavor, + compat_level: PyCompatLevel, ) -> PyResult<()> { let either = get_either_file(py_f, true)?; if let EitherRustPythonFile::Rust(ref f) = either { @@ -442,7 +442,7 @@ impl PyDataFrame { py.allow_threads(|| { IpcWriter::new(&mut buf) .with_compression(compression.0) - .with_pl_flavor(future.0) + .with_compat_level(compat_level.0) .finish(&mut self.df) .map_err(PyPolarsErr::from) })?; @@ -455,13 +455,13 @@ impl PyDataFrame { py: Python, py_f: PyObject, compression: Wrap>, - future: PyPlFlavor, + compat_level: PyCompatLevel, ) -> PyResult<()> { let mut buf = get_file_like(py_f, true)?; py.allow_threads(|| { IpcStreamWriter::new(&mut 
buf) .with_compression(compression.0) - .with_pl_flavor(future.0) + .with_compat_level(compat_level.0) .finish(&mut self.df) .map_err(PyPolarsErr::from) })?; diff --git a/py-polars/src/dataframe/serde.rs b/py-polars/src/dataframe/serde.rs index b9a9c50a4e06..a6b1f29e23cf 100644 --- a/py-polars/src/dataframe/serde.rs +++ b/py-polars/src/dataframe/serde.rs @@ -18,7 +18,7 @@ impl PyDataFrame { // Used in pickle/pickling let mut buf: Vec = vec![]; IpcStreamWriter::new(&mut buf) - .with_pl_flavor(PlFlavor::highest()) + .with_compat_level(CompatLevel::newest()) .finish(&mut self.df.clone()) .expect("ipc writer"); Ok(PyBytes::new_bound(py, &buf).to_object(py)) diff --git a/py-polars/src/series/export.rs b/py-polars/src/series/export.rs index f8c677b92f5c..eb320311e7f8 100644 --- a/py-polars/src/series/export.rs +++ b/py-polars/src/series/export.rs @@ -145,12 +145,16 @@ impl PySeries { /// Return the underlying Arrow array. #[allow(clippy::wrong_self_convention)] - fn to_arrow(&mut self, future: PyPlFlavor) -> PyResult { + fn to_arrow(&mut self, compat_level: PyCompatLevel) -> PyResult { self.rechunk(true); Python::with_gil(|py| { let pyarrow = py.import_bound("pyarrow")?; - interop::arrow::to_py::to_py_array(self.series.to_arrow(0, future.0), py, &pyarrow) + interop::arrow::to_py::to_py_array( + self.series.to_arrow(0, compat_level.0), + py, + &pyarrow, + ) }) } } diff --git a/py-polars/src/series/mod.rs b/py-polars/src/series/mod.rs index c14933fd8eaa..899ae3940191 100644 --- a/py-polars/src/series/mod.rs +++ b/py-polars/src/series/mod.rs @@ -665,7 +665,7 @@ impl PySeries { // IPC only support DataFrames so we need to convert it let mut df = self.series.clone().into_frame(); IpcStreamWriter::new(&mut buf) - .with_pl_flavor(PlFlavor::highest()) + .with_compat_level(CompatLevel::newest()) .finish(&mut df) .expect("ipc writer"); Ok(PyBytes::new_bound(py, &buf).to_object(py)) From 0401852d37e30c78784e260fce2f31d7b4938850 Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Sun, 7 Jul 2024 01:14:26 +0800 Subject: [PATCH 09/14] refactor(python): change Flavor to CompatLevel --- py-polars/polars/dataframe/frame.py | 64 +++++++++++--------- py-polars/polars/interchange/protocol.py | 40 ++++++------ py-polars/polars/series/series.py | 24 ++++---- py-polars/tests/unit/interop/test_interop.py | 8 +-- 4 files changed, 73 insertions(+), 63 deletions(-) diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 844a1f8b03b2..49940fbaf1ca 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -103,7 +103,7 @@ TooManyRowsReturnedError, ) from polars.functions import col, lit -from polars.interchange.protocol import Flavor +from polars.interchange.protocol import CompatLevel from polars.schema import Schema from polars.selectors import _expand_selector_dicts, _expand_selectors @@ -1385,7 +1385,8 @@ def item(self, row: int | None = None, column: int | str | None = None) -> Any: ) return s.get_index_signed(row) - def to_arrow(self, *, future: Flavor | None = None) -> pa.Table: + @deprecate_renamed_parameter("future", "compat_level", version="1.0.1") + def to_arrow(self, *, compat_level: CompatLevel | None = None) -> pa.Table: """ Collect the underlying arrow arrays in an Arrow Table. @@ -1396,8 +1397,9 @@ def to_arrow(self, *, future: Flavor | None = None) -> pa.Table: Parameters ---------- - future - Use a specific version of Polars' internal data structures. 
+ compat_level + Use a specific compatibility level + when exporting Polars' internal data structures. Examples -------- @@ -1415,12 +1417,12 @@ def to_arrow(self, *, future: Flavor | None = None) -> pa.Table: if not self.width: # 0x0 dataframe, cannot infer schema from batches return pa.table({}) - if future is None: - future = False # type: ignore[assignment] - elif isinstance(future, Flavor): - future = future._version # type: ignore[attr-defined] + if compat_level is None: + compat_level = False # type: ignore[assignment] + elif isinstance(compat_level, CompatLevel): + compat_level = compat_level._version # type: ignore[attr-defined] - record_batches = self._df.to_arrow(future) + record_batches = self._df.to_arrow(compat_level) return pa.Table.from_batches(record_batches) @overload @@ -3293,7 +3295,7 @@ def write_ipc( file: None, *, compression: IpcCompression = "uncompressed", - future: Flavor | None = None, + compat_level: CompatLevel | None = None, ) -> BytesIO: ... @overload @@ -3302,15 +3304,16 @@ def write_ipc( file: str | Path | IO[bytes], *, compression: IpcCompression = "uncompressed", - future: Flavor | None = None, + compat_level: CompatLevel | None = None, ) -> None: ... + @deprecate_renamed_parameter("future", "compat_level", version="1.0.1") def write_ipc( self, file: str | Path | IO[bytes] | None, *, compression: IpcCompression = "uncompressed", - future: Flavor | None = None, + compat_level: CompatLevel | None = None, ) -> BytesIO | None: """ Write to Arrow IPC binary stream or Feather file. @@ -3324,8 +3327,9 @@ def write_ipc( written. If set to `None`, the output is returned as a BytesIO object. compression : {'uncompressed', 'lz4', 'zstd'} Compression method. Defaults to "uncompressed". - future - Use a specific version of Polars' internal data structures. + compat_level + Use a specific compatibility level + when exporting Polars' internal data structures. Examples -------- @@ -3347,15 +3351,15 @@ def write_ipc( elif isinstance(file, (str, Path)): file = normalize_filepath(file) - if future is None: - future = True # type: ignore[assignment] - elif isinstance(future, Flavor): - future = future._version # type: ignore[attr-defined] + if compat_level is None: + compat_level = True # type: ignore[assignment] + elif isinstance(compat_level, CompatLevel): + compat_level = compat_level._version # type: ignore[attr-defined] if compression is None: compression = "uncompressed" - self._df.write_ipc(file, compression, future) + self._df.write_ipc(file, compression, compat_level) return file if return_bytes else None # type: ignore[return-value] @overload @@ -3364,7 +3368,7 @@ def write_ipc_stream( file: None, *, compression: IpcCompression = "uncompressed", - future: Flavor | None = None, + compat_level: CompatLevel | None = None, ) -> BytesIO: ... @overload @@ -3373,15 +3377,16 @@ def write_ipc_stream( file: str | Path | IO[bytes], *, compression: IpcCompression = "uncompressed", - future: Flavor | None = None, + compat_level: CompatLevel | None = None, ) -> None: ... + @deprecate_renamed_parameter("future", "compat_level", version="1.0.1") def write_ipc_stream( self, file: str | Path | IO[bytes] | None, *, compression: IpcCompression = "uncompressed", - future: Flavor | None = None, + compat_level: CompatLevel | None = None, ) -> BytesIO | None: """ Write to Arrow IPC record batch stream. @@ -3395,8 +3400,9 @@ def write_ipc_stream( be written. If set to `None`, the output is returned as a BytesIO object. compression : {'uncompressed', 'lz4', 'zstd'} Compression method. 
Defaults to "uncompressed". - future - Use a specific version of Polars' internal data structures. + compat_level + Use a specific compatibility level + when exporting Polars' internal data structures. Examples -------- @@ -3418,15 +3424,15 @@ def write_ipc_stream( elif isinstance(file, (str, Path)): file = normalize_filepath(file) - if future is None: - future = True # type: ignore[assignment] - elif isinstance(future, Flavor): - future = future._version # type: ignore[attr-defined] + if compat_level is None: + compat_level = True # type: ignore[assignment] + elif isinstance(compat_level, CompatLevel): + compat_level = compat_level._version # type: ignore[attr-defined] if compression is None: compression = "uncompressed" - self._df.write_ipc_stream(file, compression, future) + self._df.write_ipc_stream(file, compression, compat_level) return file if return_bytes else None # type: ignore[return-value] def write_parquet( diff --git a/py-polars/polars/interchange/protocol.py b/py-polars/polars/interchange/protocol.py index 938ddb827b6c..4eda7fa95f2d 100644 --- a/py-polars/polars/interchange/protocol.py +++ b/py-polars/polars/interchange/protocol.py @@ -259,43 +259,45 @@ class CopyNotAllowedError(RuntimeError): """Exception raised when a copy is required, but `allow_copy` is set to `False`.""" -class Flavor: - """Data structure flavor.""" +class CompatLevel: + """Data structure compatibility level.""" def __init__(self) -> None: - msg = "it is not allowed to create a Flavor object" + msg = "it is not allowed to create a CompatLevel object" raise TypeError(msg) @staticmethod - def _with_version(version: int) -> Flavor: - flavor = Flavor.__new__(Flavor) - flavor._version = version # type: ignore[attr-defined] - return flavor + def _with_version(version: int) -> CompatLevel: + compat_level = CompatLevel.__new__(CompatLevel) + compat_level._version = version # type: ignore[attr-defined] + return compat_level @staticmethod - def _highest() -> Flavor: - return Flavor._future1 # type: ignore[attr-defined] + def _newest() -> CompatLevel: + return CompatLevel._future1 # type: ignore[attr-defined] @staticmethod - def highest() -> Flavor: + def newest() -> CompatLevel: """ - Get the highest supported flavor. + Get the highest supported compatibility level. .. warning:: - Highest flavor is considered **unstable**. It may be changed + Highest compatibility level is considered **unstable**. It may be changed at any point without it being considered a breaking change. """ - issue_unstable_warning("Using the highest flavor is considered unstable.") - return Flavor._highest() + issue_unstable_warning( + "Using the highest compatibility level is considered unstable." 
+ ) + return CompatLevel._newest() @staticmethod - def compatible() -> Flavor: - """Get the flavor that is compatible with older arrow implementation.""" - return Flavor._compatible # type: ignore[attr-defined] + def oldest() -> CompatLevel: + """Get the most compatible level.""" + return CompatLevel._compatible # type: ignore[attr-defined] def __repr__(self) -> str: return f"<{self.__class__.__module__}.{self.__class__.__qualname__}: {self._version}>" # type: ignore[attr-defined] -Flavor._compatible = Flavor._with_version(0) # type: ignore[attr-defined] -Flavor._future1 = Flavor._with_version(1) # type: ignore[attr-defined] +CompatLevel._compatible = CompatLevel._with_version(0) # type: ignore[attr-defined] +CompatLevel._future1 = CompatLevel._with_version(1) # type: ignore[attr-defined] diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index d0d05bb6d283..957db1db4fd3 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -98,7 +98,7 @@ from polars.dependencies import pandas as pd from polars.dependencies import pyarrow as pa from polars.exceptions import ComputeError, ModuleUpgradeRequiredError, ShapeError -from polars.interchange.protocol import Flavor +from polars.interchange.protocol import CompatLevel from polars.series.array import ArrayNameSpace from polars.series.binary import BinaryNameSpace from polars.series.categorical import CatNameSpace @@ -503,14 +503,14 @@ def _from_buffers( validity = validity._s return cls._from_pyseries(PySeries._from_buffers(dtype, data, validity)) - def _highest_flavor(self) -> int: + def _newest_compat_level(self) -> int: """ Get the highest supported flavor version. This is only used by pyo3-polars, and it is simpler not to make it a static method. """ - return Flavor._highest()._version # type: ignore[attr-defined] + return CompatLevel._newest()._version # type: ignore[attr-defined] @property def dtype(self) -> DataType: @@ -4352,7 +4352,8 @@ def to_torch(self) -> torch.Tensor: # tensor.rename(self.name) return tensor - def to_arrow(self, *, future: Flavor | None = None) -> pa.Array: + @deprecate_renamed_parameter("future", "compat_level", version="1.0.1") + def to_arrow(self, *, compat_level: CompatLevel | None = None) -> pa.Array: """ Return the underlying Arrow array. @@ -4360,8 +4361,9 @@ def to_arrow(self, *, future: Flavor | None = None) -> pa.Array: Parameters ---------- - future - Use a specific version of Polars' internal data structures. + compat_level + Use a specific compatibility level + when exporting Polars' internal data structures. 
Examples -------- @@ -4375,11 +4377,11 @@ def to_arrow(self, *, future: Flavor | None = None) -> pa.Array: 3 ] """ - if future is None: - future = False # type: ignore[assignment] - elif isinstance(future, Flavor): - future = future._version # type: ignore[attr-defined] - return self._s.to_arrow(future) + if compat_level is None: + compat_level = False # type: ignore[assignment] + elif isinstance(compat_level, CompatLevel): + compat_level = compat_level._version # type: ignore[attr-defined] + return self._s.to_arrow(compat_level) def to_pandas( self, *, use_pyarrow_extension_array: bool = False, **kwargs: Any diff --git a/py-polars/tests/unit/interop/test_interop.py b/py-polars/tests/unit/interop/test_interop.py index bfbf13893d32..a2c9f834646b 100644 --- a/py-polars/tests/unit/interop/test_interop.py +++ b/py-polars/tests/unit/interop/test_interop.py @@ -10,7 +10,7 @@ import polars as pl from polars.exceptions import ComputeError, UnstableWarning -from polars.interchange.protocol import Flavor +from polars.interchange.protocol import CompatLevel from polars.testing import assert_frame_equal, assert_series_equal @@ -708,7 +708,7 @@ def test_from_numpy_different_resolution_invalid() -> None: def test_highest_flavor(monkeypatch: pytest.MonkeyPatch) -> None: # change these if flavor version bumped monkeypatch.setenv("POLARS_WARN_UNSTABLE", "1") - assert Flavor.compatible()._version == 0 # type: ignore[attr-defined] + assert CompatLevel.oldest()._version == 0 # type: ignore[attr-defined] with pytest.warns(UnstableWarning): - assert Flavor.highest()._version == 1 # type: ignore[attr-defined] - assert pl.Series([1])._highest_flavor() == 1 + assert CompatLevel.newest()._version == 1 # type: ignore[attr-defined] + assert pl.Series([1])._newest_compat_level() == 1 From 7b4309be902435ea38be03df4e32078bb50f3c1f Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Sun, 7 Jul 2024 16:27:24 +0800 Subject: [PATCH 10/14] test(python): more tests in test_compat_level --- py-polars/tests/unit/interop/test_interop.py | 47 +++++++++++++++++--- 1 file changed, 42 insertions(+), 5 deletions(-) diff --git a/py-polars/tests/unit/interop/test_interop.py b/py-polars/tests/unit/interop/test_interop.py index a2c9f834646b..5dd7f442c905 100644 --- a/py-polars/tests/unit/interop/test_interop.py +++ b/py-polars/tests/unit/interop/test_interop.py @@ -5,6 +5,7 @@ import numpy as np import pandas as pd +import pyarrow import pyarrow as pa import pytest @@ -705,10 +706,46 @@ def test_from_numpy_different_resolution_invalid() -> None: ) -def test_highest_flavor(monkeypatch: pytest.MonkeyPatch) -> None: - # change these if flavor version bumped +def test_compat_level(monkeypatch: pytest.MonkeyPatch) -> None: + # change these if compat level bumped monkeypatch.setenv("POLARS_WARN_UNSTABLE", "1") - assert CompatLevel.oldest()._version == 0 # type: ignore[attr-defined] + oldest = CompatLevel.oldest() + assert oldest is CompatLevel.oldest() # test singleton + assert oldest._version == 0 # type: ignore[attr-defined] with pytest.warns(UnstableWarning): - assert CompatLevel.newest()._version == 1 # type: ignore[attr-defined] - assert pl.Series([1])._newest_compat_level() == 1 + newest = CompatLevel.newest() + assert newest is CompatLevel.newest() + assert newest._version == 1 # type: ignore[attr-defined] + + str_col = pl.Series(["awd"]) + bin_col = pl.Series([b"dwa"]) + assert str_col._newest_compat_level() == newest._version # type: ignore[attr-defined] + assert isinstance(str_col.to_arrow(), pyarrow.LargeStringArray) + assert 
isinstance(str_col.to_arrow(compat_level=oldest), pyarrow.LargeStringArray) + assert isinstance(str_col.to_arrow(compat_level=newest), pyarrow.StringViewArray) + assert isinstance(bin_col.to_arrow(), pyarrow.LargeBinaryArray) + assert isinstance(bin_col.to_arrow(compat_level=oldest), pyarrow.LargeBinaryArray) + assert isinstance(bin_col.to_arrow(compat_level=newest), pyarrow.BinaryViewArray) + + df = pl.DataFrame({"str_col": str_col, "bin_col": bin_col}) + assert isinstance(df.to_arrow()["str_col"][0], pyarrow.LargeStringScalar) + assert isinstance( + df.to_arrow(compat_level=oldest)["str_col"][0], pyarrow.LargeStringScalar + ) + assert isinstance( + df.to_arrow(compat_level=newest)["str_col"][0], pyarrow.StringViewScalar + ) + assert isinstance(df.to_arrow()["bin_col"][0], pyarrow.LargeBinaryScalar) + assert isinstance( + df.to_arrow(compat_level=oldest)["bin_col"][0], pyarrow.LargeBinaryScalar + ) + assert isinstance( + df.to_arrow(compat_level=newest)["bin_col"][0], pyarrow.BinaryViewScalar + ) + + assert len(df.write_ipc(None).getbuffer()) == 786 + assert len(df.write_ipc(None, compat_level=oldest).getbuffer()) == 914 + assert len(df.write_ipc(None, compat_level=newest).getbuffer()) == 786 + assert len(df.write_ipc_stream(None).getbuffer()) == 544 + assert len(df.write_ipc_stream(None, compat_level=oldest).getbuffer()) == 672 + assert len(df.write_ipc_stream(None, compat_level=newest).getbuffer()) == 544 From 5e76d5fd4b8105ce012b210b70254da8b4685431 Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Sun, 7 Jul 2024 16:29:34 +0800 Subject: [PATCH 11/14] feat(rust): add CompatLevel::get_level() --- crates/polars-core/src/datatypes/dtype.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/crates/polars-core/src/datatypes/dtype.rs b/crates/polars-core/src/datatypes/dtype.rs index 53c444be5061..9fc2a0ba0fb0 100644 --- a/crates/polars-core/src/datatypes/dtype.rs +++ b/crates/polars-core/src/datatypes/dtype.rs @@ -805,6 +805,8 @@ impl CompatLevel { CompatLevel(0) } + // The following methods are only used internally + #[doc(hidden)] pub fn with_level(level: u16) -> PolarsResult { if level > CompatLevel::newest().0 { @@ -812,4 +814,9 @@ impl CompatLevel { } Ok(CompatLevel(level)) } + + #[doc(hidden)] + pub fn get_level(&self) -> u16 { + self.0 + } } From 9ce6a823981341c079eb7535e7c101573c0e738e Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Sun, 7 Jul 2024 16:45:52 +0800 Subject: [PATCH 12/14] feat(python): make Series._newest_compat_level() a static method --- py-polars/polars/series/series.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index 957db1db4fd3..2a542a1f45e2 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -503,12 +503,12 @@ def _from_buffers( validity = validity._s return cls._from_pyseries(PySeries._from_buffers(dtype, data, validity)) - def _newest_compat_level(self) -> int: + @staticmethod + def _newest_compat_level() -> int: """ - Get the highest supported flavor version. + Get the newest supported compat level. - This is only used by pyo3-polars, - and it is simpler not to make it a static method. + This is for pyo3-polars. 
""" return CompatLevel._newest()._version # type: ignore[attr-defined] From b2b9d91265c9389ccbd5cc42154c7ddbd5369d48 Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Sun, 7 Jul 2024 17:49:56 +0800 Subject: [PATCH 13/14] test(python): always used newest compat_level when testing write_ipc --- py-polars/tests/unit/io/test_ipc.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/py-polars/tests/unit/io/test_ipc.py b/py-polars/tests/unit/io/test_ipc.py index 0a180a8b668b..1c0b7ed4516c 100644 --- a/py-polars/tests/unit/io/test_ipc.py +++ b/py-polars/tests/unit/io/test_ipc.py @@ -11,6 +11,7 @@ import polars as pl from polars.exceptions import ComputeError +from polars.interchange.protocol import CompatLevel from polars.testing import assert_frame_equal if TYPE_CHECKING: @@ -234,7 +235,7 @@ def test_from_float16() -> None: def test_binview_ipc_mmap(tmp_path: Path) -> None: df = pl.DataFrame({"foo": ["aa" * 10, "bb", None, "small", "big" * 20]}) file_path = tmp_path / "dump.ipc" - df.write_ipc(file_path) + df.write_ipc(file_path, compat_level=CompatLevel.newest()) read = pl.read_ipc(file_path, memory_map=True) assert_frame_equal(df, read) @@ -243,7 +244,7 @@ def test_list_nested_enum() -> None: dtype = pl.List(pl.Enum(["a", "b", "c"])) df = pl.DataFrame(pl.Series("list_cat", [["a", "b", "c", None]], dtype=dtype)) buffer = io.BytesIO() - df.write_ipc(buffer) + df.write_ipc(buffer, compat_level=CompatLevel.newest()) df = pl.read_ipc(buffer) assert df.get_column("list_cat").dtype == dtype @@ -256,7 +257,7 @@ def test_struct_nested_enum() -> None: ) ) buffer = io.BytesIO() - df.write_ipc(buffer) + df.write_ipc(buffer, compat_level=CompatLevel.newest()) df = pl.read_ipc(buffer) assert df.get_column("struct_cat").dtype == dtype @@ -268,7 +269,7 @@ def test_ipc_view_gc_14448() -> None: df = pl.DataFrame( pl.Series(["small"] * 10 + ["looooooong string......."] * 750).slice(20, 20) ) - df.write_ipc(f) + df.write_ipc(f, compat_level=CompatLevel.newest()) f.seek(0) assert_frame_equal(pl.read_ipc(f), df) From 09045d71c5621431f62ac1ca7299af19fecc352b Mon Sep 17 00:00:00 2001 From: Misaki Kasumi Date: Sun, 7 Jul 2024 17:50:58 +0800 Subject: [PATCH 14/14] feat(python): deprecate `future` arg in v1.1 --- py-polars/polars/dataframe/frame.py | 6 +++--- py-polars/polars/series/series.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 49940fbaf1ca..f07b3192a699 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -1385,7 +1385,7 @@ def item(self, row: int | None = None, column: int | str | None = None) -> Any: ) return s.get_index_signed(row) - @deprecate_renamed_parameter("future", "compat_level", version="1.0.1") + @deprecate_renamed_parameter("future", "compat_level", version="1.1") def to_arrow(self, *, compat_level: CompatLevel | None = None) -> pa.Table: """ Collect the underlying arrow arrays in an Arrow Table. @@ -3307,7 +3307,7 @@ def write_ipc( compat_level: CompatLevel | None = None, ) -> None: ... - @deprecate_renamed_parameter("future", "compat_level", version="1.0.1") + @deprecate_renamed_parameter("future", "compat_level", version="1.1") def write_ipc( self, file: str | Path | IO[bytes] | None, @@ -3380,7 +3380,7 @@ def write_ipc_stream( compat_level: CompatLevel | None = None, ) -> None: ... 
- @deprecate_renamed_parameter("future", "compat_level", version="1.0.1") + @deprecate_renamed_parameter("future", "compat_level", version="1.1") def write_ipc_stream( self, file: str | Path | IO[bytes] | None, diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index 2a542a1f45e2..bd3d6a45a43f 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -4352,7 +4352,7 @@ def to_torch(self) -> torch.Tensor: # tensor.rename(self.name) return tensor - @deprecate_renamed_parameter("future", "compat_level", version="1.0.1") + @deprecate_renamed_parameter("future", "compat_level", version="1.1") def to_arrow(self, *, compat_level: CompatLevel | None = None) -> pa.Array: """ Return the underlying Arrow array.