From ad836bd9970d72dad0c98a12a658321635ae57c4 Mon Sep 17 00:00:00 2001 From: Rui He <118280419+ruihe774@users.noreply.github.com> Date: Sun, 7 Jul 2024 18:03:31 +0800 Subject: [PATCH] feat: add "future" versioning (#17421) --- .../src/chunked_array/array/mod.rs | 10 ++- .../chunked_array/builder/fixed_size_list.rs | 2 +- .../chunked_array/builder/list/anonymous.rs | 4 +- .../chunked_array/builder/list/primitive.rs | 2 +- .../src/chunked_array/builder/mod.rs | 2 +- .../src/chunked_array/builder/primitive.rs | 2 +- crates/polars-core/src/chunked_array/cast.rs | 2 +- .../polars-core/src/chunked_array/collect.rs | 19 ++-- crates/polars-core/src/chunked_array/from.rs | 10 ++- .../chunked_array/logical/categorical/from.rs | 26 +++--- .../chunked_array/logical/categorical/mod.rs | 2 +- .../src/chunked_array/logical/decimal.rs | 2 +- .../src/chunked_array/logical/struct_/mod.rs | 10 +-- crates/polars-core/src/chunked_array/mod.rs | 6 +- .../src/chunked_array/ops/apply.rs | 6 +- .../src/chunked_array/ops/arity.rs | 30 +++++-- .../src/chunked_array/ops/bit_repr.rs | 2 +- .../src/chunked_array/ops/explode.rs | 6 +- .../src/chunked_array/ops/fill_null.rs | 4 +- .../src/chunked_array/ops/filter.rs | 8 +- .../polars-core/src/chunked_array/ops/full.rs | 26 ++++-- .../src/chunked_array/ops/gather.rs | 4 +- .../src/chunked_array/ops/rolling_window.rs | 2 +- .../polars-core/src/chunked_array/ops/set.rs | 11 ++- .../ops/sort/arg_sort_multiple.rs | 4 +- .../src/chunked_array/ops/sort/mod.rs | 2 +- .../src/chunked_array/ops/unique/mod.rs | 12 ++- .../src/chunked_array/trusted_len.rs | 5 +- crates/polars-core/src/datatypes/dtype.rs | 55 +++++++++--- crates/polars-core/src/datatypes/field.rs | 6 +- .../frame/group_by/aggregations/agg_list.rs | 12 ++- crates/polars-core/src/frame/mod.rs | 16 ++-- crates/polars-core/src/frame/row/transpose.rs | 2 +- crates/polars-core/src/schema.rs | 4 +- crates/polars-core/src/series/from.rs | 9 +- .../src/series/implementations/decimal.rs | 3 +- crates/polars-core/src/series/into.rs | 22 ++--- crates/polars-core/src/series/mod.rs | 4 +- crates/polars-core/src/series/ops/reshape.rs | 8 +- crates/polars-expr/src/expressions/window.rs | 2 +- crates/polars-ffi/src/version_0.rs | 6 +- crates/polars-io/src/avro/write.rs | 4 +- crates/polars-io/src/ipc/ipc_stream.rs | 12 +-- crates/polars-io/src/ipc/write.rs | 20 ++--- crates/polars-io/src/ipc/write_async.rs | 6 +- crates/polars-io/src/json/mod.rs | 14 +-- crates/polars-io/src/ndjson/mod.rs | 2 +- .../src/parquet/write/batched_writer.rs | 4 +- crates/polars-io/src/parquet/write/writer.rs | 2 +- crates/polars-io/src/shared.rs | 4 +- .../src/chunked_array/gather/chunked.rs | 4 +- .../src/chunked_array/gather_skip_nulls.rs | 6 +- .../polars-ops/src/chunked_array/repeat_by.rs | 2 +- .../src/chunked_array/strings/extract.rs | 2 +- .../src/chunked_array/strings/json_path.rs | 2 +- crates/polars-ops/src/series/ops/ewm_by.rs | 4 +- .../series/ops/interpolation/interpolate.rs | 2 +- .../ops/interpolation/interpolate_by.rs | 4 +- .../sinks/group_by/aggregates/mean.rs | 2 +- .../sinks/group_by/aggregates/sum.rs | 2 +- .../executors/sinks/group_by/generic/eval.rs | 2 +- .../sinks/group_by/generic/hash_table.rs | 2 +- crates/polars-pipe/src/executors/sinks/io.rs | 8 +- .../src/executors/sinks/sort/sink_multiple.rs | 2 +- .../src/dsl/function_expr/plugin.rs | 2 +- .../src/dsl/function_expr/struct_.rs | 2 +- .../polars-time/src/chunkedarray/datetime.rs | 2 +- crates/polars/tests/it/io/ipc.rs | 6 +- py-polars/polars/dataframe/frame.py | 86 ++++++++----------- py-polars/polars/interchange/protocol.py | 46 ++++++++++ py-polars/polars/series/series.py | 29 +++++-- py-polars/src/conversion/mod.rs | 25 ++++++ py-polars/src/dataframe/export.rs | 7 +- py-polars/src/dataframe/io.rs | 9 +- py-polars/src/dataframe/serde.rs | 2 +- py-polars/src/series/export.rs | 8 +- py-polars/src/series/mod.rs | 2 +- py-polars/tests/unit/interop/test_interop.py | 49 ++++++++++- py-polars/tests/unit/io/test_ipc.py | 9 +- 79 files changed, 501 insertions(+), 265 deletions(-) diff --git a/crates/polars-core/src/chunked_array/array/mod.rs b/crates/polars-core/src/chunked_array/array/mod.rs index 96fee06ff3b2..a3b7a1a1f339 100644 --- a/crates/polars-core/src/chunked_array/array/mod.rs +++ b/crates/polars-core/src/chunked_array/array/mod.rs @@ -44,7 +44,9 @@ impl ArrayChunked { ) -> PolarsResult { // Rechunk or the generated Series will have wrong length. let ca = self.rechunk(); - let field = self.inner_dtype().to_arrow_field("item", true); + let field = self + .inner_dtype() + .to_arrow_field("item", CompatLevel::newest()); let chunks = ca.downcast_iter().map(|arr| { let elements = unsafe { @@ -66,8 +68,10 @@ impl ArrayChunked { let out = out.rechunk(); let values = out.chunks()[0].clone(); - let inner_dtype = - FixedSizeListArray::default_datatype(out.dtype().to_arrow(true), ca.width()); + let inner_dtype = FixedSizeListArray::default_datatype( + out.dtype().to_arrow(CompatLevel::newest()), + ca.width(), + ); let arr = FixedSizeListArray::new(inner_dtype, values, arr.validity().cloned()); Ok(arr) }); diff --git a/crates/polars-core/src/chunked_array/builder/fixed_size_list.rs b/crates/polars-core/src/chunked_array/builder/fixed_size_list.rs index a419ee930401..e235d08ffbd6 100644 --- a/crates/polars-core/src/chunked_array/builder/fixed_size_list.rs +++ b/crates/polars-core/src/chunked_array/builder/fixed_size_list.rs @@ -124,7 +124,7 @@ impl FixedSizeListBuilder for AnonymousOwnedFixedSizeListBuilder { .finish( self.inner_dtype .as_ref() - .map(|dt| dt.to_arrow(true)) + .map(|dt| dt.to_arrow(CompatLevel::newest())) .as_ref(), ) .unwrap(); diff --git a/crates/polars-core/src/chunked_array/builder/list/anonymous.rs b/crates/polars-core/src/chunked_array/builder/list/anonymous.rs index 1fb5393db1df..99b566320fbf 100644 --- a/crates/polars-core/src/chunked_array/builder/list/anonymous.rs +++ b/crates/polars-core/src/chunked_array/builder/list/anonymous.rs @@ -89,7 +89,7 @@ impl<'a> AnonymousListBuilder<'a> { let inner_dtype_physical = inner_dtype .as_ref() - .map(|dt| dt.to_physical().to_arrow(true)); + .map(|dt| dt.to_physical().to_arrow(CompatLevel::newest())); let arr = slf.builder.finish(inner_dtype_physical.as_ref()).unwrap(); let list_dtype_logical = match inner_dtype { @@ -157,7 +157,7 @@ impl ListBuilderTrait for AnonymousOwnedListBuilder { let slf = std::mem::take(self); let inner_dtype_physical = inner_dtype .as_ref() - .map(|dt| dt.to_physical().to_arrow(true)); + .map(|dt| dt.to_physical().to_arrow(CompatLevel::newest())); let arr = slf.builder.finish(inner_dtype_physical.as_ref()).unwrap(); let list_dtype_logical = match inner_dtype { diff --git a/crates/polars-core/src/chunked_array/builder/list/primitive.rs b/crates/polars-core/src/chunked_array/builder/list/primitive.rs index ce9caff3a116..34e12433db7a 100644 --- a/crates/polars-core/src/chunked_array/builder/list/primitive.rs +++ b/crates/polars-core/src/chunked_array/builder/list/primitive.rs @@ -39,7 +39,7 @@ where ) -> Self { let values = MutablePrimitiveArray::::with_capacity_from( values_capacity, - values_type.to_arrow(true), + values_type.to_arrow(CompatLevel::newest()), ); let builder = LargePrimitiveBuilder::::new_with_capacity(values, capacity); let field = Field::new(name, DataType::List(Box::new(logical_type))); diff --git a/crates/polars-core/src/chunked_array/builder/mod.rs b/crates/polars-core/src/chunked_array/builder/mod.rs index 3c1060016101..bac88f5a1ea5 100644 --- a/crates/polars-core/src/chunked_array/builder/mod.rs +++ b/crates/polars-core/src/chunked_array/builder/mod.rs @@ -66,7 +66,7 @@ where T: PolarsNumericType, { fn from_slice(name: &str, v: &[T::Native]) -> Self { - let arr = PrimitiveArray::from_slice(v).to(T::get_dtype().to_arrow(true)); + let arr = PrimitiveArray::from_slice(v).to(T::get_dtype().to_arrow(CompatLevel::newest())); ChunkedArray::with_chunk(name, arr) } diff --git a/crates/polars-core/src/chunked_array/builder/primitive.rs b/crates/polars-core/src/chunked_array/builder/primitive.rs index eaedc93a5a80..14eb2c1f4f46 100644 --- a/crates/polars-core/src/chunked_array/builder/primitive.rs +++ b/crates/polars-core/src/chunked_array/builder/primitive.rs @@ -41,7 +41,7 @@ where { pub fn new(name: &str, capacity: usize) -> Self { let array_builder = MutablePrimitiveArray::::with_capacity(capacity) - .to(T::get_dtype().to_arrow(true)); + .to(T::get_dtype().to_arrow(CompatLevel::newest())); PrimitiveChunkedBuilder { array_builder, diff --git a/crates/polars-core/src/chunked_array/cast.rs b/crates/polars-core/src/chunked_array/cast.rs index ceb73d492d0b..892d28203e55 100644 --- a/crates/polars-core/src/chunked_array/cast.rs +++ b/crates/polars-core/src/chunked_array/cast.rs @@ -51,7 +51,7 @@ pub(crate) fn cast_chunks( let check_nulls = matches!(options, CastOptions::Strict); let options = options.into(); - let arrow_dtype = dtype.try_to_arrow(true)?; + let arrow_dtype = dtype.try_to_arrow(CompatLevel::newest())?; chunks .iter() .map(|arr| { diff --git a/crates/polars-core/src/chunked_array/collect.rs b/crates/polars-core/src/chunked_array/collect.rs index 6131d741eac6..054f59de8958 100644 --- a/crates/polars-core/src/chunked_array/collect.rs +++ b/crates/polars-core/src/chunked_array/collect.rs @@ -18,6 +18,7 @@ use crate::chunked_array::ChunkedArray; use crate::datatypes::{ ArrayCollectIterExt, ArrayFromIter, ArrayFromIterDtype, DataType, Field, PolarsDataType, }; +use crate::prelude::CompatLevel; pub trait ChunkedCollectIterExt: Iterator + Sized { #[inline] @@ -26,7 +27,7 @@ pub trait ChunkedCollectIterExt: Iterator + Sized { T::Array: ArrayFromIterDtype, { let field = Arc::new(Field::new(name, dtype.clone())); - let arr = self.collect_arr_with_dtype(field.dtype.to_arrow(true)); + let arr = self.collect_arr_with_dtype(field.dtype.to_arrow(CompatLevel::newest())); ChunkedArray::from_chunk_iter_and_field(field, [arr]) } @@ -36,7 +37,7 @@ pub trait ChunkedCollectIterExt: Iterator + Sized { T::Array: ArrayFromIterDtype, { let field = Arc::clone(&name_dtype_src.field); - let arr = self.collect_arr_with_dtype(field.dtype.to_arrow(true)); + let arr = self.collect_arr_with_dtype(field.dtype.to_arrow(CompatLevel::newest())); ChunkedArray::from_chunk_iter_and_field(field, [arr]) } @@ -47,7 +48,7 @@ pub trait ChunkedCollectIterExt: Iterator + Sized { Self: TrustedLen, { let field = Arc::new(Field::new(name, dtype.clone())); - let arr = self.collect_arr_trusted_with_dtype(field.dtype.to_arrow(true)); + let arr = self.collect_arr_trusted_with_dtype(field.dtype.to_arrow(CompatLevel::newest())); ChunkedArray::from_chunk_iter_and_field(field, [arr]) } @@ -58,7 +59,7 @@ pub trait ChunkedCollectIterExt: Iterator + Sized { Self: TrustedLen, { let field = Arc::clone(&name_dtype_src.field); - let arr = self.collect_arr_trusted_with_dtype(field.dtype.to_arrow(true)); + let arr = self.collect_arr_trusted_with_dtype(field.dtype.to_arrow(CompatLevel::newest())); ChunkedArray::from_chunk_iter_and_field(field, [arr]) } @@ -73,7 +74,7 @@ pub trait ChunkedCollectIterExt: Iterator + Sized { Self: Iterator>, { let field = Arc::new(Field::new(name, dtype.clone())); - let arr = self.try_collect_arr_with_dtype(field.dtype.to_arrow(true))?; + let arr = self.try_collect_arr_with_dtype(field.dtype.to_arrow(CompatLevel::newest()))?; Ok(ChunkedArray::from_chunk_iter_and_field(field, [arr])) } @@ -87,7 +88,7 @@ pub trait ChunkedCollectIterExt: Iterator + Sized { Self: Iterator>, { let field = Arc::clone(&name_dtype_src.field); - let arr = self.try_collect_arr_with_dtype(field.dtype.to_arrow(true))?; + let arr = self.try_collect_arr_with_dtype(field.dtype.to_arrow(CompatLevel::newest()))?; Ok(ChunkedArray::from_chunk_iter_and_field(field, [arr])) } @@ -102,7 +103,8 @@ pub trait ChunkedCollectIterExt: Iterator + Sized { Self: Iterator> + TrustedLen, { let field = Arc::new(Field::new(name, dtype.clone())); - let arr = self.try_collect_arr_trusted_with_dtype(field.dtype.to_arrow(true))?; + let arr = + self.try_collect_arr_trusted_with_dtype(field.dtype.to_arrow(CompatLevel::newest()))?; Ok(ChunkedArray::from_chunk_iter_and_field(field, [arr])) } @@ -116,7 +118,8 @@ pub trait ChunkedCollectIterExt: Iterator + Sized { Self: Iterator> + TrustedLen, { let field = Arc::clone(&name_dtype_src.field); - let arr = self.try_collect_arr_trusted_with_dtype(field.dtype.to_arrow(true))?; + let arr = + self.try_collect_arr_trusted_with_dtype(field.dtype.to_arrow(CompatLevel::newest()))?; Ok(ChunkedArray::from_chunk_iter_and_field(field, [arr])) } } diff --git a/crates/polars-core/src/chunked_array/from.rs b/crates/polars-core/src/chunked_array/from.rs index af4892890336..56ac8cb90604 100644 --- a/crates/polars-core/src/chunked_array/from.rs +++ b/crates/polars-core/src/chunked_array/from.rs @@ -217,7 +217,10 @@ where #[cfg(debug_assertions)] { if !chunks.is_empty() && !chunks[0].is_empty() && dtype.is_primitive() { - assert_eq!(chunks[0].data_type(), &dtype.to_arrow(true)) + assert_eq!( + chunks[0].data_type(), + &dtype.to_arrow(CompatLevel::newest()) + ) } } let field = Arc::new(Field::new(name, dtype)); @@ -234,7 +237,10 @@ where } pub fn full_null_like(ca: &Self, length: usize) -> Self { - let chunks = std::iter::once(T::Array::full_null(length, ca.dtype().to_arrow(true))); + let chunks = std::iter::once(T::Array::full_null( + length, + ca.dtype().to_arrow(CompatLevel::newest()), + )); Self::from_chunk_iter_like(ca, chunks) } } diff --git a/crates/polars-core/src/chunked_array/logical/categorical/from.rs b/crates/polars-core/src/chunked_array/logical/categorical/from.rs index 568d5650ba8e..5df0a2691583 100644 --- a/crates/polars-core/src/chunked_array/logical/categorical/from.rs +++ b/crates/polars-core/src/chunked_array/logical/categorical/from.rs @@ -3,8 +3,8 @@ use arrow::datatypes::IntegerType; use super::*; -fn convert_values(arr: &Utf8ViewArray, pl_flavor: bool) -> ArrayRef { - if pl_flavor { +fn convert_values(arr: &Utf8ViewArray, compat_level: CompatLevel) -> ArrayRef { + if compat_level.0 >= 1 { arr.clone().boxed() } else { utf8view_to_utf8::(arr).boxed() @@ -12,16 +12,16 @@ fn convert_values(arr: &Utf8ViewArray, pl_flavor: bool) -> ArrayRef { } impl CategoricalChunked { - pub fn to_arrow(&self, pl_flavor: bool, as_i64: bool) -> ArrayRef { + pub fn to_arrow(&self, compat_level: CompatLevel, as_i64: bool) -> ArrayRef { if as_i64 { - self.to_i64(pl_flavor).boxed() + self.to_i64(compat_level).boxed() } else { - self.to_u32(pl_flavor).boxed() + self.to_u32(compat_level).boxed() } } - fn to_u32(&self, pl_flavor: bool) -> DictionaryArray { - let values_dtype = if pl_flavor { + fn to_u32(&self, compat_level: CompatLevel) -> DictionaryArray { + let values_dtype = if compat_level.0 >= 1 { ArrowDataType::Utf8View } else { ArrowDataType::LargeUtf8 @@ -32,7 +32,7 @@ impl CategoricalChunked { let dtype = ArrowDataType::Dictionary(IntegerType::UInt32, Box::new(values_dtype), false); match map { RevMapping::Local(arr, _) => { - let values = convert_values(arr, pl_flavor); + let values = convert_values(arr, compat_level); // SAFETY: // the keys are in bounds @@ -44,7 +44,7 @@ impl CategoricalChunked { .map(|opt_k| opt_k.map(|k| *reverse_map.get(k).unwrap())); let keys = PrimitiveArray::from_trusted_len_iter(iter); - let values = convert_values(values, pl_flavor); + let values = convert_values(values, compat_level); // SAFETY: // the keys are in bounds @@ -53,8 +53,8 @@ impl CategoricalChunked { } } - fn to_i64(&self, pl_flavor: bool) -> DictionaryArray { - let values_dtype = if pl_flavor { + fn to_i64(&self, compat_level: CompatLevel) -> DictionaryArray { + let values_dtype = if compat_level.0 >= 1 { ArrowDataType::Utf8View } else { ArrowDataType::LargeUtf8 @@ -65,7 +65,7 @@ impl CategoricalChunked { let dtype = ArrowDataType::Dictionary(IntegerType::Int64, Box::new(values_dtype), false); match map { RevMapping::Local(arr, _) => { - let values = convert_values(arr, pl_flavor); + let values = convert_values(arr, compat_level); // SAFETY: // the keys are in bounds @@ -89,7 +89,7 @@ impl CategoricalChunked { .map(|opt_k| opt_k.map(|k| *reverse_map.get(k).unwrap() as i64)); let keys = PrimitiveArray::from_trusted_len_iter(iter); - let values = convert_values(values, pl_flavor); + let values = convert_values(values, compat_level); // SAFETY: // the keys are in bounds diff --git a/crates/polars-core/src/chunked_array/logical/categorical/mod.rs b/crates/polars-core/src/chunked_array/logical/categorical/mod.rs index 30ab29b5c78a..1b2fa0d06a7f 100644 --- a/crates/polars-core/src/chunked_array/logical/categorical/mod.rs +++ b/crates/polars-core/src/chunked_array/logical/categorical/mod.rs @@ -464,7 +464,7 @@ mod test { let ca = ca.cast(&DataType::Categorical(None, Default::default()))?; let ca = ca.categorical().unwrap(); - let arr = ca.to_arrow(true, false); + let arr = ca.to_arrow(CompatLevel::newest(), false); let s = Series::try_from(("foo", arr))?; assert!(matches!(s.dtype(), &DataType::Categorical(_, _))); assert_eq!(s.null_count(), 1); diff --git a/crates/polars-core/src/chunked_array/logical/decimal.rs b/crates/polars-core/src/chunked_array/logical/decimal.rs index 4526f0d63e99..acc19a522e46 100644 --- a/crates/polars-core/src/chunked_array/logical/decimal.rs +++ b/crates/polars-core/src/chunked_array/logical/decimal.rs @@ -20,7 +20,7 @@ impl Int128Chunked { let (_, values, validity) = default.into_inner(); *arr = PrimitiveArray::new( - DataType::Decimal(precision, Some(scale)).to_arrow(true), + DataType::Decimal(precision, Some(scale)).to_arrow(CompatLevel::newest()), values, validity, ); diff --git a/crates/polars-core/src/chunked_array/logical/struct_/mod.rs b/crates/polars-core/src/chunked_array/logical/struct_/mod.rs index 07bc343aee21..3fc4baa6a706 100644 --- a/crates/polars-core/src/chunked_array/logical/struct_/mod.rs +++ b/crates/polars-core/src/chunked_array/logical/struct_/mod.rs @@ -48,12 +48,12 @@ fn fields_to_struct_array(fields: &[Series], physical: bool) -> (ArrayRef, Vec s.to_arrow(0, true), + DataType::Object(_, _) => s.to_arrow(0, CompatLevel::newest()), _ => { if physical { s.chunks()[0].clone() } else { - s.to_arrow(0, true) + s.to_arrow(0, CompatLevel::newest()) } }, } @@ -145,7 +145,7 @@ impl StructChunked { .iter() .map(|s| match s.dtype() { #[cfg(feature = "object")] - DataType::Object(_, _) => s.to_arrow(i, true), + DataType::Object(_, _) => s.to_arrow(i, CompatLevel::newest()), _ => s.chunks()[i].clone(), }) .collect::>(); @@ -295,11 +295,11 @@ impl StructChunked { self.into() } - pub(crate) fn to_arrow(&self, i: usize, pl_flavor: bool) -> ArrayRef { + pub(crate) fn to_arrow(&self, i: usize, compat_level: CompatLevel) -> ArrayRef { let values = self .fields .iter() - .map(|s| s.to_arrow(i, pl_flavor)) + .map(|s| s.to_arrow(i, compat_level)) .collect::>(); // we determine fields from arrays as there might be object arrays diff --git a/crates/polars-core/src/chunked_array/mod.rs b/crates/polars-core/src/chunked_array/mod.rs index 3cc4f06b798e..176bbd11fc90 100644 --- a/crates/polars-core/src/chunked_array/mod.rs +++ b/crates/polars-core/src/chunked_array/mod.rs @@ -933,7 +933,11 @@ pub(crate) fn to_primitive( values: Vec, validity: Option, ) -> PrimitiveArray { - PrimitiveArray::new(T::get_dtype().to_arrow(true), values.into(), validity) + PrimitiveArray::new( + T::get_dtype().to_arrow(CompatLevel::newest()), + values.into(), + validity, + ) } pub(crate) fn to_array( diff --git a/crates/polars-core/src/chunked_array/ops/apply.rs b/crates/polars-core/src/chunked_array/ops/apply.rs index f6d7db426cd1..632afe52d889 100644 --- a/crates/polars-core/src/chunked_array/ops/apply.rs +++ b/crates/polars-core/src/chunked_array/ops/apply.rs @@ -42,13 +42,13 @@ where let out: U::Array = arr .values_iter() .map(&mut op) - .collect_arr_with_dtype(dtype.to_arrow(true)); + .collect_arr_with_dtype(dtype.to_arrow(CompatLevel::newest())); out.with_validity_typed(arr.validity().cloned()) } else { let out: U::Array = arr .iter() .map(|opt| opt.map(&mut op)) - .collect_arr_with_dtype(dtype.to_arrow(true)); + .collect_arr_with_dtype(dtype.to_arrow(CompatLevel::newest())); out.with_validity_typed(arr.validity().cloned()) } }); @@ -133,7 +133,7 @@ where drop(arr); let compute_immutable = |arr: &PrimitiveArray| { - arrow::compute::arity::unary(arr, f, S::get_dtype().to_arrow(true)) + arrow::compute::arity::unary(arr, f, S::get_dtype().to_arrow(CompatLevel::newest())) }; if owned_arr.values().is_sliced() { diff --git a/crates/polars-core/src/chunked_array/ops/arity.rs b/crates/polars-core/src/chunked_array/ops/arity.rs index 0f3cf8cce4ca..ed9e6dab79ec 100644 --- a/crates/polars-core/src/chunked_array/ops/arity.rs +++ b/crates/polars-core/src/chunked_array/ops/arity.rs @@ -6,7 +6,7 @@ use polars_error::PolarsResult; use crate::chunked_array::metadata::MetadataProperties; use crate::datatypes::{ArrayCollectIterExt, ArrayFromIter}; -use crate::prelude::{ChunkedArray, PolarsDataType, Series}; +use crate::prelude::{ChunkedArray, CompatLevel, PolarsDataType, Series}; use crate::utils::{align_chunks_binary, align_chunks_binary_owned, align_chunks_ternary}; // We need this helper because for<'a> notation can't yet be applied properly @@ -106,7 +106,7 @@ where V::Array: ArrayFromIter<>>::Ret>, { if ca.null_count() == ca.len() { - let arr = V::Array::full_null(ca.len(), V::get_dtype().to_arrow(true)); + let arr = V::Array::full_null(ca.len(), V::get_dtype().to_arrow(CompatLevel::newest())); return ChunkedArray::with_chunk(ca.name(), arr); } @@ -130,7 +130,7 @@ where V::Array: ArrayFromIter, { if ca.null_count() == ca.len() { - let arr = V::Array::full_null(ca.len(), V::get_dtype().to_arrow(true)); + let arr = V::Array::full_null(ca.len(), V::get_dtype().to_arrow(CompatLevel::newest())); return Ok(ChunkedArray::with_chunk(ca.name(), arr)); } @@ -308,7 +308,7 @@ where { if lhs.null_count() == lhs.len() || rhs.null_count() == rhs.len() { let len = lhs.len().min(rhs.len()); - let arr = V::Array::full_null(len, V::get_dtype().to_arrow(true)); + let arr = V::Array::full_null(len, V::get_dtype().to_arrow(CompatLevel::newest())); return ChunkedArray::with_chunk(lhs.name(), arr); } @@ -704,7 +704,7 @@ where let min = lhs.len().min(rhs.len()); let max = lhs.len().max(rhs.len()); let len = if min == 1 { max } else { min }; - let arr = V::Array::full_null(len, V::get_dtype().to_arrow(true)); + let arr = V::Array::full_null(len, V::get_dtype().to_arrow(CompatLevel::newest())); return ChunkedArray::with_chunk(lhs.name(), arr); } @@ -745,7 +745,10 @@ where let opt_rhs = rhs.get(0); match opt_rhs { None => { - let arr = O::Array::full_null(lhs.len(), O::get_dtype().to_arrow(true)); + let arr = O::Array::full_null( + lhs.len(), + O::get_dtype().to_arrow(CompatLevel::newest()), + ); ChunkedArray::::with_chunk(lhs.name(), arr) }, Some(rhs) => unary_kernel(lhs, |arr| rhs_broadcast_kernel(arr, rhs.clone())), @@ -755,7 +758,10 @@ where let opt_lhs = lhs.get(0); match opt_lhs { None => { - let arr = O::Array::full_null(rhs.len(), O::get_dtype().to_arrow(true)); + let arr = O::Array::full_null( + rhs.len(), + O::get_dtype().to_arrow(CompatLevel::newest()), + ); ChunkedArray::::with_chunk(lhs.name(), arr) }, Some(lhs) => unary_kernel(rhs, |arr| lhs_broadcast_kernel(lhs.clone(), arr)), @@ -789,7 +795,10 @@ where let opt_rhs = rhs.get(0); match opt_rhs { None => { - let arr = O::Array::full_null(lhs.len(), O::get_dtype().to_arrow(true)); + let arr = O::Array::full_null( + lhs.len(), + O::get_dtype().to_arrow(CompatLevel::newest()), + ); ChunkedArray::::with_chunk(lhs.name(), arr) }, Some(rhs) => unary_kernel_owned(lhs, |arr| rhs_broadcast_kernel(arr, rhs.clone())), @@ -799,7 +808,10 @@ where let opt_lhs = lhs.get(0); match opt_lhs { None => { - let arr = O::Array::full_null(rhs.len(), O::get_dtype().to_arrow(true)); + let arr = O::Array::full_null( + rhs.len(), + O::get_dtype().to_arrow(CompatLevel::newest()), + ); ChunkedArray::::with_chunk(lhs.name(), arr) }, Some(lhs) => unary_kernel_owned(rhs, |arr| lhs_broadcast_kernel(lhs.clone(), arr)), diff --git a/crates/polars-core/src/chunked_array/ops/bit_repr.rs b/crates/polars-core/src/chunked_array/ops/bit_repr.rs index 37617a1d43ea..9a2f1c33594a 100644 --- a/crates/polars-core/src/chunked_array/ops/bit_repr.rs +++ b/crates/polars-core/src/chunked_array/ops/bit_repr.rs @@ -46,7 +46,7 @@ fn reinterpret_list_chunked( let pa = PrimitiveArray::from_data_default(reinterpreted_buf, inner_arr.validity().cloned()); LargeListArray::new( - DataType::List(Box::new(U::get_dtype())).to_arrow(true), + DataType::List(Box::new(U::get_dtype())).to_arrow(CompatLevel::newest()), array.offsets().clone(), pa.to_boxed(), array.validity().cloned(), diff --git a/crates/polars-core/src/chunked_array/ops/explode.rs b/crates/polars-core/src/chunked_array/ops/explode.rs index 910d2941b28d..f469209558b6 100644 --- a/crates/polars-core/src/chunked_array/ops/explode.rs +++ b/crates/polars-core/src/chunked_array/ops/explode.rs @@ -150,7 +150,7 @@ where unsafe { set_bit_unchecked(validity_slice, i, false) } } let arr = PrimitiveArray::new( - T::get_dtype().to_arrow(true), + T::get_dtype().to_arrow(CompatLevel::newest()), new_values.into(), Some(validity.into()), ); @@ -269,7 +269,9 @@ impl ExplodeByOffsets for ListChunked { last = o; } process_range(start, last, &mut builder); - let arr = builder.finish(Some(&inner_type.to_arrow(true))).unwrap(); + let arr = builder + .finish(Some(&inner_type.to_arrow(CompatLevel::newest()))) + .unwrap(); let mut ca = unsafe { self.copy_with_chunks(vec![Box::new(arr)]) }; use MetadataProperties as P; diff --git a/crates/polars-core/src/chunked_array/ops/fill_null.rs b/crates/polars-core/src/chunked_array/ops/fill_null.rs index f09852f94f5e..1a20cadc7175 100644 --- a/crates/polars-core/src/chunked_array/ops/fill_null.rs +++ b/crates/polars-core/src/chunked_array/ops/fill_null.rs @@ -306,7 +306,7 @@ where ChunkedArray::from_chunk_iter_like( ca, [ - T::Array::from_zeroable_vec(values, ca.dtype().to_arrow(true)) + T::Array::from_zeroable_vec(values, ca.dtype().to_arrow(CompatLevel::newest())) .with_validity_typed(Some(bm.into())), ], ) @@ -340,7 +340,7 @@ where ChunkedArray::from_chunk_iter_like( ca, [ - T::Array::from_zeroable_vec(values, ca.dtype().to_arrow(true)) + T::Array::from_zeroable_vec(values, ca.dtype().to_arrow(CompatLevel::newest())) .with_validity_typed(Some(bm.into())), ], ) diff --git a/crates/polars-core/src/chunked_array/ops/filter.rs b/crates/polars-core/src/chunked_array/ops/filter.rs index a6cf1ca98288..d0e6fe59c7e6 100644 --- a/crates/polars-core/src/chunked_array/ops/filter.rs +++ b/crates/polars-core/src/chunked_array/ops/filter.rs @@ -120,7 +120,9 @@ impl ChunkFilter for ListChunked { Some(true) => Ok(self.clone()), _ => Ok(ListChunked::from_chunk_iter( self.name(), - [ListArray::new_empty(self.dtype().to_arrow(true))], + [ListArray::new_empty( + self.dtype().to_arrow(CompatLevel::newest()), + )], )), }; } @@ -146,7 +148,9 @@ impl ChunkFilter for ArrayChunked { Some(true) => Ok(self.clone()), _ => Ok(ArrayChunked::from_chunk_iter( self.name(), - [FixedSizeListArray::new_empty(self.dtype().to_arrow(true))], + [FixedSizeListArray::new_empty( + self.dtype().to_arrow(CompatLevel::newest()), + )], )), }; } diff --git a/crates/polars-core/src/chunked_array/ops/full.rs b/crates/polars-core/src/chunked_array/ops/full.rs index c04d68cce776..71d80749e618 100644 --- a/crates/polars-core/src/chunked_array/ops/full.rs +++ b/crates/polars-core/src/chunked_array/ops/full.rs @@ -21,7 +21,7 @@ where T: PolarsNumericType, { fn full_null(name: &str, length: usize) -> Self { - let arr = PrimitiveArray::new_null(T::get_dtype().to_arrow(true), length); + let arr = PrimitiveArray::new_null(T::get_dtype().to_arrow(CompatLevel::newest()), length); ChunkedArray::with_chunk(name, arr) } } @@ -55,7 +55,7 @@ impl<'a> ChunkFull<&'a str> for StringChunked { impl ChunkFullNull for StringChunked { fn full_null(name: &str, length: usize) -> Self { - let arr = Utf8ViewArray::new_null(DataType::String.to_arrow(true), length); + let arr = Utf8ViewArray::new_null(DataType::String.to_arrow(CompatLevel::newest()), length); ChunkedArray::with_chunk(name, arr) } } @@ -72,7 +72,8 @@ impl<'a> ChunkFull<&'a [u8]> for BinaryChunked { impl ChunkFullNull for BinaryChunked { fn full_null(name: &str, length: usize) -> Self { - let arr = BinaryViewArray::new_null(DataType::Binary.to_arrow(true), length); + let arr = + BinaryViewArray::new_null(DataType::Binary.to_arrow(CompatLevel::newest()), length); ChunkedArray::with_chunk(name, arr) } } @@ -90,7 +91,10 @@ impl<'a> ChunkFull<&'a [u8]> for BinaryOffsetChunked { impl ChunkFullNull for BinaryOffsetChunked { fn full_null(name: &str, length: usize) -> Self { - let arr = BinaryArray::::new_null(DataType::BinaryOffset.to_arrow(true), length); + let arr = BinaryArray::::new_null( + DataType::BinaryOffset.to_arrow(CompatLevel::newest()), + length, + ); ChunkedArray::with_chunk(name, arr) } } @@ -122,7 +126,11 @@ impl ArrayChunked { ) -> ArrayChunked { let arr = FixedSizeListArray::new_null( ArrowDataType::FixedSizeList( - Box::new(ArrowField::new("item", inner_dtype.to_arrow(true), true)), + Box::new(ArrowField::new( + "item", + inner_dtype.to_arrow(CompatLevel::newest()), + true, + )), width, ), length, @@ -137,7 +145,11 @@ impl ChunkFull<&Series> for ArrayChunked { let width = value.len(); let dtype = value.dtype(); let arrow_dtype = ArrowDataType::FixedSizeList( - Box::new(ArrowField::new("item", dtype.to_arrow(true), true)), + Box::new(ArrowField::new( + "item", + dtype.to_arrow(CompatLevel::newest()), + true, + )), width, ); let value = value.rechunk().chunks()[0].clone(); @@ -158,7 +170,7 @@ impl ListChunked { let arr: ListArray = ListArray::new_null( ArrowDataType::LargeList(Box::new(ArrowField::new( "item", - inner_dtype.to_physical().to_arrow(true), + inner_dtype.to_physical().to_arrow(CompatLevel::newest()), true, ))), length, diff --git a/crates/polars-core/src/chunked_array/ops/gather.rs b/crates/polars-core/src/chunked_array/ops/gather.rs index 67c3b980d516..21bf6479dfc4 100644 --- a/crates/polars-core/src/chunked_array/ops/gather.rs +++ b/crates/polars-core/src/chunked_array/ops/gather.rs @@ -155,7 +155,7 @@ where } let targets: Vec<_> = ca.downcast_iter().collect(); let arr = gather_idx_array_unchecked( - ca.dtype().to_arrow(true), + ca.dtype().to_arrow(CompatLevel::newest()), &targets, ca.null_count() > 0, indices.as_ref(), @@ -192,7 +192,7 @@ where let targets: Vec<_> = ca.downcast_iter().collect(); let chunks = indices.downcast_iter().map(|idx_arr| { - let dtype = ca.dtype().to_arrow(true); + let dtype = ca.dtype().to_arrow(CompatLevel::newest()); if idx_arr.null_count() == 0 { gather_idx_array_unchecked(dtype, &targets, targets_have_nulls, idx_arr.values()) } else if targets.len() == 1 { diff --git a/crates/polars-core/src/chunked_array/ops/rolling_window.rs b/crates/polars-core/src/chunked_array/ops/rolling_window.rs index ef3c07529504..c5898edb4df1 100644 --- a/crates/polars-core/src/chunked_array/ops/rolling_window.rs +++ b/crates/polars-core/src/chunked_array/ops/rolling_window.rs @@ -270,7 +270,7 @@ mod inner_mod { } } let arr = PrimitiveArray::new( - T::get_dtype().to_arrow(true), + T::get_dtype().to_arrow(CompatLevel::newest()), values.into(), Some(validity.into()), ); diff --git a/crates/polars-core/src/chunked_array/ops/set.rs b/crates/polars-core/src/chunked_array/ops/set.rs index 52646925a05c..abafab312c6a 100644 --- a/crates/polars-core/src/chunked_array/ops/set.rs +++ b/crates/polars-core/src/chunked_array/ops/set.rs @@ -55,7 +55,7 @@ where self.downcast_iter().next().unwrap(), idx, value, - T::get_dtype().to_arrow(true), + T::get_dtype().to_arrow(CompatLevel::newest()), )?; return Ok(Self::with_chunk(self.name(), arr)); } @@ -101,7 +101,14 @@ where let chunks = left .downcast_iter() .zip(mask.downcast_iter()) - .map(|(arr, mask)| set_with_mask(arr, mask, value, T::get_dtype().to_arrow(true))); + .map(|(arr, mask)| { + set_with_mask( + arr, + mask, + value, + T::get_dtype().to_arrow(CompatLevel::newest()), + ) + }); Ok(ChunkedArray::from_chunk_iter(self.name(), chunks)) } else { // slow path, could be optimized. diff --git a/crates/polars-core/src/chunked_array/ops/sort/arg_sort_multiple.rs b/crates/polars-core/src/chunked_array/ops/sort/arg_sort_multiple.rs index 8b909f09930a..1e86cda3e33b 100644 --- a/crates/polars-core/src/chunked_array/ops/sort/arg_sort_multiple.rs +++ b/crates/polars-core/src/chunked_array/ops/sort/arg_sort_multiple.rs @@ -94,12 +94,12 @@ pub fn _get_rows_encoded_compat_array(by: &Series) -> PolarsResult { DataType::Categorical(_, _) | DataType::Enum(_, _) => { let ca = by.categorical().unwrap(); if ca.uses_lexical_ordering() { - by.to_arrow(0, true) + by.to_arrow(0, CompatLevel::newest()) } else { ca.physical().chunks[0].clone() } }, - _ => by.to_arrow(0, true), + _ => by.to_arrow(0, CompatLevel::newest()), }; Ok(out) } diff --git a/crates/polars-core/src/chunked_array/ops/sort/mod.rs b/crates/polars-core/src/chunked_array/ops/sort/mod.rs index 907aeefa33d6..c2ef58a23c26 100644 --- a/crates/polars-core/src/chunked_array/ops/sort/mod.rs +++ b/crates/polars-core/src/chunked_array/ops/sort/mod.rs @@ -201,7 +201,7 @@ where } let arr = PrimitiveArray::new( - T::get_dtype().to_arrow(true), + T::get_dtype().to_arrow(CompatLevel::newest()), vals.into(), Some(create_validity(len, null_count, options.nulls_last)), ); diff --git a/crates/polars-core/src/chunked_array/ops/unique/mod.rs b/crates/polars-core/src/chunked_array/ops/unique/mod.rs index 2f4cc86d192f..989e30061478 100644 --- a/crates/polars-core/src/chunked_array/ops/unique/mod.rs +++ b/crates/polars-core/src/chunked_array/ops/unique/mod.rs @@ -126,7 +126,11 @@ where if !T::Native::is_float() && MetadataEnv::experimental_enabled() { let md = self.metadata(); if let (Some(min), Some(max)) = (md.get_min_value(), md.get_max_value()) { - let data_type = self.field.as_ref().data_type().to_arrow(false); + let data_type = self + .field + .as_ref() + .data_type() + .to_arrow(CompatLevel::oldest()); if let Some(mut state) = PrimitiveRangedUniqueState::new( *min, *max, @@ -268,7 +272,11 @@ impl ChunkUnique for BooleanChunked { fn unique(&self) -> PolarsResult { use polars_compute::unique::RangedUniqueKernel; - let data_type = self.field.as_ref().data_type().to_arrow(false); + let data_type = self + .field + .as_ref() + .data_type() + .to_arrow(CompatLevel::oldest()); let has_null = self.null_count() > 0; let mut state = BooleanUniqueKernelState::new(has_null, data_type); diff --git a/crates/polars-core/src/chunked_array/trusted_len.rs b/crates/polars-core/src/chunked_array/trusted_len.rs index af6a6d3f3c51..84ff13cb906d 100644 --- a/crates/polars-core/src/chunked_array/trusted_len.rs +++ b/crates/polars-core/src/chunked_array/trusted_len.rs @@ -17,7 +17,8 @@ where // SAFETY: iter is TrustedLen. let iter = iter.into_iter(); let arr = unsafe { - PrimitiveArray::from_trusted_len_iter_unchecked(iter).to(T::get_dtype().to_arrow(true)) + PrimitiveArray::from_trusted_len_iter_unchecked(iter) + .to(T::get_dtype().to_arrow(CompatLevel::newest())) }; arr.into() } @@ -37,7 +38,7 @@ where // SAFETY: iter is TrustedLen. let iter = iter.into_iter(); let values = unsafe { Vec::from_trusted_len_iter_unchecked(iter) }.into(); - let arr = PrimitiveArray::new(T::get_dtype().to_arrow(true), values, None); + let arr = PrimitiveArray::new(T::get_dtype().to_arrow(CompatLevel::newest()), values, None); NoNull::new(arr.into()) } } diff --git a/crates/polars-core/src/datatypes/dtype.rs b/crates/polars-core/src/datatypes/dtype.rs index 44a2a82d94c8..9fc2a0ba0fb0 100644 --- a/crates/polars-core/src/datatypes/dtype.rs +++ b/crates/polars-core/src/datatypes/dtype.rs @@ -498,7 +498,7 @@ impl DataType { } /// Convert to an Arrow Field - pub fn to_arrow_field(&self, name: &str, pl_flavor: bool) -> ArrowField { + pub fn to_arrow_field(&self, name: &str, compat_level: CompatLevel) -> ArrowField { let metadata = match self { #[cfg(feature = "dtype-categorical")] DataType::Enum(_, _) => Some(BTreeMap::from([( @@ -512,7 +512,7 @@ impl DataType { _ => None, }; - let field = ArrowField::new(name, self.to_arrow(pl_flavor), true); + let field = ArrowField::new(name, self.to_arrow(compat_level), true); if let Some(metadata) = metadata { field.with_metadata(metadata) @@ -523,12 +523,12 @@ impl DataType { /// Convert to an Arrow data type. #[inline] - pub fn to_arrow(&self, pl_flavor: bool) -> ArrowDataType { - self.try_to_arrow(pl_flavor).unwrap() + pub fn to_arrow(&self, compat_level: CompatLevel) -> ArrowDataType { + self.try_to_arrow(compat_level).unwrap() } #[inline] - pub fn try_to_arrow(&self, pl_flavor: bool) -> PolarsResult { + pub fn try_to_arrow(&self, compat_level: CompatLevel) -> PolarsResult { use DataType::*; match self { Boolean => Ok(ArrowDataType::Boolean), @@ -553,7 +553,7 @@ impl DataType { )) }, String => { - let dt = if pl_flavor { + let dt = if compat_level.0 >= 1 { ArrowDataType::Utf8View } else { ArrowDataType::LargeUtf8 @@ -561,7 +561,7 @@ impl DataType { Ok(dt) }, Binary => { - let dt = if pl_flavor { + let dt = if compat_level.0 >= 1 { ArrowDataType::BinaryView } else { ArrowDataType::LargeBinary @@ -574,11 +574,11 @@ impl DataType { Time => Ok(ArrowDataType::Time64(ArrowTimeUnit::Nanosecond)), #[cfg(feature = "dtype-array")] Array(dt, size) => Ok(ArrowDataType::FixedSizeList( - Box::new(dt.to_arrow_field("item", pl_flavor)), + Box::new(dt.to_arrow_field("item", compat_level)), *size, )), List(dt) => Ok(ArrowDataType::LargeList(Box::new( - dt.to_arrow_field("item", pl_flavor), + dt.to_arrow_field("item", compat_level), ))), Null => Ok(ArrowDataType::Null), #[cfg(feature = "object")] @@ -592,7 +592,7 @@ impl DataType { }, #[cfg(feature = "dtype-categorical")] Categorical(_, _) | Enum(_, _) => { - let values = if pl_flavor { + let values = if compat_level.0 >= 1 { ArrowDataType::Utf8View } else { ArrowDataType::LargeUtf8 @@ -605,7 +605,10 @@ impl DataType { }, #[cfg(feature = "dtype-struct")] Struct(fields) => { - let fields = fields.iter().map(|fld| fld.to_arrow(pl_flavor)).collect(); + let fields = fields + .iter() + .map(|fld| fld.to_arrow(compat_level)) + .collect(); Ok(ArrowDataType::Struct(fields)) }, BinaryOffset => Ok(ArrowDataType::LargeBinary), @@ -615,7 +618,7 @@ impl DataType { UnknownKind::Float => ArrowDataType::Float64, UnknownKind::Str => ArrowDataType::Utf8View, UnknownKind::Int(v) => { - return materialize_dyn_int(*v).dtype().try_to_arrow(pl_flavor) + return materialize_dyn_int(*v).dtype().try_to_arrow(compat_level) }, }; Ok(dt) @@ -789,3 +792,31 @@ pub fn create_enum_data_type(categories: Utf8ViewArray) -> DataType { let rev_map = RevMapping::build_local(categories); DataType::Enum(Some(Arc::new(rev_map)), Default::default()) } + +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub struct CompatLevel(pub(crate) u16); + +impl CompatLevel { + pub const fn newest() -> CompatLevel { + CompatLevel(1) + } + + pub const fn oldest() -> CompatLevel { + CompatLevel(0) + } + + // The following methods are only used internally + + #[doc(hidden)] + pub fn with_level(level: u16) -> PolarsResult { + if level > CompatLevel::newest().0 { + polars_bail!(InvalidOperation: "invalid compat level"); + } + Ok(CompatLevel(level)) + } + + #[doc(hidden)] + pub fn get_level(&self) -> u16 { + self.0 + } +} diff --git a/crates/polars-core/src/datatypes/field.rs b/crates/polars-core/src/datatypes/field.rs index 63a3bafe33fe..aea148546ef0 100644 --- a/crates/polars-core/src/datatypes/field.rs +++ b/crates/polars-core/src/datatypes/field.rs @@ -107,10 +107,10 @@ impl Field { /// let f = Field::new("Value", DataType::Int64); /// let af = arrow::datatypes::Field::new("Value", arrow::datatypes::ArrowDataType::Int64, true); /// - /// assert_eq!(f.to_arrow(true), af); + /// assert_eq!(f.to_arrow(CompatLevel::newest()), af); /// ``` - pub fn to_arrow(&self, pl_flavor: bool) -> ArrowField { - self.dtype.to_arrow_field(self.name.as_str(), pl_flavor) + pub fn to_arrow(&self, compat_level: CompatLevel) -> ArrowField { + self.dtype.to_arrow_field(self.name.as_str(), compat_level) } } diff --git a/crates/polars-core/src/frame/group_by/aggregations/agg_list.rs b/crates/polars-core/src/frame/group_by/aggregations/agg_list.rs index 1915797ee41f..812d89f0d240 100644 --- a/crates/polars-core/src/frame/group_by/aggregations/agg_list.rs +++ b/crates/polars-core/src/frame/group_by/aggregations/agg_list.rs @@ -70,11 +70,13 @@ where }; let array = PrimitiveArray::new( - T::get_dtype().to_arrow(true), + T::get_dtype().to_arrow(CompatLevel::newest()), list_values.into(), validity, ); - let data_type = ListArray::::default_datatype(T::get_dtype().to_arrow(true)); + let data_type = ListArray::::default_datatype( + T::get_dtype().to_arrow(CompatLevel::newest()), + ); // SAFETY: // offsets are monotonically increasing let arr = ListArray::::new( @@ -133,11 +135,13 @@ where }; let array = PrimitiveArray::new( - T::get_dtype().to_arrow(true), + T::get_dtype().to_arrow(CompatLevel::newest()), list_values.into(), validity, ); - let data_type = ListArray::::default_datatype(T::get_dtype().to_arrow(true)); + let data_type = ListArray::::default_datatype( + T::get_dtype().to_arrow(CompatLevel::newest()), + ); let arr = ListArray::::new( data_type, Offsets::new_unchecked(offsets).into(), diff --git a/crates/polars-core/src/frame/mod.rs b/crates/polars-core/src/frame/mod.rs index 34e129e8808f..0515d030a569 100644 --- a/crates/polars-core/src/frame/mod.rs +++ b/crates/polars-core/src/frame/mod.rs @@ -2382,10 +2382,10 @@ impl DataFrame { /// This responsibility is left to the caller as we don't want to take mutable references here, /// but we also don't want to rechunk here, as this operation is costly and would benefit the caller /// as well. - pub fn iter_chunks(&self, pl_flavor: bool, parallel: bool) -> RecordBatchIter { - // If any of the columns is binview and we don't convert `pl_flavor` we allow parallelism + pub fn iter_chunks(&self, compat_level: CompatLevel, parallel: bool) -> RecordBatchIter { + // If any of the columns is binview and we don't convert `compat_level` we allow parallelism // as we must allocate arrow strings/binaries. - let parallel = if parallel && !pl_flavor { + let parallel = if parallel && compat_level.0 >= 1 { self.columns.len() > 1 && self .columns @@ -2399,7 +2399,7 @@ impl DataFrame { columns: &self.columns, idx: 0, n_chunks: self.n_chunks(), - pl_flavor, + compat_level, parallel, } } @@ -3018,7 +3018,7 @@ pub struct RecordBatchIter<'a> { columns: &'a Vec, idx: usize, n_chunks: usize, - pl_flavor: bool, + compat_level: CompatLevel, parallel: bool, } @@ -3034,12 +3034,12 @@ impl<'a> Iterator for RecordBatchIter<'a> { let iter = self .columns .par_iter() - .map(|s| s.to_arrow(self.idx, self.pl_flavor)); + .map(|s| s.to_arrow(self.idx, self.compat_level)); POOL.install(|| iter.collect()) } else { self.columns .iter() - .map(|s| s.to_arrow(self.idx, self.pl_flavor)) + .map(|s| s.to_arrow(self.idx, self.compat_level)) .collect() }; self.idx += 1; @@ -3117,7 +3117,7 @@ mod test { "foo" => &[1, 2, 3, 4, 5] ) .unwrap(); - let mut iter = df.iter_chunks(true, false); + let mut iter = df.iter_chunks(CompatLevel::newest(), false); assert_eq!(5, iter.next().unwrap().len()); assert!(iter.next().is_none()); } diff --git a/crates/polars-core/src/frame/row/transpose.rs b/crates/polars-core/src/frame/row/transpose.rs index 0fdc15c9c6f6..7ad4bc4f1fef 100644 --- a/crates/polars-core/src/frame/row/transpose.rs +++ b/crates/polars-core/src/frame/row/transpose.rs @@ -247,7 +247,7 @@ where }; let arr = PrimitiveArray::::new( - T::get_dtype().to_arrow(true), + T::get_dtype().to_arrow(CompatLevel::newest()), values.into(), validity, ); diff --git a/crates/polars-core/src/schema.rs b/crates/polars-core/src/schema.rs index 981615499dfb..dfb581135a0b 100644 --- a/crates/polars-core/src/schema.rs +++ b/crates/polars-core/src/schema.rs @@ -370,11 +370,11 @@ impl Schema { } /// Convert self to `ArrowSchema` by cloning the fields - pub fn to_arrow(&self, pl_flavor: bool) -> ArrowSchema { + pub fn to_arrow(&self, compat_level: CompatLevel) -> ArrowSchema { let fields: Vec<_> = self .inner .iter() - .map(|(name, dtype)| dtype.to_arrow_field(name.as_str(), pl_flavor)) + .map(|(name, dtype)| dtype.to_arrow_field(name.as_str(), compat_level)) .collect(); ArrowSchema::from(fields) } diff --git a/crates/polars-core/src/series/from.rs b/crates/polars-core/src/series/from.rs index 9a82ed0e2506..6a4c61cd7f37 100644 --- a/crates/polars-core/src/series/from.rs +++ b/crates/polars-core/src/series/from.rs @@ -102,9 +102,12 @@ impl Series { Float64 => Float64Chunked::from_chunks(name, chunks).into_series(), BinaryOffset => BinaryOffsetChunked::from_chunks(name, chunks).into_series(), #[cfg(feature = "dtype-struct")] - Struct(_) => { - Series::_try_from_arrow_unchecked(name, chunks, &dtype.to_arrow(true)).unwrap() - }, + Struct(_) => Series::_try_from_arrow_unchecked( + name, + chunks, + &dtype.to_arrow(CompatLevel::newest()), + ) + .unwrap(), #[cfg(feature = "object")] Object(_, _) => { assert_eq!(chunks.len(), 1); diff --git a/crates/polars-core/src/series/implementations/decimal.rs b/crates/polars-core/src/series/implementations/decimal.rs index 9f5c382d94c7..2437a43fc3f4 100644 --- a/crates/polars-core/src/series/implementations/decimal.rs +++ b/crates/polars-core/src/series/implementations/decimal.rs @@ -43,7 +43,8 @@ impl SeriesWrap { Series::from_chunks_and_dtype_unchecked("", vec![arr.values().clone()], dtype) }; let new_values = s.array_ref(0).clone(); - let data_type = ListArray::::default_datatype(dtype.to_arrow(true)); + let data_type = + ListArray::::default_datatype(dtype.to_arrow(CompatLevel::newest())); let new_arr = ListArray::::new( data_type, arr.offsets().clone(), diff --git a/crates/polars-core/src/series/into.rs b/crates/polars-core/src/series/into.rs index f1fbd6143f0a..c0ac905666cc 100644 --- a/crates/polars-core/src/series/into.rs +++ b/crates/polars-core/src/series/into.rs @@ -19,11 +19,11 @@ impl Series { /// Convert a chunk in the Series to the correct Arrow type. /// This conversion is needed because polars doesn't use a /// 1 on 1 mapping for logical/ categoricals, etc. - pub fn to_arrow(&self, chunk_idx: usize, pl_flavor: bool) -> ArrayRef { + pub fn to_arrow(&self, chunk_idx: usize, compat_level: CompatLevel) -> ArrayRef { match self.dtype() { // make sure that we recursively apply all logical types. #[cfg(feature = "dtype-struct")] - DataType::Struct(_) => self.struct_().unwrap().to_arrow(chunk_idx, pl_flavor), + DataType::Struct(_) => self.struct_().unwrap().to_arrow(chunk_idx, compat_level), // special list branch to // make sure that we recursively apply all logical types. DataType::List(inner) => { @@ -45,10 +45,10 @@ impl Series { .unwrap() }; - s.to_arrow(0, pl_flavor) + s.to_arrow(0, compat_level) }; - let data_type = ListArray::::default_datatype(inner.to_arrow(pl_flavor)); + let data_type = ListArray::::default_datatype(inner.to_arrow(compat_level)); let arr = ListArray::::new( data_type, arr.offsets().clone(), @@ -74,30 +74,30 @@ impl Series { ) }; - new.to_arrow(pl_flavor, false) + new.to_arrow(compat_level, false) }, #[cfg(feature = "dtype-date")] DataType::Date => cast( &*self.chunks()[chunk_idx], - &DataType::Date.to_arrow(pl_flavor), + &DataType::Date.to_arrow(compat_level), ) .unwrap(), #[cfg(feature = "dtype-datetime")] DataType::Datetime(_, _) => cast( &*self.chunks()[chunk_idx], - &self.dtype().to_arrow(pl_flavor), + &self.dtype().to_arrow(compat_level), ) .unwrap(), #[cfg(feature = "dtype-duration")] DataType::Duration(_) => cast( &*self.chunks()[chunk_idx], - &self.dtype().to_arrow(pl_flavor), + &self.dtype().to_arrow(compat_level), ) .unwrap(), #[cfg(feature = "dtype-time")] DataType::Time => cast( &*self.chunks()[chunk_idx], - &DataType::Time.to_arrow(pl_flavor), + &DataType::Time.to_arrow(compat_level), ) .unwrap(), #[cfg(feature = "object")] @@ -117,7 +117,7 @@ impl Series { } }, DataType::String => { - if pl_flavor { + if compat_level.0 >= 1 { self.array_ref(chunk_idx).clone() } else { let arr = self.array_ref(chunk_idx); @@ -125,7 +125,7 @@ impl Series { } }, DataType::Binary => { - if pl_flavor { + if compat_level.0 >= 1 { self.array_ref(chunk_idx).clone() } else { let arr = self.array_ref(chunk_idx); diff --git a/crates/polars-core/src/series/mod.rs b/crates/polars-core/src/series/mod.rs index 4c63bedd9ffd..c508970faeae 100644 --- a/crates/polars-core/src/series/mod.rs +++ b/crates/polars-core/src/series/mod.rs @@ -914,7 +914,9 @@ impl Series { let offsets = (0i64..(s.len() as i64 + 1)).collect::>(); let offsets = unsafe { Offsets::new_unchecked(offsets) }; - let data_type = LargeListArray::default_datatype(s.dtype().to_physical().to_arrow(true)); + let data_type = LargeListArray::default_datatype( + s.dtype().to_physical().to_arrow(CompatLevel::newest()), + ); let new_arr = LargeListArray::new(data_type, offsets.into(), values, None); let mut out = ListChunked::with_chunk(s.name(), new_arr); out.set_inner_dtype(s.dtype().clone()); diff --git a/crates/polars-core/src/series/ops/reshape.rs b/crates/polars-core/src/series/ops/reshape.rs index 87e4a643d8e5..ca824f6a4104 100644 --- a/crates/polars-core/src/series/ops/reshape.rs +++ b/crates/polars-core/src/series/ops/reshape.rs @@ -156,8 +156,12 @@ impl Series { while let Some(dim) = dims.pop_back() { prev_dtype = DataType::Array(Box::new(prev_dtype), dim as usize); - prev_array = - FixedSizeListArray::new(prev_dtype.to_arrow(true), prev_array, None).boxed(); + prev_array = FixedSizeListArray::new( + prev_dtype.to_arrow(CompatLevel::newest()), + prev_array, + None, + ) + .boxed(); } Ok(unsafe { Series::from_chunks_and_dtype_unchecked( diff --git a/crates/polars-expr/src/expressions/window.rs b/crates/polars-expr/src/expressions/window.rs index 97f25abc4196..5a71230a9d09 100644 --- a/crates/polars-expr/src/expressions/window.rs +++ b/crates/polars-expr/src/expressions/window.rs @@ -821,7 +821,7 @@ where unsafe { values.set_len(len) } let validity = Bitmap::from(validity); let arr = PrimitiveArray::new( - T::get_dtype().to_physical().to_arrow(true), + T::get_dtype().to_physical().to_arrow(CompatLevel::newest()), values.into(), Some(validity), ); diff --git a/crates/polars-ffi/src/version_0.rs b/crates/polars-ffi/src/version_0.rs index 43fec994c4d4..eb24542f0733 100644 --- a/crates/polars-ffi/src/version_0.rs +++ b/crates/polars-ffi/src/version_0.rs @@ -1,3 +1,5 @@ +use polars_core::prelude::CompatLevel; + use super::*; /// An FFI exported `Series`. @@ -52,13 +54,13 @@ unsafe extern "C" fn c_release_series_export(e: *mut SeriesExport) { } pub fn export_series(s: &Series) -> SeriesExport { - let field = ArrowField::new(s.name(), s.dtype().to_arrow(true), true); + let field = ArrowField::new(s.name(), s.dtype().to_arrow(CompatLevel::newest()), true); let schema = Box::new(ffi::export_field_to_c(&field)); let mut arrays = (0..s.chunks().len()) .map(|i| { // Make sure we export the logical type. - let arr = s.to_arrow(i, true); + let arr = s.to_arrow(i, CompatLevel::newest()); Box::into_raw(Box::new(ffi::export_array_to_c(arr.clone()))) }) .collect::>(); diff --git a/crates/polars-io/src/avro/write.rs b/crates/polars-io/src/avro/write.rs index b12cd358da16..2954de97d964 100644 --- a/crates/polars-io/src/avro/write.rs +++ b/crates/polars-io/src/avro/write.rs @@ -64,12 +64,12 @@ where } fn finish(&mut self, df: &mut DataFrame) -> PolarsResult<()> { - let schema = schema_to_arrow_checked(&df.schema(), false, "avro")?; + let schema = schema_to_arrow_checked(&df.schema(), CompatLevel::oldest(), "avro")?; let record = write::to_record(&schema, self.name.clone())?; let mut data = vec![]; let mut compressed_block = avro_schema::file::CompressedBlock::default(); - for chunk in df.iter_chunks(false, true) { + for chunk in df.iter_chunks(CompatLevel::oldest(), true) { let mut serializers = chunk .iter() .zip(record.fields.iter()) diff --git a/crates/polars-io/src/ipc/ipc_stream.rs b/crates/polars-io/src/ipc/ipc_stream.rs index ed0872dc1ea0..35bd22dab5c3 100644 --- a/crates/polars-io/src/ipc/ipc_stream.rs +++ b/crates/polars-io/src/ipc/ipc_stream.rs @@ -207,7 +207,7 @@ where pub struct IpcStreamWriter { writer: W, compression: Option, - pl_flavor: bool, + compat_level: CompatLevel, } use arrow::record_batch::RecordBatch; @@ -221,8 +221,8 @@ impl IpcStreamWriter { self } - pub fn with_pl_flavor(mut self, pl_flavor: bool) -> Self { - self.pl_flavor = pl_flavor; + pub fn with_compat_level(mut self, compat_level: CompatLevel) -> Self { + self.compat_level = compat_level; self } } @@ -235,7 +235,7 @@ where IpcStreamWriter { writer, compression: None, - pl_flavor: false, + compat_level: CompatLevel::oldest(), } } @@ -247,9 +247,9 @@ where }, ); - ipc_stream_writer.start(&df.schema().to_arrow(self.pl_flavor), None)?; + ipc_stream_writer.start(&df.schema().to_arrow(self.compat_level), None)?; let df = chunk_df_for_writing(df, 512 * 512)?; - let iter = df.iter_chunks(self.pl_flavor, true); + let iter = df.iter_chunks(self.compat_level, true); for batch in iter { ipc_stream_writer.write(&batch, None)? diff --git a/crates/polars-io/src/ipc/write.rs b/crates/polars-io/src/ipc/write.rs index b2c08846dbbc..b187ff8edc07 100644 --- a/crates/polars-io/src/ipc/write.rs +++ b/crates/polars-io/src/ipc/write.rs @@ -42,7 +42,7 @@ pub struct IpcWriter { pub(super) writer: W, pub(super) compression: Option, /// Polars' flavor of arrow. This might be temporary. - pub(super) pl_flavor: bool, + pub(super) compat_level: CompatLevel, } impl IpcWriter { @@ -52,13 +52,13 @@ impl IpcWriter { self } - pub fn with_pl_flavor(mut self, pl_flavor: bool) -> Self { - self.pl_flavor = pl_flavor; + pub fn with_compat_level(mut self, compat_level: CompatLevel) -> Self { + self.compat_level = compat_level; self } pub fn batched(self, schema: &Schema) -> PolarsResult> { - let schema = schema_to_arrow_checked(schema, self.pl_flavor, "ipc")?; + let schema = schema_to_arrow_checked(schema, self.compat_level, "ipc")?; let mut writer = write::FileWriter::new( self.writer, Arc::new(schema), @@ -71,7 +71,7 @@ impl IpcWriter { Ok(BatchedWriter { writer, - pl_flavor: self.pl_flavor, + compat_level: self.compat_level, }) } } @@ -84,12 +84,12 @@ where IpcWriter { writer, compression: None, - pl_flavor: true, + compat_level: CompatLevel::newest(), } } fn finish(&mut self, df: &mut DataFrame) -> PolarsResult<()> { - let schema = schema_to_arrow_checked(&df.schema(), self.pl_flavor, "ipc")?; + let schema = schema_to_arrow_checked(&df.schema(), self.compat_level, "ipc")?; let mut ipc_writer = write::FileWriter::try_new( &mut self.writer, Arc::new(schema), @@ -99,7 +99,7 @@ where }, )?; df.align_chunks(); - let iter = df.iter_chunks(self.pl_flavor, true); + let iter = df.iter_chunks(self.compat_level, true); for batch in iter { ipc_writer.write(&batch, None)? @@ -111,7 +111,7 @@ where pub struct BatchedWriter { writer: write::FileWriter, - pl_flavor: bool, + compat_level: CompatLevel, } impl BatchedWriter { @@ -120,7 +120,7 @@ impl BatchedWriter { /// # Panics /// The caller must ensure the chunks in the given [`DataFrame`] are aligned. pub fn write_batch(&mut self, df: &DataFrame) -> PolarsResult<()> { - let iter = df.iter_chunks(self.pl_flavor, true); + let iter = df.iter_chunks(self.compat_level, true); for batch in iter { self.writer.write(&batch, None)? } diff --git a/crates/polars-io/src/ipc/write_async.rs b/crates/polars-io/src/ipc/write_async.rs index 7a5b3240cbb5..5ed459a715d2 100644 --- a/crates/polars-io/src/ipc/write_async.rs +++ b/crates/polars-io/src/ipc/write_async.rs @@ -10,14 +10,14 @@ impl IpcWriter { IpcWriter { writer, compression: None, - pl_flavor: false, + compat_level: CompatLevel::oldest(), } } pub fn batched_async(self, schema: &Schema) -> PolarsResult> { let writer = FileSink::new( self.writer, - schema.to_arrow(false), + schema.to_arrow(CompatLevel::oldest()), None, WriteOptions { compression: self.compression.map(|c| c.into()), @@ -44,7 +44,7 @@ where /// # Panics /// The caller must ensure the chunks in the given [`DataFrame`] are aligned. pub async fn write_batch(&mut self, df: &DataFrame) -> PolarsResult<()> { - let iter = df.iter_chunks(false, true); + let iter = df.iter_chunks(CompatLevel::oldest(), true); for batch in iter { self.writer.feed(batch.into()).await?; } diff --git a/crates/polars-io/src/json/mod.rs b/crates/polars-io/src/json/mod.rs index c51e238da2f9..99dbd53ffa5d 100644 --- a/crates/polars-io/src/json/mod.rs +++ b/crates/polars-io/src/json/mod.rs @@ -146,11 +146,11 @@ where .map(|s| { #[cfg(feature = "object")] polars_ensure!(!matches!(s.dtype(), DataType::Object(_, _)), ComputeError: "cannot write 'Object' datatype to json"); - Ok(s.field().to_arrow(true)) + Ok(s.field().to_arrow(CompatLevel::newest())) }) .collect::>>()?; let batches = df - .iter_chunks(true, false) + .iter_chunks(CompatLevel::newest(), false) .map(|chunk| Ok(Box::new(chunk_to_struct(chunk, fields.clone())) as ArrayRef)); match self.json_format { @@ -191,10 +191,10 @@ where .map(|s| { #[cfg(feature = "object")] polars_ensure!(!matches!(s.dtype(), DataType::Object(_, _)), ComputeError: "cannot write 'Object' datatype to json"); - Ok(s.field().to_arrow(true)) + Ok(s.field().to_arrow(CompatLevel::newest())) }) .collect::>>()?; - let chunks = df.iter_chunks(true, false); + let chunks = df.iter_chunks(CompatLevel::newest(), false); let batches = chunks.map(|chunk| Ok(Box::new(chunk_to_struct(chunk, fields.clone())) as ArrayRef)); let mut serializer = polars_json::ndjson::write::Serializer::new(batches, vec![]); @@ -267,7 +267,7 @@ where overwrite_schema(mut_schema, overwrite)?; } - DataType::Struct(schema.iter_fields().collect()).to_arrow(true) + DataType::Struct(schema.iter_fields().collect()).to_arrow(CompatLevel::newest()) } else { // infer let inner_dtype = if let BorrowedValue::Array(values) = &json_value { @@ -276,7 +276,7 @@ where self.infer_schema_len .unwrap_or(NonZeroUsize::new(usize::MAX).unwrap()), )? - .to_arrow(true) + .to_arrow(CompatLevel::newest()) } else { polars_json::json::infer(&json_value)? }; @@ -295,7 +295,7 @@ where .map(|(name, dt)| Field::new(&name, dt)) .collect(), ) - .to_arrow(true) + .to_arrow(CompatLevel::newest()) } else { inner_dtype } diff --git a/crates/polars-io/src/ndjson/mod.rs b/crates/polars-io/src/ndjson/mod.rs index 8d6ad6c2d680..4ec6ffa7a1da 100644 --- a/crates/polars-io/src/ndjson/mod.rs +++ b/crates/polars-io/src/ndjson/mod.rs @@ -13,7 +13,7 @@ pub fn infer_schema( let data_types = polars_json::ndjson::iter_unique_dtypes(reader, infer_schema_len)?; let data_type = crate::json::infer::data_types_to_supertype(data_types.map(|dt| DataType::from(&dt)))?; - let schema = StructArray::get_fields(&data_type.to_arrow(true)) + let schema = StructArray::get_fields(&data_type.to_arrow(CompatLevel::newest())) .iter() .collect(); Ok(schema) diff --git a/crates/polars-io/src/parquet/write/batched_writer.rs b/crates/polars-io/src/parquet/write/batched_writer.rs index 5f363e0eb1a3..86b95bc36f85 100644 --- a/crates/polars-io/src/parquet/write/batched_writer.rs +++ b/crates/polars-io/src/parquet/write/batched_writer.rs @@ -27,7 +27,7 @@ impl BatchedWriter { &'a self, df: &'a DataFrame, ) -> impl Iterator>> + 'a { - let rb_iter = df.iter_chunks(true, false); + let rb_iter = df.iter_chunks(CompatLevel::newest(), false); rb_iter.filter_map(move |batch| match batch.len() { 0 => None, _ => { @@ -95,7 +95,7 @@ fn prepare_rg_iter<'a>( options: WriteOptions, parallel: bool, ) -> impl Iterator>> + 'a { - let rb_iter = df.iter_chunks(true, false); + let rb_iter = df.iter_chunks(CompatLevel::newest(), false); rb_iter.filter_map(move |batch| match batch.len() { 0 => None, _ => { diff --git a/crates/polars-io/src/parquet/write/writer.rs b/crates/polars-io/src/parquet/write/writer.rs index 5b17dfbe6eec..32b104306aa6 100644 --- a/crates/polars-io/src/parquet/write/writer.rs +++ b/crates/polars-io/src/parquet/write/writer.rs @@ -97,7 +97,7 @@ where } pub fn batched(self, schema: &Schema) -> PolarsResult> { - let schema = schema_to_arrow_checked(schema, true, "parquet")?; + let schema = schema_to_arrow_checked(schema, CompatLevel::newest(), "parquet")?; let parquet_schema = to_parquet_schema(&schema)?; let encodings = get_encodings(&schema); let options = self.materialize_options(); diff --git a/crates/polars-io/src/shared.rs b/crates/polars-io/src/shared.rs index bb18f8cba8d2..2788d10a54fd 100644 --- a/crates/polars-io/src/shared.rs +++ b/crates/polars-io/src/shared.rs @@ -120,13 +120,13 @@ pub(crate) fn finish_reader( pub(crate) fn schema_to_arrow_checked( schema: &Schema, - pl_flavor: bool, + compat_level: CompatLevel, _file_name: &str, ) -> PolarsResult { let fields = schema.iter_fields().map(|field| { #[cfg(feature = "object")] polars_ensure!(!matches!(field.data_type(), DataType::Object(_, _)), ComputeError: "cannot write 'Object' datatype to {}", _file_name); - Ok(field.data_type().to_arrow_field(field.name().as_str(), pl_flavor)) + Ok(field.data_type().to_arrow_field(field.name().as_str(), compat_level)) }).collect::>>()?; Ok(ArrowSchema::from(fields)) } diff --git a/crates/polars-ops/src/chunked_array/gather/chunked.rs b/crates/polars-ops/src/chunked_array/gather/chunked.rs index 44a7ac951ba7..4b4aed6f7f87 100644 --- a/crates/polars-ops/src/chunked_array/gather/chunked.rs +++ b/crates/polars-ops/src/chunked_array/gather/chunked.rs @@ -208,7 +208,7 @@ where T::Array: Debug, { unsafe fn take_chunked_unchecked(&self, by: &[ChunkId], sorted: IsSorted) -> Self { - let arrow_dtype = self.dtype().to_arrow(true); + let arrow_dtype = self.dtype().to_arrow(CompatLevel::newest()); let mut out = if let Some(iter) = self.downcast_slices() { let targets = iter.collect::>(); @@ -245,7 +245,7 @@ where // Take function that checks of null state in `ChunkIdx`. unsafe fn take_opt_chunked_unchecked(&self, by: &[NullableChunkId]) -> Self { - let arrow_dtype = self.dtype().to_arrow(true); + let arrow_dtype = self.dtype().to_arrow(CompatLevel::newest()); if let Some(iter) = self.downcast_slices() { let targets = iter.collect::>(); diff --git a/crates/polars-ops/src/chunked_array/gather_skip_nulls.rs b/crates/polars-ops/src/chunked_array/gather_skip_nulls.rs index ba1427e6f6fd..ff52a6601589 100644 --- a/crates/polars-ops/src/chunked_array/gather_skip_nulls.rs +++ b/crates/polars-ops/src/chunked_array/gather_skip_nulls.rs @@ -94,7 +94,8 @@ where .collect(); let gathered = unsafe { gather_skip_nulls_idx_pairs_unchecked(self, index_pairs, indices.len()) }; - let arr = T::Array::from_zeroable_vec(gathered, self.dtype().to_arrow(true)); + let arr = + T::Array::from_zeroable_vec(gathered, self.dtype().to_arrow(CompatLevel::newest())); Ok(ChunkedArray::from_chunk_iter_like(self, [arr])) } } @@ -140,7 +141,8 @@ where gather_skip_nulls_idx_pairs_unchecked(self, index_pairs, indices.as_ref().len()) }; - let mut arr = T::Array::from_zeroable_vec(gathered, self.dtype().to_arrow(true)); + let mut arr = + T::Array::from_zeroable_vec(gathered, self.dtype().to_arrow(CompatLevel::newest())); if indices.null_count() > 0 { let array_refs: Vec<&dyn Array> = indices.chunks().iter().map(|x| &**x).collect(); arr = arr.with_validity_typed(concatenate_validities(&array_refs)); diff --git a/crates/polars-ops/src/chunked_array/repeat_by.rs b/crates/polars-ops/src/chunked_array/repeat_by.rs index bdba858d5719..8ccf9ae58141 100644 --- a/crates/polars-ops/src/chunked_array/repeat_by.rs +++ b/crates/polars-ops/src/chunked_array/repeat_by.rs @@ -39,7 +39,7 @@ where unsafe { LargeListArray::from_iter_primitive_trusted_len( iter, - T::get_dtype().to_arrow(true), + T::get_dtype().to_arrow(CompatLevel::newest()), ) } })) diff --git a/crates/polars-ops/src/chunked_array/strings/extract.rs b/crates/polars-ops/src/chunked_array/strings/extract.rs index b56e1251c840..4c65b2ce8c3d 100644 --- a/crates/polars-ops/src/chunked_array/strings/extract.rs +++ b/crates/polars-ops/src/chunked_array/strings/extract.rs @@ -52,7 +52,7 @@ pub(super) fn extract_groups( .map(|ca| ca.into_series()); } - let data_type = dtype.try_to_arrow(true)?; + let data_type = dtype.try_to_arrow(CompatLevel::newest())?; let DataType::Struct(fields) = dtype else { unreachable!() // Implementation error if it isn't a struct. }; diff --git a/crates/polars-ops/src/chunked_array/strings/json_path.rs b/crates/polars-ops/src/chunked_array/strings/json_path.rs index 6c54de338676..a585e9837e39 100644 --- a/crates/polars-ops/src/chunked_array/strings/json_path.rs +++ b/crates/polars-ops/src/chunked_array/strings/json_path.rs @@ -107,7 +107,7 @@ pub trait Utf8JsonPathImpl: AsString { let array = polars_json::ndjson::deserialize::deserialize_iter( iter, - dtype.to_arrow(true), + dtype.to_arrow(CompatLevel::newest()), buf_size, ca.len(), ) diff --git a/crates/polars-ops/src/series/ops/ewm_by.rs b/crates/polars-ops/src/series/ops/ewm_by.rs index 1bc3630d6604..9ae0db056ae5 100644 --- a/crates/polars-ops/src/series/ops/ewm_by.rs +++ b/crates/polars-ops/src/series/ops/ewm_by.rs @@ -130,7 +130,7 @@ where } }; }); - let mut arr = T::Array::from_zeroable_vec(out, values.dtype().to_arrow(true)); + let mut arr = T::Array::from_zeroable_vec(out, values.dtype().to_arrow(CompatLevel::newest())); if (times.null_count() > 0) || (values.null_count() > 0) { let validity = binary_concatenate_validities(times, values); arr = arr.with_validity_typed(validity); @@ -179,7 +179,7 @@ where } }; }); - let mut arr = T::Array::from_zeroable_vec(out, values.dtype().to_arrow(true)); + let mut arr = T::Array::from_zeroable_vec(out, values.dtype().to_arrow(CompatLevel::newest())); if (times.null_count() > 0) || (values.null_count() > 0) { let validity = binary_concatenate_validities(times, values); arr = arr.with_validity_typed(validity); diff --git a/crates/polars-ops/src/series/ops/interpolation/interpolate.rs b/crates/polars-ops/src/series/ops/interpolation/interpolate.rs index 0263b506920d..cffbe59f5f05 100644 --- a/crates/polars-ops/src/series/ops/interpolation/interpolate.rs +++ b/crates/polars-ops/src/series/ops/interpolation/interpolate.rs @@ -99,7 +99,7 @@ where } let array = PrimitiveArray::new( - T::get_dtype().to_arrow(true), + T::get_dtype().to_arrow(CompatLevel::newest()), out.into(), Some(validity.into()), ); diff --git a/crates/polars-ops/src/series/ops/interpolation/interpolate_by.rs b/crates/polars-ops/src/series/ops/interpolation/interpolate_by.rs index f425ffaac7e7..7d76f7073cd5 100644 --- a/crates/polars-ops/src/series/ops/interpolation/interpolate_by.rs +++ b/crates/polars-ops/src/series/ops/interpolation/interpolate_by.rs @@ -151,7 +151,7 @@ where } let array = PrimitiveArray::new( - T::get_dtype().to_arrow(true), + T::get_dtype().to_arrow(CompatLevel::newest()), out.into(), Some(validity.into()), ); @@ -253,7 +253,7 @@ where } let array = PrimitiveArray::new( - T::get_dtype().to_arrow(true), + T::get_dtype().to_arrow(CompatLevel::newest()), out.into(), Some(validity.into()), ); diff --git a/crates/polars-pipe/src/executors/sinks/group_by/aggregates/mean.rs b/crates/polars-pipe/src/executors/sinks/group_by/aggregates/mean.rs index 82afe6a0b40c..c82c7678ff85 100644 --- a/crates/polars-pipe/src/executors/sinks/group_by/aggregates/mean.rs +++ b/crates/polars-pipe/src/executors/sinks/group_by/aggregates/mean.rs @@ -77,7 +77,7 @@ where let arr = values.chunks().get_unchecked(0); arr.sliced_unchecked(offset as usize, length as usize) }; - let dtype = K::PolarsType::get_dtype().to_arrow(true); + let dtype = K::PolarsType::get_dtype().to_arrow(CompatLevel::newest()); let arr = arrow::compute::cast::cast_unchecked(arr.as_ref(), &dtype).unwrap(); let arr = unsafe { arr.as_any() diff --git a/crates/polars-pipe/src/executors/sinks/group_by/aggregates/sum.rs b/crates/polars-pipe/src/executors/sinks/group_by/aggregates/sum.rs index b256ca41720f..e58cc09d132a 100644 --- a/crates/polars-pipe/src/executors/sinks/group_by/aggregates/sum.rs +++ b/crates/polars-pipe/src/executors/sinks/group_by/aggregates/sum.rs @@ -55,7 +55,7 @@ where let arr = values.chunks().get_unchecked(0); arr.sliced_unchecked(offset as usize, length as usize) }; - let dtype = K::PolarsType::get_dtype().to_arrow(true); + let dtype = K::PolarsType::get_dtype().to_arrow(CompatLevel::newest()); let arr = arrow::compute::cast::cast_unchecked(arr.as_ref(), &dtype).unwrap(); let arr = unsafe { arr.as_any() diff --git a/crates/polars-pipe/src/executors/sinks/group_by/generic/eval.rs b/crates/polars-pipe/src/executors/sinks/group_by/generic/eval.rs index ccfd390bcf62..84e504816daa 100644 --- a/crates/polars-pipe/src/executors/sinks/group_by/generic/eval.rs +++ b/crates/polars-pipe/src/executors/sinks/group_by/generic/eval.rs @@ -82,7 +82,7 @@ impl Eval { _ => s.to_physical_repr().into_owned(), }; let s = prepare_key(&s, chunk); - keys_columns.push(s.to_arrow(0, true)); + keys_columns.push(s.to_arrow(0, CompatLevel::newest())); } polars_row::convert_columns_amortized( diff --git a/crates/polars-pipe/src/executors/sinks/group_by/generic/hash_table.rs b/crates/polars-pipe/src/executors/sinks/group_by/generic/hash_table.rs index 3a1ca17a183a..2bb4f57b46a1 100644 --- a/crates/polars-pipe/src/executors/sinks/group_by/generic/hash_table.rs +++ b/crates/polars-pipe/src/executors/sinks/group_by/generic/hash_table.rs @@ -261,7 +261,7 @@ impl AggHashTable { .output_schema .iter_dtypes() .take(self.num_keys) - .map(|dtype| dtype.to_physical().to_arrow(true)) + .map(|dtype| dtype.to_physical().to_arrow(CompatLevel::newest())) .collect::>(); let fields = vec![Default::default(); self.num_keys]; let key_columns = diff --git a/crates/polars-pipe/src/executors/sinks/io.rs b/crates/polars-pipe/src/executors/sinks/io.rs index ac2b27717a75..cabf560dadb2 100644 --- a/crates/polars-pipe/src/executors/sinks/io.rs +++ b/crates/polars-pipe/src/executors/sinks/io.rs @@ -177,7 +177,7 @@ impl IOThread { path.push(format!("{count}.ipc")); let file = File::create(path).unwrap(); - let writer = IpcWriter::new(file).with_pl_flavor(true); + let writer = IpcWriter::new(file).with_compat_level(CompatLevel::newest()); let mut writer = writer.batched(&schema).unwrap(); writer.write_batch(&df).unwrap(); writer.finish().unwrap(); @@ -188,7 +188,7 @@ impl IOThread { path.push(format!("{count}_0_pass.ipc")); let file = File::create(path).unwrap(); - let writer = IpcWriter::new(file).with_pl_flavor(true); + let writer = IpcWriter::new(file).with_compat_level(CompatLevel::newest()); let mut writer = writer.batched(&schema).unwrap(); for mut df in iter { @@ -227,7 +227,7 @@ impl IOThread { path.push(format!("_{count}_full.ipc")); let file = File::create(path).unwrap(); - let mut writer = IpcWriter::new(file).with_pl_flavor(true); + let mut writer = IpcWriter::new(file).with_compat_level(CompatLevel::newest()); writer.finish(&mut df).unwrap(); } else { let iter = Box::new(std::iter::once(df)); @@ -260,7 +260,7 @@ impl IOThread { // duplicates path.push(format!("_{count}.ipc")); let file = File::create(path).unwrap(); - let writer = IpcWriter::new(file).with_pl_flavor(true); + let writer = IpcWriter::new(file).with_compat_level(CompatLevel::newest()); let mut writer = writer.batched(&self.schema).unwrap(); writer.write_batch(&df).unwrap(); writer.finish().unwrap(); diff --git a/crates/polars-pipe/src/executors/sinks/sort/sink_multiple.rs b/crates/polars-pipe/src/executors/sinks/sort/sink_multiple.rs index 1e7976afc431..c7256f084aeb 100644 --- a/crates/polars-pipe/src/executors/sinks/sort/sink_multiple.rs +++ b/crates/polars-pipe/src/executors/sinks/sort/sink_multiple.rs @@ -275,7 +275,7 @@ impl Sink for SortSinkMultiple { let sort_dtypes = self.sort_dtypes.take().map(|arr| { arr.iter() - .map(|dt| dt.to_physical().to_arrow(true)) + .map(|dt| dt.to_physical().to_arrow(CompatLevel::newest())) .collect::>() }); diff --git a/crates/polars-plan/src/dsl/function_expr/plugin.rs b/crates/polars-plan/src/dsl/function_expr/plugin.rs index 83c429e738c3..5ce4875fe68d 100644 --- a/crates/polars-plan/src/dsl/function_expr/plugin.rs +++ b/crates/polars-plan/src/dsl/function_expr/plugin.rs @@ -130,7 +130,7 @@ pub(super) unsafe fn plugin_field( // we deallocate the fields buffer let ffi_fields = fields .iter() - .map(|field| arrow::ffi::export_field_to_c(&field.to_arrow(true))) + .map(|field| arrow::ffi::export_field_to_c(&field.to_arrow(CompatLevel::newest()))) .collect::>() .into_boxed_slice(); let n_args = ffi_fields.len(); diff --git a/crates/polars-plan/src/dsl/function_expr/struct_.rs b/crates/polars-plan/src/dsl/function_expr/struct_.rs index d0c6ddb0a223..22fe7092b4f9 100644 --- a/crates/polars-plan/src/dsl/function_expr/struct_.rs +++ b/crates/polars-plan/src/dsl/function_expr/struct_.rs @@ -211,7 +211,7 @@ pub(super) fn suffix_fields(s: &Series, suffix: Arc) -> PolarsResult PolarsResult { let ca = s.struct_()?; - let dtype = ca.dtype().to_arrow(true); + let dtype = ca.dtype().to_arrow(CompatLevel::newest()); let iter = ca.chunks().iter().map(|arr| { let arr = arrow::compute::cast::cast_unchecked(arr.as_ref(), &dtype).unwrap(); diff --git a/crates/polars-time/src/chunkedarray/datetime.rs b/crates/polars-time/src/chunkedarray/datetime.rs index 7a2bf97d88e7..de14c83c6e72 100644 --- a/crates/polars-time/src/chunkedarray/datetime.rs +++ b/crates/polars-time/src/chunkedarray/datetime.rs @@ -12,7 +12,7 @@ fn cast_and_apply< ca: &DatetimeChunked, func: F, ) -> ChunkedArray { - let dtype = ca.dtype().to_arrow(true); + let dtype = ca.dtype().to_arrow(CompatLevel::newest()); let chunks = ca.downcast_iter().map(|arr| { let arr = cast( arr, diff --git a/crates/polars/tests/it/io/ipc.rs b/crates/polars/tests/it/io/ipc.rs index f69bf78602da..6b5e2a83ba41 100644 --- a/crates/polars/tests/it/io/ipc.rs +++ b/crates/polars/tests/it/io/ipc.rs @@ -12,7 +12,7 @@ fn test_ipc_compression_variadic_buffers() { let mut file = std::io::Cursor::new(vec![]); IpcWriter::new(&mut file) .with_compression(Some(IpcCompression::LZ4)) - .with_pl_flavor(true) + .with_compat_level(CompatLevel::newest()) .finish(&mut df) .unwrap(); @@ -82,7 +82,7 @@ fn test_read_ipc_with_columns() { .unwrap(); df_read.equals(&expected); - for pl_flavor in [false, true] { + for compat_level in [0, 1].map(|level| CompatLevel::with_level(level).unwrap()) { let mut buf: Cursor> = Cursor::new(Vec::new()); let mut df = df![ "letters" => ["x", "y", "z"], @@ -92,7 +92,7 @@ fn test_read_ipc_with_columns() { ] .unwrap(); IpcWriter::new(&mut buf) - .with_pl_flavor(pl_flavor) + .with_compat_level(compat_level) .finish(&mut df) .expect("ipc writer"); buf.set_position(0); diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index c11675d02b92..5cc2a1748137 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -103,6 +103,7 @@ TooManyRowsReturnedError, ) from polars.functions import col, lit +from polars.interchange.protocol import CompatLevel from polars.schema import Schema from polars.selectors import _expand_selector_dicts, _expand_selectors @@ -1384,7 +1385,8 @@ def item(self, row: int | None = None, column: int | str | None = None) -> Any: ) return s.get_index_signed(row) - def to_arrow(self, *, future: bool = False) -> pa.Table: + @deprecate_renamed_parameter("future", "compat_level", version="1.1") + def to_arrow(self, *, compat_level: CompatLevel | None = None) -> pa.Table: """ Collect the underlying arrow arrays in an Arrow Table. @@ -1395,13 +1397,9 @@ def to_arrow(self, *, future: bool = False) -> pa.Table: Parameters ---------- - future - Setting this to `True` will write Polars' internal data structures that - might not be available by other Arrow implementations. - - .. warning:: - This functionality is considered **unstable**. It may be changed - at any point without it being considered a breaking change. + compat_level + Use a specific compatibility level + when exporting Polars' internal data structures. Examples -------- @@ -1419,12 +1417,12 @@ def to_arrow(self, *, future: bool = False) -> pa.Table: if not self.width: # 0x0 dataframe, cannot infer schema from batches return pa.table({}) - if future: - issue_unstable_warning( - "The `future` parameter of `DataFrame.to_arrow` is considered unstable." - ) + if compat_level is None: + compat_level = False # type: ignore[assignment] + elif isinstance(compat_level, CompatLevel): + compat_level = compat_level._version # type: ignore[attr-defined] - record_batches = self._df.to_arrow(future) + record_batches = self._df.to_arrow(compat_level) return pa.Table.from_batches(record_batches) @overload @@ -3297,7 +3295,7 @@ def write_ipc( file: None, *, compression: IpcCompression = "uncompressed", - future: bool | None = None, + compat_level: CompatLevel | None = None, ) -> BytesIO: ... @overload @@ -3306,15 +3304,16 @@ def write_ipc( file: str | Path | IO[bytes], *, compression: IpcCompression = "uncompressed", - future: bool | None = None, + compat_level: CompatLevel | None = None, ) -> None: ... + @deprecate_renamed_parameter("future", "compat_level", version="1.1") def write_ipc( self, file: str | Path | IO[bytes] | None, *, compression: IpcCompression = "uncompressed", - future: bool | None = None, + compat_level: CompatLevel | None = None, ) -> BytesIO | None: """ Write to Arrow IPC binary stream or Feather file. @@ -3328,13 +3327,9 @@ def write_ipc( written. If set to `None`, the output is returned as a BytesIO object. compression : {'uncompressed', 'lz4', 'zstd'} Compression method. Defaults to "uncompressed". - future - Setting this to `True` will write Polars' internal data structures that - might not be available by other Arrow implementations. - - .. warning:: - This functionality is considered **unstable**. It may be changed - at any point without it being considered a breaking change. + compat_level + Use a specific compatibility level + when exporting Polars' internal data structures. Examples -------- @@ -3356,17 +3351,15 @@ def write_ipc( elif isinstance(file, (str, Path)): file = normalize_filepath(file) + if compat_level is None: + compat_level = True # type: ignore[assignment] + elif isinstance(compat_level, CompatLevel): + compat_level = compat_level._version # type: ignore[attr-defined] + if compression is None: compression = "uncompressed" - if future: - issue_unstable_warning( - "The `future` parameter of `DataFrame.write_ipc` is considered unstable." - ) - if future is None: - future = True - - self._df.write_ipc(file, compression, future) + self._df.write_ipc(file, compression, compat_level) return file if return_bytes else None # type: ignore[return-value] @overload @@ -3375,7 +3368,7 @@ def write_ipc_stream( file: None, *, compression: IpcCompression = "uncompressed", - future: bool | None = None, + compat_level: CompatLevel | None = None, ) -> BytesIO: ... @overload @@ -3384,15 +3377,16 @@ def write_ipc_stream( file: str | Path | IO[bytes], *, compression: IpcCompression = "uncompressed", - future: bool | None = None, + compat_level: CompatLevel | None = None, ) -> None: ... + @deprecate_renamed_parameter("future", "compat_level", version="1.1") def write_ipc_stream( self, file: str | Path | IO[bytes] | None, *, compression: IpcCompression = "uncompressed", - future: bool | None = None, + compat_level: CompatLevel | None = None, ) -> BytesIO | None: """ Write to Arrow IPC record batch stream. @@ -3406,13 +3400,9 @@ def write_ipc_stream( be written. If set to `None`, the output is returned as a BytesIO object. compression : {'uncompressed', 'lz4', 'zstd'} Compression method. Defaults to "uncompressed". - future - Setting this to `True` will write Polars' internal data structures that - might not be available by other Arrow implementations. - - .. warning:: - This functionality is considered **unstable**. It may be changed - at any point without it being considered a breaking change. + compat_level + Use a specific compatibility level + when exporting Polars' internal data structures. Examples -------- @@ -3434,17 +3424,15 @@ def write_ipc_stream( elif isinstance(file, (str, Path)): file = normalize_filepath(file) + if compat_level is None: + compat_level = True # type: ignore[assignment] + elif isinstance(compat_level, CompatLevel): + compat_level = compat_level._version # type: ignore[attr-defined] + if compression is None: compression = "uncompressed" - if future: - issue_unstable_warning( - "The `future` parameter of `DataFrame.write_ipc` is considered unstable." - ) - if future is None: - future = True - - self._df.write_ipc_stream(file, compression, future=future) + self._df.write_ipc_stream(file, compression, compat_level) return file if return_bytes else None # type: ignore[return-value] def write_parquet( diff --git a/py-polars/polars/interchange/protocol.py b/py-polars/polars/interchange/protocol.py index 2daca4b3cb19..4eda7fa95f2d 100644 --- a/py-polars/polars/interchange/protocol.py +++ b/py-polars/polars/interchange/protocol.py @@ -13,6 +13,8 @@ TypedDict, ) +from polars._utils.unstable import issue_unstable_warning + if TYPE_CHECKING: import sys @@ -255,3 +257,47 @@ class Endianness: class CopyNotAllowedError(RuntimeError): """Exception raised when a copy is required, but `allow_copy` is set to `False`.""" + + +class CompatLevel: + """Data structure compatibility level.""" + + def __init__(self) -> None: + msg = "it is not allowed to create a CompatLevel object" + raise TypeError(msg) + + @staticmethod + def _with_version(version: int) -> CompatLevel: + compat_level = CompatLevel.__new__(CompatLevel) + compat_level._version = version # type: ignore[attr-defined] + return compat_level + + @staticmethod + def _newest() -> CompatLevel: + return CompatLevel._future1 # type: ignore[attr-defined] + + @staticmethod + def newest() -> CompatLevel: + """ + Get the highest supported compatibility level. + + .. warning:: + Highest compatibility level is considered **unstable**. It may be changed + at any point without it being considered a breaking change. + """ + issue_unstable_warning( + "Using the highest compatibility level is considered unstable." + ) + return CompatLevel._newest() + + @staticmethod + def oldest() -> CompatLevel: + """Get the most compatible level.""" + return CompatLevel._compatible # type: ignore[attr-defined] + + def __repr__(self) -> str: + return f"<{self.__class__.__module__}.{self.__class__.__qualname__}: {self._version}>" # type: ignore[attr-defined] + + +CompatLevel._compatible = CompatLevel._with_version(0) # type: ignore[attr-defined] +CompatLevel._future1 = CompatLevel._with_version(1) # type: ignore[attr-defined] diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index 75a17453613e..bd3d6a45a43f 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -98,6 +98,7 @@ from polars.dependencies import pandas as pd from polars.dependencies import pyarrow as pa from polars.exceptions import ComputeError, ModuleUpgradeRequiredError, ShapeError +from polars.interchange.protocol import CompatLevel from polars.series.array import ArrayNameSpace from polars.series.binary import BinaryNameSpace from polars.series.categorical import CatNameSpace @@ -502,6 +503,15 @@ def _from_buffers( validity = validity._s return cls._from_pyseries(PySeries._from_buffers(dtype, data, validity)) + @staticmethod + def _newest_compat_level() -> int: + """ + Get the newest supported compat level. + + This is for pyo3-polars. + """ + return CompatLevel._newest()._version # type: ignore[attr-defined] + @property def dtype(self) -> DataType: """ @@ -4342,7 +4352,8 @@ def to_torch(self) -> torch.Tensor: # tensor.rename(self.name) return tensor - def to_arrow(self, *, future: bool = False) -> pa.Array: + @deprecate_renamed_parameter("future", "compat_level", version="1.1") + def to_arrow(self, *, compat_level: CompatLevel | None = None) -> pa.Array: """ Return the underlying Arrow array. @@ -4350,13 +4361,9 @@ def to_arrow(self, *, future: bool = False) -> pa.Array: Parameters ---------- - future - Setting this to `True` will write Polars' internal data structures that - might not be available by other Arrow implementations. - - .. warning:: - This functionality is considered **unstable**. It may be changed - at any point without it being considered a breaking change. + compat_level + Use a specific compatibility level + when exporting Polars' internal data structures. Examples -------- @@ -4370,7 +4377,11 @@ def to_arrow(self, *, future: bool = False) -> pa.Array: 3 ] """ - return self._s.to_arrow(future) + if compat_level is None: + compat_level = False # type: ignore[assignment] + elif isinstance(compat_level, CompatLevel): + compat_level = compat_level._version # type: ignore[attr-defined] + return self._s.to_arrow(compat_level) def to_pandas( self, *, use_pyarrow_extension_array: bool = False, **kwargs: Any diff --git a/py-polars/src/conversion/mod.rs b/py-polars/src/conversion/mod.rs index d4d6f4e6b525..0451c9f8eddf 100644 --- a/py-polars/src/conversion/mod.rs +++ b/py-polars/src/conversion/mod.rs @@ -1182,3 +1182,28 @@ where { container.into_iter().map(|s| s.as_ref().into()).collect() } + +#[derive(Debug, Copy, Clone)] +pub struct PyCompatLevel(pub CompatLevel); + +impl<'a> FromPyObject<'a> for PyCompatLevel { + fn extract_bound(ob: &Bound<'a, PyAny>) -> PyResult { + Ok(PyCompatLevel(if let Ok(level) = ob.extract::() { + if let Ok(compat_level) = CompatLevel::with_level(level) { + compat_level + } else { + return Err(PyValueError::new_err("invalid compat level")); + } + } else if let Ok(future) = ob.extract::() { + if future { + CompatLevel::newest() + } else { + CompatLevel::oldest() + } + } else { + return Err(PyTypeError::new_err( + "'compat_level' argument accepts int or bool", + )); + })) + } +} diff --git a/py-polars/src/dataframe/export.rs b/py-polars/src/dataframe/export.rs index c22d974407f4..0b0d0a4f9020 100644 --- a/py-polars/src/dataframe/export.rs +++ b/py-polars/src/dataframe/export.rs @@ -7,6 +7,7 @@ use pyo3::types::{PyList, PyTuple}; use super::*; use crate::conversion::{ObjectValue, Wrap}; use crate::interop; +use crate::prelude::PyCompatLevel; #[pymethods] impl PyDataFrame { @@ -63,7 +64,7 @@ impl PyDataFrame { } #[allow(clippy::wrong_self_convention)] - pub fn to_arrow(&mut self, future: bool) -> PyResult> { + pub fn to_arrow(&mut self, compat_level: PyCompatLevel) -> PyResult> { self.df.align_chunks(); Python::with_gil(|py| { let pyarrow = py.import_bound("pyarrow")?; @@ -71,7 +72,7 @@ impl PyDataFrame { let rbs = self .df - .iter_chunks(future, true) + .iter_chunks(compat_level.0, true) .map(|rb| interop::arrow::to_py::to_py_rb(&rb, &names, py, &pyarrow)) .collect::>()?; Ok(rbs) @@ -104,7 +105,7 @@ impl PyDataFrame { .collect::>(); let rbs = self .df - .iter_chunks(false, true) + .iter_chunks(CompatLevel::oldest(), true) .map(|rb| { let mut rb = rb.into_arrays(); for i in &cat_columns { diff --git a/py-polars/src/dataframe/io.rs b/py-polars/src/dataframe/io.rs index 5e85554ceb7d..61afd6fc1b74 100644 --- a/py-polars/src/dataframe/io.rs +++ b/py-polars/src/dataframe/io.rs @@ -18,6 +18,7 @@ use crate::file::{ get_either_file, get_file_like, get_mmap_bytes_reader, get_mmap_bytes_reader_and_path, read_if_bytesio, EitherRustPythonFile, }; +use crate::prelude::PyCompatLevel; #[pymethods] impl PyDataFrame { @@ -474,7 +475,7 @@ impl PyDataFrame { py: Python, py_f: PyObject, compression: Wrap>, - future: bool, + compat_level: PyCompatLevel, ) -> PyResult<()> { let either = get_either_file(py_f, true)?; if let EitherRustPythonFile::Rust(ref f) = either { @@ -484,7 +485,7 @@ impl PyDataFrame { py.allow_threads(|| { IpcWriter::new(&mut buf) .with_compression(compression.0) - .with_pl_flavor(future) + .with_compat_level(compat_level.0) .finish(&mut self.df) .map_err(PyPolarsErr::from) })?; @@ -497,13 +498,13 @@ impl PyDataFrame { py: Python, py_f: PyObject, compression: Wrap>, - future: bool, + compat_level: PyCompatLevel, ) -> PyResult<()> { let mut buf = get_file_like(py_f, true)?; py.allow_threads(|| { IpcStreamWriter::new(&mut buf) .with_compression(compression.0) - .with_pl_flavor(future) + .with_compat_level(compat_level.0) .finish(&mut self.df) .map_err(PyPolarsErr::from) })?; diff --git a/py-polars/src/dataframe/serde.rs b/py-polars/src/dataframe/serde.rs index 131262acf9c8..a6b1f29e23cf 100644 --- a/py-polars/src/dataframe/serde.rs +++ b/py-polars/src/dataframe/serde.rs @@ -18,7 +18,7 @@ impl PyDataFrame { // Used in pickle/pickling let mut buf: Vec = vec![]; IpcStreamWriter::new(&mut buf) - .with_pl_flavor(true) + .with_compat_level(CompatLevel::newest()) .finish(&mut self.df.clone()) .expect("ipc writer"); Ok(PyBytes::new_bound(py, &buf).to_object(py)) diff --git a/py-polars/src/series/export.rs b/py-polars/src/series/export.rs index d49f3358cc14..eb320311e7f8 100644 --- a/py-polars/src/series/export.rs +++ b/py-polars/src/series/export.rs @@ -145,12 +145,16 @@ impl PySeries { /// Return the underlying Arrow array. #[allow(clippy::wrong_self_convention)] - fn to_arrow(&mut self, future: bool) -> PyResult { + fn to_arrow(&mut self, compat_level: PyCompatLevel) -> PyResult { self.rechunk(true); Python::with_gil(|py| { let pyarrow = py.import_bound("pyarrow")?; - interop::arrow::to_py::to_py_array(self.series.to_arrow(0, future), py, &pyarrow) + interop::arrow::to_py::to_py_array( + self.series.to_arrow(0, compat_level.0), + py, + &pyarrow, + ) }) } } diff --git a/py-polars/src/series/mod.rs b/py-polars/src/series/mod.rs index 74d2c7ae8422..899ae3940191 100644 --- a/py-polars/src/series/mod.rs +++ b/py-polars/src/series/mod.rs @@ -665,7 +665,7 @@ impl PySeries { // IPC only support DataFrames so we need to convert it let mut df = self.series.clone().into_frame(); IpcStreamWriter::new(&mut buf) - .with_pl_flavor(true) + .with_compat_level(CompatLevel::newest()) .finish(&mut df) .expect("ipc writer"); Ok(PyBytes::new_bound(py, &buf).to_object(py)) diff --git a/py-polars/tests/unit/interop/test_interop.py b/py-polars/tests/unit/interop/test_interop.py index bef1c1bb694c..5dd7f442c905 100644 --- a/py-polars/tests/unit/interop/test_interop.py +++ b/py-polars/tests/unit/interop/test_interop.py @@ -5,11 +5,13 @@ import numpy as np import pandas as pd +import pyarrow import pyarrow as pa import pytest import polars as pl -from polars.exceptions import ComputeError +from polars.exceptions import ComputeError, UnstableWarning +from polars.interchange.protocol import CompatLevel from polars.testing import assert_frame_equal, assert_series_equal @@ -702,3 +704,48 @@ def test_from_numpy_different_resolution_invalid() -> None: pl.Series( np.array(["2020-01-01"], dtype="datetime64[s]"), dtype=pl.Datetime("us") ) + + +def test_compat_level(monkeypatch: pytest.MonkeyPatch) -> None: + # change these if compat level bumped + monkeypatch.setenv("POLARS_WARN_UNSTABLE", "1") + oldest = CompatLevel.oldest() + assert oldest is CompatLevel.oldest() # test singleton + assert oldest._version == 0 # type: ignore[attr-defined] + with pytest.warns(UnstableWarning): + newest = CompatLevel.newest() + assert newest is CompatLevel.newest() + assert newest._version == 1 # type: ignore[attr-defined] + + str_col = pl.Series(["awd"]) + bin_col = pl.Series([b"dwa"]) + assert str_col._newest_compat_level() == newest._version # type: ignore[attr-defined] + assert isinstance(str_col.to_arrow(), pyarrow.LargeStringArray) + assert isinstance(str_col.to_arrow(compat_level=oldest), pyarrow.LargeStringArray) + assert isinstance(str_col.to_arrow(compat_level=newest), pyarrow.StringViewArray) + assert isinstance(bin_col.to_arrow(), pyarrow.LargeBinaryArray) + assert isinstance(bin_col.to_arrow(compat_level=oldest), pyarrow.LargeBinaryArray) + assert isinstance(bin_col.to_arrow(compat_level=newest), pyarrow.BinaryViewArray) + + df = pl.DataFrame({"str_col": str_col, "bin_col": bin_col}) + assert isinstance(df.to_arrow()["str_col"][0], pyarrow.LargeStringScalar) + assert isinstance( + df.to_arrow(compat_level=oldest)["str_col"][0], pyarrow.LargeStringScalar + ) + assert isinstance( + df.to_arrow(compat_level=newest)["str_col"][0], pyarrow.StringViewScalar + ) + assert isinstance(df.to_arrow()["bin_col"][0], pyarrow.LargeBinaryScalar) + assert isinstance( + df.to_arrow(compat_level=oldest)["bin_col"][0], pyarrow.LargeBinaryScalar + ) + assert isinstance( + df.to_arrow(compat_level=newest)["bin_col"][0], pyarrow.BinaryViewScalar + ) + + assert len(df.write_ipc(None).getbuffer()) == 786 + assert len(df.write_ipc(None, compat_level=oldest).getbuffer()) == 914 + assert len(df.write_ipc(None, compat_level=newest).getbuffer()) == 786 + assert len(df.write_ipc_stream(None).getbuffer()) == 544 + assert len(df.write_ipc_stream(None, compat_level=oldest).getbuffer()) == 672 + assert len(df.write_ipc_stream(None, compat_level=newest).getbuffer()) == 544 diff --git a/py-polars/tests/unit/io/test_ipc.py b/py-polars/tests/unit/io/test_ipc.py index 1741ff116b63..1c0b7ed4516c 100644 --- a/py-polars/tests/unit/io/test_ipc.py +++ b/py-polars/tests/unit/io/test_ipc.py @@ -11,6 +11,7 @@ import polars as pl from polars.exceptions import ComputeError +from polars.interchange.protocol import CompatLevel from polars.testing import assert_frame_equal if TYPE_CHECKING: @@ -234,7 +235,7 @@ def test_from_float16() -> None: def test_binview_ipc_mmap(tmp_path: Path) -> None: df = pl.DataFrame({"foo": ["aa" * 10, "bb", None, "small", "big" * 20]}) file_path = tmp_path / "dump.ipc" - df.write_ipc(file_path, future=True) + df.write_ipc(file_path, compat_level=CompatLevel.newest()) read = pl.read_ipc(file_path, memory_map=True) assert_frame_equal(df, read) @@ -243,7 +244,7 @@ def test_list_nested_enum() -> None: dtype = pl.List(pl.Enum(["a", "b", "c"])) df = pl.DataFrame(pl.Series("list_cat", [["a", "b", "c", None]], dtype=dtype)) buffer = io.BytesIO() - df.write_ipc(buffer) + df.write_ipc(buffer, compat_level=CompatLevel.newest()) df = pl.read_ipc(buffer) assert df.get_column("list_cat").dtype == dtype @@ -256,7 +257,7 @@ def test_struct_nested_enum() -> None: ) ) buffer = io.BytesIO() - df.write_ipc(buffer) + df.write_ipc(buffer, compat_level=CompatLevel.newest()) df = pl.read_ipc(buffer) assert df.get_column("struct_cat").dtype == dtype @@ -268,7 +269,7 @@ def test_ipc_view_gc_14448() -> None: df = pl.DataFrame( pl.Series(["small"] * 10 + ["looooooong string......."] * 750).slice(20, 20) ) - df.write_ipc(f, future=True) + df.write_ipc(f, compat_level=CompatLevel.newest()) f.seek(0) assert_frame_equal(pl.read_ipc(f), df)