diff --git a/Cargo.lock b/Cargo.lock index c437538417388..b93ea47ee3c3d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2621,6 +2621,7 @@ dependencies = [ "hex", "indexmap", "itoa", + "itoap", "lz4", "multiversion", "num-traits", @@ -2684,7 +2685,6 @@ dependencies = [ "either", "hashbrown 0.14.3", "indexmap", - "itoap", "ndarray", "num-traits", "once_cell", diff --git a/Cargo.toml b/Cargo.toml index babc93da7ce5b..1611970c2b689 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -46,6 +46,7 @@ hashbrown = { version = "0.14", features = ["rayon", "ahash"] } hex = "0.4.3" indexmap = { version = "2", features = ["std"] } itoa = "1.0.6" +itoap = { version = "1", features = ["simd"] } atoi_simd = "0.15.5" fast-float = { version = "0.2" } memchr = "2.6" diff --git a/crates/polars-arrow/Cargo.toml b/crates/polars-arrow/Cargo.toml index 4c1285d4d03b9..dbd0910cc8eeb 100644 --- a/crates/polars-arrow/Cargo.toml +++ b/crates/polars-arrow/Cargo.toml @@ -36,6 +36,7 @@ ethnum = { workspace = true } atoi_simd = { workspace = true, optional = true } fast-float = { workspace = true, optional = true } itoa = { workspace = true, optional = true } +itoap = { workspace = true, optional = true } ryu = { workspace = true, optional = true } regex = { workspace = true, optional = true } @@ -161,7 +162,7 @@ simd = [] # polars-arrow timezones = [] dtype-array = [] -dtype-decimal = ["atoi"] +dtype-decimal = ["atoi", "itoap"] bigidx = [] nightly = [] performant = [] diff --git a/crates/polars-arrow/src/compute/cast/decimal_to.rs b/crates/polars-arrow/src/compute/cast/decimal_to.rs index e46b756baadca..ad6e979e79e22 100644 --- a/crates/polars-arrow/src/compute/cast/decimal_to.rs +++ b/crates/polars-arrow/src/compute/cast/decimal_to.rs @@ -137,3 +137,28 @@ where let from = from.as_any().downcast_ref().unwrap(); Ok(Box::new(decimal_to_integer::(from))) } + +/// Returns a [`BinaryViewArray`] where every element is the utf8 representation of the decimal. 
+#[cfg(feature = "dtype-decimal")] +pub(super) fn decimal_to_binview(from: &PrimitiveArray) -> BinaryViewArray { + let (_, from_scale) = if let ArrowDataType::Decimal(p, s) = from.data_type().to_logical_type() { + (*p, *s) + } else { + panic!("internal error: i128 is always a decimal") + }; + + let mut mutable = MutableBinaryViewArray::with_capacity(from.len()); + + for &x in from.values().iter() { + let buf = crate::legacy::compute::decimal::format_decimal(x, from_scale, false); + mutable.push_value_ignore_validity(buf.as_str().as_bytes()) + } + + mutable.freeze().with_validity(from.validity().cloned()) +} + +#[cfg(feature = "dtype-decimal")] +pub(super) fn decimal_to_binview_dyn(from: &dyn Array) -> BinaryViewArray { + let from = from.as_any().downcast_ref().unwrap(); + decimal_to_binview(from) +} diff --git a/crates/polars-arrow/src/compute/cast/mod.rs b/crates/polars-arrow/src/compute/cast/mod.rs index 70989f2879189..988b783bd59ff 100644 --- a/crates/polars-arrow/src/compute/cast/mod.rs +++ b/crates/polars-arrow/src/compute/cast/mod.rs @@ -778,6 +778,8 @@ fn from_to_binview( Binary => binary_to_binview::(array.as_any().downcast_ref().unwrap()), FixedSizeBinary(_) => fixed_size_binary_to_binview(array.as_any().downcast_ref().unwrap()), LargeBinary => binary_to_binview::(array.as_any().downcast_ref().unwrap()), + #[cfg(feature = "dtype-decimal")] + Decimal(_, _) => decimal_to_binview_dyn(array), _ => polars_bail!(InvalidOperation: "casting from {from_type:?} to {to_type:?} not supported", ), diff --git a/crates/polars-arrow/src/legacy/compute/decimal.rs b/crates/polars-arrow/src/legacy/compute/decimal.rs index d3f5d7862cc92..12a9fc53f6c1d 100644 --- a/crates/polars-arrow/src/legacy/compute/decimal.rs +++ b/crates/polars-arrow/src/legacy/compute/decimal.rs @@ -106,6 +106,109 @@ pub(crate) fn deserialize_decimal( } } +const BUF_LEN: usize = 48; + +#[derive(Clone, Copy)] +pub struct FormatBuffer { + data: [u8; BUF_LEN], + len: usize, +} + +impl FormatBuffer { + 
#[inline] + pub const fn new() -> Self { + Self { + data: [0; BUF_LEN], + len: 0, + } + } + + #[inline] + pub fn as_str(&self) -> &str { + unsafe { std::str::from_utf8_unchecked(&self.data[..self.len]) } + } +} + +const POW10: [i128; 38] = [ + 1, + 10, + 100, + 1000, + 10000, + 100000, + 1000000, + 10000000, + 100000000, + 1000000000, + 10000000000, + 100000000000, + 1000000000000, + 10000000000000, + 100000000000000, + 1000000000000000, + 10000000000000000, + 100000000000000000, + 1000000000000000000, + 10000000000000000000, + 100000000000000000000, + 1000000000000000000000, + 10000000000000000000000, + 100000000000000000000000, + 1000000000000000000000000, + 10000000000000000000000000, + 100000000000000000000000000, + 1000000000000000000000000000, + 10000000000000000000000000000, + 100000000000000000000000000000, + 1000000000000000000000000000000, + 10000000000000000000000000000000, + 100000000000000000000000000000000, + 1000000000000000000000000000000000, + 10000000000000000000000000000000000, + 100000000000000000000000000000000000, + 1000000000000000000000000000000000000, + 10000000000000000000000000000000000000, +]; + +pub fn format_decimal(v: i128, scale: usize, trim_zeros: bool) -> FormatBuffer { + const ZEROS: [u8; BUF_LEN] = [b'0'; BUF_LEN]; + + let mut buf = FormatBuffer::new(); + let factor = POW10[scale]; //10_i128.pow(scale as _); + let (div, rem) = (v / factor, v.abs() % factor); + + unsafe { + let mut ptr = buf.data.as_mut_ptr(); + if div == 0 && v < 0 { + *ptr = b'-'; + ptr = ptr.add(1); + buf.len = 1; + } + let n_whole = itoap::write_to_ptr(ptr, div); + buf.len += n_whole; + if rem != 0 { + ptr = ptr.add(n_whole); + *ptr = b'.'; + ptr = ptr.add(1); + let mut frac_buf = [0_u8; BUF_LEN]; + let n_frac = itoap::write_to_ptr(frac_buf.as_mut_ptr(), rem); + std::ptr::copy_nonoverlapping(ZEROS.as_ptr(), ptr, scale - n_frac); + ptr = ptr.add(scale - n_frac); + std::ptr::copy_nonoverlapping(frac_buf.as_mut_ptr(), ptr, n_frac); + buf.len += 1 + scale; + if 
trim_zeros { + ptr = ptr.add(n_frac - 1); + while *ptr == b'0' { + ptr = ptr.sub(1); + buf.len -= 1; + } + } + } + } + + buf +} + #[cfg(test)] mod test { use super::*; diff --git a/crates/polars-core/Cargo.toml b/crates/polars-core/Cargo.toml index c0ed8aa80ae4e..052fe4e890d81 100644 --- a/crates/polars-core/Cargo.toml +++ b/crates/polars-core/Cargo.toml @@ -25,7 +25,6 @@ comfy-table = { version = "7.0.1", default_features = false, optional = true } either = { workspace = true } hashbrown = { workspace = true } indexmap = { workspace = true } -itoap = { version = "1", optional = true, features = ["simd"] } ndarray = { workspace = true, optional = true } num-traits = { workspace = true } once_cell = { workspace = true } @@ -109,7 +108,7 @@ dtype-time = ["temporal"] dtype-array = ["arrow/dtype-array", "polars-compute/dtype-array"] dtype-i8 = [] dtype-i16 = [] -dtype-decimal = ["dep:itoap", "arrow/dtype-decimal"] +dtype-decimal = ["arrow/dtype-decimal"] dtype-u8 = [] dtype-u16 = [] dtype-categorical = [] diff --git a/crates/polars-core/src/fmt.rs b/crates/polars-core/src/fmt.rs index 3dc4239829964..d836283aa9723 100644 --- a/crates/polars-core/src/fmt.rs +++ b/crates/polars-core/src/fmt.rs @@ -1153,126 +1153,16 @@ impl Series { } } +#[inline] #[cfg(feature = "dtype-decimal")] -mod decimal { - use std::fmt::Formatter; - use std::{fmt, ptr, str}; +pub fn fmt_decimal(f: &mut Formatter<'_>, v: i128, scale: usize) -> fmt::Result { + use arrow::legacy::compute::decimal::format_decimal; - use crate::fmt::{fmt_float_string, get_trim_decimal_zeros}; - - const BUF_LEN: usize = 48; - - #[derive(Clone, Copy)] - pub struct FormatBuffer { - data: [u8; BUF_LEN], - len: usize, - } - - impl FormatBuffer { - #[inline] - pub const fn new() -> Self { - Self { - data: [0; BUF_LEN], - len: 0, - } - } - - #[inline] - pub fn as_str(&self) -> &str { - unsafe { str::from_utf8_unchecked(&self.data[..self.len]) } - } - } - - const POW10: [i128; 38] = [ - 1, - 10, - 100, - 1000, - 10000, - 
100000, - 1000000, - 10000000, - 100000000, - 1000000000, - 10000000000, - 100000000000, - 1000000000000, - 10000000000000, - 100000000000000, - 1000000000000000, - 10000000000000000, - 100000000000000000, - 1000000000000000000, - 10000000000000000000, - 100000000000000000000, - 1000000000000000000000, - 10000000000000000000000, - 100000000000000000000000, - 1000000000000000000000000, - 10000000000000000000000000, - 100000000000000000000000000, - 1000000000000000000000000000, - 10000000000000000000000000000, - 100000000000000000000000000000, - 1000000000000000000000000000000, - 10000000000000000000000000000000, - 100000000000000000000000000000000, - 1000000000000000000000000000000000, - 10000000000000000000000000000000000, - 100000000000000000000000000000000000, - 1000000000000000000000000000000000000, - 10000000000000000000000000000000000000, - ]; - - pub fn format_decimal(v: i128, scale: usize, trim_zeros: bool) -> FormatBuffer { - const ZEROS: [u8; BUF_LEN] = [b'0'; BUF_LEN]; - - let mut buf = FormatBuffer::new(); - let factor = POW10[scale]; //10_i128.pow(scale as _); - let (div, rem) = (v / factor, v.abs() % factor); - - unsafe { - let mut ptr = buf.data.as_mut_ptr(); - if div == 0 && v < 0 { - *ptr = b'-'; - ptr = ptr.add(1); - buf.len = 1; - } - let n_whole = itoap::write_to_ptr(ptr, div); - buf.len += n_whole; - if rem != 0 { - ptr = ptr.add(n_whole); - *ptr = b'.'; - ptr = ptr.add(1); - let mut frac_buf = [0_u8; BUF_LEN]; - let n_frac = itoap::write_to_ptr(frac_buf.as_mut_ptr(), rem); - ptr::copy_nonoverlapping(ZEROS.as_ptr(), ptr, scale - n_frac); - ptr = ptr.add(scale - n_frac); - ptr::copy_nonoverlapping(frac_buf.as_mut_ptr(), ptr, n_frac); - buf.len += 1 + scale; - if trim_zeros { - ptr = ptr.add(n_frac - 1); - while *ptr == b'0' { - ptr = ptr.sub(1); - buf.len -= 1; - } - } - } - } - - buf - } - - #[inline] - pub fn fmt_decimal(f: &mut Formatter<'_>, v: i128, scale: usize) -> fmt::Result { - let trim_zeros = get_trim_decimal_zeros(); - 
f.write_str(fmt_float_string(format_decimal(v, scale, trim_zeros).as_str()).as_str()) - } + let trim_zeros = get_trim_decimal_zeros(); + let repr = format_decimal(v, scale, trim_zeros); + f.write_str(fmt_float_string(repr.as_str()).as_str()) } -#[cfg(feature = "dtype-decimal")] -pub use decimal::fmt_decimal; - #[cfg(all( test, feature = "temporal",