Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Expressify str.zfill #13790

Merged
merged 1 commit into from
Jan 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion crates/polars-ops/src/chunked_array/strings/namespace.rs
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ pub trait StringNameSpaceImpl: AsString {
/// Strings with length equal to or greater than the given length are
/// returned as-is.
#[cfg(feature = "string_pad")]
fn zfill(&self, length: usize) -> StringChunked {
fn zfill(&self, length: &UInt64Chunked) -> StringChunked {
let ca = self.as_string();
pad::zfill(ca, length)
}
Expand Down
80 changes: 47 additions & 33 deletions crates/polars-ops/src/chunked_array/strings/pad.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::fmt::Write;

use polars_core::prelude::StringChunked;
use polars_core::prelude::arity::broadcast_binary_elementwise;
use polars_core::prelude::{StringChunked, UInt64Chunked};

pub(super) fn pad_end<'a>(ca: &'a StringChunked, length: usize, fill_char: char) -> StringChunked {
// amortize allocation
Expand Down Expand Up @@ -50,38 +51,51 @@ pub(super) fn pad_start<'a>(
ca.apply_mut(f)
}

pub(super) fn zfill<'a>(ca: &'a StringChunked, length: usize) -> StringChunked {
fn zfill_fn<'a>(s: Option<&'a str>, len: Option<u64>, buf: &mut String) -> Option<&'a str> {
match (s, len) {
(Some(s), Some(length)) => {
let length = length.saturating_sub(s.len() as u64);
if length == 0 {
return Some(s);
}
buf.clear();
if let Some(stripped) = s.strip_prefix('-') {
write!(
buf,
"-{:0length$}{value}",
0,
length = length as usize,
value = stripped
)
.unwrap();
} else {
write!(
buf,
"{:0length$}{value}",
0,
length = length as usize,
value = s
)
.unwrap();
};
// extend lifetime
// lifetime is bound to 'a
let slice = buf.as_str();
Some(unsafe { std::mem::transmute::<&str, &'a str>(slice) })
},
_ => None,
}
}

pub(super) fn zfill<'a>(ca: &'a StringChunked, length: &'a UInt64Chunked) -> StringChunked {
// amortize allocation
let mut buf = String::new();
let f = |s: &'a str| {
let length = length.saturating_sub(s.len());
if length == 0 {
return s;
}
buf.clear();
if let Some(stripped) = s.strip_prefix('-') {
write!(
&mut buf,
"-{:0length$}{value}",
0,
length = length,
value = stripped
)
.unwrap();
} else {
write!(
&mut buf,
"{:0length$}{value}",
0,
length = length,
value = s
)
.unwrap();
};
// extend lifetime
// lifetime is bound to 'a
let slice = buf.as_str();
unsafe { std::mem::transmute::<&str, &'a str>(slice) }
};
ca.apply_mut(f)
fn infer<F: for<'a> FnMut(Option<&'a str>, Option<u64>) -> Option<&'a str>>(f: F) -> F where {
f
}
broadcast_binary_elementwise(
ca,
length,
infer(|opt_s, opt_len| zfill_fn(opt_s, opt_len, &mut buf)),
)
}
16 changes: 9 additions & 7 deletions crates/polars-plan/src/dsl/function_expr/strings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ pub enum StringFunction {
Titlecase,
Uppercase,
#[cfg(feature = "string_pad")]
ZFill(usize),
ZFill,
#[cfg(feature = "find_many")]
ContainsMany {
ascii_case_insensitive: bool,
Expand Down Expand Up @@ -163,7 +163,7 @@ impl StringFunction {
Uppercase | Lowercase | StripChars | StripCharsStart | StripCharsEnd | StripPrefix
| StripSuffix | Slice => mapper.with_same_dtype(),
#[cfg(feature = "string_pad")]
PadStart { .. } | PadEnd { .. } | ZFill { .. } => mapper.with_same_dtype(),
PadStart { .. } | PadEnd { .. } | ZFill => mapper.with_same_dtype(),
#[cfg(feature = "dtype-struct")]
SplitExact { n, .. } => mapper.with_dtype(DataType::Struct(
(0..n + 1)
Expand Down Expand Up @@ -257,7 +257,7 @@ impl Display for StringFunction {
ToDecimal(_) => "to_decimal",
Uppercase => "uppercase",
#[cfg(feature = "string_pad")]
ZFill(_) => "zfill",
ZFill => "zfill",
#[cfg(feature = "find_many")]
ContainsMany { .. } => "contains_many",
#[cfg(feature = "find_many")]
Expand Down Expand Up @@ -298,8 +298,8 @@ impl From<StringFunction> for SpecialEq<Arc<dyn SeriesUdf>> {
map!(strings::pad_start, length, fill_char)
},
#[cfg(feature = "string_pad")]
ZFill(alignment) => {
map!(strings::zfill, alignment)
ZFill => {
map_as_slice!(strings::zfill)
},
#[cfg(feature = "temporal")]
Strptime(dtype, options) => {
Expand Down Expand Up @@ -472,8 +472,10 @@ pub(super) fn pad_end(s: &Series, length: usize, fill_char: char) -> PolarsResul
}

#[cfg(feature = "string_pad")]
pub(super) fn zfill(s: &Series, length: usize) -> PolarsResult<Series> {
let ca = s.str()?;
pub(super) fn zfill(s: &[Series]) -> PolarsResult<Series> {
let ca = s[0].str()?;
let length_s = s[1].strict_cast(&DataType::UInt64)?;
let length = length_s.u64()?;
Ok(ca.zfill(length).into_series())
}

Expand Down
5 changes: 3 additions & 2 deletions crates/polars-plan/src/dsl/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -191,8 +191,9 @@ impl StringNameSpace {
/// Strings with length equal to or greater than the given length are
/// returned as-is.
#[cfg(feature = "string_pad")]
pub fn zfill(self, length: usize) -> Expr {
self.0.map_private(StringFunction::ZFill(length).into())
pub fn zfill(self, length: Expr) -> Expr {
self.0
.map_many_private(StringFunction::ZFill.into(), &[length], false, false)
}

/// Find the index of a literal substring within another string value.
Expand Down
3 changes: 2 additions & 1 deletion py-polars/polars/expr/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -919,7 +919,7 @@ def pad_end(self, length: int, fill_char: str = " ") -> Expr:
return wrap_expr(self._pyexpr.str_pad_end(length, fill_char))

@deprecate_renamed_parameter("alignment", "length", version="0.19.12")
def zfill(self, length: int) -> Expr:
def zfill(self, length: int | IntoExprColumn) -> Expr:
"""
Pad the start of the string with zeros until it reaches the given length.

Expand Down Expand Up @@ -957,6 +957,7 @@ def zfill(self, length: int) -> Expr:
│ null ┆ null │
└────────┴────────┘
"""
length = parse_as_expression(length)
return wrap_expr(self._pyexpr.str_zfill(length))

def contains(
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/series/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -1416,7 +1416,7 @@ def pad_end(self, length: int, fill_char: str = " ") -> Series:
"""

@deprecate_renamed_parameter("alignment", "length", version="0.19.12")
def zfill(self, length: int) -> Series:
def zfill(self, length: int | IntoExprColumn) -> Series:
"""
Pad the start of the string with zeros until it reaches the given length.

Expand Down
4 changes: 2 additions & 2 deletions py-polars/src/expr/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,8 @@ impl PyExpr {
self.inner.clone().str().pad_end(length, fill_char).into()
}

fn str_zfill(&self, length: usize) -> Self {
self.inner.clone().str().zfill(length).into()
fn str_zfill(&self, length: Self) -> Self {
self.inner.clone().str().zfill(length.inner).into()
}

#[pyo3(signature = (pat, literal, strict))]
Expand Down
22 changes: 22 additions & 0 deletions py-polars/tests/unit/namespaces/string/test_pad.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,28 @@ def test_str_zfill() -> None:
assert df["num"].cast(str).str.zfill(5).to_list() == out


def test_str_zfill_expr() -> None:
df = pl.DataFrame(
{
"num": ["-10", "-1", "0", "1", "10", None, "1"],
"len": [3, 4, 3, 2, 5, 3, None],
}
)
out = df.select(
all_expr=pl.col("num").str.zfill(pl.col("len")),
str_lit=pl.lit("10").str.zfill(pl.col("len")),
len_lit=pl.col("num").str.zfill(5),
)
expected = pl.DataFrame(
{
"all_expr": ["-10", "-001", "000", "01", "00010", None, None],
"str_lit": ["010", "0010", "010", "10", "00010", "010", None],
"len_lit": ["-0010", "-0001", "00000", "00001", "00010", None, "00001"],
}
)
assert_frame_equal(out, expected)


def test_str_ljust_deprecated() -> None:
s = pl.Series(["a", "bc", "def"])

Expand Down
Loading