Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Capture groups should be ignored in replace when literal=True #19413

Merged
merged 2 commits into from
Oct 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 10 additions & 4 deletions crates/polars-plan/src/dsl/function_expr/strings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -844,20 +844,26 @@ fn replace_n<'a>(
"replacement value length ({}) does not match string column length ({})",
len_val, ca.len(),
);
let literal = literal || is_literal_pat(&pat);
let lit = is_literal_pat(&pat);
let literal_pat = literal || lit;

if literal {
if literal_pat {
pat = escape(&pat)
}

let reg = Regex::new(&pat)?;
let lit = pat.chars().all(|c| !c.is_ascii_punctuation());

let f = |s: &'a str, val: &'a str| {
if lit && (s.len() <= 32) {
Cow::Owned(s.replacen(&pat, val, 1))
} else {
reg.replace(s, val)
// According to the docs for replace
// when literal = True then capture groups are ignored.
if literal {
reg.replace(s, NoExpand(val))
} else {
reg.replace(s, val)
}
}
};
Ok(iter_and_replace(ca, val, f))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1006,7 +1006,7 @@ def test_replace_all() -> None:
)


def test_replace_literal_no_caputures() -> None:
def test_replace_all_literal_no_caputures() -> None:
# When using literal = True, capture groups should be disabled

# Single row code path in Rust
Expand Down Expand Up @@ -1034,6 +1034,38 @@ def test_replace_literal_no_caputures() -> None:
assert df2.get_column("text2")[1] == "I lost $2 yesterday."


def test_replace_literal_no_caputures() -> None:
    # With literal=True the replacement value must be inserted verbatim:
    # "$1" / "$2" in the "amt" column are plain text, not capture-group references.
    substitute = (
        pl.col("text").str.replace("<amt>", pl.col("amt"), literal=True).alias("text2")
    )

    # One row only: exercises the single-row code path in Rust.
    single = pl.DataFrame({"text": ["I found <amt> yesterday."], "amt": ["$1"]})
    single_out = single.with_columns(substitute)
    assert single_out.get_column("text2")[0] == "I found $1 yesterday."

    # Two rows: exercises the multi-row code path in Rust.
    # The first string is shorter than 32 chars and the second is longer,
    # so both sub-paths of that code path are covered.
    texts = [
        "I found <amt> yesterday.",
        "I lost <amt> yesterday and this string is longer than 32 characters.",
    ]
    multi = pl.DataFrame({"text": texts, "amt": ["$1", "$2"]})
    replaced = multi.with_columns(substitute).get_column("text2")
    assert replaced[0] == "I found $1 yesterday."
    assert (
        replaced[1]
        == "I lost $2 yesterday and this string is longer than 32 characters."
    )


def test_replace_expressions() -> None:
df = pl.DataFrame({"foo": ["123 bla 45 asd", "xyz 678 910t"], "value": ["A", "B"]})
out = df.select([pl.col("foo").str.replace(pl.col("foo").first(), pl.col("value"))])
Expand Down