From 6ea53aacf1da4ded8544cd4c298b7621f1be961a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Hoffmann?= Date: Tue, 1 Jun 2021 21:41:30 +0200 Subject: [PATCH 1/3] add method to replace that takes a function RegexMatch -> String --- base/exports.jl | 1 + base/regex.jl | 29 +++++++++++++++++++++-------- test/strings/util.jl | 2 ++ 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/base/exports.jl b/base/exports.jl index 88933dad882ca..dc8172dfe4a80 100644 --- a/base/exports.jl +++ b/base/exports.jl @@ -70,6 +70,7 @@ export Rational, Regex, RegexMatch, + RegexReplacer, Returns, RoundFromZero, RoundDown, diff --git a/base/regex.jl b/base/regex.jl index 15744fe14ce47..11fae14de7d38 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -364,6 +364,17 @@ true """ function match end +function _create_match(str::Union{SubString{String}, String}, re::Regex, data::Ptr{Cvoid}) + n = div(PCRE.ovec_length(data), 2) - 1 + p = PCRE.ovec_ptr(data) + mat = SubString(str, unsafe_load(p, 1)+1, prevind(str, unsafe_load(p, 2)+1)) + cap = Union{Nothing,SubString{String}}[unsafe_load(p,2i+1) == PCRE.UNSET ? nothing : + SubString(str, unsafe_load(p,2i+1)+1, + prevind(str, unsafe_load(p,2i+2)+1)) for i=1:n] + off = Int[ unsafe_load(p,2i+1)+1 for i=1:n ] + RegexMatch(mat, cap, unsafe_load(p,1)+1, off, re) +end + function match(re::Regex, str::Union{SubString{String}, String}, idx::Integer, add_opts::UInt32=UInt32(0)) compile(re) @@ -373,14 +384,7 @@ function match(re::Regex, str::Union{SubString{String}, String}, idx::Integer, PCRE.free_match_data(data) return nothing end - n = div(PCRE.ovec_length(data), 2) - 1 - p = PCRE.ovec_ptr(data) - mat = SubString(str, unsafe_load(p, 1)+1, prevind(str, unsafe_load(p, 2)+1)) - cap = Union{Nothing,SubString{String}}[unsafe_load(p,2i+1) == PCRE.UNSET ? 
nothing : - SubString(str, unsafe_load(p,2i+1)+1, - prevind(str, unsafe_load(p,2i+2)+1)) for i=1:n] - off = Int[ unsafe_load(p,2i+1)+1 for i=1:n ] - result = RegexMatch(mat, cap, unsafe_load(p,1)+1, off, re) + result = _create_match(str, re, data) PCRE.free_match_data(data) return result end @@ -668,6 +672,15 @@ function _replace(io, repl_s::SubstitutionString, str, r, re) end end +struct RegexReplacer + f::Function +end + +function _replace(io, repl::RegexReplacer, str, r, re::RegexAndMatchData) + match = _create_match(str, re.re, re.match_data) + print(io, repl.f(match)) +end + struct RegexMatchIterator regex::Regex string::String diff --git a/test/strings/util.jl b/test/strings/util.jl index 3cf434feab113..afd7280aff061 100644 --- a/test/strings/util.jl +++ b/test/strings/util.jl @@ -310,6 +310,8 @@ end # Issue 36953 @test replace("abc", "" => "_", count=1) == "_abc" + # test replace with a RegexReplacer + @test replace("ax ay bx by", r"([ab])([xy])" => RegexReplacer(m -> uppercase(m[1]) * m[2])) === "Ax Ay Bx By" end @testset "replace many" begin From 9c10d12742191d0f874901fd3698d5f0d03685f0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Hoffmann?= Date: Sat, 5 Jun 2021 20:09:57 +0200 Subject: [PATCH 2/3] add documentation for RegexReplacer --- base/regex.jl | 31 +++++++++++++++++++++++++++++++ base/strings/util.jl | 2 ++ doc/src/base/strings.md | 1 + 3 files changed, 34 insertions(+) diff --git a/base/regex.jl b/base/regex.jl index 11fae14de7d38..85230088daa7d 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -532,6 +532,8 @@ end Stores the given string `substr` as a `SubstitutionString`, for use in regular expression substitutions. Most commonly constructed using the [`@s_str`](@ref) macro. +See also [`RegexReplacer`](@ref). + ```jldoctest julia> SubstitutionString("Hello \\\\g<name>, it's \\\\1") s"Hello \\g<name>, it's \\1" @@ -568,6 +570,8 @@ Construct a substitution string, used for regular expression substitutions.
Wit string, sequences of the form `\\N` refer to the Nth capture group in the regex, and `\\g<groupname>` refers to a named capture group with name `groupname`. +See also [`RegexReplacer`](@ref). + ```jldoctest julia> msg = "#Hello# from Julia"; @@ -672,6 +676,33 @@ function _replace(io, repl_s::SubstitutionString, str, r, re) end end +""" + RegexReplacer(f::Function) + +Create a `RegexReplacer` that can be used to +[`replace`](@ref replace(::AbstractString, ::Pair)) +[`Regex`](@ref)-matched text using the given function `f` +with access to the current match and capture groups. + +`f` must be callable with a [`RegexMatch`](@ref) and return a [`print`](@ref)-able object. + +See also [`SubstitutionString`](@ref), [`@s_str`](@ref). + +# Examples +```jldoctest +julia> s = "ax ay az bx by bz"; + +julia> replace(s, r"([ab])([xy])" => RegexReplacer(match -> uppercase(match[1]) * match[2])) +"Ax Ay az Bx By bz" + +julia> template = "the {animal} is {activity}"; + +julia> variables = Dict("animal" => "fox", "activity" => "running"); + +julia> replace(template, r"{(.*?)}" => RegexReplacer(match -> variables[match[1]])) +"the fox is running" +``` +""" struct RegexReplacer f::Function end diff --git a/base/strings/util.jl b/base/strings/util.jl index c6dad5f34bafb..32280473108fe 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -604,6 +604,8 @@ where `s` is the matched substring (when `pat` is a `AbstractPattern` or `Abstra character (when `pat` is an `AbstractChar` or a collection of `AbstractChar`). If `pat` is a regular expression and `r` is a [`SubstitutionString`](@ref), then capture group references in `r` are replaced with the corresponding matched text. +If `pat` is a regular expression and `r` is a [`RegexReplacer`](@ref), +then the matched substring is replaced with `r.f(m)` where `m` is a [`RegexMatch`](@ref). To remove instances of `pat` from `string`, set `r` to the empty `String` (`""`).
Multiple patterns can be specified, and they will be applied left-to-right diff --git a/doc/src/base/strings.md b/doc/src/base/strings.md index a7e9a8ee4eeee..d7baa29bfc6ce 100644 --- a/doc/src/base/strings.md +++ b/doc/src/base/strings.md @@ -35,6 +35,7 @@ Base.match Base.eachmatch Base.RegexMatch Base.keys(::RegexMatch) +Base.RegexReplacer Base.isless(::AbstractString, ::AbstractString) Base.:(==)(::AbstractString, ::AbstractString) Base.cmp(::AbstractString, ::AbstractString) From 9f1c9f915067e5f9b64ef1c739fe46c1aa11ace8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Hoffmann?= Date: Fri, 4 Jun 2021 21:50:25 +0200 Subject: [PATCH 3/3] add multi-replacements tests with RegexReplacer --- test/strings/util.jl | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/strings/util.jl b/test/strings/util.jl index afd7280aff061..bdd58524cdd04 100644 --- a/test/strings/util.jl +++ b/test/strings/util.jl @@ -484,6 +484,14 @@ end @test_throws ErrorException("PCRE error: unknown substring") replace(s, r"q" => s"a\1b") @test_throws ErrorException("Bad replacement string: pattern is not a Regex") replace(s, "q" => s"a\1b") end + + # test replace with a RegexReplacer + @test replace("ax ay bx by", + "ay" => "cy", + r"([ab])([xy])" => RegexReplacer(m -> uppercase(m[1]) * m[2])) === "Ax cy Bx By" + @test replace("ax ay bx by", + r"([a])([xy])" => RegexReplacer(m -> uppercase(m[1]) * m[2]), + r"([ab])([xy])" => RegexReplacer(m -> "<" * m[1] * ">" * m[2]), count=3) === "Ax Ay <b>x by" end @testset "chomp/chop" begin