Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix bug in SubstitutionString replace #30513

Merged
merged 14 commits into from
Aug 16, 2019
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ New language features

* Support for Unicode 12.1.0 ([#32002]).
* Methods can now be added to an abstract type ([#31916]).
* Added argument `keep` to `unescape_string` ([#27125]).

Language changes
----------------
Expand Down
9 changes: 6 additions & 3 deletions base/regex.jl
Original file line number Diff line number Diff line change
Expand Up @@ -425,12 +425,15 @@ function _write_capture(io, re::RegexAndMatchData, group)
io.size = max(io.size, io.ptr - 1)
end


const SUB_CHAR = '\\'
nalimilan marked this conversation as resolved.
Show resolved Hide resolved
const GROUP_CHAR = 'g'
const KEEP_ESC = [SUB_CHAR, GROUP_CHAR, '0':'9'...]

function _replace(io, repl_s::SubstitutionString, str, r, re::RegexAndMatchData)
SUB_CHAR = '\\'
GROUP_CHAR = 'g'
LBRACKET = '<'
RBRACKET = '>'
repl = repl_s.string
repl = unescape_string(repl_s.string, KEEP_ESC)
i = firstindex(repl)
e = lastindex(repl)
while i <= e
Expand Down
15 changes: 12 additions & 3 deletions base/strings/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -373,10 +373,12 @@ end
# TODO: handle unescaping invalid UTF-8 sequences
"""
unescape_string(str::AbstractString, keep = ())::AbstractString
unescape_string(io, str::AbstractString)::Nothing
unescape_string(io, s::AbstractString, keep = ())::Nothing

General unescaping of traditional C and Unicode escape sequences. The first form returns
the unescaped string, the second prints the result to `io`.
The argument `keep` specifies a collection of characters which (along with backslashes) are
to be kept as they are.

The following escape sequences are recognised:
- Escaped backslash (`\\\\`)
Expand All @@ -396,17 +398,22 @@ julia> unescape_string("\\\\u03c0") # unicode

julia> unescape_string("\\\\101") # octal
"A"

julia> unescape_string("aaa \\\\g \\\\n", ['g']) # using `keep` argument
"aaa \\\\g \\n"
```

## See also
[`escape_string`](@ref).
"""
function unescape_string(io, s::AbstractString)
function unescape_string(io::IO, s::AbstractString, keep = ())
a = Iterators.Stateful(s)
for c in a
if !isempty(a) && c == '\\'
c = popfirst!(a)
if c == 'x' || c == 'u' || c == 'U'
if c in keep
print(io, '\\', c)
elseif c == 'x' || c == 'u' || c == 'U'
n = k = 0
m = c == 'x' ? 2 :
c == 'u' ? 4 : 8
Expand Down Expand Up @@ -456,6 +463,8 @@ function unescape_string(io, s::AbstractString)
end
end
end
unescape_string(s::AbstractString, keep = ()) =
sprint(unescape_string, s, keep; sizehint=lastindex(s))
unescape_string(s::AbstractString) = sprint(unescape_string, s, sizehint=lastindex(s))

"""
Expand Down
6 changes: 6 additions & 0 deletions test/regex.jl
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,12 @@
@test_throws ArgumentError match(r"test", GenericString("this is a test"))
@test_throws ArgumentError findfirst(r"test", GenericString("this is a test"))

# Issue 27125
msg = "#Hello# from Julia"
re = r"#(.+)# from (?<name>\w+)"
subst = s"FROM: \g<name>\n MESSAGE: \1"
@test replace(msg, re => subst) == "FROM: Julia\n MESSAGE: Hello"

# findall:
@test findall(r"\w+", "foo bar") == [1:3, 5:7]
@test findall(r"\w+", "foo bar", overlap=true) == [1:3, 2:3, 3:3, 5:7, 6:7, 7:7]
Expand Down
4 changes: 4 additions & 0 deletions test/strings/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,10 @@
@test "\x01" == unescape_string("\\x01")
@test "\x0f" == unescape_string("\\x0f")
@test "\x0F" == unescape_string("\\x0F")

str= "aaa \\g \\n"
@test "aaa \\g \n" == unescape_string(str, ['g'])
@test "aaa \\g \\n" == unescape_string(str, ['g', 'n'])
end
end
@testset "join()" begin
Expand Down