From 2dd14736aa2983bdcbcf98d550a7becb6df12406 Mon Sep 17 00:00:00 2001 From: sam0410 Date: Wed, 26 Dec 2018 03:26:12 +0530 Subject: [PATCH 1/9] Fix bug in SubstitutionString --- base/regex.jl | 2 ++ base/strings/io.jl | 9 +++++---- test/regex.jl | 6 ++++++ 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/base/regex.jl b/base/regex.jl index 384d6ca3dfa21..b954aaf31b235 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -309,6 +309,8 @@ function _replace(io, repl_s::SubstitutionString, str, r, re) LBRACKET = '<' RBRACKET = '>' repl = repl_s.string + keep_esc = [SUB_CHAR, GROUP_CHAR, collect('0':'9')...] + repl = unescape_string(repl_s.string, keep_esc) i = firstindex(repl) e = lastindex(repl) while i <= e diff --git a/base/strings/io.jl b/base/strings/io.jl index 71767cefb52af..e96d1481a4161 100644 --- a/base/strings/io.jl +++ b/base/strings/io.jl @@ -382,12 +382,14 @@ julia> unescape_string("\\\\101") # octal ## See also [`escape_string`](@ref). """ -function unescape_string(io, s::AbstractString) +function unescape_string(io, s::AbstractString, keep_esc::AbstractArray{<:AbstractChar}=Char[]) a = Iterators.Stateful(s) for c in a if !isempty(a) && c == '\\' c = popfirst!(a) - if c == 'x' || c == 'u' || c == 'U' + if c in keep_esc + print(io, '\\', c) + elseif c == 'x' || c == 'u' || c == 'U' n = k = 0 m = c == 'x' ? 2 : c == 'u' ? 
4 : 8 @@ -437,8 +439,7 @@ function unescape_string(io, s::AbstractString) end end end -unescape_string(s::AbstractString) = sprint(unescape_string, s, sizehint=lastindex(s)) - +unescape_string(s::AbstractString, keep_esc::AbstractArray{<:AbstractChar}=Char[]) = sprint(unescape_string, s, keep_esc; sizehint=lastindex(s)) macro b_str(s) v = codeunits(unescape_string(s)) diff --git a/test/regex.jl b/test/regex.jl index 223a0f15efb3c..0afcd57f96b6d 100644 --- a/test/regex.jl +++ b/test/regex.jl @@ -46,6 +46,12 @@ @test_throws ArgumentError match(r"test", GenericString("this is a test")) @test_throws ArgumentError findfirst(r"test", GenericString("this is a test")) + # Issue 27125 + msg = "#Hello# from Julia" + re = r"#(.+)# from (?\w+)" + subst = s"FROM: \g\n MESSAGE: \1" + @test replace(msg, re => subst) == "FROM: Julia\n MESSAGE: Hello" + # Named subpatterns let m = match(r"(?.)(.)(?.)", "xyz") @test (m[:a], m[2], m["b"]) == ("x", "y", "z") From 7525c64efe00d5e49b4ef38e87643187b06ed08b Mon Sep 17 00:00:00 2001 From: Alex Arslan Date: Wed, 26 Dec 2018 18:44:33 -0800 Subject: [PATCH 2/9] Fix whitespace Incorrect indentation and overly long line --- base/strings/io.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/base/strings/io.jl b/base/strings/io.jl index e96d1481a4161..0181b70639e02 100644 --- a/base/strings/io.jl +++ b/base/strings/io.jl @@ -388,7 +388,7 @@ function unescape_string(io, s::AbstractString, keep_esc::AbstractArray{<:Abstra if !isempty(a) && c == '\\' c = popfirst!(a) if c in keep_esc - print(io, '\\', c) + print(io, '\\', c) elseif c == 'x' || c == 'u' || c == 'U' n = k = 0 m = c == 'x' ? 
2 : @@ -439,7 +439,8 @@ function unescape_string(io, s::AbstractString, keep_esc::AbstractArray{<:Abstra end end end -unescape_string(s::AbstractString, keep_esc::AbstractArray{<:AbstractChar}=Char[]) = sprint(unescape_string, s, keep_esc; sizehint=lastindex(s)) +unescape_string(s::AbstractString, keep_esc::AbstractArray{<:AbstractChar}=Char[]) = + sprint(unescape_string, s, keep_esc; sizehint=lastindex(s)) macro b_str(s) v = codeunits(unescape_string(s)) From 247a99e264f5b755e3cfca753815d0b0cd6fc765 Mon Sep 17 00:00:00 2001 From: sam0410 Date: Fri, 28 Dec 2018 16:12:48 +0530 Subject: [PATCH 3/9] add news and description --- NEWS.md | 1 + base/regex.jl | 8 ++++---- base/strings/io.jl | 7 ++++--- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/NEWS.md b/NEWS.md index ebb6d68faa426..c1b8ec05b175b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -22,6 +22,7 @@ New library functions Standard library changes ------------------------ +* Added argument `keep` to `unescape_string` ([#27125]). #### LinearAlgebra diff --git a/base/regex.jl b/base/regex.jl index b954aaf31b235..ae0d9fb7f6d5d 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -303,13 +303,13 @@ function _write_capture(io, re, group) io.size = max(io.size, io.ptr - 1) end +const SUB_CHAR = '\\' +const GROUP_CHAR = 'g' +const keep_esc = [SUB_CHAR, GROUP_CHAR, collect('0':'9')...] + function _replace(io, repl_s::SubstitutionString, str, r, re) - SUB_CHAR = '\\' - GROUP_CHAR = 'g' LBRACKET = '<' RBRACKET = '>' - repl = repl_s.string - keep_esc = [SUB_CHAR, GROUP_CHAR, collect('0':'9')...] 
repl = unescape_string(repl_s.string, keep_esc) i = firstindex(repl) e = lastindex(repl) diff --git a/base/strings/io.jl b/base/strings/io.jl index e96d1481a4161..258f02e86fc98 100644 --- a/base/strings/io.jl +++ b/base/strings/io.jl @@ -354,10 +354,11 @@ end # TODO: handle unescaping invalid UTF-8 sequences """ unescape_string(str::AbstractString)::AbstractString - unescape_string(io, str::AbstractString)::Nothing + unescape_string(io, str::AbstractString, keep::AbstractArray{<:AbstractChar} = nothing)::Nothing General unescaping of traditional C and Unicode escape sequences. The first form returns the escaped string, the second prints the result to `io`. +The argument `keep` specifies an array of characters which are to be kept as it is. The following escape sequences are recognised: - Escaped backslash (`\\\\`) @@ -382,12 +383,12 @@ julia> unescape_string("\\\\101") # octal ## See also [`escape_string`](@ref). """ -function unescape_string(io, s::AbstractString, keep_esc::AbstractArray{<:AbstractChar}=Char[]) +function unescape_string(io, s::AbstractString, keep::AbstractArray{<:AbstractChar} = nothing) a = Iterators.Stateful(s) for c in a if !isempty(a) && c == '\\' c = popfirst!(a) - if c in keep_esc + if c in keep print(io, '\\', c) elseif c == 'x' || c == 'u' || c == 'U' n = k = 0 From d2a1826d1c15582ba76e2ad97b2e267f5d2c4450 Mon Sep 17 00:00:00 2001 From: sam0410 Date: Fri, 28 Dec 2018 21:34:33 +0530 Subject: [PATCH 4/9] use () --- base/strings/io.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/base/strings/io.jl b/base/strings/io.jl index 404246ef39892..bea696f0f13a8 100644 --- a/base/strings/io.jl +++ b/base/strings/io.jl @@ -383,7 +383,7 @@ julia> unescape_string("\\\\101") # octal ## See also [`escape_string`](@ref). 
""" -function unescape_string(io, s::AbstractString, keep::AbstractArray{<:AbstractChar} = nothing) +function unescape_string(io, s::AbstractString, keep = ()) a = Iterators.Stateful(s) for c in a if !isempty(a) && c == '\\' @@ -440,8 +440,8 @@ function unescape_string(io, s::AbstractString, keep::AbstractArray{<:AbstractCh end end end -unescape_string(s::AbstractString, keep_esc::AbstractArray{<:AbstractChar}=Char[]) = - sprint(unescape_string, s, keep_esc; sizehint=lastindex(s)) +unescape_string(s::AbstractString, keep = ())= + sprint(unescape_string, s, keep; sizehint=lastindex(s)) macro b_str(s) v = codeunits(unescape_string(s)) From ac2d5d6d7802716e61a51eba24acc85bea8f89bf Mon Sep 17 00:00:00 2001 From: sam0410 Date: Sat, 29 Dec 2018 13:30:22 +0530 Subject: [PATCH 5/9] add changes --- NEWS.md | 2 +- base/strings/io.jl | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/NEWS.md b/NEWS.md index c1b8ec05b175b..65ceb113535eb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,7 @@ New language features * The `extrema` function now accepts a function argument in the same manner as `minimum` and `maximum` ([#30323]). +* Added argument `keep` to `unescape_string` ([#27125]). Language changes ---------------- @@ -22,7 +23,6 @@ New library functions Standard library changes ------------------------ -* Added argument `keep` to `unescape_string` ([#27125]). #### LinearAlgebra diff --git a/base/strings/io.jl b/base/strings/io.jl index bea696f0f13a8..e5169c94bbeeb 100644 --- a/base/strings/io.jl +++ b/base/strings/io.jl @@ -354,11 +354,12 @@ end # TODO: handle unescaping invalid UTF-8 sequences """ unescape_string(str::AbstractString)::AbstractString - unescape_string(io, str::AbstractString, keep::AbstractArray{<:AbstractChar} = nothing)::Nothing + unescape_string(io, s::AbstractString, keep::Union{AbstractArray{<:AbstractChar} , Nothing} = nothing)::Nothing General unescaping of traditional C and Unicode escape sequences. 
The first form returns the escaped string, the second prints the result to `io`. -The argument `keep` specifies an array of characters which are to be kept as it is. +The argument `keep` specifies a collection of characters which (along with backslashes) are +to be kept as they are. The following escape sequences are recognised: - Escaped backslash (`\\\\`) @@ -383,13 +384,13 @@ julia> unescape_string("\\\\101") # octal ## See also [`escape_string`](@ref). """ -function unescape_string(io, s::AbstractString, keep = ()) +function unescape_string(io::IO, s::AbstractString, keep::Union{AbstractArray{<:AbstractChar} , Nothing} = nothing) a = Iterators.Stateful(s) for c in a if !isempty(a) && c == '\\' c = popfirst!(a) if c in keep - print(io, '\\', c) + print(io, '\\', c) elseif c == 'x' || c == 'u' || c == 'U' n = k = 0 m = c == 'x' ? 2 : @@ -440,7 +441,7 @@ function unescape_string(io, s::AbstractString, keep = ()) end end end -unescape_string(s::AbstractString, keep = ())= +unescape_string(s::AbstractString, keep::Union{AbstractArray{<:AbstractChar} , Nothing} = nothing)= sprint(unescape_string, s, keep; sizehint=lastindex(s)) macro b_str(s) From 9341e9d26236d4c9f6a253e281175516a2df5387 Mon Sep 17 00:00:00 2001 From: sam0410 Date: Sat, 29 Dec 2018 14:12:08 +0530 Subject: [PATCH 6/9] Add docs and test for unescape_string --- base/strings/io.jl | 5 ++++- test/strings/io.jl | 4 ++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/base/strings/io.jl b/base/strings/io.jl index e5169c94bbeeb..e5fa810af7c18 100644 --- a/base/strings/io.jl +++ b/base/strings/io.jl @@ -358,7 +358,7 @@ end General unescaping of traditional C and Unicode escape sequences. The first form returns the escaped string, the second prints the result to `io`. -The argument `keep` specifies a collection of characters which (along with backslashes) are +The argument `keep` specifies a collection of characters which (along with backslashes) are to be kept as they are.
The following escape sequences are recognised: @@ -379,6 +379,9 @@ julia> unescape_string("\\\\u03c0") # unicode julia> unescape_string("\\\\101") # octal "A" + +julia> unescape_string("aaa \\g \\n", ['g']) # using `keep` argument +"aaa \\g \n" ``` ## See also diff --git a/test/strings/io.jl b/test/strings/io.jl index e0d3c2284d81a..2454323551f17 100644 --- a/test/strings/io.jl +++ b/test/strings/io.jl @@ -143,6 +143,10 @@ @test "\x01" == unescape_string("\\x01") @test "\x0f" == unescape_string("\\x0f") @test "\x0F" == unescape_string("\\x0F") + + str= "aaa \\g \\n" + @test "aaa \\g \n" == unescape_string(str, ['g']) + @test "aaa \\g \\n" == unescape_string(str, ['g', 'n']) end end @testset "join()" begin From 49dd2fee18182acb5385d62def556a16c0ded1c5 Mon Sep 17 00:00:00 2001 From: sam0410 Date: Sat, 29 Dec 2018 20:06:33 +0530 Subject: [PATCH 7/9] Add () --- base/strings/io.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/base/strings/io.jl b/base/strings/io.jl index e5fa810af7c18..db8689cfe5600 100644 --- a/base/strings/io.jl +++ b/base/strings/io.jl @@ -354,7 +354,7 @@ end # TODO: handle unescaping invalid UTF-8 sequences """ unescape_string(str::AbstractString)::AbstractString - unescape_string(io, s::AbstractString, keep::Union{AbstractArray{<:AbstractChar} , Nothing} = nothing)::Nothing + unescape_string(io, s::AbstractString, keep = ())::Nothing General unescaping of traditional C and Unicode escape sequences. The first form returns the escaped string, the second prints the result to `io`. @@ -387,7 +387,7 @@ julia> unescape_string("aaa \\g \\n", ['g']) # using `keep` argument ## See also [`escape_string`](@ref). 
""" -function unescape_string(io::IO, s::AbstractString, keep::Union{AbstractArray{<:AbstractChar} , Nothing} = nothing) +function unescape_string(io::IO, s::AbstractString, keep = ()) a = Iterators.Stateful(s) for c in a if !isempty(a) && c == '\\' @@ -444,7 +444,7 @@ function unescape_string(io::IO, s::AbstractString, keep::Union{AbstractArray{<: end end end -unescape_string(s::AbstractString, keep::Union{AbstractArray{<:AbstractChar} , Nothing} = nothing)= +unescape_string(s::AbstractString, keep = ()) = sprint(unescape_string, s, keep; sizehint=lastindex(s)) macro b_str(s) From d10b0c421e057f6f032db778fc1c9a8449dada09 Mon Sep 17 00:00:00 2001 From: sam0410 Date: Thu, 10 Jan 2019 02:08:34 +0530 Subject: [PATCH 8/9] escape in docs --- base/strings/io.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/base/strings/io.jl b/base/strings/io.jl index db8689cfe5600..82a90a1383766 100644 --- a/base/strings/io.jl +++ b/base/strings/io.jl @@ -380,8 +380,8 @@ julia> unescape_string("\\\\u03c0") # unicode julia> unescape_string("\\\\101") # octal "A" -julia> unescape_string("aaa \\g \\n", ['g']) # using `keep` argument -"aaa \\g \n" +julia> unescape_string("aaa \\\\g \\\\n", ['g']) # using `keep` argument +"aaa \\\\g \\n" ``` ## See also From 3ea849135608d16eee8ac5a5774fece8be38182a Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Thu, 10 Jan 2019 11:08:48 +0100 Subject: [PATCH 9/9] Use uppercase for constant --- base/regex.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/base/regex.jl b/base/regex.jl index ae0d9fb7f6d5d..175b956764e2d 100644 --- a/base/regex.jl +++ b/base/regex.jl @@ -305,12 +305,12 @@ end const SUB_CHAR = '\\' const GROUP_CHAR = 'g' -const keep_esc = [SUB_CHAR, GROUP_CHAR, collect('0':'9')...] +const KEEP_ESC = [SUB_CHAR, GROUP_CHAR, '0':'9'...] 
function _replace(io, repl_s::SubstitutionString, str, r, re) LBRACKET = '<' RBRACKET = '>' - repl = unescape_string(repl_s.string, keep_esc) + repl = unescape_string(repl_s.string, KEEP_ESC) i = firstindex(repl) e = lastindex(repl) while i <= e