From 41f786777f3b5c233151594650068ab48e72746b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Fri, 13 Oct 2017 18:09:02 +0200 Subject: [PATCH 1/5] improve chop --- NEWS.md | 3 +++ base/strings/util.jl | 21 +++++++++++++++++++-- test/strings/util.jl | 20 ++++++++++++++++++++ 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/NEWS.md b/NEWS.md index 0a1ee0b1f77aa..19b8463224fe3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -252,6 +252,9 @@ This section lists changes that do not have deprecation warnings. Library improvements -------------------- + * The function `chop` now accepts two arguments `head` and `tail` allowing to specify + number of characters to remove from the head and tail tail of the string ([#?????]). + * Functions `first` and `last` now accept `nchar` argument for `AbstractString`. If this argument is used they return a string consisting of first/last `nchar` characters from the original string ([#23960]). diff --git a/base/strings/util.jl b/base/strings/util.jl index 8fd145ac4cb0c..53adb5c0a3fb4 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -66,9 +66,9 @@ startswith(a::Vector{UInt8}, b::Vector{UInt8}) = # TODO: fast endswith """ - chop(s::AbstractString) + chop(s::AbstractString, head::Integer=0, tail::Integer=1) -Remove the last character from `s`. +Remove first `head` and last `tail` characters from `s`. # Examples ```jldoctest @@ -77,8 +77,25 @@ julia> a = "March" julia> chop(a) "Marc" + +julia> chop(a, 1, 2) +"ar" + +julia> chop(a, 5, 5) +"" ``` """ +function chop(s::AbstractString, head::Integer, tail::Integer) + # negative values of head/tail will throw error in nextind/prevind + if head == 0 + tail == 0 && return SubString(s) + return SubString(s, 1, prevind(s, endof(s), tail)) + end + tail == 0 && return SubString(s, nextind(s, 1, head), endof(s)) + SubString(s, nextind(s, 1, head), prevind(s, endof(s), tail)) +end + +# no head/tail version left for performance reasons chop(s::AbstractString) = SubString(s, 1, prevind(s, endof(s))) """ diff --git a/test/strings/util.jl b/test/strings/util.jl index 9defbe27ebd44..cf9a4b11ebab4 100644 --- a/test/strings/util.jl +++ b/test/strings/util.jl @@ -231,10 +231,30 @@ end @testset "chomp/chop" begin @test chomp("foo\n") == "foo" @test chomp("fo∀\n") == "fo∀" + @test chomp("foo\r\n") == "foo" + @test chomp("fo∀\r\n") == "fo∀" @test chomp("fo∀") == "fo∀" @test chop("fooε") == "foo" @test chop("foεo") == "foε" @test chop("∃∃∃∃") == "∃∃∃" + @test chop("∀ϵ∃Δ", 0, 0) == "∀ϵ∃Δ" + @test chop("∀ϵ∃Δ", 0, 1) == "∀ϵ∃" + @test chop("∀ϵ∃Δ", 0, 2) == "∀ϵ" + @test chop("∀ϵ∃Δ", 0, 3) == "∀" + @test chop("∀ϵ∃Δ", 0, 4) == "" + @test chop("∀ϵ∃Δ", 0, 5) == "" + @test chop("∀ϵ∃Δ", 1, 0) == "ϵ∃Δ" + @test chop("∀ϵ∃Δ", 2, 0) == "∃Δ" + @test chop("∀ϵ∃Δ", 3, 0) == "Δ" + @test chop("∀ϵ∃Δ", 4, 0) == "" + @test chop("∀ϵ∃Δ", 5, 0) == "" + @test chop("∀ϵ∃Δ", 1, 1) == "ϵ∃" + @test chop("∀ϵ∃Δ", 2, 2) == "" + @test chop("∀ϵ∃Δ", 3, 3) == "" + @test_throws ArgumentError chop("∀ϵ∃Δ", -3, 3) + @test_throws ArgumentError chop("∀ϵ∃Δ", 3, -3) + @test_throws ArgumentError chop("∀ϵ∃Δ", -3, -3) + @test isa(chomp("foo"), SubString) @test isa(chop("foo"), SubString) end From fc80066b9718f011e22cfec8e54606102773b06c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Fri, 13 Oct 2017 18:15:15 +0200 Subject: [PATCH 2/5] add PR number in news --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 19b8463224fe3..374cc6e8fd645 100644 --- a/NEWS.md +++ b/NEWS.md @@ -253,7 +253,7 @@ Library improvements -------------------- * The function `chop` now accepts two arguments `head` and `tail` allowing to specify - number of characters to remove from the head and tail tail of the string ([#?????]). + number of characters to remove from the head and tail tail of the string ([#24126]). * Functions `first` and `last` now accept `nchar` argument for `AbstractString`. If this argument is used they return a string consisting of first/last `nchar` From cfc97505076d801fafbd3957a571f644953e5cb4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sun, 15 Oct 2017 23:39:27 +0200 Subject: [PATCH 3/5] fixes after code review --- base/strings/util.jl | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/base/strings/util.jl b/base/strings/util.jl index 53adb5c0a3fb4..dcf408ada1762 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -69,6 +69,8 @@ startswith(a::Vector{UInt8}, b::Vector{UInt8}) = chop(s::AbstractString, head::Integer=0, tail::Integer=1) Remove first `head` and last `tail` characters from `s`. +If it is requested to remove more characters than `length(s)` +then an empty string is returned. # Examples ```jldoctest @@ -88,15 +90,20 @@ julia> chop(a, 5, 5) function chop(s::AbstractString, head::Integer, tail::Integer) # negative values of head/tail will throw error in nextind/prevind if head == 0 - tail == 0 && return SubString(s) - return SubString(s, 1, prevind(s, endof(s), tail)) + if tail == 0 + return SubString(s) + else + return SubString(s, start(s), prevind(s, endof(s), tail)) + end + elseif tail == 0 + return SubString(s, nextind(s, start(s), head), endof(s)) + else + return SubString(s, nextind(s, start(s), head), prevind(s, endof(s), tail)) end - tail == 0 && return SubString(s, nextind(s, 1, head), endof(s)) - SubString(s, nextind(s, 1, head), prevind(s, endof(s), tail)) end # no head/tail version left for performance reasons -chop(s::AbstractString) = SubString(s, 1, prevind(s, endof(s))) +chop(s::AbstractString) = SubString(s, start(s), prevind(s, endof(s))) """ chomp(s::AbstractString) @@ -111,10 +118,10 @@ julia> chomp("Hello\\n") """ function chomp(s::AbstractString) i = endof(s) - (i < 1 || s[i] != '\n') && (return SubString(s, 1, i)) + (i < start(s) || s[i] != '\n') && (return SubString(s, start(s), i)) j = prevind(s,i) - (j < 1 || s[j] != '\r') && (return SubString(s, 1, j)) - return SubString(s, 1, prevind(s,j)) + (j < start(s) || s[j] != '\r') && (return SubString(s, start(s), j)) + return SubString(s, start(s), prevind(s,j)) end function chomp(s::String) i = endof(s) @@ -195,11 +202,11 @@ function rstrip(s::AbstractString, chars::Chars=_default_delims) while !done(r,i) c, j = next(r,i) if !(c in chars) - return SubString(s, 1, endof(s)-i+1) + return SubString(s, start(s), endof(s)-i+1) end i = j end - SubString(s, 1, 0) + SubString(s, start(s), 0) end """ From f29f56e0d3890ec54b3332c69fd6eac50cecef5c Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Mon, 16 Oct 2017 10:05:05 +0200 Subject: [PATCH 4/5] Remove repeated "tail" --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 374cc6e8fd645..11a56e3b90408 100644 --- a/NEWS.md +++ b/NEWS.md @@ -253,7 +253,7 @@ Library improvements -------------------- * The function `chop` now accepts two arguments `head` and `tail` allowing to specify - number of characters to remove from the head and tail tail of the string ([#24126]). + number of characters to remove from the head and tail of the string ([#24126]). * Functions `first` and `last` now accept `nchar` argument for `AbstractString`. If this argument is used they return a string consisting of first/last `nchar` From 44cfed282a3fca9288aedfbb2ff53bb956515ce3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 16 Oct 2017 15:08:57 +0200 Subject: [PATCH 5/5] fixes after a review --- base/strings/util.jl | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/base/strings/util.jl b/base/strings/util.jl index dcf408ada1762..0e556aa822ffe 100644 --- a/base/strings/util.jl +++ b/base/strings/util.jl @@ -68,7 +68,8 @@ startswith(a::Vector{UInt8}, b::Vector{UInt8}) = """ chop(s::AbstractString, head::Integer=0, tail::Integer=1) -Remove first `head` and last `tail` characters from `s`. +Remove the first `head` and the last `tail` characters from `s`. +The call `chop(s)` removes the last character from `s`. If it is requested to remove more characters than `length(s)` then an empty string is returned. @@ -89,17 +90,9 @@ julia> chop(a, 5, 5) """ function chop(s::AbstractString, head::Integer, tail::Integer) # negative values of head/tail will throw error in nextind/prevind - if head == 0 - if tail == 0 - return SubString(s) - else - return SubString(s, start(s), prevind(s, endof(s), tail)) - end - elseif tail == 0 - return SubString(s, nextind(s, start(s), head), endof(s)) - else - return SubString(s, nextind(s, start(s), head), prevind(s, endof(s), tail)) - end + headidx = head == 0 ? start(s) : nextind(s, start(s), head) + tailidx = tail == 0 ? endof(s) : prevind(s, endof(s), tail) + SubString(s, headidx, tailidx) end # no head/tail version left for performance reasons @@ -118,10 +111,10 @@ julia> chomp("Hello\\n") """ function chomp(s::AbstractString) i = endof(s) - (i < start(s) || s[i] != '\n') && (return SubString(s, start(s), i)) + (i < 1 || s[i] != '\n') && (return SubString(s, 1, i)) j = prevind(s,i) - (j < start(s) || s[j] != '\r') && (return SubString(s, start(s), j)) - return SubString(s, start(s), prevind(s,j)) + (j < 1 || s[j] != '\r') && (return SubString(s, 1, j)) + return SubString(s, 1, prevind(s,j)) end function chomp(s::String) i = endof(s) @@ -202,11 +195,11 @@ function rstrip(s::AbstractString, chars::Chars=_default_delims) while !done(r,i) c, j = next(r,i) if !(c in chars) - return SubString(s, start(s), endof(s)-i+1) + return SubString(s, 1, endof(s)-i+1) end i = j end - SubString(s, start(s), 0) + SubString(s, 1, 0) end """