Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improved nextind and prevind #23805

Merged
merged 4 commits into from
Oct 1, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,9 @@ This section lists changes that do not have deprecation warnings.
Library improvements
--------------------

* The functions `nextind` and `prevind` now accept an `nchar` argument that indicates
the number of characters to move ([#23805]).

* The functions `strip`, `lstrip` and `rstrip` now return `SubString` ([#22496]).

* The functions `strwidth` and `charwidth` have been merged into `textwidth` ([#20816]).
Expand Down
63 changes: 61 additions & 2 deletions base/strings/basic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -239,11 +239,23 @@ end
# Single-step index movement for `DirectIndexString`: the neighbouring index is
# exactly one position away from `i`. The result is always an `Int`.
function prevind(s::DirectIndexString, i::Integer)
    return Int(i) - 1
end

function nextind(s::DirectIndexString, i::Integer)
    return Int(i) + 1
end

# Move `nchar` characters back from index `i` in a `DirectIndexString`.
#
# Throws `ArgumentError` unless `nchar > 0`. The result is always an `Int`,
# matching the two-argument method, and may be less than 1 when the move goes
# past the start of the string.
function prevind(s::DirectIndexString, i::Integer, nchar::Integer)
    nchar > 0 || throw(ArgumentError("nchar must be greater than 0"))
    # Convert `nchar` before subtracting: `Int - Unsigned` promotes to an
    # unsigned type, which would wrap around instead of going below 1.
    return Int(i) - Int(nchar)
end

# Move `nchar` characters forward from index `i` in a `DirectIndexString`.
#
# Throws `ArgumentError` unless `nchar > 0`. The result is always an `Int`,
# matching the two-argument method.
function nextind(s::DirectIndexString, i::Integer, nchar::Integer)
    nchar > 0 || throw(ArgumentError("nchar must be greater than 0"))
    # Convert `nchar` first so the return type does not depend on the integer
    # type the caller happened to pass (e.g. `UInt` would yield a `UInt`).
    return Int(i) + Int(nchar)
end


"""
prevind(str::AbstractString, i::Integer)
prevind(str::AbstractString, i::Integer, nchar::Integer=1)

Get the previous valid string index before `i`.
Returns a value less than `1` at the beginning of the string.
If the `nchar` argument is given, the function goes back `nchar` characters.

# Examples
```jldoctest
Expand All @@ -252,6 +264,10 @@ julia> prevind("αβγdef", 3)

julia> prevind("αβγdef", 1)
0

julia> prevind("αβγdef", 3, 2)
0

```
"""
function prevind(s::AbstractString, i::Integer)
Expand All @@ -269,11 +285,32 @@ function prevind(s::AbstractString, i::Integer)
return 0 # out of range
end

# Generic fallback: step back `nchar` characters from index `i`.
#
# Returns the index of the character reached, or 0 when the walk runs past the
# start of the string (or when `i < 1` to begin with). An `i` beyond
# `endof(s)` is first clamped to `endof(s)`, and that clamping counts as one
# step. Throws `ArgumentError` unless `nchar > 0`.
function prevind(s::AbstractString, i::Integer, nchar::Integer)
    # `nchar == 0` is rejected on purpose (see the review discussion on
    # #23805): supporting it would need separate logic, and it is unclear what
    # to return when `i` is not a valid index -- callers of `prevind` expect a
    # valid index back.
    nchar > 0 || throw(ArgumentError("nchar must be greater than 0"))
    n = Int(nchar)
    e = endof(s)
    j = Int(i)
    j < 1 && return 0
    while n > 0
        if j > e
            # Past the end: the last valid index is the "previous" character.
            j = e
        else
            # Walk back to the closest valid index below `j`.
            j -= 1
            while j >= 1 && !isvalid(s, j)
                j -= 1
            end
        end
        j < 1 && return 0
        n -= 1
    end
    return j
end

"""
nextind(str::AbstractString, i::Integer)
nextind(str::AbstractString, i::Integer, nchar::Integer=1)

Get the next valid string index after `i`.
Returns a value greater than `endof(str)` at or after the end of the string.
If the `nchar` argument is given, the function goes forward `nchar` characters.

# Examples
```jldoctest
Expand All @@ -282,6 +319,9 @@ julia> str = "αβγdef";
julia> nextind(str, 1)
3

julia> nextind(str, 1, 2)
5

julia> endof(str)
9

Expand All @@ -305,6 +345,25 @@ function nextind(s::AbstractString, i::Integer)
next(s,e)[2] # out of range
end

# Generic fallback: step forward `nchar` characters from index `i`.
#
# Returns the index of the character reached. An `i` below 1 is first clamped
# to 1, and that clamping counts as one step. Once the walk is at or past
# `endof(s)`, every remaining step advances the result by exactly one, so the
# value returned is greater than `endof(s)`. Throws `ArgumentError` unless
# `nchar > 0`. The result is always an `Int`.
function nextind(s::AbstractString, i::Integer, nchar::Integer)
    nchar > 0 || throw(ArgumentError("nchar must be greater than 0"))
    # Work with an `Int` counter so that `j + n` below cannot be promoted to
    # an unsigned (or otherwise different) integer type.
    n = Int(nchar)
    e = endof(s)
    j = Int(i)
    while n > 0
        if j < 1
            j = 1
        elseif j > e
            # Already out of range: the remaining steps are plain increments.
            return j + n
        elseif j == e
            # `next(s, e)[2]` is the position right after the last character;
            # it consumes one step, the other `n - 1` are plain increments.
            return next(s, e)[2] + n - 1
        else
            # Scan for the next valid index with an explicit loop, so that the
            # update of `j` does not rely on a `for` loop overwriting an
            # enclosing local. `e` itself bounds the scan.
            j += 1
            while j < e && !isvalid(s, j)
                j += 1
            end
        end
        n -= 1
    end
    return j
end

# Bounds check for a single index: returns `true` when `i` lies between
# `start(s)` and `endof(s)` (inclusive), otherwise throws `BoundsError`.
function checkbounds(s::AbstractString, i::Integer)
    if start(s) <= i <= endof(s)
        return true
    end
    throw(BoundsError(s, i))
end

# Bounds check for an integer range: an empty range is always accepted;
# otherwise both extremes must lie between `start(s)` and `endof(s)`.
# Returns `true` on success and throws `BoundsError` on failure.
function checkbounds(s::AbstractString, r::AbstractRange{<:Integer})
    isempty(r) && return true
    if minimum(r) >= start(s) && maximum(r) <= endof(s)
        return true
    end
    throw(BoundsError(s, r))
end
# The following will end up using a deprecated checkbounds, when the covariant parameter is not Integer
Expand Down
38 changes: 38 additions & 0 deletions base/strings/string.jl
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,25 @@ function prevind(s::String, i::Integer)
j
end

# `String` specialization: step back `nchar` characters from byte index `i`
# by skipping over continuation bytes.
#
# An `i` beyond `sizeof(s)` is first clamped to `endof(s)`, and that clamping
# counts as one step. Once the walk reaches an index <= 0, each remaining step
# lowers the result by one. Throws `ArgumentError` unless `nchar > 0`.
# The result is always an `Int`.
function prevind(s::String, i::Integer, nchar::Integer)
    nchar > 0 || throw(ArgumentError("nchar must be greater than 0"))
    # Count with an `Int`: with an unsigned `nchar`, `j - nchar` below would be
    # promoted to an unsigned type and wrap around instead of going negative.
    n = Int(nchar)
    j = Int(i)
    e = sizeof(s)
    while n > 0
        if j > e
            j = endof(s)
        else
            j -= 1
            # The `j > 0` guard keeps `codeunit` within 1:e, which is what
            # makes `@inbounds` acceptable here.
            @inbounds while j > 0 && is_valid_continuation(codeunit(s, j))
                j -= 1
            end
        end
        n -= 1
        # Before the start of the string: remaining steps are plain decrements.
        j <= 0 && return j - n
    end
    return j
end

function nextind(s::String, i::Integer)
j = Int(i)
if j < 1
Expand All @@ -120,6 +139,25 @@ function nextind(s::String, i::Integer)
j
end

# `String` specialization: step forward `nchar` characters from byte index `i`
# by skipping over continuation bytes.
#
# An `i` below 1 is first clamped to 1, and that clamping counts as one step.
# Once the walk passes `sizeof(s)`, each remaining step raises the result by
# one. Throws `ArgumentError` unless `nchar > 0`. The result is always an `Int`.
function nextind(s::String, i::Integer, nchar::Integer)
    nchar > 0 || throw(ArgumentError("nchar must be greater than 0"))
    # Count with an `Int` so that `j + n` below keeps the `Int` return type
    # regardless of the integer type passed for `nchar`.
    n = Int(nchar)
    j = Int(i)
    e = sizeof(s)
    while n > 0
        if j < 1
            j = 1
        else
            j += 1
            # The `j <= e` guard keeps `codeunit` within bounds, which is what
            # makes `@inbounds` acceptable here.
            @inbounds while j <= e && is_valid_continuation(codeunit(s, j))
                j += 1
            end
        end
        n -= 1
        # Past the end of the string: remaining steps are plain increments.
        j > e && return j + n
    end
    return j
end

## checking UTF-8 & ACSII validity ##

byte_string_classify(data::Vector{UInt8}) =
Expand Down
71 changes: 71 additions & 0 deletions test/strings/basic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -552,3 +552,74 @@ end
@test_throws ParseError parse("\"\\.\"")
@test_throws ParseError parse("\'\\.\'")
end

@testset "prevind and nextind" begin
    # The same text as a native `String` and wrapped in a `GenericString`
    # (presumably to exercise both the `String` methods and the generic
    # `AbstractString` fallbacks -- the two differ for negative indices, see
    # the end of this block).
    # Valid character indices of the text: 1, 4, 6, 7, 9, 10, 12, 13, 14, 15.
    let strs = Any["∀α>β:α+1>β", GenericString("∀α>β:α+1>β")]
        for s in strs
            @test prevind(s, 1) == 0
            @test prevind(s, 1, 1) == 0
            @test prevind(s, 2) == 1
            @test prevind(s, 2, 1) == 1
            @test prevind(s, 4) == 1
            @test prevind(s, 4, 1) == 1
            @test prevind(s, 5) == 4
            @test prevind(s, 5, 1) == 4
            @test prevind(s, 5, 2) == 1
            @test prevind(s, 5, 3) == 0
            @test prevind(s, 15) == 14
            @test prevind(s, 15, 1) == 14
            @test prevind(s, 15, 2) == 13
            @test prevind(s, 15, 3) == 12
            @test prevind(s, 15, 4) == 10
            @test prevind(s, 15, 9) == 1
            @test prevind(s, 15, 10) == 0
            @test prevind(s, 16) == 15
            @test prevind(s, 16, 1) == 15
            @test prevind(s, 16, 2) == 14
            @test prevind(s, 20) == 15
            @test prevind(s, 20, 1) == 15
            @test prevind(s, 20, 10) == 1
            @test_throws ArgumentError prevind(s, 20, 0)

            @test nextind(s, -1) == 1
            @test nextind(s, -1, 1) == 1
            @test nextind(s, 0, 2) == 4
            @test nextind(s, 0, 20) == 26
            @test nextind(s, 0, 10) == 15
            @test nextind(s, 1) == 4
            @test nextind(s, 1, 1) == 4
            @test nextind(s, 1, 2) == 6
            @test nextind(s, 1, 9) == 15
            @test nextind(s, 1, 10) == 17
            @test nextind(s, 2) == 4
            @test nextind(s, 2, 1) == 4
            @test nextind(s, 3) == 4
            @test nextind(s, 3, 1) == 4
            @test nextind(s, 4) == 6
            @test nextind(s, 4, 1) == 6
            @test nextind(s, 14) == 15
            @test nextind(s, 14, 1) == 15
            @test nextind(s, 15) == 17
            @test nextind(s, 15, 1) == 17
            @test nextind(s, 20) == 21
            @test nextind(s, 20, 1) == 21
            @test_throws ArgumentError nextind(s, 20, 0)

            # The three-argument form must agree with applying the
            # two-argument form `j` times, for starting points inside and
            # outside the string.
            for x in -10:20
                n = p = x
                for j in 1:40
                    p = prevind(s, p)
                    @test prevind(s, x, j) == p
                    n = nextind(s, n)
                    @test nextind(s, x, j) == n
                end
            end
        end

        # Below index 1 the two implementations intentionally differ:
        # `String` keeps decrementing, the generic fallback saturates at 0.
        @test prevind(strs[1], -1) == -2
        @test prevind(strs[1], -1, 1) == -2

        @test prevind(strs[2], -1) == 0
        @test prevind(strs[2], -1, 1) == 0
    end
end