Skip to content

Commit

Permalink
implement comments
Browse files Browse the repository at this point in the history
  • Loading branch information
Moelf committed Aug 30, 2020
1 parent f073830 commit 2d4b684
Show file tree
Hide file tree
Showing 3 changed files with 45 additions and 34 deletions.
59 changes: 33 additions & 26 deletions base/strings/search.jl
Original file line number Diff line number Diff line change
Expand Up @@ -188,8 +188,7 @@ function _search_bloom_mask(c)
end

_nthbyte(s::String, i) = codeunit(s, i)
_nthbyte(a::Union{Vector{UInt8},Vector{Int8}}, i) = a[i]
_nthbyte(t::AbstractVector, index) = t[firstindex(t) + (index-1)]
_nthbyte(t::AbstractVector, index) = t[index + (firstindex(t)-1)]

function _searchindex(s::String, t::String, i::Integer)
# Check for fast case of a single byte
Expand All @@ -199,29 +198,29 @@ end

function _searchindex(s::AbstractVector{<:Union{Int8,UInt8}},
t::AbstractVector{<:Union{Int8,UInt8}},
i::Integer)
_i::Integer)
n = length(t)
m = length(s)
f_s = firstindex(s)
i < f_s && throw(BoundsError(s, i))
i = Int(_i) - (firstindex(s) - 1)
i < 1 && throw(BoundsError(s, _i))

if n == 0
return f_s <= i <= m+1 ? max(f_s, i) : 0
return 1 <= i <= m+1 ? max(1, i) : 0
elseif m == 0
return 0
elseif n == 1
return something(findnext(isequal(_nthbyte(t,1)), s, i), 0)
end

w = m - n
if w < 0 || i - f_s > w
if w < 0 || i - 1 > w
return 0
end

bloom_mask = UInt64(0)
skip = n - f_s
skip = n - 1
tlast = _nthbyte(t,n)
for j in eachindex(t)
for j in 1:n
bloom_mask |= _search_bloom_mask(_nthbyte(t,j))
if _nthbyte(t,j) == tlast && j < n
skip = n - j - 1
Expand All @@ -242,7 +241,8 @@ function _searchindex(s::AbstractVector{<:Union{Int8,UInt8}},

# match found
if j == n - 1
return i+f_s
# shift back to the native indexing of `s`, in case `s` is an OffsetArray
return i+firstindex(s)
end

# no match, try to rule out the next character
Expand Down Expand Up @@ -333,17 +333,20 @@ Find the next occurrence of the sequence `pattern` in vector `A` starting at pos
# Examples
```jldoctest
julia> findnext([0x52, 0x62], [0x52, 0x62, 0x72], 5) === nothing
julia> findnext([0x52, 0x62], [0x52, 0x62, 0x72], 3) === nothing
true
julia> findnext([0x52, 0x62], [0x40, 0x52, 0x62, 0x52, 0x62], 3)
4:5
```
"""
findnext(pattern::AbstractVector{<:Union{Int8,UInt8}},
A::AbstractVector{<:Union{Int8,UInt8}},
start::Integer) =
function findnext(pattern::AbstractVector{<:Union{Int8,UInt8}},
A::AbstractVector{<:Union{Int8,UInt8}},
start::Integer)
(start == (lastindex(A)+1)) && return nothing
(start > (lastindex(A)+1)) && throw(BoundsError(A, start))
_search(A, pattern, start)
end

"""
findlast(pattern::AbstractString, string::AbstractString)
Expand Down Expand Up @@ -376,9 +379,10 @@ julia> findlast([0x52, 0x62], [0x52, 0x62, 0x52, 0x62])
3:4
```
"""
findlast(pattern::AbstractVector{<:Union{Int8,UInt8}},
A::AbstractVector{<:Union{Int8,UInt8}}) =
function findlast(pattern::AbstractVector{<:Union{Int8,UInt8}},
A::AbstractVector{<:Union{Int8,UInt8}})
findprev(pattern, A, lastindex(A))
end
"""
findlast(ch::AbstractChar, string::AbstractString)
Expand Down Expand Up @@ -452,29 +456,29 @@ function _rsearchindex(s::String, t::String, i::Integer)
end
end

function _rsearchindex(s::AbstractVector{<:Union{Int8,UInt8}}, t::AbstractVector{<:Union{Int8,UInt8}}, k::Integer)
function _rsearchindex(s::AbstractVector{<:Union{Int8,UInt8}}, t::AbstractVector{<:Union{Int8,UInt8}}, _k::Integer)
n = length(t)
m = length(s)
f_s = firstindex(s)
k < f_s && throw(BoundsError(s, k))
k = Int(_k) - (firstindex(s) - 1)
k < 1 && throw(BoundsError(s, _k))

if n == 0
return 0 <= k <= m ? max(f_s, k) : 0
return 0 <= k <= m ? max(k, 1) : 0
elseif m == 0
return 0
elseif n == 1
return something(findprev(isequal(_nthbyte(t,1)), s, k), 0)
end

w = m - n
if w < 0 || k <= f_s
if w < 0 || k <= 0
return 0
end

bloom_mask = UInt64(0)
skip = n - 1
tfirst = _nthbyte(t,1)
for j in reverse(eachindex(t))
for j in n:-1:1
bloom_mask |= _search_bloom_mask(_nthbyte(t,j))
if _nthbyte(t,j) == tfirst && j > 1
skip = j - 2
Expand All @@ -495,7 +499,7 @@ function _rsearchindex(s::AbstractVector{<:Union{Int8,UInt8}}, t::AbstractVector

# match found
if j == n
return i + f_s - 1
return i - 1 + firstindex(s)
end

# no match, try to rule out the next character
Expand Down Expand Up @@ -587,10 +591,13 @@ julia> findprev([0x52, 0x62], [0x40, 0x52, 0x62, 0x52, 0x62], 3)
2:3
```
"""
findprev(pattern::AbstractVector{<:Union{Int8,UInt8}},
A::AbstractVector{<:Union{Int8,UInt8}},
start::Integer) =
function findprev(pattern::AbstractVector{<:Union{Int8,UInt8}},
A::AbstractVector{<:Union{Int8,UInt8}},
start::Integer)
(start == (lastindex(A)+1)) && return nothing
(start > (lastindex(A)+1)) && throw(BoundsError(A, start))
_rsearch(A, pattern, start)
end
"""
occursin(needle::Union{AbstractString,Regex,AbstractChar}, haystack::AbstractString)
Expand Down
10 changes: 6 additions & 4 deletions test/offsetarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -630,24 +630,26 @@ end
OA = OffsetArray(VT[0x40,0x52,0x62,0x52,0x62], 1)
for PT in [Int8, UInt8]
pattern = PT[0x52, 0x62]
l_OA = lastindex(OA)
@test findfirst(pattern, OA) === 3:4
@test findnext(pattern, OA, 2) === 3:4
@test findnext(pattern, OA, 4) === 5:6
@test findnext(pattern, OA, 6) === nothing
@test findnext(pattern, OA, 7) === nothing
@test findnext(pattern, OA, 2) === 3:4
@test findnext(pattern, OA, 4) === 5:6
@test findnext(pattern, OA, 6) === nothing
@test findnext(pattern, OA, 99) === nothing
# a start index one past the last index is allowed
@test findnext(pattern, OA, l_OA+1) === nothing
@test_throws BoundsError findnext(pattern, OA, l_OA+2)
@test_throws BoundsError findnext(pattern, OA, 1)

@test findlast(pattern, OA) === 5:6
@test findprev(pattern, OA, 2) === nothing
@test findprev(pattern, OA, 4) === 3:4
@test findprev(pattern, OA, 6) === 5:6
@test findprev(pattern, OA, 99) === findlast(pattern, OA)
@test findnext(pattern, OA, l_OA+1) === nothing
@test_throws BoundsError findnext(pattern, OA, l_OA+2)
@test_throws BoundsError findprev(pattern, OA, 1)
end
end
end

10 changes: 6 additions & 4 deletions test/strings/search.jl
Original file line number Diff line number Diff line change
Expand Up @@ -405,16 +405,18 @@ end
@test findfirst(pattern, A) === 2:3
@test findnext(pattern, A, 2) === 2:3
@test findnext(pattern, A, 3) === 4:5
@test findnext(pattern, A, 5) === nothing
@test findnext(pattern, A, 99) === nothing
# a start index one past the last index is allowed
@test findnext(pattern, A, length(A)+1) === nothing
@test_throws BoundsError findnext(pattern, A, -3)
@test_throws BoundsError findnext(pattern, A, length(A)+2)

@test findlast(pattern, A) === 4:5
@test findprev(pattern, A, 3) === 2:3
@test findprev(pattern, A, 5) === 4:5
@test findprev(pattern, A, 2) === nothing
@test findprev(pattern, A, 99) === findlast(pattern, A)
@test_throws BoundsError findprev(pattern, A, -2)
@test findprev(pattern, A, length(A)+1) === nothing
@test_throws BoundsError findprev(pattern, A, -3)
@test_throws BoundsError findprev(pattern, A, length(A)+2)
end
end

Expand Down

0 comments on commit 2d4b684

Please sign in to comment.