Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

expose findfirst findnext for UInt8 vector #37283

Merged
merged 27 commits into from
Oct 26, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
149f43b
expose findfirst findnext for UInt8 vector
Moelf Aug 30, 2020
d1055eb
Update base/strings/search.jl
Moelf Aug 30, 2020
acc3760
Update base/strings/search.jl
Moelf Aug 30, 2020
b1acc16
Update base/strings/search.jl
Moelf Aug 30, 2020
1338f8e
Update base/strings/search.jl
Moelf Aug 30, 2020
5f05214
Update base/strings/search.jl
Moelf Aug 30, 2020
2d3913c
Update test/strings/search.jl
Moelf Aug 30, 2020
eba8833
Update base/strings/search.jl
Moelf Aug 30, 2020
bd63ef4
address comments
Moelf Aug 30, 2020
cc6d364
address comments add OffsetArray test
Moelf Aug 30, 2020
fdb1d9e
add findlast findprev
Moelf Aug 30, 2020
114e380
implement comments
Moelf Aug 30, 2020
5e1c0e3
let _(r)searchindex handle exception
Moelf Aug 31, 2020
c28246a
fix _rsearchindex special behavior
Moelf Aug 31, 2020
68df4cf
style fix
Moelf Aug 31, 2020
13bd2fa
restirct to 1-indexed array
Moelf Aug 31, 2020
54b1d9d
Update test/strings/search.jl
Moelf Aug 31, 2020
1bff730
Update base/strings/search.jl
Moelf Aug 31, 2020
ed24961
Update base/strings/search.jl
Moelf Aug 31, 2020
1d64047
Update base/strings/search.jl
Moelf Aug 31, 2020
ec9fdc4
Update base/strings/search.jl
Moelf Aug 31, 2020
e30cef9
address comments
Moelf Aug 31, 2020
cb01268
change sentinel value to firstindex - 1
Moelf Aug 31, 2020
ff679b0
NEWS for find* on Vector of U/Int8
Moelf Sep 2, 2020
7c508a8
Merge branch 'master' into findfirst_searchindex
Moelf Sep 19, 2020
94a2340
Merge branch 'master' into findfirst_searchindex
Moelf Oct 21, 2020
6535158
Update NEWS.md
stevengj Oct 26, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ New language features
* The library name passed to `ccall` or `@ccall` can now be an expression involving
global variables and function calls. The expression will be evaluated the first
time the `ccall` executes ([#36458]).
* `findfirst`, `findnext`, `findlast`, `findprev`, and `findall` now support `AbstractVector{<:Union{Int8,UInt8}}` (pattern, array) arguments ([#37283]).
* `ꜛ` (U+A71B), `ꜜ` (U+A71C) and `ꜝ` (U+A71D) can now also be used as operator
suffixes. They can be tab-completed from `\^uparrow`, `\^downarrow` and `\^!` in the REPL
([#37542]).
Expand Down
151 changes: 120 additions & 31 deletions base/strings/search.jl
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,25 @@ true
"""
findfirst(ch::AbstractChar, string::AbstractString) = findfirst(==(ch), string)

"""
    findfirst(pattern::AbstractVector{<:Union{Int8,UInt8}},
              A::AbstractVector{<:Union{Int8,UInt8}})

Return the range of indices of the first occurrence of the sequence `pattern` in
vector `A`, or `nothing` if `pattern` does not occur in `A`.

!!! compat "Julia 1.6"
    This method requires at least Julia 1.6.

# Examples
```jldoctest
julia> findfirst([0x52, 0x62], [0x40, 0x52, 0x62, 0x63])
2:3
```
"""
function findfirst(pattern::AbstractVector{<:Union{Int8,UInt8}},
                   A::AbstractVector{<:Union{Int8,UInt8}})
    # A "first occurrence" search is a forward search begun at the first index of `A`.
    return _search(A, pattern, firstindex(A))
end

# AbstractString implementation of the generic findnext interface
function findnext(testf::Function, s::AbstractString, i::Integer)
i = Int(i)
Expand Down Expand Up @@ -166,29 +185,34 @@ function _search_bloom_mask(c)
end

_nthbyte(s::String, i) = codeunit(s, i)
_nthbyte(a::Union{AbstractVector{UInt8},AbstractVector{Int8}}, i) = a[i]
_nthbyte(t::AbstractVector, index) = t[index + (firstindex(t)-1)]

function _searchindex(s::String, t::String, i::Integer)
# Check for fast case of a single byte
lastindex(t) == 1 && return something(findnext(isequal(t[1]), s, i), 0)
_searchindex(unsafe_wrap(Vector{UInt8},s), unsafe_wrap(Vector{UInt8},t), i)
end

function _searchindex(s::ByteArray, t::ByteArray, i::Integer)
n = sizeof(t)
m = sizeof(s)
function _searchindex(s::AbstractVector{<:Union{Int8,UInt8}},
t::AbstractVector{<:Union{Int8,UInt8}},
_i::Integer)
sentinel = firstindex(s) - 1
n = length(t)
m = length(s)
i = Int(_i) - sentinel
(i < 1 || i > m+1) && throw(BoundsError(s, _i))

if n == 0
return 1 <= i <= m+1 ? max(1, i) : 0
return 1 <= i <= m+1 ? max(1, i) : sentinel
elseif m == 0
return 0
return sentinel
elseif n == 1
return something(findnext(isequal(_nthbyte(t,1)), s, i), 0)
return something(findnext(isequal(_nthbyte(t,1)), s, i), sentinel)
end

w = m - n
if w < 0 || i - 1 > w
return 0
return sentinel
end

bloom_mask = UInt64(0)
Expand All @@ -215,7 +239,8 @@ function _searchindex(s::ByteArray, t::ByteArray, i::Integer)

# match found
if j == n - 1
return i+1
# restore in case `s` is an OffsetArray
return i+firstindex(s)
end

# no match, try to rule out the next character
Expand All @@ -232,16 +257,16 @@ function _searchindex(s::ByteArray, t::ByteArray, i::Integer)
i += 1
end

0
sentinel
end

function _search(s::Union{AbstractString,ByteArray},
t::Union{AbstractString,AbstractChar,Int8,UInt8},
function _search(s::Union{AbstractString,AbstractVector{<:Union{Int8,UInt8}}},
t::Union{AbstractString,AbstractChar,AbstractVector{<:Union{Int8,UInt8}}},
i::Integer)
idx = _searchindex(s,t,i)
if isempty(t)
idx:idx-1
elseif idx > 0
elseif idx >= firstindex(s)
idx:(idx + lastindex(t) - 1)
else
nothing
Expand Down Expand Up @@ -274,7 +299,7 @@ julia> findnext("Lang", "JuliaLang", 2)
6:9
```
"""
findnext(t::AbstractString, s::AbstractString, i::Integer) = _search(s, t, Int(i))
findnext(t::AbstractString, s::AbstractString, start::Integer) = _search(s, t, Int(start))

"""
findnext(ch::AbstractChar, string::AbstractString, start::Integer)
Expand All @@ -293,8 +318,32 @@ julia> findnext('o', "Hello to the world", 6)
8
```
"""
findnext(ch::AbstractChar, string::AbstractString, ind::Integer) =
findnext(==(ch), string, ind)
findnext(ch::AbstractChar, string::AbstractString, start::Integer) =
findnext(==(ch), string, start)

"""
    findnext(pattern::AbstractVector{<:Union{Int8,UInt8}},
             A::AbstractVector{<:Union{Int8,UInt8}},
             start::Integer)

Return the range of indices of the next occurrence of the sequence `pattern` in
vector `A`, searching forward from position `start`, or `nothing` if there is no
such occurrence.

A `BoundsError` is thrown if `start` lies before the first index of `A` or more than
one position past its last index.

!!! compat "Julia 1.6"
    This method requires at least Julia 1.6.

# Examples
```jldoctest
julia> findnext([0x52, 0x62], [0x52, 0x62, 0x72], 3) === nothing
true

julia> findnext([0x52, 0x62], [0x40, 0x52, 0x62, 0x52, 0x62], 3)
4:5
```
"""
function findnext(pattern::AbstractVector{<:Union{Int8,UInt8}},
                  A::AbstractVector{<:Union{Int8,UInt8}},
                  start::Integer)
    # `_search` takes the haystack first and the needle second.
    return _search(A, pattern, start)
end

"""
findlast(pattern::AbstractString, string::AbstractString)
Expand All @@ -314,6 +363,23 @@ julia> findfirst("Julia", "JuliaLang")
findlast(pattern::AbstractString, string::AbstractString) =
findprev(pattern, string, lastindex(string))

"""
    findlast(pattern::AbstractVector{<:Union{Int8,UInt8}},
             A::AbstractVector{<:Union{Int8,UInt8}})

Find the last occurrence of the sequence `pattern` in vector `A`. Return the range of
matching indices, or `nothing` if `pattern` does not occur in `A`.

For a non-empty `A` this is equivalent to `findprev(pattern, A, lastindex(A))`.

!!! compat "Julia 1.6"
    This method requires at least Julia 1.6.

# Examples
```jldoctest
julia> findlast([0x52, 0x62], [0x52, 0x62, 0x52, 0x62])
3:4

julia> findlast([0x52, 0x62], UInt8[]) === nothing
true
```
"""
function findlast(pattern::AbstractVector{<:Union{Int8,UInt8}},
                  A::AbstractVector{<:Union{Int8,UInt8}})
    if isempty(A)
        # For an empty vector `lastindex(A)` equals `firstindex(A) - 1`, a start
        # position that the backward search rejects with a `BoundsError`. Answer
        # directly instead, matching what `findfirst` yields for an empty `A`: an
        # empty pattern matches the empty range at the start of `A`, and a non-empty
        # pattern cannot occur at all.
        isempty(pattern) || return nothing
        lo = firstindex(A)
        return lo:(lo - 1)
    end
    return findprev(pattern, A, lastindex(A))
end

"""
findlast(ch::AbstractChar, string::AbstractString)

Expand Down Expand Up @@ -387,21 +453,24 @@ function _rsearchindex(s::String, t::String, i::Integer)
end
end

function _rsearchindex(s::ByteArray, t::ByteArray, k::Integer)
n = sizeof(t)
m = sizeof(s)
function _rsearchindex(s::AbstractVector{<:Union{Int8,UInt8}}, t::AbstractVector{<:Union{Int8,UInt8}}, _k::Integer)
sentinel = firstindex(s) - 1
n = length(t)
m = length(s)
k = Int(_k) - sentinel
k < 1 && throw(BoundsError(s, _k))

if n == 0
return 0 <= k <= m ? max(k, 1) : 0
return 0 <= k <= m ? max(k, 1) : sentinel
elseif m == 0
return 0
return sentinel
elseif n == 1
return something(findprev(isequal(_nthbyte(t,1)), s, k), 0)
return something(findprev(isequal(_nthbyte(t,1)), s, k), sentinel)
end

w = m - n
if w < 0 || k <= 0
return 0
return sentinel
end

bloom_mask = UInt64(0)
Expand All @@ -426,9 +495,9 @@ function _rsearchindex(s::ByteArray, t::ByteArray, k::Integer)
j += 1
end

# match found
# match found, restore in case `s` is an OffsetArray
if j == n
return i
return i + sentinel
end

# no match, try to rule out the next character
Expand All @@ -445,16 +514,16 @@ function _rsearchindex(s::ByteArray, t::ByteArray, k::Integer)
i -= 1
end

0
sentinel
end

function _rsearch(s::Union{AbstractString,ByteArray},
t::Union{AbstractString,AbstractChar,Int8,UInt8},
function _rsearch(s::Union{AbstractString,AbstractVector{<:Union{Int8,UInt8}}},
t::Union{AbstractString,AbstractChar,AbstractVector{<:Union{Int8,UInt8}}},
i::Integer)
idx = _rsearchindex(s,t,i)
if isempty(t)
idx:idx-1
elseif idx > 0
elseif idx > firstindex(s) - 1
Moelf marked this conversation as resolved.
Show resolved Hide resolved
idx:(idx + lastindex(t) - 1)
else
nothing
Expand Down Expand Up @@ -503,9 +572,29 @@ julia> findprev('o', "Hello to the world", 18)
15
```
"""
findprev(ch::AbstractChar, string::AbstractString, ind::Integer) =
findprev(==(ch), string, ind)
findprev(ch::AbstractChar, string::AbstractString, start::Integer) =
findprev(==(ch), string, start)

"""
    findprev(pattern::AbstractVector{<:Union{Int8,UInt8}},
             A::AbstractVector{<:Union{Int8,UInt8}},
             start::Integer)

Return the range of indices of the previous occurrence of the sequence `pattern` in
vector `A`, searching backwards from position `start`, or `nothing` if there is no
such occurrence.

A `BoundsError` is thrown if `start` lies before the first index of `A`.

!!! compat "Julia 1.6"
    This method requires at least Julia 1.6.

# Examples
```jldoctest
julia> findprev([0x52, 0x62], [0x40, 0x52, 0x62, 0x52, 0x62], 3)
2:3

julia> findprev([0x52, 0x62], [0x40, 0x52, 0x62, 0x52, 0x62], 2) === nothing
true
```
"""
function findprev(pattern::AbstractVector{<:Union{Int8,UInt8}},
                  A::AbstractVector{<:Union{Int8,UInt8}},
                  start::Integer)
    # `_rsearch` takes the haystack first and the needle second.
    return _rsearch(A, pattern, start)
end
"""
occursin(needle::Union{AbstractString,Regex,AbstractChar}, haystack::AbstractString)

Expand Down
30 changes: 30 additions & 0 deletions test/strings/search.jl
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,36 @@ s_18109 = "fooα🐨βcd3"
@test findall("aa", "aaaaaa", overlap=true) == [1:2, 2:3, 3:4, 4:5, 5:6]
end

# issue 37280
# Sequence search (`findfirst`/`findnext`/`findlast`/`findprev`) on byte vectors,
# exercised for every combination of haystack (`T`) and pattern (`VT`) element type.
@testset "UInt8, Int8 vector" begin
    for T in [Int8, UInt8], VT in [Int8, UInt8]
        A = T[0x40, 0x52, 0x62, 0x52, 0x62]

        # single-element patterns
        @test findfirst(VT[0x30], A) === nothing
        @test findfirst(VT[0x52], A) === 2:2
        @test findlast(VT[0x30], A) === nothing
        @test findlast(VT[0x52], A) === 4:4

        pattern = VT[0x52, 0x62]

        # forward search
        @test findfirst(pattern, A) === 2:3
        @test findnext(pattern, A, 2) === 2:3
        @test findnext(pattern, A, 3) === 4:5
        # 1 idx too far is allowed
        @test findnext(pattern, A, length(A)+1) === nothing
        @test_throws BoundsError findnext(pattern, A, -3)
        @test_throws BoundsError findnext(pattern, A, length(A)+2)

        # backward search
        @test findlast(pattern, A) === 4:5
        @test findprev(pattern, A, 3) === 2:3
        @test findprev(pattern, A, 5) === 4:5
        @test findprev(pattern, A, 2) === nothing
        # starting past the end behaves like starting at the end
        @test findprev(pattern, A, length(A)+1) == findlast(pattern, A)
        @test findprev(pattern, A, length(A)+2) == findlast(pattern, A)
        @test_throws BoundsError findprev(pattern, A, -3)
    end
end

# issue 32568
for T = (UInt, BigInt)
for x = (4, 5)
Expand Down