diff --git a/NEWS.md b/NEWS.md index a8401633fe11a..23cf4eed7cf0c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -13,6 +13,7 @@ New language features * The library name passed to `ccall` or `@ccall` can now be an expression involving global variables and function calls. The expression will be evaluated the first time the `ccall` executes ([#36458]). +* `findfirst`, `findnext`, `findprev`, `findlast`, and `findall` now support `AbstractVector{<:Union{Int8,UInt8}}` (pattern, array) arguments ([#37283]). * `ꜛ` (U+A71B), `ꜜ` (U+A71C) and `ꜝ` (U+A71D) can now also be used as operator suffixes. They can be tab-completed from `\^uparrow`, `\^downarrow` and `\^!` in the REPL ([#37542]). diff --git a/base/strings/search.jl b/base/strings/search.jl index b1908ac99c860..dca4d808e10ba 100644 --- a/base/strings/search.jl +++ b/base/strings/search.jl @@ -123,6 +123,25 @@ true """ findfirst(ch::AbstractChar, string::AbstractString) = findfirst(==(ch), string) +""" + findfirst(pattern::AbstractVector{<:Union{Int8,UInt8}}, + A::AbstractVector{<:Union{Int8,UInt8}}) + +Find the first occurrence of sequence `pattern` in vector `A`. + +!!! compat "Julia 1.6" + This method requires at least Julia 1.6. 
+ +# Examples +```jldoctest +julia> findfirst([0x52, 0x62], [0x40, 0x52, 0x62, 0x63]) +2:3 +``` +""" +findfirst(pattern::AbstractVector{<:Union{Int8,UInt8}}, + A::AbstractVector{<:Union{Int8,UInt8}}) = + _search(A, pattern, firstindex(A)) + # AbstractString implementation of the generic findnext interface function findnext(testf::Function, s::AbstractString, i::Integer) i = Int(i) @@ -166,7 +185,7 @@ function _search_bloom_mask(c) end _nthbyte(s::String, i) = codeunit(s, i) -_nthbyte(a::Union{AbstractVector{UInt8},AbstractVector{Int8}}, i) = a[i] +_nthbyte(t::AbstractVector, index) = t[index + (firstindex(t)-1)] function _searchindex(s::String, t::String, i::Integer) # Check for fast case of a single byte @@ -174,21 +193,26 @@ function _searchindex(s::String, t::String, i::Integer) _searchindex(unsafe_wrap(Vector{UInt8},s), unsafe_wrap(Vector{UInt8},t), i) end -function _searchindex(s::ByteArray, t::ByteArray, i::Integer) - n = sizeof(t) - m = sizeof(s) +function _searchindex(s::AbstractVector{<:Union{Int8,UInt8}}, + t::AbstractVector{<:Union{Int8,UInt8}}, + _i::Integer) + sentinel = firstindex(s) - 1 + n = length(t) + m = length(s) + i = Int(_i) - sentinel + (i < 1 || i > m+1) && throw(BoundsError(s, _i)) if n == 0 - return 1 <= i <= m+1 ? max(1, i) : 0 + return 1 <= i <= m+1 ? 
max(1, i) + sentinel : sentinel elseif m == 0 - return 0 + return sentinel elseif n == 1 - return something(findnext(isequal(_nthbyte(t,1)), s, i), 0) + return something(findnext(isequal(_nthbyte(t,1)), s, i + sentinel), sentinel) end w = m - n if w < 0 || i - 1 > w - return 0 + return sentinel end bloom_mask = UInt64(0) @@ -215,7 +239,8 @@ function _searchindex(s::ByteArray, t::ByteArray, i::Integer) # match found if j == n - 1 - return i+1 + # restore in case `s` is an OffsetArray + return i+firstindex(s) end # no match, try to rule out the next character @@ -232,16 +257,16 @@ function _searchindex(s::ByteArray, t::ByteArray, i::Integer) i += 1 end - 0 + sentinel end -function _search(s::Union{AbstractString,ByteArray}, - t::Union{AbstractString,AbstractChar,Int8,UInt8}, +function _search(s::Union{AbstractString,AbstractVector{<:Union{Int8,UInt8}}}, + t::Union{AbstractString,AbstractChar,AbstractVector{<:Union{Int8,UInt8}}}, i::Integer) idx = _searchindex(s,t,i) if isempty(t) idx:idx-1 - elseif idx > 0 + elseif idx >= firstindex(s) idx:(idx + lastindex(t) - 1) else nothing @@ -274,7 +299,7 @@ julia> findnext("Lang", "JuliaLang", 2) 6:9 ``` """ -findnext(t::AbstractString, s::AbstractString, i::Integer) = _search(s, t, Int(i)) +findnext(t::AbstractString, s::AbstractString, start::Integer) = _search(s, t, Int(start)) """ findnext(ch::AbstractChar, string::AbstractString, start::Integer) @@ -293,8 +318,32 @@ julia> findnext('o', "Hello to the world", 6) 8 ``` """ -findnext(ch::AbstractChar, string::AbstractString, ind::Integer) = - findnext(==(ch), string, ind) +findnext(ch::AbstractChar, string::AbstractString, start::Integer) = + findnext(==(ch), string, start) + +""" + findnext(pattern::AbstractVector{<:Union{Int8,UInt8}}, + A::AbstractVector{<:Union{Int8,UInt8}}, + start::Integer) + +Find the next occurrence of the sequence `pattern` in vector `A` starting at position `start`. + +!!! compat "Julia 1.6" + This method requires at least Julia 1.6. 
+ +# Examples +```jldoctest +julia> findnext([0x52, 0x62], [0x52, 0x62, 0x72], 3) === nothing +true + +julia> findnext([0x52, 0x62], [0x40, 0x52, 0x62, 0x52, 0x62], 3) +4:5 +``` +""" +findnext(pattern::AbstractVector{<:Union{Int8,UInt8}}, + A::AbstractVector{<:Union{Int8,UInt8}}, + start::Integer) = + _search(A, pattern, start) """ findlast(pattern::AbstractString, string::AbstractString) @@ -314,6 +363,23 @@ julia> findfirst("Julia", "JuliaLang") findlast(pattern::AbstractString, string::AbstractString) = findprev(pattern, string, lastindex(string)) +""" + findlast(pattern::AbstractVector{<:Union{Int8,UInt8}}, + A::AbstractVector{<:Union{Int8,UInt8}}) + +Find the last occurrence of `pattern` in array `A`. Equivalent to +[`findprev(pattern, A, lastindex(A))`](@ref). + +# Examples +```jldoctest +julia> findlast([0x52, 0x62], [0x52, 0x62, 0x52, 0x62]) +3:4 +``` +""" +findlast(pattern::AbstractVector{<:Union{Int8,UInt8}}, + A::AbstractVector{<:Union{Int8,UInt8}}) = + findprev(pattern, A, lastindex(A)) + """ findlast(ch::AbstractChar, string::AbstractString) @@ -387,21 +453,24 @@ function _rsearchindex(s::String, t::String, i::Integer) end end -function _rsearchindex(s::ByteArray, t::ByteArray, k::Integer) - n = sizeof(t) - m = sizeof(s) +function _rsearchindex(s::AbstractVector{<:Union{Int8,UInt8}}, t::AbstractVector{<:Union{Int8,UInt8}}, _k::Integer) + sentinel = firstindex(s) - 1 + n = length(t) + m = length(s) + k = Int(_k) - sentinel + k < 1 && throw(BoundsError(s, _k)) if n == 0 - return 0 <= k <= m ? max(k, 1) : 0 + return 0 <= k <= m ? 
max(k, 1) + sentinel : sentinel elseif m == 0 - return 0 + return sentinel elseif n == 1 - return something(findprev(isequal(_nthbyte(t,1)), s, k), 0) + return something(findprev(isequal(_nthbyte(t,1)), s, k + sentinel), sentinel) end w = m - n if w < 0 || k <= 0 - return 0 + return sentinel end bloom_mask = UInt64(0) @@ -426,9 +495,9 @@ function _rsearchindex(s::ByteArray, t::ByteArray, k::Integer) j += 1 end - # match found + # match found, restore in case `s` is an OffsetArray if j == n - return i + return i + sentinel end # no match, try to rule out the next character @@ -445,16 +514,16 @@ function _rsearchindex(s::ByteArray, t::ByteArray, k::Integer) i -= 1 end - 0 + sentinel end -function _rsearch(s::Union{AbstractString,ByteArray}, - t::Union{AbstractString,AbstractChar,Int8,UInt8}, +function _rsearch(s::Union{AbstractString,AbstractVector{<:Union{Int8,UInt8}}}, + t::Union{AbstractString,AbstractChar,AbstractVector{<:Union{Int8,UInt8}}}, i::Integer) idx = _rsearchindex(s,t,i) if isempty(t) idx:idx-1 - elseif idx > 0 + elseif idx > firstindex(s) - 1 idx:(idx + lastindex(t) - 1) else nothing @@ -503,9 +572,29 @@ julia> findprev('o', "Hello to the world", 18) 15 ``` """ -findprev(ch::AbstractChar, string::AbstractString, ind::Integer) = - findprev(==(ch), string, ind) +findprev(ch::AbstractChar, string::AbstractString, start::Integer) = + findprev(==(ch), string, start) + +""" + findprev(pattern::AbstractVector{<:Union{Int8,UInt8}}, + A::AbstractVector{<:Union{Int8,UInt8}}, + start::Integer) + +Find the previous occurrence of the sequence `pattern` in vector `A` starting at position `start`. +!!! compat "Julia 1.6" + This method requires at least Julia 1.6. 
+ +# Examples +```jldoctest +julia> findprev([0x52, 0x62], [0x40, 0x52, 0x62, 0x52, 0x62], 3) +2:3 +``` +""" +findprev(pattern::AbstractVector{<:Union{Int8,UInt8}}, + A::AbstractVector{<:Union{Int8,UInt8}}, + start::Integer) = + _rsearch(A, pattern, start) """ occursin(needle::Union{AbstractString,Regex,AbstractChar}, haystack::AbstractString) diff --git a/test/strings/search.jl b/test/strings/search.jl index 8a7abaec50309..6b0080abea02d 100644 --- a/test/strings/search.jl +++ b/test/strings/search.jl @@ -390,6 +390,36 @@ s_18109 = "fooα🐨βcd3" @test findall("aa", "aaaaaa", overlap=true) == [1:2, 2:3, 3:4, 4:5, 5:6] end +# issue 37280 +@testset "UInt8, Int8 vector" begin + for T in [Int8, UInt8], VT in [Int8, UInt8] + A = T[0x40, 0x52, 0x62, 0x52, 0x62] + + @test findfirst(VT[0x30], A) === nothing + @test findfirst(VT[0x52], A) === 2:2 + @test findlast(VT[0x30], A) === nothing + @test findlast(VT[0x52], A) === 4:4 + + pattern = VT[0x52, 0x62] + + @test findfirst(pattern, A) === 2:3 + @test findnext(pattern, A, 2) === 2:3 + @test findnext(pattern, A, 3) === 4:5 + # 1 idx too far is allowed + @test findnext(pattern, A, length(A)+1) === nothing + @test_throws BoundsError findnext(pattern, A, -3) + @test_throws BoundsError findnext(pattern, A, length(A)+2) + + @test findlast(pattern, A) === 4:5 + @test findprev(pattern, A, 3) === 2:3 + @test findprev(pattern, A, 5) === 4:5 + @test findprev(pattern, A, 2) === nothing + @test findprev(pattern, A, length(A)+1) == findlast(pattern, A) + @test findprev(pattern, A, length(A)+2) == findlast(pattern, A) + @test_throws BoundsError findprev(pattern, A, -3) + end +end + # issue 32568 for T = (UInt, BigInt) for x = (4, 5)