Skip to content

Commit

Permalink
Merge pull request #2 from ScottPJones/spj/pattern
Browse files Browse the repository at this point in the history
Add AbstractPattern & AbstractMatch to allow for generic pattern matching
  • Loading branch information
ScottPJones authored Oct 21, 2020
2 parents d922cfc + 12a6906 commit 58e451d
Show file tree
Hide file tree
Showing 5 changed files with 28 additions and 12 deletions.
2 changes: 1 addition & 1 deletion base/broadcast.jl
Original file line number Diff line number Diff line change
Expand Up @@ -675,7 +675,7 @@ julia> Broadcast.broadcastable("hello") # Strings break convention of matching i
Base.RefValue{String}("hello")
```
"""
broadcastable(x::Union{Symbol,AbstractString,Function,UndefInitializer,Nothing,RoundingMode,Missing,Val,Ptr,Regex,Pair}) = Ref(x)
broadcastable(x::Union{Symbol,AbstractString,Function,UndefInitializer,Nothing,RoundingMode,Missing,Val,Ptr,AbstractPattern,Pair}) = Ref(x)
broadcastable(::Type{T}) where {T} = Ref{Type{T}}(T)
broadcastable(x::Union{AbstractArray,Number,Ref,Tuple,Broadcasted}) = x
# Default to collecting iterables — which will error for non-iterables
Expand Down
2 changes: 2 additions & 0 deletions base/exports.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ export
AbstractVector,
AbstractVecOrMat,
Array,
AbstractMatch,
AbstractPattern,
AbstractDict,
BigFloat,
BigInt,
Expand Down
28 changes: 21 additions & 7 deletions base/regex.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,13 @@ include("pcre.jl")
# Default option bit masks built from constants in the `PCRE` module; the individual
# flags are combined with bitwise OR.
# NOTE(review): presumably the compile-time defaults for a `Regex` — confirm against the
# `Regex` constructor, which is not visible here.
const DEFAULT_COMPILER_OPTS = PCRE.UTF | PCRE.NO_UTF_CHECK | PCRE.ALT_BSUX | PCRE.UCP
# NOTE(review): presumably the match-time defaults for a `Regex` — confirm against the
# `Regex` constructor.
const DEFAULT_MATCH_OPTS = PCRE.NO_UTF_CHECK

"""
    AbstractPattern

An abstract type representing any sort of pattern-matching expression (typically a regular
expression, such as [`Regex`](@ref)).
`AbstractPattern` objects can be used to match strings with [`match`](@ref).
"""
abstract type AbstractPattern end

"""
Regex(pattern[, flags])
Expand All @@ -17,7 +24,7 @@ with [`match`](@ref).
`Regex(pattern[, flags])` constructor is usually used if the `pattern` string needs
to be interpolated. See the documentation of the string macro for details on flags.
"""
mutable struct Regex
mutable struct Regex <: AbstractPattern
pattern::String
compile_options::UInt32
match_options::UInt32
Expand Down Expand Up @@ -128,10 +135,16 @@ function show(io::IO, re::Regex)
end
end

"""
    AbstractMatch

`AbstractMatch` objects (such as `RegexMatch`) are used to represent information about
matches found in a string using an [`AbstractPattern`](@ref).
"""
abstract type AbstractMatch end

# TODO: map offsets into strings in other encodings back to original indices.
# or maybe it's better to just fail since that would be quite slow

struct RegexMatch
struct RegexMatch <: AbstractMatch
match::SubString{String}
captures::Vector{Union{Nothing,SubString{String}}}
offset::Int
Expand Down Expand Up @@ -278,7 +291,8 @@ true
"""
function match end

function match(re::Regex, str::Union{SubString{String}, String}, idx::Integer, add_opts::UInt32=UInt32(0))
function match(re::Regex, str::Union{SubString{String}, String}, idx::Integer,
add_opts::UInt32=UInt32(0))
compile(re)
opts = re.match_options | add_opts
matched, data = PCRE.exec_r_data(re.regex, str, idx-1, opts)
Expand Down Expand Up @@ -336,7 +350,7 @@ findfirst(r::Regex, s::AbstractString) = findnext(r,s,firstindex(s))

"""
findall(
pattern::Union{AbstractString,Regex},
pattern::Union{AbstractString,AbstractPattern},
string::AbstractString;
overlap::Bool = false,
)
Expand Down Expand Up @@ -365,7 +379,7 @@ julia> findall("a", "banana")
6:6
```
"""
function findall(t::Union{AbstractString,Regex}, s::AbstractString; overlap::Bool=false)
function findall(t::Union{AbstractString,AbstractPattern}, s::AbstractString; overlap::Bool=false)
found = UnitRange{Int}[]
i, e = firstindex(s), lastindex(s)
while true
Expand All @@ -381,7 +395,7 @@ end

"""
count(
pattern::Union{AbstractString,Regex},
pattern::Union{AbstractString,AbstractPattern},
string::AbstractString;
overlap::Bool = false,
)
Expand All @@ -392,7 +406,7 @@ calling `length(findall(pattern, string))` but more efficient.
If `overlap=true`, the matching sequences are allowed to overlap indices in the
original string, otherwise they must be from disjoint character ranges.
"""
function count(t::Union{AbstractString,Regex}, s::AbstractString; overlap::Bool=false)
function count(t::Union{AbstractString,AbstractPattern}, s::AbstractString; overlap::Bool=false)
n = 0
i, e = firstindex(s), lastindex(s)
while true
Expand Down
6 changes: 3 additions & 3 deletions base/strings/search.jl
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ end

"""
findfirst(pattern::AbstractString, string::AbstractString)
findfirst(pattern::Regex, string::String)
findfirst(pattern::AbstractPattern, string::String)
Find the first occurrence of `pattern` in `string`. Equivalent to
[`findnext(pattern, string, firstindex(string))`](@ref).
Expand Down Expand Up @@ -250,7 +250,7 @@ end

"""
findnext(pattern::AbstractString, string::AbstractString, start::Integer)
findnext(pattern::Regex, string::String, start::Integer)
findnext(pattern::AbstractPattern, string::String, start::Integer)
Find the next occurrence of `pattern` in `string` starting at position `start`.
`pattern` can be either a string, or a regular expression, in which case `string`
Expand Down Expand Up @@ -507,7 +507,7 @@ findprev(ch::AbstractChar, string::AbstractString, ind::Integer) =
findprev(==(ch), string, ind)

"""
occursin(needle::Union{AbstractString,Regex,AbstractChar}, haystack::AbstractString)
occursin(needle::Union{AbstractString,AbstractPattern,AbstractChar}, haystack::AbstractString)
Determine whether the first argument is a substring of the second. If `needle`
is a regular expression, checks whether `haystack` contains a match.
Expand Down
2 changes: 1 addition & 1 deletion base/strings/util.jl
Original file line number Diff line number Diff line change
Expand Up @@ -546,7 +546,7 @@ If `count` is provided, replace at most `count` occurrences.
`pat` may be a single character, a vector or a set of characters, a string,
or a regular expression.
If `r` is a function, each occurrence is replaced with `r(s)`
where `s` is the matched substring (when `pat` is a `Regex` or `AbstractString`) or
where `s` is the matched substring (when `pat` is an `AbstractPattern` or `AbstractString`) or
character (when `pat` is an `AbstractChar` or a collection of `AbstractChar`).
If `pat` is a regular expression and `r` is a [`SubstitutionString`](@ref), then capture group
references in `r` are replaced with the corresponding matched text.
Expand Down

0 comments on commit 58e451d

Please sign in to comment.