From 12a6906f1e11d0f69c2f97f82dbfd23e650b3dbb Mon Sep 17 00:00:00 2001
From: ScottPJones <scottjones@alum.mit.edu>
Date: Sun, 18 Oct 2020 00:54:36 -0400
Subject: [PATCH] Add AbstractPattern and AbstractMatch to allow for more
 general pattern matching

---
 base/broadcast.jl      |  2 +-
 base/exports.jl        |  2 ++
 base/regex.jl          | 28 +++++++++++++++++++++-------
 base/strings/search.jl |  6 +++---
 base/strings/util.jl   |  2 +-
 5 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/base/broadcast.jl b/base/broadcast.jl
index b55051d82546d1..9c4533b77414a7 100644
--- a/base/broadcast.jl
+++ b/base/broadcast.jl
@@ -675,7 +675,7 @@ julia> Broadcast.broadcastable("hello") # Strings break convention of matching i
 Base.RefValue{String}("hello")
 ```
 """
-broadcastable(x::Union{Symbol,AbstractString,Function,UndefInitializer,Nothing,RoundingMode,Missing,Val,Ptr,Regex,Pair}) = Ref(x)
+broadcastable(x::Union{Symbol,AbstractString,Function,UndefInitializer,Nothing,RoundingMode,Missing,Val,Ptr,AbstractPattern,Pair}) = Ref(x)
 broadcastable(::Type{T}) where {T} = Ref{Type{T}}(T)
 broadcastable(x::Union{AbstractArray,Number,Ref,Tuple,Broadcasted}) = x
 # Default to collecting iterables — which will error for non-iterables
diff --git a/base/exports.jl b/base/exports.jl
index 2c0c628eec866b..287866ca59503b 100644
--- a/base/exports.jl
+++ b/base/exports.jl
@@ -22,6 +22,8 @@ export
     AbstractVector,
     AbstractVecOrMat,
     Array,
+    AbstractMatch,
+    AbstractPattern,
     AbstractDict,
     BigFloat,
     BigInt,
diff --git a/base/regex.jl b/base/regex.jl
index 75c3777fd681a0..68b8acf3c3cecb 100644
--- a/base/regex.jl
+++ b/base/regex.jl
@@ -7,6 +7,13 @@ include("pcre.jl")
 const DEFAULT_COMPILER_OPTS = PCRE.UTF | PCRE.NO_UTF_CHECK | PCRE.ALT_BSUX | PCRE.UCP
 const DEFAULT_MATCH_OPTS = PCRE.NO_UTF_CHECK
 
+"""
+An abstract type representing any sort of pattern matching expression (typically a regular
+expression).
+`AbstractPattern` objects can be used to match strings with [`match`](@ref).
+"""
+abstract type AbstractPattern end
+
 """
     Regex(pattern[, flags])
 
@@ -17,7 +24,7 @@ with [`match`](@ref).
 `Regex(pattern[, flags])` constructor is usually used if the `pattern` string needs
 to be interpolated. See the documentation of the string macro for details on flags.
 """
-mutable struct Regex
+mutable struct Regex <: AbstractPattern
     pattern::String
     compile_options::UInt32
     match_options::UInt32
@@ -128,10 +135,16 @@ function show(io::IO, re::Regex)
     end
 end
 
+"""
+`AbstractMatch` objects are used to represent information about matches found in a string
+using an `AbstractPattern`.
+"""
+abstract type AbstractMatch end
+
 # TODO: map offsets into strings in other encodings back to original indices.
 # or maybe it's better to just fail since that would be quite slow
 
-struct RegexMatch
+struct RegexMatch <: AbstractMatch
     match::SubString{String}
     captures::Vector{Union{Nothing,SubString{String}}}
     offset::Int
@@ -278,7 +291,8 @@ true
 """
 function match end
 
-function match(re::Regex, str::Union{SubString{String}, String}, idx::Integer, add_opts::UInt32=UInt32(0))
+function match(re::Regex, str::Union{SubString{String}, String}, idx::Integer,
+               add_opts::UInt32=UInt32(0))
     compile(re)
     opts = re.match_options | add_opts
     matched, data = PCRE.exec_r_data(re.regex, str, idx-1, opts)
@@ -336,7 +350,7 @@ findfirst(r::Regex, s::AbstractString) = findnext(r,s,firstindex(s))
 
 """
     findall(
-        pattern::Union{AbstractString,Regex},
+        pattern::Union{AbstractString,AbstractPattern},
         string::AbstractString;
         overlap::Bool = false,
     )
@@ -365,7 +379,7 @@ julia> findall("a", "banana")
  6:6
 ```
 """
-function findall(t::Union{AbstractString,Regex}, s::AbstractString; overlap::Bool=false)
+function findall(t::Union{AbstractString,AbstractPattern}, s::AbstractString; overlap::Bool=false)
     found = UnitRange{Int}[]
     i, e = firstindex(s), lastindex(s)
     while true
@@ -381,7 +395,7 @@ end
 
 """
     count(
-        pattern::Union{AbstractString,Regex},
+        pattern::Union{AbstractString,AbstractPattern},
         string::AbstractString;
         overlap::Bool = false,
     )
@@ -392,7 +406,7 @@ calling `length(findall(pattern, string))` but more efficient.
 If `overlap=true`, the matching sequences are allowed to overlap indices in the
 original string, otherwise they must be from disjoint character ranges.
 """
-function count(t::Union{AbstractString,Regex}, s::AbstractString; overlap::Bool=false)
+function count(t::Union{AbstractString,AbstractPattern}, s::AbstractString; overlap::Bool=false)
     n = 0
     i, e = firstindex(s), lastindex(s)
     while true
diff --git a/base/strings/search.jl b/base/strings/search.jl
index b1908ac99c8600..140a5eab06350b 100644
--- a/base/strings/search.jl
+++ b/base/strings/search.jl
@@ -88,7 +88,7 @@ end
 
 """
     findfirst(pattern::AbstractString, string::AbstractString)
-    findfirst(pattern::Regex, string::String)
+    findfirst(pattern::AbstractPattern, string::String)
 
 Find the first occurrence of `pattern` in `string`. Equivalent to
 [`findnext(pattern, string, firstindex(s))`](@ref).
@@ -250,7 +250,7 @@ end
 
 """
     findnext(pattern::AbstractString, string::AbstractString, start::Integer)
-    findnext(pattern::Regex, string::String, start::Integer)
+    findnext(pattern::AbstractPattern, string::String, start::Integer)
 
 Find the next occurrence of `pattern` in `string` starting at position `start`.
 `pattern` can be either a string, or a regular expression, in which case `string`
@@ -507,7 +507,7 @@ findprev(ch::AbstractChar, string::AbstractString, ind::Integer) =
     findprev(==(ch), string, ind)
 
 """
-    occursin(needle::Union{AbstractString,Regex,AbstractChar}, haystack::AbstractString)
+    occursin(needle::Union{AbstractString,AbstractPattern,AbstractChar}, haystack::AbstractString)
 
 Determine whether the first argument is a substring of the second. If `needle`
 is a regular expression, checks whether `haystack` contains a match.
diff --git a/base/strings/util.jl b/base/strings/util.jl
index c45a353f07c593..15da308c36460b 100644
--- a/base/strings/util.jl
+++ b/base/strings/util.jl
@@ -546,7 +546,7 @@ If `count` is provided, replace at most `count` occurrences.
 `pat` may be a single character, a vector or a set of characters, a string,
 or a regular expression.
 If `r` is a function, each occurrence is replaced with `r(s)`
-where `s` is the matched substring (when `pat` is a `Regex` or `AbstractString`) or
+where `s` is the matched substring (when `pat` is an `AbstractPattern` or `AbstractString`) or
 character (when `pat` is an `AbstractChar` or a collection of `AbstractChar`).
 If `pat` is a regular expression and `r` is a [`SubstitutionString`](@ref), then capture group
 references in `r` are replaced with the corresponding matched text.