Merge pull request #17355 from JuliaLang/teh/checkbounds_revisited

Revise checkbounds again
JuliaLang · Jul 14, 2016 · 828f7ae · 828f7ae
2 parents 089590b + f16a3fe
commit 828f7ae
Show file tree

Hide file tree

Showing 8 changed files with 326 additions and 107 deletions.
diff --git a/base/abstractarray.jl b/base/abstractarray.jl
@@ -45,8 +45,14 @@ function indices{T,N}(A::AbstractArray{T,N})
     map(s->OneTo(s), size(A))
 end
 
+# Performance optimization: get rid of a branch on `d` in `indices(A,
+# d)` for d=1. 1d arrays are heavily used, and the first dimension
+# comes up in other applications.
 indices1{T}(A::AbstractArray{T,0}) = OneTo(1)
-indices1{T}(A::AbstractArray{T})   = indices(A)[1]
+indices1{T}(A::AbstractArray{T})   = (@_inline_meta; indices(A)[1])
+
+unsafe_indices(A) = indices(A)
+unsafe_indices(r::Range) = (OneTo(unsafe_length(r)),) # Ranges use checked_sub for size
 
 """
     linearindices(A)
@@ -60,8 +66,8 @@ is `indices(A, 1)`.
 Calling this function is the "safe" way to write algorithms that
 exploit linear indexing.
 """
-linearindices(A) = 1:length(A)
-linearindices(A::AbstractVector) = indices1(A)
+linearindices(A)                 = (@_inline_meta; 1:length(A))
+linearindices(A::AbstractVector) = (@_inline_meta; indices1(A))
 eltype{T}(::Type{AbstractArray{T}}) = T
 eltype{T,N}(::Type{AbstractArray{T,N}}) = T
 elsize{T}(::AbstractArray{T}) = sizeof(T)
@@ -144,6 +150,120 @@ linearindexing(::LinearFast, ::LinearFast) = LinearFast()
 linearindexing(::LinearIndexing, ::LinearIndexing) = LinearSlow()
 
 ## Bounds checking ##
+
+# The overall hierarchy is
+#     `checkbounds(A, I...)` ->
+#         `checkbounds(Bool, A, I...)` -> either of:
+#             - `checkbounds_logical(Bool, A, I)` when `I` is a single logical array
+#             - `checkbounds_indices(Bool, IA, I)` otherwise (uses `checkindex`)
+#
+# See the "boundscheck" devdocs for more information.
+#
+# Note this hierarchy has been designed to reduce the likelihood of
+# method ambiguities.  We try to make `checkbounds` the place to
+# specialize on array type, and try to avoid specializations on index
+# types; conversely, `checkindex` is intended to be specialized only
+# on index type (especially, its last argument).
+
+"""
+    checkbounds(Bool, A, I...)
+
+Return `true` if the specified indices `I` are in bounds for the given
+array `A`. Subtypes of `AbstractArray` should specialize this method
+if they need to provide custom bounds checking behaviors; however, in
+many cases one can rely on `A`'s indices and `checkindex`.
+
+See also `checkindex`.
+"""
+function checkbounds(::Type{Bool}, A::AbstractArray, I...)
+    @_inline_meta
+    checkbounds_indices(Bool, indices(A), I)
+end
+function checkbounds(::Type{Bool}, A::AbstractArray, I::AbstractArray{Bool})
+    @_inline_meta
+    checkbounds_logical(Bool, A, I)
+end
+
+"""
+    checkbounds(A, I...)
+
+Throw an error if the specified indices `I` are not in bounds for the given array `A`.
+"""
+function checkbounds(A::AbstractArray, I...)
+    @_inline_meta
+    checkbounds(Bool, A, I...) || throw_boundserror(A, I)
+    nothing
+end
+checkbounds(A::AbstractArray) = checkbounds(A, 1) # 0-d case
+
+"""
+    checkbounds_indices(Bool, IA, I)
+
+Return `true` if the "requested" indices in the tuple `I` fall within
+the bounds of the "permitted" indices specified by the tuple
+`IA`. This function recursively consumes elements of these tuples,
+usually in a 1-for-1 fashion,
+
+    checkbounds_indices(Bool, (IA1, IA...), (I1, I...)) = checkindex(Bool, IA1, I1) &
+                                                          checkbounds_indices(Bool, IA, I)
+
+Note that `checkindex` is being used to perform the actual
+bounds-check for a single dimension of the array.
+
+There are two important exceptions to the 1-1 rule: linear indexing and
+CartesianIndex{N}, both of which may "consume" more than one element
+of `IA`.
+"""
+function checkbounds_indices(::Type{Bool}, IA::Tuple, I::Tuple)
+    @_inline_meta
+    checkindex(Bool, IA[1], I[1]) & checkbounds_indices(Bool, tail(IA), tail(I))
+end
+checkbounds_indices(::Type{Bool}, ::Tuple{},  ::Tuple{})    = true
+checkbounds_indices(::Type{Bool}, ::Tuple{}, I::Tuple{Any}) = (@_inline_meta; checkindex(Bool, 1:1, I[1]))
+function checkbounds_indices(::Type{Bool}, ::Tuple{}, I::Tuple)
+    @_inline_meta
+    checkindex(Bool, 1:1, I[1]) & checkbounds_indices(Bool, (), tail(I))
+end
+function checkbounds_indices(::Type{Bool}, IA::Tuple{Any}, I::Tuple{Any})
+    @_inline_meta
+    checkindex(Bool, IA[1], I[1])
+end
+function checkbounds_indices(::Type{Bool}, IA::Tuple, I::Tuple{Any})
+    @_inline_meta
+    checkindex(Bool, 1:prod(map(dimlength, IA)), I[1])  # linear indexing
+end
+
+"""
+    checkbounds_logical(Bool, A, I::AbstractArray{Bool})
+
+Return `true` if the logical array `I` is consistent with the indices
+of `A`. `I` and `A` should have the same size and compatible indices.
+"""
+function checkbounds_logical(::Type{Bool}, A::AbstractArray, I::AbstractArray{Bool})
+    indices(A) == indices(I)
+end
+function checkbounds_logical(::Type{Bool}, A::AbstractArray, I::AbstractVector{Bool})
+    length(A) == length(I)
+end
+function checkbounds_logical(::Type{Bool}, A::AbstractVector, I::AbstractArray{Bool})
+    length(A) == length(I)
+end
+function checkbounds_logical(::Type{Bool}, A::AbstractVector, I::AbstractVector{Bool})
+    indices(A) == indices(I)
+end
+
+"""
+    checkbounds_logical(A, I::AbstractArray{Bool})
+
+Throw an error if the logical array `I` is inconsistent with the indices of `A`.
+"""
+function checkbounds_logical(A, I::AbstractVector{Bool})
+    checkbounds_logical(Bool, A, I) || throw_boundserror(A, I)
+    nothing
+end
+
+throw_boundserror(A, I) = (@_noinline_meta; throw(BoundsError(A, I)))
+
 @generated function trailingsize{T,N,n}(A::AbstractArray{T,N}, ::Type{Val{n}})
     (isa(n, Int) && isa(N, Int)) || error("Must have concrete type")
     n > N && return 1
@@ -156,7 +276,7 @@ end
 
 # check along a single dimension
 """
-    checkindex(Bool, inds::UnitRange, index)
+    checkindex(Bool, inds::AbstractUnitRange, index)
 
 Return `true` if the given `index` is within the bounds of
 `inds`. Custom types that would like to behave as indices for all
@@ -180,59 +300,6 @@ function checkindex(::Type{Bool}, inds::AbstractUnitRange, I::AbstractArray)
     b
 end
 
-# check all indices/dimensions
-# To make extension easier, avoid specializations of checkbounds on index types
-# (That said, typically one does not need to specialize this function.)
-"""
-    checkbounds(Bool, array, indexes...)
-
-Return `true` if the specified `indexes` are in bounds for the given `array`. Subtypes of
-`AbstractArray` should specialize this method if they need to provide custom bounds checking
-behaviors.
-"""
-function checkbounds(::Type{Bool}, A::AbstractArray, i::Integer)
-    @_inline_meta
-    checkindex(Bool, linearindices(A), i)
-end
-function checkbounds{T}(::Type{Bool}, A::Union{Array{T,1},Range{T}}, i::Integer)
-    @_inline_meta
-    (1 <= i) & (i <= length(A))
-end
-function checkbounds(::Type{Bool}, A::AbstractArray, I::AbstractArray{Bool})
-    @_inline_meta
-    checkbounds_logical(A, I)
-end
-function checkbounds(::Type{Bool}, A::AbstractArray, I...)
-    @_inline_meta
-    checkbounds_indices(indices(A), I)
-end
-
-checkbounds_indices(::Tuple{},  ::Tuple{})    = true
-checkbounds_indices(::Tuple{}, I::Tuple{Any}) = (@_inline_meta; checkindex(Bool, 1:1, I[1]))
-checkbounds_indices(::Tuple{}, I::Tuple)      = (@_inline_meta; checkindex(Bool, 1:1, I[1]) & checkbounds_indices((), tail(I)))
-checkbounds_indices(inds::Tuple{Any}, I::Tuple{Any}) = (@_inline_meta; checkindex(Bool, inds[1], I[1]))
-checkbounds_indices(inds::Tuple, I::Tuple{Any}) = (@_inline_meta; checkindex(Bool, 1:prod(map(dimlength, inds)), I[1]))
-checkbounds_indices(inds::Tuple, I::Tuple) = (@_inline_meta; checkindex(Bool, inds[1], I[1]) & checkbounds_indices(tail(inds), tail(I)))
-
-# Single logical array indexing:
-checkbounds_logical(A::AbstractArray, I::AbstractArray{Bool})   = indices(A) == indices(I)
-checkbounds_logical(A::AbstractArray, I::AbstractVector{Bool})  = length(A) == length(I)
-checkbounds_logical(A::AbstractVector, I::AbstractArray{Bool})  = length(A) == length(I)
-checkbounds_logical(A::AbstractVector, I::AbstractVector{Bool}) = indices(A) == indices(I)
-
-throw_boundserror(A, I) = (@_noinline_meta; throw(BoundsError(A, I)))
-
-"""
-    checkbounds(array, indexes...)
-
-Throw an error if the specified `indexes` are not in bounds for the given `array`.
-"""
-function checkbounds(A::AbstractArray, I...)
-    @_inline_meta
-    checkbounds(Bool, A, I...) || throw_boundserror(A, I)
-end
-checkbounds(A::AbstractArray) = checkbounds(A, 1) # 0-d case
-
 # See also specializations in multidimensional
 
 ## Constructors ##
@@ -1193,8 +1260,6 @@ nextL(L, l::Integer) = L*l
 nextL(L, r::AbstractUnitRange) = L*unsafe_length(r)
 offsetin(i, l::Integer) = i-1
 offsetin(i, r::AbstractUnitRange) = i-first(r)
-unsafe_length(r::UnitRange) = r.stop-r.start+1
-unsafe_length(r::OneTo) = length(r)
 
 ind2sub(::Tuple{}, ind::Integer) = (@_inline_meta; ind == 1 ? () : throw(BoundsError()))
 ind2sub(dims::DimsInteger, ind::Integer) = (@_inline_meta; _ind2sub(dims, ind-1))

diff --git a/base/multidimensional.jl b/base/multidimensional.jl
@@ -141,21 +141,47 @@ simd_index{I<:CartesianIndex{0}}(iter::CartesianRange{I}, ::CartesianIndex, I1::
     CartesianIndex((I1+iter.start[1], Ilast.I...))
 end
 
+# Split out the first N elements of a tuple
+@inline split{N}(t, V::Type{Val{N}}) = _split((), t, V)
+@inline _split(tN, trest, V) = _split((tN..., trest[1]), tail(trest), V)
+# exit either when we've exhausted the input tuple or when tN has length N
+@inline _split{N}(tN::NTuple{N}, ::Tuple{}, ::Type{Val{N}}) = tN, ()  # ambig.
+@inline _split{N}(tN,            ::Tuple{}, ::Type{Val{N}}) = tN, ()
+@inline _split{N}(tN::NTuple{N},  trest,    ::Type{Val{N}}) = tN, trest
+
 end  # IteratorsMD
 
 using .IteratorsMD
 
 ## Bounds-checking with CartesianIndex
-# Ambiguity with linear indexing:
-@inline _chkbnds(A::AbstractVector, checked::NTuple{1,Bool}, I::CartesianIndex) = _chkbnds(A, checked, I.I...)
-@inline _chkbnds(A::AbstractArray, checked::NTuple{1,Bool}, I::CartesianIndex) = _chkbnds(A, checked, I.I...)
-# Generic bounds checking
-@inline _chkbnds{T,N}(A::AbstractArray{T,N}, checked::NTuple{N,Bool}, I1::CartesianIndex, I...) = _chkbnds(A, checked, I1.I..., I...)
-@inline _chkbnds{T,N,M}(A::AbstractArray{T,N}, checked::NTuple{M,Bool}, I1::CartesianIndex, I...) = _chkbnds(A, checked, I1.I..., I...)
-
-@inline checkbounds_indices(::Tuple{},   I::Tuple{CartesianIndex,Vararg{Any}}) = checkbounds_indices((),   (I[1].I..., tail(I)...))
-@inline checkbounds_indices(inds::Tuple{Any}, I::Tuple{CartesianIndex,Vararg{Any}}) = checkbounds_indices(inds, (I[1].I..., tail(I)...))
-@inline checkbounds_indices(inds::Tuple, I::Tuple{CartesianIndex,Vararg{Any}}) = checkbounds_indices(inds, (I[1].I..., tail(I)...))
+@inline checkbounds_indices(::Type{Bool}, ::Tuple{},   I::Tuple{CartesianIndex,Vararg{Any}}) =
+    checkbounds_indices(Bool, (), (I[1].I..., tail(I)...))
+@inline checkbounds_indices(::Type{Bool}, IA::Tuple{Any}, I::Tuple{CartesianIndex,Vararg{Any}}) =
+    checkbounds_indices(Bool, IA, (I[1].I..., tail(I)...))
+@inline checkbounds_indices(::Type{Bool}, IA::Tuple, I::Tuple{CartesianIndex,Vararg{Any}}) =
+    checkbounds_indices(Bool, IA, (I[1].I..., tail(I)...))
+
+# Support indexing with an array of CartesianIndex{N}s
+# Here we try to consume N of the indices (if there are that many available)
+# The first two simply handle ambiguities
+@inline function checkbounds_indices{N}(::Type{Bool}, ::Tuple{}, I::Tuple{AbstractArray{CartesianIndex{N}},Vararg{Any}})
+    checkindex(Bool, (), I[1]) & checkbounds_indices(Bool, (), tail(I))
+end
+@inline function checkbounds_indices{N}(::Type{Bool}, IA::Tuple{Any}, I::Tuple{AbstractArray{CartesianIndex{N}},Vararg{Any}})
+    checkindex(Bool, IA, I[1]) & checkbounds_indices(Bool, (), tail(I))
+end
+@inline function checkbounds_indices{N}(::Type{Bool}, IA::Tuple, I::Tuple{AbstractArray{CartesianIndex{N}},Vararg{Any}})
+    IA1, IArest = IteratorsMD.split(IA, Val{N})
+    checkindex(Bool, IA1, I[1]) & checkbounds_indices(Bool, IArest, tail(I))
+end
+
+function checkindex{N}(::Type{Bool}, inds::Tuple, I::AbstractArray{CartesianIndex{N}})
+    b = true
+    for i in I
+        b &= checkbounds_indices(Bool, inds, (i,))
+    end
+    b
+end
 
 # Recursively compute the lengths of a list of indices, without dropping scalars
 # These need to be inlined for more than 3 indexes

diff --git a/base/range.jl b/base/range.jl
@@ -326,12 +326,17 @@ step(r::AbstractUnitRange) = 1
 step(r::FloatRange) = r.step/r.divisor
 step{T}(r::LinSpace{T}) = ifelse(r.len <= 0, convert(T,NaN), (r.stop-r.start)/r.divisor)
 
-function length(r::StepRange)
+unsafe_length(r::Range) = length(r)  # generic fallback
+
+function unsafe_length(r::StepRange)
     n = Integer(div(r.stop+r.step - r.start, r.step))
     isempty(r) ? zero(n) : n
 end
-length(r::AbstractUnitRange) = Integer(last(r) - first(r) + 1)
-length(r::OneTo) = r.stop
+length(r::StepRange) = unsafe_length(r)
+unsafe_length(r::AbstractUnitRange) = Integer(last(r) - first(r) + 1)
+unsafe_length(r::OneTo) = r.stop
+length(r::AbstractUnitRange) = unsafe_length(r)
+length(r::OneTo) = unsafe_length(r)
 length(r::FloatRange) = Integer(r.len)
 length(r::LinSpace) = Integer(r.len + signbit(r.len - 1))
 

diff --git a/base/subarray.jl b/base/subarray.jl
@@ -278,17 +278,11 @@ end
 # they are taken from the range/vector
 # Since bounds-checking is performance-critical and uses
 # indices, it's worth optimizing these implementations thoroughly
-indices(S::SubArray) = (@_inline_meta; _indices_sub(S, 1, S.indexes...))
-_indices_sub(S::SubArray, dim::Int) = ()
-_indices_sub(S::SubArray, dim::Int, ::Real, I...) = (@_inline_meta; _indices_sub(S, dim+1, I...))
-_indices_sub(S::SubArray, dim::Int, ::Colon, I...) = (@_inline_meta; (indices(parent(S), dim), _indices_sub(S, dim+1, I...)...))
-_indices_sub(S::SubArray, dim::Int, i1::AbstractArray, I...) = (@_inline_meta; (indices(i1)..., _indices_sub(S, dim+1, I...)...))
-indices1(S::SubArray) = (@_inline_meta; _indices1(S, 1, S.indexes...))
-_indices1(S::SubArray, dim) = OneTo(1)
-_indices1(S::SubArray, dim, i1::Real, I...) = (@_inline_meta; _indices1(S, dim+1, I...))
-_indices1(S::SubArray, dim, i1::Colon, I...) = (@_inline_meta; indices(parent(S), dim))
-_indices1(S::SubArray, dim, i1::AbstractArray, I...) = (@_inline_meta; indices1(i1))
-_indices1{T}(S::SubArray, dim, i1::AbstractArray{T,0}, I...) = (@_inline_meta; _indices1(S, dim+1, I...))
+indices(S::SubArray) = (@_inline_pure_meta; _indices_sub(S, indices(S.parent), S.indexes...))
+_indices_sub(S::SubArray, pinds) = ()
+_indices_sub(S::SubArray, pinds, ::Real, I...) = _indices_sub(S, tail(pinds), I...)
+_indices_sub(S::SubArray, pinds, ::Colon, I...) = (pinds[1], _indices_sub(S, tail(pinds), I...)...)
+_indices_sub(S::SubArray, pinds, i1::AbstractArray, I...) = (unsafe_indices(i1)..., _indices_sub(S, tail(pinds), I...)...)
 
 ## Compatability
 # deprecate?

diff --git a/doc/devdocs/boundscheck.rst b/doc/devdocs/boundscheck.rst
@@ -59,3 +59,52 @@ instance, the default ``getindex`` methods have the chain
 To override the "one layer of inlining" rule, a function may be marked with
 ``@propagate_inbounds`` to propagate an inbounds context (or out of bounds
 context) through one additional layer of inlining.
+
+The bounds checking call hierarchy
+----------------------------------
+
+The overall hierarchy is:
+
+|   ``checkbounds(A, I...)`` which calls
+|     ``checkbounds(Bool, A, I...)`` which calls either of:
+|       ``checkbounds_logical(Bool, A, I)``  when ``I`` is a single logical array
+|       ``checkbounds_indices(Bool, indices(A), I)`` otherwise
+|
+
+Here ``A`` is the array, and ``I`` contains the "requested" indices.
+``indices(A)`` returns a tuple of "permitted" indices of ``A``.
+
+``checkbounds(A, I...)`` throws an error if the indices are invalid,
+whereas ``checkbounds(Bool, A, I...)`` returns ``false`` in that
+circumstance.  ``checkbounds_indices`` discards any information about
+the array other than its ``indices`` tuple, and performs a pure
+indices-vs-indices comparison: this allows relatively few compiled
+methods to serve a huge variety of array types. Indices are specified
+as tuples, and are usually compared in a 1-1 fashion with individual
+dimensions handled by calling another important function,
+``checkindex``: typically,
+::
+
+   checkbounds_indices(Bool, (IA1, IA...), (I1, I...)) = checkindex(Bool, IA1, I1) &
+                                                         checkbounds_indices(Bool, IA, I)
+
+so ``checkindex`` checks a single dimension.  All of these functions,
+including the unexported ``checkbounds_indices`` and
+``checkbounds_logical``, have docstrings accessible with ``?`` .
+
+If you have to customize bounds checking for a specific array type,
+you should specialize ``checkbounds(Bool, A, I...)``. However, in most
+cases you should be able to rely on ``checkbounds_indices`` as long as
+you supply useful ``indices`` for your array type.
+
+If you have novel index types, first consider specializing
+``checkindex``, which handles a single index for a particular
+dimension of an array.  If you have a custom multidimensional index
+type (similar to ``CartesianIndex``), then you may have to consider
+specializing ``checkbounds_indices``.
+
+Note this hierarchy has been designed to reduce the likelihood of
+method ambiguities.  We try to make ``checkbounds`` the place to
+specialize on array type, and try to avoid specializations on index
+types; conversely, ``checkindex`` is intended to be specialized only
+on index type (especially, the last argument).