JuliaLang · mbauman · Oct 24, 2018 · Oct 20, 2018 · Oct 22, 2018 · Oct 24, 2018
diff --git a/base/bitarray.jl b/base/bitarray.jl
@@ -84,6 +84,7 @@ IndexStyle(::Type{<:BitArray}) = IndexLinear()
 const _msk64 = ~UInt64(0)
 @inline _div64(l) = l >> 6
 @inline _mod64(l) = l & 63
+@inline _BLSR(x)= x & (x-1) #zeros the last set bit. Has native instruction on many archs. needed in multidimensional.jl
 @inline _msk_end(l::Integer) = _msk64 >>> _mod64(-l)
 @inline _msk_end(B::BitArray) = _msk_end(length(B))
 num_bit_chunks(n::Int) = _div64(n+63)

diff --git a/base/multidimensional.jl b/base/multidimensional.jl
@@ -523,19 +523,23 @@ end
         L.mask[idx] && return (idx, s)
     end
 end
-# When wrapping a BitArray, lean heavily upon its internals -- this is a common
-# case. Just use the Int index and count as its state.
-@inline function iterate(L::LogicalIndex{Int,<:BitArray}, s=(0,1))
-    s[2] > length(L) && return nothing
-    i, n = s
+# When wrapping a BitArray, lean heavily upon its internals.
+@inline function iterate(L::Base.LogicalIndex{Int,<:BitArray})
+    L.sum == 0 && return nothing
     Bc = L.mask.chunks
-    while true
-        if Bc[_div64(i)+1] & (UInt64(1)<<_mod64(i)) != 0
-            i += 1
-            return (i, (i, n+1))
-        end
-        i += 1
+    return iterate(L::Base.LogicalIndex{Int,<:BitArray}, (1, @inbounds Bc[1]))
+end
+@propagate_inbounds function iterate(L::Base.LogicalIndex{Int,<:BitArray}, s)
+    Bc = L.mask.chunks
+    i1, c = s
+    while c==0
+        i1 == length(Bc) && return nothing
 iterate(A::Array, i=1) = (@_inline_meta; (i % UInt) - 1 < length(A) ? (@inbounds A[i], i + 1) : nothing) 
 iterate(A::Array, i=1) = (@_inline_meta; (i % UInt) - 1 < length(A) ? (@inbounds A[i], i + 1) : nothing) 
+        i1 += 1
+        c = Bc[i1]
     end
+    tz = trailing_zeros(c) + 1
+    c = _BLSR(c)
+    return ((i1-1)<<6 + tz, (i1, c))
 end
 
 @inline checkbounds(::Type{Bool}, A::AbstractArray, I::LogicalIndex{<:Any,<:AbstractArray{Bool,1}}) =