Skip to content

Commit

Permalink
bitarray indexing: sync with array + refactor
Browse files Browse the repository at this point in the history
* Use setindex_shape_check and DimensionMismatch instead
  of generic errors; this (and a couple of extra minor fixes)
  makes BitArrays behave like Arrays (again)
* Move portions of the indexing code to multidimensional.jl
* Restrict signatures so as to avoid to_index and convert, then
  create very generic methods which do the conversion and the
  dispatch: this greatly simplifies the logic and removes most
  of the need for disambiguation. Should also improve code
  generation.
  • Loading branch information
carlobaldassi committed Feb 3, 2014
1 parent 793d769 commit 2fb9bad
Show file tree
Hide file tree
Showing 3 changed files with 142 additions and 285 deletions.
267 changes: 39 additions & 228 deletions base/bitarray.jl
Original file line number Diff line number Diff line change
Expand Up @@ -217,9 +217,10 @@ falses(args...) = fill!(BitArray(args...), false)
trues(args...) = fill!(BitArray(args...), true)

function one(x::BitMatrix)
m, n = size(x)
a = falses(size(x))
for i = 1 : min(m,n)
m,n = size(x)
m == n || throw(DimensionMismatch("multiplicative identity defined only for square matrices"))
a = falses(n, n)
for i = 1:n
a[i,i] = true
end
return a
Expand Down Expand Up @@ -258,7 +259,7 @@ end

function reshape{N}(B::BitArray, dims::NTuple{N,Int})
if prod(dims) != length(B)
error("new dimensions $(dims) inconsistent with the array length $(length(B))")
throw(DimensionMismatch("new dimensions $(dims) must be consistent with array size $(length(B))"))
end
Br = BitArray{N}(ntuple(N,i->0)...)
Br.chunks = B.chunks
Expand Down Expand Up @@ -320,18 +321,7 @@ end
convert{N}(::Type{BitArray{N}}, B::BitArray{N}) = B

reinterpret{N}(::Type{Bool}, B::BitArray, dims::NTuple{N,Int}) = reinterpret(B, dims)
function reinterpret{N}(B::BitArray, dims::NTuple{N,Int})
if prod(dims) != length(B)
error("new dimensions $(dims) are inconsistent with array length $(length(B))")
end
A = BitArray{N}(ntuple(N,i->0)...)
A.chunks = B.chunks
A.len = prod(dims)
if N != 1
A.dims = dims
end
return A
end
reinterpret{N}(B::BitArray, dims::NTuple{N,Int}) = reshape(B, dims)

# shorthand forms BitArray <-> Array
bitunpack{N}(B::BitArray{N}) = convert(Array{Bool,N}, B)
Expand Down Expand Up @@ -359,9 +349,7 @@ function getindex_unchecked(Bc::Vector{Uint64}, i::Int)
end

function getindex(B::BitArray, i::Int)
if i < 1 || i > length(B)
throw(BoundsError())
end
1 <= i <= length(B) || throw(BoundsError())
return getindex_unchecked(B.chunks, i)
end

Expand All @@ -372,58 +360,6 @@ getindex(B::BitArray) = getindex(B, 1)
# 0d bitarray
getindex(B::BitArray{0}) = getindex_unchecked(B.chunks, 1)

function getindex(B::BitArray, i1::Real, i2::Real)
#checkbounds(B, i0, i1) # manually inlined for performance
i1, i2 = to_index(i1, i2)
l1 = size(B,1)
1 <= i1 <= l1 || throw(BoundsError())
return B[i1 + l1*(i2-1)]
end
function getindex(B::BitArray, i1::Real, i2::Real, i3::Real)
#checkbounds(B, i0, i1, i2) # manually inlined for performance
i1, i2, i3 = to_index(i1, i2, i3)
l1 = size(B,1)
1 <= i1 <= l1 || throw(BoundsError())
l2 = size(B,2)
1 <= i2 <= l2 || throw(BoundsError())
return B[i1 + l1*((i2-1) + l2*(i3-1))]
end
function getindex(B::BitArray, i1::Real, i2::Real, i3::Real, i4::Real)
#checkbounds(B, i1, i2, i3, i4)
i1, i2, i3, i4 = to_index(i1, i2, i3, i4)
l1 = size(B,1)
1 <= i1 <= l1 || throw(BoundsError())
l2 = size(B,2)
1 <= i2 <= l2 || throw(BoundsError())
l3 = size(B,3)
1 <= i3 <= l3 || throw(BoundsError())
return B[i1 + l1*((i2-1) + l2*((i3-1) + l3*(i4-1)))]
end

function getindex(B::BitArray, I::Real...)
#checkbounds(B, I...) # inlined for performance
#I = to_index(I) # inlined for performance
ndims = length(I)
i = to_index(I[1])
l = size(B,1)
1 <= i <= l || throw(BoundsError())
index = i
stride = 1
for k = 2:ndims-1
stride *= l
i = to_index(I[k])
l = size(B,k)
1 <= i <= l || throw(BoundsError())
index += (i-1) * stride
end
stride *= l
i = to_index(I[ndims])
index += (i-1) * stride
return B[index]
end

# note: the Range1{Int} case is still handled by the version above
# (which is fine)
function getindex{T<:Real}(B::BitArray, I::AbstractVector{T})
X = BitArray(length(I))
lB = length(B)
Expand All @@ -432,40 +368,34 @@ function getindex{T<:Real}(B::BitArray, I::AbstractVector{T})
ind = 1
for i in I
# faster X[ind] = B[i]
i = to_index(i)
1 <= i <= lB || throw(BoundsError())
setindex_unchecked(Xc, getindex_unchecked(Bc, i), ind)
j = to_index(i)
1 <= j <= lB || throw(BoundsError())
setindex_unchecked(Xc, getindex_unchecked(Bc, j), ind)
ind += 1
end
return X
end

# logical indexing

function getindex_bool_1d(B::BitArray, I::AbstractArray{Bool})
n = sum(I)
X = BitArray(n)
lI = length(I)
if lI != length(B)
throw(BoundsError())
end
Xc = X.chunks
Bc = B.chunks
ind = 1
for i = 1:length(I)
if I[i]
# faster X[ind] = B[i]
setindex_unchecked(Xc, getindex_unchecked(Bc, i), ind)
ind += 1
# (multiple signatures for disambiguation)
for IT in [AbstractVector{Bool}, AbstractArray{Bool}]
@eval function getindex(B::BitArray, I::$IT)
checkbounds(B, I)
n = sum(I)
X = BitArray(n)
Xc = X.chunks
Bc = B.chunks
ind = 1
for i = 1:length(I)
if I[i]
# faster X[ind] = B[i]
setindex_unchecked(Xc, getindex_unchecked(Bc, i), ind)
ind += 1
end
end
return X
end
return X
end

# multiple signatures required for disambiguation
# (see also getindex in multidimensional.jl)
for BT in [BitVector, BitArray], IT in [Range1{Bool}, AbstractVector{Bool}, AbstractArray{Bool}]
@eval getindex(B::$BT, I::$IT) = getindex_bool_1d(B, I)
end

## Indexing: setindex! ##
Expand All @@ -482,133 +412,31 @@ function setindex_unchecked(Bc::Array{Uint64}, x::Bool, i::Int)
end
end

setindex!(B::BitArray, x::Bool) = setindex!(B, x, 1)

function setindex!(B::BitArray, x::Bool, i::Int)
if i < 1 || i > length(B)
throw(BoundsError())
end
1 <= i <= length(B) || throw(BoundsError())
setindex_unchecked(B.chunks, x, i)
return B
end

setindex!(B::BitArray, x) = setindex!(B, x, 1)

setindex!(B::BitArray, x, i::Real) = setindex!(B, convert(Bool,x), to_index(i))

function setindex!(B::BitArray, x, i1::Real, i2::Real)
#checkbounds(B, i0, i1) # manually inlined for performance
i1, i2 = to_index(i1, i2)
l1 = size(B,1)
1 <= i1 <= l1 || throw(BoundsError())
B[i1 + l1*(i2-1)] = x
return B
end

function setindex!(B::BitArray, x, i1::Real, i2::Real, i3::Real)
#checkbounds(B, i1, i2, i3) # manually inlined for performance
i1, i2, i3 = to_index(i1, i2, i3)
l1 = size(B,1)
1 <= i1 <= l1 || throw(BoundsError())
l2 = size(B,2)
1 <= i2 <= l2 || throw(BoundsError())
B[i1 + l1*((i2-1) + l2*(i3-1))] = x
return B
end

function setindex!(B::BitArray, x, i1::Real, i2::Real, i3::Real, i4::Real)
#checkbounds(B, i1, i2, i3, i4) # manually inlined for performance
i1, i2, i3, i4 = to_index(i1, i2, i3, i4)
l1 = size(B,1)
1 <= i1 <= l1 || throw(BoundsError())
l2 = size(B,2)
1 <= i2 <= l2 || throw(BoundsError())
l3 = size(B,3)
1 <= i3 <= l3 || throw(BoundsError())
B[i1 + l1*((i2-1) + l2*((i3-1) + l3*(i4-1)))] = x
return B
end

function setindex!(B::BitArray, x, i::Real, I::Real...)
#checkbounds(B, I...) # inlined for performance
#I = to_index(I) # inlined for performance
ndims = length(I) + 1
i = to_index(i)
l = size(B,1)
1 <= i <= l || throw(BoundsError())
index = i
stride = 1
for k = 2:ndims-1
stride *= l
l = size(B,k)
i = to_index(I[k-1])
1 <= i <= l || throw(BoundsError())
index += (i-1) * stride
end
stride *= l
i = to_index(I[ndims-1])
index += (i-1) * stride
B[index] = x
return B
end

function setindex!{T<:Real}(B::BitArray, X::AbstractArray, I::AbstractVector{T})
if length(X) != length(I); error("argument dimensions must match"); end
count = 1
for i in I
B[i] = X[count]
count += 1
end
return B
end

function setindex!(B::BitArray, X::AbstractArray, i0::Real)
if length(X) != 1
error("argument dimensions must match")
end
return setindex!(B, X[1], i0)
end

function setindex!(B::BitArray, X::AbstractArray, i0::Real, i1::Real)
if length(X) != 1
error("argument dimensions must match")
end
return setindex!(B, X[1], i0, i1)
end

function setindex!(B::BitArray, X::AbstractArray, I0::Real, I::Real...)
if length(X) != 1
error("argument dimensions must match")
end
return setindex!(B, X[1], i0, I...)
end

function setindex!{T<:Real}(B::BitArray, x, I::AbstractVector{T})
x = convert(Bool, x)
for i in I
B[i] = x
end
return B
end

# logical indexing

function setindex_bool_1d(A::BitArray, x, I::AbstractArray{Bool})
if length(I) > length(A)
throw(BoundsError())
end
function setindex!(A::BitArray, x, I::AbstractArray{Bool})
checkbounds(A, I)
y = convert(Bool, x)
Ac = A.chunks
for i = 1:length(I)
if I[i]
# faster A[i] = x
setindex_unchecked(Ac, convert(Bool, x), i)
# faster A[i] = y
setindex_unchecked(Ac, y, i)
end
end
A
end

function setindex_bool_1d(A::BitArray, X::AbstractArray, I::AbstractArray{Bool})
if length(I) > length(A)
throw(BoundsError())
end
function setindex!(A::BitArray, X::AbstractArray, I::AbstractArray{Bool})
checkbounds(A, I)
Ac = A.chunks
c = 1
for i = 1:length(I)
Expand All @@ -618,27 +446,10 @@ function setindex_bool_1d(A::BitArray, X::AbstractArray, I::AbstractArray{Bool})
c += 1
end
end
A
end

# lots of definitions here are required just for disambiguation
# (see also setindex! in multidimensional.jl)
for XT in [BitArray, AbstractArray, Any]
for IT in [AbstractVector{Bool}, AbstractArray{Bool}]
@eval setindex!(A::BitArray, X::$XT, I::$IT) = setindex_bool_1d(A, X, I)
end

for IT in [Range1{Bool}, AbstractVector{Bool}], JT in [Range1{Bool}, AbstractVector{Bool}]
@eval setindex!(A::BitMatrix, x::$XT, I::$IT, J::$JT) = (A[find(I),find(J)] = x; A)
if length(X) != c-1
throw(DimensionMismatch("assigned $(length(X)) elements to length $(c-1) destination"))
end

for IT in [Range1{Bool}, AbstractVector{Bool}], JT in [Real, Range1]
@eval setindex!(A::BitMatrix, x::$XT, I::$IT, J::$JT) = (A[find(I),J] = x; A)
end
@eval setindex!{T<:Real}(A::BitMatrix, x::$XT, I::AbstractVector{Bool}, J::AbstractVector{T}) = (A[find(I),J] = x; A)

@eval setindex!(A::BitMatrix, x::$XT, I::Real, J::AbstractVector{Bool}) = (A[I,find(J)] = x; A)
@eval setindex!{T<:Real}(A::BitMatrix, x::$XT, I::AbstractVector{T}, J::AbstractVector{Bool}) = (A[I,find(J)] = x; A)
A
end

## Dequeue functionality ##
Expand Down
Loading

5 comments on commit 2fb9bad

@carlobaldassi
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@timholy : this is another 140 lines less thanks to cartesian magic! However, there is a performance issue: multi-dimensional indexing used to be inlined (by hand) until 4 dimensions in the previous code, while in this version @ngenerate takes care of that. The result is that performance is on par with the previous version up until CARTESIAN_DIMS+1, after which it drops horribly and gets much worse than the previous varargs version.

Can CARTESIAN_DIMS be increased then, like it says in the comment after its definition? Otherwise I'm hesitant to introduce performance regressions, even though it's likely for an uncommon case. I've tested with 4 and everything seems fine.

@timholy
Copy link
Member

@timholy timholy commented on 2fb9bad Feb 3, 2014

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is another 140 lines less thanks to cartesian magic!

Cool!

Can CARTESIAN_DIMS be increased?

Yes, that was the plan. First, I probably need a more generic fix to an unanticipated problem with @ngenerate: the problem is that once you switch over to the Dict wrapper, it loses return-type information---return values you specify in the function definition get buried inside the local _F_ and don't get propagated out. As a quick hack, I just wrapped usages with another function (as Jeff suggested), but you have to write one such wrapper for each explicitly-generated dimension (plus the one that matters, the generic version). This means that you have to alter more than just CARTESIAN_DIMS when you make changes.

A better fix might be to add a variant that allows you to specify the return args or types to the macro, @ngenerate ITERSYM RETURNTYPES FUNCTION. Here's an example (EDITed to concretely specify typeof(dst) rather than AbstractArray{T,N}):

# typeof(dst) is RETURNTYPES
@ngenerate N typeof(dst) function copy!{T,N}(dst::AbstractArray{T,N}, src::AbstractArray{T,N})
    # size checks go here
    @nloops N i src begin
        (@nref N dst i) = (@nref N src i)
    end
    dst
end

which would add type information to the output of the cached function, e.g.

let copy!_cache = Dict{Int,Function}()
function copy!{T,N}(dst::AbstractArray{T,N}, src::AbstractArray{T,N})
    if !haskey(copy!_cache, N)
        gen1 = Base.Cartesian.generate1(...)
        copy!_cache[N] = eval(...)
    end
    (copy!_cache[N](dst, src))::typeof(dst)   # added due to RETURNTYPES
end
end

(When I first started writing this, my idea was to specify dst as the return expression and have the macro insert it in appropriate places. We could do that, but I like this better. Ideas frequently improve as you explain them!)

Sound reasonable? If so, should we require that people specify the return type? Or should we allow either the two-argument or three-argument version? Making it required would prevent one from using Cartesian in contexts in which return type cannot be specified at compile time, but perhaps that's not a terrible restriction.

@timholy
Copy link
Member

@timholy timholy commented on 2fb9bad Feb 3, 2014

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess since people can write Any for the return-type expression, I'm leaning towards supporting just the 3-argument version. That means existing usages would have to be changed.

@carlobaldassi
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with adding a return type with whatever syntax (after all it's an internal API) and with making it mandatory. I'll update this branch and merge it when this happens.
For the future, do you think it will be possible to leverage the inference machinery and add the return type automatically?

@timholy
Copy link
Member

@timholy timholy commented on 2fb9bad Feb 3, 2014

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps, but I think the right way to solve this problem is to get away from having to maintain our own "shadow method table." Not only would that solve the type problem, it would also eliminate the performance problem you saw when you got to CARTESIAN_DIMS+1. Something like a latefunction (#5536) is needed.

Please sign in to comment.