diff --git a/base/sparse/abstractsparse.jl b/base/sparse/abstractsparse.jl index 5b5cd0dfc0a2f..60d70f97cc9f6 100644 --- a/base/sparse/abstractsparse.jl +++ b/base/sparse/abstractsparse.jl @@ -20,4 +20,4 @@ issparse{T, A<:AbstractSparseMatrix}(S::LinAlg.UnitLowerTriangular{T, A}) = true issparse{T, A<:AbstractSparseMatrix}(S::UpperTriangular{T, A}) = true issparse{T, A<:AbstractSparseMatrix}(S::LinAlg.UnitUpperTriangular{T, A}) = true -indtype{Tv,Ti}(S::AbstractSparseArray{Tv,Ti}) = Ti +indtype{Tv,Ti}(S::AbstractSparseArray{Tv,Ti}) = (Base.@_pure_meta; Ti) diff --git a/base/sparse/higherorderfns.jl b/base/sparse/higherorderfns.jl new file mode 100644 index 0000000000000..671b4a847f597 --- /dev/null +++ b/base/sparse/higherorderfns.jl @@ -0,0 +1,850 @@ +# This file is a part of Julia. License is MIT: http://julialang.org/license + +module HigherOrderFns + +# This module provides higher order functions specialized for sparse arrays, +# particularly map[!]/broadcast[!] for SparseVectors and SparseMatrixCSCs at present. +import Base: map, map!, broadcast, broadcast! + +using Base: tail, to_shape +using ..SparseArrays: SparseVector, SparseMatrixCSC, indtype + +# This module is organized as follows: +# (1) Define a common interface to SparseVectors and SparseMatrixCSCs sufficient for +# map[!]/broadcast[!]'s purposes. The methods below are written against this interface. +# (2) Define entry points for map[!] (short children of _map_[not]zeropres!). +# (3) Define entry points for broadcast[!] (short children of _broadcast_[not]zeropres!). +# (4) Define _map_[not]zeropres! specialized for a single (input) sparse vector/matrix. +# (5) Define _map_[not]zeropres! specialized for a pair of (input) sparse vectors/matrices. +# (6) Define general _map_[not]zeropres! capable of handling >2 (input) sparse vectors/matrices. +# (7) Define _broadcast_[not]zeropres! specialized for a pair of (input) sparse vectors/matrices. +# (8) Define general _broadcast_[not]zeropres! capable of handling >2 (input) sparse vectors/matrices. +# (9) Define methods handling combinations of broadcast scalars and sparse vectors/matrices. + + +# (1) The definitions below provide a common interface to sparse vectors and matrices +# sufficient for the purposes of map[!]/broadcast[!]. This interface treats sparse vectors +# as n-by-one sparse matrices which, though technically incorrect, is how broadcast[!] views +# sparse vectors in practice.
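The `_*_zeropres!`/`_*_notzeropres!` split referenced throughout the outline above hinges on whether `f` maps all-zero arguments to zero. As a rough usage sketch of how the two paths surface (assuming a 0.6-era Julia with this module loaded; the inputs below are hypothetical, not part of the patch):

```julia
# Illustrative sketch only; A and B are hypothetical inputs.
A = sprand(4, 4, 0.3)             # SparseMatrixCSC{Float64,Int}
B = sprand(4, 4, 0.3)

map(+, A, B)               # f(0, 0) == 0, so the zero-preserving kernel runs and
                           # only nonzero results are stored
broadcast(x -> x + 1, A)   # f(0) != 0, so the not-zero-preserving kernel densifies
                           # the result's stored structure, filling with f(0)
```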
+SparseVecOrMat = Union{SparseVector,SparseMatrixCSC} +@inline numrows(A::SparseVector) = A.n +@inline numrows(A::SparseMatrixCSC) = A.m +@inline numcols(A::SparseVector) = 1 +@inline numcols(A::SparseMatrixCSC) = A.n +# numrows and numcols respectively yield size(A, 1) and size(A, 2), but avoid a branch +@inline columns(A::SparseVector) = 1 +@inline columns(A::SparseMatrixCSC) = 1:A.n +@inline colrange(A::SparseVector, j) = 1:length(A.nzind) +@inline colrange(A::SparseMatrixCSC, j) = nzrange(A, j) +@inline colstartind(A::SparseVector, j) = one(indtype(A)) +@inline colboundind(A::SparseVector, j) = convert(indtype(A), length(A.nzind) + 1) +@inline colstartind(A::SparseMatrixCSC, j) = A.colptr[j] +@inline colboundind(A::SparseMatrixCSC, j) = A.colptr[j + 1] +@inline storedinds(A::SparseVector) = A.nzind +@inline storedinds(A::SparseMatrixCSC) = A.rowval +@inline storedvals(A::SparseVecOrMat) = A.nzval +@inline setcolptr!(A::SparseVector, j, val) = val +@inline setcolptr!(A::SparseMatrixCSC, j, val) = A.colptr[j] = val +function trimstorage!(A::SparseVecOrMat, maxstored) + resize!(storedinds(A), maxstored) + resize!(storedvals(A), maxstored) + return maxstored +end +function expandstorage!(A::SparseVecOrMat, maxstored) + length(storedinds(A)) < maxstored && resize!(storedinds(A), maxstored) + length(storedvals(A)) < maxstored && resize!(storedvals(A), maxstored) + return maxstored +end + + +# (2) map[!] entry points +map{Tf}(f::Tf, A::SparseVecOrMat) = _noshapecheck_map(f, A) +map{Tf,N}(f::Tf, A::SparseVecOrMat, Bs::Vararg{SparseVecOrMat,N}) = + (_checksameshape(A, Bs...); _noshapecheck_map(f, A, Bs...)) +map!{Tf,N}(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, Bs::Vararg{SparseVecOrMat,N}) = + (_checksameshape(C, A, Bs...); _noshapecheck_map!(f, C, A, Bs...)) +function _noshapecheck_map!{Tf,N}(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, Bs::Vararg{SparseVecOrMat,N}) + fofzeros = f(_zeros_eltypes(A, Bs...)...) + fpreszeros = fofzeros == zero(fofzeros) + return fpreszeros ? _map_zeropres!(f, C, A, Bs...) : + _map_notzeropres!(f, fofzeros, C, A, Bs...) +end +function _noshapecheck_map{Tf,N}(f::Tf, A::SparseVecOrMat, Bs::Vararg{SparseVecOrMat,N}) + fofzeros = f(_zeros_eltypes(A, Bs...)...) + fpreszeros = fofzeros == zero(fofzeros) + maxnnzC = fpreszeros ? min(length(A), _sumnnzs(A, Bs...)) : length(A) + entrytypeC = Base.Broadcast._broadcast_type(f, A, Bs...) + indextypeC = _promote_indtype(A, Bs...) + C = _allocres(size(A), indextypeC, entrytypeC, maxnnzC) + return fpreszeros ? _map_zeropres!(f, C, A, Bs...) : + _map_notzeropres!(f, fofzeros, C, A, Bs...) +end +# (3) broadcast[!] entry points +broadcast{Tf}(f::Tf, A::SparseVecOrMat) = _noshapecheck_map(f, A) +broadcast!{Tf}(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat) = map!(f, C, A) +function broadcast!{Tf,N}(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, Bs::Vararg{SparseVecOrMat,N}) + _aresameshape(C, A, Bs...) && return _noshapecheck_map!(f, C, A, Bs...) + Base.Broadcast.check_broadcast_indices(indices(C), A, Bs...) + fofzeros = f(_zeros_eltypes(A, Bs...)...) + fpreszeros = fofzeros == zero(fofzeros) + return fpreszeros ? _broadcast_zeropres!(f, C, A, Bs...) : + _broadcast_notzeropres!(f, fofzeros, C, A, Bs...) +end +# the following three similar defs are necessary for type stability in the mixed vector/matrix case +broadcast{Tf,N}(f::Tf, A::SparseVector, Bs::Vararg{SparseVector,N}) = + _aresameshape(A, Bs...) ? _noshapecheck_map(f, A, Bs...) : _diffshape_broadcast(f, A, Bs...) 
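One payoff of the interface just defined is that a single kernel can walk either container without branching on its concrete type. A minimal sketch of that idea (a hypothetical helper, assumed to live inside this module so the unexported accessors are in scope; it is not part of the patch):

```julia
# Hypothetical helper, for illustration only: collect the stored (row, value)
# pairs of column j of either a SparseVector (viewed as n-by-one) or a
# SparseMatrixCSC, using only the interface functions defined above.
function storedpairs(A::SparseVecOrMat, j)
    pairs = Tuple{Int,eltype(A)}[]
    for k in colrange(A, j)
        push!(pairs, (Int(storedinds(A)[k]), storedvals(A)[k]))
    end
    return pairs
end
```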
+broadcast{Tf,N}(f::Tf, A::SparseMatrixCSC, Bs::Vararg{SparseMatrixCSC,N}) = + _aresameshape(A, Bs...) ? _noshapecheck_map(f, A, Bs...) : _diffshape_broadcast(f, A, Bs...) +broadcast{Tf,N}(f::Tf, A::SparseVecOrMat, Bs::Vararg{SparseVecOrMat,N}) = + _diffshape_broadcast(f, A, Bs...) +function _diffshape_broadcast{Tf,N}(f::Tf, A::SparseVecOrMat, Bs::Vararg{SparseVecOrMat,N}) + fofzeros = f(_zeros_eltypes(A, Bs...)...) + fpreszeros = fofzeros == zero(fofzeros) + indextypeC = _promote_indtype(A, Bs...) + entrytypeC = Base.Broadcast._broadcast_type(f, A, Bs...) + shapeC = to_shape(Base.Broadcast.broadcast_indices(A, Bs...)) + maxnnzC = fpreszeros ? _checked_maxnnzbcres(shapeC, A, Bs...) : _densennz(shapeC) + C = _allocres(shapeC, indextypeC, entrytypeC, maxnnzC) + return fpreszeros ? _broadcast_zeropres!(f, C, A, Bs...) : + _broadcast_notzeropres!(f, fofzeros, C, A, Bs...) +end +# helper functions for map[!]/broadcast[!] entry points (and related methods below) +@inline _sumnnzs(A) = nnz(A) +@inline _sumnnzs(A, Bs...) = nnz(A) + _sumnnzs(Bs...) +@inline _zeros_eltypes(A) = (zero(eltype(A)),) +@inline _zeros_eltypes(A, Bs...) = (zero(eltype(A)), _zeros_eltypes(Bs...)...) +@inline _promote_indtype(A) = indtype(A) +@inline _promote_indtype(A, Bs...) = promote_type(indtype(A), _promote_indtype(Bs...)) +@inline _aresameshape(A) = true +@inline _aresameshape(A, B) = size(A) == size(B) +@inline _aresameshape(A, B, Cs...) = _aresameshape(A, B) ? _aresameshape(B, Cs...) : false +@inline _checksameshape(As...) = _aresameshape(As...) || throw(DimensionMismatch("argument shapes must match")) +@inline _densennz(shape::NTuple{1}) = shape[1] +@inline _densennz(shape::NTuple{2}) = shape[1] * shape[2] +_maxnnzfrom(shape::NTuple{1}, A) = nnz(A) * div(shape[1], A.n) +_maxnnzfrom(shape::NTuple{2}, A::SparseVector) = nnz(A) * div(shape[1], A.n) * shape[2] +_maxnnzfrom(shape::NTuple{2}, A::SparseMatrixCSC) = nnz(A) * div(shape[1], A.m) * div(shape[2], A.n) +@inline _maxnnzfrom_each(shape, ::Tuple{}) = () +@inline _maxnnzfrom_each(shape, As) = (_maxnnzfrom(shape, first(As)), _maxnnzfrom_each(shape, tail(As))...) +@inline _unchecked_maxnnzbcres(shape, As) = min(_densennz(shape), sum(_maxnnzfrom_each(shape, As))) +@inline _checked_maxnnzbcres(shape::NTuple{1}, As...) = shape[1] != 0 ? _unchecked_maxnnzbcres(shape, As) : 0 +@inline _checked_maxnnzbcres(shape::NTuple{2}, As...) = shape[1] != 0 && shape[2] != 0 ? _unchecked_maxnnzbcres(shape, As) : 0 +@inline function _allocres(shape::NTuple{1}, indextype, entrytype, maxnnz) + storedinds = Vector{indextype}(maxnnz) + storedvals = Vector{entrytype}(maxnnz) + return SparseVector(shape..., storedinds, storedvals) +end +@inline function _allocres(shape::NTuple{2}, indextype, entrytype, maxnnz) + pointers = Vector{indextype}(shape[2] + 1) + storedinds = Vector{indextype}(maxnnz) + storedvals = Vector{entrytype}(maxnnz) + return SparseMatrixCSC(shape..., pointers, storedinds, storedvals) +end +# Ambiguity killers, TODO: nix conflicting specializations +broadcast(::typeof(abs), x::SparseVector) = _noshapecheck_map(abs, x) # base/sparse/sparsevectors.jl:921 +ambiguityfunnel{Tf}(f::Tf, x, y) = _aresameshape(x, y) ?
_noshapecheck_map(f, x, y) : _diffshape_broadcast(f, x, y) +broadcast(::typeof(+), x::SparseVector, y::SparseVector) = ambiguityfunnel(+, x, y) # base/sparse/sparsevectors.jl:1266 +broadcast(::typeof(-), x::SparseVector, y::SparseVector) = ambiguityfunnel(-, x, y) # base/sparse/sparsevectors.jl:1266 +broadcast(::typeof(*), x::SparseVector, y::SparseVector) = ambiguityfunnel(*, x, y) # base/sparse/sparsevectors.jl:1266 +function broadcast!(::typeof(identity), C::SparseMatrixCSC, A::SparseMatrixCSC) # from #17623, loc? + _checksameshape(C, A); return copy!(C, A) +end + + +# (4) _map_zeropres!/_map_notzeropres! specialized for a single sparse vector/matrix +"Stores only the nonzero entries of `map(f, Array(A))` in `C`." +function _map_zeropres!{Tf}(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat) + spaceC::Int = min(length(storedinds(C)), length(storedvals(C))) + Ck = 1 + @inbounds for j in columns(C) + setcolptr!(C, j, Ck) + for Ak in colrange(A, j) + Cx = f(storedvals(A)[Ak]) + if Cx != zero(eltype(C)) + Ck > spaceC && (spaceC = expandstorage!(C, Ck + nnz(A) - (Ak - 1))) + storedinds(C)[Ck] = storedinds(A)[Ak] + storedvals(C)[Ck] = Cx + Ck += 1 + end + end + end + @inbounds setcolptr!(C, numcols(C) + 1, Ck) + trimstorage!(C, Ck - 1) + return C +end +""" +Densifies `C`, storing `fillvalue` in place of each unstored entry in `A` and +`f(A[i])`/`f(A[i,j])` in place of each stored entry `A[i]`/`A[i,j]` in `A`. +""" +function _map_notzeropres!{Tf}(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseVecOrMat) + # Build dense matrix structure in C, expanding storage if necessary + _densestructure!(C) + # Populate values + fill!(storedvals(C), fillvalue) + @inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C)) + for Ak in colrange(A, j) + Cx = f(storedvals(A)[Ak]) + Cx != fillvalue && (storedvals(C)[jo + storedinds(A)[Ak]] = Cx) + end + end + # NOTE: Combining the fill! above into the loop above to avoid multiple sweeps over / + # nonsequential access of storedvals(C) does not appear to improve performance. + return C +end +# helper functions for these methods and some of those below +@inline _densecoloffsets(A::SparseVector) = 0 +@inline _densecoloffsets(A::SparseMatrixCSC) = 0:A.m:(A.m*A.n - 1) +function _densestructure!(A::SparseVector) + expandstorage!(A, A.n) + copy!(A.nzind, 1:A.n) + return A +end +function _densestructure!(A::SparseMatrixCSC) + nnzA = A.m * A.n + expandstorage!(A, nnzA) + copy!(A.colptr, 1:A.m:(nnzA + 1)) + for k in _densecoloffsets(A) + copy!(A.rowval, k + 1, 1:A.m) + end + return A +end + + +# (5) _map_zeropres!/_map_notzeropres! specialized for a pair of sparse vectors/matrices +function _map_zeropres!{Tf}(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B::SparseVecOrMat) + spaceC::Int = min(length(storedinds(C)), length(storedvals(C))) + rowsentinelA = convert(indtype(A), numrows(C) + 1) + rowsentinelB = convert(indtype(B), numrows(C) + 1) + Ck = 1 + @inbounds for j in columns(C) + setcolptr!(C, j, Ck) + Ak, stopAk = colstartind(A, j), colboundind(A, j) + Bk, stopBk = colstartind(B, j), colboundind(B, j) + Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA + Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB + while true + if Ai == Bi + Ai == rowsentinelA && break # column complete + Cx, Ci::indtype(C) = f(storedvals(A)[Ak], storedvals(B)[Bk]), Ai + Ak += one(Ak); Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA + Bk += one(Bk); Bi = Bk < stopBk ? 
storedinds(B)[Bk] : rowsentinelB + elseif Ai < Bi + Cx, Ci = f(storedvals(A)[Ak], zero(eltype(B))), Ai + Ak += one(Ak); Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA + else # Bi < Ai + Cx, Ci = f(zero(eltype(A)), storedvals(B)[Bk]), Bi + Bk += one(Bk); Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB + end + # NOTE: The ordering of the conditional chain above impacts which matrices this + # method performs best for. In the map situation (arguments have same shape, and + # likely same or similar stored entry pattern), the Ai == Bi and termination + # cases are equally or more likely than the Ai < Bi and Bi < Ai cases. Hence + # the ordering of the conditional chain above differs from that in the + # corresponding broadcast code (below). + if Cx != zero(eltype(C)) + Ck > spaceC && (spaceC = expandstorage!(C, Ck + (nnz(A) - (Ak - 1)) + (nnz(B) - (Bk - 1)))) + storedinds(C)[Ck] = Ci + storedvals(C)[Ck] = Cx + Ck += 1 + end + end + end + @inbounds setcolptr!(C, numcols(C) + 1, Ck) + trimstorage!(C, Ck - 1) + return C +end +function _map_notzeropres!{Tf}(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseVecOrMat, B::SparseVecOrMat) + # Build dense matrix structure in C, expanding storage if necessary + _densestructure!(C) + # Populate values + fill!(storedvals(C), fillvalue) + # NOTE: Combining this fill! into the loop below to avoid multiple sweeps over / + # nonsequential access of storedvals(C) does not appear to improve performance. + rowsentinelA = convert(indtype(A), numrows(A) + 1) + rowsentinelB = convert(indtype(B), numrows(B) + 1) + @inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C)) + Ak, stopAk = colstartind(A, j), colboundind(A, j) + Bk, stopBk = colstartind(B, j), colboundind(B, j) + Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA + Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB + while true + if Ai == Bi + Ai == rowsentinelA && break # column complete + Cx, Ci::indtype(C) = f(storedvals(A)[Ak], storedvals(B)[Bk]), Ai + Ak += one(Ak); Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA + Bk += one(Bk); Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB + elseif Ai < Bi + Cx, Ci = f(storedvals(A)[Ak], zero(eltype(B))), Ai + Ak += one(Ak); Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA + else # Bi < Ai + Cx, Ci = f(zero(eltype(A)), storedvals(B)[Bk]), Bi + Bk += one(Bk); Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB + end + Cx != fillvalue && (storedvals(C)[jo + Ci] = Cx) + end + end + return C +end + + +# (6) _map_zeropres!/_map_notzeropres! for more than two sparse matrices / vectors +function _map_zeropres!{Tf,N}(f::Tf, C::SparseVecOrMat, As::Vararg{SparseVecOrMat,N}) + spaceC::Int = min(length(storedinds(C)), length(storedvals(C))) + rowsentinel = numrows(C) + 1 + Ck = 1 + stopks = _colstartind_all(1, As) + @inbounds for j in columns(C) + setcolptr!(C, j, Ck) + ks = stopks + stopks = _colboundind_all(j, As) + rows = _rowforind_all(rowsentinel, ks, stopks, As) + activerow = min(rows...) + while activerow < rowsentinel + # activerows = _isactiverow_all(activerow, rows) + # Cx = f(_gatherargs(activerows, ks, As)...) + # ks = _updateind_all(activerows, ks) + # rows = _updaterow_all(rowsentinel, activerows, rows, ks, stopks, As) + vals, ks, rows = _fusedupdate_all(rowsentinel, activerow, rows, ks, stopks, As) + Cx = f(vals...) + if Cx != zero(eltype(C)) + Ck > spaceC && (spaceC = expandstorage!(C, Ck + min(length(C), _sumnnzs(As...)) - (sum(ks) - N))) + storedinds(C)[Ck] = activerow + storedvals(C)[Ck] = Cx + Ck += 1 + end + activerow = min(rows...) 
+ end + end + @inbounds setcolptr!(C, numcols(C) + 1, Ck) + trimstorage!(C, Ck - 1) + return C +end +function _map_notzeropres!{Tf,N}(f::Tf, fillvalue, C::SparseVecOrMat, As::Vararg{SparseVecOrMat,N}) + # Build dense matrix structure in C, expanding storage if necessary + _densestructure!(C) + # Populate values + fill!(storedvals(C), fillvalue) + # NOTE: Combining this fill! into the loop below to avoid multiple sweeps over / + # nonsequential access of C.nzval does not appear to improve performance. + rowsentinel = numrows(C) + 1 + stopks = _colstartind_all(1, As) + @inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C)) + ks = stopks + stopks = _colboundind_all(j, As) + rows = _rowforind_all(rowsentinel, ks, stopks, As) + activerow = min(rows...) + while activerow < rowsentinel + # activerows = _isactiverow_all(activerow, rows) + # Cx = f(_gatherargs(activerows, ks, As)...) + # ks = _updateind_all(activerows, ks) + # rows = _updaterow_all(rowsentinel, activerows, rows, ks, stopks, As) + vals, ks, rows = _fusedupdate_all(rowsentinel, activerow, rows, ks, stopks, As) + Cx = f(vals...) + Cx != fillvalue && (storedvals(C)[jo + activerow] = Cx) + activerow = min(rows...) + end + end + return C +end +# helper methods for map/map! methods just above +@inline _colstartind(j, A) = colstartind(A, j) +@inline _colstartind_all(j, ::Tuple{}) = () +@inline _colstartind_all(j, As) = ( + _colstartind(j, first(As)), + _colstartind_all(j, tail(As))...) +@inline _colboundind(j, A) = colboundind(A, j) +@inline _colboundind_all(j, ::Tuple{}) = () +@inline _colboundind_all(j, As) = ( + _colboundind(j, first(As)), + _colboundind_all(j, tail(As))...) +@inline _rowforind(rowsentinel, k, stopk, A) = + k < stopk ? storedinds(A)[k] : convert(indtype(A), rowsentinel) +@inline _rowforind_all(rowsentinel, ::Tuple{}, ::Tuple{}, ::Tuple{}) = () +@inline _rowforind_all(rowsentinel, ks, stopks, As) = ( + _rowforind(rowsentinel, first(ks), first(stopks), first(As)), + _rowforind_all(rowsentinel, tail(ks), tail(stopks), tail(As))...) +# fusing the following defs. avoids a few branches, yielding 5-30% runtime reduction +# @inline _isactiverow(activerow, row) = row == activerow +# @inline _isactiverow_all(activerow, ::Tuple{}) = () +# @inline _isactiverow_all(activerow, rows) = ( +# _isactiverow(activerow, first(rows)), +# _isactiverow_all(activerow, tail(rows))...) +# @inline _gatherarg(isactiverow, k, A) = isactiverow ? storedvals(A)[k] : zero(eltype(A)) +# @inline _gatherargs(::Tuple{}, ::Tuple{}, ::Tuple{}) = () +# @inline _gatherargs(activerows, ks, As) = ( +# _gatherarg(first(activerows), first(ks), first(As)), +# _gatherargs(tail(activerows), tail(ks), tail(As))...) +# @inline _updateind(isactiverow, k) = isactiverow ? (k + one(k)) : k +# @inline _updateind_all(::Tuple{}, ::Tuple{}) = () +# @inline _updateind_all(activerows, ks) = ( +# _updateind(first(activerows), first(ks)), +# _updateind_all(tail(activerows), tail(ks))...) +# @inline _updaterow(rowsentinel, isrowactive, presrow, k, stopk, A) = +# isrowactive ? (k < stopk ? storedinds(A)[k] : oftype(presrow, rowsentinel)) : presrow +# @inline _updaterow_all(rowsentinel, ::Tuple{}, ::Tuple{}, ::Tuple{}, ::Tuple{}, ::Tuple{}) = () +# @inline _updaterow_all(rowsentinel, activerows, rows, ks, stopks, As) = ( +# _updaterow(rowsentinel, first(activerows), first(rows), first(ks), first(stopks), first(As)), +# _updaterow_all(rowsentinel, tail(activerows), tail(rows), tail(ks), tail(stopks), tail(As))...) 
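The `_*_all` helpers above all follow the same recursive-tuple idiom: handle `first(As)`, recurse on `tail(As)`, and terminate on the empty tuple, so the "loop" over the heterogeneously typed argument tuple unrolls at compile time. A standalone sketch of the idiom, with a hypothetical name (not part of the patch):

```julia
# Hypothetical sketch of the recursive-tuple idiom used by the _*_all helpers:
# apply nnz to every array in a tuple, yielding a tuple of the same length.
# Assumes `tail` (from Base) and `nnz` are in scope, as they are in this module.
@inline _nnz_all(::Tuple{}) = ()
@inline _nnz_all(As) = (nnz(first(As)), _nnz_all(tail(As))...)
```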
+@inline function _fusedupdate(rowsentinel, activerow, row, k, stopk, A) + # returns (val, nextk, nextrow) + if row == activerow + nextk = k + one(k) + (storedvals(A)[k], nextk, (nextk < stopk ? storedinds(A)[nextk] : oftype(row, rowsentinel))) + else + (zero(eltype(A)), k, row) + end +end +@inline _fusedupdate_all(rowsentinel, activerow, rows, ks, stopks, As) = + _fusedupdate_all((#=vals=#), (#=nextks=#), (#=nextrows=#), rowsentinel, activerow, rows, ks, stopks, As) +@inline _fusedupdate_all(vals, nextks, nextrows, rowsent, activerow, ::Tuple{}, ::Tuple{}, ::Tuple{}, ::Tuple{}) = + (vals, nextks, nextrows) +@inline function _fusedupdate_all(vals, nextks, nextrows, rowsentinel, activerow, rows, ks, stopks, As) + val, nextk, nextrow = _fusedupdate(rowsentinel, activerow, first(rows), first(ks), first(stopks), first(As)) + return _fusedupdate_all((vals..., val), (nextks..., nextk), (nextrows..., nextrow), + rowsentinel, activerow, tail(rows), tail(ks), tail(stopks), tail(As)) +end + + +# (7) _broadcast_zeropres!/_broadcast_notzeropres! specialized for a pair of (input) sparse vectors/matrices +function _broadcast_zeropres!{Tf}(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B::SparseVecOrMat) + isempty(C) && return _finishempty!(C) + spaceC::Int = min(length(storedinds(C)), length(storedvals(C))) + rowsentinelA = convert(indtype(A), numrows(C) + 1) + rowsentinelB = convert(indtype(B), numrows(C) + 1) + # A and B cannot have the same shape, as we directed that case to map in broadcast's + # entry point; here we need to efficiently handle only heterogeneous combinations of matrices + # with no singleton dimensions ("matrices" hereafter), one singleton dimension ("columns" + # and "rows"), and two singleton dimensions ("scalars"). Cases involving scalars should + # be rare and optimizing that case complicates the code appreciably, so we largely + # ignore that case's performance below. + # + # We first divide the cases into two groups: those in which neither argument expands + # vertically (matrix-column combinations) and those in which an argument expands + # vertically (matrix-row and column-row combinations). + # + # NOTE: Placing the loops over columns outside the conditional chain segregating + # argument shape combinations eliminates some code replication but unfortunately + # hurts performance appreciably in some cases. + # + # Cases without vertical expansion + Ck = 1 + if numrows(A) == numrows(B) + @inbounds for j in columns(C) + setcolptr!(C, j, Ck) + Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j)) + Bk, stopBk = numcols(B) == 1 ? (colstartind(B, 1), colboundind(B, 1)) : (colstartind(B, j), colboundind(B, j)) + # Restructuring this k/stopk code to avoid unnecessary colptr retrievals does + # not improve performance significantly. Leave in this less complex form. + Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA + Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB + while true + if Ai != Bi + if Ai < Bi + Cx, Ci = f(storedvals(A)[Ak], zero(eltype(B))), Ai + Ak += one(Ak); Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA + else # Ai > Bi + Cx, Ci = f(zero(eltype(A)), storedvals(B)[Bk]), Bi + Bk += one(Bk); Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB + end + elseif #= Ai == Bi && =# Ai == rowsentinelA + break # column complete + else #= Ai == Bi != rowsentinel =# + Cx, Ci::indtype(C) = f(storedvals(A)[Ak], storedvals(B)[Bk]), Ai + Ak += one(Ak); Ai = Ak < stopAk ?
storedinds(A)[Ak] : rowsentinelA + Bk += one(Bk); Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB + end + # NOTE: The ordering of the conditional chain above impacts which matrices + # this method perform best for. In contrast to the map situation (arguments + # have same shape, and likely same or similar stored entry pattern), where + # the Ai == Bi and termination cases are equally or more likely than the + # Ai < Bi and Bi < Ai cases, in the broadcast situation (arguments have + # different shape, and likely largely disjoint expanded stored entry + # pattern) the Ai < Bi and Bi < Ai cases are equally or more likely than the + # Ai == Bi and termination cases. Hence the ordering of the conditional + # chain above differs from that in the corresponding map code. + if Cx != zero(eltype(C)) + Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), A, B))) + storedinds(C)[Ck] = Ci + storedvals(C)[Ck] = Cx + Ck += 1 + end + end + end + # Cases with vertical expansion + elseif numrows(A) == 1 # && numrows(B) != 1, vertically expand first argument + @inbounds for j in columns(C) + setcolptr!(C, j, Ck) + Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j)) + Bk, stopBk = numcols(B) == 1 ? (colstartind(B, 1), colboundind(B, 1)) : (colstartind(B, j), colboundind(B, j)) + Ax = Ak < stopAk ? storedvals(A)[Ak] : zero(eltype(A)) + fvAzB = f(Ax, zero(eltype(B))) + if fvAzB == zero(eltype(C)) + # either A's jth column is empty, or A's jth column contains a nonzero value + # Ax but f(Ax, zero(eltype(B))) is nonetheless zero, so we can scan through + # B's jth column without storing every entry in C's jth column + while Bk < stopBk + Cx = f(Ax, storedvals(B)[Bk]) + if Cx != zero(eltype(C)) + Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), A, B))) + storedinds(C)[Ck] = storedinds(B)[Bk] + storedvals(C)[Ck] = Cx + Ck += 1 + end + Bk += one(Bk) + end + else + # A's jth column is nonempty and f(Ax, zero(eltype(B))) is not zero, so + # we must store (likely) every entry in C's jth column + Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB + for Ci::indtype(C) in 1:numrows(C) + if Bi == Ci + Cx = f(Ax, storedvals(B)[Bk]) + Bk += one(Bk); Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB + else + Cx = fvAzB + end + if Cx != zero(eltype(C)) + Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), A, B))) + storedinds(C)[Ck] = Ci + storedvals(C)[Ck] = Cx + Ck += 1 + end + end + end + end + elseif numrows(B) == 1 # && numrows(A) != 1, vertically expand second argument + @inbounds for j in columns(C) + setcolptr!(C, j, Ck) + Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j)) + Bk, stopBk = numcols(B) == 1 ? (colstartind(B, 1), colboundind(B, 1)) : (colstartind(B, j), colboundind(B, j)) + Bx = Bk < stopBk ? 
storedvals(B)[Bk] : zero(eltype(B)) + fzAvB = f(zero(eltype(A)), Bx) + if fzAvB == zero(eltype(C)) + # either B's jth column is empty, or B's jth column contains a nonzero value + # Bx but f(zero(eltype(A)), Bx) is nonetheless zero, so we can scan through + # A's jth column without storing every entry in C's jth column + while Ak < stopAk + Cx = f(storedvals(A)[Ak], Bx) + if Cx != zero(eltype(C)) + Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), A, B))) + storedinds(C)[Ck] = storedinds(A)[Ak] + storedvals(C)[Ck] = Cx + Ck += 1 + end + Ak += one(Ak) + end + else + # B's jth column is nonempty and f(zero(eltype(A)), Bx) is not zero, so + # we must store (likely) every entry in C's jth column + Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA + for Ci::indtype(C) in 1:numrows(C) + if Ai == Ci + Cx = f(storedvals(A)[Ak], Bx) + Ak += one(Ak); Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA + else + Cx = fzAvB + end + if Cx != zero(eltype(C)) + Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), A, B))) + storedinds(C)[Ck] = Ci + storedvals(C)[Ck] = Cx + Ck += 1 + end + end + end + end + end + @inbounds setcolptr!(C, numcols(C) + 1, Ck) + trimstorage!(C, Ck - 1) + return C +end +function _broadcast_notzeropres!{Tf}(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseVecOrMat, B::SparseVecOrMat) + # For information on this code, see comments in similar code in _broadcast_zeropres! above + # Build dense matrix structure in C, expanding storage if necessary + _densestructure!(C) + # Populate values + fill!(storedvals(C), fillvalue) + rowsentinelA = convert(indtype(A), numrows(C) + 1) + rowsentinelB = convert(indtype(B), numrows(C) + 1) + # Cases without vertical expansion + if numrows(A) == numrows(B) + @inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C)) + Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j)) + Bk, stopBk = numcols(B) == 1 ? (colstartind(B, 1), colboundind(B, 1)) : (colstartind(B, j), colboundind(B, j)) + Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA + Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB + while true + if Ai < Bi + Cx, Ci = f(storedvals(A)[Ak], zero(eltype(B))), Ai + Ak += one(Ak); Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA + elseif Ai > Bi + Cx, Ci = f(zero(eltype(A)), storedvals(B)[Bk]), Bi + Bk += one(Bk); Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB + elseif #= Ai == Bi && =# Ai == rowsentinelA + break # column complete + else #= Ai == Bi != rowsentinel =# + Cx, Ci::indtype(C) = f(storedvals(A)[Ak], storedvals(B)[Bk]), Ai + Ak += one(Ak); Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA + Bk += one(Bk); Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB + end + Cx != fillvalue && (storedvals(C)[jo + Ci] = Cx) + end + end + # Cases with vertical expansion + elseif numrows(A) == 1 # && numrows(B) != 1, vertically expand first argument + @inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C)) + Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j)) + Bk, stopBk = numcols(B) == 1 ? (colstartind(B, 1), colboundind(B, 1)) : (colstartind(B, j), colboundind(B, j)) + Ax = Ak < stopAk ? storedvals(A)[Ak] : zero(eltype(A)) + fvAzB = f(Ax, zero(eltype(B))) + if fvAzB == zero(eltype(C)) + while Bk < stopBk + Cx = f(Ax, storedvals(B)[Bk]) + Cx != fillvalue && (storedvals(C)[jo + storedinds(B)[Bk]] = Cx) + Bk += one(Bk) + end + else + Bi = Bk < stopBk ? 
storedinds(B)[Bk] : rowsentinelB + for Ci::indtype(C) in 1:numrows(C) + if Bi == Ci + Cx = f(Ax, storedvals(B)[Bk]) + Bk += one(Bk); Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB + else + Cx = fvAzB + end + Cx != fillvalue && (storedvals(C)[jo + Ci] = Cx) + end + end + end + elseif numrows(B) == 1 # && numrows(A) != 1, vertically expand second argument + @inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C)) + Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j)) + Bk, stopBk = numcols(B) == 1 ? (colstartind(B, 1), colboundind(B, 1)) : (colstartind(B, j), colboundind(B, j)) + Bx = Bk < stopBk ? storedvals(B)[Bk] : zero(eltype(B)) + fzAvB = f(zero(eltype(A)), Bx) + if fzAvB == zero(eltype(C)) + while Ak < stopAk + Cx = f(storedvals(A)[Ak], Bx) + Cx != fillvalue && (storedvals(C)[jo + storedinds(A)[Ak]] = Cx) + Ak += one(Ak) + end + else + Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA + for Ci::indtype(C) in 1:numrows(C) + if Ai == Ci + Cx = f(storedvals(A)[Ak], Bx) + Ak += one(Ak); Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA + else + Cx = fzAvB + end + Cx != fillvalue && (storedvals(C)[jo + Ci] = Cx) + end + end + end + end + return C +end +_finishempty!(C::SparseVector) = C +_finishempty!(C::SparseMatrixCSC) = (fill!(C.colptr, 1); C) + + +# (8) _broadcast_zeropres!/_broadcast_notzeropres! for more than two (input) sparse vectors/matrices +function _broadcast_zeropres!{Tf,N}(f::Tf, C::SparseVecOrMat, As::Vararg{SparseVecOrMat,N}) + isempty(C) && return _finishempty!(C) + spaceC::Int = min(length(storedinds(C)), length(storedvals(C))) + expandsverts = _expandsvert_all(C, As) + expandshorzs = _expandshorz_all(C, As) + rowsentinel = numrows(C) + 1 + Ck = 1 + @inbounds for j in columns(C) + setcolptr!(C, j, Ck) + ks = _startindforbccol_all(j, expandshorzs, As) + stopks = _stopindforbccol_all(j, expandshorzs, As) + # Neither fusing ks and stopks construction, nor restructuring them to avoid repeated + # colptr lookups, improves performance significantly. So keep the less complex approach here. + isemptys = _isemptycol_all(ks, stopks) + defargs = _defargforcol_all(j, isemptys, expandsverts, ks, As) + rows = _initrowforcol_all(j, rowsentinel, isemptys, expandsverts, ks, As) + defaultCx = f(defargs...) + activerow = min(rows...) + if defaultCx == zero(eltype(C)) # zero-preserving column scan + while activerow < rowsentinel + # activerows = _isactiverow_all(activerow, rows) + # Cx = f(_gatherbcargs(activerows, defargs, ks, As)...) + # ks = _updateind_all(activerows, ks) + # rows = _updaterow_all(rowsentinel, activerows, rows, ks, stopks, As) + args, ks, rows = _fusedupdatebc_all(rowsentinel, activerow, rows, defargs, ks, stopks, As) + Cx = f(args...) + if Cx != zero(eltype(C)) + Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), As))) + storedinds(C)[Ck] = activerow + storedvals(C)[Ck] = Cx + Ck += 1 + end + activerow = min(rows...) + end + else # zero-non-preserving column scan + for Ci in 1:numrows(C) + if Ci == activerow + # activerows = _isactiverow_all(activerow, rows) + # Cx = f(_gatherbcargs(activerows, defargs, ks, As)...) + # ks = _updateind_all(activerows, ks) + # rows = _updaterow_all(rowsentinel, activerows, rows, ks, stopks, As) + args, ks, rows = _fusedupdatebc_all(rowsentinel, activerow, rows, defargs, ks, stopks, As) + Cx = f(args...) + activerow = min(rows...) 
+ else + Cx = defaultCx + end + if Cx != zero(eltype(C)) + Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), As))) + storedinds(C)[Ck] = Ci + storedvals(C)[Ck] = Cx + Ck += 1 + end + end + end + end + @inbounds setcolptr!(C, numcols(C) + 1, Ck) + trimstorage!(C, Ck - 1) + return C +end +function _broadcast_notzeropres!{Tf,N}(f::Tf, fillvalue, C::SparseVecOrMat, As::Vararg{SparseVecOrMat,N}) + isempty(C) && return _finishempty!(C) + # Build dense matrix structure in C, expanding storage if necessary + _densestructure!(C) + # Populate values + fill!(storedvals(C), fillvalue) + expandsverts = _expandsvert_all(C, As) + expandshorzs = _expandshorz_all(C, As) + rowsentinel = numrows(C) + 1 + @inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C)) + ks = _startindforbccol_all(j, expandshorzs, As) + stopks = _stopindforbccol_all(j, expandshorzs, As) + # Neither fusing ks and stopks construction, nor restructuring them to avoid repeated + # colptr lookups, improves performance significantly. So keep the less complex approach here. + isemptys = _isemptycol_all(ks, stopks) + defargs = _defargforcol_all(j, isemptys, expandsverts, ks, As) + rows = _initrowforcol_all(j, rowsentinel, isemptys, expandsverts, ks, As) + defaultCx = f(defargs...) + activerow = min(rows...) + if defaultCx == fillvalue # fillvalue-preserving column scan + while activerow < rowsentinel + # activerows = _isactiverow_all(activerow, rows) + # Cx = f(_gatherbcargs(activerows, defargs, ks, As)...) + # ks = _updateind_all(activerows, ks) + # rows = _updaterow_all(rowsentinel, activerows, rows, ks, stopks, As) + args, ks, rows = _fusedupdatebc_all(rowsentinel, activerow, rows, defargs, ks, stopks, As) + Cx = f(args...) + Cx != fillvalue && (storedvals(C)[jo + activerow] = Cx) + activerow = min(rows...) + end + else # fillvalue-non-preserving column scan + for Ci in 1:numrows(C) + if Ci == activerow + # activerows = _isactiverow_all(activerow, rows) + # Cx = f(_gatherbcargs(activerows, defargs, ks, As)...) + # ks = _updateind_all(activerows, ks) + # rows = _updaterow_all(rowsentinel, activerows, rows, ks, stopks, As) + args, ks, rows = _fusedupdatebc_all(rowsentinel, activerow, rows, defargs, ks, stopks, As) + Cx = f(args...) + activerow = min(rows...) + else + Cx = defaultCx + end + Cx != fillvalue && (storedvals(C)[jo + Ci] = Cx) + end + end + end + return C +end +# helper method for broadcast/broadcast! methods just above +@inline _expandsvert(C, A) = numrows(A) != numrows(C) +@inline _expandsvert_all(C, ::Tuple{}) = () +@inline _expandsvert_all(C, As) = (_expandsvert(C, first(As)), _expandsvert_all(C, tail(As))...) +@inline _expandshorz(C, A) = numcols(A) != numcols(C) +@inline _expandshorz_all(C, ::Tuple{}) = () +@inline _expandshorz_all(C, As) = (_expandshorz(C, first(As)), _expandshorz_all(C, tail(As))...) +@inline _startindforbccol(j, expandshorz, A) = expandshorz ? colstartind(A, 1) : colstartind(A, j) +@inline _startindforbccol_all(j, ::Tuple{}, ::Tuple{}) = () +@inline _startindforbccol_all(j, expandshorzs, As) = ( + _startindforbccol(j, first(expandshorzs), first(As)), + _startindforbccol_all(j, tail(expandshorzs), tail(As))...) +@inline _stopindforbccol(j, expandshorz, A) = expandshorz ? colboundind(A, 1) : colboundind(A, j) +@inline _stopindforbccol_all(j, ::Tuple{}, ::Tuple{}) = () +@inline _stopindforbccol_all(j, expandshorzs, As) = ( + _stopindforbccol(j, first(expandshorzs), first(As)), + _stopindforbccol_all(j, tail(expandshorzs), tail(As))...) 
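For a concrete picture of the vertical/horizontal expansion these kernels and helpers implement, the sketch below (illustrative only, with hypothetical inputs, assuming a 0.6-era Julia with this module loaded) broadcasts a one-column and a one-row sparse matrix:

```julia
# Illustrative usage only; the inputs are hypothetical.
colvec = sparse(reshape([1.0, 0.0, 2.0], 3, 1))  # 3x1: expands horizontally across columns
rowvec = sparse([0.0 3.0 0.0 4.0])               # 1x4: expands vertically down rows
broadcast(*, colvec, rowvec)                     # 3x4 sparse result; since * is
                                                 # zero-preserving, only the nonzero
                                                 # products end up stored
```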
+@inline _isemptycol(k, stopk) = k == stopk +@inline _isemptycol_all(::Tuple{}, ::Tuple{}) = () +@inline _isemptycol_all(ks, stopks) = ( + _isemptycol(first(ks), first(stopks)), + _isemptycol_all(tail(ks), tail(stopks))...) +@inline _initrowforcol(j, rowsentinel, isempty, expandsvert, k, A) = + expandsvert || isempty ? convert(indtype(A), rowsentinel) : storedinds(A)[k] +@inline _initrowforcol_all(j, rowsentinel, ::Tuple{}, ::Tuple{}, ::Tuple{}, ::Tuple{}) = () +@inline _initrowforcol_all(j, rowsentinel, isemptys, expandsverts, ks, As) = ( + _initrowforcol(j, rowsentinel, first(isemptys), first(expandsverts), first(ks), first(As)), + _initrowforcol_all(j, rowsentinel, tail(isemptys), tail(expandsverts), tail(ks), tail(As))...) +@inline _defargforcol(j, isempty, expandsvert, k, A) = + expandsvert && !isempty ? storedvals(A)[k] : zero(eltype(A)) +@inline _defargforcol_all(j, ::Tuple{}, ::Tuple{}, ::Tuple{}, ::Tuple{}) = () +@inline _defargforcol_all(j, isemptys, expandsverts, ks, As) = ( + _defargforcol(j, first(isemptys), first(expandsverts), first(ks), first(As)), + _defargforcol_all(j, tail(isemptys), tail(expandsverts), tail(ks), tail(As))...) +# fusing the following defs. avoids a few branches and construction of a tuple, yielding 1-20% runtime reduction +# @inline _isactiverow(activerow, row) = row == activerow +# @inline _isactiverow_all(activerow, ::Tuple{}) = () +# @inline _isactiverow_all(activerow, rows) = ( +# _isactiverow(activerow, first(rows)), +# _isactiverow_all(activerow, tail(rows))...) +# @inline _gatherbcarg(isactiverow, defarg, k, A) = isactiverow ? storedvals(A)[k] : defarg +# @inline _gatherbcargs(::Tuple{}, ::Tuple{}, ::Tuple{}, ::Tuple{}) = () +# @inline _gatherbcargs(activerows, defargs, ks, As) = ( +# _gatherbcarg(first(activerows), first(defargs), first(ks), first(As)), +# _gatherbcargs(tail(activerows), tail(defargs), tail(ks), tail(As))...) +# @inline _updateind(isactiverow, k) = isactiverow ? (k + one(k)) : k +# @inline _updateind_all(::Tuple{}, ::Tuple{}) = () +# @inline _updateind_all(activerows, ks) = ( +# _updateind(first(activerows), first(ks)), +# _updateind_all(tail(activerows), tail(ks))...) +# @inline _updaterow(rowsentinel, isrowactive, presrow, k, stopk, A) = +# isrowactive ? (k < stopk ? storedinds(A)[k] : oftype(presrow, rowsentinel)) : presrow +# @inline _updaterow_all(rowsentinel, ::Tuple{}, ::Tuple{}, ::Tuple{}, ::Tuple{}, ::Tuple{}) = () +# @inline _updaterow_all(rowsentinel, activerows, rows, ks, stopks, As) = ( +# _updaterow(rowsentinel, first(activerows), first(rows), first(ks), first(stopks), first(As)), +# _updaterow_all(rowsentinel, tail(activerows), tail(rows), tail(ks), tail(stopks), tail(As))...) +@inline function _fusedupdatebc(rowsentinel, activerow, row, defarg, k, stopk, A) + # returns (val, nextk, nextrow) + if row == activerow + nextk = k + one(k) + (storedvals(A)[k], nextk, (nextk < stopk ? 
storedinds(A)[nextk] : oftype(row, rowsentinel))) + else + (defarg, k, row) + end +end +@inline _fusedupdatebc_all(rowsentinel, activerow, rows, defargs, ks, stopks, As) = + _fusedupdatebc_all((#=vals=#), (#=nextks=#), (#=nextrows=#), rowsentinel, activerow, rows, defargs, ks, stopks, As) +@inline _fusedupdatebc_all(vals, nextks, nextrows, rowsent, activerow, ::Tuple{}, ::Tuple{}, ::Tuple{}, ::Tuple{}, ::Tuple{}) = + (vals, nextks, nextrows) +@inline function _fusedupdatebc_all(vals, nextks, nextrows, rowsentinel, activerow, rows, defargs, ks, stopks, As) + val, nextk, nextrow = _fusedupdatebc(rowsentinel, activerow, first(rows), first(defargs), first(ks), first(stopks), first(As)) + return _fusedupdatebc_all((vals..., val), (nextks..., nextk), (nextrows..., nextrow), + rowsentinel, activerow, tail(rows), tail(defargs), tail(ks), tail(stopks), tail(As)) +end + + +# (9) broadcast[!] over combinations of broadcast scalars and sparse vectors/matrices +# +# TODO: The minimal snippet below is not satisfying: A better solution would achieve +# the same for (1) all broadcast scalar types (Base.Broadcast.containertype(x) == Any?) and +# (2) any combination (number, order, type mixture) of broadcast scalars. +# +broadcast{Tf}(f::Tf, x::Union{Number,Bool}, A::SparseMatrixCSC) = broadcast(y -> f(x, y), A) +broadcast{Tf}(f::Tf, A::SparseMatrixCSC, y::Union{Number,Bool}) = broadcast(x -> f(x, y), A) +# NOTE: The following two method definitions work around #19096. These definitions should +# be folded into the two preceding definitions on resolution of #19096. +broadcast{Tf,T}(f::Tf, ::Type{T}, A::SparseMatrixCSC) = broadcast(y -> f(T, y), A) +broadcast{Tf,T}(f::Tf, A::SparseMatrixCSC, ::Type{T}) = broadcast(x -> f(x, T), A) + +end \ No newline at end of file diff --git a/base/sparse/sparse.jl b/base/sparse/sparse.jl index 1585c1904d763..577dec5fada80 100644 --- a/base/sparse/sparse.jl +++ b/base/sparse/sparse.jl @@ -26,7 +26,7 @@ import Base: @get!, acos, acosd, acot, acotd, acsch, asech, asin, asind, asinh, rotl90, rotr90, round, scale!, setindex!, similar, size, transpose, tril, triu, vec, permute!, map, map! -import Base.Broadcast: _broadcast_type, broadcast_indices +import Base.Broadcast: broadcast_indices export AbstractSparseArray, AbstractSparseMatrix, AbstractSparseVector, SparseMatrixCSC, SparseVector, blkdiag, dense, droptol!, dropzeros!, dropzeros, @@ -36,6 +36,7 @@ export AbstractSparseArray, AbstractSparseMatrix, AbstractSparseVector, include("abstractsparse.jl") include("sparsematrix.jl") include("sparsevector.jl") +include("higherorderfns.jl") include("linalg.jl") if Base.USE_GPL_LIBS diff --git a/base/sparse/sparsematrix.jl b/base/sparse/sparsematrix.jl index da0a3be768008..1e228c8465cc0 100644 --- a/base/sparse/sparsematrix.jl +++ b/base/sparse/sparsematrix.jl @@ -1398,767 +1398,6 @@ end sparse(S::UniformScaling, m::Integer, n::Integer=m) = speye_scaled(S.λ, m, n) -## map/map! and broadcast/broadcast! over sparse matrices - -# map/map! entry points -function map!{Tf,N}(f::Tf, C::SparseMatrixCSC, A::SparseMatrixCSC, Bs::Vararg{SparseMatrixCSC,N}) - _checksameshape(C, A, Bs...) - fofzeros = f(_zeros_eltypes(A, Bs...)...) - fpreszeros = fofzeros == zero(fofzeros) - return fpreszeros ? _map_zeropres!(f, C, A, Bs...) : - _map_notzeropres!(f, fofzeros, C, A, Bs...) -end -function map{Tf,N}(f::Tf, A::SparseMatrixCSC, Bs::Vararg{SparseMatrixCSC,N}) - _checksameshape(A, Bs...) - fofzeros = f(_zeros_eltypes(A, Bs...)...) - fpreszeros = fofzeros == zero(fofzeros) - maxnnzC = fpreszeros ? 
min(length(A), _sumnnzs(A, Bs...)) : length(A) - entrytypeC = _broadcast_type(f, A, Bs...) - indextypeC = _promote_indtype(A, Bs...) - Ccolptr = Vector{indextypeC}(A.n + 1) - Crowval = Vector{indextypeC}(maxnnzC) - Cnzval = Vector{entrytypeC}(maxnnzC) - C = SparseMatrixCSC(A.m, A.n, Ccolptr, Crowval, Cnzval) - return fpreszeros ? _map_zeropres!(f, C, A, Bs...) : - _map_notzeropres!(f, fofzeros, C, A, Bs...) -end -# broadcast/broadcast! entry points -broadcast{Tf}(f::Tf, A::SparseMatrixCSC) = map(f, A) -broadcast!{Tf}(f::Tf, C::SparseMatrixCSC, A::SparseMatrixCSC) = map!(f, C, A) -function broadcast!{Tf,N}(f::Tf, C::SparseMatrixCSC, A::SparseMatrixCSC, Bs::Vararg{SparseMatrixCSC,N}) - _aresameshape(C, A, Bs...) && return map!(f, C, A, Bs...) # could avoid a second dims check in map - Base.Broadcast.check_broadcast_indices(indices(C), A, Bs...) - fofzeros = f(_zeros_eltypes(A, Bs...)...) - fpreszeros = fofzeros == zero(fofzeros) - return fpreszeros ? _broadcast_zeropres!(f, C, A, Bs...) : - _broadcast_notzeropres!(f, fofzeros, C, A, Bs...) -end -# needed to eliminate method ambiguity: -function broadcast!(::typeof(identity), C::SparseMatrixCSC, A::SparseMatrixCSC) - _checksameshape(C, A) - return copy!(C, A) -end -function broadcast{Tf,N}(f::Tf, A::SparseMatrixCSC, Bs::Vararg{SparseMatrixCSC,N}) - _aresameshape(A, Bs...) && return map(f, A, Bs...) # could avoid a second dims check in map - fofzeros = f(_zeros_eltypes(A, Bs...)...) - fpreszeros = fofzeros == zero(fofzeros) - indextypeC = _promote_indtype(A, Bs...) - entrytypeC = _broadcast_type(f, A, Bs...) - Cm, Cn = Base.to_shape(Base.Broadcast.broadcast_indices(A, Bs...)) - maxnnzC = fpreszeros ? _checked_maxnnzbcres(Cm, Cn, A, Bs...) : (Cm * Cn) - Ccolptr = Vector{indextypeC}(Cn + 1) - Crowval = Vector{indextypeC}(maxnnzC) - Cnzval = Vector{entrytypeC}(maxnnzC) - C = SparseMatrixCSC(Cm, Cn, Ccolptr, Crowval, Cnzval) - return fpreszeros ? _broadcast_zeropres!(f, C, A, Bs...) : - _broadcast_notzeropres!(f, fofzeros, C, A, Bs...) -end -# map/map! and broadcast/broadcast! entry point helper functions -@inline _sumnnzs(A) = nnz(A) -@inline _sumnnzs(A, Bs...) = nnz(A) + _sumnnzs(Bs...) -@inline _zeros_eltypes(A) = (zero(eltype(A)),) -@inline _zeros_eltypes(A, Bs...) = (zero(eltype(A)), _zeros_eltypes(Bs...)...) -@inline _promote_indtype(A) = eltype(A.rowval) -@inline _promote_indtype(A, Bs...) = promote_type(eltype(A.rowval), _promote_indtype(Bs...)) -@inline _aresameshape(A) = true -@inline _aresameshape(A, B) = size(A) == size(B) -@inline _aresameshape(A, B, Cs...) = _aresameshape(A, B) ? _aresameshape(B, Cs...) : false -@inline _checksameshape(As...) = _aresameshape(As...) || throw(DimensionMismatch("argument shapes must match")) -_maxnnzfrom(Cm, Cn, A) = nnz(A) * div(Cm, A.m) * div(Cn, A.n) -@inline _maxnnzfrom_each(Cm, Cn, ::Tuple{}) = () -@inline _maxnnzfrom_each(Cm, Cn, As) = (_maxnnzfrom(Cm, Cn, first(As)), _maxnnzfrom_each(Cm, Cn, tail(As))...) -@inline _unchecked_maxnnzbcres(Cm, Cn, As) = min(Cm * Cn, sum(_maxnnzfrom_each(Cm, Cn, As))) -@inline _checked_maxnnzbcres(Cm, Cn, As...) = Cm != 0 && Cn != 0 ? _unchecked_maxnnzbcres(Cm, Cn, As) : 0 - -# _map_zeropres!/_map_notzeropres! specialized for a single sparse matrix -"Stores only the nonzero entries of `map(f, Matrix(A))` in `C`." 
-function _map_zeropres!{Tf}(f::Tf, C::SparseMatrixCSC, A::SparseMatrixCSC) - spaceC = min(length(C.rowval), length(C.nzval)) - Ck = 1 - @inbounds for j in 1:C.n - C.colptr[j] = Ck - for Ak in nzrange(A, j) - Cx = f(A.nzval[Ak]) - if Cx != zero(eltype(C)) - Ck > spaceC && (spaceC = _expandstorage!(C, Ck + nnz(A) - (Ak - 1))) - C.rowval[Ck] = A.rowval[Ak] - C.nzval[Ck] = Cx - Ck += 1 - end - end - end - @inbounds C.colptr[C.n + 1] = Ck - _trimstorage!(C, Ck - 1) - return C -end -""" -Densifies `C`, storing `fillvalue` in place of each unstored entry in `A` and -`f(A[i,j])` in place of each stored entry `A[i,j]` in `A`. -""" -function _map_notzeropres!{Tf}(f::Tf, fillvalue, C::SparseMatrixCSC, A::SparseMatrixCSC) - # Build dense matrix structure in C, expanding storage if necessary - _densestructure!(C) - # Populate values - fill!(C.nzval, fillvalue) - @inbounds for (j, jo) in zip(1:C.n, 0:C.m:(C.m*C.n - 1)), Ak in nzrange(A, j) - Cx = f(A.nzval[Ak]) - Cx != fillvalue && (C.nzval[jo + A.rowval[Ak]] = Cx) - end - # NOTE: Combining the fill! above into the loop above to avoid multiple sweeps over / - # nonsequential access of C.nzval does not appear to improve performance. - return C -end -# helper functions for these methods and some of those below -function _expandstorage!(X::SparseMatrixCSC, maxstored) - length(X.rowval) < maxstored && resize!(X.rowval, maxstored) - length(X.nzval) < maxstored && resize!(X.nzval, maxstored) - return maxstored -end -function _trimstorage!(X::SparseMatrixCSC, maxstored) - resize!(X.rowval, maxstored) - resize!(X.nzval, maxstored) - return maxstored -end -function _densestructure!(X::SparseMatrixCSC) - nnzX = X.m * X.n - # Expand storage if necessary - resize!(X.rowval, nnzX) - resize!(X.nzval, nnzX) - # Build dense structure in C - copy!(X.colptr, 1:X.m:(nnzX + 1)) - for k in 1:X.m:(nnzX - X.m + 1) - copy!(X.rowval, k, 1:X.m) - end - return X -end - -# _map_zeropres!/_map_notzeropres! specialized for a pair of sparse matrices -function _map_zeropres!{Tf}(f::Tf, C::SparseMatrixCSC, A::SparseMatrixCSC, B::SparseMatrixCSC) - spaceC = min(length(C.rowval), length(C.nzval)) - rowsentinelA = convert(eltype(A.rowval), C.m + 1) - rowsentinelB = convert(eltype(B.rowval), C.m + 1) - Ck = 1 - @inbounds for j in 1:C.n - C.colptr[j] = Ck - Ak, stopAk = A.colptr[j], A.colptr[j + 1] - Bk, stopBk = B.colptr[j], B.colptr[j + 1] - Ai = Ak < stopAk ? A.rowval[Ak] : rowsentinelA - Bi = Bk < stopBk ? B.rowval[Bk] : rowsentinelB - while true - if Ai == Bi - Ai == rowsentinelA && break # column complete - Cx, Ci::eltype(C.rowval) = f(A.nzval[Ak], B.nzval[Bk]), Ai - Ak += one(Ak); Ai = Ak < stopAk ? A.rowval[Ak] : rowsentinelA - Bk += one(Bk); Bi = Bk < stopBk ? B.rowval[Bk] : rowsentinelB - elseif Ai < Bi - Cx, Ci = f(A.nzval[Ak], zero(eltype(B))), Ai - Ak += one(Ak); Ai = Ak < stopAk ? A.rowval[Ak] : rowsentinelA - else # Bi < Ai - Cx, Ci = f(zero(eltype(A)), B.nzval[Bk]), Bi - Bk += one(Bk); Bi = Bk < stopBk ? B.rowval[Bk] : rowsentinelB - end - # NOTE: The ordering of the conditional chain above impacts which matrices this - # method performs best for. In the map situation (arguments have same shape, and - # likely same or similar stored entry pattern), the Ai == Bi and termination - # cases are equally or more likely than the Ai < Bi and Bi < Ai cases. Hence - # the ordering of the conditional chain above differs from that in the - # corresponding broadcast code (below). 
- if Cx != zero(eltype(C)) - Ck > spaceC && (spaceC = _expandstorage!(C, Ck + (nnz(A) - (Ak - 1)) + (nnz(B) - (Bk - 1)))) - C.rowval[Ck] = Ci - C.nzval[Ck] = Cx - Ck += 1 - end - end - end - @inbounds C.colptr[C.n + 1] = Ck - _trimstorage!(C, Ck - 1) - return C -end -function _map_notzeropres!{Tf}(f::Tf, fillvalue, C::SparseMatrixCSC, A::SparseMatrixCSC, B::SparseMatrixCSC) - # Build dense matrix structure in C, expanding storage if necessary - _densestructure!(C) - # Populate values - fill!(C.nzval, fillvalue) - # NOTE: Combining this fill! into the loop below to avoid multiple sweeps over / - # nonsequential access of C.nzval does not appear to improve performance. - rowsentinelA = convert(eltype(A.rowval), C.m + 1) - rowsentinelB = convert(eltype(B.rowval), C.m + 1) - @inbounds for (j, jo) in zip(1:C.n, 0:C.m:(C.m*C.n - 1)) - Ak, stopAk = A.colptr[j], A.colptr[j + 1] - Bk, stopBk = B.colptr[j], B.colptr[j + 1] - Ai = Ak < stopAk ? A.rowval[Ak] : rowsentinelA - Bi = Bk < stopBk ? B.rowval[Bk] : rowsentinelB - while true - if Ai == Bi - Ai == rowsentinelA && break # column complete - Cx, Ci::eltype(C.rowval) = f(A.nzval[Ak], B.nzval[Bk]), Ai - Ak += one(Ak); Ai = Ak < stopAk ? A.rowval[Ak] : rowsentinelA - Bk += one(Bk); Bi = Bk < stopBk ? B.rowval[Bk] : rowsentinelB - elseif Ai < Bi - Cx, Ci = f(A.nzval[Ak], zero(eltype(B))), Ai - Ak += one(Ak); Ai = Ak < stopAk ? A.rowval[Ak] : rowsentinelA - else # Bi < Ai - Cx, Ci = f(zero(eltype(A)), B.nzval[Bk]), Bi - Bk += one(Bk); Bi = Bk < stopBk ? B.rowval[Bk] : rowsentinelB - end - Cx != fillvalue && (C.nzval[jo + Ci] = Cx) - end - end - return C -end -# _broadcast_zeropres!/_broadcast_notzeropres! specialized for a pair of (input) sparse matrices -function _broadcast_zeropres!{Tf}(f::Tf, C::SparseMatrixCSC, A::SparseMatrixCSC, B::SparseMatrixCSC) - isempty(C) && (fill!(C.colptr, 1); return C) - spaceC = min(length(C.rowval), length(C.nzval)) - rowsentinelA = convert(eltype(A.rowval), C.m + 1) - rowsentinelB = convert(eltype(B.rowval), C.m + 1) - # A and B cannot have the same shape, as we directed that case to map in broadcast's - # entry point; here we need efficiently handle only heterogeneous combinations of matrices - # with no singleton dimensions ("matrices" hereafter), one singleton dimension ("columns" - # and "rows"), and two singleton dimensions ("scalars"). Cases involving scalars should - # be rare and optimizing that case complicates the code appreciably, so we largely - # ignore that case's performance below. - # - # We first divide the cases into two groups: those in which neither argument expands - # vertically (matrix-column combinations) and those in which an argument expands - # vertically (matrix-row and column-row combinations). - # - # NOTE: Placing the loops over columns outside the conditional chain segregating - # argument shape combinations eliminates some code replication but unfortunately - # hurts performance appreciably in some cases. - # - # Cases without vertical expansion - Ck = 1 - if A.m == B.m - @inbounds for j in 1:C.n - C.colptr[j] = Ck - Ak, stopAk = A.n == 1 ? (A.colptr[1], A.colptr[2]) : (A.colptr[j], A.colptr[j + 1]) - Bk, stopBk = B.n == 1 ? (B.colptr[1], B.colptr[2]) : (B.colptr[j], B.colptr[j + 1]) - # Restructuring this k/stopk code to avoid unnecessary colptr retrievals does - # not improve performance signicantly. Leave in this less complex form. - Ai = Ak < stopAk ? A.rowval[Ak] : rowsentinelA - Bi = Bk < stopBk ? 
B.rowval[Bk] : rowsentinelB - while true - if Ai != Bi - if Ai < Bi - Cx, Ci = f(A.nzval[Ak], zero(eltype(B))), Ai - Ak += one(Ak); Ai = Ak < stopAk ? A.rowval[Ak] : rowsentinelA - else # Ai > Bi - Cx, Ci = f(zero(eltype(A)), B.nzval[Bk]), Bi - Bk += one(Bk); Bi = Bk < stopBk ? B.rowval[Bk] : rowsentinelB - end - elseif #= Ai == Bi && =# Ai == rowsentinelA - break # column complete - else #= Ai == Bi != rowsentinel =# - Cx, Ci::eltype(C.rowval) = f(A.nzval[Ak], B.nzval[Bk]), Ai - Ak += one(Ak); Ai = Ak < stopAk ? A.rowval[Ak] : rowsentinelA - Bk += one(Bk); Bi = Bk < stopBk ? B.rowval[Bk] : rowsentinelB - end - # NOTE: The ordering of the conditional chain above impacts which matrices - # this method perform best for. In contrast to the map situation (arguments - # have same shape, and likely same or similar stored entry pattern), where - # the Ai == Bi and termination cases are equally or more likely than the - # Ai < Bi and Bi < Ai cases, in the broadcast situation (arguments have - # different shape, and likely largely disjoint expanded stored entry - # pattern) the Ai < Bi and Bi < Ai cases are equally or more likely than the - # Ai == Bi and termination cases. Hence the ordering of the conditional - # chain above differs from that in the corresponding map code. - if Cx != zero(eltype(C)) - Ck > spaceC && (spaceC = _expandstorage!(C, _unchecked_maxnnzbcres(C.m, C.n, A, B))) - C.rowval[Ck] = Ci - C.nzval[Ck] = Cx - Ck += 1 - end - end - end - # Cases with vertical expansion - elseif A.m == 1 # && B.m != 1, vertically expand first argument - @inbounds for j in 1:C.n - C.colptr[j] = Ck - Ak, stopAk = A.n == 1 ? (A.colptr[1], A.colptr[2]) : (A.colptr[j], A.colptr[j + 1]) - Bk, stopBk = B.n == 1 ? (B.colptr[1], B.colptr[2]) : (B.colptr[j], B.colptr[j + 1]) - Ax = Ak < stopAk ? A.nzval[Ak] : zero(eltype(A)) - fvAzB = f(Ax, zero(eltype(B))) - if fvAzB == zero(eltype(C)) - # either A's jth column is empty, or A's jth column contains a nonzero value - # Ax but f(Ax, zero(eltype(B))) is nonetheless zero, so we can scan through - # B's jth column without storing every entry in C's jth column - while Bk < stopBk - Cx = f(Ax, B.nzval[Bk]) - if Cx != zero(eltype(C)) - Ck > spaceC && (spaceC = _expandstorage!(C, _unchecked_maxnnzbcres(C.m, C.n, A, B))) - C.rowval[Ck] = B.rowval[Bk] - C.nzval[Ck] = Cx - Ck += 1 - end - Bk += one(Bk) - end - else - # A's jth column is nonempty and f(Ax, zero(eltype(B))) is not zero, so - # we must store (likely) every entry in C's jth column - Bi = Bk < stopBk ? B.rowval[Bk] : rowsentinelB - for Ci::eltype(C.rowval) in 1:C.m - if Bi == Ci - Cx = f(Ax, B.nzval[Bk]) - Bk += one(Bk); Bi = Bk < stopBk ? B.rowval[Bk] : rowsentinelB - else - Cx = fvAzB - end - if Cx != zero(eltype(C)) - Ck > spaceC && (spaceC = _expandstorage!(C, _unchecked_maxnnzbcres(C.m, C.n, A, B))) - C.rowval[Ck] = Ci - C.nzval[Ck] = Cx - Ck += 1 - end - end - end - end - elseif B.m == 1 # && A.m != 1, vertically expand second argument - @inbounds for j in 1:C.n - C.colptr[j] = Ck - Ak, stopAk = A.n == 1 ? (A.colptr[1], A.colptr[2]) : (A.colptr[j], A.colptr[j + 1]) - Bk, stopBk = B.n == 1 ? (B.colptr[1], B.colptr[2]) : (B.colptr[j], B.colptr[j + 1]) - Bx = Bk < stopBk ? 
B.nzval[Bk] : zero(eltype(B)) - fzAvB = f(zero(eltype(A)), Bx) - if fzAvB == zero(eltype(C)) - # either B's jth column is empty, or B's jth column contains a nonzero value - # Bx but f(zero(eltype(A)), Bx) is nonetheless zero, so we can scan through - # A's jth column without storing every entry in C's jth column - while Ak < stopAk - Cx = f(A.nzval[Ak], Bx) - if Cx != zero(eltype(C)) - Ck > spaceC && (spaceC = _expandstorage!(C, _unchecked_maxnnzbcres(C.m, C.n, A, B))) - C.rowval[Ck] = A.rowval[Ak] - C.nzval[Ck] = Cx - Ck += 1 - end - Ak += one(Ak) - end - else - # B's jth column is nonempty and f(zero(eltype(A)), Bx) is not zero, so - # we must store (likely) every entry in C's jth column - Ai = Ak < stopAk ? A.rowval[Ak] : rowsentinelA - for Ci::eltype(C.rowval) in 1:C.m - if Ai == Ci - Cx = f(A.nzval[Ak], Bx) - Ak += one(Ak); Ai = Ak < stopAk ? A.rowval[Ak] : rowsentinelA - else - Cx = fzAvB - end - if Cx != zero(eltype(C)) - Ck > spaceC && (spaceC = _expandstorage!(C, _unchecked_maxnnzbcres(C.m, C.n, A, B))) - C.rowval[Ck] = Ci - C.nzval[Ck] = Cx - Ck += 1 - end - end - end - end - end - @inbounds C.colptr[C.n + 1] = Ck - _trimstorage!(C, Ck - 1) - return C -end -function _broadcast_notzeropres!{Tf}(f::Tf, fillvalue, C::SparseMatrixCSC, A::SparseMatrixCSC, B::SparseMatrixCSC) - # For information on this code, see comments in similar code in _broadcast_zeropres! above - # Build dense matrix structure in C, expanding storage if necessary - _densestructure!(C) - # Populate values - fill!(C.nzval, fillvalue) - rowsentinelA = convert(eltype(A.rowval), C.m + 1) - rowsentinelB = convert(eltype(B.rowval), C.m + 1) - # Cases without vertical expansion - if A.m == B.m - @inbounds for (j, jo) in zip(1:C.n, 0:C.m:(C.m*C.n - 1)) - Ak, stopAk = A.n == 1 ? (A.colptr[1], A.colptr[2]) : (A.colptr[j], A.colptr[j + 1]) - Bk, stopBk = B.n == 1 ? (B.colptr[1], B.colptr[2]) : (B.colptr[j], B.colptr[j + 1]) - Ai = Ak < stopAk ? A.rowval[Ak] : rowsentinelA - Bi = Bk < stopBk ? B.rowval[Bk] : rowsentinelB - while true - if Ai < Bi - Cx, Ci = f(A.nzval[Ak], zero(eltype(B))), Ai - Ak += one(Ak); Ai = Ak < stopAk ? A.rowval[Ak] : rowsentinelA - elseif Ai > Bi - Cx, Ci = f(zero(eltype(A)), B.nzval[Bk]), Bi - Bk += one(Bk); Bi = Bk < stopBk ? B.rowval[Bk] : rowsentinelB - elseif #= Ai == Bi && =# Ai == rowsentinelA - break # column complete - else #= Ai == Bi != rowsentinel =# - Cx, Ci::eltype(C.rowval) = f(A.nzval[Ak], B.nzval[Bk]), Ai - Ak += one(Ak); Ai = Ak < stopAk ? A.rowval[Ak] : rowsentinelA - Bk += one(Bk); Bi = Bk < stopBk ? B.rowval[Bk] : rowsentinelB - end - Cx != fillvalue && (C.nzval[jo + Ci] = Cx) - end - end - # Cases with vertical expansion - elseif A.m == 1 # && B.m != 1, vertically expand first argument - @inbounds for (j, jo) in zip(1:C.n, 0:C.m:(C.m*C.n - 1)) - Ak, stopAk = A.n == 1 ? (A.colptr[1], A.colptr[2]) : (A.colptr[j], A.colptr[j + 1]) - Bk, stopBk = B.n == 1 ? (B.colptr[1], B.colptr[2]) : (B.colptr[j], B.colptr[j + 1]) - Ax = Ak < stopAk ? A.nzval[Ak] : zero(eltype(A)) - fvAzB = f(Ax, zero(eltype(B))) - if fvAzB == zero(eltype(C)) - while Bk < stopBk - Cx = f(Ax, B.nzval[Bk]) - Cx != fillvalue && (C.nzval[jo + B.rowval[Bk]] = Cx) - Bk += one(Bk) - end - else - Bi = Bk < stopBk ? B.rowval[Bk] : rowsentinelB - for Ci::eltype(C.rowval) in 1:C.m - if Bi == Ci - Cx = f(Ax, B.nzval[Bk]) - Bk += one(Bk); Bi = Bk < stopBk ? 
-
-# _map_zeropres!/_map_notzeropres! for more than two sparse matrices
-function _map_zeropres!{Tf,N}(f::Tf, C::SparseMatrixCSC, As::Vararg{SparseMatrixCSC,N})
-    spaceC = min(length(C.rowval), length(C.nzval))
-    rowsentinel = C.m + 1
-    Ck = 1
-    stopks = _indforcol_all(1, As)
-    @inbounds for j in 1:C.n
-        C.colptr[j] = Ck
-        ks = stopks
-        stopks = _indforcol_all(j + 1, As)
-        rows = _rowforind_all(rowsentinel, ks, stopks, As)
-        activerow = min(rows...)
-        while activerow < rowsentinel
-            # activerows = _isactiverow_all(activerow, rows)
-            # Cx = f(_gatherargs(activerows, ks, As)...)
-            # ks = _updateind_all(activerows, ks)
-            # rows = _updaterow_all(rowsentinel, activerows, rows, ks, stopks, As)
-            vals, ks, rows = _fusedupdate_all(rowsentinel, activerow, rows, ks, stopks, As)
-            Cx = f(vals...)
-            if Cx != zero(eltype(C))
-                Ck > spaceC && (spaceC = _expandstorage!(C, Ck + min(length(C), _sumnnzs(As...)) - (sum(ks) - N)))
-                C.rowval[Ck] = activerow
-                C.nzval[Ck] = Cx
-                Ck += 1
-            end
-            activerow = min(rows...)
-        end
-    end
-    @inbounds C.colptr[C.n + 1] = Ck
-    _trimstorage!(C, Ck - 1)
-    return C
-end
-function _map_notzeropres!{Tf,N}(f::Tf, fillvalue, C::SparseMatrixCSC, As::Vararg{SparseMatrixCSC,N})
-    # Build dense matrix structure in C, expanding storage if necessary
-    _densestructure!(C)
-    # Populate values
-    fill!(C.nzval, fillvalue)
-    # NOTE: Combining this fill! into the loop below to avoid multiple sweeps over /
-    # nonsequential access of C.nzval does not appear to improve performance.
-    rowsentinel = C.m + 1
-    stopks = _indforcol_all(1, As)
-    @inbounds for (j, jo) in zip(1:C.n, 0:C.m:(C.m*C.n - 1))
-        ks = stopks
-        stopks = _indforcol_all(j + 1, As)
-        rows = _rowforind_all(rowsentinel, ks, stopks, As)
-        activerow = min(rows...)
-        while activerow < rowsentinel
-            # activerows = _isactiverow_all(activerow, rows)
-            # Cx = f(_gatherargs(activerows, ks, As)...)
-            # ks = _updateind_all(activerows, ks)
-            # rows = _updaterow_all(rowsentinel, activerows, rows, ks, stopks, As)
-            vals, ks, rows = _fusedupdate_all(rowsentinel, activerow, rows, ks, stopks, As)
-            Cx = f(vals...)
-            Cx != fillvalue && (C.nzval[jo + activerow] = Cx)
-            activerow = min(rows...)
-        end
-    end
-    return C
-end
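An editorial aside, not part of the diff: the N-ary map kernels above assume same-shape arguments and advance one stored-row cursor per argument in lockstep. A hedged sketch of the two paths, illustrative and assuming a Julia 0.6-era session:

    A = sparse([1, 2], [1, 2], [1.0, 2.0])
    B = sparse([2], [2], [3.0], 2, 2)
    D = sparse([1], [2], [4.0], 2, 2)
    map(+, A, B, D)                           # f(0, 0, 0) == 0: zero-preserving kernel, result stays sparse
    map((x, y, z) -> x + y + z + 1, A, B, D)  # f(0, 0, 0) == 1: dense-structure kernel with fill value 1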
-# helper methods for map/map! methods just above
-@inline _indforcol(j, A) = A.colptr[j]
-@inline _indforcol_all(j, ::Tuple{}) = ()
-@inline _indforcol_all(j, As) = (
-    _indforcol(j, first(As)),
-    _indforcol_all(j, tail(As))...)
-@inline _rowforind(rowsentinel, k, stopk, A) =
-    k < stopk ? A.rowval[k] : convert(eltype(A.rowval), rowsentinel)
-@inline _rowforind_all(rowsentinel, ::Tuple{}, ::Tuple{}, ::Tuple{}) = ()
-@inline _rowforind_all(rowsentinel, ks, stopks, As) = (
-    _rowforind(rowsentinel, first(ks), first(stopks), first(As)),
-    _rowforind_all(rowsentinel, tail(ks), tail(stopks), tail(As))...)
-# fusing the following defs. avoids a few branches, yielding 5-30% runtime reduction
-# @inline _isactiverow(activerow, row) = row == activerow
-# @inline _isactiverow_all(activerow, ::Tuple{}) = ()
-# @inline _isactiverow_all(activerow, rows) = (
-#     _isactiverow(activerow, first(rows)),
-#     _isactiverow_all(activerow, tail(rows))...)
-# @inline _gatherarg(isactiverow, k, A) = isactiverow ? A.nzval[k] : zero(eltype(A))
-# @inline _gatherargs(::Tuple{}, ::Tuple{}, ::Tuple{}) = ()
-# @inline _gatherargs(activerows, ks, As) = (
-#     _gatherarg(first(activerows), first(ks), first(As)),
-#     _gatherargs(tail(activerows), tail(ks), tail(As))...)
-# @inline _updateind(isactiverow, k) = isactiverow ? (k + one(k)) : k
-# @inline _updateind_all(::Tuple{}, ::Tuple{}) = ()
-# @inline _updateind_all(activerows, ks) = (
-#     _updateind(first(activerows), first(ks)),
-#     _updateind_all(tail(activerows), tail(ks))...)
-# @inline _updaterow(rowsentinel, isrowactive, presrow, k, stopk, A) =
-#     isrowactive ? (k < stopk ? A.rowval[k] : oftype(presrow, rowsentinel)) : presrow
-# @inline _updaterow_all(rowsentinel, ::Tuple{}, ::Tuple{}, ::Tuple{}, ::Tuple{}, ::Tuple{}) = ()
-# @inline _updaterow_all(rowsentinel, activerows, rows, ks, stopks, As) = (
-#     _updaterow(rowsentinel, first(activerows), first(rows), first(ks), first(stopks), first(As)),
-#     _updaterow_all(rowsentinel, tail(activerows), tail(rows), tail(ks), tail(stopks), tail(As))...)
-@inline function _fusedupdate(rowsentinel, activerow, row, k, stopk, A)
-    # returns (val, nextk, nextrow)
-    if row == activerow
-        nextk = k + one(k)
-        (A.nzval[k], nextk, (nextk < stopk ? A.rowval[nextk] : oftype(row, rowsentinel)))
-    else
-        (zero(eltype(A)), k, row)
-    end
-end
-@inline _fusedupdate_all(rowsentinel, activerow, rows, ks, stopks, As) =
-    _fusedupdate_all((#=vals=#), (#=nextks=#), (#=nextrows=#), rowsentinel, activerow, rows, ks, stopks, As)
-@inline _fusedupdate_all(vals, nextks, nextrows, rowsent, activerow, ::Tuple{}, ::Tuple{}, ::Tuple{}, ::Tuple{}) =
-    (vals, nextks, nextrows)
-@inline function _fusedupdate_all(vals, nextks, nextrows, rowsentinel, activerow, rows, ks, stopks, As)
-    val, nextk, nextrow = _fusedupdate(rowsentinel, activerow, first(rows), first(ks), first(stopks), first(As))
-    return _fusedupdate_all((vals..., val), (nextks..., nextk), (nextrows..., nextrow),
-        rowsentinel, activerow, tail(rows), tail(ks), tail(stopks), tail(As))
-end
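Not part of the diff: the _all helpers above traverse the argument tuple recursively with first/tail so that the per-argument work can unroll and infer at compile time. The same pattern in isolation, with a hypothetical helper name:

    using Base: tail
    @inline _sqsum_all(::Tuple{}) = 0
    @inline _sqsum_all(xs) = abs2(first(xs)) + _sqsum_all(tail(xs))
    _sqsum_all((1, 2.0, 3im))  # unrolls to abs2(1) + abs2(2.0) + abs2(3im) == 14.0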
-
-# _broadcast_zeropres!/_broadcast_notzeropres! for more than two (input) sparse matrices
-function _broadcast_zeropres!{Tf,N}(f::Tf, C::SparseMatrixCSC, As::Vararg{SparseMatrixCSC,N})
-    isempty(C) && (fill!(C.colptr, 1); return C)
-    spaceC = min(length(C.rowval), length(C.nzval))
-    expandsverts = _expandsvert_all(C, As)
-    expandshorzs = _expandshorz_all(C, As)
-    rowsentinel = C.m + 1
-    Ck = 1
-    @inbounds for j in 1:C.n
-        C.colptr[j] = Ck
-        ks = _startindforbccol_all(j, expandshorzs, As)
-        stopks = _stopindforbccol_all(j + 1, expandshorzs, As)
-        # Neither fusing ks and stopks construction, nor restructuring them to avoid repeated
-        # colptr lookups, improves performance significantly. So keep the less complex approach here.
-        isemptys = _isemptycol_all(ks, stopks)
-        defargs = _defargforcol_all(j, isemptys, expandsverts, ks, As)
-        rows = _initrowforcol_all(j, rowsentinel, isemptys, expandsverts, ks, As)
-        defaultCx = f(defargs...)
-        activerow = min(rows...)
-        if defaultCx == zero(eltype(C)) # zero-preserving column scan
-            while activerow < rowsentinel
-                # activerows = _isactiverow_all(activerow, rows)
-                # Cx = f(_gatherbcargs(activerows, defargs, ks, As)...)
-                # ks = _updateind_all(activerows, ks)
-                # rows = _updaterow_all(rowsentinel, activerows, rows, ks, stopks, As)
-                args, ks, rows = _fusedupdatebc_all(rowsentinel, activerow, rows, defargs, ks, stopks, As)
-                Cx = f(args...)
-                if Cx != zero(eltype(C))
-                    Ck > spaceC && (spaceC = _expandstorage!(C, _unchecked_maxnnzbcres(C.m, C.n, As)))
-                    C.rowval[Ck] = activerow
-                    C.nzval[Ck] = Cx
-                    Ck += 1
-                end
-                activerow = min(rows...)
-            end
-        else # zero-non-preserving column scan
-            for Ci in 1:C.m
-                if Ci == activerow
-                    # activerows = _isactiverow_all(activerow, rows)
-                    # Cx = f(_gatherbcargs(activerows, defargs, ks, As)...)
-                    # ks = _updateind_all(activerows, ks)
-                    # rows = _updaterow_all(rowsentinel, activerows, rows, ks, stopks, As)
-                    args, ks, rows = _fusedupdatebc_all(rowsentinel, activerow, rows, defargs, ks, stopks, As)
-                    Cx = f(args...)
-                    activerow = min(rows...)
-                else
-                    Cx = defaultCx
-                end
-                if Cx != zero(eltype(C))
-                    Ck > spaceC && (spaceC = _expandstorage!(C, _unchecked_maxnnzbcres(C.m, C.n, As)))
-                    C.rowval[Ck] = Ci
-                    C.nzval[Ck] = Cx
-                    Ck += 1
-                end
-            end
-        end
-    end
-    @inbounds C.colptr[C.n + 1] = Ck
-    _trimstorage!(C, Ck - 1)
-    return C
-end
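Not part of the diff: a hedged sketch of the more-than-two-argument zero-preserving broadcast path above, exercised with mixed shapes (illustrative names, Julia 0.6-era session assumed):

    A = sparse([1, 3], [1, 2], [1.0, 2.0], 3, 2)
    r = sparse([1], [2], [10.0], 1, 2)  # one row, expands vertically
    c = sparse([2], [1], [5.0], 3, 1)   # one column, expands horizontally
    broadcast((x, y, z) -> x + y * z, A, r, c)  # f(0, 0, 0) == 0, so each column scan stays sparse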
-function _broadcast_notzeropres!{Tf,N}(f::Tf, fillvalue, C::SparseMatrixCSC, As::Vararg{SparseMatrixCSC,N})
-    isempty(C) && (fill!(C.colptr, 1); return C)
-    # Build dense matrix structure in C, expanding storage if necessary
-    _densestructure!(C)
-    # Populate values
-    fill!(C.nzval, fillvalue)
-    expandsverts = _expandsvert_all(C, As)
-    expandshorzs = _expandshorz_all(C, As)
-    rowsentinel = C.m + 1
-    @inbounds for (j, jo) in zip(1:C.n, 0:C.m:(C.m*C.n - 1))
-        ks = _startindforbccol_all(j, expandshorzs, As)
-        stopks = _stopindforbccol_all(j + 1, expandshorzs, As)
-        # Neither fusing ks and stopks construction, nor restructuring them to avoid repeated
-        # colptr lookups, improves performance significantly. So keep the less complex approach here.
-        isemptys = _isemptycol_all(ks, stopks)
-        defargs = _defargforcol_all(j, isemptys, expandsverts, ks, As)
-        rows = _initrowforcol_all(j, rowsentinel, isemptys, expandsverts, ks, As)
-        defaultCx = f(defargs...)
-        activerow = min(rows...)
-        if defaultCx == fillvalue # fillvalue-preserving column scan
-            while activerow < rowsentinel
-                # activerows = _isactiverow_all(activerow, rows)
-                # Cx = f(_gatherbcargs(activerows, defargs, ks, As)...)
-                # ks = _updateind_all(activerows, ks)
-                # rows = _updaterow_all(rowsentinel, activerows, rows, ks, stopks, As)
-                args, ks, rows = _fusedupdatebc_all(rowsentinel, activerow, rows, defargs, ks, stopks, As)
-                Cx = f(args...)
-                Cx != fillvalue && (C.nzval[jo + activerow] = Cx)
-                activerow = min(rows...)
-            end
-        else # fillvalue-non-preserving column scan
-            for Ci in 1:C.m
-                if Ci == activerow
-                    # activerows = _isactiverow_all(activerow, rows)
-                    # Cx = f(_gatherbcargs(activerows, defargs, ks, As)...)
-                    # ks = _updateind_all(activerows, ks)
-                    # rows = _updaterow_all(rowsentinel, activerows, rows, ks, stopks, As)
-                    args, ks, rows = _fusedupdatebc_all(rowsentinel, activerow, rows, defargs, ks, stopks, As)
-                    Cx = f(args...)
-                    activerow = min(rows...)
-                else
-                    Cx = defaultCx
-                end
-                Cx != fillvalue && (C.nzval[jo + Ci] = Cx)
-            end
-        end
-    end
-    return C
-end
-# helper methods for broadcast/broadcast! methods just above
-@inline _expandsvert(C, A) = A.m != C.m
-@inline _expandsvert_all(C, ::Tuple{}) = ()
-@inline _expandsvert_all(C, As) = (_expandsvert(C, first(As)), _expandsvert_all(C, tail(As))...)
-@inline _expandshorz(C, A) = A.n != C.n
-@inline _expandshorz_all(C, ::Tuple{}) = ()
-@inline _expandshorz_all(C, As) = (_expandshorz(C, first(As)), _expandshorz_all(C, tail(As))...)
-@inline _startindforbccol(j, expandshorz, A) = expandshorz ? A.colptr[1] : A.colptr[j]
-@inline _startindforbccol_all(j, ::Tuple{}, ::Tuple{}) = ()
-@inline _startindforbccol_all(j, expandshorzs, As) = (
-    _startindforbccol(j, first(expandshorzs), first(As)),
-    _startindforbccol_all(j, tail(expandshorzs), tail(As))...)
-@inline _stopindforbccol(j, expandshorz, A) = expandshorz ? A.colptr[2] : A.colptr[j]
-@inline _stopindforbccol_all(j, ::Tuple{}, ::Tuple{}) = ()
-@inline _stopindforbccol_all(j, expandshorzs, As) = (
-    _stopindforbccol(j, first(expandshorzs), first(As)),
-    _stopindforbccol_all(j, tail(expandshorzs), tail(As))...)
-@inline _isemptycol(k, stopk) = k == stopk
-@inline _isemptycol_all(::Tuple{}, ::Tuple{}) = ()
-@inline _isemptycol_all(ks, stopks) = (
-    _isemptycol(first(ks), first(stopks)),
-    _isemptycol_all(tail(ks), tail(stopks))...)
-@inline _initrowforcol(j, rowsentinel, isempty, expandsvert, k, A) =
-    expandsvert || isempty ? convert(eltype(A.rowval), rowsentinel) : A.rowval[k]
-@inline _initrowforcol_all(j, rowsentinel, ::Tuple{}, ::Tuple{}, ::Tuple{}, ::Tuple{}) = ()
-@inline _initrowforcol_all(j, rowsentinel, isemptys, expandsverts, ks, As) = (
-    _initrowforcol(j, rowsentinel, first(isemptys), first(expandsverts), first(ks), first(As)),
-    _initrowforcol_all(j, rowsentinel, tail(isemptys), tail(expandsverts), tail(ks), tail(As))...)
-@inline _defargforcol(j, isempty, expandsvert, k, A) =
-    expandsvert && !isempty ? A.nzval[k] : zero(eltype(A))
-@inline _defargforcol_all(j, ::Tuple{}, ::Tuple{}, ::Tuple{}, ::Tuple{}) = ()
-@inline _defargforcol_all(j, isemptys, expandsverts, ks, As) = (
-    _defargforcol(j, first(isemptys), first(expandsverts), first(ks), first(As)),
-    _defargforcol_all(j, tail(isemptys), tail(expandsverts), tail(ks), tail(As))...)
-# fusing the following defs. avoids a few branches and construction of a tuple, yielding 1-20% runtime reduction
-# @inline _isactiverow(activerow, row) = row == activerow
-# @inline _isactiverow_all(activerow, ::Tuple{}) = ()
-# @inline _isactiverow_all(activerow, rows) = (
-#     _isactiverow(activerow, first(rows)),
-#     _isactiverow_all(activerow, tail(rows))...)
-# @inline _gatherbcarg(isactiverow, defarg, k, A) = isactiverow ? A.nzval[k] : defarg
-# @inline _gatherbcargs(::Tuple{}, ::Tuple{}, ::Tuple{}, ::Tuple{}) = ()
-# @inline _gatherbcargs(activerows, defargs, ks, As) = (
-#     _gatherbcarg(first(activerows), first(defargs), first(ks), first(As)),
-#     _gatherbcargs(tail(activerows), tail(defargs), tail(ks), tail(As))...)
-# @inline _updateind(isactiverow, k) = isactiverow ? (k + one(k)) : k
-# @inline _updateind_all(::Tuple{}, ::Tuple{}) = ()
-# @inline _updateind_all(activerows, ks) = (
-#     _updateind(first(activerows), first(ks)),
-#     _updateind_all(tail(activerows), tail(ks))...)
-# @inline _updaterow(rowsentinel, isrowactive, presrow, k, stopk, A) =
-#     isrowactive ? (k < stopk ? A.rowval[k] : oftype(presrow, rowsentinel)) : presrow
-# @inline _updaterow_all(rowsentinel, ::Tuple{}, ::Tuple{}, ::Tuple{}, ::Tuple{}, ::Tuple{}) = ()
-# @inline _updaterow_all(rowsentinel, activerows, rows, ks, stopks, As) = (
-#     _updaterow(rowsentinel, first(activerows), first(rows), first(ks), first(stopks), first(As)),
-#     _updaterow_all(rowsentinel, tail(activerows), tail(rows), tail(ks), tail(stopks), tail(As))...)
-@inline function _fusedupdatebc(rowsentinel, activerow, row, defarg, k, stopk, A)
-    # returns (val, nextk, nextrow)
-    if row == activerow
-        nextk = k + one(k)
-        (A.nzval[k], nextk, (nextk < stopk ? A.rowval[nextk] : oftype(row, rowsentinel)))
-    else
-        (defarg, k, row)
-    end
-end
-@inline _fusedupdatebc_all(rowsentinel, activerow, rows, defargs, ks, stopks, As) =
-    _fusedupdatebc_all((#=vals=#), (#=nextks=#), (#=nextrows=#), rowsentinel, activerow, rows, defargs, ks, stopks, As)
-@inline _fusedupdatebc_all(vals, nextks, nextrows, rowsent, activerow, ::Tuple{}, ::Tuple{}, ::Tuple{}, ::Tuple{}, ::Tuple{}) =
-    (vals, nextks, nextrows)
-@inline function _fusedupdatebc_all(vals, nextks, nextrows, rowsentinel, activerow, rows, defargs, ks, stopks, As)
-    val, nextk, nextrow = _fusedupdatebc(rowsentinel, activerow, first(rows), first(defargs), first(ks), first(stopks), first(As))
-    return _fusedupdatebc_all((vals..., val), (nextks..., nextk), (nextrows..., nextrow),
-        rowsentinel, activerow, tail(rows), tail(defargs), tail(ks), tail(stopks), tail(As))
-end
-
-
-## broadcast/broadcast! operations involving sparse matrices and one or more broadcast scalars
-#
-# TODO: The minimal snippet below is not satisfying: A better solution would achieve
-# the same for (1) all broadcast scalar types (Base.Broadcast.containertype(x) == Any?) and
-# (2) any combination (number, order, type mixture) of broadcast scalars.
-#
-broadcast{Tf}(f::Tf, x::Union{Number,Bool}, A::SparseMatrixCSC) = broadcast(y -> f(x, y), A)
-broadcast{Tf}(f::Tf, A::SparseMatrixCSC, y::Union{Number,Bool}) = broadcast(x -> f(x, y), A)
-# NOTE: The following two method definitions work around #19096. These definitions should
-# be folded into the two preceding definitions on resolution of #19096.
-broadcast{Tf,T}(f::Tf, ::Type{T}, A::SparseMatrixCSC) = broadcast(y -> f(T, y), A)
-broadcast{Tf,T}(f::Tf, A::SparseMatrixCSC, ::Type{T}) = broadcast(x -> f(x, T), A)
-
-## Define unexported broadcast_zpreserving[!] methods
-# TODO: Sort out what to do with broadcast_zpreserving and dependencies
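Not part of the diff: a hedged sketch of the scalar/sparse combinations the closure-wrapping broadcast methods above cover (illustrative, Julia 0.6-era session assumed):

    A = sparse([1, 2], [1, 2], [1.0, 2.0])
    broadcast(*, 10, A)  # scalar first: forwarded as broadcast(y -> 10 * y, A); stays sparse since 10 * 0 == 0
    broadcast(+, A, 1)   # scalar second: f(0, 1) == 1 != 0, so every entry of the result is stored
    ceil.(Int, A)        # a dot call with a Type argument lowers to broadcast(ceil, Int, A), hitting the #19096 workaround methods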