diff --git a/src/array.jl b/src/array.jl index d6c3e5e5..000060d4 100644 --- a/src/array.jl +++ b/src/array.jl @@ -405,11 +405,12 @@ copy(A::CategoricalArray) = deepcopy(A) CatArrOrSub{T, N} = Union{CategoricalArray{T, N}, SubArray{<:Any, N, <:CategoricalArray{T}}} where {T, N} -function copyto!(dest::CatArrOrSub{T, N}, dstart::Integer, - src::CatArrOrSub{<:Any, N}, sstart::Integer, - n::Integer) where {T, N} +function copyto!(dest::CatArrOrSub, dstart::Integer, + src::CatArrOrSub, sstart::Integer, + n::Integer) n == 0 && return dest - n < 0 && throw(ArgumentError(string("tried to copy n=", n, " elements, but n should be nonnegative"))) + n < 0 && throw(ArgumentError(string("tried to copy n=", n, + " elements, but n should be nonnegative"))) destinds, srcinds = LinearIndices(dest), LinearIndices(src) (dstart ∈ destinds && dstart+n-1 ∈ destinds) || throw(BoundsError(dest, dstart:dstart+n-1)) (sstart ∈ srcinds && sstart+n-1 ∈ srcinds) || throw(BoundsError(src, sstart:sstart+n-1)) @@ -419,43 +420,49 @@ function copyto!(dest::CatArrOrSub{T, N}, dstart::Integer, dpool = pool(dest) spool = pool(src) - # try converting src to dest type to avoid partial copy corruption of dest - # in the event that the src cannot be copied into dest - slevs = convert(Vector{T}, levels(src)) - if eltype(src) >: Missing && !(eltype(dest) >: Missing) && !all(x -> x > 0, srefs) - throw(MissingException("cannot copy array with missing values to an array with element type $T")) - end - - newlevels, ordered = mergelevels(isordered(dest), levels(dest), slevs) - # Orderedness cannot be preserved if the source was unordered and new levels - # need to be added: new comparisons would only be based on the source's order - # (this is consistent with what happens when adding a new level via setindex!) - ordered &= isordered(src) | (length(newlevels) == length(levels(dest))) - if ordered != isordered(dest) - isa(dest, SubArray) && throw(ArgumentError("cannot set ordered=$ordered on dest SubArray as it would affect the parent. Found when trying to set levels to $newlevels.")) - ordered!(dest, ordered) - end - - # Simple case: replace all values - if !isa(dest, SubArray) && dstart == dstart == 1 && n == length(dest) == length(src) - # Set index to reflect refs - levels!(dpool, T[]) # Needed in case src and dest share some levels - levels!(dpool, index(spool)) - - # Set final levels in their visible order - levels!(dpool, newlevels) - - copyto!(drefs, srefs) - else # More work to do: preserve some values (and therefore index) - levels!(dpool, newlevels) - - indexmap = indexin(index(spool), index(dpool)) - - @inbounds for i = 0:(n-1) - x = srefs[sstart+i] - drefs[dstart+i] = x > 0 ? indexmap[x] : 0 + if isordered(dest) + remap = similar(levels(src), eltype(drefs)) + seen = falses(length(levels(src))) # needed as we have to dynamically collect levels + @inbounds for i in 0:n-1 + s = srefs[sstart+i] + if eltype(src) >: Missing && s == 0 + if eltype(dest) >: Missing + drefs[dstart+i] = 0 + else + throw(MissingException("cannot copy array with missing values to an array with element type $T")) + end + else + if seen[s] + drefs[dstart+i] = remap[s] + else + seen[s] = true + rm = index(dpool)[s] + remap[s] = rm + drefs[dstart+i] = rm + end + end + end + else + remap = Vector{eltype(drefs)}(undef, length(levels(src))) + seen = falses(length(levels(src))) # needed as we have to dynamically collect levels + @inbounds for i in 0:n-1 + s = srefs[sstart+i] + if eltype(src) >: Missing && s == 0 + if eltype(dest) >: Missing + drefs[dstart+i] = 0 + else + throw(MissingException("cannot copy array with missing values to an array with element type $T")) + end + else + if seen[s] + drefs[dstart+i] = remap[s] + else + seen[s] = true + dest[dstart+i] = src[sstart+i] + remap[s] = drefs[dstart+i] + end + end end - end dest @@ -464,11 +471,10 @@ end copyto!(dest::CatArrOrSub, src::CatArrOrSub) = copyto!(dest, 1, src, 1, length(src)) -copyto!(dest::CatArrOrSub, dstart::Integer, src::CatArrOrSub) = - copyto!(dest, dstart, src, 1, length(src)) - @static if VERSION >= v"0.7.0-DEV.3208" using Future + Future.copy!(dest::CatArrOrSub, src::AbstractArray) = + copyto!(dest, 1, src, 1, length(src)) Future.copy!(dest::CatArrOrSub, src::CatArrOrSub) = copyto!(dest, 1, src, 1, length(src)) end @@ -694,14 +700,12 @@ function Base.push!(A::CategoricalVector, item) A end -function Base.append!(A::CategoricalVector, B::CatArrOrSub) - levels!(A, union(levels(A), levels(B))) +function Base.append!(A::CategoricalVector, B::AbstractArray) len = length(A) len2 = length(B) resize!(A.refs, len + len2) - for i = 1:len2 - A[len + i] = B[i] - end + # As in Base, A will be left modified if it is not possible to copy B to A + copyto!(A, len + 1, B) return A end diff --git a/test/13_arraycommon.jl b/test/13_arraycommon.jl index 2f0422f9..94d4e542 100644 --- a/test/13_arraycommon.jl +++ b/test/13_arraycommon.jl @@ -188,11 +188,20 @@ end levels!(x, ["Young", "Middle", "Old"]) ordered!(x, true) + for copyf! in (copy!, copyto!) + y = CategoricalArray{Union{T, String}}(["X", "Z", "Y", "X"]) + @test_throws KeyError copyf!(x, y) + end + + x = CategoricalArray{Union{T, String}}(["Old", "Young", "Middle", "Young"]) + levels!(x, ["Young", "Middle", "Old"]) + for copyf! in (copy!, copyto!) y = CategoricalArray{Union{T, String}}(["X", "Z", "Y", "X"]) @test copyf!(x, y) === x @test x == y - @test levels(x) == ["Young", "Middle", "Old", "X", "Y", "Z"] + # the additional levels are added in order of appereance in y + @test levels(x) == ["Young", "Middle", "Old", "X", "Z", "Y"] @test !isordered(x) end @@ -200,6 +209,16 @@ end levels!(x, ["Young", "Middle", "Old"]) ordered!(x, true) y = CategoricalArray{Union{T, String}}(["X", "Z", "Y", "X"]) + # Test with missing values + if T === Missing + x[3] = missing + y[3] = missing + end + @test_throws KeyError copyto!(x, 1, y, 2) + + x = CategoricalArray{Union{T, String}}(["Old", "Young", "Middle", "Young"]) + levels!(x, ["Young", "Middle", "Old"]) + y = CategoricalArray{Union{T, String}}(["X", "Z", "Y", "X"]) a = (Union{String, Missing})["Z", "Y", "X", "Young"] # Test with missing values if T === Missing @@ -442,6 +461,32 @@ end end end + @testset "copyto! and copy! corner cases" begin + x = categorical([1:255;1:255], true) + y = categorical([1, 1000], true) + @test copyto!(x, 1, y, 1, 1)[1] == y[1] + @test_throws ErrorException copyto!(x, 1, y, 1, 1)[2] + + x = categorical([1,2,3]) + y = categorical([5,6,7]) + copyto!(x,y) + @test levels(x) = [1,2,3,5,6,7] + + @test copy!(x, [1,1,1]) == [1,1,1] + + x = categorical([1,2,3]) + y = categorical([5,6,7]) + @test copyto!(x, 1, y, 1, 1) == [5, 2, 3] + @test levels(x) == [1, 2, 3, 5] + + x = categorical([1,2,3], ordered=true) + y = categorical([10]) + @test_throws KeyError copyto!(x, 1, y, 1) + + x = categorical([1, 2, 3]) + @test copy!(x, [1, 1, 1]) == [1, 1, 1] + end + @testset "resize!()" begin x = CategoricalArray{Union{T, String}}(["Old", "Young", "Middle", "Young"]) @test resize!(x, 3) === x