From 8709c1fdd5264d280e0efd7f586ff8dabcb424dc Mon Sep 17 00:00:00 2001 From: Milan Bouchet-Valat Date: Sat, 7 Nov 2020 13:16:21 +0100 Subject: [PATCH] Restrict wrapped types to reduce invalidations (#310) Methods like `convert(::Type{Any}, ::CategoricalValue)` trigger lots of invalidations. Restricting the wrapped types to `AbstractString`, `AbstractChar` and `Number` alleviates this problem without affecting usability too much. One limitation of this approach is that e.g. `convert(Union{String, T}, CategoricalValue{String})` won't work anymore for any `T`, even though `convert(String, CategoricalValue{String})` will. Though thanks to special casing, it will work for `T <: Missing` and `T <: Nothing`. This also means that `CategoricalArray{Any}` is no longer supported. Adapt promotion rules to ensure that mixing e.g. strings and integers gives `Union{String, Integer}`. Drop support for `nothing`. Also remove the custom `repr` method, which was a legacy of the `CategoricalString` era. --- src/array.jl | 74 ++++++++++++++++++++------------- src/pool.jl | 2 +- src/recode.jl | 19 +++++---- src/typedefs.jl | 14 +++---- src/value.jl | 92 ++++++++++++++++++----------------------- test/04_constructors.jl | 4 +- test/05_convert.jl | 58 +++++++++++++++++++------- test/06_show.jl | 91 +++------------------------------------- test/07_levels.jl | 6 --- test/13_arraycommon.jl | 67 ++++++++++++++++++++---------- test/16_recode.jl | 28 ++++++------- 11 files changed, 213 insertions(+), 242 deletions(-) diff --git a/src/array.jl b/src/array.jl index f1079fe1..09fb8696 100644 --- a/src/array.jl +++ b/src/array.jl @@ -19,15 +19,41 @@ function reftype(sz::Int) end end +# This check is only there to print a user-friendly warning before +# a TypeError is thrown due to restrictions in the type signature +function check_supported_eltype(::Type{T}, ::Type{U}) where {T, U} + T === Symbol && + throw(ArgumentError("CategoricalArray no longer supports Symbol as element type "* + "as that forces 
recompiling too many Julia Base methods: " * + "use strings instead, e.g. via categorical(string.(x))")) + T <: Union{SupportedTypes, Missing} || + throw(ArgumentError("CategoricalArray only supports " * + "AbstractString, AbstractChar and Number element types " * + "(got element type $U)")) +end + fixstringtype(T::Type) = T <: SubString || T === AbstractString ? String : T fixstringtype(T::Union) = Union{fixstringtype(T.a), fixstringtype(T.b)} fixstringtype(::Type{Union{}}) = Union{} +# Find a narrow type that is supported to hold all elements if possible +function fixtype(A::AbstractArray{T}) where T + if T <: Union{SupportedTypes, Missing} + return fixstringtype(T) + else + U = fixstringtype(mapreduce(typeof, Base.promote_typejoin, A)) + check_supported_eltype(U, T) + return U + end +end + """ CategoricalArray{T}(undef, dims::Dims; levels=nothing, ordered=false) CategoricalArray{T}(undef, dims::Int...; levels=nothing, ordered=false) -Construct an uninitialized `CategoricalArray` with levels of type `T` and dimensions `dim`. +Construct an uninitialized `CategoricalArray` with levels of type +`T <: $SupportedTypes` and dimensions `dims`. + The `levels` keyword argument can be a vector specifying possible values for the data (this is equivalent to but more efficient than calling [`levels!`](@ref) on the resulting array). @@ -52,8 +78,6 @@ in ascending order; else, they are kept in their order of appearance in `A`. The `ordered` keyword argument determines whether the array values can be compared according to the ordering of levels or not (see [`isordered`](@ref)). - CategoricalArray(A::CategoricalArray; levels=nothing, ordered=false) - If `A` is already a `CategoricalArray`, its levels, orderedness and reference type are preserved unless explicitly overriden. 
""" @@ -62,7 +86,8 @@ function CategoricalArray end """ CategoricalVector{T}(undef, m::Int; levels=nothing, ordered=false) -Construct an uninitialized `CategoricalVector` with levels of type `T` and dimensions `dim`. +Construct an uninitialized `CategoricalVector` with levels of type +`T <: $SupportedTypes` and dimensions `dim`. The `levels` keyword argument can be a vector specifying possible values for the data (this is equivalent to but more efficient than calling [`levels!`](@ref) @@ -87,8 +112,6 @@ in ascending order; else, they are kept in their order of appearance in `A`. The `ordered` keyword argument determines whether the array values can be compared according to the ordering of levels or not (see [`isordered`](@ref)). - CategoricalVector(A::CategoricalVector; levels=nothing, ordered=false) - If `A` is already a `CategoricalVector`, its levels, orderedness and reference type are preserved unless explicitly overriden. """ @@ -97,7 +120,8 @@ function CategoricalVector end """ CategoricalMatrix{T}(undef, m::Int, n::Int; levels=nothing, ordered=false) -Construct an uninitialized `CategoricalMatrix` with levels of type `T` and dimensions `dim`. +Construct an uninitialized `CategoricalMatrix` with levels of type +`T <: $SupportedTypes` and dimensions `dim`. The `ordered` keyword argument determines whether the array values can be compared according to the ordering of levels or not (see [`isordered`](@ref)). @@ -118,8 +142,6 @@ in ascending order; else, they are kept in their order of appearance in `A`. The `ordered` keyword argument determines whether the array values can be compared according to the ordering of levels or not (see [`isordered`](@ref)). - CategoricalMatrix(A::CategoricalMatrix; levels=nothing, ordered=isordered(A)) - If `A` is already a `CategoricalMatrix`, its levels, orderedness and reference type are preserved unless explicitly overriden. 
""" @@ -137,6 +159,7 @@ function CategoricalArray{T, N, R}(::UndefInitializer, dims::NTuple{N,Int}; ordered::Bool=false) where {T, N, R} U = leveltype(nonmissingtype(T)) S = T >: Missing ? Union{U, Missing} : U + check_supported_eltype(S, T) V = CategoricalValue{U, R} levs = levels === nothing ? U[] : collect(U, levels) CategoricalArray{S, N}(zeros(R, dims), CategoricalPool{U, R, V}(levs, ordered)) @@ -231,6 +254,7 @@ function CategoricalArray{T, N, R}(A::AbstractArray; end # From AbstractArray + CategoricalArray{T, N}(A::AbstractArray{S, N}; levels::Union{AbstractVector, Nothing}=nothing, ordered::Bool=_isordered(A)) where {S, T, N} = @@ -242,17 +266,17 @@ CategoricalArray{T}(A::AbstractArray{S, N}; CategoricalArray(A::AbstractArray{T, N}; levels::Union{AbstractVector, Nothing}=nothing, ordered::Bool=_isordered(A)) where {T, N} = - CategoricalArray{fixstringtype(T), N}(A, levels=levels, ordered=ordered) + CategoricalArray{fixtype(A), N}(A, levels=levels, ordered=ordered) CategoricalVector(A::AbstractVector{T}; levels::Union{AbstractVector, Nothing}=nothing, ordered::Bool=_isordered(A)) where {T} = - CategoricalArray{fixstringtype(T), 1}(A, levels=levels, ordered=ordered) + CategoricalArray{fixtype(A), 1}(A, levels=levels, ordered=ordered) CategoricalMatrix(A::AbstractMatrix{T}; levels::Union{AbstractVector, Nothing}=nothing, ordered::Bool=_isordered(A)) where {T} = - CategoricalArray{fixstringtype(T), 2}(A, levels=levels, ordered=ordered) + CategoricalArray{fixtype(A), 2}(A, levels=levels, ordered=ordered) # From CategoricalArray (preserve R) CategoricalArray{T, N}(A::CategoricalArray{S, N, R}; @@ -286,12 +310,12 @@ convert(::Type{CategoricalArray{T, N}}, A::AbstractArray{S, N}) where {S, T, N} convert(::Type{CategoricalArray{T}}, A::AbstractArray{S, N}) where {S, T, N} = convert(CategoricalArray{T, N}, A) convert(::Type{CategoricalArray}, A::AbstractArray{T, N}) where {T, N} = - convert(CategoricalArray{T, N}, A) + convert(CategoricalArray{fixtype(A), N}, A) 
convert(::Type{CategoricalVector{T}}, A::AbstractVector) where {T} = convert(CategoricalVector{T, DefaultRefType}, A) convert(::Type{CategoricalVector}, A::AbstractVector{T}) where {T} = - convert(CategoricalVector{T}, A) + convert(CategoricalVector{fixtype(A)}, A) convert(::Type{CategoricalVector{T}}, A::CategoricalVector{S, R}) where {S, T, R <: Integer} = convert(CategoricalVector{T, R}, A) @@ -301,7 +325,7 @@ convert(::Type{CategoricalVector}, A::CategoricalVector) = A convert(::Type{CategoricalMatrix{T}}, A::AbstractMatrix) where {T} = convert(CategoricalMatrix{T, DefaultRefType}, A) convert(::Type{CategoricalMatrix}, A::AbstractMatrix{T}) where {T} = - convert(CategoricalMatrix{T}, A) + convert(CategoricalMatrix{fixtype(A)}, A) convert(::Type{CategoricalMatrix{T}}, A::CategoricalMatrix{S, R}) where {S, T, R <: Integer} = convert(CategoricalMatrix{T, R}, A) @@ -313,6 +337,8 @@ convert(::Type{CategoricalArray{T, N, R}}, A::AbstractArray{S, N}) where {S, T, function _convert(::Type{CategoricalArray{T, N, R}}, A::AbstractArray{S, N}; levels::Union{AbstractVector, Nothing}=nothing) where {S, T, N, R} + check_supported_eltype(T, T) + res = CategoricalArray{T, N, R}(undef, size(A), levels=levels) copyto!(res, A) @@ -698,7 +724,7 @@ function vcat(A::CategoricalArray...) [x==0 ? 0 : ii[x] for x in a.refs]::Array{Int,ndims(a)} end - T = Base.promote_eltype(A...) >: Missing ? + T = cat_promote_eltype(A...) >: Missing ? Union{eltype(newlevels), Missing} : eltype(newlevels) refs = DefaultRefType[refsvec...;] pool = CategoricalPool(newlevels, ordered) @@ -912,7 +938,7 @@ are preserved unless explicitly overriden. compress::Bool=false) where {T, N} # @inline is needed so that return type is inferred when compress is not provided RefType = compress ? 
reftype(length(unique(A))) : DefaultRefType - CategoricalArray{fixstringtype(T), N, RefType}(A, levels=levels, ordered=ordered) + CategoricalArray{fixtype(A), N, RefType}(A, levels=levels, ordered=ordered) end @inline function categorical(A::CategoricalArray{T, N, R}; levels::Union{AbstractVector, Nothing}=nothing, @@ -920,7 +946,7 @@ end compress::Bool=false) where {T, N, R} # @inline is needed so that return type is inferred when compress is not provided RefType = compress ? reftype(length(CategoricalArrays.levels(A))) : R - CategoricalArray{fixstringtype(T), N, RefType}(A, levels=levels, ordered=ordered) + CategoricalArray{T, N, RefType}(A, levels=levels, ordered=ordered) end function in(x::Any, y::CategoricalArray{T, N, R}) where {T, N, R} @@ -1046,21 +1072,11 @@ end StructTypes.construct(::Type{<:CategoricalArray{Union{Missing, T}}}, A::AbstractVector) where {T} = - categoricalmissing(T, A) + CategoricalArray{Union{Missing, T}}(replace(A, nothing=>missing)) StructTypes.construct(::Type{<:CategoricalArray{Union{Missing, T}}}, A::Vector) where {T} = - categoricalmissing(T, A) -categoricalmissing(T, A::AbstractVector) = CategoricalArray{Union{Missing, T}}(replace(A, nothing=>missing)) -StructTypes.construct(::Type{<:CategoricalArray{Union{Nothing, T}}}, - A::AbstractVector) where {T} = - categoricalnothing(T, A) -StructTypes.construct(::Type{<:CategoricalArray{Union{Nothing, T}}}, - A::Vector) where {T} = - categoricalnothing(T, A) -categoricalnothing(T, A::AbstractVector) = CategoricalArray{Union{Nothing, T}}(A) - # DataAPI refarray/refvalue/refpool support struct CategoricalRefPool{T, P} <: AbstractVector{T} pool::P diff --git a/src/pool.jl b/src/pool.jl index 395340f6..3d969da2 100644 --- a/src/pool.jl +++ b/src/pool.jl @@ -62,7 +62,7 @@ avoid doing a dict lookup twice end function mergelevels(ordered, levels...) - T = Base.promote_eltype(levels...) + T = cat_promote_eltype(levels...) 
res = Vector{T}(undef, 0) nonempty_lv = findfirst(!isempty, levels) diff --git a/src/recode.jl b/src/recode.jl index d333828f..f3ee6089 100644 --- a/src/recode.jl +++ b/src/recode.jl @@ -274,8 +274,9 @@ recode!(a::AbstractArray, default::Any, pairs::Pair...) = recode!(a, a, default, pairs...) recode!(a::AbstractArray, pairs::Pair...) = recode!(a, a, nothing, pairs...) -promote_valuetype(x::Pair{K, V}) where {K, V} = V -promote_valuetype(x::Pair{K, V}, y::Pair...) where {K, V} = promote_type(V, promote_valuetype(y...)) +cat_promote_valuetype(x::Pair{K, V}) where {K, V} = V +cat_promote_valuetype(x::Pair{K, V}, y::Pair...) where {K, V} = + cat_promote_type(V, cat_promote_valuetype(y...)) keytype_hasmissing(x::Pair{K}) where {K} = K === Missing keytype_hasmissing(x::Pair{K}, y::Pair...) where {K} = K === Missing || keytype_hasmissing(y...) @@ -350,11 +351,11 @@ recode(a::AbstractArray, pairs::Pair...) = recode(a, nothing, pairs...) recode(a::CategoricalArray, pairs::Pair...) = recode(a, nothing, pairs...) function recode(a::AbstractArray, default::Any, pairs::Pair...) - V = promote_valuetype(pairs...) + V = cat_promote_valuetype(pairs...) # T cannot take into account eltype(src), since we can't know # whether it matters at compile time (all levels recoded or not) # and using a wider type than necessary would be annoying - T = default isa Nothing ? V : promote_type(typeof(default), V) + T = default isa Nothing ? V : cat_promote_type(typeof(default), V) # Exception 1: if T === Missing and default not missing, # assume the caller wants to recode only some values to missing, # but accept original values @@ -371,11 +372,11 @@ function recode(a::AbstractArray, default::Any, pairs::Pair...) end function recode(a::CategoricalArray{S, N, R}, default::Any, pairs::Pair...) where {S, N, R} - V = promote_valuetype(pairs...) + V = cat_promote_valuetype(pairs...) 
# T cannot take into account eltype(src), since we can't know # whether it matters at compile time (all levels recoded or not) # and using a wider type than necessary would be annoying - T = default isa Nothing ? V : promote_type(typeof(default), V) + T = default isa Nothing ? V : cat_promote_type(typeof(default), V) # Exception 1: if T === Missing and default not missing, # assume the caller wants to recode only some values to missing, # but accept original values @@ -396,13 +397,13 @@ end function Base.replace(a::CategoricalArray{S, N, R}, pairs::Pair...) where {S, N, R} # Base.replace(a::Array, pairs::Pair...) uses a wider type promotion than # recode. It promotes the source type S with the replaced types T. - T = promote_valuetype(pairs...) + T = cat_promote_valuetype(pairs...) # Exception: replacing missings # Example: replace(categorical([missing,1.5]), missing=>0) if keytype_hasmissing(pairs...) - dest = CategoricalArray{promote_type(nonmissingtype(S), T), N, R}(undef, size(a)) + dest = CategoricalArray{cat_promote_type(nonmissingtype(S), T), N, R}(undef, size(a)) else - dest = CategoricalArray{promote_type(S, T), N, R}(undef, size(a)) + dest = CategoricalArray{cat_promote_type(S, T), N, R}(undef, size(a)) end recode!(dest, a, nothing, pairs...) 
end diff --git a/src/typedefs.jl b/src/typedefs.jl index f88a4c8d..efd98fa1 100644 --- a/src/typedefs.jl +++ b/src/typedefs.jl @@ -1,4 +1,5 @@ const DefaultRefType = UInt32 +const SupportedTypes = Union{AbstractString, AbstractChar, Number} ## Pools @@ -6,7 +7,7 @@ const DefaultRefType = UInt32 # * `T` type of categorized values # * `R` integer type for referencing category levels # * `V` categorical value type -mutable struct CategoricalPool{T, R <: Integer, V} +mutable struct CategoricalPool{T <: SupportedTypes, R <: Integer, V} levels::Vector{T} # category levels ordered by their reference codes invindex::Dict{T, R} # map from category levels to their reference codes valindex::Vector{V} # "category value" objects 1-to-1 matching `index` @@ -42,9 +43,6 @@ mutable struct CategoricalPool{T, R <: Integer, V} function CategoricalPool{T, R, V}(levels::Vector{T}, invindex::Dict{T, R}, ordered::Bool) where {T, R, V} - if T <: CategoricalValue && T !== Union{} - throw(ArgumentError("Level type $T cannot be a categorical value type")) - end if !(V <: CategoricalValue) throw(ArgumentError("Type $V is not a categorical value type")) end @@ -70,7 +68,7 @@ end ## Values """ - CategoricalValue{T, R <: Integer} + CategoricalValue{T <: $SupportedTypes, R <: Integer} A wrapper around a value of type `T` corresponding to a level in a `CategoricalPool`. @@ -82,7 +80,7 @@ if [`isordered`](@ref) is `true` for the value's pool, and in that case the order of the pool's [`levels`](@ref DataAPI.levels) is used rather than the standard ordering of values of type `T`. 
""" -struct CategoricalValue{T, R <: Integer} +struct CategoricalValue{T <: SupportedTypes, R <: Integer} level::R pool::CategoricalPool{T, R, CategoricalValue{T, R}} end @@ -96,7 +94,9 @@ end # * `V` original type of elements (excluding Missing) before categorization # * `C` categorical value type # * `U` type of missing value, `Union{}` if missing values are not accepted -abstract type AbstractCategoricalArray{T, N, R, V, C, U} <: AbstractArray{Union{C, U}, N} end +abstract type AbstractCategoricalArray{T <: Union{CategoricalValue, SupportedTypes, Missing}, N, + R <: Integer, V, C <: CategoricalValue, U} <: + AbstractArray{Union{C, U}, N} end const AbstractCategoricalVector{T, R, V, C, U} = AbstractCategoricalArray{T, 1, R, V, C, U} const AbstractCategoricalMatrix{T, R, V, C, U} = AbstractCategoricalArray{T, 2, R, V, C, U} diff --git a/src/value.jl b/src/value.jl index c97c996e..c725106b 100644 --- a/src/value.jl +++ b/src/value.jl @@ -42,35 +42,40 @@ levelcode(x::Missing) = missing DataAPI.levels(x::CategoricalValue) = levels(pool(x)) -Base.promote_rule(::Type{C}, ::Type{T}) where {C <: CategoricalValue, T} = promote_type(leveltype(C), T) -Base.promote_rule(::Type{C1}, ::Type{Union{C2, Missing}}) where {C1 <: CategoricalValue, C2 <: CategoricalValue} = - Union{promote_type(C1, C2), Missing} +function cat_promote_type(::Type{S}, ::Type{T}) where {S, T} + U = promote_type(S, T) + U <: Union{SupportedTypes, Missing} ? + U : typeintersect(Union{SupportedTypes, Missing}, Union{S, T}) +end + +cat_promote_eltype() = Union{} +cat_promote_eltype(v1, vs...) 
= cat_promote_type(eltype(v1), cat_promote_eltype(vs...)) + +Base.promote_rule(::Type{C}, ::Type{T}) where {C <: CategoricalValue, T} = + promote_type(leveltype(C), T) + +Base.promote_rule(::Type{C}, ::Type{T}) where {C <: CategoricalValue, T >: Missing} = + Union{promote_rule(C, nonmissingtype(T)), Missing} + # To fix ambiguities with definitions from Base Base.promote_rule(::Type{C}, ::Type{Missing}) where {C <: CategoricalValue} = Union{C, Missing} Base.promote_rule(::Type{C}, ::Type{Any}) where {C <: CategoricalValue} = Any - Base.promote_rule(::Type{C1}, ::Type{C2}) where - {R1<:Integer, R2<:Integer, C1<:CategoricalValue{<:Any, R1}, C2<:CategoricalValue{<:Any, R2}} = - CategoricalValue{promote_type(leveltype(C1), leveltype(C2)), promote_type(R1, R2)} + {R1<:Integer, R2<:Integer, + C1<:CategoricalValue{<: SupportedTypes, R1}, + C2<:CategoricalValue{<: SupportedTypes, R2}} = + CategoricalValue{cat_promote_type(leveltype(C1), leveltype(C2)), promote_type(R1, R2)} Base.promote_rule(::Type{C1}, ::Type{C2}) where {C1<:CategoricalValue, C2<:CategoricalValue} = - CategoricalValue{promote_type(leveltype(C1), leveltype(C2))} + CategoricalValue{cat_promote_type(leveltype(C1), leveltype(C2))} -Base.convert(::Type{Ref}, x::CategoricalValue) = RefValue{leveltype(x)}(x) -Base.convert(::Type{String}, x::CategoricalValue) = convert(String, get(x)) -Base.convert(::Type{Any}, x::CategoricalValue) = x - -# Defined separately to avoid ambiguities -Base.convert(::Type{T}, x::T) where {T <: CategoricalValue} = x -Base.convert(::Type{Union{T, Missing}}, x::T) where {T <: CategoricalValue} = x -Base.convert(::Type{Union{T, Nothing}}, x::T) where {T <: CategoricalValue} = x # General fallbacks -Base.convert(::Type{S}, x::T) where {S, T <: CategoricalValue} = - T <: S ? x : convert(S, get(x)) -Base.convert(::Type{Union{S, Missing}}, x::T) where {S, T <: CategoricalValue} = - T <: Union{S, Missing} ? 
x : convert(Union{S, Missing}, get(x)) -Base.convert(::Type{Union{S, Nothing}}, x::T) where {S, T <: CategoricalValue} = - T <: Union{S, Nothing} ? x : convert(Union{S, Nothing}, get(x)) +Base.convert(::Type{S}, x::CategoricalValue) where {S <: SupportedTypes} = + convert(S, get(x)) +Base.convert(::Type{Union{S, Missing}}, x::CategoricalValue) where {S <: SupportedTypes} = + convert(Union{S, Missing}, get(x)) +Base.convert(::Type{Union{S, Nothing}}, x::CategoricalValue) where {S <: SupportedTypes} = + convert(Union{S, Nothing}, get(x)) (::Type{T})(x::T) where {T <: CategoricalValue} = x @@ -78,18 +83,18 @@ Base.Broadcast.broadcastable(x::CategoricalValue) = Ref(x) function Base.show(io::IO, x::CategoricalValue) if nonmissingtype(get(io, :typeinfo, Any)) === nonmissingtype(typeof(x)) - print(io, repr(x)) - elseif isordered(pool(x)) - @printf(io, "%s %s (%i/%i)", - typeof(x), repr(x), - levelcode(x), length(pool(x))) + show(io, get(x)) else - @printf(io, "%s %s", typeof(x), repr(x)) + print(io, typeof(x)) + print(io, ' ') + show(io, get(x)) + if isordered(pool(x)) + @printf(io, " (%i/%i)", levelcode(x), length(pool(x))) + end end end Base.print(io::IO, x::CategoricalValue) = print(io, get(x)) -Base.repr(x::CategoricalValue) = repr(get(x)) Base.string(x::CategoricalValue) = string(get(x)) Base.write(io::IO, x::CategoricalValue) = write(io, get(x)) Base.String(x::CategoricalValue{<:AbstractString}) = String(get(x)) @@ -102,15 +107,8 @@ Base.String(x::CategoricalValue{<:AbstractString}) = String(get(x)) end end -Base.:(==)(::CategoricalValue, ::Missing) = missing -Base.:(==)(::Missing, ::CategoricalValue) = missing - -# To fix ambiguities with Base -Base.:(==)(x::CategoricalValue, y::WeakRef) = get(x) == y -Base.:(==)(x::WeakRef, y::CategoricalValue) = y == x - -Base.:(==)(x::CategoricalValue, y::Any) = get(x) == y -Base.:(==)(x::Any, y::CategoricalValue) = y == x +Base.:(==)(x::CategoricalValue, y::SupportedTypes) = get(x) == y +Base.:(==)(x::SupportedTypes, 
y::CategoricalValue) = x == get(y) @inline function Base.isequal(x::CategoricalValue, y::CategoricalValue) if pool(x) === pool(y) @@ -120,11 +118,8 @@ Base.:(==)(x::Any, y::CategoricalValue) = y == x end end -Base.isequal(x::CategoricalValue, y::Any) = isequal(get(x), y) -Base.isequal(x::Any, y::CategoricalValue) = isequal(y, x) - -Base.isequal(::CategoricalValue, ::Missing) = false -Base.isequal(::Missing, ::CategoricalValue) = false +Base.isequal(x::CategoricalValue, y::SupportedTypes) = isequal(get(x), y) +Base.isequal(x::SupportedTypes, y::CategoricalValue) = isequal(x, get(y)) Base.in(x::CategoricalValue, y::AbstractRange{T}) where {T<:Integer} = get(x) in y @@ -139,10 +134,8 @@ function Base.isless(x::CategoricalValue, y::CategoricalValue) end end -Base.isless(x::CategoricalValue, y) = levelcode(x) < levelcode(x.pool[get(x.pool, y)]) -Base.isless(::CategoricalValue, ::Missing) = true -Base.isless(y, x::CategoricalValue) = levelcode(x.pool[get(x.pool, y)]) < levelcode(x) -Base.isless(::Missing, ::CategoricalValue) = false +Base.isless(x::CategoricalValue, y::SupportedTypes) = levelcode(x) < levelcode(x.pool[get(x.pool, y)]) +Base.isless(y::SupportedTypes, x::CategoricalValue) = levelcode(x.pool[get(x.pool, y)]) < levelcode(x) function Base.:<(x::CategoricalValue, y::CategoricalValue) if pool(x) !== pool(y) @@ -154,7 +147,7 @@ function Base.:<(x::CategoricalValue, y::CategoricalValue) end end -function Base.:<(x::CategoricalValue, y) +function Base.:<(x::CategoricalValue, y::SupportedTypes) if !isordered(pool(x)) throw(ArgumentError("Unordered CategoricalValue objects cannot be tested for order using <. Use isless instead, or call the ordered! 
function on the parent array to change this")) else @@ -162,7 +155,7 @@ function Base.:<(x::CategoricalValue, y) end end -function Base.:<(y, x::CategoricalValue) +function Base.:<(y::SupportedTypes, x::CategoricalValue) if !isordered(pool(x)) throw(ArgumentError("Unordered CategoricalValue objects cannot be tested for order using <. Use isless instead, or call the ordered! function on the parent array to change this")) else @@ -170,9 +163,6 @@ function Base.:<(y, x::CategoricalValue) end end -Base.:<(::CategoricalValue, ::Missing) = missing -Base.:<(::Missing, ::CategoricalValue) = missing - # JSON of CategoricalValue is JSON of the value it refers to JSON.lower(x::CategoricalValue) = JSON.lower(get(x)) DataAPI.defaultarray(::Type{CategoricalValue{T, R}}, N) where {T, R} = diff --git a/test/04_constructors.jl b/test/04_constructors.jl index 7975c1cc..a338c044 100644 --- a/test/04_constructors.jl +++ b/test/04_constructors.jl @@ -5,9 +5,9 @@ using CategoricalArrays: DefaultRefType @testset "Type parameter constraints" begin # cannot use categorical value as level type - @test_throws ArgumentError CategoricalPool{CategoricalValue{Int,UInt8}, UInt8, CategoricalValue{CategoricalValue{Int,UInt8},UInt8}}( + @test_throws TypeError CategoricalPool{CategoricalValue{Int,UInt8}, UInt8, CategoricalValue{CategoricalValue{Int,UInt8},UInt8}}( Dict{CategoricalValue{Int,UInt8}, UInt8}(), false) - @test_throws ArgumentError CategoricalPool{CategoricalValue{Int,UInt8}, UInt8, CategoricalValue{CategoricalValue{Int,UInt8},UInt8}}( + @test_throws TypeError CategoricalPool{CategoricalValue{Int,UInt8}, UInt8, CategoricalValue{CategoricalValue{Int,UInt8},UInt8}}( CategoricalValue{Int,UInt8}[], false) # cannot use non-categorical value as categorical value type @test_throws ArgumentError CategoricalPool{Int, UInt8, Int}(Int[], false) diff --git a/test/05_convert.jl b/test/05_convert.jl index 004e378e..c08981cf 100644 --- a/test/05_convert.jl +++ b/test/05_convert.jl @@ -65,43 +65,78 @@ end 
CategoricalValue{Float64, UInt32} @test promote_type(CategoricalValue{Int, UInt8}, CategoricalValue{Float64}) === CategoricalValue{Float64} + @test promote_type(CategoricalValue{Int, UInt8}, CategoricalValue{String}) === + CategoricalValue{Union{Int, String}} # Tests that return Any before Julia 1.3 are due to JuliaLang/julia#29348 if VERSION >= v"1.3.0-DEV" @test promote_type(CategoricalValue{Int}, Union{CategoricalValue{Float64}, Missing}) === - Union{Missing, Float64} + Union{CategoricalValue{Float64}, Missing} + @test promote_type(CategoricalValue{Int}, + Union{CategoricalValue{String}, Missing}) === + Union{CategoricalValue{Union{Int, String}}, Missing} @test promote_type(CategoricalValue{Int, UInt8}, Union{CategoricalValue{Float64, UInt32}, Missing}) === Union{CategoricalValue{Float64, UInt32}, Missing} + @test promote_type(CategoricalValue{Int, UInt8}, + Union{CategoricalValue{String, UInt32}, Missing}) === + Union{CategoricalValue{Union{Int, String}, UInt32}, Missing} @test promote_type(Union{CategoricalValue{Int}, Missing}, CategoricalValue{Float64}) === - Union{Missing, Float64} + Union{CategoricalValue{Float64}, Missing} + @test promote_type(Union{CategoricalValue{Int}, Missing}, + CategoricalValue{String}) === + Union{CategoricalValue{Union{Int, String}}, Missing} @test promote_type(Union{CategoricalValue{Int, UInt8}, Missing}, CategoricalValue{Float64, UInt32}) === Union{CategoricalValue{Float64, UInt32}, Missing} + @test promote_type(Union{CategoricalValue{Int, UInt8}, Missing}, + CategoricalValue{String, UInt32}) === + Union{CategoricalValue{Union{Int, String}, UInt32}, Missing} @test promote_type(Union{CategoricalValue{Int}, Missing}, Union{CategoricalValue{Float64}, Missing}) === - Union{Missing, Float64} + Union{CategoricalValue{Float64}, Missing} + @test promote_type(Union{CategoricalValue{Int}, Missing}, + Union{CategoricalValue{String}, Missing}) === + Union{CategoricalValue{Union{Int, String}}, Missing} else @test 
promote_type(CategoricalValue{Int}, Union{CategoricalValue{Float64}, Missing}) === - Any + Union{CategoricalValue{Float64}, Missing} + @test promote_type(CategoricalValue{Int}, + Union{CategoricalValue{String}, Missing}) === + Union{CategoricalValue{Union{Int, String}}, Missing} @test promote_type(CategoricalValue{Int, UInt8}, Union{CategoricalValue{Float64, UInt32}, Missing}) === Union{CategoricalValue{Float64, UInt32}, Missing} + @test promote_type(CategoricalValue{Int, UInt8}, + Union{CategoricalValue{String, UInt32}, Missing}) === + Union{CategoricalValue{Union{Int, String}, UInt32}, Missing} @test promote_type(Union{CategoricalValue{Int}, Missing}, CategoricalValue{Float64}) === - Any + Union{CategoricalValue{Float64}, Missing} + @test promote_type(Union{CategoricalValue{Int}, Missing}, + CategoricalValue{String}) === + Union{CategoricalValue{Union{Int, String}}, Missing} @test promote_type(Union{CategoricalValue{Int, UInt8}, Missing}, - CategoricalValue{Float64, UInt32}) === + CategoricalValue{Float64, UInt32}) === Union{CategoricalValue{Float64, UInt32}, Missing} + @test promote_type(Union{CategoricalValue{Int, UInt8}, Missing}, + CategoricalValue{String, UInt32}) === + Union{CategoricalValue{Union{Int, String}, UInt32}, Missing} + @test promote_type(Union{CategoricalValue{Int}, Missing}, + Union{CategoricalValue{Float64}, Missing}) === + Any @test promote_type(Union{CategoricalValue{Int}, Missing}, - Union{CategoricalValue{Float64}, Missing}) === + Union{CategoricalValue{String}, Missing}) === Any end @test promote_type(Union{CategoricalValue{Int, UInt8}, Missing}, Union{CategoricalValue{Float64, UInt32}, Missing}) === Union{CategoricalValue{Float64, UInt32}, Missing} + @test promote_type(Union{CategoricalValue{Int, UInt8}, Missing}, + Union{CategoricalValue{String, UInt32}, Missing}) === + Union{CategoricalValue{Union{Int, String}, UInt32}, Missing} @test promote_type(CategoricalValue, Missing) === Union{CategoricalValue, Missing} @test 
promote_type(CategoricalValue{Int}, Missing) === Union{CategoricalValue{Int}, Missing} @@ -115,15 +150,6 @@ end @test convert(CategoricalPool{Float64, UInt8}, pool).ordered === true end -@testset "convert() with Union{T, Nothing}" begin - pool = CategoricalPool([nothing, 2, 3]) - v1 = CategoricalValue(1, pool) - v2 = CategoricalValue(2, pool) - @test convert(Union{Int, Nothing}, v1) === nothing - @test convert(Union{Int, Nothing}, v2) === 2 - @test convert(Union{Float64, Nothing}, v2) === 2.0 -end - @testset "levelcode" begin pool = CategoricalPool{Int,UInt8}([2, 1, 3]) for i in 1:3 diff --git a/test/06_show.jl b/test/06_show.jl index 4e58a2a8..abb2ee5c 100644 --- a/test/06_show.jl +++ b/test/06_show.jl @@ -18,13 +18,13 @@ using CategoricalArrays @test sprint(show, pool) == "$CategoricalPool{String,UInt32}([\"c\", \"b\", \"a\"])" @test sprint(show, opool) == "$CategoricalPool{String,UInt32}([\"c\", \"b\", \"a\"]) with ordered levels" - @test sprint(show, nv1) == "$CategoricalValue{String,UInt32} \"c\"" - @test sprint(show, nv2) == "$CategoricalValue{String,UInt32} \"b\"" - @test sprint(show, nv3) == "$CategoricalValue{String,UInt32} \"a\"" + @test sprint(show, nv1) == repr(nv1) == "$CategoricalValue{String,UInt32} \"c\"" + @test sprint(show, nv2) == repr(nv2) == "$CategoricalValue{String,UInt32} \"b\"" + @test sprint(show, nv3) == repr(nv3) == "$CategoricalValue{String,UInt32} \"a\"" - @test sprint(show, ov1) == "$CategoricalValue{String,UInt32} \"c\" (1/3)" - @test sprint(show, ov2) == "$CategoricalValue{String,UInt32} \"b\" (2/3)" - @test sprint(show, ov3) == "$CategoricalValue{String,UInt32} \"a\" (3/3)" + @test sprint(show, ov1) == repr(ov1) =="$CategoricalValue{String,UInt32} \"c\" (1/3)" + @test sprint(show, ov2) == repr(ov2) == "$CategoricalValue{String,UInt32} \"b\" (2/3)" + @test sprint(show, ov3) == repr(ov3) =="$CategoricalValue{String,UInt32} \"a\" (3/3)" @test sprint(show, nv1, context=:typeinfo=>typeof(nv1)) == "\"c\"" @test sprint(show, nv2, 
context=:typeinfo=>typeof(nv2)) == "\"b\"" @@ -46,10 +46,6 @@ using CategoricalArrays @test String(nv2) == String(ov2) == "b" @test String(nv3) == String(ov3) == "a" - @test repr(nv1) == repr(ov1) == "\"c\"" - @test repr(nv2) == repr(ov2) == "\"b\"" - @test repr(nv3) == repr(ov3) == "\"a\"" - b = IOBuffer() @test write(b, nv1) == 1 @test String(take!(b)) == "c" @@ -65,78 +61,6 @@ using CategoricalArrays @test String(take!(b)) == "a" end -@testset "show() for CategoricalPool{Date} and its values" begin - levs = [Date(1999, 12), Date(1991, 8), Date(1993, 10)] - pool = CategoricalPool(levs) - opool = CategoricalPool(levs, true) - - nv1 = CategoricalValue(1, pool) - nv2 = CategoricalValue(2, pool) - nv3 = CategoricalValue(3, pool) - - ov1 = CategoricalValue(1, opool) - ov2 = CategoricalValue(2, opool) - ov3 = CategoricalValue(3, opool) - - if VERSION >= v"1.5.0-DEV" - @test sprint(show, pool) == "$CategoricalPool{$Date,UInt32}($levs)" - @test sprint(show, opool) == "$CategoricalPool{$Date,UInt32}($levs) with ordered levels" - - @test sprint(show, nv1) == "$CategoricalValue{$Date,UInt32} $Date(\"1999-12-01\")" - @test sprint(show, nv2) == "$CategoricalValue{$Date,UInt32} $Date(\"1991-08-01\")" - @test sprint(show, nv3) == "$CategoricalValue{$Date,UInt32} $Date(\"1993-10-01\")" - - @test sprint(show, ov1) == "$CategoricalValue{$Date,UInt32} $Date(\"1999-12-01\") (1/3)" - @test sprint(show, ov2) == "$CategoricalValue{$Date,UInt32} $Date(\"1991-08-01\") (2/3)" - @test sprint(show, ov3) == "$CategoricalValue{$Date,UInt32} $Date(\"1993-10-01\") (3/3)" - - @test sprint(show, nv1, context=:typeinfo=>typeof(nv1)) == "$Date(\"1999-12-01\")" - @test sprint(show, nv2, context=:typeinfo=>typeof(nv2)) == "$Date(\"1991-08-01\")" - @test sprint(show, nv3, context=:typeinfo=>typeof(nv3)) == "$Date(\"1993-10-01\")" - - @test sprint(show, ov1, context=:typeinfo=>typeof(ov1)) == "$Date(\"1999-12-01\")" - @test sprint(show, ov2, context=:typeinfo=>typeof(ov2)) == "$Date(\"1991-08-01\")" - 
@test sprint(show, ov3, context=:typeinfo=>typeof(ov3)) == "$Date(\"1993-10-01\")" - else - @test sprint(show, pool) == "$CategoricalPool{$Date,UInt32}([1999-12-01, 1991-08-01, 1993-10-01])" - @test sprint(show, opool) == "$CategoricalPool{$Date,UInt32}([1999-12-01, 1991-08-01, 1993-10-01]) with ordered levels" - - @test sprint(show, nv1) == "$CategoricalValue{$Date,UInt32} 1999-12-01" - @test sprint(show, nv2) == "$CategoricalValue{$Date,UInt32} 1991-08-01" - @test sprint(show, nv3) == "$CategoricalValue{$Date,UInt32} 1993-10-01" - - @test sprint(show, ov1) == "$CategoricalValue{$Date,UInt32} 1999-12-01 (1/3)" - @test sprint(show, ov2) == "$CategoricalValue{$Date,UInt32} 1991-08-01 (2/3)" - @test sprint(show, ov3) == "$CategoricalValue{$Date,UInt32} 1993-10-01 (3/3)" - - @test sprint(show, nv1, context=:typeinfo=>typeof(nv1)) == "1999-12-01" - @test sprint(show, nv2, context=:typeinfo=>typeof(nv2)) == "1991-08-01" - @test sprint(show, nv3, context=:typeinfo=>typeof(nv3)) == "1993-10-01" - - @test sprint(show, ov1, context=:typeinfo=>typeof(ov1)) == "1999-12-01" - @test sprint(show, ov2, context=:typeinfo=>typeof(ov2)) == "1991-08-01" - @test sprint(show, ov3, context=:typeinfo=>typeof(ov3)) == "1993-10-01" - end - - @test sprint(print, nv1) == sprint(print, ov1) == "1999-12-01" - @test sprint(print, nv2) == sprint(print, ov2) == "1991-08-01" - @test sprint(print, nv3) == sprint(print, ov3) == "1993-10-01" - - @test string(nv1) == string(ov1) == "1999-12-01" - @test string(nv2) == string(ov2) == "1991-08-01" - @test string(nv3) == string(ov3) == "1993-10-01" - - if VERSION >= v"1.5.0-DEV" - @test repr(nv1) == repr(ov1) == "$Date(\"1999-12-01\")" - @test repr(nv2) == repr(ov2) == "$Date(\"1991-08-01\")" - @test repr(nv3) == repr(ov3) == "$Date(\"1993-10-01\")" - else - @test repr(nv1) == repr(ov1) == "1999-12-01" - @test repr(nv2) == repr(ov2) == "1991-08-01" - @test repr(nv3) == repr(ov3) == "1993-10-01" - end -end - using JSON @testset "JSON.lower" for pool in 
(CategoricalPool(["a"]), CategoricalPool([1]), @@ -152,9 +76,6 @@ using StructTypes v = CategoricalValue(1, CategoricalPool(["a"])) @test JSON3.write(v) === "\"a\"" - v = CategoricalValue(1, CategoricalPool([:a])) - @test JSON3.write(v) === "\"a\"" - v = CategoricalValue(1, CategoricalPool([1])) @test JSON3.write(v) === "1" @test StructTypes.numbertype(typeof(v)) === Int diff --git a/test/07_levels.jl b/test/07_levels.jl index 88257c0c..f0a43b16 100644 --- a/test/07_levels.jl +++ b/test/07_levels.jl @@ -150,12 +150,6 @@ using CategoricalArrays: DefaultRefType, levels! v = CategoricalValue(1, CategoricalPool(["a", "b"])) @test_throws MethodError get!(pool, v) - # get! with CategoricalValue{Any} (#220) - p1 = CategoricalPool(Any['a', 'b', 'c']) - p2 = CategoricalPool(Any['a', 'b', 'x']) - @test get!(p1, p2[1]) === UInt32(1) - @test get!(p1, p2[3]) === UInt32(4) - # get! with ordered CategoricalValue marks unordered empty pool as ordered p1 = CategoricalPool(['b', 'c', 'a']) ordered!(p1, true) diff --git a/test/13_arraycommon.jl b/test/13_arraycommon.jl index e8ae7adc..16ef171b 100644 --- a/test/13_arraycommon.jl +++ b/test/13_arraycommon.jl @@ -1162,6 +1162,48 @@ end end end +@testset "constructors from arrays with unsupported eltypes" begin + for (CT, a) in zip((CategoricalVector, CategoricalMatrix), + ([1, 2, 3], [1 2 3])), + f in (categorical, CategoricalArray, CT, + x -> convert(CategoricalArray, x), + x -> convert(CT, x)), + T in (Any, Union{Int, Symbol}, Union{Real, Symbol, Missing}) + x = f(collect(T, a)) + @test x isa CT{Int} + @test x == categorical(a) + end + for (CT, a) in zip((CategoricalVector, CategoricalMatrix), + ([1, missing, 3], [1 missing 3])), + f in (categorical, CategoricalArray, CT, + x -> convert(CategoricalArray, x), + x -> convert(CT, x)), + T in (Any, Union{Int, Symbol, Missing}, Union{Real, Symbol, Missing}) + x = f(collect(T, a)) + @test x isa CT{Union{Int, Missing}} + @test x ≅ categorical(a) + end + + for f in (categorical, 
CategoricalArray, CategoricalVector, + x -> convert(CategoricalArray, x), + x -> convert(CategoricalVector, x)) + @test_throws ArgumentError f([:a]) + @test_throws ArgumentError f(Any[:a]) + @test_throws ArgumentError f([nothing]) + @test_throws ArgumentError f(Any[nothing]) + @test_throws ArgumentError f([1, nothing]) + end + for f in (categorical, CategoricalArray, CategoricalMatrix, + x -> convert(CategoricalArray, x), + x -> convert(CategoricalMatrix, x)) + @test_throws ArgumentError f([:a :a]) + @test_throws ArgumentError f(Any[:a :a]) + @test_throws ArgumentError f([nothing nothing]) + @test_throws ArgumentError f(Any[nothing nothing]) + @test_throws ArgumentError f([1 nothing]) + end +end + @testset "converting from array with missings to array without missings CategoricalArray fails with missings" begin x = CategoricalArray{Union{String, Missing}}(undef, 1) @test_throws MissingException CategoricalArray{String}(x) @@ -1251,11 +1293,11 @@ end if VERSION > v"1.2.0-DEV" @inferred vcat(x, y) end - @test vcat(x, y) isa CategoricalVector{Any} + @test vcat(x, y) isa CategoricalVector{Union{String, Int}} if VERSION > v"1.2.0-DEV" @inferred vcat(x, z1) end - @test vcat(x, z1) isa CategoricalVector{Any} + @test vcat(x, z1) isa CategoricalVector{Union{String, Float64}} if VERSION > v"1.2.0-DEV" @inferred vcat(y, z1) end @@ -2008,7 +2050,7 @@ end end # TODO: move struct definition inside @testset block once we require Julia 1.6 -struct UnorderedBar +struct UnorderedBar <: Number a::String end @@ -2069,25 +2111,6 @@ StructTypes.StructType(::Type{<:MyCustomType}) = StructTypes.Struct() @test levels(readx) == levels(x) @test readx isa CategoricalVector{Union{Missing,String}} - readx = JSON3.read(str, CategoricalVector{Union{Nothing,String}}) - @test all((ismissing(a) && (get(b) isa Nothing)) || a == b for (a,b) in zip(x,readx)) - @test nothing in levels(readx) - @test length(union(setdiff(levels(readx),[nothing]), levels(x))) == length(levels(x)) - @test readx isa 
CategoricalVector{Union{Nothing,String}} - - readx = JSON3.read(str, CategoricalArray{Union{Nothing,String}}) - @test all((ismissing(a) && (get(b) isa Nothing)) || a == b for (a,b) in zip(x,readx)) - @test nothing in levels(readx) - @test length(union(setdiff(levels(readx),[nothing]), levels(x))) == length(levels(x)) - @test readx isa CategoricalVector{Union{Nothing,String}} - - x = CategoricalArray(["x",nothing,"y","z","y",nothing,"z","x"]) - str = JSON3.write(x) - - readx = JSON3.read(str, CategoricalArray{Union{Missing,String}}) - @test all(((get(a) isa Nothing) && ismissing(b)) || a == b for (a,b) in zip(x,readx)) - @test readx isa CategoricalVector - x = MyCustomType( collect(1:3), CategoricalArray(["x","y","z"]) diff --git a/test/16_recode.jl b/test/16_recode.jl index e228d775..8512967a 100644 --- a/test/16_recode.jl +++ b/test/16_recode.jl @@ -318,15 +318,15 @@ end @test typeof(y) === Vector{Union{Float64, T}} end - # Recoding from Int to Any + # Recoding from Int to Union{Int, String} y = @inferred recode(x, 1=>"a", 2:4=>0, [5; 9:10]=>-1) @test y == ["a", 0, 0, 0, -1, 6, 7, 8, -1, -1] if isa(x, CategoricalArray) - @test isa(y, CategoricalVector{Any, DefaultRefType}) + @test isa(y, CategoricalVector{Union{Int, String, T}, DefaultRefType}) @test levels(y) == [6, 7, 8, "a", 0, -1] @test !isordered(y) else - @test typeof(y) === Vector{Any} + @test typeof(y) === Vector{Union{Int, String, T}} end # Recoding from Int to String, with String default @@ -351,15 +351,15 @@ end @test typeof(y) === Vector{Union{String, T}} end - # Recoding from Int to Int/String (i.e. 
Any), with default String and other values Int + # Recoding from Int to Union{Int, String}, with default String and other values Int y = @inferred recode(x, "x", 1=>100, 2:4=>0, [5; 9:10]=>-1) @test y == [100, 0, 0, 0, -1, "x", "x", "x", -1, -1] if isa(x, CategoricalArray) - @test isa(y, CategoricalVector{Any, DefaultRefType}) + @test isa(y, CategoricalVector{Union{Int, String, T}, DefaultRefType}) @test levels(y) == [100, 0, -1, "x"] @test !isordered(y) else - @test typeof(y) === Vector{Any} + @test typeof(y) === Vector{Union{Int, String, T}} end # Recoding from Int to Int/String, without any Int value in pairs @@ -434,17 +434,17 @@ end end end -@testset "Recoding from $(typeof(x)) to Int/String (i.e. Any), with levels in custom order" for +@testset "Recoding from $(typeof(x)) to Union{Int, String}, with levels in custom order" for x in (10:-1:1, CategoricalArray(10:-1:1)) y = @inferred recode(x, 0, 1=>"a", 2:4=>"c", [5; 9:10]=>"b") @test y == ["b", "b", 0, 0, 0, "b", "c", "c", "c", "a"] if isa(x, CategoricalArray) - @test isa(y, CategoricalVector{Any, DefaultRefType}) + @test isa(y, CategoricalVector{Union{Int, String}, DefaultRefType}) @test levels(y) == ["a", "c", "b", 0] @test !isordered(y) else - @test typeof(y) === Vector{Any} + @test typeof(y) === Vector{Union{Int, String}} end # Recoding from Int to String via default, with levels in custom order @@ -566,8 +566,8 @@ end testf(replace, String, x, missing => "") testf(replace, Union{String, Missing}, x, "b" => "c") - testf(replace, Any, x, "a" => 1, "b" => 2) - testf(replace, Any, x, "a" => 1, "b" => 2, missing => 3) + testf(replace, Union{Int, String, Missing}, x, "a" => 1, "b" => 2) + testf(replace, Union{Int, String}, x, "a" => 1, "b" => 2, missing => 3) y = testf(replace!, Union{String, Missing}, x, "b" => "c") @test y === x @@ -619,15 +619,15 @@ end end # TODO: move struct definition inside @testset block after 1.6 becomes LTS -struct UnorderedFoo0 +struct UnorderedFoo0 <: Number a::String end - + 
@testset "recode AbstractVector with unordered eltype" begin x0 = [UnorderedFoo0("s$i") for i in 1:10] x = CategoricalArray{UnorderedFoo0}(undef, size(x0)) recode!(x, x0, UnorderedFoo0("s3") => UnorderedFoo0("xxx")) - + @test x[3] == UnorderedFoo0("xxx") @test x[(1:end) .!= 3] == x0[(1:end) .!= 3] @test levels(x)[1:(end-1)] == x0[(1:end) .!= 3]