diff --git a/NEWS.md b/NEWS.md index a5cdf5a2fd..e9e9295f88 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,25 @@ +# DataFrames v1.0 Release Notes + +## Breaking changes + +* No breaking changes are planned for v1.0 release + +## Bug fixes + +## New functionalities + + +## Deprecated + +* all old deprecations now throw an error + ([#2554](https://github.com/JuliaData/DataFrames.jl/pull/2554)) + +## Dependency changes + + +## Other relevant changes + + # DataFrames v0.22 Release Notes ## Breaking changes diff --git a/src/DataFrames.jl b/src/DataFrames.jl index c3e7ac701b..0715c40a3e 100644 --- a/src/DataFrames.jl +++ b/src/DataFrames.jl @@ -1,7 +1,7 @@ module DataFrames using Statistics, Printf, REPL -using Reexport, SortingAlgorithms, Compat, Unicode, PooledArrays, CategoricalArrays +using Reexport, SortingAlgorithms, Compat, Unicode, PooledArrays @reexport using Missings, InvertedIndices using Base.Sort, Base.Order, Base.Iterators using TableTraits, IteratorInterfaceExtensions diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl index 0277016a88..cb8aa43d43 100644 --- a/src/abstractdataframe/abstractdataframe.jl +++ b/src/abstractdataframe/abstractdataframe.jl @@ -556,16 +556,11 @@ julia> describe(df, :min, sum => :sum, cols=:x) 1 │ x 0.1 5.5 ``` """ -function DataAPI.describe(df::AbstractDataFrame, stats::Union{Symbol, - Pair{<:Base.Callable, <:SymbolOrString}, - Pair{<:SymbolOrString}}...; # TODO: remove after deprecation - cols=:) - if any(x -> x isa Pair{<:SymbolOrString}, stats) - Base.depwarn("name => function order is deprecated; use function => name instead", :describe) - end - return _describe(select(df, cols, copycols=false), - Any[s isa Pair{<:SymbolOrString} ? 
last(s) => first(s) : s for s in stats]) -end +DataAPI.describe(df::AbstractDataFrame, + stats::Union{Symbol, Pair{<:Base.Callable, <:SymbolOrString}}...; + cols=:) = + _describe(select(df, cols, copycols=false), Any[s for s in stats]) + DataAPI.describe(df::AbstractDataFrame; cols=:) = _describe(select(df, cols, copycols=false), [:mean, :min, :median, :max, :nmissing, :eltype]) diff --git a/src/abstractdataframe/selection.jl b/src/abstractdataframe/selection.jl index df58ac2cea..e22ca022a4 100644 --- a/src/abstractdataframe/selection.jl +++ b/src/abstractdataframe/selection.jl @@ -1,7 +1,3 @@ -# TODO: -# * add handling of empty ByRow to filter, and select/transform/combine for GroupedDataFrame -# * add handling of multiple column return rules for select/transform/combine for GroupedDataFrame - # normalize_selection function makes sure that whatever input format of idx is it # will end up in one of four canonical forms # 1) AbstractVector{Int} diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl index 0a5bdb7a28..8d59fc7dd8 100644 --- a/src/dataframe/dataframe.jl +++ b/src/dataframe/dataframe.jl @@ -337,6 +337,30 @@ function DataFrame(columns::AbstractMatrix, cnames::Symbol) return DataFrame(columns, gennames(size(columns, 2)), makeunique=false) end +# Discontinued constructors: every method below unconditionally throws an ArgumentError describing the supported alternative + +DataFrame(matrix::Matrix) = + throw(ArgumentError("`DataFrame` constructor from a `Matrix` requires " * + "passing :auto as a second argument to automatically " * + "generate column names: `DataFrame(matrix, :auto)`")) + +DataFrame(vecs::Vector{<:AbstractVector}) = + throw(ArgumentError("`DataFrame` constructor from a `Vector` of vectors requires " * + "passing :auto as a second argument to automatically " * + "generate column names: `DataFrame(vecs, :auto)`")) + +DataFrame(column_eltypes::AbstractVector{T}, cnames::AbstractVector{Symbol}, + nrows::Integer=0; makeunique::Bool=false) where T<:Type = + throw(ArgumentError("`DataFrame` constructor with passed eltypes is " * + 
"deprecated. Pass explicitly created columns to a " * + "`DataFrame` constructor instead.")) + +DataFrame(column_eltypes::AbstractVector{<:Type}, cnames::AbstractVector{<:AbstractString}, + nrows::Integer=0; makeunique::Bool=false) = + throw(ArgumentError("`DataFrame` constructor with passed eltypes is " * + "deprecated. Pass explicitly created columns to a " * + "`DataFrame` constructor instead.")) + ############################################################################## ## @@ -484,17 +508,6 @@ Base.getindex(df::DataFrame, row_ind::typeof(!), ## ############################################################################## -function nextcolname(df::DataFrame) - col = Symbol(string("x", ncol(df) + 1)) - hasproperty(df, col) || return col - i = 1 - while true - col = Symbol(string("x", ncol(df) + 1, "_", i)) - hasproperty(df, col) || return col - i += 1 - end -end - # Will automatically add a new column if needed function insert_single_column!(df::DataFrame, v::AbstractVector, col_ind::ColumnIndex) if ncol(df) != 0 && nrow(df) != length(v) @@ -524,18 +537,6 @@ function insert_single_entry!(df::DataFrame, v::Any, row_ind::Integer, col_ind:: end end -function insert_multiple_entries!(df::DataFrame, - v::Any, - row_inds::AbstractVector, - col_ind::ColumnIndex) - if haskey(index(df), col_ind) - _columns(df)[index(df)[col_ind]][row_inds] .= v - return v - else - throw(ArgumentError("Cannot assign to non-existent column: $col_ind")) - end -end - # df[!, SingleColumnIndex] = AbstractVector function Base.setindex!(df::DataFrame, v::AbstractVector, ::typeof(!), col_ind::ColumnIndex) insert_single_column!(df, v, col_ind) diff --git a/src/deprecated.jl b/src/deprecated.jl index ea9a9c323e..d5d9f1a6bb 100644 --- a/src/deprecated.jl +++ b/src/deprecated.jl @@ -1,6 +1,6 @@ -import Base: @deprecate +# commenting out till we decide to start deprecating things again -@deprecate DataFrame!(args...; kwargs...) DataFrame(args...; copycols=false, kwargs...) 
+# import Base: @deprecate # TODO: remove these definitions in year 2021 by(args...; kwargs...) = throw(ArgumentError("by function was removed from DataFrames.jl. " * @@ -8,140 +8,3 @@ by(args...; kwargs...) = throw(ArgumentError("by function was removed from DataF aggregate(args...; kwargs...) = throw(ArgumentError("aggregate function was removed from DataFrames.jl. " * "Use the `combine` function instead.")) - -export categorical, categorical! -function CategoricalArrays.categorical(df::AbstractDataFrame, - cols::Union{ColumnIndex, MultiColumnIndex}; - compress::Union{Bool, Nothing}=nothing) - if compress === nothing - compress = false - categoricalstr = "categorical" - else - categoricalstr = "(x -> categorical(x, compress=$compress))" - end - if cols isa AbstractVector{<:Union{AbstractString, Symbol}} - Base.depwarn("`categorical(df, cols)` is deprecated. " * - "Use `transform(df, cols .=> $categoricalstr, renamecols=false)` instead.", - :categorical) - return transform(df, cols .=> (x -> categorical(x, compress=compress)), renamecols=false) - elseif cols isa Union{AbstractString, Symbol} - Base.depwarn("`categorical(df, cols)` is deprecated. " * - "Use `transform(df, cols => $categoricalstr, renamecols=false)` instead.", - :categorical) - return transform(df, cols => (x -> categorical(x, compress=compress)), renamecols=false) - else - Base.depwarn("`categorical(df, cols)` is deprecated. 
" * - "Use `transform(df, names(df, cols) .=> $categoricalstr, renamecols=false)` instead.", - :categorical) - return transform(df, names(df, cols) .=> (x -> categorical(x, compress=compress)), renamecols=false) - end -end - -function CategoricalArrays.categorical(df::AbstractDataFrame, - cols::Union{Type, Nothing}=nothing; - compress::Bool=false) - if compress === nothing - compress = false - categoricalstr = "categorical" - else - categoricalstr = "categorical(x, compress=$compress)" - end - if cols === nothing - cols = Union{AbstractString, Missing} - Base.depwarn("`categorical(df)` is deprecated. " * - "Use `transform(df, names(df, $cols) .=> $categoricalstr, renamecols=false)` instead.", - :categorical) - else - Base.depwarn("`categorical(df, T)` is deprecated. " * - "Use transform(df, names(df, T) .=> $categoricalstr, renamecols=false)` instead.", - :categorical) - end - return transform(df, names(df, cols) .=> (x -> categorical(x, compress=compress)), renamecols=false) -end - -function categorical!(df::DataFrame, cols::Union{ColumnIndex, MultiColumnIndex}; - compress::Union{Bool, Nothing}=nothing) - if compress === nothing - compress = false - categoricalstr = "categorical" - else - categoricalstr = "(x -> categorical(x, compress=$compress))" - end - if cols isa AbstractVector{<:Union{AbstractString, Symbol}} - Base.depwarn("`categorical!(df, cols)` is deprecated. " * - "Use `transform!(df, cols .=> $categoricalstr, renamecols=false)` instead.", - :categorical!) - return transform!(df, cols .=> (x -> categorical(x, compress=compress)), renamecols=false) - elseif cols isa Union{AbstractString, Symbol} - Base.depwarn("`categorical!(df, cols)` is deprecated. " * - "Use `transform!(df, cols => $categoricalstr, renamecols=false)` instead.", - :categorical!) - return transform!(df, cols => (x -> categorical(x, compress=compress)), renamecols=false) - else - Base.depwarn("`categorical!(df, cols)` is deprecated. 
" * - "Use `transform!(df, names(df, cols) .=> $categoricalstr, renamecols=false)` instead.", - :categorical!) - return transform!(df, names(df, cols) .=> (x -> categorical(x, compress=compress)), renamecols=false) - end -end - -function categorical!(df::DataFrame, cols::Union{Type, Nothing}=nothing; - compress::Bool=false) - if compress === nothing - compress = false - categoricalstr = "categorical" - else - categoricalstr = "(x -> categorical(x, compress=$compress))" - end - if cols === nothing - cols = Union{AbstractString, Missing} - Base.depwarn("`categorical!(df)` is deprecated. " * - "Use `transform!(df, names(df, $cols) .=> $categoricalstr, renamecols=false)` instead.", - :categorical!) - else - Base.depwarn("`categorical!(df, T)` is deprecated. " * - "Use `transform!(df, names(df, T) .=> $categoricalstr, renamecols=false)` instead.", - :categorical!) - end - return transform!(df, names(df, cols) .=> (x -> categorical(x, compress=compress)), renamecols=false) -end - -@deprecate DataFrame(pairs::NTuple{N, Pair}; makeunique::Bool=false, - copycols::Bool=true) where {N} DataFrame(pairs..., makeunique=makeunique, copycols=copycols) -@deprecate DataFrame(columns::NTuple{N, AbstractVector}, cnames::NTuple{N, Symbol}; makeunique::Bool=false, - copycols::Bool=true) where {N} DataFrame(collect(columns), collect(cnames); - makeunique=makeunique, copycols=copycols) -@deprecate DataFrame(columns::NTuple{N, AbstractVector}, cnames::NTuple{N, AbstractString}; makeunique::Bool=false, - copycols::Bool=true) where {N} DataFrame(collect(columns), [Symbol(c) for c in cnames]; - makeunique=makeunique, copycols=copycols) -@deprecate DataFrame(columns::NTuple{N, AbstractVector}; - copycols::Bool=true) where {N} DataFrame(collect(columns), - Symbol.(:x, 1:length(columns)), copycols=copycols) - -# this deprecation is very important, becuase without it users will -# get strange results with old code as described in https://github.com/JuliaData/Tables.jl/issues/208 -@deprecate 
DataFrame(columns::AbstractVector{<:AbstractVector}; makeunique::Bool=false, - copycols::Bool=true) DataFrame(columns, :auto, copycols=copycols) - -@deprecate DataFrame(columns::AbstractMatrix) DataFrame(columns, :auto) - -function DataFrame(column_eltypes::AbstractVector{T}, cnames::AbstractVector{Symbol}, - nrows::Integer=0; makeunique::Bool=false)::DataFrame where T<:Type - Base.depwarn("`DataFrame` constructor with passed eltypes is deprecated. " * - "Pass explicitly created columns to a `DataFrame` constructor instead.", - :DataFrame) - columns = AbstractVector[elty >: Missing ? - fill!(Tables.allocatecolumn(elty, nrows), missing) : - Tables.allocatecolumn(elty, nrows) - for elty in column_eltypes] - return DataFrame(columns, Index(convert(Vector{Symbol}, cnames), - makeunique=makeunique), copycols=false) -end - -DataFrame(column_eltypes::AbstractVector{<:Type}, - cnames::AbstractVector{<:AbstractString}, - nrows::Integer=0; makeunique::Bool=false) = - DataFrame(column_eltypes, Symbol.(cnames), nrows; makeunique=makeunique) - -import Base: convert -@deprecate convert(::Type{DataFrame}, A::AbstractMatrix) DataFrame(Tables.table(A, header=Symbol.(:x, axes(A, 2)))) diff --git a/test/constructors.jl b/test/constructors.jl index f2dcf592c7..ca56b2b3e3 100644 --- a/test/constructors.jl +++ b/test/constructors.jl @@ -356,4 +356,13 @@ end end +@testset "removed constructors" begin + @test_throws ArgumentError DataFrame([1 2; 3 4]) + @test_throws ArgumentError DataFrame([[1, 2], [3, 4]]) + @test_throws ArgumentError DataFrame([Int, Float64], [:a, :b]) + @test_throws ArgumentError DataFrame([Int, Float64], [:a, :b], 2) + @test_throws ArgumentError DataFrame([Int, Float64], ["a", "b"]) + @test_throws ArgumentError DataFrame([Int, Float64], ["a", "b"], 2) +end + end # module diff --git a/test/dataframe.jl b/test/dataframe.jl index c8041d9edb..d0688bafd1 100644 --- a/test/dataframe.jl +++ b/test/dataframe.jl @@ -673,6 +673,7 @@ end DataFrame(variable=:a, min=1, min2=1, 
max2=2, max=2) @test_throws ArgumentError describe(df, :mean, :all) + @test_throws MethodError describe(DataFrame(a=[1, 2]), cols = :a, "max2" => maximum) @test_throws ArgumentError describe(df, :min, :min) @test_throws ArgumentError describe(df, :minimum) end diff --git a/test/deprecated.jl b/test/deprecated.jl index 3c3b145262..4dcfcded0b 100644 --- a/test/deprecated.jl +++ b/test/deprecated.jl @@ -1,234 +1,10 @@ module TestDeprecated -using Test, DataFrames, CategoricalArrays +using Test, DataFrames -const ≅ = isequal - -@testset "DataFrame!" begin - x = [1, 2, 3] - y = [4, 5, 6] - @test DataFrame!(x=x, y=y, copycols=true) == DataFrame(x=x, y=y) - df1 = DataFrame(x=x, y=y) - df2 = DataFrame!(df1) - @test df1 == df2 - @test df1.x === df2.x - @test df1.y === df2.y - - a=[1, 2, 3] - df = DataFrame!(:a=>a, :b=>1, :c=>1:3) - @test propertynames(df) == [:a, :b, :c] - @test df.a === a - - df = DataFrame!("a"=>a, "b"=>1, "c"=>1:3) - @test propertynames(df) == [:a, :b, :c] - @test df."a" === a - - df = DataFrame!(Dict(:a=>a, :b=>1, :c=>1:3)) - @test propertynames(df) == [:a, :b, :c] - @test df.a === a - - df = DataFrame!(Dict("a"=>a, "b"=>1, "c"=>1:3)) - @test propertynames(df) == [:a, :b, :c] - @test df."a" === a - - df = DataFrame!((x, y)) - @test propertynames(df) == [:x1, :x2] - @test df.x1 === x - @test df.x2 === y - - df = DataFrame!((x, y), (:x1, :x2)) - @test propertynames(df) == [:x1, :x2] - @test df.x1 === x - @test df.x2 === y - - df = DataFrame!((x, y), ("x1", "x2")) - @test names(df) == ["x1", "x2"] - @test df."x1" === x - @test df."x2" === y - - @test_throws MethodError DataFrame!([Union{Int, Missing}, Union{Float64, Missing}], - [:x1, :x2], 2) -end - -@testset "test categorical" begin - df = DataFrame(x=["a", "b", "c"], - y=["a", "b", missing], - z=[1, 2, 3]) - for x in [df, view(df, :, :)] - y = categorical(x) - @test y isa DataFrame - @test x ≅ y - @test x.x !== y.x - @test x.y !== y.y - @test x.z !== y.z - @test y.x isa CategoricalVector{String} - @test 
y.y isa CategoricalVector{Union{Missing, String}} - @test y.z isa Vector{Int} - - y = categorical(x, Int) - @test y isa DataFrame - @test x ≅ y - @test x.x !== y.x - @test x.y !== y.y - @test x.z !== y.z - @test y.x isa Vector{String} - @test y.y isa Vector{Union{Missing, String}} - @test y.z isa CategoricalVector{Int} - - for colsel in [:, names(x), [1, 2, 3], [true, true, true], r"", Not(r"a")] - y = categorical(x, colsel) - @test y isa DataFrame - @test x ≅ y - @test x.x !== y.x - @test x.y !== y.y - @test x.z !== y.z - @test y.x isa CategoricalVector{String} - @test y.y isa CategoricalVector{Union{Missing, String}} - @test y.z isa CategoricalVector{Int} - end - - for colsel in [:x, "x", 1, [:x], ["x"], [1], [true, false, false], r"x", Not(2:3)] - y = categorical(x, colsel) - @test y isa DataFrame - @test x ≅ y - @test x.x !== y.x - @test x.y !== y.y - @test x.z !== y.z - @test y.x isa CategoricalVector{String} - @test y.y isa Vector{Union{Missing, String}} - @test y.z isa Vector{Int} - end - - for colsel in [:z, "z", 3, [:z], ["z"], [3], [false, false, true], r"z", Not(1:2)] - y = categorical(x, colsel) - @test y isa DataFrame - @test x ≅ y - @test x.x !== y.x - @test x.y !== y.y - @test x.z !== y.z - @test y.x isa Vector{String} - @test y.y isa Vector{Union{Missing, String}} - @test y.z isa CategoricalVector{Int} - end - - for colsel in [Int[], Symbol[], [false, false, false], r"a", Not(:)] - y = categorical(x, colsel) - @test y isa DataFrame - @test x ≅ y - @test x.x !== y.x - @test x.y !== y.y - @test x.z !== y.z - @test y.x isa Vector{String} - @test y.y isa Vector{Union{Missing, String}} - @test y.z isa Vector{Int} - end - end -end - -@testset "test categorical!" 
begin - using DataFrames: _columns - df = DataFrame(A = Vector{Union{Int, Missing}}(1:3), B = Vector{Union{Int, Missing}}(4:6)) - DRT = CategoricalArrays.DefaultRefType - @test all(c -> isa(c, Vector{Union{Int, Missing}}), eachcol(categorical!(deepcopy(df)))) - @test all(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}}, - eachcol(categorical!(deepcopy(df), [1, 2]))) - @test all(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}}, - eachcol(categorical!(deepcopy(df), [:A, :B]))) - @test findfirst(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}}, - _columns(categorical!(deepcopy(df), [:A]))) == 1 - @test findfirst(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}}, - _columns(categorical!(deepcopy(df), :A))) == 1 - @test findfirst(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}}, - _columns(categorical!(deepcopy(df), [1]))) == 1 - @test findfirst(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}}, - _columns(categorical!(deepcopy(df), 1))) == 1 - - @test all(c -> isa(c, Vector{Union{Int, Missing}}), eachcol(categorical!(deepcopy(df)))) - @test all(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}}, - eachcol(categorical!(deepcopy(df), Not(Not([1, 2]))))) - @test all(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}}, - eachcol(categorical!(deepcopy(df), Not(Not([:A, :B]))))) - @test findfirst(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}}, - _columns(categorical!(deepcopy(df), Not(Not([:A]))))) == 1 - @test findfirst(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}}, - _columns(categorical!(deepcopy(df), Not(Not(:A))))) == 1 - @test findfirst(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}}, - _columns(categorical!(deepcopy(df), Not(Not([1]))))) == 1 - @test findfirst(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}}, - _columns(categorical!(deepcopy(df), Not(Not(1))))) == 1 -end - -@testset "categorical!" 
begin - df = DataFrame([["a", "b"], ['a', 'b'], [true, false], 1:2, ["x", "y"]], :auto) - @test all(map(<:, eltype.(eachcol(categorical!(deepcopy(df)))), - [CategoricalArrays.CategoricalValue{String, UInt32}, - Char, Bool, Int, - CategoricalArrays.CategoricalValue{String, UInt32}])) - @test all(map(<:, eltype.(eachcol(categorical!(deepcopy(df), :))), - [CategoricalArrays.CategoricalValue{String, UInt32}, - CategoricalArrays.CategoricalValue{Char, UInt32}, - CategoricalArrays.CategoricalValue{Bool, UInt32}, - CategoricalArrays.CategoricalValue{Int, UInt32}, - CategoricalArrays.CategoricalValue{String, UInt32}])) - @test all(map(<:, eltype.(eachcol(categorical!(deepcopy(df), compress=true))), - [CategoricalArrays.CategoricalValue{String, UInt8}, - Char, Bool, Int, - CategoricalArrays.CategoricalValue{String, UInt8}])) - @test all(map(<:, eltype.(eachcol(categorical!(deepcopy(df), names(df)))), - [CategoricalArrays.CategoricalValue{String, UInt32}, - CategoricalArrays.CategoricalValue{Char, UInt32}, - CategoricalArrays.CategoricalValue{Bool, UInt32}, - CategoricalArrays.CategoricalValue{Int, UInt32}, - CategoricalArrays.CategoricalValue{String, UInt32}])) - @test all(map(<:, eltype.(eachcol(categorical!(deepcopy(df), names(df), compress=true))), - [CategoricalArrays.CategoricalValue{String, UInt8}, - CategoricalArrays.CategoricalValue{Char, UInt8}, - CategoricalArrays.CategoricalValue{Bool, UInt8}, - CategoricalArrays.CategoricalValue{Int, UInt8}, - CategoricalArrays.CategoricalValue{String, UInt8}])) - @test all(map(<:, eltype.(eachcol(categorical!(deepcopy(df), Not(1:0)))), - [CategoricalArrays.CategoricalValue{String, UInt32}, - CategoricalArrays.CategoricalValue{Char, UInt32}, - CategoricalArrays.CategoricalValue{Bool, UInt32}, - CategoricalArrays.CategoricalValue{Int, UInt32}, - CategoricalArrays.CategoricalValue{String, UInt32}])) - @test all(map(<:, eltype.(eachcol(categorical!(deepcopy(df), Not(1:0), compress=true))), - 
[CategoricalArrays.CategoricalValue{String, UInt8}, - CategoricalArrays.CategoricalValue{Char, UInt8}, - CategoricalArrays.CategoricalValue{Bool, UInt8}, - CategoricalArrays.CategoricalValue{Int, UInt8}, - CategoricalArrays.CategoricalValue{String, UInt8}])) - - @test all(map(<:, eltype.(eachcol(categorical!(deepcopy(df), Integer))), - [String, Char, - CategoricalArrays.CategoricalValue{Bool, UInt32}, - CategoricalArrays.CategoricalValue{Int, UInt32}, - String])) - - df = DataFrame([["a", missing]], :auto) - categorical!(df) - @test df.x1 isa CategoricalVector{Union{Missing, String}} - - df = DataFrame(x1=[1, 2]) - categorical!(df) - @test df.x1 isa Vector{Int} - categorical!(df, :) - @test df.x1 isa CategoricalVector{Int} -end - -@testset "categorical with Cols, All and Between" begin - df = DataFrame(x1=["a", "b"], y=[2, 3]) - categorical(df, All()) - categorical(df, Cols()) - categorical(df, Between(1, 2)) - categorical!(df, All()) - categorical!(df, Cols()) - categorical!(df, Between(1, 2)) -end - -@testset "deprecated describe syntax" begin - @test describe(DataFrame(a=[1, 2]), cols = :a, :min, :min2 => minimum, "max2" => maximum, :max) == - DataFrame(variable=:a, min=1, min2=1, max2=2, max=2) +@testset "by and aggregate" begin + @test_throws ArgumentError by() + @test_throws ArgumentError aggregate() end @testset "All indexing" begin @@ -282,158 +58,4 @@ end @test df[:, All(Not(r"1"), r"a")] == df[:, [2, 4, 1]] end -@testset "deprecated DataFrame constructors" begin - @test DataFrame(([1, 2], [3, 4])) == DataFrame([[1, 2], [3, 4]], :auto) - @test DataFrame((categorical([1, 2]), categorical([3, 4]))) == - DataFrame([categorical([1, 2]), categorical([3, 4])], :auto) - @test DataFrame(([1, 2], [3, 4]), ("a", "b")) == DataFrame([[1, 2], [3, 4]], ["a", "b"]) - @test DataFrame(([1, 2], [3, 4]), (:a, :b)) == DataFrame([[1, 2], [3, 4]], [:a, :b]) - @test DataFrame(([1, 2, 3], [1, 2, 3])) == DataFrame((1:3, 1:3)) == DataFrame((1:3, [1, 2, 3])) - @test 
DataFrame(("x1"=>1:3, "x2"=>[1, 2, 3])) == DataFrame(["x1"=>1:3, "x2"=>[1, 2, 3]]) - @test DataFrame((:x1=>1:3, :x2=>[1, 2, 3])) == DataFrame([:x1=>1:3, :x2=>[1, 2, 3]]) - @inferred DataFrame((1:3, 1:3)) - @inferred DataFrame((1:3, 1:3), (:a, :b)) - @inferred DataFrame((1:3, 1:3), ("a", "b")) - @inferred DataFrame((:x1=>1:3, :x2=>[1, 2, 3])) - @inferred DataFrame(("x1"=>1:3, "x2"=>[1, 2, 3])) - @test DataFrame(Union{Float64, Missing}[0.0 1.0; - 0.0 1.0; - 0.0 1.0]) == - convert(DataFrame, Union{Float64, Missing}[0.0 1.0; - 0.0 1.0; - 0.0 1.0]) - @test names(DataFrame([0.0 1.0; - 0.0 1.0; - 0.0 1.0], ["a", "b"])) == ["a", "b"] - @test names(DataFrame([0.0 1.0; - 0.0 1.0; - 0.0 1.0], [:a, :b])) == ["a", "b"] - - x = [1, 2, 3] - y = [1, 2, 3] - - df = DataFrame((x, y)) - @test propertynames(df) == [:x1, :x2] - @test df.x1 == x - @test df.x2 == y - @test df.x1 !== x - @test df.x2 !== y - df = DataFrame((x, y), copycols=true) - @test propertynames(df) == [:x1, :x2] - @test df.x1 == x - @test df.x2 == y - @test df.x1 !== x - @test df.x2 !== y - df = DataFrame((x, y), copycols=false) - @test propertynames(df) == [:x1, :x2] - @test df.x1 === x - @test df.x2 === y - - df = DataFrame((x, y), (:x1, :x2)) - @test propertynames(df) == [:x1, :x2] - @test df.x1 == x - @test df.x2 == y - @test df.x1 !== x - @test df.x2 !== y - df = DataFrame((x, y), (:x1, :x2), copycols=true) - @test propertynames(df) == [:x1, :x2] - @test df.x1 == x - @test df.x2 == y - @test df.x1 !== x - @test df.x2 !== y - df = DataFrame((x, y), (:x1, :x2), copycols=false) - @test propertynames(df) == [:x1, :x2] - @test df.x1 === x - @test df.x2 === y - - df = DataFrame((x, y), ("x1", "x2")) - @test names(df) == ["x1", "x2"] - @test df."x1" == x - @test df."x2" == y - @test df."x1" !== x - @test df."x2" !== y - df = DataFrame((x, y), ("x1", "x2"), copycols=true) - @test names(df) == ["x1", "x2"] - @test df."x1" == x - @test df."x2" == y - @test df."x1" !== x - @test df."x2" !== y - df = DataFrame((x, y), 
("x1", "x2"), copycols=false) - @test names(df) == ["x1", "x2"] - @test df."x1" === x - @test df."x2" === y - - df = DataFrame([1 2; 3 4], :auto) - @test size(df) == (2, 2) - @test df.x1 == [1, 3] - @test df.x2 == [2, 4] - - df = DataFrame([Union{Int, Missing}, Union{Float64, Missing}, Union{String, Missing}], - [:A, :B, :C], 100) - @test size(df, 1) == 100 - @test size(df, 2) == 3 - @test typeof(df[!, 1]) == Vector{Union{Int, Missing}} - @test typeof(df[!, 2]) == Vector{Union{Float64, Missing}} - @test typeof(df[!, 3]) == Vector{Union{String, Missing}} - @test all(ismissing, df[!, 1]) - @test all(ismissing, df[!, 2]) - @test all(ismissing, df[!, 3]) - - df = DataFrame([Union{Int, Missing}, Union{Float64, Missing}, Union{String, Missing}], - ["A", "B", "C"], 100) - @test size(df, 1) == 100 - @test size(df, 2) == 3 - @test typeof(df[!, "A"]) == Vector{Union{Int, Missing}} - @test typeof(df[!, "B"]) == Vector{Union{Float64, Missing}} - @test typeof(df[!, "C"]) == Vector{Union{String, Missing}} - @test all(ismissing, df[!, "A"]) - @test all(ismissing, df[!, "B"]) - @test all(ismissing, df[!, "C"]) - - df = DataFrame([Union{Int, Missing}, Union{Float64, Missing}], [:x1, :x2], 2) - @test size(df) == (2, 2) - @test eltype.(eachcol(df)) == [Union{Int, Missing}, Union{Float64, Missing}] - - @test_throws MethodError DataFrame([Union{Int, Missing}, Union{Float64, Missing}], - [:x1, :x2], 2, copycols=false) - @test size(df) == (2, 2) - @test eltype.(eachcol(df)) == [Union{Int, Missing}, Union{Float64, Missing}] - - df = DataFrame([Union{Int, Missing}, Union{Float64, Missing}, Union{String, Missing}], - [:A, :B, :C]) - @test size(df, 1) == 0 - @test size(df, 2) == 3 - @test typeof(df[!, 1]) == Vector{Union{Int, Missing}} - @test typeof(df[!, 2]) == Vector{Union{Float64, Missing}} - @test typeof(df[!, 3]) == Vector{Union{String, Missing}} - @test propertynames(df) == [:A, :B, :C] - - df = DataFrame([Union{Int, Missing}, Union{Float64, Missing}, Union{String, Missing}], - ["A", 
"B", "C"]) - @test size(df, 1) == 0 - @test size(df, 2) == 3 - @test typeof(df[!, "A"]) == Vector{Union{Int, Missing}} - @test typeof(df[!, "B"]) == Vector{Union{Float64, Missing}} - @test typeof(df[!, "C"]) == Vector{Union{String, Missing}} - @test names(df) == ["A", "B", "C"] - - df = convert(DataFrame, zeros(10, 5)) - @test size(df, 1) == 10 - @test size(df, 2) == 5 - @test typeof(df[!, 1]) == Vector{Float64} - @test typeof(df[:, 1]) == Vector{Float64} - - df = convert(DataFrame, ones(10, 5)) - @test size(df, 1) == 10 - @test size(df, 2) == 5 - @test typeof(df[!, 1]) == Vector{Float64} - @test typeof(df[:, 1]) == Vector{Float64} - - df = convert(DataFrame, Matrix{Float64}(undef, 10, 5)) - @test size(df, 1) == 10 - @test size(df, 2) == 5 - @test typeof(df[!, 1]) == Vector{Float64} - @test typeof(df[:, 1]) == Vector{Float64} -end - end # module diff --git a/test/show.jl b/test/show.jl index 33c96b7078..bc809920ea 100644 --- a/test/show.jl +++ b/test/show.jl @@ -12,7 +12,7 @@ Base.show(io::IO, f::F) = show(io, f.i) module TestShow -using DataFrames, Dates, Random, Test +using DataFrames, Dates, Random, Test, CategoricalArrays import Main: ⛵⛵⛵⛵⛵, F