From 3b083811f1b416654f12da874acefb979f564ba1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Fri, 20 Nov 2020 15:39:48 +0100 Subject: [PATCH 1/7] remove deprecations and CategoricalArrays.jl dependency --- NEWS.md | 25 ++ src/DataFrames.jl | 2 +- src/abstractdataframe/abstractdataframe.jl | 17 +- src/abstractdataframe/selection.jl | 4 - src/deprecated.jl | 141 +------- test/dataframe.jl | 1 + test/deprecated.jl | 385 +-------------------- test/show.jl | 2 +- 8 files changed, 37 insertions(+), 540 deletions(-) diff --git a/NEWS.md b/NEWS.md index a5cdf5a2fd..6180d0c4a5 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,28 @@ +# DataFrames v1.0 Release Notes + +## Breaking changes + +* No breaking changes are planned for v1.0 release + +## Bug fixes + +* fixed bug when displaying floating point columns with eltype turned off in + text/plain ([2542](https://github.com/JuliaData/DataFrames.jl/pull/2542)) + +## New functionalities + + +## Deprecated + +* all old deprecations now throw an error + ([]()) + +## Dependency changes + + +## Other relevant changes + + # DataFrames v0.22 Release Notes ## Breaking changes diff --git a/src/DataFrames.jl b/src/DataFrames.jl index c3e7ac701b..0715c40a3e 100644 --- a/src/DataFrames.jl +++ b/src/DataFrames.jl @@ -1,7 +1,7 @@ module DataFrames using Statistics, Printf, REPL -using Reexport, SortingAlgorithms, Compat, Unicode, PooledArrays, CategoricalArrays +using Reexport, SortingAlgorithms, Compat, Unicode, PooledArrays @reexport using Missings, InvertedIndices using Base.Sort, Base.Order, Base.Iterators using TableTraits, IteratorInterfaceExtensions diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl index 95f76d8d12..e38314a42c 100644 --- a/src/abstractdataframe/abstractdataframe.jl +++ b/src/abstractdataframe/abstractdataframe.jl @@ -556,19 +556,14 @@ julia> describe(df, :min, sum => :sum, cols=:x) 1 │ x 0.1 5.5 ``` """ -function DataAPI.describe(df::AbstractDataFrame, stats::Union{Symbol, - Pair{<:Base.Callable, <:SymbolOrString}, - Pair{<:SymbolOrString}}...; # TODO: remove after deprecation - cols=:) - if any(x -> x isa Pair{<:SymbolOrString}, stats) - Base.depwarn("name => function order is deprecated; use function => name instead", :describe) - end - return _describe(select(df, cols, copycols=false), - Any[s isa Pair{<:SymbolOrString} ? last(s) => first(s) : s for s in stats]) -end +DataAPI.describe(df::AbstractDataFrame, + stats::Union{Symbol, Pair{<:Base.Callable, <:SymbolOrString}}...; + cols=:) = + _describe(select(df, cols, copycols=false), Any[s for s in stats]) + DataAPI.describe(df::AbstractDataFrame; cols=:) = _describe(select(df, cols, copycols=false), - [:mean, :min, :median, :max, :nmissing, :eltype]) + Any[:mean, :min, :median, :max, :nmissing, :eltype]) function _describe(df::AbstractDataFrame, stats::AbstractVector) predefined_funs = Symbol[s for s in stats if s isa Symbol] diff --git a/src/abstractdataframe/selection.jl b/src/abstractdataframe/selection.jl index df58ac2cea..e22ca022a4 100644 --- a/src/abstractdataframe/selection.jl +++ b/src/abstractdataframe/selection.jl @@ -1,7 +1,3 @@ -# TODO: -# * add handling of empty ByRow to filter, and select/transform/combine for GroupedDataFrame -# * add handling of multiple column return rules for select/transform/combine for GroupedDataFrame - # normalize_selection function makes sure that whatever input format of idx is it # will end up in one of four canonical forms # 1) AbstractVector{Int} diff --git a/src/deprecated.jl b/src/deprecated.jl index ea9a9c323e..d5d9f1a6bb 100644 --- a/src/deprecated.jl +++ b/src/deprecated.jl @@ -1,6 +1,6 @@ -import Base: @deprecate +# commenting out till we decide to start deprecating things again -@deprecate DataFrame!(args...; kwargs...) DataFrame(args...; copycols=false, kwargs...) +# import Base: @deprecate # TODO: remove these definitions in year 2021 by(args...; kwargs...) = throw(ArgumentError("by function was removed from DataFrames.jl. " * @@ -8,140 +8,3 @@ by(args...; kwargs...) = throw(ArgumentError("by function was removed from DataF aggregate(args...; kwargs...) = throw(ArgumentError("aggregate function was removed from DataFrames.jl. " * "Use the `combine` function instead.")) - -export categorical, categorical! -function CategoricalArrays.categorical(df::AbstractDataFrame, - cols::Union{ColumnIndex, MultiColumnIndex}; - compress::Union{Bool, Nothing}=nothing) - if compress === nothing - compress = false - categoricalstr = "categorical" - else - categoricalstr = "(x -> categorical(x, compress=$compress))" - end - if cols isa AbstractVector{<:Union{AbstractString, Symbol}} - Base.depwarn("`categorical(df, cols)` is deprecated. " * - "Use `transform(df, cols .=> $categoricalstr, renamecols=false)` instead.", - :categorical) - return transform(df, cols .=> (x -> categorical(x, compress=compress)), renamecols=false) - elseif cols isa Union{AbstractString, Symbol} - Base.depwarn("`categorical(df, cols)` is deprecated. " * - "Use `transform(df, cols => $categoricalstr, renamecols=false)` instead.", - :categorical) - return transform(df, cols => (x -> categorical(x, compress=compress)), renamecols=false) - else - Base.depwarn("`categorical(df, cols)` is deprecated. " * - "Use `transform(df, names(df, cols) .=> $categoricalstr, renamecols=false)` instead.", - :categorical) - return transform(df, names(df, cols) .=> (x -> categorical(x, compress=compress)), renamecols=false) - end -end - -function CategoricalArrays.categorical(df::AbstractDataFrame, - cols::Union{Type, Nothing}=nothing; - compress::Bool=false) - if compress === nothing - compress = false - categoricalstr = "categorical" - else - categoricalstr = "categorical(x, compress=$compress)" - end - if cols === nothing - cols = Union{AbstractString, Missing} - Base.depwarn("`categorical(df)` is deprecated. " * - "Use `transform(df, names(df, $cols) .=> $categoricalstr, renamecols=false)` instead.", - :categorical) - else - Base.depwarn("`categorical(df, T)` is deprecated. " * - "Use transform(df, names(df, T) .=> $categoricalstr, renamecols=false)` instead.", - :categorical) - end - return transform(df, names(df, cols) .=> (x -> categorical(x, compress=compress)), renamecols=false) -end - -function categorical!(df::DataFrame, cols::Union{ColumnIndex, MultiColumnIndex}; - compress::Union{Bool, Nothing}=nothing) - if compress === nothing - compress = false - categoricalstr = "categorical" - else - categoricalstr = "(x -> categorical(x, compress=$compress))" - end - if cols isa AbstractVector{<:Union{AbstractString, Symbol}} - Base.depwarn("`categorical!(df, cols)` is deprecated. " * - "Use `transform!(df, cols .=> $categoricalstr, renamecols=false)` instead.", - :categorical!) - return transform!(df, cols .=> (x -> categorical(x, compress=compress)), renamecols=false) - elseif cols isa Union{AbstractString, Symbol} - Base.depwarn("`categorical!(df, cols)` is deprecated. " * - "Use `transform!(df, cols => $categoricalstr, renamecols=false)` instead.", - :categorical!) - return transform!(df, cols => (x -> categorical(x, compress=compress)), renamecols=false) - else - Base.depwarn("`categorical!(df, cols)` is deprecated. " * - "Use `transform!(df, names(df, cols) .=> $categoricalstr, renamecols=false)` instead.", - :categorical!) - return transform!(df, names(df, cols) .=> (x -> categorical(x, compress=compress)), renamecols=false) - end -end - -function categorical!(df::DataFrame, cols::Union{Type, Nothing}=nothing; - compress::Bool=false) - if compress === nothing - compress = false - categoricalstr = "categorical" - else - categoricalstr = "(x -> categorical(x, compress=$compress))" - end - if cols === nothing - cols = Union{AbstractString, Missing} - Base.depwarn("`categorical!(df)` is deprecated. " * - "Use `transform!(df, names(df, $cols) .=> $categoricalstr, renamecols=false)` instead.", - :categorical!) - else - Base.depwarn("`categorical!(df, T)` is deprecated. " * - "Use `transform!(df, names(df, T) .=> $categoricalstr, renamecols=false)` instead.", - :categorical!) - end - return transform!(df, names(df, cols) .=> (x -> categorical(x, compress=compress)), renamecols=false) -end - -@deprecate DataFrame(pairs::NTuple{N, Pair}; makeunique::Bool=false, - copycols::Bool=true) where {N} DataFrame(pairs..., makeunique=makeunique, copycols=copycols) -@deprecate DataFrame(columns::NTuple{N, AbstractVector}, cnames::NTuple{N, Symbol}; makeunique::Bool=false, - copycols::Bool=true) where {N} DataFrame(collect(columns), collect(cnames); - makeunique=makeunique, copycols=copycols) -@deprecate DataFrame(columns::NTuple{N, AbstractVector}, cnames::NTuple{N, AbstractString}; makeunique::Bool=false, - copycols::Bool=true) where {N} DataFrame(collect(columns), [Symbol(c) for c in cnames]; - makeunique=makeunique, copycols=copycols) -@deprecate DataFrame(columns::NTuple{N, AbstractVector}; - copycols::Bool=true) where {N} DataFrame(collect(columns), - Symbol.(:x, 1:length(columns)), copycols=copycols) - -# this deprecation is very important, becuase without it users will -# get strange results with old code as described in https://github.com/JuliaData/Tables.jl/issues/208 -@deprecate DataFrame(columns::AbstractVector{<:AbstractVector}; makeunique::Bool=false, - copycols::Bool=true) DataFrame(columns, :auto, copycols=copycols) - -@deprecate DataFrame(columns::AbstractMatrix) DataFrame(columns, :auto) - -function DataFrame(column_eltypes::AbstractVector{T}, cnames::AbstractVector{Symbol}, - nrows::Integer=0; makeunique::Bool=false)::DataFrame where T<:Type - Base.depwarn("`DataFrame` constructor with passed eltypes is deprecated. " * - "Pass explicitly created columns to a `DataFrame` constructor instead.", - :DataFrame) - columns = AbstractVector[elty >: Missing ? - fill!(Tables.allocatecolumn(elty, nrows), missing) : - Tables.allocatecolumn(elty, nrows) - for elty in column_eltypes] - return DataFrame(columns, Index(convert(Vector{Symbol}, cnames), - makeunique=makeunique), copycols=false) -end - -DataFrame(column_eltypes::AbstractVector{<:Type}, - cnames::AbstractVector{<:AbstractString}, - nrows::Integer=0; makeunique::Bool=false) = - DataFrame(column_eltypes, Symbol.(cnames), nrows; makeunique=makeunique) - -import Base: convert -@deprecate convert(::Type{DataFrame}, A::AbstractMatrix) DataFrame(Tables.table(A, header=Symbol.(:x, axes(A, 2)))) diff --git a/test/dataframe.jl b/test/dataframe.jl index 2a820c4a8d..29b3ed6ebb 100644 --- a/test/dataframe.jl +++ b/test/dataframe.jl @@ -669,6 +669,7 @@ end DataFrame(variable=:a, min=1, min2=1, max2=2, max=2) @test_throws ArgumentError describe(df, :mean, :all) + @test_throws MethodError describe(DataFrame(a=[1, 2]), cols = :a, "max2" => maximum) end @testset "append!" begin diff --git a/test/deprecated.jl b/test/deprecated.jl index 3c3b145262..049029adc8 100644 --- a/test/deprecated.jl +++ b/test/deprecated.jl @@ -1,235 +1,6 @@ module TestDeprecated -using Test, DataFrames, CategoricalArrays - -const ≅ = isequal - -@testset "DataFrame!" begin - x = [1, 2, 3] - y = [4, 5, 6] - @test DataFrame!(x=x, y=y, copycols=true) == DataFrame(x=x, y=y) - df1 = DataFrame(x=x, y=y) - df2 = DataFrame!(df1) - @test df1 == df2 - @test df1.x === df2.x - @test df1.y === df2.y - - a=[1, 2, 3] - df = DataFrame!(:a=>a, :b=>1, :c=>1:3) - @test propertynames(df) == [:a, :b, :c] - @test df.a === a - - df = DataFrame!("a"=>a, "b"=>1, "c"=>1:3) - @test propertynames(df) == [:a, :b, :c] - @test df."a" === a - - df = DataFrame!(Dict(:a=>a, :b=>1, :c=>1:3)) - @test propertynames(df) == [:a, :b, :c] - @test df.a === a - - df = DataFrame!(Dict("a"=>a, "b"=>1, "c"=>1:3)) - @test propertynames(df) == [:a, :b, :c] - @test df."a" === a - - df = DataFrame!((x, y)) - @test propertynames(df) == [:x1, :x2] - @test df.x1 === x - @test df.x2 === y - - df = DataFrame!((x, y), (:x1, :x2)) - @test propertynames(df) == [:x1, :x2] - @test df.x1 === x - @test df.x2 === y - - df = DataFrame!((x, y), ("x1", "x2")) - @test names(df) == ["x1", "x2"] - @test df."x1" === x - @test df."x2" === y - - @test_throws MethodError DataFrame!([Union{Int, Missing}, Union{Float64, Missing}], - [:x1, :x2], 2) -end - -@testset "test categorical" begin - df = DataFrame(x=["a", "b", "c"], - y=["a", "b", missing], - z=[1, 2, 3]) - for x in [df, view(df, :, :)] - y = categorical(x) - @test y isa DataFrame - @test x ≅ y - @test x.x !== y.x - @test x.y !== y.y - @test x.z !== y.z - @test y.x isa CategoricalVector{String} - @test y.y isa CategoricalVector{Union{Missing, String}} - @test y.z isa Vector{Int} - - y = categorical(x, Int) - @test y isa DataFrame - @test x ≅ y - @test x.x !== y.x - @test x.y !== y.y - @test x.z !== y.z - @test y.x isa Vector{String} - @test y.y isa Vector{Union{Missing, String}} - @test y.z isa CategoricalVector{Int} - - for colsel in [:, names(x), [1, 2, 3], [true, true, true], r"", Not(r"a")] - y = categorical(x, colsel) - @test y isa DataFrame - @test x ≅ y - @test x.x !== y.x - @test x.y !== y.y - @test x.z !== y.z - @test y.x isa CategoricalVector{String} - @test y.y isa CategoricalVector{Union{Missing, String}} - @test y.z isa CategoricalVector{Int} - end - - for colsel in [:x, "x", 1, [:x], ["x"], [1], [true, false, false], r"x", Not(2:3)] - y = categorical(x, colsel) - @test y isa DataFrame - @test x ≅ y - @test x.x !== y.x - @test x.y !== y.y - @test x.z !== y.z - @test y.x isa CategoricalVector{String} - @test y.y isa Vector{Union{Missing, String}} - @test y.z isa Vector{Int} - end - - for colsel in [:z, "z", 3, [:z], ["z"], [3], [false, false, true], r"z", Not(1:2)] - y = categorical(x, colsel) - @test y isa DataFrame - @test x ≅ y - @test x.x !== y.x - @test x.y !== y.y - @test x.z !== y.z - @test y.x isa Vector{String} - @test y.y isa Vector{Union{Missing, String}} - @test y.z isa CategoricalVector{Int} - end - - for colsel in [Int[], Symbol[], [false, false, false], r"a", Not(:)] - y = categorical(x, colsel) - @test y isa DataFrame - @test x ≅ y - @test x.x !== y.x - @test x.y !== y.y - @test x.z !== y.z - @test y.x isa Vector{String} - @test y.y isa Vector{Union{Missing, String}} - @test y.z isa Vector{Int} - end - end -end - -@testset "test categorical!" begin - using DataFrames: _columns - df = DataFrame(A = Vector{Union{Int, Missing}}(1:3), B = Vector{Union{Int, Missing}}(4:6)) - DRT = CategoricalArrays.DefaultRefType - @test all(c -> isa(c, Vector{Union{Int, Missing}}), eachcol(categorical!(deepcopy(df)))) - @test all(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}}, - eachcol(categorical!(deepcopy(df), [1, 2]))) - @test all(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}}, - eachcol(categorical!(deepcopy(df), [:A, :B]))) - @test findfirst(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}}, - _columns(categorical!(deepcopy(df), [:A]))) == 1 - @test findfirst(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}}, - _columns(categorical!(deepcopy(df), :A))) == 1 - @test findfirst(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}}, - _columns(categorical!(deepcopy(df), [1]))) == 1 - @test findfirst(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}}, - _columns(categorical!(deepcopy(df), 1))) == 1 - - @test all(c -> isa(c, Vector{Union{Int, Missing}}), eachcol(categorical!(deepcopy(df)))) - @test all(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}}, - eachcol(categorical!(deepcopy(df), Not(Not([1, 2]))))) - @test all(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}}, - eachcol(categorical!(deepcopy(df), Not(Not([:A, :B]))))) - @test findfirst(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}}, - _columns(categorical!(deepcopy(df), Not(Not([:A]))))) == 1 - @test findfirst(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}}, - _columns(categorical!(deepcopy(df), Not(Not(:A))))) == 1 - @test findfirst(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}}, - _columns(categorical!(deepcopy(df), Not(Not([1]))))) == 1 - @test findfirst(c -> typeof(c) <: CategoricalVector{Union{Int, Missing}}, - _columns(categorical!(deepcopy(df), Not(Not(1))))) == 1 -end - -@testset "categorical!" begin - df = DataFrame([["a", "b"], ['a', 'b'], [true, false], 1:2, ["x", "y"]], :auto) - @test all(map(<:, eltype.(eachcol(categorical!(deepcopy(df)))), - [CategoricalArrays.CategoricalValue{String, UInt32}, - Char, Bool, Int, - CategoricalArrays.CategoricalValue{String, UInt32}])) - @test all(map(<:, eltype.(eachcol(categorical!(deepcopy(df), :))), - [CategoricalArrays.CategoricalValue{String, UInt32}, - CategoricalArrays.CategoricalValue{Char, UInt32}, - CategoricalArrays.CategoricalValue{Bool, UInt32}, - CategoricalArrays.CategoricalValue{Int, UInt32}, - CategoricalArrays.CategoricalValue{String, UInt32}])) - @test all(map(<:, eltype.(eachcol(categorical!(deepcopy(df), compress=true))), - [CategoricalArrays.CategoricalValue{String, UInt8}, - Char, Bool, Int, - CategoricalArrays.CategoricalValue{String, UInt8}])) - @test all(map(<:, eltype.(eachcol(categorical!(deepcopy(df), names(df)))), - [CategoricalArrays.CategoricalValue{String, UInt32}, - CategoricalArrays.CategoricalValue{Char, UInt32}, - CategoricalArrays.CategoricalValue{Bool, UInt32}, - CategoricalArrays.CategoricalValue{Int, UInt32}, - CategoricalArrays.CategoricalValue{String, UInt32}])) - @test all(map(<:, eltype.(eachcol(categorical!(deepcopy(df), names(df), compress=true))), - [CategoricalArrays.CategoricalValue{String, UInt8}, - CategoricalArrays.CategoricalValue{Char, UInt8}, - CategoricalArrays.CategoricalValue{Bool, UInt8}, - CategoricalArrays.CategoricalValue{Int, UInt8}, - CategoricalArrays.CategoricalValue{String, UInt8}])) - @test all(map(<:, eltype.(eachcol(categorical!(deepcopy(df), Not(1:0)))), - [CategoricalArrays.CategoricalValue{String, UInt32}, - CategoricalArrays.CategoricalValue{Char, UInt32}, - CategoricalArrays.CategoricalValue{Bool, UInt32}, - CategoricalArrays.CategoricalValue{Int, UInt32}, - CategoricalArrays.CategoricalValue{String, UInt32}])) - @test all(map(<:, eltype.(eachcol(categorical!(deepcopy(df), Not(1:0), compress=true))), - [CategoricalArrays.CategoricalValue{String, UInt8}, - CategoricalArrays.CategoricalValue{Char, UInt8}, - CategoricalArrays.CategoricalValue{Bool, UInt8}, - CategoricalArrays.CategoricalValue{Int, UInt8}, - CategoricalArrays.CategoricalValue{String, UInt8}])) - - @test all(map(<:, eltype.(eachcol(categorical!(deepcopy(df), Integer))), - [String, Char, - CategoricalArrays.CategoricalValue{Bool, UInt32}, - CategoricalArrays.CategoricalValue{Int, UInt32}, - String])) - - df = DataFrame([["a", missing]], :auto) - categorical!(df) - @test df.x1 isa CategoricalVector{Union{Missing, String}} - - df = DataFrame(x1=[1, 2]) - categorical!(df) - @test df.x1 isa Vector{Int} - categorical!(df, :) - @test df.x1 isa CategoricalVector{Int} -end - -@testset "categorical with Cols, All and Between" begin - df = DataFrame(x1=["a", "b"], y=[2, 3]) - categorical(df, All()) - categorical(df, Cols()) - categorical(df, Between(1, 2)) - categorical!(df, All()) - categorical!(df, Cols()) - categorical!(df, Between(1, 2)) -end - -@testset "deprecated describe syntax" begin - @test describe(DataFrame(a=[1, 2]), cols = :a, :min, :min2 => minimum, "max2" => maximum, :max) == - DataFrame(variable=:a, min=1, min2=1, max2=2, max=2) -end +using Test, DataFrames @testset "All indexing" begin df = DataFrame(a=1, b=2, c=3) @@ -282,158 +53,4 @@ end @test df[:, All(Not(r"1"), r"a")] == df[:, [2, 4, 1]] end -@testset "deprecated DataFrame constructors" begin - @test DataFrame(([1, 2], [3, 4])) == DataFrame([[1, 2], [3, 4]], :auto) - @test DataFrame((categorical([1, 2]), categorical([3, 4]))) == - DataFrame([categorical([1, 2]), categorical([3, 4])], :auto) - @test DataFrame(([1, 2], [3, 4]), ("a", "b")) == DataFrame([[1, 2], [3, 4]], ["a", "b"]) - @test DataFrame(([1, 2], [3, 4]), (:a, :b)) == DataFrame([[1, 2], [3, 4]], [:a, :b]) - @test DataFrame(([1, 2, 3], [1, 2, 3])) == DataFrame((1:3, 1:3)) == DataFrame((1:3, [1, 2, 3])) - @test DataFrame(("x1"=>1:3, "x2"=>[1, 2, 3])) == DataFrame(["x1"=>1:3, "x2"=>[1, 2, 3]]) - @test DataFrame((:x1=>1:3, :x2=>[1, 2, 3])) == DataFrame([:x1=>1:3, :x2=>[1, 2, 3]]) - @inferred DataFrame((1:3, 1:3)) - @inferred DataFrame((1:3, 1:3), (:a, :b)) - @inferred DataFrame((1:3, 1:3), ("a", "b")) - @inferred DataFrame((:x1=>1:3, :x2=>[1, 2, 3])) - @inferred DataFrame(("x1"=>1:3, "x2"=>[1, 2, 3])) - @test DataFrame(Union{Float64, Missing}[0.0 1.0; - 0.0 1.0; - 0.0 1.0]) == - convert(DataFrame, Union{Float64, Missing}[0.0 1.0; - 0.0 1.0; - 0.0 1.0]) - @test names(DataFrame([0.0 1.0; - 0.0 1.0; - 0.0 1.0], ["a", "b"])) == ["a", "b"] - @test names(DataFrame([0.0 1.0; - 0.0 1.0; - 0.0 1.0], [:a, :b])) == ["a", "b"] - - x = [1, 2, 3] - y = [1, 2, 3] - - df = DataFrame((x, y)) - @test propertynames(df) == [:x1, :x2] - @test df.x1 == x - @test df.x2 == y - @test df.x1 !== x - @test df.x2 !== y - df = DataFrame((x, y), copycols=true) - @test propertynames(df) == [:x1, :x2] - @test df.x1 == x - @test df.x2 == y - @test df.x1 !== x - @test df.x2 !== y - df = DataFrame((x, y), copycols=false) - @test propertynames(df) == [:x1, :x2] - @test df.x1 === x - @test df.x2 === y - - df = DataFrame((x, y), (:x1, :x2)) - @test propertynames(df) == [:x1, :x2] - @test df.x1 == x - @test df.x2 == y - @test df.x1 !== x - @test df.x2 !== y - df = DataFrame((x, y), (:x1, :x2), copycols=true) - @test propertynames(df) == [:x1, :x2] - @test df.x1 == x - @test df.x2 == y - @test df.x1 !== x - @test df.x2 !== y - df = DataFrame((x, y), (:x1, :x2), copycols=false) - @test propertynames(df) == [:x1, :x2] - @test df.x1 === x - @test df.x2 === y - - df = DataFrame((x, y), ("x1", "x2")) - @test names(df) == ["x1", "x2"] - @test df."x1" == x - @test df."x2" == y - @test df."x1" !== x - @test df."x2" !== y - df = DataFrame((x, y), ("x1", "x2"), copycols=true) - @test names(df) == ["x1", "x2"] - @test df."x1" == x - @test df."x2" == y - @test df."x1" !== x - @test df."x2" !== y - df = DataFrame((x, y), ("x1", "x2"), copycols=false) - @test names(df) == ["x1", "x2"] - @test df."x1" === x - @test df."x2" === y - - df = DataFrame([1 2; 3 4], :auto) - @test size(df) == (2, 2) - @test df.x1 == [1, 3] - @test df.x2 == [2, 4] - - df = DataFrame([Union{Int, Missing}, Union{Float64, Missing}, Union{String, Missing}], - [:A, :B, :C], 100) - @test size(df, 1) == 100 - @test size(df, 2) == 3 - @test typeof(df[!, 1]) == Vector{Union{Int, Missing}} - @test typeof(df[!, 2]) == Vector{Union{Float64, Missing}} - @test typeof(df[!, 3]) == Vector{Union{String, Missing}} - @test all(ismissing, df[!, 1]) - @test all(ismissing, df[!, 2]) - @test all(ismissing, df[!, 3]) - - df = DataFrame([Union{Int, Missing}, Union{Float64, Missing}, Union{String, Missing}], - ["A", "B", "C"], 100) - @test size(df, 1) == 100 - @test size(df, 2) == 3 - @test typeof(df[!, "A"]) == Vector{Union{Int, Missing}} - @test typeof(df[!, "B"]) == Vector{Union{Float64, Missing}} - @test typeof(df[!, "C"]) == Vector{Union{String, Missing}} - @test all(ismissing, df[!, "A"]) - @test all(ismissing, df[!, "B"]) - @test all(ismissing, df[!, "C"]) - - df = DataFrame([Union{Int, Missing}, Union{Float64, Missing}], [:x1, :x2], 2) - @test size(df) == (2, 2) - @test eltype.(eachcol(df)) == [Union{Int, Missing}, Union{Float64, Missing}] - - @test_throws MethodError DataFrame([Union{Int, Missing}, Union{Float64, Missing}], - [:x1, :x2], 2, copycols=false) - @test size(df) == (2, 2) - @test eltype.(eachcol(df)) == [Union{Int, Missing}, Union{Float64, Missing}] - - df = DataFrame([Union{Int, Missing}, Union{Float64, Missing}, Union{String, Missing}], - [:A, :B, :C]) - @test size(df, 1) == 0 - @test size(df, 2) == 3 - @test typeof(df[!, 1]) == Vector{Union{Int, Missing}} - @test typeof(df[!, 2]) == Vector{Union{Float64, Missing}} - @test typeof(df[!, 3]) == Vector{Union{String, Missing}} - @test propertynames(df) == [:A, :B, :C] - - df = DataFrame([Union{Int, Missing}, Union{Float64, Missing}, Union{String, Missing}], - ["A", "B", "C"]) - @test size(df, 1) == 0 - @test size(df, 2) == 3 - @test typeof(df[!, "A"]) == Vector{Union{Int, Missing}} - @test typeof(df[!, "B"]) == Vector{Union{Float64, Missing}} - @test typeof(df[!, "C"]) == Vector{Union{String, Missing}} - @test names(df) == ["A", "B", "C"] - - df = convert(DataFrame, zeros(10, 5)) - @test size(df, 1) == 10 - @test size(df, 2) == 5 - @test typeof(df[!, 1]) == Vector{Float64} - @test typeof(df[:, 1]) == Vector{Float64} - - df = convert(DataFrame, ones(10, 5)) - @test size(df, 1) == 10 - @test size(df, 2) == 5 - @test typeof(df[!, 1]) == Vector{Float64} - @test typeof(df[:, 1]) == Vector{Float64} - - df = convert(DataFrame, Matrix{Float64}(undef, 10, 5)) - @test size(df, 1) == 10 - @test size(df, 2) == 5 - @test typeof(df[!, 1]) == Vector{Float64} - @test typeof(df[:, 1]) == Vector{Float64} -end - end # module diff --git a/test/show.jl b/test/show.jl index 33c96b7078..bc809920ea 100644 --- a/test/show.jl +++ b/test/show.jl @@ -12,7 +12,7 @@ Base.show(io::IO, f::F) = show(io, f.i) module TestShow -using DataFrames, Dates, Random, Test +using DataFrames, Dates, Random, Test, CategoricalArrays import Main: ⛵⛵⛵⛵⛵, F From f1e661aad01f459086910d3f850dfc5189e6a9c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Fri, 20 Nov 2020 18:30:20 +0100 Subject: [PATCH 2/7] more informative error for removed constructors --- src/dataframe/dataframe.jl | 24 ++++++++++++++++++++++++ test/constructors.jl | 9 +++++++++ 2 files changed, 33 insertions(+) diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl index 0a5bdb7a28..6c34d2fe1f 100644 --- a/src/dataframe/dataframe.jl +++ b/src/dataframe/dataframe.jl @@ -337,6 +337,30 @@ function DataFrame(columns::AbstractMatrix, cnames::Symbol) return DataFrame(columns, gennames(size(columns, 2)), makeunique=false) end +# Discontinued constructors + +DataFrame(matrix::Matrix) = + throw(ArgumentError("`DataFrame` constructor from a `Matrix` requires " * + "passing :auto as a second argument to automatically " * + "generate column names: `DataFrame(matrix, :auto)`")) + +DataFrame(matrix::Vector{<:AbstractVector}) = + throw(ArgumentError("`DataFrame` constructor from a `Vector` of vectors requires " * + "passing :auto as a second argument to automatically " * + "generate column names: `DataFrame(matrix, :auto)`")) + +DataFrame(column_eltypes::AbstractVector{T}, cnames::AbstractVector{Symbol}, + nrows::Integer=0; makeunique::Bool=false) where T<:Type = + throw(ArgumentError("`DataFrame` constructor with passed eltypes is " * + "deprecated. Pass explicitly created columns to a " * + "`DataFrame` constructor instead.")) + +DataFrame(column_eltypes::AbstractVector{<:Type}, cnames::AbstractVector{<:AbstractString}, + nrows::Integer=0; makeunique::Bool=false) where T<:Type = + throw(ArgumentError("`DataFrame` constructor with passed eltypes is " * + "deprecated. Pass explicitly created columns to a " * + "`DataFrame` constructor instead.")) + ############################################################################## ## diff --git a/test/constructors.jl b/test/constructors.jl index f4407d0632..a910855c2b 100644 --- a/test/constructors.jl +++ b/test/constructors.jl @@ -355,4 +355,13 @@ end end +@testset "removed constructors" begin + @test_throws ArgumentError DataFrame([1 2; 3 4]) + @test_throws ArgumentError DataFrame([[1 2], [3 4]]) + @test_throws ArgumentError DataFrame([Int, Float64], [:a, :b]) + @test_throws ArgumentError DataFrame([Int, Float64], [:a, :b], 2) + @test_throws ArgumentError DataFrame([Int, Float64], ["a", "b"]) + @test_throws ArgumentError DataFrame([Int, Float64], ["a", "b"], 2) +end + end # module From 589d99c45d8b416ef35b78ceb8bb2a116075f2a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Fri, 20 Nov 2020 20:11:05 +0100 Subject: [PATCH 3/7] Update NEWS.md --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 6180d0c4a5..edc65c929b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -15,7 +15,7 @@ ## Deprecated * all old deprecations now throw an error - ([]()) + ([#2554](https://github.com/JuliaData/DataFrames.jl/pull/2554)) ## Dependency changes From 240b605b95ffa358fb61f69f375f76f052fce40b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Fri, 20 Nov 2020 20:20:05 +0100 Subject: [PATCH 4/7] remove two methods that are not used any more --- src/dataframe/dataframe.jl | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl index 6c34d2fe1f..be92b3073c 100644 --- a/src/dataframe/dataframe.jl +++ b/src/dataframe/dataframe.jl @@ -508,17 +508,6 @@ Base.getindex(df::DataFrame, row_ind::typeof(!), ## ############################################################################## -function nextcolname(df::DataFrame) - col = Symbol(string("x", ncol(df) + 1)) - hasproperty(df, col) || return col - i = 1 - while true - col = Symbol(string("x", ncol(df) + 1, "_", i)) - hasproperty(df, col) || return col - i += 1 - end -end - # Will automatically add a new column if needed function insert_single_column!(df::DataFrame, v::AbstractVector, col_ind::ColumnIndex) if ncol(df) != 0 && nrow(df) != length(v) @@ -548,18 +537,6 @@ function insert_single_entry!(df::DataFrame, v::Any, row_ind::Integer, col_ind:: end end -function insert_multiple_entries!(df::DataFrame, - v::Any, - row_inds::AbstractVector, - col_ind::ColumnIndex) - if haskey(index(df), col_ind) - _columns(df)[index(df)[col_ind]][row_inds] .= v - return v - else - throw(ArgumentError("Cannot assign to non-existent column: $col_ind")) - end -end - # df[!, SingleColumnIndex] = AbstractVector function Base.setindex!(df::DataFrame, v::AbstractVector, ::typeof(!), col_ind::ColumnIndex) insert_single_column!(df, v, col_ind) From 5973f360118a8fd2e01c11b8cab3ff83d1443345 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Thu, 26 Nov 2020 09:26:31 +0100 Subject: [PATCH 5/7] Apply suggestions from code review Co-authored-by: Milan Bouchet-Valat --- src/abstractdataframe/abstractdataframe.jl | 2 +- src/dataframe/dataframe.jl | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl index e38314a42c..e3ddb6f647 100644 --- a/src/abstractdataframe/abstractdataframe.jl +++ b/src/abstractdataframe/abstractdataframe.jl @@ -563,7 +563,7 @@ DataAPI.describe(df::AbstractDataFrame, DataAPI.describe(df::AbstractDataFrame; cols=:) = _describe(select(df, cols, copycols=false), - Any[:mean, :min, :median, :max, :nmissing, :eltype]) + [:mean, :min, :median, :max, :nmissing, :eltype]) function _describe(df::AbstractDataFrame, stats::AbstractVector) predefined_funs = Symbol[s for s in stats if s isa Symbol] diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl index be92b3073c..8d59fc7dd8 100644 --- a/src/dataframe/dataframe.jl +++ b/src/dataframe/dataframe.jl @@ -344,10 +344,10 @@ DataFrame(matrix::Matrix) = "passing :auto as a second argument to automatically " * "generate column names: `DataFrame(matrix, :auto)`")) -DataFrame(matrix::Vector{<:AbstractVector}) = +DataFrame(vecs::Vector{<:AbstractVector}) = throw(ArgumentError("`DataFrame` constructor from a `Vector` of vectors requires " * "passing :auto as a second argument to automatically " * - "generate column names: `DataFrame(matrix, :auto)`")) + "generate column names: `DataFrame(vecs, :auto)`")) DataFrame(column_eltypes::AbstractVector{T}, cnames::AbstractVector{Symbol}, nrows::Integer=0; makeunique::Bool=false) where T<:Type = From 38208480fd5e7b29d1bf6a227833e3e00ac63ae8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Thu, 26 Nov 2020 23:54:41 +0100 Subject: [PATCH 6/7] updates after the review --- NEWS.md | 3 --- test/constructors.jl | 2 +- test/deprecated.jl | 5 +++++ 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/NEWS.md b/NEWS.md index edc65c929b..e9e9295f88 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,9 +6,6 @@ ## Bug fixes -* fixed bug when displaying floating point columns with eltype turned off in - text/plain ([2542](https://github.com/JuliaData/DataFrames.jl/pull/2542)) - ## New functionalities diff --git a/test/constructors.jl b/test/constructors.jl index a910855c2b..7a3cf2f3b3 100644 --- a/test/constructors.jl +++ b/test/constructors.jl @@ -357,7 +357,7 @@ end @testset "removed constructors" begin @test_throws ArgumentError DataFrame([1 2; 3 4]) - @test_throws ArgumentError DataFrame([[1 2], [3 4]]) + @test_throws ArgumentError DataFrame([[1, 2], [3, 4]]) @test_throws ArgumentError DataFrame([Int, Float64], [:a, :b]) @test_throws ArgumentError DataFrame([Int, Float64], [:a, :b], 2) @test_throws ArgumentError DataFrame([Int, Float64], ["a", "b"]) diff --git a/test/deprecated.jl b/test/deprecated.jl index 049029adc8..efabfc25ec 100644 --- a/test/deprecated.jl +++ b/test/deprecated.jl @@ -2,6 +2,11 @@ module TestDeprecated using Test, DataFrames +@testset "by and aggregate" begin + @test_throws by() + @test_throws aggregate() +end + @testset "All indexing" begin df = DataFrame(a=1, b=2, c=3) From c20003edc0f172ea20700589ab23cf5e429413d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Fri, 27 Nov 2020 00:28:30 +0100 Subject: [PATCH 7/7] fix test --- test/deprecated.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/deprecated.jl b/test/deprecated.jl index efabfc25ec..4dcfcded0b 100644 --- a/test/deprecated.jl +++ b/test/deprecated.jl @@ -3,8 +3,8 @@ module TestDeprecated using Test, DataFrames @testset "by and aggregate" begin - @test_throws by() - @test_throws aggregate() + @test_throws ArgumentError by() + @test_throws ArgumentError aggregate() end @testset "All indexing" begin