From 197e7b0d74a037e33de651e1303c11df666756bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 31 Aug 2020 15:25:08 +0200 Subject: [PATCH 1/5] deprecate name => fun in favor of fun => name in describe --- src/abstractdataframe/abstractdataframe.jl | 28 +++++++++++++--------- test/dataframe.jl | 7 ++++-- 2 files changed, 22 insertions(+), 13 deletions(-) diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl index 51a69236b1..23b74d2cab 100644 --- a/src/abstractdataframe/abstractdataframe.jl +++ b/src/abstractdataframe/abstractdataframe.jl @@ -475,7 +475,7 @@ where each row represents a variable and each column a summary statistic. `:nmissing`. The default statistics used are `:mean`, `:min`, `:median`, `:max`, `:nmissing`, and `:eltype`. - `:all` as the only `Symbol` argument to return all statistics. - - A `name => function` pair where `name` is a `Symbol` or string. This will + - A `function => name` pair where `name` is a `Symbol` or string. This will create a column of summary statistics with the provided name. - `cols` : a keyword argument allowing to select only a subset of columns from `df` to describe. Can be any column selector ($COLUMNINDEX_STR; $MULTICOLUMNINDEX_STR). @@ -521,7 +521,7 @@ julia> describe(df, :min, :max) │ 2 │ x │ 0.1 │ 1.0 │ │ 3 │ y │ 'a' │ 'j' │ -julia> describe(df, :min, :sum => sum) +julia> describe(df, :min, sum => :sum) 3×3 DataFrame │ Row │ variable │ min │ sum │ │ │ Symbol │ Any │ Any │ @@ -530,7 +530,7 @@ julia> describe(df, :min, :sum => sum) │ 2 │ x │ 0.1 │ 5.5 │ │ 3 │ y │ 'a' │ │ -julia> describe(df, :min, :sum => sum, cols=:x) +julia> describe(df, cols=:x, :min, sum => :sum) 1×3 DataFrame │ Row │ variable │ min │ sum │ │ │ Symbol │ Float64 │ Float64 │ @@ -538,11 +538,17 @@ julia> describe(df, :min, :sum => sum, cols=:x) │ 1 │ x │ 0.1 │ 5.5 │ ``` """ -DataAPI.describe(df::AbstractDataFrame, - stats::Union{Symbol, Pair{<:SymbolOrString}}...; - cols=:) = - _describe(select(df, cols, copycols=false), collect(stats)) - +function DataAPI.describe(df::AbstractDataFrame, + stats::Union{Symbol, + Pair{<:Base.Callable,<:SymbolOrString}, + Pair{<:SymbolOrString}}...; # TODO: remove after deprecation + cols=:) + if any(x -> x isa Pair{<:SymbolOrString}, stats) + Base.depwarn("name => function order is deprecated; use function => name instead", :describe) + end + return _describe(select(df, cols, copycols=false), + [s isa Pair{<:SymbolOrString} ? last(s) => first(s) : s for s in stats]) +end DataAPI.describe(df::AbstractDataFrame; cols=:) = _describe(select(df, cols, copycols=false), [:mean, :min, :median, :max, :nmissing, :eltype]) @@ -565,9 +571,9 @@ function _describe(df::AbstractDataFrame, stats::AbstractVector) throw(ArgumentError(":$not_allowed not allowed." * allowed_msg)) end - custom_funs = Pair[Symbol(s[1]) => s[2] for s in stats if s isa Pair] + custom_funs = Pair[s[1] => Symbol(s[2]) for s in stats if s isa Pair] - ordered_names = [s isa Symbol ? s : Symbol(first(s)) for s in stats] + ordered_names = [s isa Symbol ? s : Symbol(last(s)) for s in stats] if !allunique(ordered_names) df_ord_names = DataFrame(ordered_names = ordered_names) @@ -662,7 +668,7 @@ end function get_stats!(d::Dict, col::AbstractVector, stats::AbstractVector{<:Pair}) for stat in stats - d[stat[1]] = try stat[2](col) catch end + d[stat[2]] = try stat[1](col) catch end end end diff --git a/test/dataframe.jl b/test/dataframe.jl index 157329e72a..2ac06d7f89 100644 --- a/test/dataframe.jl +++ b/test/dataframe.jl @@ -669,9 +669,9 @@ end describe_output.test_std = describe_output.std # Test that describe works with a Pair and a symbol @test describe_output[:, [:variable, :mean, :test_std]] ≅ - describe(df, :mean, :test_std => std) + describe(df, :mean, std => :test_std) @test describe_output[:, [:variable, :mean, :test_std]] ≅ - describe(df, :mean, "test_std" => std) + describe(df, :mean, std => "test_std") # Test that describe works with a dataframe with no observations df = DataFrame(a = Int[], b = String[], c = []) @@ -682,6 +682,9 @@ end @test describe(df, cols=Not(1)) ≅ describe(select(df, Not(1))) @test describe(df, cols=Not("a")) ≅ describe(select(df, Not(1))) + @test describe(DataFrame(a=[1,2]), cols = :a, :min, minimum => :min2, maximum => "max2", :max) == + DataFrame(variable=:a, min=1, min2=1, max2=2, max=2) + @test_throws ArgumentError describe(df, :mean, :all) end From bf7fc5b19ae7d68250e28ebf23d7ff946a23ad94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 31 Aug 2020 15:28:08 +0200 Subject: [PATCH 2/5] add NEWS.md --- NEWS.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 6c4d79cfe9..309cc7fa1b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -24,7 +24,10 @@ ([#2357](https://github.com/JuliaData/DataFrames.jl/pull/2357)) * the `categorical` and `categorical!` functions have been deprecated in favor of `transform(df, cols .=> categorical .=> cols)` and similar syntaxes - [#2394]((https://github.com/JuliaData/DataFrames.jl/pull/2394)) + ([#2394](https://github.com/JuliaData/DataFrames.jl/pull/2394)) +* in `describe` the specification of custom aggregation is now `function => name`; + old `name => function` order is now deprecated + ([#2401](https://github.com/JuliaData/DataFrames.jl/pull/2401)) ## New functionalities From eb2d1729b4fc82991b1578f0af8dc21feeeefe67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 31 Aug 2020 19:16:28 +0200 Subject: [PATCH 3/5] fix indentation --- src/abstractdataframe/abstractdataframe.jl | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl index 23b74d2cab..3eb72f0171 100644 --- a/src/abstractdataframe/abstractdataframe.jl +++ b/src/abstractdataframe/abstractdataframe.jl @@ -538,16 +538,15 @@ julia> describe(df, cols=:x, :min, sum => :sum) │ 1 │ x │ 0.1 │ 5.5 │ ``` """ -function DataAPI.describe(df::AbstractDataFrame, - stats::Union{Symbol, - Pair{<:Base.Callable,<:SymbolOrString}, - Pair{<:SymbolOrString}}...; # TODO: remove after deprecation +function DataAPI.describe(df::AbstractDataFrame, stats::Union{Symbol, + Pair{<:Base.Callable,<:SymbolOrString}, + Pair{<:SymbolOrString}}...; # TODO: remove after deprecation cols=:) if any(x -> x isa Pair{<:SymbolOrString}, stats) Base.depwarn("name => function order is deprecated; use function => name instead", :describe) end return _describe(select(df, cols, copycols=false), - [s isa Pair{<:SymbolOrString} ? last(s) => first(s) : s for s in stats]) + [s isa Pair{<:SymbolOrString} ? last(s) => first(s) : s for s in stats]) end DataAPI.describe(df::AbstractDataFrame; cols=:) = _describe(select(df, cols, copycols=false), From 91d6c25d774f6e9497ec8b30572879da062bd5cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 7 Sep 2020 17:49:25 +0200 Subject: [PATCH 4/5] Apply suggestions from code review Co-authored-by: Milan Bouchet-Valat --- NEWS.md | 3 --- src/abstractdataframe/abstractdataframe.jl | 4 ++-- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/NEWS.md b/NEWS.md index 0c288941a1..68612ec5ea 100644 --- a/NEWS.md +++ b/NEWS.md @@ -32,9 +32,6 @@ choose the fast path only when it is safe; this resolves inconsistencies with what the same functions not using fast path produce ([#2357](https://github.com/JuliaData/DataFrames.jl/pull/2357)) -* the `categorical` and `categorical!` functions have been deprecated in favor of - `transform(df, cols .=> categorical .=> cols)` and similar syntaxes - ([#2394](https://github.com/JuliaData/DataFrames.jl/pull/2394)) * in `describe` the specification of custom aggregation is now `function => name`; old `name => function` order is now deprecated ([#2401](https://github.com/JuliaData/DataFrames.jl/pull/2401)) diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl index 3eb72f0171..b1964f3d1a 100644 --- a/src/abstractdataframe/abstractdataframe.jl +++ b/src/abstractdataframe/abstractdataframe.jl @@ -530,7 +530,7 @@ julia> describe(df, :min, sum => :sum) │ 2 │ x │ 0.1 │ 5.5 │ │ 3 │ y │ 'a' │ │ -julia> describe(df, cols=:x, :min, sum => :sum) +julia> describe(df, :min, sum => :sum, cols=:x) 1×3 DataFrame │ Row │ variable │ min │ sum │ │ │ Symbol │ Float64 │ Float64 │ @@ -546,7 +546,7 @@ function DataAPI.describe(df::AbstractDataFrame, stats::Union{Symbol, Base.depwarn("name => function order is deprecated; use function => name instead", :describe) end return _describe(select(df, cols, copycols=false), - [s isa Pair{<:SymbolOrString} ? last(s) => first(s) : s for s in stats]) + Any[s isa Pair{<:SymbolOrString} ? last(s) => first(s) : s for s in stats]) end DataAPI.describe(df::AbstractDataFrame; cols=:) = _describe(select(df, cols, copycols=false), From 2508debb1d2b32eda27ca2f4f4938bbca7749d4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 7 Sep 2020 17:52:34 +0200 Subject: [PATCH 5/5] add deprecated test --- test/deprecated.jl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/deprecated.jl b/test/deprecated.jl index 189df3d9f8..20b4600dfb 100644 --- a/test/deprecated.jl +++ b/test/deprecated.jl @@ -226,4 +226,9 @@ end categorical!(df, Between(1,2)) end +@testset "deprecated describe syntax" begin + @test describe(DataFrame(a=[1,2]), cols = :a, :min, :min2 => minimum, "max2" => maximum, :max) == + DataFrame(variable=:a, min=1, min2=1, max2=2, max=2) +end + end # module