Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[BREAKING] deprecate name => fun in favor of fun => name in describe #2401

Merged
merged 6 commits into from
Sep 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@
choose the fast path only when it is safe; this resolves inconsistencies
with what the same functions not using fast path produce
([#2357](https://github.com/JuliaData/DataFrames.jl/pull/2357))
* in `describe`, a custom aggregation is now specified as `function => name`;
the old `name => function` order is deprecated
([#2401](https://github.com/JuliaData/DataFrames.jl/pull/2401))

## New functionalities

Expand Down
27 changes: 16 additions & 11 deletions src/abstractdataframe/abstractdataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,7 @@ where each row represents a variable and each column a summary statistic.
`:nmissing`. The default statistics used are `:mean`, `:min`, `:median`,
`:max`, `:nmissing`, and `:eltype`.
- `:all` as the only `Symbol` argument to return all statistics.
- A `name => function` pair where `name` is a `Symbol` or string. This will
- A `function => name` pair where `name` is a `Symbol` or string. This will
apply `function` to each described column and store the results in a column
of summary statistics called `name`.
- `cols` : a keyword argument that selects only a subset of columns from `df`
to describe. Can be any column selector ($COLUMNINDEX_STR; $MULTICOLUMNINDEX_STR).
Expand Down Expand Up @@ -521,7 +521,7 @@ julia> describe(df, :min, :max)
│ 2 │ x │ 0.1 │ 1.0 │
│ 3 │ y │ 'a' │ 'j' │

julia> describe(df, :min, :sum => sum)
julia> describe(df, :min, sum => :sum)
3×3 DataFrame
│ Row │ variable │ min │ sum │
│ │ Symbol │ Any │ Any │
Expand All @@ -530,19 +530,24 @@ julia> describe(df, :min, :sum => sum)
│ 2 │ x │ 0.1 │ 5.5 │
│ 3 │ y │ 'a' │ │

julia> describe(df, :min, :sum => sum, cols=:x)
julia> describe(df, :min, sum => :sum, cols=:x)
1×3 DataFrame
│ Row │ variable │ min │ sum │
│ │ Symbol │ Float64 │ Float64 │
├─────┼──────────┼─────────┼─────────┤
│ 1 │ x │ 0.1 │ 5.5 │
```
"""
DataAPI.describe(df::AbstractDataFrame,
stats::Union{Symbol, Pair{<:SymbolOrString}}...;
cols=:) =
# Restrict `df` to the columns chosen by `cols`, then compute the requested
# statistics; `collect` turns the `stats` varargs tuple into a vector.
_describe(select(df, cols, copycols=false), collect(stats))

# Variadic `describe` method taking explicit statistics. Each statistic is a
# `Symbol`, a `function => name` pair, or (deprecated) a `name => function` pair.
function DataAPI.describe(df::AbstractDataFrame, stats::Union{Symbol,
Pair{<:Base.Callable,<:SymbolOrString},
Pair{<:SymbolOrString}}...; # TODO: remove after deprecation
cols=:)
bkamins marked this conversation as resolved.
Show resolved Hide resolved
# Emit a deprecation warning if any statistic still uses the old
# `name => function` order (first element is a `SymbolOrString`).
if any(x -> x isa Pair{<:SymbolOrString}, stats)
Base.depwarn("name => function order is deprecated; use function => name instead", :describe)
end
# Flip legacy pairs to `function => name` so `_describe` only receives the new
# order; `Any[...]` keeps the mixed Symbol/Pair elements in one untyped vector.
return _describe(select(df, cols, copycols=false),
Any[s isa Pair{<:SymbolOrString} ? last(s) => first(s) : s for s in stats])
end
# No statistics given: describe with the default set of statistics listed below.
DataAPI.describe(df::AbstractDataFrame; cols=:) =
_describe(select(df, cols, copycols=false),
[:mean, :min, :median, :max, :nmissing, :eltype])
Expand All @@ -565,9 +570,9 @@ function _describe(df::AbstractDataFrame, stats::AbstractVector)
throw(ArgumentError(":$not_allowed not allowed." * allowed_msg))
end

custom_funs = Pair[Symbol(s[1]) => s[2] for s in stats if s isa Pair]
custom_funs = Pair[s[1] => Symbol(s[2]) for s in stats if s isa Pair]

ordered_names = [s isa Symbol ? s : Symbol(first(s)) for s in stats]
ordered_names = [s isa Symbol ? s : Symbol(last(s)) for s in stats]

if !allunique(ordered_names)
df_ord_names = DataFrame(ordered_names = ordered_names)
Expand Down Expand Up @@ -662,7 +667,7 @@ end

# Evaluate every custom statistic in `stats` on `col` and store the results in `d`.
# A statistic whose call throws is stored as `nothing`, because the empty
# `catch` swallows the error.
function get_stats!(d::Dict, col::AbstractVector, stats::AbstractVector{<:Pair})
for stat in stats
# Pre-change diff line: `name => function` order, result keyed by `stat[1]`.
d[stat[1]] = try stat[2](col) catch end
# Post-change diff line: `function => name` order, result keyed by `stat[2]`.
d[stat[2]] = try stat[1](col) catch end
end
end

Expand Down
7 changes: 5 additions & 2 deletions test/dataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -670,9 +670,9 @@ end
describe_output.test_std = describe_output.std
# Test that describe works with a Pair and a symbol
@test describe_output[:, [:variable, :mean, :test_std]] ≅
describe(df, :mean, :test_std => std)
describe(df, :mean, std => :test_std)
@test describe_output[:, [:variable, :mean, :test_std]] ≅
describe(df, :mean, "test_std" => std)
describe(df, :mean, std => "test_std")

# Test that describe works with a dataframe with no observations
df = DataFrame(a = Int[], b = String[], c = [])
Expand All @@ -683,6 +683,9 @@ end
@test describe(df, cols=Not(1)) ≅ describe(select(df, Not(1)))
@test describe(df, cols=Not("a")) ≅ describe(select(df, Not(1)))

@test describe(DataFrame(a=[1,2]), cols = :a, :min, minimum => :min2, maximum => "max2", :max) ==
DataFrame(variable=:a, min=1, min2=1, max2=2, max=2)

@test_throws ArgumentError describe(df, :mean, :all)
end
bkamins marked this conversation as resolved.
Show resolved Hide resolved

Expand Down
5 changes: 5 additions & 0 deletions test/deprecated.jl
Original file line number Diff line number Diff line change
Expand Up @@ -226,4 +226,9 @@ end
categorical!(df, Between(1,2))
end

@testset "deprecated describe syntax" begin
# The legacy `name => function` pairs (with a Symbol or a string as the name)
# are still accepted; the expected table has columns `min2` and `max2`
# named after the first element of each pair.
@test describe(DataFrame(a=[1,2]), cols = :a, :min, :min2 => minimum, "max2" => maximum, :max) ==
DataFrame(variable=:a, min=1, min2=1, max2=2, max=2)
end

end # module