Skip to content

Commit

Permalink
Improve sorting docstrings. Deprecate passing no columns behavior. (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
Chandu-4444 authored Nov 21, 2021
1 parent 9cc3446 commit 9791095
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 8 deletions.
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,10 @@

* `delete!` is deprecated in favor of `deleteat!`
([#2854](https://github.com/JuliaData/DataFrames.jl/issues/2854))
* In `sort`, `sort!`, `issorted` and `sortperm` it is now documented
that passing an empty column selector uses lexicographic ordering
on all columns, but this behavior is deprecated.
([#2941](https://github.com/JuliaData/DataFrames.jl/issues/2941))

## Planned changes

Expand Down
50 changes: 44 additions & 6 deletions src/abstractdataframe/sort.jl
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,8 @@ function ordering(df::AbstractDataFrame, cols::AbstractVector, lt::Function,
by::Function, rev::Bool, order::Ordering)

if length(cols) == 0
Base.depwarn("When empty column selector is passed ordering is done on all columns. " *
"This behavior is deprecated and will change in the future.", :ordering)
return ordering(df, lt, by, rev, order)
end

Expand Down Expand Up @@ -332,19 +334,50 @@ Sort.defalg(df::AbstractDataFrame, o::Ordering; alg=nothing, cols=[]) =
########################

"""
issorted(df::AbstractDataFrame, cols;
issorted(df::AbstractDataFrame, cols=All();
lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward)
Test whether data frame `df` is sorted by column(s) `cols`.
Checking against multiple columns is done lexicographically.
`cols` can be any column selector ($COLUMNINDEX_STR; $MULTICOLUMNINDEX_STR).
If `cols` selects no columns, check whether `df` is sorted on all columns
(this behaviour is deprecated and will change in future versions).
`cols` can be any column selector ($COLUMNINDEX_STR; $MULTICOLUMNINDEX_STR).
If `rev` is `true`, reverse sorting is performed. To enable reverse sorting
only for some columns, pass `order(c, rev=true)` in `cols`, with `c` the
corresponding column index (see example below).
See other methods for a description of other keyword arguments.
# Examples
```jldoctest
julia> df = DataFrame(a = [1, 2, 3, 4], b = [4, 3, 2, 1])
4×2 DataFrame
Row │ a b
│ Int64 Int64
─────┼──────────────
1 │ 1 4
2 │ 2 3
3 │ 3 2
4 │ 4 1
julia> issorted(df)
true
julia> issorted(df, :a)
true
julia> issorted(df, :b)
false
julia> issorted(df, :b, rev=true)
true
```
"""
function Base.issorted(df::AbstractDataFrame, cols=[];
function Base.issorted(df::AbstractDataFrame, cols=All();
lt=isless, by=identity, rev=false, order=Forward)
# exclude AbstractVector as in that case cols can contain order(...) clauses
if cols isa MultiColumnIndex && !(cols isa AbstractVector)
Expand All @@ -360,14 +393,16 @@ function Base.issorted(df::AbstractDataFrame, cols=[];
end

"""
sort(df::AbstractDataFrame, cols;
sort(df::AbstractDataFrame, cols=All();
alg::Union{Algorithm, Nothing}=nothing, lt=isless, by=identity,
rev::Bool=false, order::Ordering=Forward, view::Bool=false)
Return a data frame containing the rows in `df` sorted by column(s) `cols`.
Sorting on multiple columns is done lexicographically.
`cols` can be any column selector ($COLUMNINDEX_STR; $MULTICOLUMNINDEX_STR).
If `cols` selects no columns, sort `df` on all columns
(this behaviour is deprecated and will change in future versions).
If `alg` is `nothing` (the default), the most appropriate algorithm is
chosen automatically among `TimSort`, `MergeSort` and `RadixSort` depending
Expand Down Expand Up @@ -435,21 +470,24 @@ julia> sort(df, [:x, order(:y, rev=true)])
4 │ 3 b
```
"""
@inline function Base.sort(df::AbstractDataFrame, cols=[]; alg=nothing, lt=isless,
@inline function Base.sort(df::AbstractDataFrame, cols=All(); alg=nothing, lt=isless,
by=identity, rev=false, order=Forward, view::Bool=false)
rowidxs = sortperm(df, cols, alg=alg, lt=lt, by=by, rev=rev, order=order)
return view ? Base.view(df, rowidxs, :) : df[rowidxs, :]
end

"""
sortperm(df::AbstractDataFrame, cols;
sortperm(df::AbstractDataFrame, cols=All();
alg::Union{Algorithm, Nothing}=nothing, lt=isless, by=identity,
rev::Bool=false, order::Ordering=Forward)
Return a permutation vector of row indices of data frame `df` that puts them in
sorted order according to column(s) `cols`.
Order on multiple columns is computed lexicographically.
`cols` can be any column selector ($COLUMNINDEX_STR; $MULTICOLUMNINDEX_STR).
If `cols` selects no columns, return a permutation vector based on sorting all columns
(this behaviour is deprecated and will change in future versions).
If `alg` is `nothing` (the default), the most appropriate algorithm is
chosen automatically among `TimSort`, `MergeSort` and `RadixSort` depending
Expand Down Expand Up @@ -502,7 +540,7 @@ julia> sortperm(df, [:x, order(:y, rev=true)])
1
```
"""
function Base.sortperm(df::AbstractDataFrame, cols=[];
function Base.sortperm(df::AbstractDataFrame, cols=All();
alg=nothing, lt=isless, by=identity, rev=false, order=Forward)
if !(by isa Base.Callable || (by isa AbstractVector && eltype(by) <: Base.Callable))
msg = "'by' must be a Function or a vector of Functions. " *
Expand Down
6 changes: 4 additions & 2 deletions src/dataframe/sort.jl
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@

"""
sort!(df::AbstractDataFrame, cols;
sort!(df::AbstractDataFrame, cols=All();
alg::Union{Algorithm, Nothing}=nothing, lt=isless, by=identity,
rev::Bool=false, order::Ordering=Forward)
Sort data frame `df` by column(s) `cols`.
Sorting on multiple columns is done lexicographically.
`cols` can be any column selector ($COLUMNINDEX_STR; $MULTICOLUMNINDEX_STR).
If `cols` selects no columns, sort `df` on all columns
(this behaviour is deprecated and will change in future versions).
If `alg` is `nothing` (the default), the most appropriate algorithm is
chosen automatically among `TimSort`, `MergeSort` and `RadixSort` depending
Expand Down Expand Up @@ -72,7 +74,7 @@ julia> sort!(df, [:x, order(:y, rev=true)])
4 │ 3 b
```
"""
function Base.sort!(df::DataFrame, cols=[]; alg=nothing,
function Base.sort!(df::DataFrame, cols=All(); alg=nothing,
lt=isless, by=identity, rev=false, order=Forward)
if !(isa(by, Function) || eltype(by) <: Function)
msg = "'by' must be a Function or a vector of Functions. " *
Expand Down

0 comments on commit 9791095

Please sign in to comment.