diff --git a/NEWS.md b/NEWS.md
index da12048624..39aee15a8f 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -15,6 +15,9 @@
 * Joining functions now support `order` keyword argument allowing the user
   to specify the order of the rows in the produced table
   ([#3233](https://github.com/JuliaData/DataFrames.jl/pull/3233))
+* Add `keep` keyword argument to `nonunique`, `unique`, and `unique!`
+  allowing the user to specify which duplicate rows should be kept
+  ([#3260](https://github.com/JuliaData/DataFrames.jl/pull/3260))
 
 ## Bug fixes
 
diff --git a/src/DataFrames.jl b/src/DataFrames.jl
index c5d8366214..a2a652154a 100644
--- a/src/DataFrames.jl
+++ b/src/DataFrames.jl
@@ -134,6 +134,7 @@ include("other/utils.jl")
 include("other/index.jl")
 
 include("abstractdataframe/abstractdataframe.jl")
+include("abstractdataframe/unique.jl")
 include("dataframe/dataframe.jl")
 include("subdataframe/subdataframe.jl")
 include("dataframerow/dataframerow.jl")
diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl
index 9fba690d49..1056ed665b 100644
--- a/src/abstractdataframe/abstractdataframe.jl
+++ b/src/abstractdataframe/abstractdataframe.jl
@@ -1342,278 +1342,6 @@ end
 Base.Array(df::AbstractDataFrame) = Matrix(df)
 Base.Array{T}(df::AbstractDataFrame) where {T} = Matrix{T}(df)
 
-"""
-    nonunique(df::AbstractDataFrame)
-    nonunique(df::AbstractDataFrame, cols)
-
-Return a `Vector{Bool}` in which `true` entries indicate duplicate rows.
-A row is a duplicate if there exists a prior row with all columns containing
-equal values (according to `isequal`).
-
-See also [`unique`](@ref) and [`unique!`](@ref).
-
-# Arguments
-- `df` : `AbstractDataFrame`
-- `cols` : a selector specifying the column(s) or their transformations to compare.
-  Can be any column selector or transformation accepted by [`select`](@ref) that
-  returns at least one column if `df` has at least one column.
-
-# Examples
-
-```jldoctest
-julia> df = DataFrame(i=1:4, x=[1, 2, 1, 2])
-4×2 DataFrame
- Row │ i      x
-     │ Int64  Int64
-─────┼──────────────
-   1 │     1      1
-   2 │     2      2
-   3 │     3      1
-   4 │     4      2
-
-julia> df = vcat(df, df)
-8×2 DataFrame
- Row │ i      x
-     │ Int64  Int64
-─────┼──────────────
-   1 │     1      1
-   2 │     2      2
-   3 │     3      1
-   4 │     4      2
-   5 │     1      1
-   6 │     2      2
-   7 │     3      1
-   8 │     4      2
-
-julia> nonunique(df)
-8-element Vector{Bool}:
- 0
- 0
- 0
- 0
- 1
- 1
- 1
- 1
-
-julia> nonunique(df, 2)
-8-element Vector{Bool}:
- 0
- 0
- 1
- 1
- 1
- 1
- 1
- 1
-```
-"""
-function nonunique(df::AbstractDataFrame)
-    ncol(df) == 0 && return Bool[]
-    gslots = row_group_slots(ntuple(i -> df[!, i], ncol(df)), Val(true), nothing, false, nothing)[3]
-    # unique rows are the first encountered group representatives,
-    # nonunique are everything else
-    res = fill(true, nrow(df))
-    @inbounds for g_row in gslots
-        (g_row > 0) && (res[g_row] = false)
-    end
-    return res
-end
-
-function nonunique(df::AbstractDataFrame, cols)
-    udf = _try_select_no_copy(df, cols)
-    if ncol(df) > 0 && ncol(udf) == 0
-         throw(ArgumentError("finding duplicate rows in data frame when " *
-                             "`cols` selects no columns is not allowed"))
-    else
-        return nonunique(udf)
-    end
-end
-
-"""
-    allunique(df::AbstractDataFrame, cols=:)
-
-Return `true` if all rows of `df` are not duplicated. Two rows are duplicate if
-all their columns contain equal values (according to `isequal`).
-
-See also [`unique`](@ref) and [`nonunique`](@ref).
-
-# Arguments
-- `df` : `AbstractDataFrame`
-- `cols` : a selector specifying the column(s) or their transformations to compare.
-  Can be any column selector or transformation accepted by [`select`](@ref).
-
-# Examples
-
-```jldoctest
-julia> df = DataFrame(i=1:4, x=[1, 2, 1, 2])
-4×2 DataFrame
- Row │ i      x
-     │ Int64  Int64
-─────┼──────────────
-   1 │     1      1
-   2 │     2      2
-   3 │     3      1
-   4 │     4      2
-
-julia> allunique(df)
-true
-
-julia> allunique(df, :x)
-false
-
-julia> allunique(df, :i => ByRow(isodd))
-false
-```
-"""
-function Base.allunique(df::AbstractDataFrame, cols=:)
-    udf = _try_select_no_copy(df, cols)
-    nrow(udf) == 0 && return true
-    return row_group_slots(ntuple(i -> udf[!, i], ncol(udf)),
-                           Val(false), nothing, false, nothing)[1] == nrow(df)
-end
-
-"""
-    unique(df::AbstractDataFrame; view::Bool=false)
-    unique(df::AbstractDataFrame, cols; view::Bool=false)
-
-Return a data frame containing only the first occurrence of unique rows in `df`.
-When `cols` is specified, the returned `DataFrame` contains complete rows,
-retaining in each case the first occurrence of a given combination of values
-in selected columns or their transformations. `cols` can be any column
-selector or transformation accepted by [`select`](@ref).
-
-If `view=false` a freshly allocated `DataFrame` is returned,
-and if `view=true` then a `SubDataFrame` view into `df` is returned.
-
-# Arguments
-- `df` : the AbstractDataFrame
-- `cols` :  column indicator (`Symbol`, `Int`, `Vector{Symbol}`, `Regex`, etc.)
-specifying the column(s) to compare.
-
-$METADATA_FIXED
-
-See also: [`unique!`](@ref), [`nonunique`](@ref).
-
-# Examples
-
-```jldoctest
-julia> df = DataFrame(i=1:4, x=[1, 2, 1, 2])
-4×2 DataFrame
- Row │ i      x
-     │ Int64  Int64
-─────┼──────────────
-   1 │     1      1
-   2 │     2      2
-   3 │     3      1
-   4 │     4      2
-
-julia> df = vcat(df, df)
-8×2 DataFrame
- Row │ i      x
-     │ Int64  Int64
-─────┼──────────────
-   1 │     1      1
-   2 │     2      2
-   3 │     3      1
-   4 │     4      2
-   5 │     1      1
-   6 │     2      2
-   7 │     3      1
-   8 │     4      2
-
-julia> unique(df)   # doesn't modify df
-4×2 DataFrame
- Row │ i      x
-     │ Int64  Int64
-─────┼──────────────
-   1 │     1      1
-   2 │     2      2
-   3 │     3      1
-   4 │     4      2
-
-julia> unique(df, 2)
-2×2 DataFrame
- Row │ i      x
-     │ Int64  Int64
-─────┼──────────────
-   1 │     1      1
-   2 │     2      2
-```
-"""
-@inline function Base.unique(df::AbstractDataFrame; view::Bool=false)
-    rowidxs = (!).(nonunique(df))
-    return view ? Base.view(df, rowidxs, :) : df[rowidxs, :]
-end
-
-@inline function Base.unique(df::AbstractDataFrame, cols; view::Bool=false)
-    rowidxs = (!).(nonunique(df, cols))
-    return view ? Base.view(df, rowidxs, :) : df[rowidxs, :]
-end
-
-"""
-    unique!(df::AbstractDataFrame)
-    unique!(df::AbstractDataFrame, cols)
-
-Update `df` in-place to contain only the first occurrence of unique rows in `df`.
-When `cols` is specified, the returned `DataFrame` contains complete rows,
-retaining in each case the first occurrence of a given combination of values
-in selected columns or their transformations. `cols` can be any column
-selector or transformation accepted by [`select`](@ref).
-
-# Arguments
-- `df` : the AbstractDataFrame
-- `cols` :  column indicator (`Symbol`, `Int`, `Vector{Symbol}`, `Regex`, etc.)
-specifying the column(s) to compare.
-
-$METADATA_FIXED
-
-See also: [`unique!`](@ref), [`nonunique`](@ref).
-
-# Examples
-
-```jldoctest
-julia> df = DataFrame(i=1:4, x=[1, 2, 1, 2])
-4×2 DataFrame
- Row │ i      x
-     │ Int64  Int64
-─────┼──────────────
-   1 │     1      1
-   2 │     2      2
-   3 │     3      1
-   4 │     4      2
-
-julia> df = vcat(df, df)
-8×2 DataFrame
- Row │ i      x
-     │ Int64  Int64
-─────┼──────────────
-   1 │     1      1
-   2 │     2      2
-   3 │     3      1
-   4 │     4      2
-   5 │     1      1
-   6 │     2      2
-   7 │     3      1
-   8 │     4      2
-
-julia> unique!(df)  # modifies df
-4×2 DataFrame
- Row │ i      x
-     │ Int64  Int64
-─────┼──────────────
-   1 │     1      1
-   2 │     2      2
-   3 │     3      1
-   4 │     4      2
-```
-"""
-Base.unique!(df::AbstractDataFrame) = deleteat!(df, _findall(nonunique(df)))
-Base.unique!(df::AbstractDataFrame, cols::AbstractVector) =
-    deleteat!(df, _findall(nonunique(df, cols)))
-Base.unique!(df::AbstractDataFrame, cols) =
-    deleteat!(df, _findall(nonunique(df, cols)))
-
 """
     fillcombinations(df::AbstractDataFrame, indexcols;
                          allowduplicates::Bool=false,
@@ -1676,8 +1404,9 @@ function fillcombinations(df::AbstractDataFrame, indexcols;
                             "must be specified"))
     end
 
-    has_duplicates = row_group_slots(ntuple(i -> df[!, colind[i]], length(colind)),
-                                     Val(false), nothing, false, nothing)[1] != nrow(df)
+    # we use hashing algorithm here, because we assume that the tables we work with are not huge
+    has_duplicates = row_group_slots!(ntuple(i -> df[!, colind[i]], length(colind)),
+                                      Val(false), nothing, false, nothing, true)[1] != nrow(df)
     if has_duplicates && !allowduplicates
         throw(ArgumentError("duplicate combinations of `indexcols` are not " *
                             "allowed in input when `allowduplicates=false`"))
@@ -3402,4 +3131,3 @@ function Base.iterate(itr::Iterators.PartitionIterator{<:AbstractDataFrame}, sta
     r = min(state + itr.n - 1, last_idx)
     return view(itr.c, state:r, :), r + 1
 end
-
diff --git a/src/abstractdataframe/unique.jl b/src/abstractdataframe/unique.jl
new file mode 100644
index 0000000000..03cddfe74d
--- /dev/null
+++ b/src/abstractdataframe/unique.jl
@@ -0,0 +1,376 @@
+"""
+    nonunique(df::AbstractDataFrame; keep::Symbol=:first)
+    nonunique(df::AbstractDataFrame, cols; keep::Symbol=:first)
+
+Return a `Vector{Bool}` in which `true` entries indicate duplicate rows.
+
+Duplicate rows are those for which at least one other row contains equal values
+(according to `isequal`) for all columns in `cols` (by default, all columns).
+If `keep=:first` (the default), only the first occurrence of a set of duplicate
+rows is indicated with a `false` entry.
+If `keep=:last`, only the last occurrence of a set of duplicate rows is
+indicated with a `false` entry.
+If `keep=:noduplicates`, only rows without any duplicates are indicated with a
+`false` entry.
+
+# Arguments
+- `df` : `AbstractDataFrame`
+- `cols` : a selector specifying the column(s) or their transformations to
+  compare. Can be any column selector or transformation accepted by
+  [`select`](@ref) that returns at least one column if `df` has at least one
+  column.
+
+See also [`unique`](@ref) and [`unique!`](@ref).
+
+# Examples
+
+```jldoctest
+julia> df = DataFrame(i=1:4, x=[1, 2, 1, 2])
+4×2 DataFrame
+ Row │ i      x
+     │ Int64  Int64
+─────┼──────────────
+   1 │     1      1
+   2 │     2      2
+   3 │     3      1
+   4 │     4      2
+
+julia> df = vcat(df, df)
+8×2 DataFrame
+ Row │ i      x
+     │ Int64  Int64
+─────┼──────────────
+   1 │     1      1
+   2 │     2      2
+   3 │     3      1
+   4 │     4      2
+   5 │     1      1
+   6 │     2      2
+   7 │     3      1
+   8 │     4      2
+
+julia> nonunique(df)
+8-element Vector{Bool}:
+ 0
+ 0
+ 0
+ 0
+ 1
+ 1
+ 1
+ 1
+
+julia> nonunique(df, keep=:last)
+8-element Vector{Bool}:
+ 1
+ 1
+ 1
+ 1
+ 0
+ 0
+ 0
+ 0
+
+julia> nonunique(df, 2)
+8-element Vector{Bool}:
+ 0
+ 0
+ 1
+ 1
+ 1
+ 1
+ 1
+ 1
+```
+"""
+function nonunique(df::AbstractDataFrame; keep::Symbol=:first)
+    if !(keep in (:first, :last, :noduplicates))
+        throw(ArgumentError("`keep` must be :first, :last, or :noduplicates"))
+    end
+    ncol(df) == 0 && return Bool[]
+    res = fill(true, nrow(df))  # every row starts flagged; kept rows are cleared below
+    cols = ntuple(i -> df[!, i], ncol(df))
+    if keep == :first
+        rpa = refpool_and_array.(cols)
+        refpools = first.(rpa)
+        refarrays = last.(rpa)
+        # if some column has no refarray, hash rows; this avoids allocating a groups vector
+        if any(isnothing, refpools) || any(isnothing, refarrays)
+            _, _, gslots, _ = row_group_slots!(cols, Val(true), nothing,
+                                               false, nothing, false)
+            # non-zero entries of `gslots` are indices of the first row of each group:
+            # those rows are kept, all remaining rows stay marked as duplicates
+            @inbounds for g_row in gslots
+                g_row > 0 && (res[g_row] = false)
+            end
+        else # faster refarray method but allocates a groups vector
+            groups = Vector{Int}(undef, nrow(df))
+            ngroups = row_group_slots!(cols, refpools, refarrays,
+                                       Val(false), groups, false, false, false)[1]
+            seen = fill(false, ngroups)
+            for i in 1:nrow(df)
+                g = groups[i]
+                if !seen[g]
+                    seen[g] = true
+                    res[i] = false
+                end
+            end
+        end
+    else
+        # always allocate a groups vector; refarrays are used automatically if possible
+        groups = Vector{Int}(undef, nrow(df))
+        ngroups = row_group_slots!(cols, Val(false), groups, false, nothing, false)[1]
+        if keep == :last
+            seen = fill(false, ngroups)
+            for i in nrow(df):-1:1  # scan backwards so the last occurrence is met first
+                g = groups[i]
+                if !seen[g]
+                    seen[g] = true
+                    res[i] = false
+                end
+            end
+        else
+            @assert keep == :noduplicates
+            # firstseen[g] == -1: group g has not been seen yet
+            # firstseen[g] > 0:   row index at which group g was seen for the first time
+            # firstseen[g] == 0:  group g has been seen at least twice
+            firstseen = fill(-1, ngroups)
+            for i in 1:nrow(df)
+                g = groups[i]
+                j = firstseen[g]
+                if j == -1
+                    # first occurrence: tentatively mark the row as not duplicated
+                    firstseen[g] = i
+                    res[i] = false
+                elseif j > 0
+                    # second occurrence: flag the first row too (row i is already `true`)
+                    res[j] = true
+                    firstseen[g] = 0
+                end
+            end
+        end
+    end
+    return res
+end
+
+function nonunique(df::AbstractDataFrame, cols; keep::Symbol=:first)
+    selected = _try_select_no_copy(df, cols)
+    # an empty selection is only acceptable when `df` itself has no columns
+    (ncol(selected) == 0 && ncol(df) > 0) &&
+        throw(ArgumentError("finding duplicate rows in data frame when " *
+                            "`cols` selects no columns is not allowed"))
+    return nonunique(selected; keep=keep)
+end
+
+"""
+    allunique(df::AbstractDataFrame, cols=:)
+
+Return `true` if none of the rows of `df` are duplicated. Two rows are
+duplicates if they contain equal values (according to `isequal`)
+for all columns in `cols` (by default, all columns).
+
+# Arguments
+- `df` : `AbstractDataFrame`
+- `cols` : a selector specifying the column(s) or their transformations to
+  compare. Can be any column selector or transformation accepted by
+  [`select`](@ref).
+
+See also [`unique`](@ref) and [`nonunique`](@ref).
+
+# Examples
+
+```jldoctest
+julia> df = DataFrame(i=1:4, x=[1, 2, 1, 2])
+4×2 DataFrame
+ Row │ i      x
+     │ Int64  Int64
+─────┼──────────────
+   1 │     1      1
+   2 │     2      2
+   3 │     3      1
+   4 │     4      2
+
+julia> allunique(df)
+true
+
+julia> allunique(df, :x)
+false
+
+julia> allunique(df, :i => ByRow(isodd))
+false
+```
+"""
+function Base.allunique(df::AbstractDataFrame, cols=:)
+    sel = _try_select_no_copy(df, cols)
+    nrow(sel) == 0 && return true
+    vecs = ntuple(j -> sel[!, j], ncol(sel))
+    return nrow(df) == row_group_slots!(vecs, Val(false), nothing, false, nothing, true)[1]
+end
+
+"""
+    unique(df::AbstractDataFrame; view::Bool=false, keep::Symbol=:first)
+    unique(df::AbstractDataFrame, cols; view::Bool=false, keep::Symbol=:first)
+
+Return a data frame containing only unique rows in `df`.
+
+Non-unique (duplicate) rows are those for which at least one other row contains
+equal values (according to `isequal`) for all columns in `cols` (by default,
+all columns).
+If `keep=:first` (the default), only the first occurrence of a set of duplicate
+rows is kept.
+If `keep=:last`, only the last occurrence of a set of duplicate rows is kept.
+If `keep=:noduplicates`, only rows without any duplicates are kept.
+
+If `view=false` a freshly allocated `DataFrame` is returned, and if `view=true`
+then a `SubDataFrame` view into `df` is returned.
+
+# Arguments
+- `df` : the AbstractDataFrame
+- `cols` : a selector specifying the column(s) or their transformations to
+  compare. Can be any column selector or transformation accepted by
+  [`select`](@ref) that returns at least one column if `df` has at least one
+  column.
+
+$METADATA_FIXED
+
+See also: [`unique!`](@ref), [`nonunique`](@ref).
+
+# Examples
+
+```jldoctest
+julia> df = DataFrame(i=1:4, x=[1, 2, 1, 2])
+4×2 DataFrame
+ Row │ i      x
+     │ Int64  Int64
+─────┼──────────────
+   1 │     1      1
+   2 │     2      2
+   3 │     3      1
+   4 │     4      2
+
+julia> df = vcat(df, df)
+8×2 DataFrame
+ Row │ i      x
+     │ Int64  Int64
+─────┼──────────────
+   1 │     1      1
+   2 │     2      2
+   3 │     3      1
+   4 │     4      2
+   5 │     1      1
+   6 │     2      2
+   7 │     3      1
+   8 │     4      2
+
+julia> unique(df)   # doesn't modify df
+4×2 DataFrame
+ Row │ i      x
+     │ Int64  Int64
+─────┼──────────────
+   1 │     1      1
+   2 │     2      2
+   3 │     3      1
+   4 │     4      2
+
+julia> unique(df, 2)
+2×2 DataFrame
+ Row │ i      x
+     │ Int64  Int64
+─────┼──────────────
+   1 │     1      1
+   2 │     2      2
+
+julia> unique(df, keep=:noduplicates)
+0×2 DataFrame
+ Row │ i      x
+     │ Int64  Int64
+─────┴──────────────
+```
+"""
+@inline function Base.unique(df::AbstractDataFrame; view::Bool=false,
+                             keep::Symbol=:first)
+    keepmask = .!nonunique(df; keep=keep)
+    return view ? Base.view(df, keepmask, :) : df[keepmask, :]
+end
+
+@inline function Base.unique(df::AbstractDataFrame, cols; view::Bool=false,
+                             keep::Symbol=:first)
+    keepmask = .!nonunique(df, cols; keep=keep)
+    return view ? Base.view(df, keepmask, :) : df[keepmask, :]
+end
+
+"""
+    unique!(df::AbstractDataFrame; keep::Symbol=:first)
+    unique!(df::AbstractDataFrame, cols; keep::Symbol=:first)
+
+Update `df` in-place to contain only unique rows.
+
+Non-unique (duplicate) rows are those for which at least one other row contains
+equal values (according to `isequal`) for all columns in `cols` (by default,
+all columns).
+If `keep=:first` (the default), only the first occurrence of a set of duplicate
+rows is kept.
+If `keep=:last`, only the last occurrence of a set of duplicate rows is kept.
+If `keep=:noduplicates`, only rows without any duplicates are kept.
+
+# Arguments
+- `df` : the AbstractDataFrame
+- `cols` :  column indicator (`Symbol`, `Int`, `Vector{Symbol}`, `Regex`, etc.)
+  specifying the column(s) to compare. Can be any column selector or
+  transformation accepted by [`select`](@ref) that returns at least one column
+  if `df` has at least one column.
+
+$METADATA_FIXED
+
+See also: [`unique`](@ref), [`nonunique`](@ref).
+
+# Examples
+
+```jldoctest
+julia> df = DataFrame(i=1:4, x=[1, 2, 1, 2])
+4×2 DataFrame
+ Row │ i      x
+     │ Int64  Int64
+─────┼──────────────
+   1 │     1      1
+   2 │     2      2
+   3 │     3      1
+   4 │     4      2
+
+julia> df = vcat(df, df)
+8×2 DataFrame
+ Row │ i      x
+     │ Int64  Int64
+─────┼──────────────
+   1 │     1      1
+   2 │     2      2
+   3 │     3      1
+   4 │     4      2
+   5 │     1      1
+   6 │     2      2
+   7 │     3      1
+   8 │     4      2
+
+julia> unique!(copy(df))  # modifies the copy in place, `df` is left unchanged
+4×2 DataFrame
+ Row │ i      x
+     │ Int64  Int64
+─────┼──────────────
+   1 │     1      1
+   2 │     2      2
+   3 │     3      1
+   4 │     4      2
+
+julia> unique!(df, keep=:noduplicates)
+0×2 DataFrame
+ Row │ i      x
+     │ Int64  Int64
+─────┴──────────────
+```
+"""
+Base.unique!(df::AbstractDataFrame; keep::Symbol=:first) =
+    deleteat!(df, _findall(nonunique(df; keep=keep)))
+Base.unique!(df::AbstractDataFrame, cols::AbstractVector; keep::Symbol=:first) =
+    deleteat!(df, _findall(nonunique(df, cols; keep=keep)))
+Base.unique!(df::AbstractDataFrame, cols; keep::Symbol=:first) =
+    deleteat!(df, _findall(nonunique(df, cols; keep=keep)))
diff --git a/src/groupeddataframe/groupeddataframe.jl b/src/groupeddataframe/groupeddataframe.jl
index f6d4bf9c69..d08bef7f55 100644
--- a/src/groupeddataframe/groupeddataframe.jl
+++ b/src/groupeddataframe/groupeddataframe.jl
@@ -223,7 +223,7 @@ function groupby(df::AbstractDataFrame, cols;
        (cols isa AbstractVector && any(x -> x isa UserColOrdering, cols))
         if isnothing(sort) || sort === true
             # if sort === true replace it with NamedTuple to avoid sorting
-            # in row_group_slots as we will perform sorting later
+            # in row_group_slots! as we will perform sorting later
             sort = NamedTuple()
         elseif sort === false
             throw(ArgumentError("passing `order` is only allowed if `sort` " *
@@ -248,13 +248,14 @@ function groupby(df::AbstractDataFrame, cols;
 
     groups = Vector{Int}(undef, nrow(df))
     ngroups, rhashes, gslots, sorted =
-        row_group_slots(ntuple(i -> sdf[!, i], ncol(sdf)), Val(false),
-                        groups, skipmissing, sort isa NamedTuple ? nothing : sort)
+        row_group_slots!(ntuple(i -> sdf[!, i], ncol(sdf)), Val(false),
+                         groups, skipmissing,
+                         sort isa NamedTuple ? nothing : sort, true)
 
     gd = GroupedDataFrame(df, copy(_names(sdf)), groups, nothing, nothing, nothing,
                           ngroups, nothing, Threads.ReentrantLock())
 
-    # sort groups if row_group_slots hasn't already done that
+    # sort groups if row_group_slots! hasn't already done that
     if (sort === true && !sorted) || (sort isa NamedTuple)
         # Find index of representative row for each group
         idx = Vector{Int}(undef, length(gd))
diff --git a/src/groupeddataframe/utils.jl b/src/groupeddataframe/utils.jl
index 3139f30339..d8acb7983a 100644
--- a/src/groupeddataframe/utils.jl
+++ b/src/groupeddataframe/utils.jl
@@ -82,12 +82,12 @@ isequal_row(cols1::Tuple{Vararg{AbstractVector}}, r1::Int,
 
 # IntegerRefarray and IntegerRefPool are two complementary view types that allow
 # wrapping arrays with Union{Real, Missing} eltype to satisfy the DataAPI.refpool
-# and DataAPI.refarray API when calling row_group_slots.
+# and DataAPI.refarray API when calling row_group_slots!.
 # IntegerRefarray converts values to Int and replaces missing with an integer
 # (set by the caller to the maximum value + 1)
 # IntegerRefPool subtracts the minimum value - 1 and replaces back the maximum
 # value + 1 to missing. This ensures all values are in 1:length(refpool), while
-# row_group_slots knows the number of (potential) groups via length(refpool)
+# row_group_slots! knows the number of (potential) groups via length(refpool)
 # and is able to skip missing values when skipmissing=true
 
 struct IntegerRefarray{T<:AbstractArray} <: AbstractVector{Int}
@@ -157,7 +157,7 @@ function refpool_and_array(x::AbstractArray)
             minval, maxval = extrema(x)
         end
         ngroups = big(maxval) - big(minval) + 1
-        # Threshold chosen with the same rationale as the row_group_slots refpool method:
+        # Threshold chosen with the same rationale as the row_group_slots! refpool method:
         # refpool approach is faster but we should not allocate too much memory either
         # We also have to avoid overflow, including with ngroups + 1 for missing values
         # (note that it would be possible to allow minval and maxval to be outside of the
@@ -178,14 +178,22 @@ end
 # 2) vector of row hashes (may be empty if hash=Val(false))
 # 3) slot array for a hash map, non-zero values are
 #    the indices of the first row in a group
+#    (returned only if hashes are generated)
 # 4) whether groups are already sorted
 # Optional `groups` vector is set to the group indices of each row (starting at 1)
 # With skipmissing=true, rows with missing values are attributed index 0.
-function row_group_slots(cols::Tuple{Vararg{AbstractVector}},
-                         hash::Val,
-                         groups::Union{Vector{Int}, Nothing},
-                         skipmissing::Bool,
-                         sort::Union{Bool, Nothing})::Tuple{Int, Vector{UInt}, Vector{Int}, Bool}
+#
+# The last argument is `compress`. If it is `false`, group indices are not
+# compressed to form a continuous sequence. Normally `true` should be passed,
+# as this ensures that the returned `ngroups` is the actual number of groups,
+# but e.g. `nonunique` does not rely on that, so it skips the compression
+# step by passing `compress=false`.
+function row_group_slots!(cols::Tuple{Vararg{AbstractVector}},
+                          hash::Val,
+                          groups::Union{Vector{Int}, Nothing},
+                          skipmissing::Bool,
+                          sort::Union{Bool, Nothing},
+                          compress::Bool)::Tuple{Int, Vector{UInt}, Vector{Int}, Bool}
     rpa = refpool_and_array.(cols)
     if sort === false
         refpools = nothing
@@ -194,17 +202,19 @@ function row_group_slots(cols::Tuple{Vararg{AbstractVector}},
         refpools = first.(rpa)
         refarrays = last.(rpa)
     end
-    row_group_slots(cols, refpools, refarrays, hash, groups, skipmissing, sort === true)
+    row_group_slots!(cols, refpools, refarrays, hash, groups, skipmissing,
+                     sort === true, compress)
 end
 
 # Generic fallback method based on open addressing hash table
-function row_group_slots(cols::Tuple{Vararg{AbstractVector}},
-                         refpools::Any,  # Ignored
-                         refarrays::Any, # Ignored
-                         hash::Val,
-                         groups::Union{Vector{Int}, Nothing},
-                         skipmissing::Bool,
-                         sort::Bool)::Tuple{Int, Vector{UInt}, Vector{Int}, Bool}
+function row_group_slots!(cols::Tuple{Vararg{AbstractVector}},
+                          refpools::Any,  # Ignored
+                          refarrays::Any, # Ignored
+                          hash::Val,
+                          groups::Union{Vector{Int}, Nothing},
+                          skipmissing::Bool,
+                          sort::Bool,
+                          compress::Bool)::Tuple{Int, Vector{UInt}, Vector{Int}, Bool}
     @assert groups === nothing || length(groups) == length(cols[1])
     rhashes, missings = hashrows(cols, skipmissing)
     # inspired by Dict code from base cf. https://github.com/JuliaData/DataTables.jl/pull/17#discussion_r102481481
@@ -251,16 +261,17 @@ function row_group_slots(cols::Tuple{Vararg{AbstractVector}},
 end
 
 # Optimized method for arrays for which DataAPI.refpool is defined and returns an AbstractVector
-function row_group_slots(cols::NTuple{N, AbstractVector},
-                         refpools::NTuple{N, AbstractVector},
-                         refarrays::NTuple{N,
-                             Union{AbstractVector{<:Real},
-                                   Missings.EachReplaceMissing{
-                                       <:AbstractVector{<:Union{Real, Missing}}}}},
-                         hash::Val{false},
-                         groups::Vector{Int},
-                         skipmissing::Bool,
-                         sort::Bool)::Tuple{Int, Vector{UInt}, Vector{Int}, Bool} where N
+function row_group_slots!(cols::NTuple{N, AbstractVector},
+                          refpools::NTuple{N, AbstractVector},
+                          refarrays::NTuple{N,
+                              Union{AbstractVector{<:Real},
+                                    Missings.EachReplaceMissing{
+                                        <:AbstractVector{<:Union{Real, Missing}}}}},
+                          hash::Val{false},
+                          groups::Vector{Int},
+                          skipmissing::Bool,
+                          sort::Bool,
+                          compress::Bool)::Tuple{Int, Vector{UInt}, Vector{Int}, Bool} where N
     # Computing neither hashes nor groups isn't very useful,
     # and this method needs to allocate a groups vector anyway
     @assert all(col -> length(col) == length(groups), cols)
@@ -296,10 +307,10 @@ function row_group_slots(cols::NTuple{N, AbstractVector},
         newcols = (skipmissing && any(refpool -> eltype(refpool) >: Missing, refpools)) ||
                   !(refarrays isa NTuple{<:Any, AbstractVector}) ||
                   sort ? cols : refarrays
-        return invoke(row_group_slots,
+        return invoke(row_group_slots!,
                       Tuple{Tuple{Vararg{AbstractVector}}, Any, Any, Val,
-                            Union{Vector{Int}, Nothing}, Bool, Bool},
-                      newcols, refpools, refarrays, hash, groups, skipmissing, sort)
+                            Union{Vector{Int}, Nothing}, Bool, Bool, Bool},
+                      newcols, refpools, refarrays, hash, groups, skipmissing, sort, compress)
     end
 
     strides = (cumprod(collect(reverse(ngroupstup)))[end-1:-1:1]..., 1)::NTuple{N, Int}
@@ -428,7 +439,9 @@ function row_group_slots(cols::NTuple{N, AbstractVector},
     # If some groups are unused, compress group indices to drop them
     # sum(seen) is faster than all(seen) when not short-circuiting,
     # and short-circuit would only happen in the slower case anyway
-    if sum(seen) < length(seen)
+    #
+    # This process is not needed if row_group_slots! is called with compress=false
+    if compress && sum(seen) < length(seen)
         oldngroups = ngroups
         remap = zeros(Int, ngroups)
         ngroups = 0
diff --git a/test/data.jl b/test/data.jl
index 3399ad35e7..b5348c5705 100644
--- a/test/data.jl
+++ b/test/data.jl
@@ -229,62 +229,6 @@ end
     @test_throws ArgumentError dropmissing(df, view=true, disallowmissing=true)
 end
 
-@testset "nonunique, nonunique, unique! with extra argument" begin
-    df1 = DataFrame(a=Union{String, Missing}["a", "b", "a", "b", "a", "b"],
-                    b=Vector{Union{Int, Missing}}(1:6),
-                    c=Union{Int, Missing}[1:3;1:3])
-    df = vcat(df1, df1)
-    @test findall(nonunique(df)) == collect(7:12)
-    @test findall(nonunique(df, :)) == collect(7:12)
-    @test findall(nonunique(df, Colon())) == collect(7:12)
-    @test findall(nonunique(df, :a)) == collect(3:12)
-    @test findall(nonunique(df, "a")) == collect(3:12)
-    @test findall(nonunique(df, [:a, :c])) == collect(7:12)
-    @test findall(nonunique(df, ["a", "c"])) == collect(7:12)
-    @test findall(nonunique(df, r"[ac]")) == collect(7:12)
-    @test findall(nonunique(df, Not(2))) == collect(7:12)
-    @test findall(nonunique(df, Not([2]))) == collect(7:12)
-    @test findall(nonunique(df, Not(:b))) == collect(7:12)
-    @test findall(nonunique(df, Not([:b]))) == collect(7:12)
-    @test findall(nonunique(df, Not([false, true, false]))) == collect(7:12)
-    @test findall(nonunique(df, [1, 3])) == collect(7:12)
-    @test findall(nonunique(df, 1)) == collect(3:12)
-    @test findall(nonunique(df, :a => x -> 1)) == 2:12
-
-    @test unique(df) == df1
-    @test unique(df, :) == df1
-    @test unique(df, Colon()) == df1
-    @test unique(df, 2:3) == df1
-    @test unique(df, 3) == df1[1:3, :]
-    @test unique(df, [1, 3]) == df1
-    @test unique(df, [:a, :c]) == df1
-    @test unique(df, ["a", "c"]) == df1
-    @test unique(df, r"[ac]") == df1
-    @test unique(df, Not(2)) == df1
-    @test unique(df, Not([2])) == df1
-    @test unique(df, Not(:b)) == df1
-    @test unique(df, Not([:b])) == df1
-    @test unique(df, Not([false, true, false])) == df1
-    @test unique(df, :a) == df1[1:2, :]
-    @test unique(df, "a") == df1[1:2, :]
-    @test unique(df, :a => x -> 1) == df[1:1, :]
-    @test unique(DataFrame()) == DataFrame()
-    @test isempty(nonunique(DataFrame())) && nonunique(DataFrame()) isa Vector{Bool}
-    @test_throws ArgumentError nonunique(DataFrame(a=1:3), [])
-    @test_throws ArgumentError unique(DataFrame(a=1:3), [])
-
-    @test unique(copy(df1), "a") == unique(copy(df1), :a) == unique(copy(df1), 1) ==
-          df1[1:2, :]
-
-    unique!(df, [1, 3])
-    @test df == df1
-    for cols in (r"[ac]", Not(:b), Not(2), Not([:b]), Not([2]), Not([false, true, false]))
-        df = vcat(df1, df1)
-        unique!(df, cols)
-        @test df == df1
-    end
-end
-
 @testset "filter() and filter!()" begin
     df = DataFrame(x=[3, 1, 2, 1], y=["b", "c", "a", "b"])
     @test filter(r -> r[:x] > 1, df) == DataFrame(x=[3, 2], y=["b", "a"])
diff --git a/test/duplicates.jl b/test/duplicates.jl
index ec85020c02..61c01874d2 100644
--- a/test/duplicates.jl
+++ b/test/duplicates.jl
@@ -1,6 +1,6 @@
 module TestDuplicates
 
-using Test, DataFrames, CategoricalArrays
+using Test, DataFrames, CategoricalArrays, Random
 const ≅ = isequal
 
 @testset "nonunique" begin
@@ -41,4 +41,138 @@ const ≅ = isequal
     @test_throws ArgumentError unique(pdf, true)
 end
 
+@testset "nonunique, unique, unique! with extra argument" begin
+    df1 = DataFrame(a=Union{String, Missing}["a", "b", "a", "b", "a", "b"],
+                    b=Vector{Union{Int, Missing}}(1:6),
+                    c=Union{Int, Missing}[1:3;1:3])
+    df = vcat(df1, df1)  # rows 7:12 repeat rows 1:6 of df1
+    @test findall(nonunique(df)) == collect(7:12)
+    @test findall(nonunique(df, :)) == collect(7:12)
+    @test findall(nonunique(df, Colon())) == collect(7:12)
+    @test findall(nonunique(df, :a)) == collect(3:12)
+    @test findall(nonunique(df, "a")) == collect(3:12)
+    @test findall(nonunique(df, [:a, :c])) == collect(7:12)
+    @test findall(nonunique(df, ["a", "c"])) == collect(7:12)
+    @test findall(nonunique(df, r"[ac]")) == collect(7:12)
+    @test findall(nonunique(df, Not(2))) == collect(7:12)
+    @test findall(nonunique(df, Not([2]))) == collect(7:12)
+    @test findall(nonunique(df, Not(:b))) == collect(7:12)
+    @test findall(nonunique(df, Not([:b]))) == collect(7:12)
+    @test findall(nonunique(df, Not([false, true, false]))) == collect(7:12)
+    @test findall(nonunique(df, [1, 3])) == collect(7:12)
+    @test findall(nonunique(df, 1)) == collect(3:12)
+    @test findall(nonunique(df, :a => x -> 1)) == 2:12  # constant key: only row 1 is new
+
+    @test unique(df) == df1
+    @test unique(df, :) == df1
+    @test unique(df, Colon()) == df1
+    @test unique(df, 2:3) == df1
+    @test unique(df, 3) == df1[1:3, :]
+    @test unique(df, [1, 3]) == df1
+    @test unique(df, [:a, :c]) == df1
+    @test unique(df, ["a", "c"]) == df1
+    @test unique(df, r"[ac]") == df1
+    @test unique(df, Not(2)) == df1
+    @test unique(df, Not([2])) == df1
+    @test unique(df, Not(:b)) == df1
+    @test unique(df, Not([:b])) == df1
+    @test unique(df, Not([false, true, false])) == df1
+    @test unique(df, :a) == df1[1:2, :]
+    @test unique(df, "a") == df1[1:2, :]
+    @test unique(df, :a => x -> 1) == df[1:1, :]
+    @test unique(DataFrame()) == DataFrame()
+    @test isempty(nonunique(DataFrame())) && nonunique(DataFrame()) isa Vector{Bool}
+    @test_throws ArgumentError nonunique(DataFrame(a=1:3), [])  # empty selector is rejected
+    @test_throws ArgumentError unique(DataFrame(a=1:3), [])
+
+    @test unique(copy(df1), "a") == unique(copy(df1), :a) == unique(copy(df1), 1) ==
+          df1[1:2, :]
+
+    unique!(df, [1, 3])  # in-place variant: df itself is expected to shrink to df1
+    @test df == df1
+    for cols in (r"[ac]", Not(:b), Not(2), Not([:b]), Not([2]), Not([false, true, false]))
+        df = vcat(df1, df1)  # rebuild the duplicated frame for every selector
+        unique!(df, cols)
+        @test df == df1
+    end
+end
+
+@testset "keep argument to nonunique/unique/unique!" begin
+    df = DataFrame(a=[1, 2, 3, 1, 2, 1],  # b and c below repeat the same 1,2,3,1,2,1 pattern
+                   b=["a", "b", "c", "a", "b", "a"],
+                   c=categorical(["a", "b", "c", "a", "b", "a"]))
+    for cols in (1, 2, 3, [1, 2], [1, 3], [2, 3], [1, 2, 3])  # same expected mask for each
+        @test nonunique(df, cols, keep=:first) ==
+              [false, false, false, true, true, true]
+        @test nonunique(df, cols, keep=:last) ==
+              [true, true, false, true, false, false]
+        @test nonunique(df, cols, keep=:noduplicates) ==
+              [true, true, false, true, true, true]
+        @test nonunique(select(df, cols), keep=:first) ==
+              [false, false, false, true, true, true]
+        @test nonunique(select(df, cols), keep=:last) ==
+              [true, true, false, true, false, false]
+        @test nonunique(select(df, cols), keep=:noduplicates) ==
+              [true, true, false, true, true, true]
+
+        @test unique(df, cols, keep=:first) ==
+              df[.![false, false, false, true, true, true], :]
+        @test unique(df, cols, keep=:last) ==
+              df[.![true, true, false, true, false, false], :]
+        @test unique(df, cols, keep=:noduplicates) ==
+              df[.![true, true, false, true, true, true], :]
+        @test unique(select(df, cols), keep=:first) ==
+              df[.![false, false, false, true, true, true], Cols(cols)]
+        @test unique(select(df, cols), keep=:last) ==
+              df[.![true, true, false, true, false, false], Cols(cols)]
+        @test unique(select(df, cols), keep=:noduplicates) ==
+              df[.![true, true, false, true, true, true], Cols(cols)]
+
+        @test unique!(copy(df), cols, keep=:first) ==
+              df[.![false, false, false, true, true, true], :]
+        @test unique!(copy(df), cols, keep=:last) ==
+              df[.![true, true, false, true, false, false], :]
+        @test unique!(copy(df), cols, keep=:noduplicates) ==
+              df[.![true, true, false, true, true, true], :]
+        @test unique!(select(df, cols), keep=:first) ==
+              df[.![false, false, false, true, true, true], Cols(cols)]
+        @test unique!(select(df, cols), keep=:last) ==
+              df[.![true, true, false, true, false, false], Cols(cols)]
+        @test unique!(select(df, cols), keep=:noduplicates) ==
+              df[.![true, true, false, true, true, true], Cols(cols)]
+    end
+
+    # larger randomized test: compare each keep mode against groupby/combine results
+    Random.seed!(1234)
+    df = DataFrame(a=rand(1:10^5, 10^5))
+    df.b = string.(df.a)
+    df.c = categorical(df.b)
+    df.id = 1:10^5  # row number; used as the sort key for the expected results below
+
+    for cols in (1, 2, 3, [1, 2], [1, 3], [2, 3], [1, 2, 3])
+        @test select(unique(df, cols, keep=:first), cols, Not(cols)) ==
+              combine(groupby(df, cols, sort=false), first)
+        @test select(unique(df, cols, keep=:last), cols, Not(cols)) ==
+              sort(combine(groupby(df, cols, sort=false), last), :id)
+        @test select(unique(df, cols, keep=:noduplicates), cols, Not(cols)) ==
+              sort(combine(groupby(df, cols, sort=false),
+                           sdf -> nrow(sdf) == 1 ? sdf : NamedTuple()), :id)
+    end
+
+    @test isempty(nonunique(DataFrame(), keep=:first))
+    @test unique(DataFrame(a=[]), keep=:last) == DataFrame(a=[])
+    @test unique!(DataFrame(), keep=:noduplicates) == DataFrame()
+    @test_throws ArgumentError nonunique(DataFrame(), keep=:a)  # unsupported keep values
+    @test_throws ArgumentError unique(DataFrame(), keep=:b)
+    @test_throws ArgumentError unique!(DataFrame(), keep=:c)
+end
+
+@testset "case when groups are not compressed in row_group_slots!" begin
+    df = DataFrame(x=repeat([1:1000; -1], 2))  # 1001 distinct values, each appearing twice
+    @test getindex.(keys(groupby(df, :x, sort=true)), 1) == [-1; 1:1000]
+    @test nonunique(df, :x) == [falses(1001); trues(1001)]  # default flags the second copies
+    @test nonunique(df, :x, keep=:last) == [trues(1001); falses(1001)]
+    @test all(nonunique(df, :x, keep=:noduplicates))  # no value occurs exactly once
+end
+
 end # module