Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add support for Not with multiple positional indices #3302

Merged
merged 12 commits into from
Apr 4, 2023
9 changes: 9 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
# DataFrames.jl v1.6 Release Notes

## New functionalities

* `Not` now accepts multiple positional arguments, which are
treated as if they were wrapped in `Cols`; in addition, column selection
no longer throws an error when `Not` is passed a vector containing duplicate indices
([#3302](https://github.com/JuliaData/DataFrames.jl/pull/3302))

# DataFrames.jl v1.5 Release Notes

## New functionalities
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ CategoricalArrays = "0.10.0"
Compat = "4.2"
DataAPI = "1.14.0"
InlineStrings = "1.3.0"
InvertedIndices = "1"
InvertedIndices = "1.3"
IteratorInterfaceExtensions = "0.1.1, 1"
Missings = "0.4.2, 1"
PooledArrays = "1.4.2"
Expand Down
2 changes: 2 additions & 0 deletions docs/src/lib/indexing.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ The following values are a valid column index:
* a `Not` expression (see
[InvertedIndices.jl](https://github.com/JuliaData/InvertedIndices.jl));
`Not(idx)` selects all indices not in the passed `idx`;
when passed as a column selector, `Not(idx...)` is equivalent to
`Not(Cols(idx...))`.
* a `Cols` expression (see
[DataAPI.jl](https://github.com/JuliaData/DataAPI.jl)); `Cols(idxs...)`
selects the union of the selections in `idxs`; in particular `Cols()`
Expand Down
9 changes: 9 additions & 0 deletions src/other/index.jl
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,15 @@ end
# `:` selects every position of the index, returned as `Base.OneTo(length(x))`.
@inline function Base.getindex(x::AbstractIndex, ::Colon)
    return Base.OneTo(length(x))
end
# Generic `Not` selector: resolve the wrapped selector (`notidx.skip`) against `x`
# and return every position in `1:length(x)` that it did not select.
@inline function Base.getindex(x::AbstractIndex, notidx::Not)
    return setdiff(1:length(x), x[notidx.skip])
end

# `Not` wrapping a vector selector: return the positions of `x` that the vector
# does not select.
#
# A vector whose element type is `Bool` is forwarded to the lookup unchanged
# (de-duplicating it would change its length). Any other vector is passed
# through `unique` first, so repeated entries are looked up only once.
@inline function Base.getindex(x::AbstractIndex, notidx::Not{<:AbstractVector})
    excluded = notidx.skip
    lookup = if eltype(excluded) === Bool
        excluded
    else
        unique(excluded)
    end
    return setdiff(1:length(x), x[lookup])
end

# `Not` built from several positional arguments (`Not(a, b, ...)`): the individual
# selectors stored in `notidx.skip.indices` are combined with `Cols(...)`, resolved
# against `x`, and the result is removed from `1:length(x)`.
@inline Base.getindex(x::AbstractIndex, notidx::Not{InvertedIndices.NotMultiIndex}) =
    setdiff(1:length(x), getindex(x, Cols(notidx.skip.indices...)))
# `Between(first, last)`: resolve both endpoints against `x` and return the
# range of positions running from the first endpoint to the last.
@inline function Base.getindex(x::AbstractIndex, idx::Between)
    start = x[idx.first]
    stop = x[idx.last]
    return start:stop
end
# `All()` with no arguments selects every position (`1:length(x)`).
# `All(args...)` with arguments is rejected with an `ArgumentError` that points
# the caller to `Cols(args...)`.
@inline function Base.getindex(x::AbstractIndex, idx::All)
    if !isempty(idx.cols)
        throw(ArgumentError("All(args...) is not supported: use Cols(args...) instead"))
    end
    return 1:length(x)
end
Expand Down
18 changes: 15 additions & 3 deletions test/index.jl
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,13 @@ using DataFrames: Index, SubIndex, fuzzymatch
@test_throws ArgumentError i[Not(:x)]
@test_throws ArgumentError i[Not("x")]
@test_throws BoundsError i[Not(1:3)]
@test_throws ArgumentError i[Not([1, 1])]
@test_throws ArgumentError i[Not([:A, :A])]
@test_throws ArgumentError i[Not(["A", "A"])]

@test i[Not([1, 1])] == [2]
@test i[Not([:A, :A])] == [2]
@test i[Not(["A", "A"])] == [2]
@test isempty(i[Not([true, true])])
@test i[Not([false, false])] == 1:2
@test i[Not([true, false])] == [2]

@test i[1:1] == 1:1

Expand Down Expand Up @@ -115,6 +119,9 @@ end
si7 = SubIndex(i, Not(1:2))
si8 = SubIndex(i, ["C", "D", "E"])
si9 = SubIndex(i, Not(Not(["C", "D", "E"])))
si10 = SubIndex(i, Not(1, 2))
si11 = SubIndex(i, Not(:A, :B))
si12 = SubIndex(i, Not(2, "A"))

@test copy(si1) == i
@test copy(si2) == Index([:C, :D, :E])
Expand All @@ -125,6 +132,9 @@ end
@test copy(si7) == Index([:C, :D, :E])
@test copy(si8) == Index([:C, :D, :E])
@test copy(si9) == Index([:C, :D, :E])
@test copy(si10) == Index([:C, :D, :E])
@test copy(si11) == Index([:C, :D, :E])
@test copy(si12) == Index([:C, :D, :E])

@test_throws ArgumentError SubIndex(i, 1)
@test_throws ArgumentError SubIndex(i, :A)
Expand Down Expand Up @@ -327,6 +337,8 @@ end
push!(i, :x131)
push!(i, :y13)
push!(i, :yy13)
@test i[Not(2, 4, 5)] == [1, 3]
@test i[Not(2, :y13, "yy13")] == [1, 3]
@test i[Not(Not(r"x1."))] == [2, 3]
@test isempty(i[Not(Not(r"xx"))])
@test i[Not(Not(r""))] == 1:5
Expand Down
17 changes: 17 additions & 0 deletions test/indexing.jl
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,23 @@ using Test, DataFrames, Unicode, Random
@test dfx[!, 1] === df[!, names(dfx)[1]]
end

@test df[!, Not(1, 2)] == DataFrame(c=7:9)
@test df[!, Not(1, 1, 2)] == DataFrame(c=7:9)
@test df[!, Not([1, 1, 2])] == DataFrame(c=7:9)
@test df[!, Not(:b, 1)] == DataFrame(c=7:9)
@test df[!, Not(:b, :b, 1)] == DataFrame(c=7:9)
@test df[!, Not("c", :a)] == DataFrame(b=4:6)
@test df[!, Not("c", "c", :a)] == DataFrame(b=4:6)
@test df[!, Not(:c, :c, :a)] == DataFrame(b=4:6)
@test df[!, Not([:c, :c, :a])] == DataFrame(b=4:6)
@test df[!, Not("c", "c", "a")] == DataFrame(b=4:6)
@test df[!, Not(["c", "c", "a"])] == DataFrame(b=4:6)
@test df[!, Not(:b, "c", :a)] == DataFrame()
@test df[!, Not([1, 2], :b)] == DataFrame(c=7:9)
@test df[!, Not([:c, :a], :b)] == DataFrame()
@test df[!, Not([1, 2], 2)] == DataFrame(c=7:9)
@test df[!, Not([1, 2], [1, 2])] == DataFrame(c=7:9)

@test df[1, 1] == 1
@test df[1, 1:2] isa DataFrameRow
@test df[1, r"[ab]"] isa DataFrameRow
Expand Down
9 changes: 6 additions & 3 deletions test/select.jl
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,11 @@ Random.seed!(1234)
df = DataFrame(a=1, b=2, c=3, d=4, e=5)
@test_throws BoundsError select!(df, Not(0))
@test_throws BoundsError select!(df, Not(6))
@test_throws ArgumentError select!(df, Not([1, 1]))
@test_throws ArgumentError select!(df, Not(:f))
@test_throws BoundsError select!(df, Not([true, false]))

@test select!(copy(df), Not([1, 1])) == df[!, 2:end]

d = copy(df)
select!(d, Not([:a, :e, :c]))
@test d == DataFrame(b=2, d=4)
Expand Down Expand Up @@ -63,10 +64,11 @@ end
df = DataFrame(a=1, b=2, c=3, d=4, e=5)
@test_throws BoundsError select(df, Not(0))
@test_throws BoundsError select(df, Not(6))
@test_throws ArgumentError select(df, Not([1, 1]))
@test_throws ArgumentError select(df, Not(:f))
@test_throws BoundsError select(df, Not([true, false]))

@test select(df, Not([1, 1])) == df[!, 2:end]

df2 = copy(df)
d = select(df, Not([:a, :e, :c]))
@test d == df[:, [:b, :d]]
Expand Down Expand Up @@ -151,10 +153,11 @@ end
df = view(DataFrame(a=1, b=2, c=3, d=4, e=5), :, :)
@test_throws BoundsError select(df, Not(0))
@test_throws BoundsError select(df, Not(6))
@test_throws ArgumentError select(df, Not([1, 1]))
@test_throws ArgumentError select(df, Not(:f))
@test_throws BoundsError select(df, Not([true, false]))

@test select(df, Not([1, 1])) == df[!, 2:end]

df2 = copy(df)
d = select(df, Not([:a, :e, :c]))
@test d isa DataFrame
Expand Down