diff --git a/NEWS.md b/NEWS.md index 86721af19..d37e939d8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,12 @@ +# DataFrames.jl v1.6 Release Notes + +## New functionalities + +* `Not` allows passing multiple positional arguments that are + treated as if they were wrapped in `Cols` and does not throw an error + when a vector containing duplicate indices is passed during column selection + ([#3302](https://github.com/JuliaData/DataFrames.jl/pull/3302)) + # DataFrames.jl v1.5 Release Notes ## New functionalities diff --git a/Project.toml b/Project.toml index cc56cb70b..464978361 100644 --- a/Project.toml +++ b/Project.toml @@ -31,7 +31,7 @@ CategoricalArrays = "0.10.0" Compat = "4.2" DataAPI = "1.14.0" InlineStrings = "1.3.0" -InvertedIndices = "1" +InvertedIndices = "1.3" IteratorInterfaceExtensions = "0.1.1, 1" Missings = "0.4.2, 1" PooledArrays = "1.4.2" diff --git a/docs/src/lib/indexing.md b/docs/src/lib/indexing.md index 326b3e6d4..89435caad 100644 --- a/docs/src/lib/indexing.md +++ b/docs/src/lib/indexing.md @@ -33,6 +33,8 @@ The following values are a valid column index: * a `Not` expression (see [InvertedIndices.jl](https://github.com/JuliaData/InvertedIndices.jl)); `Not(idx)` selects all indices not in the passed `idx`; + when passed as a column selector, `Not(idx...)` is equivalent to + `Not(Cols(idx...))`. * a `Cols` expression (see [DataAPI.jl](https://github.com/JuliaData/DataAPI.jl)); `Cols(idxs...)` selects the union of the selections in `idxs`; in particular `Cols()` diff --git a/src/other/index.jl b/src/other/index.jl index b444bbdd9..51aa3a31c 100644 --- a/src/other/index.jl +++ b/src/other/index.jl @@ -226,6 +226,15 @@ end @inline Base.getindex(x::AbstractIndex, ::Colon) = Base.OneTo(length(x)) @inline Base.getindex(x::AbstractIndex, notidx::Not) = setdiff(1:length(x), getindex(x, notidx.skip)) + +@inline function Base.getindex(x::AbstractIndex, notidx::Not{<:AbstractVector}) + skip = notidx.skip + todrop = getindex(x, eltype(skip) === Bool ? 
skip : unique(skip)) + return setdiff(1:length(x), todrop) +end + +@inline Base.getindex(x::AbstractIndex, notidx::Not{InvertedIndices.NotMultiIndex}) = + setdiff(1:length(x), getindex(x, Cols(notidx.skip.indices...))) @inline Base.getindex(x::AbstractIndex, idx::Between) = x[idx.first]:x[idx.last] @inline Base.getindex(x::AbstractIndex, idx::All) = isempty(idx.cols) ? (1:length(x)) : throw(ArgumentError("All(args...) is not supported: use Cols(args...) instead")) diff --git a/test/index.jl b/test/index.jl index a9b5664fd..fc82540ea 100644 --- a/test/index.jl +++ b/test/index.jl @@ -50,9 +50,13 @@ using DataFrames: Index, SubIndex, fuzzymatch @test_throws ArgumentError i[Not(:x)] @test_throws ArgumentError i[Not("x")] @test_throws BoundsError i[Not(1:3)] - @test_throws ArgumentError i[Not([1, 1])] - @test_throws ArgumentError i[Not([:A, :A])] - @test_throws ArgumentError i[Not(["A", "A"])] + + @test i[Not([1, 1])] == [2] + @test i[Not([:A, :A])] == [2] + @test i[Not(["A", "A"])] == [2] + @test isempty(i[Not([true, true])]) + @test i[Not([false, false])] == 1:2 + @test i[Not([true, false])] == [2] @test i[1:1] == 1:1 @@ -115,6 +119,9 @@ end si7 = SubIndex(i, Not(1:2)) si8 = SubIndex(i, ["C", "D", "E"]) si9 = SubIndex(i, Not(Not(["C", "D", "E"]))) + si10 = SubIndex(i, Not(1, 2)) + si11 = SubIndex(i, Not(:A, :B)) + si12 = SubIndex(i, Not(2, "A")) @test copy(si1) == i @test copy(si2) == Index([:C, :D, :E]) @@ -125,6 +132,9 @@ end @test copy(si7) == Index([:C, :D, :E]) @test copy(si8) == Index([:C, :D, :E]) @test copy(si9) == Index([:C, :D, :E]) + @test copy(si10) == Index([:C, :D, :E]) + @test copy(si11) == Index([:C, :D, :E]) + @test copy(si12) == Index([:C, :D, :E]) @test_throws ArgumentError SubIndex(i, 1) @test_throws ArgumentError SubIndex(i, :A) @@ -327,6 +337,8 @@ end push!(i, :x131) push!(i, :y13) push!(i, :yy13) + @test i[Not(2, 4, 5)] == [1, 3] + @test i[Not(2, :y13, "yy13")] == [1, 3] @test i[Not(Not(r"x1."))] == [2, 3] @test isempty(i[Not(Not(r"xx"))]) 
@test i[Not(Not(r""))] == 1:5 diff --git a/test/indexing.jl b/test/indexing.jl index 13a1890f8..f4b01e2de 100644 --- a/test/indexing.jl +++ b/test/indexing.jl @@ -19,6 +19,23 @@ using Test, DataFrames, Unicode, Random @test dfx[!, 1] === df[!, names(dfx)[1]] end + @test df[!, Not(1, 2)] == DataFrame(c=7:9) + @test df[!, Not(1, 1, 2)] == DataFrame(c=7:9) + @test df[!, Not([1, 1, 2])] == DataFrame(c=7:9) + @test df[!, Not(:b, 1)] == DataFrame(c=7:9) + @test df[!, Not(:b, :b, 1)] == DataFrame(c=7:9) + @test df[!, Not("c", :a)] == DataFrame(b=4:6) + @test df[!, Not("c", "c", :a)] == DataFrame(b=4:6) + @test df[!, Not(:c, :c, :a)] == DataFrame(b=4:6) + @test df[!, Not([:c, :c, :a])] == DataFrame(b=4:6) + @test df[!, Not("c", "c", "a")] == DataFrame(b=4:6) + @test df[!, Not(["c", "c", "a"])] == DataFrame(b=4:6) + @test df[!, Not(:b, "c", :a)] == DataFrame() + @test df[!, Not([1, 2], :b)] == DataFrame(c=7:9) + @test df[!, Not([:c, :a], :b)] == DataFrame() + @test df[!, Not([1, 2], 2)] == DataFrame(c=7:9) + @test df[!, Not([1, 2], [1, 2])] == DataFrame(c=7:9) + @test df[1, 1] == 1 @test df[1, 1:2] isa DataFrameRow @test df[1, r"[ab]"] isa DataFrameRow diff --git a/test/select.jl b/test/select.jl index 2f46f1571..ee57cfaf6 100644 --- a/test/select.jl +++ b/test/select.jl @@ -20,10 +20,11 @@ Random.seed!(1234) df = DataFrame(a=1, b=2, c=3, d=4, e=5) @test_throws BoundsError select!(df, Not(0)) @test_throws BoundsError select!(df, Not(6)) - @test_throws ArgumentError select!(df, Not([1, 1])) @test_throws ArgumentError select!(df, Not(:f)) @test_throws BoundsError select!(df, Not([true, false])) + @test select!(copy(df), Not([1, 1])) == df[!, 2:end] + d = copy(df) select!(d, Not([:a, :e, :c])) @test d == DataFrame(b=2, d=4) @@ -63,10 +64,11 @@ end df = DataFrame(a=1, b=2, c=3, d=4, e=5) @test_throws BoundsError select(df, Not(0)) @test_throws BoundsError select(df, Not(6)) - @test_throws ArgumentError select(df, Not([1, 1])) @test_throws ArgumentError select(df, Not(:f)) 
@test_throws BoundsError select(df, Not([true, false])) + @test select(df, Not([1, 1])) == df[!, 2:end] + df2 = copy(df) d = select(df, Not([:a, :e, :c])) @test d == df[:, [:b, :d]] @@ -151,10 +153,11 @@ end df = view(DataFrame(a=1, b=2, c=3, d=4, e=5), :, :) @test_throws BoundsError select(df, Not(0)) @test_throws BoundsError select(df, Not(6)) - @test_throws ArgumentError select(df, Not([1, 1])) @test_throws ArgumentError select(df, Not(:f)) @test_throws BoundsError select(df, Not([true, false])) + @test select(df, Not([1, 1])) == df[!, 2:end] + df2 = copy(df) d = select(df, Not([:a, :e, :c])) @test d isa DataFrame