From 29bbddc4967d4f1d78529062c0fe122b5834cec2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Tue, 16 Jun 2020 11:11:47 +0200 Subject: [PATCH 01/13] make DataFrameColumns stop being AbstractVector --- docs/src/lib/functions.md | 6 +++ src/abstractdataframe/iteration.jl | 70 +++++++++++++++++++++++++++--- 2 files changed, 69 insertions(+), 7 deletions(-) diff --git a/docs/src/lib/functions.md b/docs/src/lib/functions.md index 403c4242cb..8e212993e8 100644 --- a/docs/src/lib/functions.md +++ b/docs/src/lib/functions.md @@ -120,5 +120,11 @@ disallowmissing! ```@docs eachcol eachrow +values pairs +findnext +findprev +findfirst +findlast +findall ``` diff --git a/src/abstractdataframe/iteration.jl b/src/abstractdataframe/iteration.jl index 9546683a05..91454e6b7c 100644 --- a/src/abstractdataframe/iteration.jl +++ b/src/abstractdataframe/iteration.jl @@ -30,9 +30,9 @@ Base.iterate(::AbstractDataFrame) = Return a `DataFrameRows` that iterates a data frame row by row, with each row represented as a `DataFrameRow`. -Because `DataFrameRow`s have an `eltype` of `Any`, use `copy(dfr::DataFrameRow)` to obtain -a named tuple, which supports iteration and property access like a `DataFrameRow`, -but also passes information on the `eltypes` of the columns of `df`. +Because `DataFrameRow`s have an `eltype` of `Any`, use `copy(dfr::DataFrameRow)` to obtain +a named tuple, which supports iteration and property access like a `DataFrameRow`, +but also passes information on the `eltypes` of the columns of `df`. # Examples ```jldoctest @@ -107,9 +107,9 @@ Base.propertynames(itr::DataFrameRows, private::Bool=false) = propertynames(pare # Iteration by columns """ - DataFrameColumns{<:AbstractDataFrame} <: AbstractVector{AbstractVector} + DataFrameColumns{<:AbstractDataFrame} -An `AbstractVector` that allows iteration over columns of an `AbstractDataFrame`. +A generator that allows iteration over columns of an `AbstractDataFrame`. 
Indexing into `DataFrameColumns` objects using integer or symbol indices returns the corresponding column (without copying). """ @@ -125,7 +125,8 @@ Base.summary(io::IO, dfcs::DataFrameColumns) = print(io, summary(dfcs)) Return a `DataFrameColumns` that is an `AbstractVector` that allows iterating an `AbstractDataFrame` column by column. -Additionally it is allowed to index `DataFrameColumns` using column names. +Additionally it is allowed to index `DataFrameColumns` using column names, +and convenience functions: `keys`, `values`, `pairs` are defined for it. # Examples ```jldoctest @@ -160,7 +161,6 @@ julia> sum.(eachcol(df)) eachcol(df::AbstractDataFrame) = DataFrameColumns(df) Base.size(itr::DataFrameColumns) = (size(parent(itr), 2),) -Base.IndexStyle(::Type{<:DataFrameColumns}) = Base.IndexLinear() @inline function Base.getindex(itr::DataFrameColumns, j::Int) @boundscheck checkbounds(itr, j) @@ -190,6 +190,13 @@ Get a vector of column names of `dfc` as `Symbol`s. """ Base.keys(itr::DataFrameColumns) = propertynames(itr) +""" + values(dfc::DataFrameColumns) + +Get a vector of columns of `dfc`. +""" +Base.values(itr::DataFrameColumns) = collect(itr) + """ pairs(dfc::DataFrameColumns) @@ -199,6 +206,55 @@ where `name` is the column name of the column `col`. """ Base.pairs(itr::DataFrameColumns) = Base.Iterators.Pairs(itr, keys(itr)) +""" +findnext(f::Function, itr::DataFrameColumns, i::Integer) + + Find the next integer index after or including an integer `i` of an + element of `itr` for which `f` returns `true`, or `nothing` if not found. + +""" +Base.findnext(f::Function, itr::DataFrameColumns, i::Integer) = + findnext(f, values(itr), i) + +""" +findprev(f::Function, itr::DataFrameColumns, i::Integer) + + Find the previous integer index before or including an integer `i` of an + element of `itr` for which `f` returns `true`, or `nothing` if not found. 
+ +""" +Base.findprev(f::Function, itr::DataFrameColumns, i::Integer) = + findprev(f, values(itr), i) + +""" +findfirst(f::Function, itr::DataFrameColumns) + + Return the integer index of the first element of `itr` for which `f` returns + `true`. Return `nothing` if there is no such element. + +""" +Base.findfirst(f::Function, itr::DataFrameColumns) = + findfirst(f, values(itr)) + +""" +findlast(f::Function, itr::DataFrameColumns) + + Return the integer index of the last element of `itr` for which `f` returns + `true`. Return `nothing` if there is no such element. + +""" +Base.findlast(f::Function, itr::DataFrameColumns) = + findlast(f, values(itr)) + +""" +findall(f::Function, itr::DataFrameColumns) + + Return a vector of the integer indices `i` of `itr` where `f(itr[i])` returns + true. If there are no such elements of `itr`, return an empty array. +""" +Base.findall(f::Function, itr::DataFrameColumns) = + findall(f, values(itr)) + Base.parent(itr::Union{DataFrameRows, DataFrameColumns}) = getfield(itr, :df) Base.names(itr::Union{DataFrameRows, DataFrameColumns}) = names(parent(itr)) Base.names(itr::Union{DataFrameRows, DataFrameColumns}, cols) = names(parent(itr), cols) From a994880568bd589d20af165f1dbadfa9bf83c55d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Tue, 16 Jun 2020 11:12:33 +0200 Subject: [PATCH 02/13] add tests --- test/iteration.jl | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/test/iteration.jl b/test/iteration.jl index 51c27268ca..9b49fda304 100644 --- a/test/iteration.jl +++ b/test/iteration.jl @@ -25,10 +25,10 @@ using Test, DataFrames @test size(eachcol(df)) == (size(df, 2),) @test parent(eachcol(df)) === df @test names(eachcol(df)) == names(df) - @test IndexStyle(eachcol(df)) == IndexLinear() - @test Base.IndexStyle(eachcol(df)) == IndexLinear() @test length(eachcol(df)) == size(df, 2) @test eachcol(df)[1] == df[:, 1] + @test eachcol(df)[:A] == df[:, :A] + @test 
eachcol(df).A == df[:, :A] @test collect(eachcol(df)) isa Vector{AbstractVector} @test collect(eachcol(df)) == [[1, 2], [2, 3]] @test eltype(eachcol(df)) == AbstractVector @@ -127,7 +127,7 @@ end end end -@testset "keys and pairs for eachcol" begin +@testset "keys, values and pairs for eachcol" begin df = DataFrame([11:16 21:26 31:36 41:46]) cols = eachcol(df) @@ -141,6 +141,27 @@ end @test cols[i] === cols[n] end @test_throws ArgumentError cols[:non_existent] + + @test values(cols) == collect(cols) +end + +@testset "findfirst, findnext, findlast, findprev, findall" begin + df = DataFrame(a=[1, 2, 1, 2], b=["1", "2", "1", "2"], + c=[1, 2, 1, 2], d=["1", "2", "1", "2"]) + + rows = eachrow(df) + @test findfirst(row -> row.a == 1, rows) == 1 + @test findnext(row -> row.a == 1, rows, 2) == 3 + @test findlast(row -> row.a == 1, rows) == 3 + @test findprev(row -> row.a == 1, rows, 2) == 1 + @test findall(row -> row.a == 1, rows) == [1, 3] + + cols = eachcol(df) + @test findfirst(col -> eltype(col) <: Int, cols) == 1 + @test findnext(col -> eltype(col) <: Int, cols, 2) == 3 + @test findlast(col -> eltype(col) <: Int, cols) == 3 + @test findprev(col -> eltype(col) <: Int, cols, 2) == 1 + @test findall(col -> eltype(col) <: Int, cols) == [1, 3] end end # module From 20990c8d0d19827dd8684e6a42350821309f6bcb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Tue, 16 Jun 2020 11:43:22 +0200 Subject: [PATCH 03/13] add functionality to DataFrameCols --- docs/src/lib/types.md | 9 ++++++--- src/abstractdataframe/iteration.jl | 21 ++++++++++++--------- test/iteration.jl | 7 +++++-- 3 files changed, 23 insertions(+), 14 deletions(-) diff --git a/docs/src/lib/types.md b/docs/src/lib/types.md index 89b59eb813..d0312ca5d4 100644 --- a/docs/src/lib/types.md +++ b/docs/src/lib/types.md @@ -37,12 +37,15 @@ or when accessing a single row of a `DataFrame` or `SubDataFrame` via `getindex` The `eachrow` function returns a value of the `DataFrameRows` type, which 
serves as an iterator over rows of an `AbstractDataFrame`, returning `DataFrameRow` objects. +The `DataFrameRows` isa a subtype of `AbstractVector` and supports its interface +with the exception that it is read only. Similarly, the `eachcol` function returns a value of the `DataFrameColumns` type, which -serves as an iterator over columns of an `AbstractDataFrame`. +serves as an iterator over columns of an `AbstractDataFrame` that additionally supports +indexing, `getproperty`, `hasproperty`, `keys`, `values`, `pairs`, +`findfirst`, `findnext`, `findlast`, `findprev`, `findall`, `==`, and `isequal` functions. -The `DataFrameRows` and `DataFrameColumns` types are subtypes of `AbstractVector` and support its interface -with the exception that they are read only. Note that they are not exported and should not be constructed directly, +Note that `DataFrameRows` and `DataFrameColumns` are not exported and should not be constructed directly, but using the `eachrow` and `eachcol` functions. The `RepeatedVector` and `StackedVector` types are subtypes of `AbstractVector` and support its interface diff --git a/src/abstractdataframe/iteration.jl b/src/abstractdataframe/iteration.jl index 91454e6b7c..26dd238a54 100644 --- a/src/abstractdataframe/iteration.jl +++ b/src/abstractdataframe/iteration.jl @@ -113,7 +113,7 @@ A generator that allows iteration over columns of an `AbstractDataFrame`. Indexing into `DataFrameColumns` objects using integer or symbol indices returns the corresponding column (without copying). 
""" -struct DataFrameColumns{T<:AbstractDataFrame} <: AbstractVector{AbstractVector} +struct DataFrameColumns{T<:AbstractDataFrame} df::T end @@ -160,14 +160,17 @@ julia> sum.(eachcol(df)) """ eachcol(df::AbstractDataFrame) = DataFrameColumns(df) -Base.size(itr::DataFrameColumns) = (size(parent(itr), 2),) - -@inline function Base.getindex(itr::DataFrameColumns, j::Int) - @boundscheck checkbounds(itr, j) - @inbounds parent(itr)[!, j] -end - -Base.getindex(itr::DataFrameColumns, j::Symbol) = parent(itr)[!, j] +Base.length(itr::DataFrameColumns) = size(parent(itr), 2) +Base.eltype(::Type{<:DataFrameColumns}) = AbstractVector +Base.iterate(itr::DataFrameColumns, i=1) = + i <= length(itr) ? (itr[i], i + 1) : nothing +Base.getindex(itr::DataFrameColumns, idx::ColumnIndex) = parent(itr)[!, idx] +Base.getindex(itr::DataFrameColumns, idx::MultiColumnIndex) = + eachcol(parent(itr)[!, idx]) +Base.:(==)(itr1::DataFrameColumns, itr2::DataFrameColumns) = + parent(itr1) == parent(itr2) +Base.isequal(itr1::DataFrameColumns, itr2::DataFrameColumns) = + isequal(parent(itr1), parent(itr2)) # separate methods are needed due to dispatch ambiguity Base.getproperty(itr::DataFrameColumns, col_ind::Symbol) = diff --git a/test/iteration.jl b/test/iteration.jl index 9b49fda304..f267ba556a 100644 --- a/test/iteration.jl +++ b/test/iteration.jl @@ -22,13 +22,16 @@ using Test, DataFrames @test collect(pairs(row)) isa Vector{Pair{Symbol, Int}} end - @test size(eachcol(df)) == (size(df, 2),) @test parent(eachcol(df)) === df @test names(eachcol(df)) == names(df) @test length(eachcol(df)) == size(df, 2) @test eachcol(df)[1] == df[:, 1] @test eachcol(df)[:A] == df[:, :A] + @test eachcol(df)[All()] == eachcol(df) + @test isequal(eachcol(df)[[1]], eachcol(df[!, [1]])) @test eachcol(df).A == df[:, :A] + @test eachcol(df)["A"] == df[:, "A"] + @test eachcol(df)."A" == df[:, "A"] @test collect(eachcol(df)) isa Vector{AbstractVector} @test collect(eachcol(df)) == [[1, 2], [2, 3]] @test eltype(eachcol(df)) 
== AbstractVector @@ -90,7 +93,7 @@ end @test eachrow(sdf) == eachrow(df[[3,1,4], [3,1,4]]) @test size(eachrow(sdf)) == (3,) @test eachcol(sdf) == eachcol(df[[3,1,4], [3,1,4]]) - @test size(eachcol(sdf)) == (3,) + @test length(eachcol(sdf)) == 3 end @testset "parent mutation" begin From f087796d0efef5e9ef22d688e39f6ecdb28f9ca5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Tue, 16 Jun 2020 14:26:01 +0200 Subject: [PATCH 04/13] update function definitions --- docs/src/lib/functions.md | 5 --- docs/src/lib/types.md | 5 ++- src/abstractdataframe/iteration.jl | 56 ++++++++---------------------- test/iteration.jl | 7 ++++ 4 files changed, 23 insertions(+), 50 deletions(-) diff --git a/docs/src/lib/functions.md b/docs/src/lib/functions.md index 8e212993e8..e28b9014a6 100644 --- a/docs/src/lib/functions.md +++ b/docs/src/lib/functions.md @@ -122,9 +122,4 @@ eachcol eachrow values pairs -findnext -findprev -findfirst -findlast -findall ``` diff --git a/docs/src/lib/types.md b/docs/src/lib/types.md index d0312ca5d4..e1c1aead17 100644 --- a/docs/src/lib/types.md +++ b/docs/src/lib/types.md @@ -41,9 +41,8 @@ The `DataFrameRows` isa a subtype of `AbstractVector` and supports its interface with the exception that it is read only. Similarly, the `eachcol` function returns a value of the `DataFrameColumns` type, which -serves as an iterator over columns of an `AbstractDataFrame` that additionally supports -indexing, `getproperty`, `hasproperty`, `keys`, `values`, `pairs`, -`findfirst`, `findnext`, `findlast`, `findprev`, `findall`, `==`, and `isequal` functions. +is not an `AbstractVector`, but supports most of its API. The key differences are that it is read-only and +that the `keys` function returns a vector of `Symbols` (and not integers as for normal vectors). Note that `DataFrameRows` and `DataFrameColumns` are not exported and should not be constructed directly, but using the `eachrow` and `eachcol` functions. 
diff --git a/src/abstractdataframe/iteration.jl b/src/abstractdataframe/iteration.jl index 26dd238a54..11bfe18733 100644 --- a/src/abstractdataframe/iteration.jl +++ b/src/abstractdataframe/iteration.jl @@ -125,8 +125,9 @@ Base.summary(io::IO, dfcs::DataFrameColumns) = print(io, summary(dfcs)) Return a `DataFrameColumns` that is an `AbstractVector` that allows iterating an `AbstractDataFrame` column by column. -Additionally it is allowed to index `DataFrameColumns` using column names, -and convenience functions: `keys`, `values`, `pairs` are defined for it. +It supports most of `AbstractVector` API. The key differences are that it +read-only and is that the `keys` function returns a vector of `Symbols` (and not +integers as for normal vectors). # Examples ```jldoctest @@ -160,8 +161,18 @@ julia> sum.(eachcol(df)) """ eachcol(df::AbstractDataFrame) = DataFrameColumns(df) -Base.length(itr::DataFrameColumns) = size(parent(itr), 2) +Base.IteratorSize(::Type{<:DataFrameColumns}) = Base.HasShape{1}() +Base.size(itr::DataFrameColumns) = (size(parent(itr), 2),) + +function Base.size(itr::DataFrameColumns, d::Integer) + d < 1 && throw(ArgumentError("dimension out of range")) + return d == 1 ? size(itr)[1] : 1 +end + +Base.length(itr::DataFrameColumns) = size(itr)[1] Base.eltype(::Type{<:DataFrameColumns}) = AbstractVector +Base.firstindex(itr::DataFrameColumns) = 1 +Base.lastindex(itr::DataFrameColumns) = length(itr) Base.iterate(itr::DataFrameColumns, i=1) = i <= length(itr) ? (itr[i], i + 1) : nothing Base.getindex(itr::DataFrameColumns, idx::ColumnIndex) = parent(itr)[!, idx] @@ -208,53 +219,14 @@ with the corresponding column vector, i.e. `name => col` where `name` is the column name of the column `col`. 
""" Base.pairs(itr::DataFrameColumns) = Base.Iterators.Pairs(itr, keys(itr)) - -""" -findnext(f::Function, itr::DataFrameColumns, i::Integer) - - Find the next integer index after or including an integer `i` of an - element of `itr` for which `f` returns `true`, or `nothing` if not found. - -""" Base.findnext(f::Function, itr::DataFrameColumns, i::Integer) = findnext(f, values(itr), i) - -""" -findprev(f::Function, itr::DataFrameColumns, i::Integer) - - Find the previous integer index before or including an integer `i` of an - element of `itr` for which `f` returns `true`, or `nothing` if not found. - -""" Base.findprev(f::Function, itr::DataFrameColumns, i::Integer) = findprev(f, values(itr), i) - -""" -findfirst(f::Function, itr::DataFrameColumns) - - Return the integer index of the first element of `itr` for which `f` returns - `true`. Return `nothing` if there is no such element. - -""" Base.findfirst(f::Function, itr::DataFrameColumns) = findfirst(f, values(itr)) - -""" -findlast(f::Function, itr::DataFrameColumns) - - Return the integer index of the last element of `itr` for which `f` returns - `true`. Return `nothing` if there is no such element. - -""" Base.findlast(f::Function, itr::DataFrameColumns) = findlast(f, values(itr)) - -""" -findall(f::Function, itr::DataFrameColumns) - - Return a vector of the integer indices `i` of `itr` where `f(itr[i])` returns - true. If there are no such elements of `itr`, return an empty array. 
-""" Base.findall(f::Function, itr::DataFrameColumns) = findall(f, values(itr)) diff --git a/test/iteration.jl b/test/iteration.jl index f267ba556a..300b1b58e9 100644 --- a/test/iteration.jl +++ b/test/iteration.jl @@ -22,9 +22,14 @@ using Test, DataFrames @test collect(pairs(row)) isa Vector{Pair{Symbol, Int}} end + @test Base.IteratorSize(eachcol(df)) == Base.HasShape{1}() @test parent(eachcol(df)) === df @test names(eachcol(df)) == names(df) @test length(eachcol(df)) == size(df, 2) + @test size(eachcol(df)) == (size(df, 2),) + @test size(eachcol(df), 1) == size(df, 2) + @test size(eachcol(df), 2) == 1 + @test_throws ArgumentError size(eachcol(df), 0) @test eachcol(df)[1] == df[:, 1] @test eachcol(df)[:A] == df[:, :A] @test eachcol(df)[All()] == eachcol(df) @@ -32,6 +37,8 @@ using Test, DataFrames @test eachcol(df).A == df[:, :A] @test eachcol(df)["A"] == df[:, "A"] @test eachcol(df)."A" == df[:, "A"] + @test eachcol(df)[begin] == df[!, 1] + @test eachcol(df)[end] == df[!, end] @test collect(eachcol(df)) isa Vector{AbstractVector} @test collect(eachcol(df)) == [[1, 2], [2, 3]] @test eltype(eachcol(df)) == AbstractVector From 5ba91afecb53d427d7ce7d1a0737827dd8cfbf5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Tue, 16 Jun 2020 14:28:49 +0200 Subject: [PATCH 05/13] fix typo --- docs/src/lib/types.md | 2 +- src/abstractdataframe/iteration.jl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/src/lib/types.md b/docs/src/lib/types.md index e1c1aead17..3ad04236a3 100644 --- a/docs/src/lib/types.md +++ b/docs/src/lib/types.md @@ -42,7 +42,7 @@ with the exception that it is read only. Similarly, the `eachcol` function returns a value of the `DataFrameColumns` type, which is not an `AbstractVector`, but supports most of its API. The key differences are that it is read-only and -that the `keys` function returns a vector of `Symbols` (and not integers as for normal vectors). 
+that the `keys` function returns a vector of `Symbol`s (and not integers as for normal vectors). Note that `DataFrameRows` and `DataFrameColumns` are not exported and should not be constructed directly, but using the `eachrow` and `eachcol` functions. diff --git a/src/abstractdataframe/iteration.jl b/src/abstractdataframe/iteration.jl index 11bfe18733..695bd67e34 100644 --- a/src/abstractdataframe/iteration.jl +++ b/src/abstractdataframe/iteration.jl @@ -126,7 +126,7 @@ Base.summary(io::IO, dfcs::DataFrameColumns) = print(io, summary(dfcs)) Return a `DataFrameColumns` that is an `AbstractVector` that allows iterating an `AbstractDataFrame` column by column. It supports most of `AbstractVector` API. The key differences are that it -read-only and is that the `keys` function returns a vector of `Symbols` (and not +read-only and is that the `keys` function returns a vector of `Symbol`s (and not integers as for normal vectors). # Examples From 405487fed832bf74ffa0e395dd25eb79108480d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Tue, 16 Jun 2020 18:23:59 +0200 Subject: [PATCH 06/13] fixes following removal of <:AbstractVector subtyping --- src/dataframe/dataframe.jl | 2 +- src/deprecated.jl | 2 +- src/subdataframe/subdataframe.jl | 2 +- test/dataframe.jl | 17 +++++++++++++++++ test/select.jl | 6 +++--- test/tables.jl | 4 ++-- 6 files changed, 25 insertions(+), 8 deletions(-) diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl index ff321a35f3..7a1f9ae48e 100644 --- a/src/dataframe/dataframe.jl +++ b/src/dataframe/dataframe.jl @@ -831,7 +831,7 @@ function Base.copy(df::DataFrame; copycols::Bool=true) if copycols df[:, :] else - DataFrame(eachcol(df), _names(df), copycols=false) + DataFrame(_columns(df), _names(df), copycols=false) end end diff --git a/src/deprecated.jl b/src/deprecated.jl index 982382de7a..494a6a43ce 100644 --- a/src/deprecated.jl +++ b/src/deprecated.jl @@ -313,7 +313,7 @@ function 
Base.join(df1::AbstractDataFrame, df2::AbstractDataFrame, end end -@deprecate eachcol(df::AbstractDataFrame, names::Bool) names ? collect(pairs(eachcol(df))) : eachcol(df) +@deprecate eachcol(df::AbstractDataFrame, names::Bool) names ? collect(pairs(eachcol(df))) : collect(eachcol(df)) @deprecate groupvars(gd::GroupedDataFrame) groupcols(gd) diff --git a/src/subdataframe/subdataframe.jl b/src/subdataframe/subdataframe.jl index 44349b6cb1..69fb543a43 100644 --- a/src/subdataframe/subdataframe.jl +++ b/src/subdataframe/subdataframe.jl @@ -168,7 +168,7 @@ function DataFrame(sdf::SubDataFrame; copycols::Bool=true) if copycols sdf[:, :] else - DataFrame(eachcol(sdf), _names(sdf), copycols=false) + DataFrame(collect(eachcol(sdf)), _names(sdf), copycols=false) end end diff --git a/test/dataframe.jl b/test/dataframe.jl index ea810bd239..8fd7d7d351 100644 --- a/test/dataframe.jl +++ b/test/dataframe.jl @@ -123,8 +123,25 @@ end df = DataFrame(a=Union{Int, Missing}[2, 3], b=Union{DataFrame, Missing}[DataFrame(c = 1), DataFrame(d = 2)]) dfc = copy(df) + dfcc = copy(df, copycols=false) dfdc = deepcopy(df) + @test dfc == df + @test dfc.a !== df.a + @test dfc.b !== df.b + @test DataFrames._columns(dfc) == DataFrames._columns(df) + @test DataFrames._columns(dfc) !== DataFrames._columns(df) + @test dfcc == df + @test dfcc.a === df.a + @test dfcc.b === df.b + @test DataFrames._columns(dfcc) == DataFrames._columns(df) + @test DataFrames._columns(dfcc) !== DataFrames._columns(df) + @test dfdc == df + @test dfdc.a !== df.a + @test dfdc.b !== df.b + @test DataFrames._columns(dfdc) == DataFrames._columns(df) + @test DataFrames._columns(dfdc) !== DataFrames._columns(df) + df[1, :a] = 4 df[1, :b][!, :e] .= 5 diff --git a/test/select.jl b/test/select.jl index 3585f1c4e4..dfd5a64ef3 100644 --- a/test/select.jl +++ b/test/select.jl @@ -619,12 +619,12 @@ end df = DataFrame(rand(10, 4)) df2 = select(df, :, :x1 => :x3) - @test df2 == DataFrame(eachcol(df)[[1,2,1,4]]) + @test df2 == 
DataFrame(collect(eachcol(df))[[1,2,1,4]]) @test df2.x1 !== df2.x3 df2 = select(df, :, :x1 => :x3, copycols=false) - @test df2 == DataFrame(eachcol(df)[[1,2,1,4]]) + @test df2 == DataFrame(collect(eachcol(df))[[1,2,1,4]]) @test df2.x1 === df2.x3 - @test select(df, :x1 => :x3, :) == DataFrame(eachcol(df)[[1,1,2,4]], + @test select(df, :x1 => :x3, :) == DataFrame(collect(eachcol(df))[[1,1,2,4]], [:x3, :x1, :x2, :x4]) select!(df, :, :x1 => :x3) @test df2 == df diff --git a/test/tables.jl b/test/tables.jl index 6df913b582..1d38d69842 100644 --- a/test/tables.jl +++ b/test/tables.jl @@ -209,12 +209,12 @@ end @test all(((a,b),) -> a === b, zip(eachcol(df), eachcol(df2))) df2 = DataFrame(eachcol(df)) - @test propertynames(df2) == [:x1, :x2, :x3, :x4] + @test df == df2 @test all(((a,b),) -> a == b, zip(eachcol(df), eachcol(df2))) @test !any(((a,b),) -> a === b, zip(eachcol(df), eachcol(df2))) df2 = DataFrame(eachcol(df)) - @test propertynames(df2) == [:x1, :x2, :x3, :x4] + @test df == df2 @test !any(((a,b),) -> a === b, zip(eachcol(df), eachcol(df2))) @test Tables.rowtable(df) == Tables.rowtable(eachrow(df)) From b4007919a35bbebea9b2edbd5040cf6929e13406 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Wed, 17 Jun 2020 00:33:32 +0200 Subject: [PATCH 07/13] fixes after code review --- src/abstractdataframe/iteration.jl | 4 ++-- test/indexing_begin_tests.jl | 3 +++ test/iteration.jl | 2 -- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/abstractdataframe/iteration.jl b/src/abstractdataframe/iteration.jl index 695bd67e34..13f911ec97 100644 --- a/src/abstractdataframe/iteration.jl +++ b/src/abstractdataframe/iteration.jl @@ -125,7 +125,7 @@ Base.summary(io::IO, dfcs::DataFrameColumns) = print(io, summary(dfcs)) Return a `DataFrameColumns` that is an `AbstractVector` that allows iterating an `AbstractDataFrame` column by column. -It supports most of `AbstractVector` API. 
The key differences are that it +It supports most of `AbstractVector` API. The key differences are that it is read-only and is that the `keys` function returns a vector of `Symbol`s (and not integers as for normal vectors). @@ -207,7 +207,7 @@ Base.keys(itr::DataFrameColumns) = propertynames(itr) """ values(dfc::DataFrameColumns) -Get a vector of columns of `dfc`. +Get a vector of columns from `dfc`. """ Base.values(itr::DataFrameColumns) = collect(itr) diff --git a/test/indexing_begin_tests.jl b/test/indexing_begin_tests.jl index 24d8c2b761..772e0afe68 100644 --- a/test/indexing_begin_tests.jl +++ b/test/indexing_begin_tests.jl @@ -30,4 +30,7 @@ @test df[[begin, end], [begin, end]] == df[[1,3], [1,4]] df[[begin, end], [begin, end]] .= 1000 @test df.x1 == df.x4 == [1000, 222, 1000] + + @test eachcol(df)[begin] == df[!, begin] + @test eachcol(df)[end] == df[!, end] end diff --git a/test/iteration.jl b/test/iteration.jl index 300b1b58e9..b3917f8086 100644 --- a/test/iteration.jl +++ b/test/iteration.jl @@ -37,8 +37,6 @@ using Test, DataFrames @test eachcol(df).A == df[:, :A] @test eachcol(df)["A"] == df[:, "A"] @test eachcol(df)."A" == df[:, "A"] - @test eachcol(df)[begin] == df[!, 1] - @test eachcol(df)[end] == df[!, end] @test collect(eachcol(df)) isa Vector{AbstractVector} @test collect(eachcol(df)) == [[1, 2], [2, 3]] @test eltype(eachcol(df)) == AbstractVector From af2be967912468a0fadf7ce489015712445f4098 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sun, 21 Jun 2020 20:51:24 +0200 Subject: [PATCH 08/13] Apply suggestions from code review Co-authored-by: Milan Bouchet-Valat --- docs/src/lib/types.md | 4 ++-- src/abstractdataframe/iteration.jl | 13 +++++++------ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/docs/src/lib/types.md b/docs/src/lib/types.md index 3ad04236a3..e647c0d395 100644 --- a/docs/src/lib/types.md +++ b/docs/src/lib/types.md @@ -37,8 +37,8 @@ or when accessing a single row of a `DataFrame` or 
`SubDataFrame` via `getindex` The `eachrow` function returns a value of the `DataFrameRows` type, which serves as an iterator over rows of an `AbstractDataFrame`, returning `DataFrameRow` objects. -The `DataFrameRows` isa a subtype of `AbstractVector` and supports its interface -with the exception that it is read only. +The `DataFrameRows` is a subtype of `AbstractVector` and supports its interface +with the exception that it is read-only. Similarly, the `eachcol` function returns a value of the `DataFrameColumns` type, which is not an `AbstractVector`, but supports most of its API. The key differences are that it is read-only and diff --git a/src/abstractdataframe/iteration.jl b/src/abstractdataframe/iteration.jl index 13f911ec97..862bfeb848 100644 --- a/src/abstractdataframe/iteration.jl +++ b/src/abstractdataframe/iteration.jl @@ -109,7 +109,7 @@ Base.propertynames(itr::DataFrameRows, private::Bool=false) = propertynames(pare """ DataFrameColumns{<:AbstractDataFrame} -A generator that allows iteration over columns of an `AbstractDataFrame`. +A vector-like object that allows iteration over columns of an `AbstractDataFrame`. Indexing into `DataFrameColumns` objects using integer or symbol indices returns the corresponding column (without copying). """ @@ -123,9 +123,9 @@ Base.summary(io::IO, dfcs::DataFrameColumns) = print(io, summary(dfcs)) """ eachcol(df::AbstractDataFrame) -Return a `DataFrameColumns` that is an `AbstractVector` +Return a `DataFrameColumns` object that is a vector-like that allows iterating an `AbstractDataFrame` column by column. -It supports most of `AbstractVector` API. The key differences are that it is +It supports most of the `AbstractVector` API. The key differences are that it is read-only and is that the `keys` function returns a vector of `Symbol`s (and not integers as for normal vectors). 
@@ -173,10 +173,11 @@ Base.length(itr::DataFrameColumns) = size(itr)[1] Base.eltype(::Type{<:DataFrameColumns}) = AbstractVector Base.firstindex(itr::DataFrameColumns) = 1 Base.lastindex(itr::DataFrameColumns) = length(itr) -Base.iterate(itr::DataFrameColumns, i=1) = +Base.iterate(itr::DataFrameColumns, i::Integer=1) = i <= length(itr) ? (itr[i], i + 1) : nothing -Base.getindex(itr::DataFrameColumns, idx::ColumnIndex) = parent(itr)[!, idx] -Base.getindex(itr::DataFrameColumns, idx::MultiColumnIndex) = +Base.@propagate_inbounds Base.getindex(itr::DataFrameColumns, idx::ColumnIndex) = + parent(itr)[!, idx] +Base.@propagate_inbounds Base.getindex(itr::DataFrameColumns, idx::MultiColumnIndex) = eachcol(parent(itr)[!, idx]) Base.:(==)(itr1::DataFrameColumns, itr2::DataFrameColumns) = parent(itr1) == parent(itr2) From ce2c5be5c023ebfe03b9eb2279c0e31e73f69a00 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sun, 21 Jun 2020 20:57:49 +0200 Subject: [PATCH 09/13] improve docstrings and make a view when subsetting --- src/abstractdataframe/iteration.jl | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/abstractdataframe/iteration.jl b/src/abstractdataframe/iteration.jl index 862bfeb848..a04515f957 100644 --- a/src/abstractdataframe/iteration.jl +++ b/src/abstractdataframe/iteration.jl @@ -110,8 +110,16 @@ Base.propertynames(itr::DataFrameRows, private::Bool=false) = propertynames(pare DataFrameColumns{<:AbstractDataFrame} A vector-like object that allows iteration over columns of an `AbstractDataFrame`. -Indexing into `DataFrameColumns` objects using integer or symbol indices + +Indexing into `DataFrameColumns` objects using integer, `Symbol` or string returns the corresponding column (without copying). +Indexing into `DataFrameColumns` objects using multiple column selector +returns a subsetted `DataFrameColumns` object with parent being a view of the +original containg only the selected columns. 
+ +It supports most of the `AbstractVector` API. The key differences are that it is +read-only and is that the `keys` function returns a vector of `Symbol`s (and not +integers as for normal vectors). """ struct DataFrameColumns{T<:AbstractDataFrame} df::T @@ -125,6 +133,13 @@ Base.summary(io::IO, dfcs::DataFrameColumns) = print(io, summary(dfcs)) Return a `DataFrameColumns` object that is a vector-like that allows iterating an `AbstractDataFrame` column by column. + +Indexing into `DataFrameColumns` objects using integer, `Symbol` or string +returns the corresponding column (without copying). +Indexing into `DataFrameColumns` objects using multiple column selector +returns a subsetted `DataFrameColumns` object with parent being a view of the +original containg only the selected columns. + It supports most of the `AbstractVector` API. The key differences are that it is read-only and is that the `keys` function returns a vector of `Symbol`s (and not integers as for normal vectors). @@ -178,7 +193,7 @@ Base.iterate(itr::DataFrameColumns, i::Integer=1) = Base.@propagate_inbounds Base.getindex(itr::DataFrameColumns, idx::ColumnIndex) = parent(itr)[!, idx] Base.@propagate_inbounds Base.getindex(itr::DataFrameColumns, idx::MultiColumnIndex) = - eachcol(parent(itr)[!, idx]) + eachcol(view(parent(itr), !, idx)) Base.:(==)(itr1::DataFrameColumns, itr2::DataFrameColumns) = parent(itr1) == parent(itr2) Base.isequal(itr1::DataFrameColumns, itr2::DataFrameColumns) = From 7786867fabb9999edf5248ed6bb640ececdd5f90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 22 Jun 2020 22:41:03 +0200 Subject: [PATCH 10/13] fix problem with multiindex on eachcol --- test/reshape.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/reshape.jl b/test/reshape.jl index c18d4f0ccc..0257e29ca1 100644 --- a/test/reshape.jl +++ b/test/reshape.jl @@ -126,7 +126,7 @@ end Union{Int, Missing}[2, 6], Union{Int, Missing}[3, 7], Union{Int, Missing}[4, 
8]], [:id, :a, :b, :c, :d]) @test isa(udf[!, 1], Vector{Int}) - @test all(isa.(eachcol(udf)[2:end], Vector{Union{Int, Missing}})) + @test all(i -> isa(eachcol(udf)[i], Vector{Union{Int, Missing}}), 2:5) df = DataFrame([categorical(repeat(1:2, inner=4)), categorical(repeat('a':'d', outer=2)), categorical(1:8)], [:id, :variable, :value]) @@ -136,7 +136,7 @@ end Union{Int, Missing}[2, 6], Union{Int, Missing}[3, 7], Union{Int, Missing}[4, 8]], [:id, :a, :b, :c, :d]) @test isa(udf[!, 1], CategoricalVector{Int}) - @test all(isa.(eachcol(udf)[2:end], CategoricalVector{Union{Int, Missing}})) + @test all(i -> isa(eachcol(udf)[i], CategoricalVector{Union{Int, Missing}}), 2:5) end @testset "duplicate entries in unstack warnings" begin From 9192e151c308c5ffda8fa25752577e15cf59861e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Wed, 24 Jun 2020 12:30:59 +0200 Subject: [PATCH 11/13] Apply suggestions from code review Co-authored-by: Milan Bouchet-Valat --- src/abstractdataframe/iteration.jl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/abstractdataframe/iteration.jl b/src/abstractdataframe/iteration.jl index a04515f957..a1fd210ce7 100644 --- a/src/abstractdataframe/iteration.jl +++ b/src/abstractdataframe/iteration.jl @@ -113,12 +113,12 @@ A vector-like object that allows iteration over columns of an `AbstractDataFrame Indexing into `DataFrameColumns` objects using integer, `Symbol` or string returns the corresponding column (without copying). -Indexing into `DataFrameColumns` objects using multiple column selector -returns a subsetted `DataFrameColumns` object with parent being a view of the -original containg only the selected columns. +Indexing into `DataFrameColumns` objects using a multiple column selector +returns a subsetted `DataFrameColumns` object with parent being a `SubDataFrame` view of the +original containing only the selected columns. -It supports most of the `AbstractVector` API. 
The key differences are that it is -read-only and is that the `keys` function returns a vector of `Symbol`s (and not +`DataFrameColumns` supports most of the `AbstractVector` API. The key differences are that it is +read-only and that the `keys` function returns a vector of `Symbol`s (and not integers as for normal vectors). """ struct DataFrameColumns{T<:AbstractDataFrame} @@ -136,8 +136,8 @@ that allows iterating an `AbstractDataFrame` column by column. Indexing into `DataFrameColumns` objects using integer, `Symbol` or string returns the corresponding column (without copying). -Indexing into `DataFrameColumns` objects using multiple column selector -returns a subsetted `DataFrameColumns` object with parent being a view of the +Indexing into `DataFrameColumns` objects using a multiple column selector +returns a subsetted `DataFrameColumns` object with parent being a `SubDataFrame` view of the original containg only the selected columns. It supports most of the `AbstractVector` API. The key differences are that it is From 7c627620ba36e28f5162f39c596f92456519a63d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Wed, 24 Jun 2020 15:00:17 +0200 Subject: [PATCH 12/13] update functionality after the discussion --- src/abstractdataframe/iteration.jl | 46 ++++++++++++++++-------------- test/iteration.jl | 22 +++++++++++--- 2 files changed, 43 insertions(+), 25 deletions(-) diff --git a/src/abstractdataframe/iteration.jl b/src/abstractdataframe/iteration.jl index a1fd210ce7..1701693067 100644 --- a/src/abstractdataframe/iteration.jl +++ b/src/abstractdataframe/iteration.jl @@ -106,20 +106,28 @@ Base.propertynames(itr::DataFrameRows, private::Bool=false) = propertynames(pare # Iteration by columns +const DATAFRAMECOLUMNS_DOCSTR = """ +Indexing into `DataFrameColumns` objects using integer, `Symbol` or string +returns the corresponding column (without copying).
+Indexing into `DataFrameColumns` objects using a multiple column selector +returns a subsetted `DataFrameColumns` object with parent containing only the +selected columns (without copying). + +`DataFrameColumns` supports most of the `AbstractVector` API. The key +differences are that it is read-only and that the `keys` function returns a +vector of `Symbol`s (and not integers as for normal vectors). + +In particular `findnext`, `findprev`, `findfirst`, `findlast`, and `findall` +functions are supported, and in `findnext`, `findprev` functions it is allowed +to pass integer, string, or `Symbol` as a reference index. +""" + """ DataFrameColumns{<:AbstractDataFrame} A vector-like object that allows iteration over columns of an `AbstractDataFrame`. -Indexing into `DataFrameColumns` objects using integer, `Symbol` or string -returns the corresponding column (without copying). -Indexing into `DataFrameColumns` objects using a multiple column selector -returns a subsetted `DataFrameColumns` object with parent being a `SubDataFrame` view of the -original containing only the selected columns. - -`DataFrameColumns` supports most of the `AbstractVector` API. The key differences are that it is -read-only and that the `keys` function returns a vector of `Symbol`s (and not -integers as for normal vectors). +$DATAFRAMECOLUMNS_DOCSTR """ struct DataFrameColumns{T<:AbstractDataFrame} df::T @@ -131,18 +139,10 @@ Base.summary(io::IO, dfcs::DataFrameColumns) = print(io, summary(dfcs)) """ eachcol(df::AbstractDataFrame) -Return a `DataFrameColumns` object that is a vector-like -that allows iterating an `AbstractDataFrame` column by column. - -Indexing into `DataFrameColumns` objects using integer, `Symbol` or string -returns the corresponding column (without copying). -Indexing into `DataFrameColumns` objects using a multiple column selector -returns a subsetted `DataFrameColumns` object with parent being a `SubDataFrame` view of the -original containg only the selected columns. 
+Return a `DataFrameColumns` object that is a vector-like that allows iterating +an `AbstractDataFrame` column by column. -It supports most of the `AbstractVector` API. The key differences are that it is -read-only and is that the `keys` function returns a vector of `Symbol`s (and not -integers as for normal vectors). +$DATAFRAMECOLUMNS_DOCSTR # Examples ```jldoctest @@ -193,7 +193,7 @@ Base.iterate(itr::DataFrameColumns, i::Integer=1) = Base.@propagate_inbounds Base.getindex(itr::DataFrameColumns, idx::ColumnIndex) = parent(itr)[!, idx] Base.@propagate_inbounds Base.getindex(itr::DataFrameColumns, idx::MultiColumnIndex) = - eachcol(view(parent(itr), !, idx)) + eachcol(parent(itr)[!, idx]) Base.:(==)(itr1::DataFrameColumns, itr2::DataFrameColumns) = parent(itr1) == parent(itr2) Base.isequal(itr1::DataFrameColumns, itr2::DataFrameColumns) = @@ -237,8 +237,12 @@ where `name` is the column name of the column `col`. Base.pairs(itr::DataFrameColumns) = Base.Iterators.Pairs(itr, keys(itr)) Base.findnext(f::Function, itr::DataFrameColumns, i::Integer) = findnext(f, values(itr), i) +Base.findnext(f::Function, itr::DataFrameColumns, i::Union{Symbol, AbstractString}) = + findnext(f, values(itr), index(parent(itr))[i]) Base.findprev(f::Function, itr::DataFrameColumns, i::Integer) = findprev(f, values(itr), i) +Base.findprev(f::Function, itr::DataFrameColumns, i::Union{Symbol, AbstractString}) = + findprev(f, values(itr), index(parent(itr))[i]) Base.findfirst(f::Function, itr::DataFrameColumns) = findfirst(f, values(itr)) Base.findlast(f::Function, itr::DataFrameColumns) = diff --git a/test/iteration.jl b/test/iteration.jl index b3917f8086..42921b6667 100644 --- a/test/iteration.jl +++ b/test/iteration.jl @@ -31,15 +31,17 @@ using Test, DataFrames @test size(eachcol(df), 2) == 1 @test_throws ArgumentError size(eachcol(df), 0) @test eachcol(df)[1] == df[:, 1] - @test eachcol(df)[:A] == df[:, :A] + @test eachcol(df)[:A] === df[!, :A] @test eachcol(df)[All()] == eachcol(df) @test 
isequal(eachcol(df)[[1]], eachcol(df[!, [1]])) - @test eachcol(df).A == df[:, :A] - @test eachcol(df)["A"] == df[:, "A"] - @test eachcol(df)."A" == df[:, "A"] + @test eachcol(df).A === df[!, :A] + @test eachcol(df)["A"] === df[!, "A"] + @test eachcol(df)."A" === df[!, "A"] @test collect(eachcol(df)) isa Vector{AbstractVector} @test collect(eachcol(df)) == [[1, 2], [2, 3]] @test eltype(eachcol(df)) == AbstractVector + @test_throws ArgumentError eachcol(df)[[1,1]] + @test eachcol(df)[[1]][1] === df.A for col in eachcol(df) @test isa(col, AbstractVector) end @@ -167,8 +169,20 @@ end cols = eachcol(df) @test findfirst(col -> eltype(col) <: Int, cols) == 1 @test findnext(col -> eltype(col) <: Int, cols, 2) == 3 + @test findnext(col -> eltype(col) <: Int, cols, 10) === nothing + @test_throws BoundsError findnext(col -> eltype(col) <: Int, cols, -1) + @test_throws ArgumentError findnext(col -> eltype(col) <: Int, cols, :x1) + @test_throws ArgumentError findnext(col -> eltype(col) <: Int, cols, "x1") + @test findnext(col -> eltype(col) <: Int, cols, :b) == 3 + @test findnext(col -> eltype(col) <: Int, cols, "b") == 3 @test findlast(col -> eltype(col) <: Int, cols) == 3 @test findprev(col -> eltype(col) <: Int, cols, 2) == 1 + @test findprev(col -> eltype(col) <: Int, cols, :b) == 1 + @test findprev(col -> eltype(col) <: Int, cols, "b") == 1 + @test findprev(col -> eltype(col) <: Int, cols, -1) === nothing + @test_throws BoundsError findprev(col -> eltype(col) <: Int, cols, 10) + @test_throws ArgumentError findprev(col -> eltype(col) <: Int, cols, :x1) + @test_throws ArgumentError findprev(col -> eltype(col) <: Int, cols, "x1") @test findall(col -> eltype(col) <: Int, cols) == [1, 3] end From 65c969d99eac610f612819f7fa4fad56db550ead Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Wed, 24 Jun 2020 18:01:49 +0200 Subject: [PATCH 13/13] Apply suggestions from code review Co-authored-by: Milan Bouchet-Valat --- src/abstractdataframe/iteration.jl | 
8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/abstractdataframe/iteration.jl b/src/abstractdataframe/iteration.jl index 1701693067..e80ee69572 100644 --- a/src/abstractdataframe/iteration.jl +++ b/src/abstractdataframe/iteration.jl @@ -110,16 +110,16 @@ const DATAFRAMECOLUMNS_DOCSTR = """ Indexing into `DataFrameColumns` objects using integer, `Symbol` or string returns the corresponding column (without copying). Indexing into `DataFrameColumns` objects using a multiple column selector -returns a subsetted `DataFrameColumns` object with parent containing only the -selected columns (without copying). +returns a subsetted `DataFrameColumns` object with a new parent containing +only the selected columns (without copying). `DataFrameColumns` supports most of the `AbstractVector` API. The key differences are that it is read-only and that the `keys` function returns a vector of `Symbol`s (and not integers as for normal vectors). In particular `findnext`, `findprev`, `findfirst`, `findlast`, and `findall` -functions are supported, and in `findnext`, `findprev` functions it is allowed -to pass integer, string, or `Symbol` as a reference index. +functions are supported, and in `findnext` and `findprev` functions it is allowed +to pass an integer, string, or `Symbol` as a reference index. """ """