diff --git a/NEWS.md b/NEWS.md index d2bf472b53..24d5552c24 100644 --- a/NEWS.md +++ b/NEWS.md @@ -39,6 +39,8 @@ * Add `resize!`, `keepat!`, `pop!`, `popfirst!`, and `popat!`, make `deleteat!` signature more precise ([#3047](https://github.com/JuliaData/DataFrames.jl/pull/3047)) +* Add `pushfirst!` and `insert!` + ([#3072](https://github.com/JuliaData/DataFrames.jl/pull/3072)) * New `threads` argument allows disabling multithreading in `combine`, `select`, `select!`, `transform`, `transform!`, `subset` and `subset!` ([#3030](https://github.com/JuliaData/DataFrames.jl/pull/3030)) diff --git a/docs/src/lib/functions.md b/docs/src/lib/functions.md index afad09bc9d..d8503010a9 100644 --- a/docs/src/lib/functions.md +++ b/docs/src/lib/functions.md @@ -78,6 +78,7 @@ combine fillcombinations flatten hcat +insert! insertcols insertcols! invpermute! @@ -86,6 +87,7 @@ mapcols! permute! prepend! push! +pushfirst! reduce repeat repeat! diff --git a/docs/src/man/getting_started.md b/docs/src/man/getting_started.md index de25e32745..7b5f864eea 100644 --- a/docs/src/man/getting_started.md +++ b/docs/src/man/getting_started.md @@ -225,7 +225,8 @@ julia> df = DataFrame(A=Int[], B=String[]) 0×2 DataFrame ``` -Rows can then be added as tuples or vectors, where the order of elements matches that of columns: +Rows can then be added as tuples or vectors, where the order of elements matches that of columns. +To add new rows at the end of a data frame use [`push!`](@ref): ```jldoctest dataframe julia> push!(df, (1, "M")) @@ -261,6 +262,12 @@ Note that constructing a `DataFrame` row by row is significantly less performant constructing it all at once, or column by column. For many use-cases this will not matter, but for very large `DataFrame`s this may be a consideration. +If you want to add rows at the beginning of a data frame use [`pushfirst!`](@ref) +and to insert a row in an arbitrary location use [`insert!`](@ref). 
+ +You can also add whole tables to a data frame using the [`append!`](@ref) +and [`prepend!`](@ref) functions. + ### Constructing from another table type DataFrames supports the [Tables.jl](https://github.com/JuliaData/Tables.jl) interface for diff --git a/src/DataFrames.jl b/src/DataFrames.jl index 2c5ea306f9..61706301b7 100644 --- a/src/DataFrames.jl +++ b/src/DataFrames.jl @@ -149,6 +149,8 @@ include("abstractdataframe/abstractdataframe.jl") include("dataframe/dataframe.jl") include("subdataframe/subdataframe.jl") include("dataframerow/dataframerow.jl") +include("dataframe/insertion.jl") + include("groupeddataframe/groupeddataframe.jl") include("groupeddataframe/utils.jl") diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl index 3f3ef3dc26..2f00dfb67e 100755 --- a/src/dataframe/dataframe.jl +++ b/src/dataframe/dataframe.jl @@ -1243,618 +1243,6 @@ disallowmissing!(df::DataFrame, cols::MultiColumnIndex; error::Bool=true) = disallowmissing!(df::DataFrame, cols::Colon=:; error::Bool=true) = disallowmissing!(df, axes(df, 2), error=error) -""" - append!(df::DataFrame, df2::AbstractDataFrame; cols::Symbol=:setequal, - promote::Bool=(cols in [:union, :subset])) - append!(df::DataFrame, table; cols::Symbol=:setequal, - promote::Bool=(cols in [:union, :subset])) - -Add the rows of `df2` to the end of `df`. If the second argument `table` is not an -`AbstractDataFrame` then it is converted using `DataFrame(table, copycols=false)` -before being appended. - -The exact behavior of `append!` depends on the `cols` argument: -* If `cols == :setequal` (this is the default) - then `df2` must contain exactly the same columns as `df` (but possibly in a - different order). 
-* If `cols == :orderequal` then `df2` must contain the same columns in the same - order (for `AbstractDict` this option requires that `keys(row)` matches - `propertynames(df)` to allow for support of ordered dicts; however, if `df2` - is a `Dict` an error is thrown as it is an unordered collection). -* If `cols == :intersect` then `df2` may contain more columns than `df`, but all - column names that are present in `df` must be present in `df2` and only these - are used. -* If `cols == :subset` then `append!` behaves like for `:intersect` but if some - column is missing in `df2` then a `missing` value is pushed to `df`. -* If `cols == :union` then `append!` adds columns missing in `df` that are present - in `df2`, for columns present in `df` but missing in `df2` a `missing` value - is pushed. - -If `promote=true` and element type of a column present in `df` does not allow -the type of a pushed argument then a new column with a promoted element type -allowing it is freshly allocated and stored in `df`. If `promote=false` an error -is thrown. - -The above rule has the following exceptions: -* If `df` has no columns then copies of columns from `df2` are added to it. -* If `df2` has no columns then calling `append!` leaves `df` unchanged. - -Please note that `append!` must not be used on a `DataFrame` that contains -columns that are aliases (equal when compared with `===`). - -# See also - -Use [`push!`](@ref) to add individual rows to a data frame, [`prepend!`](@ref) -to add a table at the beginning, and [`vcat`](@ref) to vertically concatenate -data frames. 
- -# Examples -```jldoctest -julia> df1 = DataFrame(A=1:3, B=1:3) -3×2 DataFrame - Row │ A B - │ Int64 Int64 -─────┼────────────── - 1 │ 1 1 - 2 │ 2 2 - 3 │ 3 3 - -julia> df2 = DataFrame(A=4.0:6.0, B=4:6) -3×2 DataFrame - Row │ A B - │ Float64 Int64 -─────┼──────────────── - 1 │ 4.0 4 - 2 │ 5.0 5 - 3 │ 6.0 6 - -julia> append!(df1, df2); - -julia> df1 -6×2 DataFrame - Row │ A B - │ Int64 Int64 -─────┼────────────── - 1 │ 1 1 - 2 │ 2 2 - 3 │ 3 3 - 4 │ 4 4 - 5 │ 5 5 - 6 │ 6 6 -``` -""" -Base.append!(df1::DataFrame, df2::AbstractDataFrame; cols::Symbol=:setequal, - promote::Bool=(cols in [:union, :subset])) = - _append_or_prepend!(df1, df2, cols=cols, promote=promote, atend=true) - -# TODO: add a reference to pushfirst when it is added: -# [`pushfirst!`](@ref) to add individual rows at the beginning of a data frame, -""" - prepend!(df::DataFrame, df2::AbstractDataFrame; cols::Symbol=:setequal, - promote::Bool=(cols in [:union, :subset])) - prepend!(df::DataFrame, table; cols::Symbol=:setequal, - promote::Bool=(cols in [:union, :subset])) - -Add the rows of `df2` to the beginning of `df`. If the second argument `table` -is not an `AbstractDataFrame` then it is converted using -`DataFrame(table, copycols=false)` before being prepended. - -The exact behavior of `prepend!` depends on the `cols` argument: -* If `cols == :setequal` (this is the default) - then `df2` must contain exactly the same columns as `df` (but possibly in a - different order). -* If `cols == :orderequal` then `df2` must contain the same columns in the same - order (for `AbstractDict` this option requires that `keys(row)` matches - `propertynames(df)` to allow for support of ordered dicts; however, if `df2` - is a `Dict` an error is thrown as it is an unordered collection). -* If `cols == :intersect` then `df2` may contain more columns than `df`, but all - column names that are present in `df` must be present in `df2` and only these - are used. 
-* If `cols == :subset` then `append!` behaves like for `:intersect` but if some - column is missing in `df2` then a `missing` value is pushed to `df`. -* If `cols == :union` then `append!` adds columns missing in `df` that are present - in `df2`, for columns present in `df` but missing in `df2` a `missing` value - is pushed. - -If `promote=true` and element type of a column present in `df` does not allow -the type of a pushed argument then a new column with a promoted element type -allowing it is freshly allocated and stored in `df`. If `promote=false` an error -is thrown. - -The above rule has the following exceptions: -* If `df` has no columns then copies of columns from `df2` are added to it. -* If `df2` has no columns then calling `prepend!` leaves `df` unchanged. - -Please note that `prepend!` must not be used on a `DataFrame` that contains -columns that are aliases (equal when compared with `===`). - -# See also - -Use -[`append!`](@ref) to add a table at the end, and [`vcat`](@ref) -to vertically concatenate data frames. 
- -# Examples -```jldoctest -julia> df1 = DataFrame(A=1:3, B=1:3) -3×2 DataFrame - Row │ A B - │ Int64 Int64 -─────┼────────────── - 1 │ 1 1 - 2 │ 2 2 - 3 │ 3 3 - -julia> df2 = DataFrame(A=4.0:6.0, B=4:6) -3×2 DataFrame - Row │ A B - │ Float64 Int64 -─────┼──────────────── - 1 │ 4.0 4 - 2 │ 5.0 5 - 3 │ 6.0 6 - -julia> prepend!(df1, df2); - -julia> df1 -6×2 DataFrame - Row │ A B - │ Int64 Int64 -─────┼────────────── - 1 │ 4 4 - 2 │ 5 5 - 3 │ 6 6 - 4 │ 1 1 - 5 │ 2 2 - 6 │ 3 3 -``` -""" -Base.prepend!(df1::DataFrame, df2::AbstractDataFrame; cols::Symbol=:setequal, - promote::Bool=(cols in [:union, :subset])) = - _append_or_prepend!(df1, df2, cols=cols, promote=promote, atend=false) - -function _append_or_prepend!(df1::DataFrame, df2::AbstractDataFrame; cols::Symbol, - promote::Bool, atend::Bool) - if !(cols in (:orderequal, :setequal, :intersect, :subset, :union)) - throw(ArgumentError("`cols` keyword argument must be " * - ":orderequal, :setequal, :intersect, :subset or :union)")) - end - - if ncol(df1) == 0 - for (n, v) in pairs(eachcol(df2)) - df1[!, n] = copy(v) # make sure df1 does not reuse df2 - end - return df1 - end - ncol(df2) == 0 && return df1 - - if cols == :orderequal && _names(df1) != _names(df2) - wrongnames = symdiff(_names(df1), _names(df2)) - if isempty(wrongnames) - mismatches = findall(_names(df1) .!= _names(df2)) - @assert !isempty(mismatches) - throw(ArgumentError("Columns number " * - join(mismatches, ", ", " and ") * - " do not have the same names in both passed " * - "data frames and `cols == :orderequal`")) - else - mismatchmsg = " Column names :" * - throw(ArgumentError("Column names :" * - join(wrongnames, ", :", " and :") * - " were found in only one of the passed data frames " * - "and `cols == :orderequal`")) - end - elseif cols == :setequal - wrongnames = symdiff(_names(df1), _names(df2)) - if !isempty(wrongnames) - throw(ArgumentError("Column names :" * - join(wrongnames, ", :", " and :") * - " were found in only one of the passed 
data frames " * - "and `cols == :setequal`")) - end - elseif cols == :intersect - wrongnames = setdiff(_names(df1), _names(df2)) - if !isempty(wrongnames) - throw(ArgumentError("Column names :" * - join(wrongnames, ", :", " and :") * - " were found in only in destination data frame " * - "and `cols == :intersect`")) - end - end - - nrow1 = nrow(df1) - nrow2 = nrow(df2) - targetrows = nrow1 + nrow2 - current_col = 0 - # in the code below we use a direct access to _columns because - # we resize the columns so temporarily the `DataFrame` is internally - # inconsistent and normal data frame indexing would error. - try - for (j, n) in enumerate(_names(df1)) - current_col += 1 - if hasproperty(df2, n) - df2_c = df2[!, n] - S = eltype(df2_c) - df1_c = df1[!, j] - T = eltype(df1_c) - if S <: T || !promote || promote_type(S, T) <: T - # if S <: T || promote_type(S, T) <: T this should never throw an exception - if atend - append!(df1_c, df2_c) - else - prepend!(df1_c, df2_c) - end - else - newcol = similar(df1_c, promote_type(S, T), targetrows) - firstindex(newcol) != 1 && _onebased_check_error() - if atend - copyto!(newcol, 1, df1_c, 1, nrow1) - copyto!(newcol, nrow1+1, df2_c, 1, nrow2) - else - copyto!(newcol, 1, df2_c, 1, nrow2) - copyto!(newcol, nrow2+1, df1_c, 1, nrow1) - end - _columns(df1)[j] = newcol - end - else - if Missing <: eltype(df1[!, j]) - if atend - resize!(df1[!, j], targetrows) - df1[nrow1+1:targetrows, j] .= missing - else - prepend!(df1[!, j], Iterators.repeated(missing, nrow2)) - end - elseif promote - newcol = similar(df1[!, j], Union{Missing, eltype(df1[!, j])}, - targetrows) - firstindex(newcol) != 1 && _onebased_check_error() - if atend - copyto!(newcol, 1, df1[!, j], 1, nrow1) - newcol[nrow1+1:targetrows] .= missing - else - copyto!(newcol, nrow2+1, df1[!, j], 1, nrow1) - newcol[1:nrow2] .= missing - end - _columns(df1)[j] = newcol - else - throw(ArgumentError("promote=false and source data frame does " * - "not contain column :$n, while 
destination " * - "column does not allow for missing values")) - end - end - end - current_col = 0 - for col in _columns(df1) - current_col += 1 - @assert length(col) == targetrows - end - if cols == :union - for n in setdiff(_names(df2), _names(df1)) - newcol = similar(df2[!, n], Union{Missing, eltype(df2[!, n])}, - targetrows) - firstindex(newcol) != 1 && _onebased_check_error() - if atend - newcol[1:nrow1] .= missing - copyto!(newcol, nrow1+1, df2[!, n], 1, targetrows - nrow1) - else - newcol[nrow2+1:targetrows] .= missing - copyto!(newcol, 1, df2[!, n], 1, nrow2) - end - df1[!, n] = newcol - end - end - catch err - # Undo changes in case of error - for col in _columns(df1) - @assert length(col) >= nrow1 - if atend - resize!(col, nrow1) - elseif length(col) != nrow1 - deleteat!(col, 1:length(col) - nrow1) - end - end - @error "Error adding value to column :$(_names(df1)[current_col])." - rethrow(err) - end - return df1 -end - -function Base.push!(df::DataFrame, row::Union{AbstractDict, NamedTuple}; - cols::Symbol=:setequal, - promote::Bool=(cols in [:union, :subset])) - possible_cols = (:orderequal, :setequal, :intersect, :subset, :union) - if !(cols in possible_cols) - throw(ArgumentError("`cols` keyword argument must be any of :" * - join(possible_cols, ", :"))) - end - - nrows, ncols = size(df) - targetrows = nrows + 1 - - if ncols == 0 && row isa NamedTuple - for (n, v) in pairs(row) - setproperty!(df, n, fill!(Tables.allocatecolumn(typeof(v), 1), v)) - end - return df - end - - old_row_type = typeof(row) - if row isa AbstractDict && keytype(row) !== Symbol && - (keytype(row) <: AbstractString || all(x -> x isa AbstractString, keys(row))) - row = (;(Symbol.(keys(row)) .=> values(row))...) - end - - # in the code below we use a direct access to _columns because - # we resize the columns so temporarily the `DataFrame` is internally - # inconsistent and normal data frame indexing would error. 
- if cols == :union - if row isa AbstractDict && keytype(row) !== Symbol && !all(x -> x isa Symbol, keys(row)) - throw(ArgumentError("when `cols == :union` all keys of row must be Symbol")) - end - for (i, colname) in enumerate(_names(df)) - col = _columns(df)[i] - if haskey(row, colname) - val = row[colname] - else - val = missing - end - S = typeof(val) - T = eltype(col) - if S <: T || promote_type(S, T) <: T - push!(col, val) - elseif !promote - try - push!(col, val) - catch err - for col in _columns(df) - resize!(col, nrows) - end - @error "Error adding value to column :$colname." - rethrow(err) - end - else - newcol = similar(col, promote_type(S, T), targetrows) - copyto!(newcol, 1, col, 1, nrows) - newcol[end] = val - firstindex(newcol) != 1 && _onebased_check_error() - _columns(df)[i] = newcol - end - end - for (colname, col) in zip(_names(df), _columns(df)) - if length(col) != targetrows - for col2 in _columns(df) - resize!(col2, nrows) - end - throw(AssertionError("Error adding value to column :$colname")) - end - end - for colname in setdiff(keys(row), _names(df)) - val = row[colname] - S = typeof(val) - if nrows == 0 - newcol = [val] - else - newcol = Tables.allocatecolumn(Union{Missing, S}, targetrows) - fill!(newcol, missing) - newcol[end] = val - end - df[!, colname] = newcol - end - return df - end - - if cols == :orderequal - if old_row_type <: Dict - throw(ArgumentError("passing `Dict` as `row` when `cols == :orderequal` " * - "is not allowed as it is unordered")) - elseif length(row) != ncol(df) || any(x -> x[1] != x[2], zip(keys(row), _names(df))) - throw(ArgumentError("when `cols == :orderequal` pushed row must " * - "have the same column names and in the " * - "same order as the target data frame")) - end - elseif cols === :setequal - # Only check for equal lengths if :setequal is selected, - # as an error will be thrown below if some names don't match - if length(row) != ncols - # an explicit error is thrown as this was allowed in the past - 
throw(ArgumentError("`push!` with `cols` equal to `:setequal` " * - "requires `row` to have the same number of elements " * - "as the number of columns in `df`.")) - end - end - current_col = 0 - try - for (col, nm) in zip(_columns(df), _names(df)) - current_col += 1 - if cols === :subset - val = get(row, nm, missing) - else - val = row[nm] - end - S = typeof(val) - T = eltype(col) - if S <: T || !promote || promote_type(S, T) <: T - push!(col, val) - else - newcol = similar(col, promote_type(S, T), targetrows) - copyto!(newcol, 1, col, 1, nrows) - newcol[end] = val - firstindex(newcol) != 1 && _onebased_check_error() - _columns(df)[columnindex(df, nm)] = newcol - end - end - current_col = 0 - for col in _columns(df) - current_col += 1 - @assert length(col) == targetrows - end - catch err - for col in _columns(df) - resize!(col, nrows) - end - @error "Error adding value to column :$(_names(df)[current_col])." - rethrow(err) - end - return df -end - -""" - push!(df::DataFrame, row::Union{Tuple, AbstractArray}; promote::Bool=false) - push!(df::DataFrame, row::Union{DataFrameRow, NamedTuple, AbstractDict}; - cols::Symbol=:setequal, promote::Bool=(cols in [:union, :subset])) - -Add in-place one row at the end of `df` taking the values from `row`. - -Column types of `df` are preserved, and new values are converted if necessary. -An error is thrown if conversion fails. - -If `row` is neither a `DataFrameRow`, `NamedTuple` nor `AbstractDict` then -it must be a `Tuple` or an `AbstractArray` -and columns are matched by order of appearance. In this case `row` must contain -the same number of elements as the number of columns in `df`. - -If `row` is a `DataFrameRow`, `NamedTuple` or `AbstractDict` then -values in `row` are matched to columns in `df` based on names. 
The exact behavior -depends on the `cols` argument value in the following way: -* If `cols == :setequal` (this is the default) - then `row` must contain exactly the same columns as `df` (but possibly in a - different order). -* If `cols == :orderequal` then `row` must contain the same columns in the same - order (for `AbstractDict` this option requires that `keys(row)` matches - `propertynames(df)` to allow for support of ordered dicts; however, if `row` - is a `Dict` an error is thrown as it is an unordered collection). -* If `cols == :intersect` then `row` may contain more columns than `df`, - but all column names that are present in `df` must be present in `row` and only - they are used to populate a new row in `df`. -* If `cols == :subset` then `push!` behaves like for `:intersect` but if some - column is missing in `row` then a `missing` value is pushed to `df`. -* If `cols == :union` then columns missing in `df` that are present in `row` are - added to `df` (using `missing` for existing rows) and a `missing` value is - pushed to columns missing in `row` that are present in `df`. - -If `promote=true` and element type of a column present in `df` does not allow -the type of a pushed argument then a new column with a promoted element type -allowing it is freshly allocated and stored in `df`. If `promote=false` an error -is thrown. - -As a special case, if `df` has no columns and `row` is a `NamedTuple` or -`DataFrameRow`, columns are created for all values in `row`, using their names -and order. - -Please note that `push!` must not be used on a `DataFrame` that contains columns -that are aliases (equal when compared with `===`). 
- -# Examples -```jldoctest -julia> df = DataFrame(A=1:3, B=1:3); - -julia> push!(df, (true, false)) -4×2 DataFrame - Row │ A B - │ Int64 Int64 -─────┼────────────── - 1 │ 1 1 - 2 │ 2 2 - 3 │ 3 3 - 4 │ 1 0 - -julia> push!(df, df[1, :]) -5×2 DataFrame - Row │ A B - │ Int64 Int64 -─────┼────────────── - 1 │ 1 1 - 2 │ 2 2 - 3 │ 3 3 - 4 │ 1 0 - 5 │ 1 1 - -julia> push!(df, (C="something", A=true, B=false), cols=:intersect) -6×2 DataFrame - Row │ A B - │ Int64 Int64 -─────┼────────────── - 1 │ 1 1 - 2 │ 2 2 - 3 │ 3 3 - 4 │ 1 0 - 5 │ 1 1 - 6 │ 1 0 - -julia> push!(df, Dict(:A=>1.0, :C=>1.0), cols=:union) -7×3 DataFrame - Row │ A B C - │ Float64 Int64? Float64? -─────┼───────────────────────────── - 1 │ 1.0 1 missing - 2 │ 2.0 2 missing - 3 │ 3.0 3 missing - 4 │ 1.0 0 missing - 5 │ 1.0 1 missing - 6 │ 1.0 0 missing - 7 │ 1.0 missing 1.0 - -julia> push!(df, NamedTuple(), cols=:subset) -8×3 DataFrame - Row │ A B C - │ Float64? Int64? Float64? -─────┼─────────────────────────────── - 1 │ 1.0 1 missing - 2 │ 2.0 2 missing - 3 │ 3.0 3 missing - 4 │ 1.0 0 missing - 5 │ 1.0 1 missing - 6 │ 1.0 0 missing - 7 │ 1.0 missing 1.0 - 8 │ missing missing missing -``` -""" -function Base.push!(df::DataFrame, row::Any; promote::Bool=false) - if !(row isa Union{Tuple, AbstractArray}) - # an explicit error is thrown as this was allowed in the past - throw(ArgumentError("`push!` does not allow passing collections of type " * - "$(typeof(row)) to be pushed into a DataFrame. Only " * - "`Tuple`, `AbstractArray`, `AbstractDict`, `DataFrameRow` " * - "and `NamedTuple` are allowed.")) - end - nrows, ncols = size(df) - targetrows = nrows + 1 - if length(row) != ncols - msg = "Length of `row` does not match `DataFrame` column count." 
- throw(DimensionMismatch(msg)) - end - current_col = 0 - try - for (i, (col, val)) in enumerate(zip(_columns(df), row)) - current_col += 1 - S = typeof(val) - T = eltype(col) - if S <: T || !promote || promote_type(S, T) <: T - push!(col, val) - else - newcol = Tables.allocatecolumn(promote_type(S, T), targetrows) - copyto!(newcol, 1, col, 1, nrows) - newcol[end] = val - firstindex(newcol) != 1 && _onebased_check_error() - _columns(df)[i] = newcol - end - end - current_col = 0 - for col in _columns(df) - current_col += 1 - @assert length(col) == targetrows - end - catch err - #clean up partial row - for col in _columns(df) - resize!(col, nrows) - end - @error "Error adding value to column :$(_names(df)[current_col])." - rethrow(err) - end - df -end - """ repeat!(df::DataFrame; inner::Integer=1, outer::Integer=1) diff --git a/src/dataframe/insertion.jl b/src/dataframe/insertion.jl new file mode 100644 index 0000000000..55b07b7965 --- /dev/null +++ b/src/dataframe/insertion.jl @@ -0,0 +1,948 @@ +""" + append!(df::DataFrame, df2::AbstractDataFrame; cols::Symbol=:setequal, + promote::Bool=(cols in [:union, :subset])) + append!(df::DataFrame, table; cols::Symbol=:setequal, + promote::Bool=(cols in [:union, :subset])) + +Add the rows of `df2` to the end of `df`. If the second argument `table` is not an +`AbstractDataFrame` then it is converted using `DataFrame(table, copycols=false)` +before being appended. + +The exact behavior of `append!` depends on the `cols` argument: +* If `cols == :setequal` (this is the default) + then `df2` must contain exactly the same columns as `df` (but possibly in a + different order). +* If `cols == :orderequal` then `df2` must contain the same columns in the same + order (for `AbstractDict` this option requires that `keys(row)` matches + `propertynames(df)` to allow for support of ordered dicts; however, if `df2` + is a `Dict` an error is thrown as it is an unordered collection). 
+* If `cols == :intersect` then `df2` may contain more columns than `df`, but all + column names that are present in `df` must be present in `df2` and only these + are used. +* If `cols == :subset` then `append!` behaves like for `:intersect` but if some + column is missing in `df2` then a `missing` value is pushed to `df`. +* If `cols == :union` then `append!` adds columns missing in `df` that are present + in `df2`, for columns present in `df` but missing in `df2` a `missing` value + is pushed. + +If `promote=true` and element type of a column present in `df` does not allow +the type of a pushed argument then a new column with a promoted element type +allowing it is freshly allocated and stored in `df`. If `promote=false` an error +is thrown. + +The above rule has the following exceptions: +* If `df` has no columns then copies of columns from `df2` are added to it. +* If `df2` has no columns then calling `append!` leaves `df` unchanged. + +Please note that `append!` must not be used on a `DataFrame` that contains +columns that are aliases (equal when compared with `===`). + +See also: use [`push!`](@ref) to add individual rows to a data frame, [`prepend!`](@ref) +to add a table at the beginning, and [`vcat`](@ref) to vertically concatenate +data frames. 
+ +# Examples +```jldoctest +julia> df1 = DataFrame(A=1:3, B=1:3) +3×2 DataFrame + Row │ A B + │ Int64 Int64 +─────┼────────────── + 1 │ 1 1 + 2 │ 2 2 + 3 │ 3 3 + +julia> df2 = DataFrame(A=4.0:6.0, B=4:6) +3×2 DataFrame + Row │ A B + │ Float64 Int64 +─────┼──────────────── + 1 │ 4.0 4 + 2 │ 5.0 5 + 3 │ 6.0 6 + +julia> append!(df1, df2); + +julia> df1 +6×2 DataFrame + Row │ A B + │ Int64 Int64 +─────┼────────────── + 1 │ 1 1 + 2 │ 2 2 + 3 │ 3 3 + 4 │ 4 4 + 5 │ 5 5 + 6 │ 6 6 +``` +""" +Base.append!(df1::DataFrame, df2::AbstractDataFrame; cols::Symbol=:setequal, + promote::Bool=(cols in [:union, :subset])) = + _append_or_prepend!(df1, df2, cols=cols, promote=promote, atend=true) + +""" + prepend!(df::DataFrame, df2::AbstractDataFrame; cols::Symbol=:setequal, + promote::Bool=(cols in [:union, :subset])) + prepend!(df::DataFrame, table; cols::Symbol=:setequal, + promote::Bool=(cols in [:union, :subset])) + +Add the rows of `df2` to the beginning of `df`. If the second argument `table` +is not an `AbstractDataFrame` then it is converted using +`DataFrame(table, copycols=false)` before being prepended. + +The exact behavior of `prepend!` depends on the `cols` argument: +* If `cols == :setequal` (this is the default) + then `df2` must contain exactly the same columns as `df` (but possibly in a + different order). +* If `cols == :orderequal` then `df2` must contain the same columns in the same + order (for `AbstractDict` this option requires that `keys(row)` matches + `propertynames(df)` to allow for support of ordered dicts; however, if `df2` + is a `Dict` an error is thrown as it is an unordered collection). +* If `cols == :intersect` then `df2` may contain more columns than `df`, but all + column names that are present in `df` must be present in `df2` and only these + are used. +* If `cols == :subset` then `prepend!` behaves like for `:intersect` but if some + column is missing in `df2` then a `missing` value is pushed to `df`. 
+* If `cols == :union` then `prepend!` adds columns missing in `df` that are present + in `df2`, for columns present in `df` but missing in `df2` a `missing` value + is pushed. + +If `promote=true` and element type of a column present in `df` does not allow +the type of a pushed argument then a new column with a promoted element type +allowing it is freshly allocated and stored in `df`. If `promote=false` an error +is thrown. + +The above rule has the following exceptions: +* If `df` has no columns then copies of columns from `df2` are added to it. +* If `df2` has no columns then calling `prepend!` leaves `df` unchanged. + +Please note that `prepend!` must not be used on a `DataFrame` that contains +columns that are aliases (equal when compared with `===`). + +See also: use [`pushfirst!`](@ref) to add individual rows at the beginning of a data frame, +[`append!`](@ref) to add a table at the end, and [`vcat`](@ref) +to vertically concatenate data frames. + +# Examples +```jldoctest +julia> df1 = DataFrame(A=1:3, B=1:3) +3×2 DataFrame + Row │ A B + │ Int64 Int64 +─────┼────────────── + 1 │ 1 1 + 2 │ 2 2 + 3 │ 3 3 + +julia> df2 = DataFrame(A=4.0:6.0, B=4:6) +3×2 DataFrame + Row │ A B + │ Float64 Int64 +─────┼──────────────── + 1 │ 4.0 4 + 2 │ 5.0 5 + 3 │ 6.0 6 + +julia> prepend!(df1, df2); + +julia> df1 +6×2 DataFrame + Row │ A B + │ Int64 Int64 +─────┼────────────── + 1 │ 4 4 + 2 │ 5 5 + 3 │ 6 6 + 4 │ 1 1 + 5 │ 2 2 + 6 │ 3 3 +``` +""" +Base.prepend!(df1::DataFrame, df2::AbstractDataFrame; cols::Symbol=:setequal, + promote::Bool=(cols in [:union, :subset])) = + _append_or_prepend!(df1, df2, cols=cols, promote=promote, atend=false) + +function _append_or_prepend!(df1::DataFrame, df2::AbstractDataFrame; cols::Symbol, + promote::Bool, atend::Bool) + if !(cols in (:orderequal, :setequal, :intersect, :subset, :union)) + throw(ArgumentError("`cols` keyword argument must be " * + ":orderequal, :setequal, :intersect, :subset or :union")) + end + + if ncol(df1) == 0 + for (n, 
v) in pairs(eachcol(df2)) + df1[!, n] = copy(v) # make sure df1 does not reuse df2 + end + return df1 + end + ncol(df2) == 0 && return df1 + + if cols == :orderequal && _names(df1) != _names(df2) + wrongnames = symdiff(_names(df1), _names(df2)) + if isempty(wrongnames) + mismatches = findall(_names(df1) .!= _names(df2)) + @assert !isempty(mismatches) + throw(ArgumentError("Columns number " * + join(mismatches, ", ", " and ") * + " do not have the same names in both passed " * + "data frames and `cols == :orderequal`")) + else + # some names occur in only one of the two data frames + throw(ArgumentError("Column names :" * + join(wrongnames, ", :", " and :") * + " were found in only one of the passed data frames " * + "and `cols == :orderequal`")) + end + elseif cols == :setequal + wrongnames = symdiff(_names(df1), _names(df2)) + if !isempty(wrongnames) + throw(ArgumentError("Column names :" * + join(wrongnames, ", :", " and :") * + " were found in only one of the passed data frames " * + "and `cols == :setequal`")) + end + elseif cols == :intersect + wrongnames = setdiff(_names(df1), _names(df2)) + if !isempty(wrongnames) + throw(ArgumentError("Column names :" * + join(wrongnames, ", :", " and :") * + " were found only in destination data frame " * + "and `cols == :intersect`")) + end + end + + nrow1 = nrow(df1) + nrow2 = nrow(df2) + targetrows = nrow1 + nrow2 + current_col = 0 + # in the code below we use a direct access to _columns because + # we resize the columns so temporarily the `DataFrame` is internally + # inconsistent and normal data frame indexing would error. 
+ try + for (j, n) in enumerate(_names(df1)) + current_col += 1 + if hasproperty(df2, n) + df2_c = df2[!, n] + S = eltype(df2_c) + df1_c = df1[!, j] + T = eltype(df1_c) + if S <: T || !promote || promote_type(S, T) <: T + # if S <: T || promote_type(S, T) <: T this should never throw an exception + if atend + append!(df1_c, df2_c) + else + prepend!(df1_c, df2_c) + end + else + newcol = similar(df1_c, promote_type(S, T), targetrows) + firstindex(newcol) != 1 && _onebased_check_error() + if atend + copyto!(newcol, 1, df1_c, 1, nrow1) + copyto!(newcol, nrow1+1, df2_c, 1, nrow2) + else + copyto!(newcol, 1, df2_c, 1, nrow2) + copyto!(newcol, nrow2+1, df1_c, 1, nrow1) + end + _columns(df1)[j] = newcol + end + else + if Missing <: eltype(df1[!, j]) + if atend + resize!(df1[!, j], targetrows) + df1[nrow1+1:targetrows, j] .= missing + else + prepend!(df1[!, j], Iterators.repeated(missing, nrow2)) + end + elseif promote + newcol = similar(df1[!, j], Union{Missing, eltype(df1[!, j])}, + targetrows) + firstindex(newcol) != 1 && _onebased_check_error() + if atend + copyto!(newcol, 1, df1[!, j], 1, nrow1) + newcol[nrow1+1:targetrows] .= missing + else + copyto!(newcol, nrow2+1, df1[!, j], 1, nrow1) + newcol[1:nrow2] .= missing + end + _columns(df1)[j] = newcol + else + throw(ArgumentError("promote=false and source data frame does " * + "not contain column :$n, while destination " * + "column does not allow for missing values")) + end + end + end + current_col = 0 + for col in _columns(df1) + current_col += 1 + @assert length(col) == targetrows + end + if cols == :union + for n in setdiff(_names(df2), _names(df1)) + newcol = similar(df2[!, n], Union{Missing, eltype(df2[!, n])}, + targetrows) + firstindex(newcol) != 1 && _onebased_check_error() + if atend + newcol[1:nrow1] .= missing + copyto!(newcol, nrow1+1, df2[!, n], 1, targetrows - nrow1) + else + newcol[nrow2+1:targetrows] .= missing + copyto!(newcol, 1, df2[!, n], 1, nrow2) + end + df1[!, n] = newcol + end + end + catch err 
+ # Undo changes in case of error + for col in _columns(df1) + @assert length(col) >= nrow1 + if atend + resize!(col, nrow1) + elseif length(col) != nrow1 + deleteat!(col, 1:length(col) - nrow1) + end + end + @error "Error adding value to column :$(_names(df1)[current_col])." + rethrow(err) + end + return df1 +end + +const INSERTION_COMMON = """ +Column types of `df` are preserved, and new values are converted if necessary. +An error is thrown if conversion fails. + +If `row` is neither a `DataFrameRow`, `NamedTuple` nor `AbstractDict` then +it must be a `Tuple` or an `AbstractArray` +and columns are matched by order of appearance. In this case `row` must contain +the same number of elements as the number of columns in `df`. + +If `row` is a `DataFrameRow`, `NamedTuple` or `AbstractDict` then +values in `row` are matched to columns in `df` based on names. The exact behavior +depends on the `cols` argument value in the following way: +* If `cols == :setequal` (this is the default) + then `row` must contain exactly the same columns as `df` (but possibly in a + different order). +* If `cols == :orderequal` then `row` must contain the same columns in the same + order (for `AbstractDict` this option requires that `keys(row)` matches + `propertynames(df)` to allow for support of ordered dicts; however, if `row` + is a `Dict` an error is thrown as it is an unordered collection). +* If `cols == :intersect` then `row` may contain more columns than `df`, + but all column names that are present in `df` must be present in `row` and only + they are used to populate a new row in `df`. +* If `cols == :subset` then the behavior is like for `:intersect` but if some + column is missing in `row` then a `missing` value is pushed to `df`. +* If `cols == :union` then columns missing in `df` that are present in `row` are + added to `df` (using `missing` for existing rows) and a `missing` value is + pushed to columns missing in `row` that are present in `df`. 
+ +If `promote=true` and element type of a column present in `df` does not allow +the type of a pushed argument then a new column with a promoted element type +allowing it is freshly allocated and stored in `df`. If `promote=false` an error +is thrown. + +As a special case, if `df` has no columns and `row` is a `NamedTuple` or +`DataFrameRow`, columns are created for all values in `row`, using their names +and order. + +Please note that this function must not be used on a +`DataFrame` that contains columns that are aliases (equal when compared with `===`). +""" + +""" + push!(df::DataFrame, row::Union{Tuple, AbstractArray}; promote::Bool=false) + push!(df::DataFrame, row::Union{DataFrameRow, NamedTuple, AbstractDict}; + cols::Symbol=:setequal, promote::Bool=(cols in [:union, :subset])) + +Add one row at the end of `df` in-place, taking the values from `row`. + +$INSERTION_COMMON + +See also: [`pushfirst!`](@ref), [`insert!`](@ref) + +# Examples +```jldoctest +julia> df = DataFrame(A='a':'c', B=1:3) +3×2 DataFrame + Row │ A B + │ Char Int64 +─────┼───────────── + 1 │ a 1 + 2 │ b 2 + 3 │ c 3 + +julia> push!(df, (true, false), promote=true) +4×2 DataFrame + Row │ A B + │ Any Int64 +─────┼───────────── + 1 │ a 1 + 2 │ b 2 + 3 │ c 3 + 4 │ true 0 + +julia> push!(df, df[1, :]) +5×2 DataFrame + Row │ A B + │ Any Int64 +─────┼───────────── + 1 │ a 1 + 2 │ b 2 + 3 │ c 3 + 4 │ true 0 + 5 │ a 1 + +julia> push!(df, (C="something", A=11, B=12), cols=:intersect) +6×2 DataFrame + Row │ A B + │ Any Int64 +─────┼───────────── + 1 │ a 1 + 2 │ b 2 + 3 │ c 3 + 4 │ true 0 + 5 │ a 1 + 6 │ 11 12 + +julia> push!(df, Dict(:A=>1.0, :C=>1.0), cols=:union) +7×3 DataFrame + Row │ A B C + │ Any Int64? Float64? +─────┼────────────────────────── + 1 │ a 1 missing + 2 │ b 2 missing + 3 │ c 3 missing + 4 │ true 0 missing + 5 │ a 1 missing + 6 │ 11 12 missing + 7 │ 1.0 missing 1.0 + +julia> push!(df, NamedTuple(), cols=:subset) +8×3 DataFrame + Row │ A B C + │ Any Int64? Float64? 
+─────┼───────────────────────────── + 1 │ a 1 missing + 2 │ b 2 missing + 3 │ c 3 missing + 4 │ true 0 missing + 5 │ a 1 missing + 6 │ 11 12 missing + 7 │ 1.0 missing 1.0 + 8 │ missing missing missing +``` +""" +Base.push!(df::DataFrame, row::Any; promote::Bool=false) = + _row_inserter!(df, -1, row, Val{:push}(), promote) + +""" + pushfirst!(df::DataFrame, row::Union{Tuple, AbstractArray}; promote::Bool=false) + pushfirst!(df::DataFrame, row::Union{DataFrameRow, NamedTuple, AbstractDict}; + cols::Symbol=:setequal, promote::Bool=(cols in [:union, :subset])) + +Add one row at the beginning of `df` in-place, taking the values from `row`. + +$INSERTION_COMMON + +See also: [`push!`](@ref), [`insert!`](@ref) + +# Examples +```jldoctest +julia> df = DataFrame(A='a':'c', B=1:3) +3×2 DataFrame + Row │ A B + │ Char Int64 +─────┼───────────── + 1 │ a 1 + 2 │ b 2 + 3 │ c 3 + +julia> pushfirst!(df, (true, false), promote=true) +4×2 DataFrame + Row │ A B + │ Any Int64 +─────┼───────────── + 1 │ true 0 + 2 │ a 1 + 3 │ b 2 + 4 │ c 3 + +julia> pushfirst!(df, df[1, :]) +5×2 DataFrame + Row │ A B + │ Any Int64 +─────┼───────────── + 1 │ true 0 + 2 │ true 0 + 3 │ a 1 + 4 │ b 2 + 5 │ c 3 + +julia> pushfirst!(df, (C="something", A=11, B=12), cols=:intersect) +6×2 DataFrame + Row │ A B + │ Any Int64 +─────┼───────────── + 1 │ 11 12 + 2 │ true 0 + 3 │ true 0 + 4 │ a 1 + 5 │ b 2 + 6 │ c 3 + +julia> pushfirst!(df, Dict(:A=>1.0, :C=>1.0), cols=:union) +7×3 DataFrame + Row │ A B C + │ Any Int64? Float64? +─────┼────────────────────────── + 1 │ 1.0 missing 1.0 + 2 │ 11 12 missing + 3 │ true 0 missing + 4 │ true 0 missing + 5 │ a 1 missing + 6 │ b 2 missing + 7 │ c 3 missing + +julia> pushfirst!(df, NamedTuple(), cols=:subset) +8×3 DataFrame + Row │ A B C + │ Any Int64? Float64? 
+─────┼───────────────────────────── + 1 │ missing missing missing + 2 │ 1.0 missing 1.0 + 3 │ 11 12 missing + 4 │ true 0 missing + 5 │ true 0 missing + 6 │ a 1 missing + 7 │ b 2 missing + 8 │ c 3 missing +``` +""" +Base.pushfirst!(df::DataFrame, row::Any; promote::Bool=false) = + _row_inserter!(df, -1, row, Val{:pushfirst}(), promote) + +""" + insert!(df::DataFrame, index::Integer, row::Union{Tuple, AbstractArray}; promote::Bool=false) + insert!(df::DataFrame, index::Integer, row::Union{DataFrameRow, NamedTuple, AbstractDict}; + cols::Symbol=:setequal, promote::Bool=(cols in [:union, :subset])) + +Add one row to `df` at position `index` in-place, taking the values from `row`. +`index` must be an integer between `1` and `nrow(df)+1`; otherwise an `ArgumentError` is thrown. + +$INSERTION_COMMON + +See also: [`push!`](@ref), [`pushfirst!`](@ref) + +# Examples +```jldoctest +julia> df = DataFrame(A='a':'c', B=1:3) +3×2 DataFrame + Row │ A B + │ Char Int64 +─────┼───────────── + 1 │ a 1 + 2 │ b 2 + 3 │ c 3 + +julia> insert!(df, 2, (true, false), promote=true) +4×2 DataFrame + Row │ A B + │ Any Int64 +─────┼───────────── + 1 │ a 1 + 2 │ true 0 + 3 │ b 2 + 4 │ c 3 + +julia> insert!(df, 5, df[1, :]) +5×2 DataFrame + Row │ A B + │ Any Int64 +─────┼───────────── + 1 │ a 1 + 2 │ true 0 + 3 │ b 2 + 4 │ c 3 + 5 │ a 1 + +julia> insert!(df, 1, (C="something", A=11, B=12), cols=:intersect) +6×2 DataFrame + Row │ A B + │ Any Int64 +─────┼───────────── + 1 │ 11 12 + 2 │ a 1 + 3 │ true 0 + 4 │ b 2 + 5 │ c 3 + 6 │ a 1 + +julia> insert!(df, 7, Dict(:A=>1.0, :C=>1.0), cols=:union) +7×3 DataFrame + Row │ A B C + │ Any Int64? Float64? 
+─────┼───────────────────────────── + 1 │ 11 12 missing + 2 │ a 1 missing + 3 │ missing missing missing + 4 │ true 0 missing + 5 │ b 2 missing + 6 │ c 3 missing + 7 │ a 1 missing + 8 │ 1.0 missing 1.0 +``` +""" +function Base.insert!(df::DataFrame, index::Integer, row::Any; promote::Bool=false) + index isa Bool && throw(ArgumentError("invalid index: $index of type Bool")) + 1 <= index <= nrow(df)+1 || + throw(ArgumentError("invalid index: $index for data frame with $(nrow(df)) rows")) + return _row_inserter!(df, index, row, Val{:insert}(), promote) +end + +function _row_inserter!(df::DataFrame, loc::Integer, row::Any, + mode::Union{Val{:push}, Val{:pushfirst}, Val{:insert}}, + promote::Bool) + if !(row isa Union{Tuple, AbstractArray}) + # an explicit error is thrown as this was allowed in the past + throw(ArgumentError("it is not allowed to insert collections of type " * + "$(typeof(row)) into a DataFrame. Only " * + "`Tuple`, `AbstractArray`, `AbstractDict`, `DataFrameRow` " * + "and `NamedTuple` are allowed.")) + end + nrows, ncols = size(df) + targetrows = nrows + 1 + if length(row) != ncols + msg = "Length of `row` does not match `DataFrame` column count." 
+ throw(DimensionMismatch(msg)) + end + current_col = 0 + try + for (i, (col, val)) in enumerate(zip(_columns(df), row)) + current_col += 1 + @assert length(col) == nrows + S = typeof(val) + T = eltype(col) + if S <: T || !promote || promote_type(S, T) <: T + mode isa Val{:push} && push!(col, val) + mode isa Val{:pushfirst} && pushfirst!(col, val) + mode isa Val{:insert} && insert!(col, loc, val) + else + newcol = Tables.allocatecolumn(promote_type(S, T), targetrows) + firstindex(newcol) != 1 && _onebased_check_error() + if mode isa Val{:push} + copyto!(newcol, 1, col, 1, nrows) + newcol[end] = val + elseif mode isa Val{:pushfirst} + newcol[1] = val + copyto!(newcol, 2, col, 1, nrows) + elseif mode isa Val{:insert} + copyto!(newcol, 1, col, 1, loc-1) + newcol[loc] = val + copyto!(newcol, loc+1, col, loc, nrows-loc+1) + end + _columns(df)[i] = newcol + end + end + catch err + # clean up partial row + for j in 1:current_col + col2 = _columns(df)[j] + if length(col2) == targetrows + mode isa Val{:push} && pop!(col2) + mode isa Val{:pushfirst} && popfirst!(col2) + mode isa Val{:insert} && deleteat!(col2, loc) + end + @assert length(col2) == nrows + end + @error "Error adding value to column :$(_names(df)[current_col])." 
+ rethrow(err) + end + df +end + +Base.push!(df::DataFrame, row::DataFrameRow; + cols::Symbol=:setequal, + promote::Bool=(cols in [:union, :subset])) = + _dfr_row_inserter!(df, -1, row, Val{:push}(), cols, promote) + +Base.pushfirst!(df::DataFrame, row::DataFrameRow; + cols::Symbol=:setequal, + promote::Bool=(cols in [:union, :subset])) = + _dfr_row_inserter!(df, -1, row, Val{:pushfirst}(), cols, promote) + +function Base.insert!(df::DataFrame, index::Integer, row::DataFrameRow; + cols::Symbol=:setequal, promote::Bool=(cols in [:union, :subset])) + index isa Bool && throw(ArgumentError("invalid index: $index of type Bool")) + 1 <= index <= nrow(df)+1 || + throw(ArgumentError("invalid index: $index for data frame with $(nrow(df)) rows")) + _dfr_row_inserter!(df, index, row, Val{:insert}(), cols, promote) +end + +@noinline pushhelper!(x::AbstractVector, r::Any) = + push!(x, x[r]) + +@noinline pushfirsthelper!(x::AbstractVector, r::Any) = + pushfirst!(x, x[r]) + +@noinline inserthelper!(x::AbstractVector, loc::Integer, r::Any) = + insert!(x, loc, x[r]) + +function _dfr_row_inserter!(df::DataFrame, loc::Integer, dfr::DataFrameRow, + mode::Union{Val{:push}, Val{:pushfirst}, Val{:insert}}, + cols::Symbol, promote::Bool) + possible_cols = (:orderequal, :setequal, :intersect, :subset, :union) + if !(cols in possible_cols) + throw(ArgumentError("`cols` keyword argument must be any of :" * + join(possible_cols, ", :"))) + end + + nrows = nrow(df) + targetrows = nrows + 1 + + if parent(dfr) === df && index(dfr) isa Index + # in this case we are sure that all we do is safe + r = row(dfr) + for (col_num, col) in enumerate(_columns(df)) + if length(col) != nrows + for j in 1:col_num + col2 = _columns(df)[j] + if length(col2) == targetrows + mode isa Val{:push} && pop!(col2) + mode isa Val{:pushfirst} && popfirst!(col2) + mode isa Val{:insert} && deleteat!(col2, loc) + end + @assert length(col2) == nrows + end + colname = _names(df)[col_num] + throw(AssertionError("Error adding 
value to column :$colname")) + end + # use a function barrier to improve performance + mode isa Val{:push} && pushhelper!(col, r) + mode isa Val{:pushfirst} && pushfirsthelper!(col, r) + mode isa Val{:insert} && inserthelper!(col, loc, r) + end + return df + end + + return _row_inserter!(df, loc, dfr, mode, cols, promote, nrows) +end + +Base.push!(df::DataFrame, row::Union{AbstractDict, NamedTuple}; + cols::Symbol=:setequal, + promote::Bool=(cols in [:union, :subset])) = + _row_inserter!(df, -1, row, Val{:push}(), cols, promote, -1) + +Base.pushfirst!(df::DataFrame, row::Union{AbstractDict, NamedTuple}; + cols::Symbol=:setequal, + promote::Bool=(cols in [:union, :subset])) = + _row_inserter!(df, -1, row, Val{:pushfirst}(), cols, promote, -1) + +function Base.insert!(df::DataFrame, loc::Integer, row::Union{AbstractDict, NamedTuple}; + cols::Symbol=:setequal, + promote::Bool=(cols in [:union, :subset])) + loc isa Bool && throw(ArgumentError("invalid index: $loc of type Bool")) + 1 <= loc <= nrow(df)+1 || + throw(ArgumentError("invalid index: $loc for data frame with $(nrow(df)) rows")) + return _row_inserter!(df, loc, row, Val{:insert}(), cols, promote, -1) +end + +function _row_inserter!(df::DataFrame, loc::Integer, + row::Union{AbstractDict, NamedTuple, DataFrameRow}, + mode::Union{Val{:push}, Val{:pushfirst}, Val{:insert}}, + cols::Symbol, promote::Bool, nrows::Int) + if nrows == -1 + @assert row isa Union{AbstractDict, NamedTuple} + possible_cols = (:orderequal, :setequal, :intersect, :subset, :union) + if !(cols in possible_cols) + throw(ArgumentError("`cols` keyword argument must be any of :" * + join(possible_cols, ", :"))) + end + nrows = nrow(df) + else + @assert row isa DataFrameRow + end + + ncols = ncol(df) + targetrows = nrows + 1 + + if ncols == 0 && row isa Union{NamedTuple, DataFrameRow} + for (n, v) in pairs(row) + setproperty!(df, n, fill!(Tables.allocatecolumn(typeof(v), 1), v)) + end + return df + end + + old_row_type = typeof(row) + if row isa 
AbstractDict && keytype(row) !== Symbol && + (keytype(row) <: AbstractString || all(x -> x isa AbstractString, keys(row))) + row = (;(Symbol.(keys(row)) .=> values(row))...) + end + + # in the code below we use a direct access to _columns because + # we resize the columns so temporarily the `DataFrame` is internally + # inconsistent and normal data frame indexing would error. + if cols == :union + if row isa AbstractDict && keytype(row) !== Symbol && !all(x -> x isa Symbol, keys(row)) + throw(ArgumentError("when `cols == :union` all keys of row must be Symbol")) + end + for (i, colname) in enumerate(_names(df)) + col = _columns(df)[i] + if length(col) != nrows + for j in 1:i + col2 = _columns(df)[j] + if length(col2) == targetrows + mode isa Val{:push} && pop!(col2) + mode isa Val{:pushfirst} && popfirst!(col2) + mode isa Val{:insert} && deleteat!(col2, loc) + end + @assert length(col2) == nrows + end + throw(AssertionError("Error adding value to column :$colname")) + end + if haskey(row, colname) + val = row[colname] + else + val = missing + end + S = typeof(val) + T = eltype(col) + if S <: T || promote_type(S, T) <: T + mode isa Val{:push} && push!(col, val) + mode isa Val{:pushfirst} && pushfirst!(col, val) + mode isa Val{:insert} && insert!(col, loc, val) + elseif !promote + try + mode isa Val{:push} && push!(col, val) + mode isa Val{:pushfirst} && pushfirst!(col, val) + mode isa Val{:insert} && insert!(col, loc, val) + catch err + for j in 1:i + col2 = _columns(df)[j] + if length(col2) == targetrows + mode isa Val{:push} && pop!(col2) + mode isa Val{:pushfirst} && popfirst!(col2) + mode isa Val{:insert} && deleteat!(col2, loc) + end + @assert length(col2) == nrows + end + @error "Error adding value to column :$colname." 
+ rethrow(err) + end + else + newcol = similar(col, promote_type(S, T), targetrows) + firstindex(newcol) != 1 && _onebased_check_error() + if mode isa Val{:push} + copyto!(newcol, 1, col, 1, nrows) + newcol[end] = val + elseif mode isa Val{:pushfirst} + newcol[1] = val + copyto!(newcol, 2, col, 1, nrows) + elseif mode isa Val{:insert} + copyto!(newcol, 1, col, 1, loc-1) + newcol[loc] = val + copyto!(newcol, loc+1, col, loc, nrows-loc+1) + end + _columns(df)[i] = newcol + end + end + for colname in setdiff(keys(row), _names(df)) + val = row[colname] + S = typeof(val) + if nrows == 0 + mode isa Val{:insert} && @assert loc == 1 + newcol = Tables.allocatecolumn(S, targetrows) + else + newcol = Tables.allocatecolumn(Union{Missing, S}, targetrows) + fill!(newcol, missing) + end + firstindex(newcol) != 1 && _onebased_check_error() + mode isa Val{:push} && (newcol[end] = val) + mode isa Val{:pushfirst} && (newcol[1] = val) + mode isa Val{:insert} && (newcol[loc] = val) + df[!, colname] = newcol + end + return df + end + + if cols == :orderequal + if old_row_type <: Dict + throw(ArgumentError("passing `Dict` as `row` when `cols == :orderequal` " * + "is not allowed as it is unordered")) + elseif length(row) != ncol(df) || any(x -> x[1] != x[2], zip(keys(row), _names(df))) + throw(ArgumentError("when `cols == :orderequal` pushed row must " * + "have the same column names and in the " * + "same order as the target data frame")) + end + elseif cols === :setequal + # Only check for equal lengths if :setequal is selected, + # as an error will be thrown below if some names don't match + if length(row) != ncols + # an explicit error is thrown as this was allowed in the past + throw(ArgumentError("row insertion with `cols` equal to `:setequal` " * + "requires `row` to have the same number of elements " * + "as the number of columns in `df`.")) + end + end + + current_col = 0 + try + for (col, nm) in zip(_columns(df), _names(df)) + current_col += 1 + @assert length(col) == nrows + 
if cols === :subset + val = get(row, nm, missing) + else + val = row[nm] + end + S = typeof(val) + T = eltype(col) + if S <: T || !promote || promote_type(S, T) <: T + mode isa Val{:push} && push!(col, val) + mode isa Val{:pushfirst} && pushfirst!(col, val) + mode isa Val{:insert} && insert!(col, loc, val) + else + newcol = similar(col, promote_type(S, T), targetrows) + firstindex(newcol) != 1 && _onebased_check_error() + if mode isa Val{:push} + copyto!(newcol, 1, col, 1, nrows) + newcol[end] = val + elseif mode isa Val{:pushfirst} + newcol[1] = val + copyto!(newcol, 2, col, 1, nrows) + elseif mode isa Val{:insert} + copyto!(newcol, 1, col, 1, loc-1) + newcol[loc] = val + copyto!(newcol, loc+1, col, loc, nrows-loc+1) + end + _columns(df)[columnindex(df, nm)] = newcol + end + end + catch err + @assert current_col > 0 + for j in 1:current_col + col2 = _columns(df)[j] + if length(col2) == targetrows + mode isa Val{:push} && pop!(col2) + mode isa Val{:pushfirst} && popfirst!(col2) + mode isa Val{:insert} && deleteat!(col2, loc) + end + @assert length(col2) == nrows + end + @error "Error adding value to column :$(_names(df)[current_col])." 
+ rethrow(err) + end + return df +end diff --git a/src/dataframerow/dataframerow.jl b/src/dataframerow/dataframerow.jl index 1a537c0abd..f6fcb92b9f 100644 --- a/src/dataframerow/dataframerow.jl +++ b/src/dataframerow/dataframerow.jl @@ -495,141 +495,3 @@ function DataFrame(dfr::DataFrameRow) row, cols = parentindices(dfr) parent(dfr)[row:row, cols] end - -@noinline pushhelper!(x, r) = push!(x, x[r]) - -function Base.push!(df::DataFrame, dfr::DataFrameRow; cols::Symbol=:setequal, - promote::Bool=(cols in [:union, :subset])) - possible_cols = (:orderequal, :setequal, :intersect, :subset, :union) - if !(cols in possible_cols) - throw(ArgumentError("`cols` keyword argument must be any of :" * - join(possible_cols, ", :"))) - end - - nrows, ncols = size(df) - targetrows = nrows + 1 - - if parent(dfr) === df && index(dfr) isa Index - # in this case we are sure that all we do is safe - r = row(dfr) - for col in _columns(df) - # use a barrier function to improve performance - pushhelper!(col, r) - end - for (colname, col) in zip(_names(df), _columns(df)) - if length(col) != targetrows - for col2 in _columns(df) - resize!(col2, nrows) - end - throw(AssertionError("Error adding value to column :$colname")) - end - end - return df - end - - if ncols == 0 - for (n, v) in pairs(dfr) - setproperty!(df, n, fill!(Tables.allocatecolumn(typeof(v), 1), v)) - end - return df - end - - if cols == :union - for (i, colname) in enumerate(_names(df)) - col = _columns(df)[i] - if hasproperty(dfr, colname) - val = dfr[colname] - else - val = missing - end - S = typeof(val) - T = eltype(col) - if S <: T || promote_type(S, T) <: T - push!(col, val) - elseif !promote - try - push!(col, val) - catch err - for col in _columns(df) - resize!(col, nrows) - end - @error "Error adding value to column :$colname." 
- rethrow(err) - end - else - newcol = Tables.allocatecolumn(promote_type(S, T), targetrows) - copyto!(newcol, 1, col, 1, nrows) - newcol[end] = val - firstindex(newcol) != 1 && _onebased_check_error() - _columns(df)[i] = newcol - end - end - for (colname, col) in zip(_names(df), _columns(df)) - if length(col) != targetrows - for col2 in _columns(df) - resize!(col2, nrows) - end - throw(AssertionError("Error adding value to column :$colname")) - end - end - for colname in setdiff(_names(dfr), _names(df)) - val = dfr[colname] - S = typeof(val) - if nrows == 0 - newcol = [val] - else - newcol = Tables.allocatecolumn(Union{Missing, S}, targetrows) - fill!(newcol, missing) - newcol[end] = val - end - df[!, colname] = newcol - end - return df - end - - current_col = 0 - try - if cols === :orderequal - if _names(df) != _names(dfr) - msg = "when `cols == :orderequal` pushed row must have the same " * - "column names and in the same order as the target data frame" - throw(ArgumentError(msg)) - end - elseif cols === :setequal - msg = "Number of columns of `DataFrameRow` does not match that of " * - "target data frame (got $(length(dfr)) and $ncols)." - ncols == length(dfr) || throw(ArgumentError(msg)) - end - for (col, nm) in zip(_columns(df), _names(df)) - current_col += 1 - if cols === :subset - val = get(dfr, nm, missing) - else - val = dfr[nm] - end - S = typeof(val) - T = eltype(col) - if S <: T || !promote || promote_type(S, T) <: T - push!(col, val) - else - newcol = similar(col, promote_type(S, T), targetrows) - copyto!(newcol, 1, col, 1, nrows) - newcol[end] = val - firstindex(newcol) != 1 && _onebased_check_error() - _columns(df)[columnindex(df, nm)] = newcol - end - end - for col in _columns(df) - @assert length(col) == targetrows - end - catch err - for col in _columns(df) - resize!(col, nrows) - end - if current_col > 0 - @error "Error adding value to column :$(_names(df)[current_col])." 
- end - rethrow(err) - end - return df -end diff --git a/test/dataframe.jl b/test/dataframe.jl index c4730e4e60..ac790a6da2 100644 --- a/test/dataframe.jl +++ b/test/dataframe.jl @@ -414,226 +414,6 @@ end @test hash(DataFrame([1 2; 3 4], :auto)) == hash(DataFrame([1 2; 3 4], :auto), zero(UInt)) end -@testset "push!(df, row)" begin - buf = IOBuffer() - sl = SimpleLogger(buf) - - df = DataFrame(first=[1, 2, 3], second=["apple", "orange", "pear"]) - - dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) - dfc = DataFrame(first=[1, 2], second=["apple", "orange"]) - push!(dfb, Any[3, "pear"]) - @test df == dfb - - dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) - push!(dfb, (3, "pear")) - @test df == dfb - - dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) - with_logger(sl) do - @test_throws InexactError push!(dfb, (33.33, "pear")) - end - @test dfc == dfb - @test occursin("Error adding value to column :first", String(take!(buf))) - - dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) - @test_throws DimensionMismatch push!(dfb, (1, "2", 3)) - @test dfc == dfb - - dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) - with_logger(sl) do - @test_throws MethodError push!(dfb, ("coconut", 22)) - end - @test dfc == dfb - @test occursin("Error adding value to column :first", String(take!(buf))) - - dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) - with_logger(sl) do - @test_throws MethodError push!(dfb, (11, 22)) - end - @test dfc == dfb - @test occursin("Error adding value to column :second", String(take!(buf))) - - dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) - push!(dfb, Dict(:first=>3, :second=>"pear")) - @test df == dfb - - df = DataFrame(first=[1, 2, 3], second=["apple", "orange", "banana"]) - dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) - push!(dfb, Dict(:first=>3, :second=>"banana")) - @test df == dfb - - dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) - push!(dfb, (first=3, 
second="banana")) - @test df == dfb - - dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) - push!(dfb, (second="banana", first=3)) - @test df == dfb - - df0 = DataFrame(first=[1, 2], second=["apple", "orange"]) - dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) - with_logger(sl) do - @test_throws MethodError push!(dfb, (second=3, first=3)) - end - @test df0 == dfb - @test occursin("Error adding value to column :second", String(take!(buf))) - - dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) - push!(dfb, (second="banana", first=3)) - @test df == dfb - - df0 = DataFrame(first=[1, 2], second=["apple", "orange"]) - dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) - with_logger(sl) do - @test_throws MethodError push!(dfb, Dict(:first=>true, :second=>false)) - end - @test df0 == dfb - @test occursin("Error adding value to column :second", String(take!(buf))) - - df0 = DataFrame(first=[1, 2], second=["apple", "orange"]) - dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) - with_logger(sl) do - @test_throws MethodError push!(dfb, Dict(:first=>"chicken", :second=>"stuff")) - end - @test df0 == dfb - @test occursin("Error adding value to column :first", String(take!(buf))) - - df0 = DataFrame(first=[1, 2, 3], second=["apple", "orange", "pear"]) - dfb = DataFrame(first=[1, 2, 3], second=["apple", "orange", "pear"]) - with_logger(sl) do - @test_throws MethodError push!(dfb, Dict(:first=>"chicken", :second=>1)) - end - @test df0 == dfb - @test occursin("Error adding value to column :first", String(take!(buf))) - - df0 = DataFrame(first=["1", "2", "3"], second=["apple", "orange", "pear"]) - dfb = DataFrame(first=["1", "2", "3"], second=["apple", "orange", "pear"]) - with_logger(sl) do - @test_throws MethodError push!(dfb, Dict(:first=>"chicken", :second=>1)) - end - @test df0 == dfb - @test occursin("Error adding value to column :second", String(take!(buf))) - - df = DataFrame(x=1) - push!(df, Dict(:x=>2), Dict(:x=>3)) - @test df[!, 
:x] == [1, 2, 3] - - df = DataFrame(x=1, y=2) - push!(df, [3, 4], [5, 6]) - @test df[!, :x] == [1, 3, 5] && df[!, :y] == [2, 4, 6] - - df = DataFrame(x=1, y=2) - with_logger(sl) do - @test_throws KeyError push!(df, Dict(:x=>1, "y"=>2)) - end - @test df == DataFrame(x=1, y=2) - @test occursin("Error adding value to column :y", String(take!(buf))) - - df = DataFrame() - @test push!(df, (a=1, b=true)) === df - @test df == DataFrame(a=1, b=true) - - df = DataFrame() - df.a = [1, 2, 3] - df.b = df.a - dfc = copy(df) - with_logger(sl) do - @test_throws AssertionError push!(df, [1, 2]) - end - @test df == dfc - @test occursin("Error adding value to column :a", String(take!(buf))) - with_logger(sl) do - @test_throws AssertionError push!(df, (a=1, b=2)) - end - @test df == dfc - @test occursin("Error adding value to column :a", String(take!(buf))) - with_logger(sl) do - @test_throws AssertionError push!(df, Dict(:a=>1, :b=>2)) - end - @test df == dfc - @test occursin("Error adding value to column :a", String(take!(buf))) - @test_throws AssertionError push!(df, df[1, :]) - @test df == dfc - with_logger(sl) do - @test_throws AssertionError push!(df, dfc[1, :]) - end - @test df == dfc - - df = DataFrame() - df.a = [1, 2, 3, 4] - df.b = df.a - df.c = [1, 2, 3, 4] - dfc = copy(df) - with_logger(sl) do - @test_throws AssertionError push!(df, [1, 2, 3]) - end - @test df == dfc - @test occursin("Error adding value to column :a", String(take!(buf))) - with_logger(sl) do - @test_throws AssertionError push!(df, (a=1, b=2, c=3)) - end - @test df == dfc - @test occursin("Error adding value to column :a", String(take!(buf))) - with_logger(sl) do - @test_throws AssertionError push!(df, Dict(:a=>1, :b=>2, :c=>3)) - end - @test df == dfc - @test occursin("Error adding value to column :a", String(take!(buf))) - @test_throws AssertionError push!(df, df[1, :]) - @test df == dfc - with_logger(sl) do - @test_throws AssertionError push!(df, dfc[1, :]) - end - @test df == dfc - - df = 
DataFrame(a=1, b=2) - push!(df, [1 2]) - @test df == DataFrame(a=[1, 1], b=[2, 2]) - push!(df, (1, 2)) - @test df == DataFrame(a=[1, 1, 1], b=[2, 2, 2]) - - @test_throws ArgumentError push!(df, "ab") -end - -@testset "extra push! tests" begin - for df in [DataFrame(a=Any[1]), DataFrame(a=1)] - @test push!(df, (b=1,), cols=:union) ≅ - DataFrame(a=[1, missing], b=[missing, 1]) - @test push!(df, (b=1,), cols=:union) ≅ - DataFrame(a=[1, missing, missing], b=[missing, 1, 1]) - df.x = 1:3 - with_logger(SimpleLogger(IOBuffer())) do - @test_throws MethodError push!(df, (b=1,), cols=:union, promote=false) - end - @test df ≅ DataFrame(a=[1, missing, missing], b=[missing, 1, 1], x=1:3) - allowmissing!(df, :x) - @test push!(df, (b=1,), cols=:union, promote=false) ≅ - DataFrame(a=[1, missing, missing, missing], b=[missing, 1, 1, 1], - x=[1:3; missing]) - end - - for df in [DataFrame(a=Any[1]), DataFrame(a=1)] - @test push!(df, DataFrame(b=1)[1, :], cols=:union) ≅ - DataFrame(a=[1, missing], b=[missing, 1]) - @test push!(df, DataFrame(b=1)[1, :], cols=:union) ≅ - DataFrame(a=[1, missing, missing], b=[missing, 1, 1]) - df.x = 1:3 - with_logger(SimpleLogger(IOBuffer())) do - @test_throws MethodError push!(df, DataFrame(b=1)[1, :], cols=:union, - promote=false) - end - @test df ≅ DataFrame(a=[1, missing, missing], b=[missing, 1, 1], x=1:3) - allowmissing!(df, :x) - @test push!(df, DataFrame(b=1)[1, :], cols=:union, promote=false) ≅ - DataFrame(a=[1, missing, missing, missing], b=[missing, 1, 1, 1], - x=[1:3; missing]) - end - - @test_throws ArgumentError push!(DataFrame(), (a=1, b=2), cols=:unions) - @test_throws ArgumentError push!(DataFrame(), Dict('a'=>1, 'b'=>2), cols=:union) -end - @testset "deleteat!" begin df = DataFrame(a=[1, 2], b=[3.0, 4.0]) @test_throws BoundsError deleteat!(df, [true, true, true]) @@ -2019,25 +1799,7 @@ end @test_throws ArgumentError df[1, All(1)] end -@testset "vcat and push! 
with :orderequal" begin - for v in ((a=10, b=20, c=30), - DataFrame(a=10, b=20, c=30)[1, :], - OrderedDict(:a=>10, :b=>20, :c=>30)) - df = DataFrame(a=1, b=2, c=3) - push!(df, v, cols=:orderequal) - @test df == DataFrame(a=[1, 10], b=[2, 20], c=[3, 30]) - end - - for v in ((a=10, b=20, d=30), (a=10, c=20, b=30), - DataFrame(a=10, c=20, b=30)[1, :], - (a=10, b=20, c=30, d=0), - DataFrame(a=10, b=20, c=30, d=0)[1, :], - Dict(:a=>10, :b=>20, :c=>30), - OrderedDict(:c=>10, :b=>20, :a=>30)) - df = DataFrame(a=1, b=2, c=3) - @test_throws ArgumentError push!(df, v, cols=:orderequal) - end - +@testset "vcat with :orderequal" begin @test vcat(DataFrame(a=1, b=2, c=3), DataFrame(a=10, b=20, c=30), cols=:orderequal) == DataFrame(a=[1, 10], b=[2, 20], c=[3, 30]) @test_throws ArgumentError vcat(DataFrame(a=1, b=2, c=3), DataFrame(a=10, b=20, c=30), @@ -2075,208 +1837,6 @@ end @test_throws ArgumentError vcat(df1, df2, df3, df4, cols=:union, source=:a => [1]) end -@testset "push! with :subset" begin - for v in (Dict(:a=>10, :b=>20, :d=>30), (a=10, b=20, d=30), - DataFrame(a=10, b=20, d=30)[1, :]) - df = DataFrame(a=1, b=2, c=3) - old_logger = global_logger(NullLogger()) - @test_throws MethodError push!(df, v, cols=:subset, promote=false) - global_logger(old_logger) - @test df == DataFrame(a=1, b=2, c=3) - end - for v in (Dict(:a=>10, :b=>20, :d=>30), (a=10, b=20, d=30), - DataFrame(a=10, b=20, d=30)[1, :]) - df = DataFrame(a=1, b=2, c=3) - allowmissing!(df, :c) - push!(df, v, cols=:subset, promote=false) - @test df ≅ DataFrame(a=[1, 10], b=[2, 20], c=[3, missing]) - old_logger = global_logger(NullLogger()) - @test_throws MethodError push!(df, Dict(), cols=:subset, promote=false) - global_logger(old_logger) - @test df ≅ DataFrame(a=[1, 10], b=[2, 20], c=[3, missing]) - allowmissing!(df, [:a, :b]) - push!(df, Dict(), cols=:subset) - @test df ≅ DataFrame(a=[1, 10, missing], b=[2, 20, missing], c=[3, missing, missing]) - end -end - -@testset "push! 
with :intersect" begin - for row in ((y=4, x=3), Dict(:y=>4, :x=>3), (z=1, y=4, x=3), Dict(:y=>4, :x=>3, :z=>1)) - df = DataFrame(x=1, y=2) - push!(df, row, cols=:intersect) - @test df == DataFrame(x=[1, 3], y=[2, 4]) - end - - old_logger = global_logger(NullLogger()) - for row in ((z=4, x=3), (z=1, p=4, x=3)) - df = DataFrame(x=1, y=2) - @test_throws ErrorException push!(df, row, cols=:intersect) - @test df == DataFrame(x=1, y=2) - end - - for row in (Dict(:z=>4, :x=>3), Dict(:p=>4, :x=>3, :z=>1)) - df = DataFrame(x=1, y=2) - @test_throws KeyError push!(df, row, cols=:intersect) - @test df == DataFrame(x=1, y=2) - end - global_logger(old_logger) -end - -@testset "push!(df, row) with :union" begin - df = DataFrame() - push!(df, (a=1, b=2)) - a = df.a - push!(df, (a=1, c=2), cols=:union) - @test df ≅ DataFrame(a=[1, 1], b=[2, missing], c=[missing, 2]) - @test df.a === a - @test eltype(df.a) === Int - - df = DataFrame(a=Int[]) - push!(df, (a=1, c=2), cols=:union) - @test df == DataFrame(a=[1], c=[2]) - @test eltype(df.a) === Int - @test eltype(df.c) === Int - - df = DataFrame(a=Int[]) - push!(df, (c=2,), cols=:union) - @test df ≅ DataFrame(a=[missing], c=[2]) - @test eltype(df.a) === Union{Int, Missing} - @test eltype(df.c) === Int - - df = DataFrame(a=Int[]) - push!(df, (c=missing,), cols=:union) - @test df ≅ DataFrame(a=[missing], c=[missing]) - @test eltype(df.a) === Union{Int, Missing} - @test eltype(df.c) === Missing - - push!(df, (c="a", d=1), cols=:union) - @test eltype(df.a) === Union{Int, Missing} - @test eltype(df.c) === Union{String, Missing} - @test eltype(df.d) === Union{Int, Missing} - - push!(df, (a="b",), cols=:union) - @test df ≅ DataFrame(a=[missing, missing, "b"], - c=[missing, "a", missing], - d=[missing, 1, missing]) - @test eltype(df.a) === Any - @test eltype(df.c) === Union{String, Missing} - @test eltype(df.d) === Union{Int, Missing} - - a = [1] - df = DataFrame(a=a, copycols=false) - push!(df, (a=1,), cols=:union) - @test df.a === a - 
push!(df, (a=1.0,), cols=:union) - @test df.a !== a - @test eltype(df.a) === Float64 - - x = [1] - df = DataFrame(a=x, b=x, copycols=false) - @test_throws AssertionError push!(df, (a=1, b=2, c=3), cols=:union) - @test df == DataFrame(a=x, b=x, copycols=false) - @test df.a === df.b === x - - # note that this is correct although we have a problem with aliasing - # as we eventually reallocate column :b to a correct length - # and aliasing does not affect rows that already existed in df - push!(df, (a=1, b=2.0, c=3), cols=:union) - @test df ≅ DataFrame(a=[1, 1], b=[1.0, 2.0], c=[missing, 3], copycols=false) - @test df.a === x - @test eltype(df.b) === Float64 - - df = DataFrame() - push!(df, DataFrame(a=1, b=2)[1, :]) - a = df.a - push!(df, DataFrame(a=1, c=2)[1, :], cols=:union) - @test df ≅ DataFrame(a=[1, 1], b=[2, missing], c=[missing, 2]) - @test df.a === a - @test eltype(df.a) === Int - - df = DataFrame(a=Int[]) - push!(df, DataFrame(a=1, c=2)[1, :], cols=:union) - @test df == DataFrame(a=[1], c=[2]) - @test eltype(df.a) === Int - @test eltype(df.c) === Int - - df = DataFrame(a=Int[]) - push!(df, DataFrame(c=2)[1, :], cols=:union) - @test df ≅ DataFrame(a=[missing], c=[2]) - @test eltype(df.a) === Union{Int, Missing} - @test eltype(df.c) === Int - - df = DataFrame(a=Int[]) - push!(df, DataFrame(c=missing)[1, :], cols=:union) - @test df ≅ DataFrame(a=[missing], c=[missing]) - @test eltype(df.a) === Union{Int, Missing} - @test eltype(df.c) === Missing - - push!(df, DataFrame(c="a", d=1)[1, :], cols=:union) - @test eltype(df.a) === Union{Int, Missing} - @test eltype(df.c) === Union{String, Missing} - @test eltype(df.d) === Union{Int, Missing} - - push!(df, DataFrame(a="b")[1, :], cols=:union) - @test df ≅ DataFrame(a=[missing, missing, "b"], - c=[missing, "a", missing], - d=[missing, 1, missing]) - @test eltype(df.a) === Any - @test eltype(df.c) === Union{String, Missing} - @test eltype(df.d) === Union{Int, Missing} - - a = [1] - df = DataFrame(a=a, copycols=false) - 
push!(df, DataFrame(a=1)[1, :], cols=:union) - @test df.a === a - push!(df, DataFrame(a=1.0)[1, :], cols=:union) - @test df.a !== a - @test eltype(df.a) === Float64 - - x = [1] - df = DataFrame(a=x, b=x, copycols=false) - @test_throws AssertionError push!(df, DataFrame(a=1, b=2, c=3)[1, :], cols=:union) - @test df == DataFrame(a=x, b=x, copycols=false) - @test df.a === df.b === x - - # note that this is correct although we have a problem with aliasing - # as we eventually reallocate column :b to a correct length - # and aliasing does not affect rows that already existed in df - push!(df, DataFrame(a=1, b=2.0, c=3)[1, :], cols=:union) - @test df ≅ DataFrame(a=[1, 1], b=[1.0, 2.0], c=[missing, 3], copycols=false) - @test df.a === x - @test eltype(df.b) === Float64 -end - -@testset "push!(df, row) with promote options" begin - df = DataFrame(a=1) - with_logger(SimpleLogger(IOBuffer())) do - @test_throws MethodError push!(df, ["a"]) - end - @test push!(df, ["a"], promote=true) == DataFrame(a=[1, "a"]) - - for v in [(a="a",), DataFrame(a="a")[1, :]] - for cols in [:orderequal, :setequal, :intersect] - df = DataFrame(a=1) - with_logger(SimpleLogger(IOBuffer())) do - @test_throws MethodError push!(df, v, cols=cols) - end - @test push!(df, v, cols=cols, promote=true) == DataFrame(a=[1, "a"]) - end - for cols in [:subset, :union] - df = DataFrame(a=1, b=1) - with_logger(SimpleLogger(IOBuffer())) do - @test_throws MethodError push!(df, v, cols=cols, promote=false) - end - @test push!(df, v, cols=cols) ≅ DataFrame(a=[1, "a"], b=[1, missing]) - end - end -end - -@testset "push! 
with :setequal and wrong number of entries" begin - df = DataFrame(a=1:3) - @test_throws ArgumentError push!(df, (a=10, b=20)) - @test_throws ArgumentError push!(df, "a") -end - @testset "names for Type, predicate + standard tests of cols" begin df_long = DataFrame(a1=1:3, a2=[1, missing, 3], b1=1.0:3.0, b2=[1.0, missing, 3.0], diff --git a/test/insertion.jl b/test/insertion.jl new file mode 100644 index 0000000000..bc697abc02 --- /dev/null +++ b/test/insertion.jl @@ -0,0 +1,1286 @@ +module TestPush + +using DataFrames, Test, Logging, DataStructures +const ≅ = isequal + +@testset "push!(df, row)" begin + buf = IOBuffer() + sl = SimpleLogger(buf) + + df = DataFrame(first=[1, 2, 3], second=["apple", "orange", "pear"]) + + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + dfc = DataFrame(first=[1, 2], second=["apple", "orange"]) + push!(dfb, Any[3, "pear"]) + @test df == dfb + + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + push!(dfb, (3, "pear")) + @test df == dfb + + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + with_logger(sl) do + @test_throws InexactError push!(dfb, (33.33, "pear")) + end + @test dfc == dfb + @test occursin("Error adding value to column :first", String(take!(buf))) + + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + @test_throws DimensionMismatch push!(dfb, (1, "2", 3)) + @test dfc == dfb + + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + with_logger(sl) do + @test_throws MethodError push!(dfb, ("coconut", 22)) + end + @test dfc == dfb + @test occursin("Error adding value to column :first", String(take!(buf))) + + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + with_logger(sl) do + @test_throws MethodError push!(dfb, (11, 22)) + end + @test dfc == dfb + @test occursin("Error adding value to column :second", String(take!(buf))) + + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + push!(dfb, Dict(:first=>3, :second=>"pear")) + @test df == dfb + + df = 
DataFrame(first=[1, 2, 3], second=["apple", "orange", "banana"]) + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + push!(dfb, Dict(:first=>3, :second=>"banana")) + @test df == dfb + + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + push!(dfb, (first=3, second="banana")) + @test df == dfb + + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + push!(dfb, (second="banana", first=3)) + @test df == dfb + + df0 = DataFrame(first=[1, 2], second=["apple", "orange"]) + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + with_logger(sl) do + @test_throws MethodError push!(dfb, (second=3, first=3)) + end + @test df0 == dfb + @test occursin("Error adding value to column :second", String(take!(buf))) + + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + push!(dfb, (second="banana", first=3)) + @test df == dfb + + df0 = DataFrame(first=[1, 2], second=["apple", "orange"]) + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + with_logger(sl) do + @test_throws MethodError push!(dfb, Dict(:first=>true, :second=>false)) + end + @test df0 == dfb + @test occursin("Error adding value to column :second", String(take!(buf))) + + df0 = DataFrame(first=[1, 2], second=["apple", "orange"]) + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + with_logger(sl) do + @test_throws MethodError push!(dfb, Dict(:first=>"chicken", :second=>"stuff")) + end + @test df0 == dfb + @test occursin("Error adding value to column :first", String(take!(buf))) + + df0 = DataFrame(first=[1, 2, 3], second=["apple", "orange", "pear"]) + dfb = DataFrame(first=[1, 2, 3], second=["apple", "orange", "pear"]) + with_logger(sl) do + @test_throws MethodError push!(dfb, Dict(:first=>"chicken", :second=>1)) + end + @test df0 == dfb + @test occursin("Error adding value to column :first", String(take!(buf))) + + df0 = DataFrame(first=["1", "2", "3"], second=["apple", "orange", "pear"]) + dfb = DataFrame(first=["1", "2", "3"], second=["apple", "orange", "pear"]) 
+ with_logger(sl) do + @test_throws MethodError push!(dfb, Dict(:first=>"chicken", :second=>1)) + end + @test df0 == dfb + @test occursin("Error adding value to column :second", String(take!(buf))) + + df = DataFrame(x=1) + push!(df, Dict(:x=>2), Dict(:x=>3)) + @test df[!, :x] == [1, 2, 3] + + df = DataFrame(x=1, y=2) + push!(df, [3, 4], [5, 6]) + @test df[!, :x] == [1, 3, 5] && df[!, :y] == [2, 4, 6] + + df = DataFrame(x=1, y=2) + with_logger(sl) do + @test_throws KeyError push!(df, Dict(:x=>1, "y"=>2)) + end + @test df == DataFrame(x=1, y=2) + @test occursin("Error adding value to column :y", String(take!(buf))) + + df = DataFrame() + @test push!(df, (a=1, b=true)) === df + @test df == DataFrame(a=1, b=true) + + df = DataFrame() + df.a = [1, 2, 3] + df.b = df.a + dfc = copy(df) + with_logger(sl) do + @test_throws AssertionError push!(df, [1, 2]) + end + @test df == dfc + @test occursin("Error adding value to column :b", String(take!(buf))) + with_logger(sl) do + @test_throws AssertionError push!(df, (a=1, b=2)) + end + @test df == dfc + @test occursin("Error adding value to column :b", String(take!(buf))) + with_logger(sl) do + @test_throws AssertionError push!(df, Dict(:a=>1, :b=>2)) + end + @test df == dfc + @test occursin("Error adding value to column :b", String(take!(buf))) + @test_throws AssertionError push!(df, df[1, :]) + @test df == dfc + with_logger(sl) do + @test_throws AssertionError push!(df, dfc[1, :]) + end + @test df == dfc + + df = DataFrame() + df.a = [1, 2, 3, 4] + df.b = df.a + df.c = [1, 2, 3, 4] + dfc = copy(df) + with_logger(sl) do + @test_throws AssertionError push!(df, [1, 2, 3]) + end + @test df == dfc + @test occursin("Error adding value to column :b", String(take!(buf))) + with_logger(sl) do + @test_throws AssertionError push!(df, (a=1, b=2, c=3)) + end + @test df == dfc + @test occursin("Error adding value to column :b", String(take!(buf))) + with_logger(sl) do + @test_throws AssertionError push!(df, Dict(:a=>1, :b=>2, :c=>3)) + end + 
@test df == dfc + @test occursin("Error adding value to column :b", String(take!(buf))) + @test_throws AssertionError push!(df, df[1, :]) + @test df == dfc + with_logger(sl) do + @test_throws AssertionError push!(df, dfc[1, :]) + end + @test df == dfc + + df = DataFrame(a=1, b=2) + push!(df, [11 12]) + @test df == DataFrame(a=[1, 11], b=[2, 12]) + push!(df, (111, 112)) + @test df == DataFrame(a=[1, 11, 111], b=[2, 12, 112]) + + @test_throws ArgumentError push!(df, "ab") +end + +@testset "pushfirst!(df, row)" begin + buf = IOBuffer() + sl = SimpleLogger(buf) + + df = DataFrame(first=[3, 1, 2], second=["pear", "apple", "orange"]) + + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + dfc = DataFrame(first=[1, 2], second=["apple", "orange"]) + pushfirst!(dfb, Any[3, "pear"]) + @test df == dfb + + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + pushfirst!(dfb, (3, "pear")) + @test df == dfb + + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + with_logger(sl) do + @test_throws InexactError pushfirst!(dfb, (33.33, "pear")) + end + @test dfc == dfb + @test occursin("Error adding value to column :first", String(take!(buf))) + + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + @test_throws DimensionMismatch pushfirst!(dfb, (1, "2", 3)) + @test dfc == dfb + + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + with_logger(sl) do + @test_throws MethodError pushfirst!(dfb, ("coconut", 22)) + end + @test dfc == dfb + @test occursin("Error adding value to column :first", String(take!(buf))) + + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + with_logger(sl) do + @test_throws MethodError pushfirst!(dfb, (11, 22)) + end + @test dfc == dfb + @test occursin("Error adding value to column :second", String(take!(buf))) + + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + pushfirst!(dfb, Dict(:first=>3, :second=>"pear")) + @test df == dfb + + df = DataFrame(first=[3, 1, 2], second=["banana", "apple", "orange"]) + 
dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + pushfirst!(dfb, Dict(:first=>3, :second=>"banana")) + @test df == dfb + + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + pushfirst!(dfb, (first=3, second="banana")) + @test df == dfb + + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + pushfirst!(dfb, (second="banana", first=3)) + @test df == dfb + + df0 = DataFrame(first=[1, 2], second=["apple", "orange"]) + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + with_logger(sl) do + @test_throws MethodError pushfirst!(dfb, (second=3, first=3)) + end + @test df0 == dfb + @test occursin("Error adding value to column :second", String(take!(buf))) + + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + pushfirst!(dfb, (second="banana", first=3)) + @test df == dfb + + df0 = DataFrame(first=[1, 2], second=["apple", "orange"]) + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + with_logger(sl) do + @test_throws MethodError pushfirst!(dfb, Dict(:first=>true, :second=>false)) + end + @test df0 == dfb + @test occursin("Error adding value to column :second", String(take!(buf))) + + df0 = DataFrame(first=[1, 2], second=["apple", "orange"]) + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + with_logger(sl) do + @test_throws MethodError pushfirst!(dfb, Dict(:first=>"chicken", :second=>"stuff")) + end + @test df0 == dfb + @test occursin("Error adding value to column :first", String(take!(buf))) + + df0 = DataFrame(first=[1, 2, 3], second=["apple", "orange", "pear"]) + dfb = DataFrame(first=[1, 2, 3], second=["apple", "orange", "pear"]) + with_logger(sl) do + @test_throws MethodError pushfirst!(dfb, Dict(:first=>"chicken", :second=>1)) + end + @test df0 == dfb + @test occursin("Error adding value to column :first", String(take!(buf))) + + df0 = DataFrame(first=["1", "2", "3"], second=["apple", "orange", "pear"]) + dfb = DataFrame(first=["1", "2", "3"], second=["apple", "orange", "pear"]) + with_logger(sl) do + 
@test_throws MethodError pushfirst!(dfb, Dict(:first=>"chicken", :second=>1)) + end + @test df0 == dfb + @test occursin("Error adding value to column :second", String(take!(buf))) + + df = DataFrame(x=1) + pushfirst!(df, Dict(:x=>2), Dict(:x=>3)) + @test df[!, :x] == [2, 3, 1] + + df = DataFrame(x=1, y=2) + pushfirst!(df, [3, 4], [5, 6]) + @test df[!, :x] == [3, 5, 1] && df[!, :y] == [4, 6, 2] + + df = DataFrame(x=1, y=2) + with_logger(sl) do + @test_throws KeyError pushfirst!(df, Dict(:x=>1, "y"=>2)) + end + @test df == DataFrame(x=1, y=2) + @test occursin("Error adding value to column :y", String(take!(buf))) + + df = DataFrame() + @test pushfirst!(df, (a=1, b=true)) === df + @test df == DataFrame(a=1, b=true) + + df = DataFrame() + df.a = [1, 2, 3] + df.b = df.a + dfc = copy(df) + with_logger(sl) do + @test_throws AssertionError pushfirst!(df, [1, 2]) + end + @test df == dfc + @test occursin("Error adding value to column :b", String(take!(buf))) + with_logger(sl) do + @test_throws AssertionError pushfirst!(df, (a=1, b=2)) + end + @test df == dfc + @test occursin("Error adding value to column :b", String(take!(buf))) + with_logger(sl) do + @test_throws AssertionError pushfirst!(df, Dict(:a=>1, :b=>2)) + end + @test df == dfc + @test occursin("Error adding value to column :b", String(take!(buf))) + @test_throws AssertionError pushfirst!(df, df[1, :]) + @test df == dfc + with_logger(sl) do + @test_throws AssertionError pushfirst!(df, dfc[1, :]) + end + @test df == dfc + + df = DataFrame() + df.a = [1, 2, 3, 4] + df.b = df.a + df.c = [1, 2, 3, 4] + dfc = copy(df) + with_logger(sl) do + @test_throws AssertionError pushfirst!(df, [1, 2, 3]) + end + @test df == dfc + @test occursin("Error adding value to column :b", String(take!(buf))) + with_logger(sl) do + @test_throws AssertionError pushfirst!(df, (a=1, b=2, c=3)) + end + @test df == dfc + @test occursin("Error adding value to column :b", String(take!(buf))) + with_logger(sl) do + @test_throws AssertionError 
pushfirst!(df, Dict(:a=>1, :b=>2, :c=>3)) + end + @test df == dfc + @test occursin("Error adding value to column :b", String(take!(buf))) + @test_throws AssertionError pushfirst!(df, df[1, :]) + @test df == dfc + with_logger(sl) do + @test_throws AssertionError pushfirst!(df, dfc[1, :]) + end + @test df == dfc + + df = DataFrame(a=1, b=2) + pushfirst!(df, [11 12]) + @test df == DataFrame(a=[11, 1], b=[12, 2]) + pushfirst!(df, (111, 112)) + @test df == DataFrame(a=[111, 11, 1], b=[112, 12, 2]) + + @test_throws ArgumentError pushfirst!(df, "ab") +end + +@testset "insert!(df, idx, row)" begin + @test_throws ArgumentError insert!(DataFrame(), -1, [1, 2]) + @test_throws ArgumentError insert!(DataFrame(), true, [1, 2]) + @test_throws ArgumentError insert!(DataFrame(), 2, [1, 2]) + @test_throws ArgumentError insert!(DataFrame(), -1, (a=1, b=2)) + @test_throws ArgumentError insert!(DataFrame(), true, (a=1, b=2)) + @test_throws ArgumentError insert!(DataFrame(), 2, (a=1, b=2)) + @test_throws ArgumentError insert!(DataFrame(), -1, DataFrame(a=1, b=2)[1, :]) + @test_throws ArgumentError insert!(DataFrame(), true, DataFrame(a=1, b=2)[1, :]) + @test_throws ArgumentError insert!(DataFrame(), 2, DataFrame(a=1, b=2)[1, :]) + @test_throws ArgumentError insert!(DataFrame(a=1, b=2), -1, [1, 2]) + @test_throws ArgumentError insert!(DataFrame(a=1, b=2), true, [1, 2]) + @test_throws ArgumentError insert!(DataFrame(a=1, b=2), 3, [1, 2]) + @test_throws ArgumentError insert!(DataFrame(a=1, b=2), -1, (a=1, b=2)) + @test_throws ArgumentError insert!(DataFrame(a=1, b=2), true, (a=1, b=2)) + @test_throws ArgumentError insert!(DataFrame(a=1, b=2), 3, (a=1, b=2)) + @test_throws ArgumentError insert!(DataFrame(a=1, b=2), -1, DataFrame(a=1, b=2)[1, :]) + @test_throws ArgumentError insert!(DataFrame(a=1, b=2), true, DataFrame(a=1, b=2)[1, :]) + @test_throws ArgumentError insert!(DataFrame(a=1, b=2), 3, DataFrame(a=1, b=2)[1, :]) + + buf = IOBuffer() + sl = SimpleLogger(buf) + + df = 
DataFrame(first=[1, 3, 2], second=["apple", "pear", "orange"]) + + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + dfc = DataFrame(first=[1, 2], second=["apple", "orange"]) + insert!(dfb, 2, Any[3, "pear"]) + @test df == dfb + + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + insert!(dfb, 2, (3, "pear")) + @test df == dfb + + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + with_logger(sl) do + @test_throws InexactError insert!(dfb, 2, (33.33, "pear")) + end + @test dfc == dfb + @test occursin("Error adding value to column :first", String(take!(buf))) + + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + @test_throws DimensionMismatch insert!(dfb, 2, (1, "2", 3)) + @test dfc == dfb + + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + with_logger(sl) do + @test_throws MethodError insert!(dfb, 2, ("coconut", 22)) + end + @test dfc == dfb + @test occursin("Error adding value to column :first", String(take!(buf))) + + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + with_logger(sl) do + @test_throws MethodError insert!(dfb, 2, (11, 22)) + end + @test dfc == dfb + @test occursin("Error adding value to column :second", String(take!(buf))) + + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + insert!(dfb, 2, Dict(:first=>3, :second=>"pear")) + @test df == dfb + + df = DataFrame(first=[1, 3, 2], second=["apple", "banana", "orange"]) + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + insert!(dfb, 2, Dict(:first=>3, :second=>"banana")) + @test df == dfb + + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + insert!(dfb, 2, (first=3, second="banana")) + @test df == dfb + + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + insert!(dfb, 2, (second="banana", first=3)) + @test df == dfb + + df0 = DataFrame(first=[1, 2], second=["apple", "orange"]) + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + with_logger(sl) do + @test_throws MethodError insert!(dfb, 2, 
(second=3, first=3)) + end + @test df0 == dfb + @test occursin("Error adding value to column :second", String(take!(buf))) + + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + insert!(dfb, 2, (second="banana", first=3)) + @test df == dfb + + df0 = DataFrame(first=[1, 2], second=["apple", "orange"]) + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + with_logger(sl) do + @test_throws MethodError insert!(dfb, 2, Dict(:first=>true, :second=>false)) + end + @test df0 == dfb + @test occursin("Error adding value to column :second", String(take!(buf))) + + df0 = DataFrame(first=[1, 2], second=["apple", "orange"]) + dfb = DataFrame(first=[1, 2], second=["apple", "orange"]) + with_logger(sl) do + @test_throws MethodError insert!(dfb, 2, Dict(:first=>"chicken", :second=>"stuff")) + end + @test df0 == dfb + @test occursin("Error adding value to column :first", String(take!(buf))) + + df0 = DataFrame(first=[1, 2, 3], second=["apple", "orange", "pear"]) + dfb = DataFrame(first=[1, 2, 3], second=["apple", "orange", "pear"]) + with_logger(sl) do + @test_throws MethodError insert!(dfb, 2, Dict(:first=>"chicken", :second=>1)) + end + @test df0 == dfb + @test occursin("Error adding value to column :first", String(take!(buf))) + + df0 = DataFrame(first=["1", "2", "3"], second=["apple", "orange", "pear"]) + dfb = DataFrame(first=["1", "2", "3"], second=["apple", "orange", "pear"]) + with_logger(sl) do + @test_throws MethodError insert!(dfb, 2, Dict(:first=>"chicken", :second=>1)) + end + @test df0 == dfb + @test occursin("Error adding value to column :second", String(take!(buf))) + + df = DataFrame(x=1, y=2) + with_logger(sl) do + @test_throws KeyError insert!(df, 2, Dict(:x=>1, "y"=>2)) + end + @test df == DataFrame(x=1, y=2) + @test occursin("Error adding value to column :y", String(take!(buf))) + + df = DataFrame() + @test insert!(df, 1, (a=1, b=true)) === df + @test df == DataFrame(a=1, b=true) + + df = DataFrame() + df.a = [1, 2, 3] + df.b = df.a + dfc = 
copy(df) + with_logger(sl) do + @test_throws AssertionError insert!(df, 2, [1, 2]) + end + @test df == dfc + @test occursin("Error adding value to column :b", String(take!(buf))) + with_logger(sl) do + @test_throws AssertionError insert!(df, 2, (a=1, b=2)) + end + @test df == dfc + @test occursin("Error adding value to column :b", String(take!(buf))) + with_logger(sl) do + @test_throws AssertionError insert!(df, 2, Dict(:a=>1, :b=>2)) + end + @test df == dfc + @test occursin("Error adding value to column :b", String(take!(buf))) + @test_throws AssertionError insert!(df, 2, df[1, :]) + @test df == dfc + with_logger(sl) do + @test_throws AssertionError insert!(df, 2, dfc[1, :]) + end + @test df == dfc + + df = DataFrame() + df.a = [1, 2, 3, 4] + df.b = df.a + df.c = [1, 2, 3, 4] + dfc = copy(df) + with_logger(sl) do + @test_throws AssertionError insert!(df, 2, [1, 2, 3]) + end + @test df == dfc + @test occursin("Error adding value to column :b", String(take!(buf))) + with_logger(sl) do + @test_throws AssertionError insert!(df, 2, (a=1, b=2, c=3)) + end + @test df == dfc + @test occursin("Error adding value to column :b", String(take!(buf))) + with_logger(sl) do + @test_throws AssertionError insert!(df, 2, Dict(:a=>1, :b=>2, :c=>3)) + end + @test df == dfc + @test occursin("Error adding value to column :b", String(take!(buf))) + @test_throws AssertionError insert!(df, 2, df[1, :]) + @test df == dfc + with_logger(sl) do + @test_throws AssertionError insert!(df, 2, dfc[1, :]) + end + @test df == dfc + + df = DataFrame(a=1:4, b=11:14) + insert!(df, 3, [-1 -2]) + @test df == DataFrame(a=[1, 2, -1, 3, 4], b=[11, 12, -2, 13, 14]) + insert!(df, 3, (-11, -12)) + @test df == DataFrame(a=[1, 2, -11, -1, 3, 4], b=[11, 12, -12, -2, 13, 14]) + + @test_throws ArgumentError insert!(df, 2, "ab") +end + +@testset "extra push! 
tests" begin + for df in [DataFrame(a=Any[1, 2, 3]), DataFrame(a=1:3)] + @test push!(df, (b=1,), cols=:union) ≅ + DataFrame(a=[1, 2, 3, missing], b=[missing, missing, missing, 1]) + @test push!(df, (b=11,), cols=:union) ≅ + DataFrame(a=[1, 2, 3, missing, missing], b=[missing, missing, missing, 1, 11]) + df.x = 1:5 + with_logger(SimpleLogger(IOBuffer())) do + @test_throws MethodError push!(df, (b=1,), cols=:union, promote=false) + end + @test df ≅ DataFrame(a=[1, 2, 3, missing, missing], + b=[missing, missing, missing, 1, 11], x=1:5) + allowmissing!(df, :x) + @test push!(df, (b=111,), cols=:union, promote=false) ≅ + DataFrame(a=[1, 2, 3, missing, missing, missing], + b=[missing, missing, missing, 1, 11, 111], x=[1:5; missing]) + end + + for df in [DataFrame(a=Any[1, 2, 3]), DataFrame(a=1:3)] + @test push!(df, DataFrame(b=1)[1, :], cols=:union) ≅ + DataFrame(a=[1, 2, 3, missing], b=[missing, missing, missing, 1]) + @test push!(df, DataFrame(b=11)[1, :], cols=:union) ≅ + DataFrame(a=[1, 2, 3, missing, missing], b=[missing, missing, missing, 1, 11]) + df.x = 1:5 + with_logger(SimpleLogger(IOBuffer())) do + @test_throws MethodError push!(df, DataFrame(b=1)[1, :], cols=:union, promote=false) + end + @test df ≅ DataFrame(a=[1, 2, 3, missing, missing], + b=[missing, missing, missing, 1, 11], x=1:5) + allowmissing!(df, :x) + @test push!(df, DataFrame(b=111)[1, :], cols=:union, promote=false) ≅ + DataFrame(a=[1, 2, 3, missing, missing, missing], + b=[missing, missing, missing, 1, 11, 111], x=[1:5; missing]) + end + + @test_throws ArgumentError push!(DataFrame(), (a=1, b=2), cols=:unions) + @test_throws ArgumentError push!(DataFrame(), DataFrame(a=1, b=2)[1, :], cols=:unions) + @test_throws ArgumentError push!(DataFrame(), Dict('a'=>1, 'b'=>2), cols=:union) +end + +@testset "extra pushfirst! 
tests" begin + for df in [DataFrame(a=Any[1, 2, 3]), DataFrame(a=1:3)] + @test pushfirst!(df, (b=1,), cols=:union) ≅ + DataFrame(a=[missing, 1, 2, 3], b=[1, missing, missing, missing]) + @test pushfirst!(df, (b=11,), cols=:union) ≅ + DataFrame(a=[missing, missing, 1, 2, 3], b=[11, 1, missing, missing, missing]) + df.x = 1:5 + with_logger(SimpleLogger(IOBuffer())) do + @test_throws MethodError pushfirst!(df, (b=1,), cols=:union, promote=false) + end + @test df ≅ DataFrame(a=[missing, missing, 1, 2, 3], + b=[11, 1, missing, missing, missing], x=1:5) + allowmissing!(df, :x) + @test pushfirst!(df, (b=111,), cols=:union, promote=false) ≅ + DataFrame(a=[missing, missing, missing, 1, 2, 3], + b=[111, 11, 1, missing, missing, missing], x=[missing; 1:5]) + end + + for df in [DataFrame(a=Any[1, 2, 3]), DataFrame(a=1:3)] + @test pushfirst!(df, DataFrame(b=1)[1, :], cols=:union) ≅ + DataFrame(a=[missing, 1, 2, 3], b=[1, missing, missing, missing]) + @test pushfirst!(df, DataFrame(b=11)[1, :], cols=:union) ≅ + DataFrame(a=[missing, missing, 1, 2, 3], b=[11, 1, missing, missing, missing]) + df.x = 1:5 + with_logger(SimpleLogger(IOBuffer())) do + @test_throws MethodError pushfirst!(df, DataFrame(b=1)[1, :], cols=:union, promote=false) + end + @test df ≅ DataFrame(a=[missing, missing, 1, 2, 3], + b=[11, 1, missing, missing, missing], x=1:5) + allowmissing!(df, :x) + @test pushfirst!(df, DataFrame(b=111)[1, :], cols=:union, promote=false) ≅ + DataFrame(a=[missing, missing, missing, 1, 2, 3], + b=[111, 11, 1, missing, missing, missing], x=[missing; 1:5]) + end + + @test_throws ArgumentError pushfirst!(DataFrame(), (a=1, b=2), cols=:unions) + @test_throws ArgumentError pushfirst!(DataFrame(), DataFrame(a=1, b=2)[1, :], cols=:unions) + @test_throws ArgumentError pushfirst!(DataFrame(), Dict('a'=>1, 'b'=>2), cols=:union) +end + +@testset "extra insert! 
tests" begin + for df in [DataFrame(a=Any[1, 2, 3]), DataFrame(a=1:3)] + @test insert!(df, 2, (b=1,), cols=:union) ≅ + DataFrame(a=[1, missing, 2, 3], b=[missing, 1, missing, missing]) + @test insert!(df, 2, (b=11,), cols=:union) ≅ + DataFrame(a=[1, missing, missing, 2, 3], b=[missing, 11, 1, missing, missing]) + df.x = 1:5 + with_logger(SimpleLogger(IOBuffer())) do + @test_throws MethodError insert!(df, 2, (b=1,), cols=:union, promote=false) + end + @test df ≅ DataFrame(a=[1, missing, missing, 2, 3], + b=[missing, 11, 1, missing, missing], x=1:5) + allowmissing!(df, :x) + @test insert!(df, 2, (b=111,), cols=:union, promote=false) ≅ + DataFrame(a=[1, missing, missing, missing, 2, 3], + b=[missing, 111, 11, 1, missing, missing], x=[1, missing, 2, 3, 4, 5]) + end + + for df in [DataFrame(a=Any[1, 2, 3]), DataFrame(a=1:3)] + @test insert!(df, 2, DataFrame(b=1)[1, :], cols=:union) ≅ + DataFrame(a=[1, missing, 2, 3], b=[missing, 1, missing, missing]) + @test insert!(df, 2, DataFrame(b=11)[1, :], cols=:union) ≅ + DataFrame(a=[1, missing, missing, 2, 3], b=[missing, 11, 1, missing, missing]) + df.x = 1:5 + with_logger(SimpleLogger(IOBuffer())) do + @test_throws MethodError insert!(df, 2, DataFrame(b=1)[1, :], cols=:union, promote=false) + end + @test df ≅ DataFrame(a=[1, missing, missing, 2, 3], + b=[missing, 11, 1, missing, missing], x=1:5) + allowmissing!(df, :x) + @test insert!(df, 2, DataFrame(b=111)[1, :], cols=:union, promote=false) ≅ + DataFrame(a=[1, missing, missing, missing, 2, 3], + b=[missing, 111, 11, 1, missing, missing], x=[1, missing, 2, 3, 4, 5]) + end + + @test_throws ArgumentError insert!(DataFrame(), 2, (a=1, b=2), cols=:unions) + @test_throws ArgumentError insert!(DataFrame(), 2, DataFrame(a=1, b=2)[1, :], cols=:unions) + @test_throws ArgumentError insert!(DataFrame(), 2, Dict('a'=>1, 'b'=>2), cols=:union) +end + +@testset "push!/pushfirst!/insert! 
with :orderequal" begin + for v in ((a=10, b=20, c=30), + DataFrame(a=10, b=20, c=30)[1, :], + OrderedDict(:a=>10, :b=>20, :c=>30)) + df = DataFrame(a=1:3, b=2:4, c=3:5) + push!(df, v, cols=:orderequal) + @test df == DataFrame(a=[1:3; 10], b=[2:4; 20], c=[3:5; 30]) + pushfirst!(df, v, cols=:orderequal) + @test df == DataFrame(a=[10; 1:3; 10], b=[20; 2:4; 20], c=[30; 3:5; 30]) + insert!(df, 3, v, cols=:orderequal) + @test df == DataFrame(a=[10; 1; 10; 2:3; 10], b=[20; 2; 20; 3:4; 20], c=[30; 3; 30; 4:5; 30]) + end + + for v in ((a=10, b=20, d=30), (a=10, c=20, b=30), + DataFrame(a=10, c=20, b=30)[1, :], + (a=10, b=20, c=30, d=0), + DataFrame(a=10, b=20, c=30, d=0)[1, :], + Dict(:a=>10, :b=>20, :c=>30), + OrderedDict(:c=>10, :b=>20, :a=>30)) + df = DataFrame(a=1:3, b=2:4, c=3:5) + @test_throws ArgumentError push!(df, v, cols=:orderequal) + @test_throws ArgumentError pushfirst!(df, v, cols=:orderequal) + @test_throws ArgumentError insert!(df, 2, v, cols=:orderequal) + @test df == DataFrame(a=1:3, b=2:4, c=3:5) + end +end + +@testset "push!/pushfirst!/insert! 
with :subset" begin
+    # Column :c does not allow missing here and promote=false: each call must throw
+    # MethodError and leave `df` untouched. Log output of the failing call is silenced
+    # with a scoped logger so the previous logger is always restored.
+    for v in (Dict(:a=>10, :b=>20, :d=>30), (a=10, b=20, d=30),
+              DataFrame(a=10, b=20, d=30)[1, :])
+        df = DataFrame(a=1:3, b=2:4, c=3:5)
+        with_logger(NullLogger()) do
+            @test_throws MethodError push!(df, v, cols=:subset, promote=false)
+        end
+        @test df == DataFrame(a=1:3, b=2:4, c=3:5)
+        with_logger(NullLogger()) do
+            @test_throws MethodError pushfirst!(df, v, cols=:subset, promote=false)
+        end
+        @test df == DataFrame(a=1:3, b=2:4, c=3:5)
+        with_logger(NullLogger()) do
+            @test_throws MethodError insert!(df, 2, v, cols=:subset, promote=false)
+        end
+        @test df == DataFrame(a=1:3, b=2:4, c=3:5)
+    end
+
+    # Once :c allows missing the same rows are accepted without promotion; an empty Dict
+    # is still rejected until :a and :b allow missing as well.
+    for v in (Dict(:a=>10, :b=>20, :d=>30), (a=10, b=20, d=30),
+              DataFrame(a=10, b=20, d=30)[1, :])
+        df = DataFrame(a=1:3, b=2:4, c=3:5)
+        allowmissing!(df, :c)
+        push!(df, v, cols=:subset, promote=false)
+        @test df ≅ DataFrame(a=[1, 2, 3, 10], b=[2, 3, 4, 20], c=[3, 4, 5, missing])
+        with_logger(NullLogger()) do
+            @test_throws MethodError push!(df, Dict(), cols=:subset, promote=false)
+        end
+        @test df ≅ DataFrame(a=[1, 2, 3, 10], b=[2, 3, 4, 20], c=[3, 4, 5, missing])
+        allowmissing!(df, [:a, :b])
+        push!(df, Dict(), cols=:subset)
+        @test df ≅ DataFrame(a=[1, 2, 3, 10, missing], b=[2, 3, 4, 20, missing],
+                             c=[3, 4, 5, missing, missing])
+
+        df = DataFrame(a=1:3, b=2:4, c=3:5)
+        allowmissing!(df, :c)
+        pushfirst!(df, v, cols=:subset, promote=false)
+        @test df ≅ DataFrame(a=[10, 1, 2, 3], b=[20, 2, 3, 4], c=[missing, 3, 4, 5])
+        with_logger(NullLogger()) do
+            @test_throws MethodError pushfirst!(df, Dict(), cols=:subset, promote=false)
+        end
+        @test df ≅ DataFrame(a=[10, 1, 2, 3], b=[20, 2, 3, 4], c=[missing, 3, 4, 5])
+        allowmissing!(df, [:a, :b])
+        pushfirst!(df, Dict(), cols=:subset)
+        @test df ≅ DataFrame(a=[missing, 10, 1, 2, 3], b=[missing, 20, 2, 3, 4],
+                             c=[missing, missing, 3, 4, 5])
+
+        df = DataFrame(a=1:3, b=2:4, c=3:5)
+        allowmissing!(df, :c)
+        insert!(df, 2, v, cols=:subset, promote=false)
+        @test df ≅ DataFrame(a=[1, 10, 2, 3], b=[2, 20, 3, 4], c=[3, missing, 4, 5])
+        with_logger(NullLogger()) do
+            @test_throws MethodError insert!(df, 2, Dict(), cols=:subset, promote=false)
+        end
+        @test df ≅ DataFrame(a=[1, 10, 2, 3], b=[2, 20, 3, 4], c=[3, missing, 4, 5])
+        allowmissing!(df, [:a, :b])
+        insert!(df, 2, Dict(), cols=:subset)
+        @test df ≅ DataFrame(a=[1, missing, 10, 2, 3], b=[2, missing, 20, 3, 4],
+                             c=[3, missing, missing, 4, 5])
+    end
+end
+
+@testset "push!/pushfirst!/insert! with :intersect" begin
+    # Rows that contain every column of `df` (extra fields allowed) are accepted.
+    for row in ((y=4, x=3), Dict(:y=>4, :x=>3), (z=1, y=4, x=3), Dict(:y=>4, :x=>3, :z=>1))
+        df = DataFrame(x=[1, 1], y=[2, 2])
+        push!(df, row, cols=:intersect)
+        @test df == DataFrame(x=[1, 1, 3], y=[2, 2, 4])
+        pushfirst!(df, row, cols=:intersect)
+        @test df == DataFrame(x=[3, 1, 1, 3], y=[4, 2, 2, 4])
+        insert!(df, 3, row, cols=:intersect)
+        @test df == DataFrame(x=[3, 1, 3, 1, 3], y=[4, 2, 4, 2, 4])
+    end
+
+    # Rows lacking column :y must fail and leave `df` unchanged. Logging is muted only
+    # inside this scope, so an unexpected error cannot leave the suite's logger disabled.
+    with_logger(NullLogger()) do
+        # NamedTuple rows are expected to fail with ErrorException
+        for row in ((z=4, x=3), (z=1, p=4, x=3))
+            df = DataFrame(x=1:3, y=2:4)
+            @test_throws ErrorException push!(df, row, cols=:intersect)
+            @test_throws ErrorException pushfirst!(df, row, cols=:intersect)
+            @test_throws ErrorException insert!(df, 2, row, cols=:intersect)
+            @test df == DataFrame(x=1:3, y=2:4)
+        end
+
+        # Dict rows are expected to fail with KeyError
+        for row in (Dict(:z=>4, :x=>3), Dict(:p=>4, :x=>3, :z=>1))
+            df = DataFrame(x=1:3, y=2:4)
+            @test_throws KeyError push!(df, row, cols=:intersect)
+            @test_throws KeyError pushfirst!(df, row, cols=:intersect)
+            @test_throws KeyError insert!(df, 2, row, cols=:intersect)
+            @test df == DataFrame(x=1:3, y=2:4)
+        end
+    end
+end
+
+@testset "push! 
with :union" begin  # push! with cols=:union: NamedTuple rows first, then rows taken via [1, :]
+    df = DataFrame()
+    push!(df, (a=1, b=2))
+    a = df.a  # keep a handle on the column vector to check identity below
+    push!(df, (a=1, c=2), cols=:union)  # row lacks :b and brings a new :c
+    @test df ≅ DataFrame(a=[1, 1], b=[2, missing],
+                         c=[missing, 2])
+    @test df.a === a  # :a is still the very same vector
+    @test eltype(df.a) === Int
+
+    df = DataFrame(a=Int[])  # zero-row frame
+    push!(df, (a=1, c=2), cols=:union)
+    @test df == DataFrame(a=[1], c=[2])
+    @test eltype(df.a) === Int
+    @test eltype(df.c) === Int
+
+    df = DataFrame(a=Int[])
+    push!(df, (c=2,), cols=:union)  # row has no :a, so :a gets missing
+    @test df ≅ DataFrame(a=[missing], c=[2])
+    @test eltype(df.a) === Union{Int, Missing}
+    @test eltype(df.c) === Int
+
+    df = DataFrame(a=Int[])
+    push!(df, (c=missing,), cols=:union)
+    @test df ≅ DataFrame(a=[missing], c=[missing])
+    @test eltype(df.a) === Union{Int, Missing}
+    @test eltype(df.c) === Missing
+
+    push!(df, (c="a", d=1), cols=:union)  # continues on the same df as above
+    @test eltype(df.a) === Union{Int, Missing}
+    @test eltype(df.c) === Union{String, Missing}
+    @test eltype(df.d) === Union{Int, Missing}
+
+    push!(df, (a="b",), cols=:union)  # a String goes into the Union{Int, Missing} column
+    @test df ≅ DataFrame(a=[missing, missing, "b"],
+                         c=[missing, "a", missing],
+                         d=[missing, 1, missing])
+    @test eltype(df.a) === Any
+    @test eltype(df.c) === Union{String, Missing}
+    @test eltype(df.d) === Union{Int, Missing}
+
+    a = [1, 2, 3]
+    df = DataFrame(a=a, copycols=false)
+    push!(df, (a=11,), cols=:union)
+    @test df.a === a  # Int pushed into Int column: vector object unchanged
+    push!(df, (a=12.0,), cols=:union)
+    @test df.a !== a  # after pushing a Float64 the column is a different vector
+    @test eltype(df.a) === Float64
+
+    x = [1, 2, 3]
+    df = DataFrame(a=x, b=x, copycols=false)  # :a and :b are the same vector x
+    @test_throws AssertionError push!(df, (a=1, b=2, c=3), cols=:union)
+    @test df == DataFrame(a=x, b=x, copycols=false)  # contents unchanged after the failure
+    @test df.a === df.b === x
+
+    @test_throws AssertionError push!(df, (a=1, b=2.0, c=3), cols=:union)
+    @test df == DataFrame(a=x, b=x, copycols=false)
+    @test df.a === df.b === x
+
+    df = DataFrame()  # from here on: the same scenarios with rows obtained via DataFrame(...)[1, :]
+    push!(df, DataFrame(a=1, b=2)[1, :])
+    a = df.a
+    push!(df, DataFrame(a=1, c=2)[1, :], cols=:union)
+    @test df ≅ DataFrame(a=[1, 1], b=[2, missing], c=[missing, 2])
+    @test df.a === a
+    @test eltype(df.a) === Int
+
+    df = DataFrame(a=Int[])
+    push!(df, DataFrame(a=1, c=2)[1, :], cols=:union)
+    @test df == DataFrame(a=[1], c=[2])
+    @test eltype(df.a) === Int
+    @test eltype(df.c) === Int
+
+    df = DataFrame(a=Int[])
+    push!(df, DataFrame(c=2)[1, :], cols=:union)
+    @test df ≅ DataFrame(a=[missing], c=[2])
+    @test eltype(df.a) === Union{Int, Missing}
+    @test eltype(df.c) === Int
+
+    df = DataFrame(a=Int[])
+    push!(df, DataFrame(c=missing)[1, :], cols=:union)
+    @test df ≅ DataFrame(a=[missing], c=[missing])
+    @test eltype(df.a) === Union{Int, Missing}
+    @test eltype(df.c) === Missing
+
+    push!(df, DataFrame(c="a", d=1)[1, :], cols=:union)
+    @test eltype(df.a) === Union{Int, Missing}
+    @test eltype(df.c) === Union{String, Missing}
+    @test eltype(df.d) === Union{Int, Missing}
+
+    push!(df, DataFrame(a="b")[1, :], cols=:union)
+    @test df ≅ DataFrame(a=[missing, missing, "b"],
+                         c=[missing, "a", missing],
+                         d=[missing, 1, missing])
+    @test eltype(df.a) === Any
+    @test eltype(df.c) === Union{String, Missing}
+    @test eltype(df.d) === Union{Int, Missing}
+
+    a = [1, 2, 3]
+    df = DataFrame(a=a, copycols=false)
+    push!(df, DataFrame(a=1)[1, :], cols=:union)
+    @test df.a === a
+    push!(df, DataFrame(a=1.0)[1, :], cols=:union)
+    @test df.a !== a
+    @test eltype(df.a) === Float64
+
+    x = [1, 2, 3]
+    df = DataFrame(a=x, b=x, copycols=false)
+    @test_throws AssertionError push!(df, DataFrame(a=1, b=2, c=3)[1, :], cols=:union)
+    @test df == DataFrame(a=x, b=x, copycols=false)
+    @test df.a === df.b === x
+
+    @test_throws AssertionError push!(df, DataFrame(a=1, b=2.0, c=3)[1, :], cols=:union)
+    @test df == DataFrame(a=x, b=x, copycols=false)
+    @test df.a === df.b === x
+end
+
+@testset "pushfirst! 
with :union" begin  # pushfirst! with cols=:union: NamedTuple rows first, then rows taken via [1, :]
+    df = DataFrame()
+    pushfirst!(df, (a=1, b=2))
+    a = df.a  # keep a handle on the column vector to check identity below
+    pushfirst!(df, (a=1, c=2), cols=:union)  # new row lands first, so its missing :b is at index 1
+    @test df ≅ DataFrame(a=[1, 1], b=[missing, 2],
+                         c=[2, missing])
+    @test df.a === a  # :a is still the very same vector
+    @test eltype(df.a) === Int
+
+    df = DataFrame(a=Int[])  # zero-row frame
+    pushfirst!(df, (a=1, c=2), cols=:union)
+    @test df == DataFrame(a=[1], c=[2])
+    @test eltype(df.a) === Int
+    @test eltype(df.c) === Int
+
+    df = DataFrame(a=Int[])
+    pushfirst!(df, (c=2,), cols=:union)  # row has no :a, so :a gets missing
+    @test df ≅ DataFrame(a=[missing], c=[2])
+    @test eltype(df.a) === Union{Int, Missing}
+    @test eltype(df.c) === Int
+
+    df = DataFrame(a=Int[])
+    pushfirst!(df, (c=missing,), cols=:union)
+    @test df ≅ DataFrame(a=[missing], c=[missing])
+    @test eltype(df.a) === Union{Int, Missing}
+    @test eltype(df.c) === Missing
+
+    pushfirst!(df, (c="a", d=1), cols=:union)  # continues on the same df as above
+    @test eltype(df.a) === Union{Int, Missing}
+    @test eltype(df.c) === Union{String, Missing}
+    @test eltype(df.d) === Union{Int, Missing}
+
+    pushfirst!(df, (a="b",), cols=:union)  # a String goes into the Union{Int, Missing} column
+    @test df ≅ DataFrame(a=["b", missing, missing],
+                         c=[missing, "a", missing],
+                         d=[missing, 1, missing])
+    @test eltype(df.a) === Any
+    @test eltype(df.c) === Union{String, Missing}
+    @test eltype(df.d) === Union{Int, Missing}
+
+    a = [1, 2, 3]
+    df = DataFrame(a=a, copycols=false)
+    pushfirst!(df, (a=11,), cols=:union)
+    @test df.a === a  # Int pushed into Int column: vector object unchanged
+    pushfirst!(df, (a=12.0,), cols=:union)
+    @test df.a !== a  # after pushing a Float64 the column is a different vector
+    @test eltype(df.a) === Float64
+
+    x = [1, 2, 3]
+    df = DataFrame(a=x, b=x, copycols=false)  # :a and :b are the same vector x
+    @test_throws AssertionError pushfirst!(df, (a=1, b=2, c=3), cols=:union)
+    @test df == DataFrame(a=x, b=x, copycols=false)  # contents unchanged after the failure
+    @test df.a === df.b === x
+
+    @test_throws AssertionError pushfirst!(df, (a=1, b=2.0, c=3), cols=:union)
+    @test df == DataFrame(a=x, b=x, copycols=false)
+    @test df.a === df.b === x
+
+    df = DataFrame()  # from here on: the same scenarios with rows obtained via DataFrame(...)[1, :]
+    pushfirst!(df, DataFrame(a=1, b=2)[1, :])
+    a = df.a
+    pushfirst!(df, DataFrame(a=1, c=2)[1, :], cols=:union)
+    @test df ≅ DataFrame(a=[1, 1], b=[missing, 2], c=[2, missing])
+    @test df.a === a
+    @test eltype(df.a) === Int
+
+    df = DataFrame(a=Int[])
+    pushfirst!(df, DataFrame(a=1, c=2)[1, :], cols=:union)
+    @test df == DataFrame(a=[1], c=[2])
+    @test eltype(df.a) === Int
+    @test eltype(df.c) === Int
+
+    df = DataFrame(a=Int[])
+    pushfirst!(df, DataFrame(c=2)[1, :], cols=:union)
+    @test df ≅ DataFrame(a=[missing], c=[2])
+    @test eltype(df.a) === Union{Int, Missing}
+    @test eltype(df.c) === Int
+
+    df = DataFrame(a=Int[])
+    pushfirst!(df, DataFrame(c=missing)[1, :], cols=:union)
+    @test df ≅ DataFrame(a=[missing], c=[missing])
+    @test eltype(df.a) === Union{Int, Missing}
+    @test eltype(df.c) === Missing
+
+    pushfirst!(df, DataFrame(c="a", d=1)[1, :], cols=:union)
+    @test eltype(df.a) === Union{Int, Missing}
+    @test eltype(df.c) === Union{String, Missing}
+    @test eltype(df.d) === Union{Int, Missing}
+
+    pushfirst!(df, DataFrame(a="b")[1, :], cols=:union)
+    @test df ≅ DataFrame(a=["b", missing, missing],
+                         c=[missing, "a", missing],
+                         d=[missing, 1, missing])
+    @test eltype(df.a) === Any
+    @test eltype(df.c) === Union{String, Missing}
+    @test eltype(df.d) === Union{Int, Missing}
+
+    a = [1, 2, 3]
+    df = DataFrame(a=a, copycols=false)
+    pushfirst!(df, DataFrame(a=1)[1, :], cols=:union)
+    @test df.a === a
+    pushfirst!(df, DataFrame(a=1.0)[1, :], cols=:union)
+    @test df.a !== a
+    @test eltype(df.a) === Float64
+
+    x = [1, 2, 3]
+    df = DataFrame(a=x, b=x, copycols=false)
+    @test_throws AssertionError pushfirst!(df, DataFrame(a=1, b=2, c=3)[1, :], cols=:union)
+    @test df == DataFrame(a=x, b=x, copycols=false)
+    @test df.a === df.b === x
+
+    @test_throws AssertionError pushfirst!(df, DataFrame(a=1, b=2.0, c=3)[1, :], cols=:union)
+    @test df == DataFrame(a=x, b=x, copycols=false)
+    @test df.a === df.b === x
+end
+
+@testset "insert! 
with :union" begin
+    # insert! with cols=:union. The first half uses NamedTuple rows, the second half
+    # repeats the same scenarios with rows obtained via DataFrame(...)[1, :].
+    df = DataFrame()
+    insert!(df, 1, (a=1, b=2))
+    # keep a handle on the column vector to check that it is reused
+    a = df.a
+    insert!(df, 1, (a=1, c=2), cols=:union)
+    @test df ≅ DataFrame(a=[1, 1], b=[missing, 2],
+                         c=[2, missing])
+    @test df.a === a
+    @test eltype(df.a) === Int
+
+    # Insert into a zero-row frame. This scenario previously called pushfirst!, so the
+    # NamedTuple insert! path was not exercised here; it now matches the row-based
+    # scenario further below.
+    df = DataFrame(a=Int[])
+    insert!(df, 1, (a=1, c=2), cols=:union)
+    @test df == DataFrame(a=[1], c=[2])
+    @test eltype(df.a) === Int
+    @test eltype(df.c) === Int
+
+    # row has no :a, so :a receives missing
+    df = DataFrame(a=Int[])
+    insert!(df, 1, (c=2,), cols=:union)
+    @test df ≅ DataFrame(a=[missing], c=[2])
+    @test eltype(df.a) === Union{Int, Missing}
+    @test eltype(df.c) === Int
+
+    df = DataFrame(a=Int[])
+    insert!(df, 1, (c=missing,), cols=:union)
+    @test df ≅ DataFrame(a=[missing], c=[missing])
+    @test eltype(df.a) === Union{Int, Missing}
+    @test eltype(df.c) === Missing
+
+    # continues on the same df as above
+    insert!(df, 1, (c="a", d=1), cols=:union)
+    @test eltype(df.a) === Union{Int, Missing}
+    @test eltype(df.c) === Union{String, Missing}
+    @test eltype(df.d) === Union{Int, Missing}
+
+    # a String goes into the Union{Int, Missing} column, in the middle of the frame
+    insert!(df, 2, (a="b",), cols=:union)
+    @test df ≅ DataFrame(a=[missing, "b", missing],
+                         c=["a", missing, missing],
+                         d=[1, missing, missing])
+    @test eltype(df.a) === Any
+    @test eltype(df.c) === Union{String, Missing}
+    @test eltype(df.d) === Union{Int, Missing}
+
+    # Int into Int column keeps the vector; a Float64 value yields a different vector
+    a = [1, 2, 3]
+    df = DataFrame(a=a, copycols=false)
+    insert!(df, 2, (a=11,), cols=:union)
+    @test df.a === a
+    insert!(df, 2, (a=12.0,), cols=:union)
+    @test df.a !== a
+    @test eltype(df.a) === Float64
+
+    # :a and :b are the same vector x: insertion must throw and leave df unchanged
+    x = [1, 2, 3]
+    df = DataFrame(a=x, b=x, copycols=false)
+    @test_throws AssertionError insert!(df, 2, (a=1, b=2, c=3), cols=:union)
+    @test df == DataFrame(a=x, b=x, copycols=false)
+    @test df.a === df.b === x
+
+    @test_throws AssertionError insert!(df, 2, (a=1, b=2.0, c=3), cols=:union)
+    @test df == DataFrame(a=x, b=x, copycols=false)
+    @test df.a === df.b === x
+
+    # same scenarios with rows obtained via DataFrame(...)[1, :]
+    df = DataFrame()
+    insert!(df, 1, DataFrame(a=1, b=2)[1, :])
+    a = df.a
+    insert!(df, 1, DataFrame(a=1, c=2)[1, :], cols=:union)
+    @test df ≅ DataFrame(a=[1, 1], b=[missing, 2], c=[2, missing])
+    @test df.a === a
+    @test eltype(df.a) === Int
+
+    df = DataFrame(a=Int[])
+    insert!(df, 1, DataFrame(a=1, c=2)[1, :], cols=:union)
+    @test df == DataFrame(a=[1], c=[2])
+    @test eltype(df.a) === Int
+    @test eltype(df.c) === Int
+
+    df = DataFrame(a=Int[])
+    insert!(df, 1, DataFrame(c=2)[1, :], cols=:union)
+    @test df ≅ DataFrame(a=[missing], c=[2])
+    @test eltype(df.a) === Union{Int, Missing}
+    @test eltype(df.c) === Int
+
+    df = DataFrame(a=Int[])
+    insert!(df, 1, DataFrame(c=missing)[1, :], cols=:union)
+    @test df ≅ DataFrame(a=[missing], c=[missing])
+    @test eltype(df.a) === Union{Int, Missing}
+    @test eltype(df.c) === Missing
+
+    insert!(df, 1, DataFrame(c="a", d=1)[1, :], cols=:union)
+    @test eltype(df.a) === Union{Int, Missing}
+    @test eltype(df.c) === Union{String, Missing}
+    @test eltype(df.d) === Union{Int, Missing}
+
+    insert!(df, 2, DataFrame(a="b")[1, :], cols=:union)
+    @test df ≅ DataFrame(a=[missing, "b", missing],
+                         c=["a", missing, missing],
+                         d=[1, missing, missing])
+    @test eltype(df.a) === Any
+    @test eltype(df.c) === Union{String, Missing}
+    @test eltype(df.d) === Union{Int, Missing}
+
+    a = [1, 2, 3]
+    df = DataFrame(a=a, copycols=false)
+    insert!(df, 2, DataFrame(a=1)[1, :], cols=:union)
+    @test df.a === a
+    insert!(df, 2, DataFrame(a=1.0)[1, :], cols=:union)
+    @test df.a !== a
+    @test eltype(df.a) === Float64
+
+    x = [1, 2, 3]
+    df = DataFrame(a=x, b=x, copycols=false)
+    @test_throws AssertionError insert!(df, 2, DataFrame(a=1, b=2, c=3)[1, :], cols=:union)
+    @test df == DataFrame(a=x, b=x, copycols=false)
+    @test df.a === df.b === x
+
+    @test_throws AssertionError insert!(df, 2, DataFrame(a=1, b=2.0, c=3)[1, :], cols=:union)
+    @test df == DataFrame(a=x, b=x, copycols=false)
+    @test df.a === df.b === x
+end
+
+@testset "push!/pushfirst!/insert! 
with promote options" begin  # a String row into an Int column: MethodError unless promote=true
+    df = DataFrame(a=1:3)
+    with_logger(SimpleLogger(IOBuffer())) do  # log output of the failing call goes to a throwaway buffer
+        @test_throws MethodError push!(df, ["a"])
+    end
+    @test push!(df, ["a"], promote=true) == DataFrame(a=[1:3; "a"])
+
+    df = DataFrame(a=1:3)
+    with_logger(SimpleLogger(IOBuffer())) do
+        @test_throws MethodError pushfirst!(df, ["a"])
+    end
+    @test pushfirst!(df, ["a"], promote=true) == DataFrame(a=["a"; 1:3])
+
+    df = DataFrame(a=1:3)
+    with_logger(SimpleLogger(IOBuffer())) do
+        @test_throws MethodError insert!(df, 2, ["a"])
+    end
+    @test insert!(df, 2, ["a"], promote=true) == DataFrame(a=[1, "a", 2, 3])
+
+    for v in ((a="a",), DataFrame(a="a")[1, :])
+        for cols in [:orderequal, :setequal, :intersect]  # here the call without promote=true must throw
+            df = DataFrame(a=1:3)
+            with_logger(SimpleLogger(IOBuffer())) do
+                @test_throws MethodError push!(df, v, cols=cols)
+            end
+            @test push!(df, v, cols=cols, promote=true) == DataFrame(a=[1:3; "a"])
+
+            df = DataFrame(a=1:3)
+            with_logger(SimpleLogger(IOBuffer())) do
+                @test_throws MethodError pushfirst!(df, v, cols=cols)
+            end
+            @test pushfirst!(df, v, cols=cols, promote=true) == DataFrame(a=["a"; 1:3])
+
+            df = DataFrame(a=1:3)
+            with_logger(SimpleLogger(IOBuffer())) do
+                @test_throws MethodError insert!(df, 2, v, cols=cols)
+            end
+            @test insert!(df, 2, v, cols=cols, promote=true) == DataFrame(a=[1, "a", 2, 3])
+
+        end
+        for cols in [:subset, :union]  # here promote=false must throw and the call without promote succeeds
+            df = DataFrame(a=1:3, b=11:13)
+            with_logger(SimpleLogger(IOBuffer())) do
+                @test_throws MethodError push!(df, v, cols=cols, promote=false)
+            end
+            @test push!(df, v, cols=cols) ≅ DataFrame(a=[1:3; "a"], b=[11:13; missing])
+
+            df = DataFrame(a=1:3, b=11:13)
+            with_logger(SimpleLogger(IOBuffer())) do
+                @test_throws MethodError pushfirst!(df, v, cols=cols, promote=false)
+            end
+            @test pushfirst!(df, v, cols=cols) ≅ DataFrame(a=["a"; 1:3], b=[missing; 11:13])
+
+            df = DataFrame(a=1:3, b=11:13)
+            with_logger(SimpleLogger(IOBuffer())) do
+                @test_throws MethodError insert!(df, 2, v, cols=cols, promote=false)
+            end
+            @test insert!(df, 2, v, cols=cols) ≅ DataFrame(a=[1, "a", 2, 3], b=[11, missing, 12, 13])
+        end
+    end
+end
+
+@testset "push!/pushfirst!/insert! with :setequal and wrong number of entries" begin
+    df = DataFrame(a=1:3)  # one column; no cols keyword is passed in the calls below
+    @test_throws ArgumentError push!(df, (a=10, b=20))
+    @test_throws ArgumentError push!(df, "a")
+    @test_throws ArgumentError pushfirst!(df, (a=10, b=20))
+    @test_throws ArgumentError pushfirst!(df, "a")
+    @test_throws ArgumentError insert!(df, 2, (a=10, b=20))
+    @test_throws ArgumentError insert!(df, 2, "a")
+end
+
+@testset "push!/pushfirst!/insert! with self" begin
+    df = DataFrame(a=1:3, b=2:4, c=3:5)
+    @test push!(df, df[2, :]) == DataFrame(a=[1:3; 2], b=[2:4; 3], c=[3:5; 4])  # row taken from df itself
+    @test pushfirst!(df, df[3, :]) == DataFrame(a=[3; 1:3; 2], b=[4; 2:4; 3], c=[5; 3:5; 4])
+    @test insert!(df, 3, df[1, :]) == DataFrame(a=[3; 1; 3; 2:3; 2], b=[4; 2; 4; 3:4; 3], c=[5; 3; 5; 4:5; 4])
+    df = DataFrame(a=1:3, b=2:4)
+    df.c = df.a  # :c is assigned the column obtained from df.a
+    @test_throws AssertionError push!(df, df[2, :])
+    @test_throws AssertionError pushfirst!(df, df[2, :])
+    @test_throws AssertionError insert!(df, 2, df[2, :])
+    @test df == DataFrame(a=1:3, b=2:4, c=1:3)  # contents unchanged after the failures
+end
+
+@testset "multicolumn aliasing" begin
+    df = DataFrame(a1=1:3, b1=11:13)
+    df.a2 = df.a1  # a2, a3, a4 are assigned df.a1; b2, b3 are assigned df.b1
+    df.a3 = df.a1
+    df.b2 = df.b1
+    df.b3 = df.b1
+    df.a4 = df.a1
+    refdf = copy(df)  # reference copy used to check df is unchanged after each failure
+
+    buf = IOBuffer()
+    sl = SimpleLogger(buf)  # logger writing into buf so the logged message can be inspected
+
+    with_logger(sl) do
+        @test_throws AssertionError push!(df, 1:7)
+    end
+    @test occursin("Error adding value to column :a2", String(take!(buf)))  # take! also empties buf for the next check
+    @test df == refdf
+
+    with_logger(sl) do
+        @test_throws AssertionError pushfirst!(df, 1:7)
+    end
+    @test occursin("Error adding value to column :a2", String(take!(buf)))
+    @test df == refdf
+
+    with_logger(sl) do
+        @test_throws AssertionError insert!(df, 2, 1:7)
+    end
+    @test occursin("Error adding value to column :a2", String(take!(buf)))
+    @test df == refdf
+
+    with_logger(sl) do
+        @test_throws AssertionError push!(df, (a1=1, b1=2, a2=3, a3=4, b2=5, b3=6, a4=7))
+    end
+    @test occursin("Error adding value to column :a2", String(take!(buf)))
+    @test df == refdf
+
+    with_logger(sl) do
+        @test_throws AssertionError pushfirst!(df, (a1=1, b1=2, a2=3, a3=4, b2=5, b3=6, a4=7))
+    end
+    @test occursin("Error adding value to column :a2", String(take!(buf)))
+    @test df == refdf
+
+    with_logger(sl) do
+        @test_throws AssertionError insert!(df, 2, (a1=1, b1=2, a2=3, a3=4, b2=5, b3=6, a4=7))
+    end
+    @test occursin("Error adding value to column :a2", String(take!(buf)))
+    @test df == refdf
+end
+
+end # module diff --git a/test/runtests.jl b/test/runtests.jl index 08a5fa1fdb..d7f076e787 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -21,6 +21,7 @@ my_tests = ["utils.jl", "data.jl", "index.jl", "dataframe.jl", + "insertion.jl", "select.jl", "reshape.jl", "dataframerow.jl",