Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

allow push!/pushfirst!/append!/prepend! with multiple values #3372

Merged
merged 17 commits into from
Oct 5, 2023
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
# DataFrames.jl v1.7.0 Release Notes

## New functionalities

* Allow passing multiple values to add in `push!`, `pushfirst!`,
`append!`, and `prepend!`
([#3372](https://github.com/JuliaData/DataFrames.jl/pull/3372))

# DataFrames.jl v1.6.1 Release Notes

## Bug fixes
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name = "DataFrames"
uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
version = "1.6.1"
version = "1.7.0"

[deps]
Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
Expand Down
189 changes: 170 additions & 19 deletions src/dataframe/insertion.jl
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
"""
append!(df::DataFrame, df2::AbstractDataFrame; cols::Symbol=:setequal,
promote::Bool=(cols in [:union, :subset]))
append!(df::DataFrame, table; cols::Symbol=:setequal,
append!(df::DataFrame, tables...; cols::Symbol=:setequal,
promote::Bool=(cols in [:union, :subset]))

Add the rows of `df2` to the end of `df`. If the second argument `table` is not
an `AbstractDataFrame` then it is converted using `DataFrame(table,
copycols=false)` before being appended.
Add the rows of tables passed as `tables` to the end of `df`. If the table is not
an `AbstractDataFrame` then it is converted using
`DataFrame(table, copycols=false)` before being appended.

The exact behavior of `append!` depends on the `cols` argument:
* If `cols == :setequal` (this is the default) then `df2` must contain exactly
Expand Down Expand Up @@ -78,18 +76,53 @@
4 │ 4 4
5 │ 5 5
6 │ 6 6

julia> append!(df2, DataFrame(A=1), (; C=1:2), cols=:union)
6×3 DataFrame
Row │ A B C
│ Float64? Int64? Int64?
─────┼─────────────────────────────
1 │ 4.0 4 missing
2 │ 5.0 5 missing
3 │ 6.0 6 missing
4 │ 1.0 missing missing
5 │ missing missing 1
6 │ missing missing 2
```
"""
function Base.append!(df1::DataFrame, df2::AbstractDataFrame; cols::Symbol=:setequal,
                      promote::Bool=(cols in [:union, :subset]))
    # Appending is the `atend=true` case of the shared append/prepend routine.
    return _append_or_prepend!(df1, df2, cols=cols, promote=promote, atend=true)
end

function Base.append!(df::DataFrame, table; cols::Symbol=:setequal,
                      promote::Bool=(cols in [:union, :subset]))
    # A `Dict` is unordered, so it cannot be matched against `df` by column order.
    (cols == :orderequal && table isa Dict) &&
        throw(ArgumentError("passing `Dict` as `table` when `cols` is equal to " *
                            "`:orderequal` is not allowed as it is unordered"))
    # Wrap `table` in a `DataFrame` without copying columns, then re-dispatch on it.
    converted = DataFrame(table, copycols=false)
    return append!(df, converted, cols=cols, promote=promote)
end

function Base.append!(df::DataFrame, @nospecialize tables...;
                      cols::Symbol=:setequal,
                      promote::Bool=(cols in [:union, :subset]))
    # Validate `cols` up front so that an invalid value is reported even when
    # no tables are passed.
    allowed_cols = (:orderequal, :setequal, :intersect, :subset, :union)
    if !(cols in allowed_cols)
        throw(ArgumentError("`cols` keyword argument must be " *
                            ":orderequal, :setequal, :intersect, :subset or :union)"))
    end

    # Append the tables one after another, left to right, threading the result
    # of each single-table `append!` call into the next one.
    result = df
    for table in collect(Any, tables)
        result = append!(result, table, cols=cols, promote=promote)
    end
    return result
end

"""
prepend!(df::DataFrame, df2::AbstractDataFrame; cols::Symbol=:setequal,
promote::Bool=(cols in [:union, :subset]))
prepend!(df::DataFrame, table; cols::Symbol=:setequal,
prepend!(df::DataFrame, tables...; cols::Symbol=:setequal,
promote::Bool=(cols in [:union, :subset]))

Add the rows of tables passed as `tables` to the beginning of `df`. If the table is not
an `AbstractDataFrame` then it is converted using
`DataFrame(table, copycols=false)` before being prepended.

Add the rows of `df2` to the beginning of `df`. If the second argument `table`
is not an `AbstractDataFrame` then it is converted using `DataFrame(table,
copycols=false)` before being prepended.
Expand Down Expand Up @@ -164,12 +197,45 @@
4 │ 1 1
5 │ 2 2
6 │ 3 3

julia> prepend!(df2, DataFrame(A=1), (; C=1:2), cols=:union)
6×3 DataFrame
Row │ A B C
│ Float64? Int64? Int64?
─────┼─────────────────────────────
1 │ 1.0 missing missing
2 │ missing missing 1
3 │ missing missing 2
4 │ 4.0 4 missing
5 │ 5.0 5 missing
6 │ 6.0 6 missing
```
"""
function Base.prepend!(df1::DataFrame, df2::AbstractDataFrame; cols::Symbol=:setequal,
                       promote::Bool=(cols in [:union, :subset]))
    # Prepending is the `atend=false` case of the shared append/prepend routine.
    return _append_or_prepend!(df1, df2, cols=cols, promote=promote, atend=false)
end

function Base.prepend!(df::DataFrame, table; cols::Symbol=:setequal,
                       promote::Bool=(cols in [:union, :subset]))
    # A `Dict` is unordered, so it cannot be matched against `df` by column order.
    (cols == :orderequal && table isa Dict) &&
        throw(ArgumentError("passing `Dict` as `table` when `cols` is equal to " *
                            "`:orderequal` is not allowed as it is unordered"))
    # Wrap `table` in a `DataFrame` without copying columns, then re-dispatch on it.
    converted = DataFrame(table, copycols=false)
    return prepend!(df, converted, cols=cols, promote=promote)
end

function Base.prepend!(df::DataFrame, @nospecialize tables...;
                       cols::Symbol=:setequal,
                       promote::Bool=(cols in [:union, :subset]))
    # Validate `cols` up front so that an invalid value is reported even when
    # no tables are passed.
    allowed_cols = (:orderequal, :setequal, :intersect, :subset, :union)
    if !(cols in allowed_cols)
        throw(ArgumentError("`cols` keyword argument must be " *
                            ":orderequal, :setequal, :intersect, :subset or :union)"))
    end

    # Prepend starting from the last table, so that in the result the tables
    # appear in the order they were passed, followed by the original rows.
    result = df
    for table in Iterators.reverse(collect(Any, tables))
        result = prepend!(result, table, cols=cols, promote=promote)
    end
    return result
end

function _append_or_prepend!(df1::DataFrame, df2::AbstractDataFrame; cols::Symbol,
promote::Bool, atend::Bool)
if !(cols in (:orderequal, :setequal, :intersect, :subset, :union))
Expand Down Expand Up @@ -355,6 +421,10 @@
added to `df` (using `missing` for existing rows) and a `missing` value is
pushed to columns missing in `row` that are present in `df`.

If `row` is not a `DataFrameRow`, `NamedTuple`, `AbstractDict`, or `Tables.AbstractRow`
it is not allowed to pass `cols` keyword argument other than the default `:setequal`,
bkamins marked this conversation as resolved.
Show resolved Hide resolved
because such rows do not provide column name information.

If `promote=true` and element type of a column present in `df` does not allow
the type of a pushed argument then a new column with a promoted element type
allowing it is freshly allocated and stored in `df`. If `promote=false` an error
Expand All @@ -371,19 +441,21 @@
"""

"""
push!(df::DataFrame, row::Union{Tuple, AbstractArray}; promote::Bool=false)
push!(df::DataFrame, row::Union{Tuple, AbstractArray}...;
cols::Symbol=:setequal, promote::Bool=false)
push!(df::DataFrame, row::Union{DataFrameRow, NamedTuple, AbstractDict,
Tables.AbstractRow};
Tables.AbstractRow}...;
cols::Symbol=:setequal, promote::Bool=(cols in [:union, :subset]))

Add one row at the end of `df` in-place, taking the values from `row`.
Several rows can be added by passing them as separate arguments.

$INSERTION_COMMON

See also: [`pushfirst!`](@ref), [`insert!`](@ref)

# Examples
```jldoctest

Check failure on line 458 in src/dataframe/insertion.jl

View workflow job for this annotation

GitHub Actions / Documentation

doctest failure in ~/work/DataFrames.jl/DataFrames.jl/src/dataframe/insertion.jl:458-536 ```jldoctest julia> df = DataFrame(A='a':'c', B=1:3) 3×2 DataFrame Row │ A B │ Char Int64 ─────┼───────────── 1 │ a 1 2 │ b 2 3 │ c 3 julia> push!(df, (true, false), promote=true) 4×2 DataFrame Row │ A B │ Any Int64 ─────┼───────────── 1 │ a 1 2 │ b 2 3 │ c 3 4 │ true 0 julia> push!(df, df[1, :]) 5×2 DataFrame Row │ A B │ Any Int64 ─────┼───────────── 1 │ a 1 2 │ b 2 3 │ c 3 4 │ true 0 5 │ a 1 julia> push!(df, (C="something", A=11, B=12), cols=:intersect) 6×2 DataFrame Row │ A B │ Any Int64 ─────┼───────────── 1 │ a 1 2 │ b 2 3 │ c 3 4 │ true 0 5 │ a 1 6 │ 11 12 julia> push!(df, Dict(:A=>1.0, :C=>1.0), cols=:union) 7×3 DataFrame Row │ A B C │ Any Int64? Float64? ─────┼────────────────────────── 1 │ a 1 missing 2 │ b 2 missing 3 │ c 3 missing 4 │ true 0 missing 5 │ a 1 missing 6 │ 11 12 missing 7 │ 1.0 missing 1.0 julia> push!(df, NamedTuple(), cols=:subset) 8×3 DataFrame Row │ A B C │ Any Int64? Float64? 
─────┼───────────────────────────── 1 │ a 1 missing 2 │ b 2 missing 3 │ c 3 missing 4 │ true 0 missing 5 │ a 1 missing 6 │ 11 12 missing 7 │ 1.0 missing 1.0 8 │ missing missing missing julia> push!(DataFrame(a=1, b=2), (3, 4), (b=6, a=5)) 3×2 DataFrame Row │ a b │ Int64 Int64 ─────┼────────────── 1 │ 1 2 2 │ 3 4 3 │ 5 6 ``` Subexpression: push!(DataFrame(a=1, b=2), (3, 4), (b=6, a=5)) Evaluated output: ERROR: ArgumentError: Mixing rows with column names and without column names in a single `push!` call is not allowed Stacktrace: [1] push!(df::DataFrame, rows::ByRow{Tuple{Tuple{Int64, Int64}, NamedTuple{(:b, :a), Tuple{Int64, Int64}}}}; cols::Symbol, promote::Bool) @ DataFrames ~/work/DataFrames.jl/DataFrames.jl/src/dataframe/insertion.jl:1110 [2] push!(::DataFrame, ::Any, ::Vararg{Any}; cols::Symbol, promote::Bool) @ DataFrames ~/work/DataFrames.jl/DataFrames.jl/src/dataframe/insertion.jl:1119 [3] push!(::DataFrame, ::Any, ::Any) @ DataFrames ~/work/DataFrames.jl/DataFrames.jl/src/dataframe/insertion.jl:1119 [4] top-level scope @ none:1 Expected output: 3×2 DataFrame Row │ a b │ Int64 Int64 ─────┼────────────── 1 │ 1 2 2 │ 3 4 3 │ 5 6 diff = Warning: Diff output requires color. 3×2 DataFrame Row │ a b │ Int64 Int64 ─────┼────────────── 1 │ 1 2 2 │ 3 4 3 │ 5 6ERROR: ArgumentError: Mixing rows with column names and without column names in a single `push!` call is not allowed Stacktrace: [1] push!(df::DataFrame, rows::ByRow{Tuple{Tuple{Int64, Int64}, NamedTuple{(:b, :a), Tuple{Int64, Int64}}}}; cols::Symbol, promote::Bool) @ DataFrames ~/work/DataFrames.jl/DataFrames.jl/src/dataframe/insertion.jl:1110 [2] push!(::DataFrame, ::Any, ::Vararg{Any}; cols::Symbol, promote::Bool) @ DataFrames ~/work/DataFrames.jl/DataFrames.jl/src/dataframe/insertion.jl:1119 [3] push!(::DataFrame, ::Any, ::Any) @ DataFrames ~/work/DataFrames.jl/DataFrames.jl/src/dataframe/insertion.jl:1119 [4] top-level scope @ none:1
julia> df = DataFrame(A='a':'c', B=1:3)
3×2 DataFrame
Row │ A B
Expand Down Expand Up @@ -452,25 +524,43 @@
6 │ 11 12 missing
7 │ 1.0 missing 1.0
8 │ missing missing missing

julia> push!(DataFrame(a=1, b=2), (3, 4), (5, 6))
3×2 DataFrame
Row │ a b
│ Int64 Int64
─────┼──────────────
1 │ 1 2
2 │ 3 4
3 │ 5 6
```
"""
Base.push!(df::DataFrame, row::Any; promote::Bool=false) =
_row_inserter!(df, -1, row, Val{:push}(), promote)
function Base.push!(df::DataFrame, row::Any;
                    cols=:setequal, promote::Bool=false)
    # Rows reaching this method do not provide column names, so only the
    # default `cols` value is accepted.
    cols === :setequal ||
        throw(ArgumentError("Passing `cols` keyword argument is not supported " *
                            "because `row` does not provide column names"))

    return _row_inserter!(df, -1, row, Val{:push}(), promote)
end

"""
pushfirst!(df::DataFrame, row::Union{Tuple, AbstractArray}; promote::Bool=false)
pushfirst!(df::DataFrame, row::Union{Tuple, AbstractArray}...;
cols::Symbol=:setequal, promote::Bool=false)
pushfirst!(df::DataFrame, row::Union{DataFrameRow, NamedTuple, AbstractDict,
Tables.AbstractRow};
Tables.AbstractRow}...;
cols::Symbol=:setequal, promote::Bool=(cols in [:union, :subset]))

Add one row at the beginning of `df` in-place, taking the values from `row`.
bkamins marked this conversation as resolved.
Show resolved Hide resolved
Several rows can be added by passing them as separate arguments.

$INSERTION_COMMON

See also: [`push!`](@ref), [`insert!`](@ref)

# Examples
```jldoctest

Check failure on line 563 in src/dataframe/insertion.jl

View workflow job for this annotation

GitHub Actions / Documentation

doctest failure in ~/work/DataFrames.jl/DataFrames.jl/src/dataframe/insertion.jl:563-641 ```jldoctest julia> df = DataFrame(A='a':'c', B=1:3) 3×2 DataFrame Row │ A B │ Char Int64 ─────┼───────────── 1 │ a 1 2 │ b 2 3 │ c 3 julia> pushfirst!(df, (true, false), promote=true) 4×2 DataFrame Row │ A B │ Any Int64 ─────┼───────────── 1 │ true 0 2 │ a 1 3 │ b 2 4 │ c 3 julia> pushfirst!(df, df[1, :]) 5×2 DataFrame Row │ A B │ Any Int64 ─────┼───────────── 1 │ true 0 2 │ true 0 3 │ a 1 4 │ b 2 5 │ c 3 julia> pushfirst!(df, (C="something", A=11, B=12), cols=:intersect) 6×2 DataFrame Row │ A B │ Any Int64 ─────┼───────────── 1 │ 11 12 2 │ true 0 3 │ true 0 4 │ a 1 5 │ b 2 6 │ c 3 julia> pushfirst!(df, Dict(:A=>1.0, :C=>1.0), cols=:union) 7×3 DataFrame Row │ A B C │ Any Int64? Float64? ─────┼────────────────────────── 1 │ 1.0 missing 1.0 2 │ 11 12 missing 3 │ true 0 missing 4 │ true 0 missing 5 │ a 1 missing 6 │ b 2 missing 7 │ c 3 missing julia> pushfirst!(df, NamedTuple(), cols=:subset) 8×3 DataFrame Row │ A B C │ Any Int64? Float64? 
─────┼───────────────────────────── 1 │ missing missing missing 2 │ 1.0 missing 1.0 3 │ 11 12 missing 4 │ true 0 missing 5 │ true 0 missing 6 │ a 1 missing 7 │ b 2 missing 8 │ c 3 missing julia> pushfirst!(DataFrame(a=1, b=2), (3, 4), (b=6, a=5)) 3×2 DataFrame Row │ a b │ Int64 Int64 ─────┼────────────── 1 │ 3 4 2 │ 5 6 3 │ 1 2 ``` Subexpression: pushfirst!(DataFrame(a=1, b=2), (3, 4), (b=6, a=5)) Evaluated output: ERROR: ArgumentError: Mixing rows with column names and without column names in a single `push!` call is not allowed Stacktrace: [1] pushfirst!(df::DataFrame, rows::ByRow{Tuple{Tuple{Int64, Int64}, NamedTuple{(:b, :a), Tuple{Int64, Int64}}}}; cols::Symbol, promote::Bool) @ DataFrames ~/work/DataFrames.jl/DataFrames.jl/src/dataframe/insertion.jl:1129 [2] pushfirst!(::DataFrame, ::Any, ::Vararg{Any}; cols::Symbol, promote::Bool) @ DataFrames ~/work/DataFrames.jl/DataFrames.jl/src/dataframe/insertion.jl:1138 [3] pushfirst!(::DataFrame, ::Any, ::Any) @ DataFrames ~/work/DataFrames.jl/DataFrames.jl/src/dataframe/insertion.jl:1138 [4] top-level scope @ none:1 Expected output: 3×2 DataFrame Row │ a b │ Int64 Int64 ─────┼────────────── 1 │ 3 4 2 │ 5 6 3 │ 1 2 diff = Warning: Diff output requires color. 3×2 DataFrame Row │ a b │ Int64 Int64 ─────┼────────────── 1 │ 3 4 2 │ 5 6 3 │ 1 2ERROR: ArgumentError: Mixing rows with column names and without column names in a single `push!` call is not allowed Stacktrace: [1] pushfirst!(df::DataFrame, rows::ByRow{Tuple{Tuple{Int64, Int64}, NamedTuple{(:b, :a), Tuple{Int64, Int64}}}}; cols::Symbol, promote::Bool) @ DataFrames ~/work/DataFrames.jl/DataFrames.jl/src/dataframe/insertion.jl:1129 [2] pushfirst!(::DataFrame, ::Any, ::Vararg{Any}; cols::Symbol, promote::Bool) @ DataFrames ~/work/DataFrames.jl/DataFrames.jl/src/dataframe/insertion.jl:1138 [3] pushfirst!(::DataFrame, ::Any, ::Any) @ DataFrames ~/work/DataFrames.jl/DataFrames.jl/src/dataframe/insertion.jl:1138 [4] top-level scope @ none:1
julia> df = DataFrame(A='a':'c', B=1:3)
3×2 DataFrame
Row │ A B
Expand Down Expand Up @@ -539,13 +629,30 @@
6 │ a 1 missing
7 │ b 2 missing
8 │ c 3 missing

julia> pushfirst!(DataFrame(a=1, b=2), (3, 4), (5, 6))
3×2 DataFrame
Row │ a b
│ Int64 Int64
─────┼──────────────
1 │ 3 4
2 │ 5 6
3 │ 1 2
```
"""
Base.pushfirst!(df::DataFrame, row::Any; promote::Bool=false) =
_row_inserter!(df, -1, row, Val{:pushfirst}(), promote)
function Base.pushfirst!(df::DataFrame, row::Any;
                         cols=:setequal, promote::Bool=false)
    # Rows reaching this method do not provide column names, so only the
    # default `cols` value is accepted.
    cols === :setequal ||
        throw(ArgumentError("Passing `cols` keyword argument is not supported " *
                            "because `row` does not provide column names"))

    return _row_inserter!(df, -1, row, Val{:pushfirst}(), promote)
end

"""
insert!(df::DataFrame, index::Integer, row::Union{Tuple, AbstractArray}; promote::Bool=false)
insert!(df::DataFrame, index::Integer, row::Union{Tuple, AbstractArray};
cols::Symbol=:setequal, promote::Bool=false)
insert!(df::DataFrame, index::Integer, row::Union{DataFrameRow, NamedTuple,
AbstractDict, Tables.AbstractRow};
cols::Symbol=:setequal, promote::Bool=(cols in [:union, :subset]))
Expand Down Expand Up @@ -629,7 +736,13 @@
8 │ 1.0 missing 1.0
```
"""
function Base.insert!(df::DataFrame, index::Integer, row::Any; promote::Bool=false)
function Base.insert!(df::DataFrame, index::Integer, row::Any;
cols=:setequal, promote::Bool=false)
if cols !== :setequal
throw(ArgumentError("Passing `cols` keyword argument is not supported " *
"because `row` does not provide column names"))
bkamins marked this conversation as resolved.
Show resolved Hide resolved
end

index isa Bool && throw(ArgumentError("invalid index: $index of type Bool"))
1 <= index <= nrow(df)+1 ||
throw(ArgumentError("invalid index: $index for data frame with $(nrow(df)) rows"))
Expand Down Expand Up @@ -986,3 +1099,41 @@
_drop_all_nonnote_metadata!(df)
return df
end

function Base.push!(df::DataFrame, rows::ByRow;
                    cols::Symbol=:setequal,
                    promote::Bool=(cols in [:union, :subset]))
    # `rows.fun` is the collection of rows wrapped by the vararg `push!` method.
    # Count the rows that carry column names. `DataFrameRow` must be included:
    # it provides column names just like `NamedTuple`, `AbstractDict` and
    # `Tables.AbstractRow`, so leaving it out would wrongly report e.g. a
    # `DataFrameRow` pushed together with a `NamedTuple` as mixed input.
    with_names_count = count(rows.fun) do row
        row isa Union{DataFrameRow,AbstractDict,NamedTuple,Tables.AbstractRow}
    end
    # Either all rows must provide column names or none of them may.
    if 0 < with_names_count < length(rows.fun)
        throw(ArgumentError("Mixing rows with column names and without column names " *
                            "in a single `push!` call is not allowed"))
    end
    # Push the rows one by one, left to right, so they land in `df` in the
    # order in which they were passed.
    return foldl((df, row) -> push!(df, row, cols=cols, promote=promote),
                 collect(Any, rows.fun), init=df)
end

function Base.push!(df::DataFrame, @nospecialize rows...;
                    cols::Symbol=:setequal,
                    promote::Bool=(cols in [:union, :subset]))
    # Bundle all passed rows into a single `ByRow` wrapper and forward them to
    # the `push!` method accepting `ByRow`.
    wrapped = ByRow(rows)
    return push!(df, wrapped, cols=cols, promote=promote)
end

function Base.pushfirst!(df::DataFrame, rows::ByRow;
                         cols::Symbol=:setequal,
                         promote::Bool=(cols in [:union, :subset]))
    # `rows.fun` is the collection of rows wrapped by the vararg `pushfirst!`
    # method. Count the rows that carry column names. `DataFrameRow` must be
    # included: it provides column names just like `NamedTuple`, `AbstractDict`
    # and `Tables.AbstractRow`, so leaving it out would wrongly report e.g. a
    # `DataFrameRow` passed together with a `NamedTuple` as mixed input.
    with_names_count = count(rows.fun) do row
        row isa Union{DataFrameRow,AbstractDict,NamedTuple,Tables.AbstractRow}
    end
    # Either all rows must provide column names or none of them may.
    # The message names `pushfirst!`, as that is the function the user called.
    if 0 < with_names_count < length(rows.fun)
        throw(ArgumentError("Mixing rows with column names and without column names " *
                            "in a single `pushfirst!` call is not allowed"))
    end
    # Insert starting from the last row, so that the rows end up at the top of
    # `df` in the order in which they were passed.
    return foldr((row, df) -> pushfirst!(df, row, cols=cols, promote=promote),
                 collect(Any, rows.fun), init=df)
end

function Base.pushfirst!(df::DataFrame, @nospecialize rows...;
                         cols::Symbol=:setequal,
                         promote::Bool=(cols in [:union, :subset]))
    # Bundle all passed rows into a single `ByRow` wrapper and forward them to
    # the `pushfirst!` method accepting `ByRow`.
    wrapped = ByRow(rows)
    return pushfirst!(df, wrapped, cols=cols, promote=promote)
end
20 changes: 1 addition & 19 deletions src/other/tables.jl
Original file line number Diff line number Diff line change
Expand Up @@ -63,31 +63,13 @@ function DataFrame(x; copycols::Union{Nothing, Bool}=nothing)
end

# the logic here relies on the fact that Tables.CopiedColumns
# is the only exception for default copycols value
# is the only exception for default copycols value
DataFrame(x, cnames::AbstractVector; makeunique::Bool=false,
copycols::Union{Nothing, Bool}=nothing) =
rename!(DataFrame(x, copycols=something(copycols, !(x isa Tables.CopiedColumns))),
_name2symbol(cnames),
makeunique=makeunique)

function Base.append!(df::DataFrame, table; cols::Symbol=:setequal,
                      promote::Bool=(cols in [:union, :subset]))
    # A `Dict` is unordered, so it cannot be matched against `df` by column order.
    (cols == :orderequal && table isa Dict) &&
        throw(ArgumentError("passing `Dict` as `table` when `cols` is equal to " *
                            "`:orderequal` is not allowed as it is unordered"))
    # Wrap `table` in a `DataFrame` without copying columns, then re-dispatch on it.
    converted = DataFrame(table, copycols=false)
    return append!(df, converted, cols=cols, promote=promote)
end

function Base.prepend!(df::DataFrame, table; cols::Symbol=:setequal,
                       promote::Bool=(cols in [:union, :subset]))
    # A `Dict` is unordered, so it cannot be matched against `df` by column order.
    (cols == :orderequal && table isa Dict) &&
        throw(ArgumentError("passing `Dict` as `table` when `cols` is equal to " *
                            "`:orderequal` is not allowed as it is unordered"))
    # Wrap `table` in a `DataFrame` without copying columns, then re-dispatch on it.
    converted = DataFrame(table, copycols=false)
    return prepend!(df, converted, cols=cols, promote=promote)
end

# This supports the Tables.RowTable type; needed to avoid ambiguities w/ another constructor
DataFrame(x::AbstractVector{NamedTuple{names, T}}; copycols::Bool=true) where {names, T} =
fromcolumns(Tables.columns(Tables.IteratorWrapper(x)), collect(names), copycols=false)
Expand Down
Loading
Loading