Skip to content

Commit

Permalink
Fix vcat in case no data frames are passed (#3081)
Browse files Browse the repository at this point in the history
  • Loading branch information
bkamins authored Jun 19, 2022
1 parent 0ce9b0f commit d645276
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 7 deletions.
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@
* Make sure we avoid aliasing when repeating the same column
in `select[!]` and `transform[!]` on `GroupedDataFrame`
([#3070](https://github.com/JuliaData/DataFrames.jl/pull/3070))
* Make `vcat` correctly handle `cols` keyword argument if only
data frames having no columns are passed
([#3081](https://github.com/JuliaData/DataFrames.jl/pull/3081))

## Performance

Expand Down
7 changes: 5 additions & 2 deletions src/abstractdataframe/abstractdataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1945,8 +1945,11 @@ end
function _vcat(dfs::AbstractVector{AbstractDataFrame};
cols::Union{Symbol, AbstractVector{Symbol},
AbstractVector{<:AbstractString}}=:setequal)

isempty(dfs) && return DataFrame()
# note that empty DataFrame() objects are dropped from dfs before we call _vcat
if isempty(dfs)
cols isa Symbol && return DataFrame()
return DataFrame([col => Missing[] for col in cols])
end
# Array of all headers
allheaders = map(names, dfs)
# Array of unique headers across all data frames
Expand Down
48 changes: 43 additions & 5 deletions test/dataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ using OffsetArrays: OffsetArray
const ≅ = isequal
const ≇ = !isequal

# Test helper: `true` only when `df1` and `df2` are `isequal` AND the concrete
# types of their column vectors match pairwise (so e.g. a `CategoricalVector`
# column is not treated as interchangeable with a plain `Vector`).
function isequal_coltyped(df1::AbstractDataFrame, df2::AbstractDataFrame)
    # Cheap rejection first; column types are only inspected for equal frames.
    isequal(df1, df2) || return false
    coltypes1 = typeof.(eachcol(df1))
    coltypes2 = typeof.(eachcol(df2))
    return coltypes1 == coltypes2
end

# randomized test from https://github.com/JuliaData/DataFrames.jl/pull/1974
@testset "randomized tests for rename!" begin
n = Symbol.('a':'z')
Expand Down Expand Up @@ -2049,7 +2052,7 @@ end
cols=:orderequal)
end

@testset "vcat with source" begin
@testset "vcat with source and reduce(vcat, ...)" begin
df1 = DataFrame(A=1:3, B=1:3)
df2 = DataFrame(A=4:6, B=4:6)
df3 = DataFrame(A=7:9, C=7:9)
Expand All @@ -2058,20 +2061,55 @@ end
for col in [:source, "source"]
@test vcat(df1, df2, df3, df4, cols=:union, source=col) ≅
vcat(df1, df2, df3, df4, cols=:union, source=col => [1, 2, 3, 4]) ≅
reduce(vcat, [df1, df2, df3, df4], cols=:union, source=col) ≅
reduce(vcat, [df1, df2, df3, df4], cols=:union, source=col => [1, 2, 3, 4]) ≅
DataFrame(A=1:9, B=[1:6; fill(missing, 3)],
C=[fill(missing, 6); 7:9],
source=[1, 1, 1, 2, 2, 2, 3, 3, 3])
res = vcat(df1, df2, df3, df4, cols=:union, source=col => categorical(-4:-1))
@test res ≅ DataFrame(A=1:9, B=[1:6; fill(missing, 3)],
C=[fill(missing, 6); 7:9],
source=[-4, -4, -4, -3, -3, -3, -2, -2, -2])
@test res.source isa CategoricalVector
@test isequal_coltyped(res, DataFrame(A=1:9, B=[1:6; fill(missing, 3)],
C=[fill(missing, 6); 7:9],
source=categorical([-4, -4, -4, -3, -3, -3, -2, -2, -2])))

res = reduce(vcat, [df1, df2, df3, df4], cols=:union, source=col => categorical(-4:-1))
@test isequal_coltyped(res, DataFrame(A=1:9, B=[1:6; fill(missing, 3)],
C=[fill(missing, 6); 7:9],
source=categorical([-4, -4, -4, -3, -3, -3, -2, -2, -2])))

@test reduce(vcat, DataFrame[]) == DataFrame()
@test isequal_coltyped(reduce(vcat, DataFrame[], source=:src),
DataFrame(src=Int[]))
@test isequal_coltyped(reduce(vcat, DataFrame[], cols=[:a, :b]),
DataFrame(a=Missing[], b=Missing[]))
@test isequal_coltyped(reduce(vcat, DataFrame[], cols=[:a, :b], source=:src),
DataFrame(a=Missing[], b=Missing[], src=Int[]))
end

@test_throws TypeError vcat(df1, df2, df3, df4, cols=:union, source=1)
@test_throws TypeError vcat(df1, df2, df3, df4, cols=:union, source=:a => 1)
@test_throws ArgumentError vcat(df1, df2, df3, df4, cols=:union, source=:C)
@test_throws ArgumentError vcat(df1, df2, df3, df4, cols=:union, source=:a => [1])
@test_throws TypeError reduce(vcat, [df1, df2, df3, df4], cols=:union, source=1)
@test_throws TypeError reduce(vcat, [df1, df2, df3, df4], cols=:union, source=:a => 1)
@test_throws ArgumentError reduce(vcat, [df1, df2, df3, df4], cols=:union, source=:C)
@test_throws ArgumentError reduce(vcat, [df1, df2, df3, df4], cols=:union, source=:a => [1])

@test vcat(DataFrame(), DataFrame()) ==
reduce(vcat, [DataFrame(), DataFrame()]) ==
DataFrame()
@test isequal_coltyped(vcat(DataFrame(), DataFrame(), cols=[:a, :b]),
DataFrame(a=Missing[], b=Missing[]))
@test isequal_coltyped(reduce(vcat, (DataFrame(), DataFrame()), cols=[:a, :b]),
DataFrame(a=Missing[], b=Missing[]))
@test isequal_coltyped(vcat(DataFrame(a=1:2), DataFrame(), cols=[:a, :b]),
DataFrame(a=1:2, b=missing))
@test isequal_coltyped(reduce(vcat, (DataFrame(a=1:2), DataFrame()), cols=[:a, :b]),
DataFrame(a=1:2, b=missing))
@test vcat(DataFrame(a=1), DataFrame(b=2), cols=[:a]) ≅ DataFrame(a=[1, missing])
@test vcat(DataFrame(a=1), DataFrame(b=2), cols=[:b]) ≅ DataFrame(b=[missing, 2])
@test vcat(DataFrame(a=1), DataFrame(b=2), cols=Symbol[]) == DataFrame()
@test isequal_coltyped(vcat(DataFrame(a=1), DataFrame(b=2), cols=[:c]),
DataFrame(c=[missing, missing]))
end

@testset "push! with :subset" begin
Expand Down

0 comments on commit d645276

Please sign in to comment.