From d645276ecaf8be148215a2bc2b3de809b7b11ba4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Sun, 19 Jun 2022 08:24:08 +0200 Subject: [PATCH] Fix vcat in case no data frames are passed (#3081) --- NEWS.md | 3 ++ src/abstractdataframe/abstractdataframe.jl | 7 +++- test/dataframe.jl | 48 +++++++++++++++++++--- 3 files changed, 51 insertions(+), 7 deletions(-) diff --git a/NEWS.md b/NEWS.md index 3d4901bb0d..bbba135084 100644 --- a/NEWS.md +++ b/NEWS.md @@ -57,6 +57,9 @@ * Make sure we avoid aliasing when repeating the same column in `select[!]` and `transform[!]` on `GroupedDataFrame` ([#3070](https://github.com/JuliaData/DataFrames.jl/pull/3070)) +* Make `vcat` correctly handle `cols` keyword argument if only + data frames having no columns are passed + ([#3081](https://github.com/JuliaData/DataFrames.jl/pull/3081)) ## Performance diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl index 7c03ffc320..55d73872fd 100644 --- a/src/abstractdataframe/abstractdataframe.jl +++ b/src/abstractdataframe/abstractdataframe.jl @@ -1945,8 +1945,11 @@ end function _vcat(dfs::AbstractVector{AbstractDataFrame}; cols::Union{Symbol, AbstractVector{Symbol}, AbstractVector{<:AbstractString}}=:setequal) - - isempty(dfs) && return DataFrame() + # note that empty DataFrame() objects are dropped from dfs before we call _vcat + if isempty(dfs) + cols isa Symbol && return DataFrame() + return DataFrame([col => Missing[] for col in cols]) + end # Array of all headers allheaders = map(names, dfs) # Array of unique headers across all data frames diff --git a/test/dataframe.jl b/test/dataframe.jl index 7c77d10780..4bdd7457d9 100644 --- a/test/dataframe.jl +++ b/test/dataframe.jl @@ -7,6 +7,9 @@ using OffsetArrays: OffsetArray const ≅ = isequal const ≇ = !isequal +isequal_coltyped(df1::AbstractDataFrame, df2::AbstractDataFrame) = + isequal(df1, df2) && typeof.(eachcol(df1)) == typeof.(eachcol(df2)) + # randomized test from https://github.com/JuliaData/DataFrames.jl/pull/1974 @testset "randomized tests for rename!" begin n = Symbol.('a':'z') @@ -2049,7 +2052,7 @@ end cols=:orderequal) end -@testset "vcat with source" begin +@testset "vcat with source and reduce(vcat, ...)" begin df1 = DataFrame(A=1:3, B=1:3) df2 = DataFrame(A=4:6, B=4:6) df3 = DataFrame(A=7:9, C=7:9) @@ -2058,20 +2061,55 @@ end for col in [:source, "source"] @test vcat(df1, df2, df3, df4, cols=:union, source=col) ≅ vcat(df1, df2, df3, df4, cols=:union, source=col => [1, 2, 3, 4]) ≅ + reduce(vcat, [df1, df2, df3, df4], cols=:union, source=col) ≅ + reduce(vcat, [df1, df2, df3, df4], cols=:union, source=col => [1, 2, 3, 4]) ≅ DataFrame(A=1:9, B=[1:6; fill(missing, 3)], C=[fill(missing, 6); 7:9], source=[1, 1, 1, 2, 2, 2, 3, 3, 3]) res = vcat(df1, df2, df3, df4, cols=:union, source=col => categorical(-4:-1)) - @test res ≅ DataFrame(A=1:9, B=[1:6; fill(missing, 3)], - C=[fill(missing, 6); 7:9], - source=[-4, -4, -4, -3, -3, -3, -2, -2, -2]) - @test res.source isa CategoricalVector + @test isequal_coltyped(res, DataFrame(A=1:9, B=[1:6; fill(missing, 3)], + C=[fill(missing, 6); 7:9], + source=categorical([-4, -4, -4, -3, -3, -3, -2, -2, -2]))) + + res = reduce(vcat, [df1, df2, df3, df4], cols=:union, source=col => categorical(-4:-1)) + @test isequal_coltyped(res, DataFrame(A=1:9, B=[1:6; fill(missing, 3)], + C=[fill(missing, 6); 7:9], + source=categorical([-4, -4, -4, -3, -3, -3, -2, -2, -2]))) + + @test reduce(vcat, DataFrame[]) == DataFrame() + @test isequal_coltyped(reduce(vcat, DataFrame[], source=:src), + DataFrame(src=Int[])) + @test isequal_coltyped(reduce(vcat, DataFrame[], cols=[:a, :b]), + DataFrame(a=Missing[], b=Missing[])) + @test isequal_coltyped(reduce(vcat, DataFrame[], cols=[:a, :b], source=:src), + DataFrame(a=Missing[], b=Missing[], src=Int[])) end @test_throws TypeError vcat(df1, df2, df3, df4, cols=:union, source=1) @test_throws TypeError vcat(df1, df2, df3, df4, cols=:union, source=:a => 1) @test_throws ArgumentError vcat(df1, df2, df3, df4, cols=:union, source=:C) @test_throws ArgumentError vcat(df1, df2, df3, df4, cols=:union, source=:a => [1]) + @test_throws TypeError reduce(vcat, [df1, df2, df3, df4], cols=:union, source=1) + @test_throws TypeError reduce(vcat, [df1, df2, df3, df4], cols=:union, source=:a => 1) + @test_throws ArgumentError reduce(vcat, [df1, df2, df3, df4], cols=:union, source=:C) + @test_throws ArgumentError reduce(vcat, [df1, df2, df3, df4], cols=:union, source=:a => [1]) + + @test vcat(DataFrame(), DataFrame()) == + reduce(vcat, [DataFrame(), DataFrame()]) == + DataFrame() + @test isequal_coltyped(vcat(DataFrame(), DataFrame(), cols=[:a, :b]), + DataFrame(a=Missing[], b=Missing[])) + @test isequal_coltyped(reduce(vcat, (DataFrame(), DataFrame()), cols=[:a, :b]), + DataFrame(a=Missing[], b=Missing[])) + @test isequal_coltyped(vcat(DataFrame(a=1:2), DataFrame(), cols=[:a, :b]), + DataFrame(a=1:2, b=missing)) + @test isequal_coltyped(reduce(vcat, (DataFrame(a=1:2), DataFrame()), cols=[:a, :b]), + DataFrame(a=1:2, b=missing)) + @test vcat(DataFrame(a=1), DataFrame(b=2), cols=[:a]) ≅ DataFrame(a=[1, missing]) + @test vcat(DataFrame(a=1), DataFrame(b=2), cols=[:b]) ≅ DataFrame(b=[missing, 2]) + @test vcat(DataFrame(a=1), DataFrame(b=2), cols=Symbol[]) == DataFrame() + @test isequal_coltyped(vcat(DataFrame(a=1), DataFrame(b=2), cols=[:c]), + DataFrame(c=[missing, missing])) end @testset "push! with :subset" begin