From c1ff9cd5bfe5fe4f960abf15b861cfa885231747 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 17 Oct 2022 23:45:49 +0200 Subject: [PATCH] make sure flatten works correctly on a data frame with zero rows (#3198) --- NEWS.md | 7 +++++++ Project.toml | 2 +- src/abstractdataframe/abstractdataframe.jl | 5 +++-- test/reshape.jl | 11 +++++++++++ 4 files changed, 22 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index e1a01b2b73..0521afd114 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,10 @@ +# DataFrames.jl v1.4.2 Patch Release Notes + +## Bug fixes + +* Make sure `flatten` works correctly on a data frame with zero rows + ([#3198](https://github.com/JuliaData/DataFrames.jl/issues/3198)) + # DataFrames.jl v1.4.1 Patch Release Notes ## Bug fixes diff --git a/Project.toml b/Project.toml index ac0fbc15c2..dae0749812 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "DataFrames" uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" -version = "1.4.1" +version = "1.4.2" [deps] Compat = "34da2185-b29b-5c13-b0c7-acf172513d20" diff --git a/src/abstractdataframe/abstractdataframe.jl b/src/abstractdataframe/abstractdataframe.jl index cdd086505d..1dda8c8765 100644 --- a/src/abstractdataframe/abstractdataframe.jl +++ b/src/abstractdataframe/abstractdataframe.jl @@ -2568,10 +2568,11 @@ function flatten(df::AbstractDataFrame, length(idxcols) > 1 && sort!(idxcols) for col in idxcols col_to_flatten = df[!, col] - flattened_col = col_to_flatten isa AbstractVector{<:AbstractVector} ? + fast_path = eltype(col_to_flatten) isa AbstractVector && + !isempty(col_to_flatten) + flattened_col = fast_path ? 
reduce(vcat, col_to_flatten) : collect(Iterators.flatten(col_to_flatten)) - insertcols!(new_df, col, _names(df)[col] => flattened_col) end diff --git a/test/reshape.jl b/test/reshape.jl index f766f0bfaa..58cf7bfce0 100644 --- a/test/reshape.jl +++ b/test/reshape.jl @@ -367,10 +367,13 @@ end @testset "flatten single column" begin df_vec = DataFrame(a=[1, 2], b=[[1, 2], [3, 4]]) df_tup = DataFrame(a=[1, 2], b=[(1, 2), (3, 4)]) + @test flatten(empty(df_vec), :b) == DataFrame(a=[], b=[]) + @test flatten(empty(df_tup), :b) == DataFrame(a=[], b=[]) ref = DataFrame(a=[1, 1, 2, 2], b=[1, 2, 3, 4]) @test flatten(df_vec, :b) == flatten(df_tup, :b) == ref @test flatten(df_vec, "b") == flatten(df_tup, "b") == ref df_mixed_types = DataFrame(a=[1, 2], b=[[1, 2], ["x", "y"]]) + @test flatten(empty(df_mixed_types), :b) == DataFrame(a=[], b=[]) ref_mixed_types = DataFrame(a=[1, 1, 2, 2], b=[1, 2, "x", "y"]) @test flatten(df_mixed_types, :b) == ref_mixed_types df_three = DataFrame(a=[1, 2, 3], b=[[1, 2], [10, 20], [100, 200, 300]]) @@ -382,6 +385,7 @@ end @test flatten(df_gen, :b) == ref_gen @test flatten(df_gen, "b") == ref_gen df_miss = DataFrame(a=[1, 2], b=[Union{Missing, Int}[1, 2], Union{Missing, Int}[3, 4]]) + @test flatten(empty(df_miss), :b) == DataFrame(a=[], b=[]) ref = DataFrame(a=[1, 1, 2, 2], b=[1, 2, 3, 4]) @test flatten(df_miss, :b) == ref @test flatten(df_miss, "b") == ref @@ -389,10 +393,12 @@ end v2 = [[5, 6], [7, 8]] v = [v1, v2] df_vec_vec = DataFrame(a=[1, 2], b=v) + @test flatten(empty(df_vec_vec), :b) == DataFrame(a=[], b=[]) ref_vec_vec = DataFrame(a=[1, 1, 2, 2], b=[v1 ; v2]) @test flatten(df_vec_vec, :b) == ref_vec_vec @test flatten(df_vec_vec, "b") == ref_vec_vec df_cat = DataFrame(a=[1, 2], b=[CategoricalArray([1, 2]), CategoricalArray([1, 2])]) + @test flatten(empty(df_cat), :b) == DataFrame(a=[], b=[]) df_flat_cat = flatten(df_cat, :b) ref_cat = DataFrame(a=[1, 1, 2, 2], b=[1, 2, 1, 2]) @test df_flat_cat == ref_cat @@ -401,6 +407,9 @@ end @testset 
"flatten multiple columns" begin df = DataFrame(a=[1, 2], b=[[1, 2], [3, 4]], c=[[5, 6], [7, 8]]) + @test flatten(empty(df), []) == DataFrame(a=[], b=[], c=[]) + @test flatten(empty(df), [:b, :c]) == DataFrame(a=[], b=[], c=[]) + @test flatten(empty(df), All()) == DataFrame(a=[], b=[], c=[]) @test flatten(df, []) == df ref = DataFrame(a=[1, 1, 2, 2], b=[1, 2, 3, 4], c=[5, 6, 7, 8]) @test flatten(df, [:b, :c]) == ref @@ -418,6 +427,8 @@ end @test flatten(df_allcols, :) == ref_allcols df_bad = DataFrame(a=[1, 2], b=[[1, 2], [3, 4]], c=[[5, 6], [7]]) @test_throws ArgumentError flatten(df_bad, [:b, :c]) + @test flatten(DataFrame(), []) == DataFrame() + @test flatten(DataFrame(), All()) == DataFrame() end @testset "stack categorical test" begin