Skip to content

Commit

Permalink
make sure flatten works correctly on a data frame with zero rows (#3198)
Browse files Browse the repository at this point in the history
  • Loading branch information
bkamins authored Oct 17, 2022
1 parent 8f726a6 commit c1ff9cd
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 3 deletions.
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
# DataFrames.jl v1.4.2 Patch Release Notes

## Bug fixes

* Make sure `flatten` works correctly on a data frame with zero rows
([#3198](https://github.com/JuliaData/DataFrames.jl/issues/3198))

# DataFrames.jl v1.4.1 Patch Release Notes

## Bug fixes
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name = "DataFrames"
uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
version = "1.4.1"
version = "1.4.2"

[deps]
Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
Expand Down
5 changes: 3 additions & 2 deletions src/abstractdataframe/abstractdataframe.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2568,10 +2568,11 @@ function flatten(df::AbstractDataFrame,
length(idxcols) > 1 && sort!(idxcols)
for col in idxcols
col_to_flatten = df[!, col]
flattened_col = col_to_flatten isa AbstractVector{<:AbstractVector} ?
fast_path = eltype(col_to_flatten) isa AbstractVector &&
!isempty(col_to_flatten)
flattened_col = fast_path ?
reduce(vcat, col_to_flatten) :
collect(Iterators.flatten(col_to_flatten))

insertcols!(new_df, col, _names(df)[col] => flattened_col)
end

Expand Down
11 changes: 11 additions & 0 deletions test/reshape.jl
Original file line number Diff line number Diff line change
Expand Up @@ -367,10 +367,13 @@ end
@testset "flatten single column" begin
df_vec = DataFrame(a=[1, 2], b=[[1, 2], [3, 4]])
df_tup = DataFrame(a=[1, 2], b=[(1, 2), (3, 4)])
@test flatten(empty(df_vec), :b) == DataFrame(a=[], b=[])
@test flatten(empty(df_tup), :b) == DataFrame(a=[], b=[])
ref = DataFrame(a=[1, 1, 2, 2], b=[1, 2, 3, 4])
@test flatten(df_vec, :b) == flatten(df_tup, :b) == ref
@test flatten(df_vec, "b") == flatten(df_tup, "b") == ref
df_mixed_types = DataFrame(a=[1, 2], b=[[1, 2], ["x", "y"]])
@test flatten(empty(df_mixed_types), :b) == DataFrame(a=[], b=[])
ref_mixed_types = DataFrame(a=[1, 1, 2, 2], b=[1, 2, "x", "y"])
@test flatten(df_mixed_types, :b) == ref_mixed_types
df_three = DataFrame(a=[1, 2, 3], b=[[1, 2], [10, 20], [100, 200, 300]])
Expand All @@ -382,17 +385,20 @@ end
@test flatten(df_gen, :b) == ref_gen
@test flatten(df_gen, "b") == ref_gen
df_miss = DataFrame(a=[1, 2], b=[Union{Missing, Int}[1, 2], Union{Missing, Int}[3, 4]])
@test flatten(empty(df_miss), :b) == DataFrame(a=[], b=[])
ref = DataFrame(a=[1, 1, 2, 2], b=[1, 2, 3, 4])
@test flatten(df_miss, :b) == ref
@test flatten(df_miss, "b") == ref
v1 = [[1, 2], [3, 4]]
v2 = [[5, 6], [7, 8]]
v = [v1, v2]
df_vec_vec = DataFrame(a=[1, 2], b=v)
@test flatten(empty(df_vec_vec), :b) == DataFrame(a=[], b=[])
ref_vec_vec = DataFrame(a=[1, 1, 2, 2], b=[v1 ; v2])
@test flatten(df_vec_vec, :b) == ref_vec_vec
@test flatten(df_vec_vec, "b") == ref_vec_vec
df_cat = DataFrame(a=[1, 2], b=[CategoricalArray([1, 2]), CategoricalArray([1, 2])])
@test flatten(empty(df_cat), :b) == DataFrame(a=[], b=[])
df_flat_cat = flatten(df_cat, :b)
ref_cat = DataFrame(a=[1, 1, 2, 2], b=[1, 2, 1, 2])
@test df_flat_cat == ref_cat
Expand All @@ -401,6 +407,9 @@ end

@testset "flatten multiple columns" begin
df = DataFrame(a=[1, 2], b=[[1, 2], [3, 4]], c=[[5, 6], [7, 8]])
@test flatten(empty(df), []) == DataFrame(a=[], b=[], c=[])
@test flatten(empty(df), [:b, :c]) == DataFrame(a=[], b=[], c=[])
@test flatten(empty(df), All()) == DataFrame(a=[], b=[], c=[])
@test flatten(df, []) == df
ref = DataFrame(a=[1, 1, 2, 2], b=[1, 2, 3, 4], c=[5, 6, 7, 8])
@test flatten(df, [:b, :c]) == ref
Expand All @@ -418,6 +427,8 @@ end
@test flatten(df_allcols, :) == ref_allcols
df_bad = DataFrame(a=[1, 2], b=[[1, 2], [3, 4]], c=[[5, 6], [7]])
@test_throws ArgumentError flatten(df_bad, [:b, :c])
@test flatten(DataFrame(), []) == DataFrame()
@test flatten(DataFrame(), All()) == DataFrame()
end

@testset "stack categorical test" begin
Expand Down

0 comments on commit c1ff9cd

Please sign in to comment.