JuliaData · bkamins · Feb 17, 2022 · Feb 16, 2022 · Feb 16, 2022 · Feb 17, 2022
diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,12 @@
+# DataFrames.jl v1.4 Release Notes
+
+## New functionalities
+
+* `permutedims` now supports a `strict` keyword argument that allows
+  more flexible handling of the values stored in the column that will
+  become the new header
+  ([#3004](https://github.com/JuliaData/DataFrames.jl/issues/3004))
+
 # DataFrames.jl v1.3.2 Patch Release Notes
 
 ## Bug fixes

diff --git a/src/abstractdataframe/reshape.jl b/src/abstractdataframe/reshape.jl
@@ -572,7 +572,7 @@ Base.transpose(::AbstractDataFrame, args...; kwargs...) =
 """
     permutedims(df::AbstractDataFrame, src_namescol::Union{Int, Symbol, AbstractString},
                 [dest_namescol::Union{Symbol, AbstractString}];
-                makeunique::Bool=false)
+                makeunique::Bool=false, strict::Bool=true)
 
 Turn `df` on its side such that rows become columns
 and values in the column indexed by `src_namescol` become the names of new columns.
@@ -582,12 +582,16 @@ with name specified by `dest_namescol`.
 # Arguments
 - `df` : the `AbstractDataFrame`
 - `src_namescol` : the column that will become the new header.
-  This column's element type must be `AbstractString` or `Symbol`.
 - `dest_namescol` : the name of the first column in the returned `DataFrame`.
   Defaults to the same name as `src_namescol`.
 - `makeunique` : if `false` (the default), an error will be raised
   if duplicate names are found; if `true`, duplicate names will be suffixed
   with `_i` (`i` starting at 1 for the first duplicate).
+- `strict` : if `true` (the default), an error will be raised unless the values
+  contained in the `src_namescol` are all `Symbol`, all `AbstractString`,
+  or can all be converted to `AbstractString` using `convert`. If `false`
+  then any values are accepted and they will be changed to strings using
+  the `string` function.
-  the `string` function.
+  the [`string`](@ref) function.
-  the `string` function.
+  the [`string`](@ref) function.
 
 Note: The element types of columns in resulting `DataFrame`
 (other than the first column, which always has element type `String`)
@@ -637,34 +641,53 @@ julia> permutedims(df2, 1, "different_name")
 """
 function Base.permutedims(df::AbstractDataFrame, src_namescol::ColumnIndex,
                           dest_namescol::Union{Symbol, AbstractString};
-                          makeunique::Bool=false)
+                          makeunique::Bool=false, strict::Bool=true)
 
     if src_namescol isa Integer
         1 <= src_namescol <= ncol(df) || throw(BoundsError(index(df), src_namescol))
     end
-    eltype(df[!, src_namescol]) <: SymbolOrString ||
-        throw(ArgumentError("src_namescol must have eltype `Symbol` or `<:AbstractString`"))
+    src_col_names = df[!, src_namescol]
+    local new_col_names
+    if eltype(src_col_names) <: SymbolOrString
+        new_col_names = src_col_names
+    elseif all(x -> x isa Symbol, src_col_names)
+        new_col_names = collect(Symbol, src_col_names)
+    elseif !strict
+        new_col_names = string.(src_col_names)
+    else
+        try
+            new_col_names = collect(String, src_col_names)
+        catch e
+            if e isa MethodError && e.f === convert
+                throw(ArgumentError("all elements of src_namescol must support " *
+                                    "conversion to AbstractString"))
+            else
+                rethrow(e)
+            end
+        end
+    end
 
     df_notsrc = df[!, Not(src_namescol)]
     df_permuted = DataFrame(dest_namescol => names(df_notsrc))
 
     if ncol(df_notsrc) == 0
-        df_tmp = DataFrame(AbstractVector[[] for _ in 1:nrow(df)], df[!, src_namescol],
+        df_tmp = DataFrame(AbstractVector[[] for _ in 1:nrow(df)], new_col_names,
                            makeunique=makeunique, copycols=false)
     else
         m = permutedims(Matrix(df_notsrc))
-        df_tmp = rename!(DataFrame(Tables.table(m)), df[!, src_namescol], makeunique=makeunique)
+        df_tmp = rename!(DataFrame(Tables.table(m)), new_col_names, makeunique=makeunique)
     end
     return hcat!(df_permuted, df_tmp, makeunique=makeunique, copycols=false)
 end
 
 function Base.permutedims(df::AbstractDataFrame, src_namescol::ColumnIndex;
-                          makeunique::Bool=false)
+                          makeunique::Bool=false, strict::Bool=true)
     if src_namescol isa Integer
         1 <= src_namescol <= ncol(df) || throw(BoundsError(index(df), src_namescol))
         dest_namescol = _names(df)[src_namescol]
     else
         dest_namescol = src_namescol
     end
-    return permutedims(df, src_namescol, dest_namescol; makeunique=makeunique)
+    return permutedims(df, src_namescol, dest_namescol;
+                       makeunique=makeunique, strict=strict)
 end
diff --git a/test/reshape.jl b/test/reshape.jl
@@ -637,14 +637,37 @@ end
     @test permutedims(df4[!, [:e]], 1) == DataFrame(e=String[], x=[], y=[])
     # Can't index float Column
     @test_throws ArgumentError permutedims(df4[!, [:a, :b, :c]], 1)
-    @test_throws ArgumentError permutedims(DataFrame(a=Float64[], b=Float64[]), 1)
+    # but can if it is empty
+    @test permutedims(DataFrame(a=Float64[], b=Float64[]), 1) == DataFrame(a="b")
     # Can't index columns that allow for missing
     @test_throws ArgumentError permutedims(df4[!, [:g, :a, :b, :c]], 1)
-    @test_throws ArgumentError permutedims(df4[!, [:h, :a, :b]], 1)
+    # but can if they do not contain missing
+    @test permutedims(df4[!, [:h, :a, :b]], 1) == permutedims(df4[!, [:e, :a, :b]], 1, :h)
     # Can't permute empty `df` ...
     @test_throws BoundsError permutedims(DataFrame(), 1)
     # ... but can permute zero-row df
     @test permutedims(DataFrame(a=String[], b=Float64[]), 1) == DataFrame(a=["b"])
+
+    # tests of strict handling
+    df = DataFrame(a=["x", "y"], b=[1.0, 2.0], c=[3, 4], d=[true, false])
+    ref = permutedims(df, 1)
+    # allowed as the contents are all strings
+    df.a = collect(Any, df.a)
+    @test permutedims(df, 1) == ref
+    # this is allowed as conversion from categorical to string is allowed
+    df.a = categorical(df.a)
+    @test permutedims(df, 1) == ref
+    # allowed as the contents are all symbols
+    df.a = Any[:x, :y]
+    @test permutedims(df, 1) == ref
+    # mixing strings and symbols is not allowed
+    df.a = Any[:x, "y"]
+    @test_throws ArgumentError permutedims(df, 1)
+    # values that cannot be converted to a string are not allowed
+    df.a = Any['x', 'y']
+    @test_throws ArgumentError permutedims(df, 1)
+    # but allowed with strict=false
+    @test permutedims(df, 1, strict=false) == ref
 end
 
 @testset "stack view=true additional tests" begin