From fb246d18bc2d484d7a8c99c862493722f952e78f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Wed, 16 Feb 2022 10:16:24 +0100
Subject: [PATCH 1/6] make permutedims more flexible

---
 src/abstractdataframe/reshape.jl | 41 +++++++++++++++++++++++++-------
 test/reshape.jl                  | 27 +++++++++++++++++++--
 2 files changed, 57 insertions(+), 11 deletions(-)

diff --git a/src/abstractdataframe/reshape.jl b/src/abstractdataframe/reshape.jl
index 6f187b9905..a4bbcc4310 100644
--- a/src/abstractdataframe/reshape.jl
+++ b/src/abstractdataframe/reshape.jl
@@ -572,7 +572,7 @@ Base.transpose(::AbstractDataFrame, args...; kwargs...) =
 """
     permutedims(df::AbstractDataFrame, src_namescol::Union{Int, Symbol, AbstractString},
                 [dest_namescol::Union{Symbol, AbstractString}];
-                makeunique::Bool=false)
+                makeunique::Bool=false, strict::Bool=true)
 
 Turn `df` on its side such that rows become columns
 and values in the column indexed by `src_namescol` become the names of new columns.
@@ -582,12 +582,16 @@ with name specified by `dest_namescol`.
 # Arguments
 - `df` : the `AbstractDataFrame`
 - `src_namescol` : the column that will become the new header.
-  This column's element type must be `AbstractString` or `Symbol`.
 - `dest_namescol` : the name of the first column in the returned `DataFrame`.
   Defaults to the same name as `src_namescol`.
 - `makeunique` : if `false` (the default), an error will be raised
   if duplicate names are found; if `true`, duplicate names will be suffixed
   with `_i` (`i` starting at 1 for the first duplicate).
+- `strict` : if `true` (the default), an error will be raised if the values
+  contained in the `src_namescol` are not all `Symbol` or all `AbstractString`,
+  or can all be converted to `AbstractString` using `convert`. If `false`
+  then any values are accepted and the will be changed to strings using
+  the `string` function.
 
 Note: The element types of columns in resulting `DataFrame`
 (other than the first column, which always has element type `String`)
@@ -637,34 +641,53 @@ julia> permutedims(df2, 1, "different_name")
 """
 function Base.permutedims(df::AbstractDataFrame, src_namescol::ColumnIndex,
                           dest_namescol::Union{Symbol, AbstractString};
-                          makeunique::Bool=false)
+                          makeunique::Bool=false, strict::Bool=true)
 
     if src_namescol isa Integer
         1 <= src_namescol <= ncol(df) || throw(BoundsError(index(df), src_namescol))
     end
-    eltype(df[!, src_namescol]) <: SymbolOrString ||
-        throw(ArgumentError("src_namescol must have eltype `Symbol` or `<:AbstractString`"))
+    src_col_names = df[!, src_namescol]
+    local new_col_names
+    if eltype(src_col_names) <: SymbolOrString
+        new_col_names = src_col_names
+    elseif all(x -> x isa Symbol, src_col_names)
+        new_col_names = collect(Symbol, src_col_names)
+    elseif !strict
+        new_col_names = string.(src_col_names)
+    else
+        try
+            new_col_names = collect(AbstractString, src_col_names)
+        catch e
+            if e isa MethodError && e.f === convert
+                throw(ArgumentError("all elements of src_namescol must support " *
+                                    "conversion to AbstractString"))
+            else
+                rethrow(e)
+            end
+        end
+    end
 
     df_notsrc = df[!, Not(src_namescol)]
     df_permuted = DataFrame(dest_namescol => names(df_notsrc))
 
     if ncol(df_notsrc) == 0
-        df_tmp = DataFrame(AbstractVector[[] for _ in 1:nrow(df)], df[!, src_namescol],
+        df_tmp = DataFrame(AbstractVector[[] for _ in 1:nrow(df)], new_col_names,
                            makeunique=makeunique, copycols=false)
     else
         m = permutedims(Matrix(df_notsrc))
-        df_tmp = rename!(DataFrame(Tables.table(m)), df[!, src_namescol], makeunique=makeunique)
+        df_tmp = rename!(DataFrame(Tables.table(m)), new_col_names, makeunique=makeunique)
     end
     return hcat!(df_permuted, df_tmp, makeunique=makeunique, copycols=false)
 end
 
 function Base.permutedims(df::AbstractDataFrame, src_namescol::ColumnIndex;
-                          makeunique::Bool=false)
+                          makeunique::Bool=false, strict::Bool=true)
     if src_namescol isa Integer
         1 <= src_namescol <= ncol(df) || throw(BoundsError(index(df), src_namescol))
         dest_namescol = _names(df)[src_namescol]
     else
         dest_namescol = src_namescol
     end
-    return permutedims(df, src_namescol, dest_namescol; makeunique=makeunique)
+    return permutedims(df, src_namescol, dest_namescol;
+                       makeunique=makeunique, strict=strict)
 end
diff --git a/test/reshape.jl b/test/reshape.jl
index 288835219e..56e54f2030 100644
--- a/test/reshape.jl
+++ b/test/reshape.jl
@@ -637,14 +637,37 @@ end
     @test permutedims(df4[!, [:e]], 1) == DataFrame(e=String[], x=[], y=[])
     # Can't index float Column
     @test_throws ArgumentError permutedims(df4[!, [:a, :b, :c]], 1)
-    @test_throws ArgumentError permutedims(DataFrame(a=Float64[], b=Float64[]), 1)
+    # but can if it is empty
+    @test permutedims(DataFrame(a=Float64[], b=Float64[]), 1) == DataFrame(a="b")
     # Can't index columns that allow for missing
     @test_throws ArgumentError permutedims(df4[!, [:g, :a, :b, :c]], 1)
-    @test_throws ArgumentError permutedims(df4[!, [:h, :a, :b]], 1)
+    # but can if they do not contain missing
+    @test permutedims(df4[!, [:h, :a, :b]], 1) == permutedims(df4[!, [:e, :a, :b]], 1, :h)
     # Can't permute empty `df` ...
     @test_throws BoundsError permutedims(DataFrame(), 1)
     # ... but can permute zero-row df
     @test permutedims(DataFrame(a=String[], b=Float64[]), 1) == DataFrame(a=["b"])
+
+    # tests of strict handling
+    df = DataFrame(a=["x", "y"], b=[1.0, 2.0], c=[3, 4], d=[true, false])
+    ref = permutedims(df, 1)
+    # allowed as all values are strings
+    df.a = collect(Any, df.a)
+    @test permutedims(df, 1) == ref
+    # this is allowed as conversion from categorical to string is allowed
+    df.a = categorical(df.a)
+    @test permutedims(df, 1) == ref
+    # allowed as all values are symbols
+    df.a = Any[:x, :y]
+    @test permutedims(df, 1) == ref
+    # mixing strings and symbols is not allowed
+    df.a = Any[:x, "y"]
+    @test_throws ArgumentError permutedims(df, 1)
+    # values that cannot be converted to a string are not allowed
+    df.a = Any['x', 'y']
+    @test_throws ArgumentError permutedims(df, 1)
+    # but allowed with strict=false
+    @test permutedims(df, 1, strict=false) == ref
 end
 
 @testset "stack view=true additional tests" begin

From 6206e7eb1b8628f3ff28d4923c8a250fdd7e47a8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Wed, 16 Feb 2022 10:19:41 +0100
Subject: [PATCH 2/6] update NEWS.md

---
 NEWS.md | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/NEWS.md b/NEWS.md
index a9e6199b0e..d88932575b 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,12 @@
+# DataFrames.jl v1.4 Release Notes
+
+## New functionalities
+
+* `permutedims` now supports a `strict` keyword argument; passing
+  `strict=false` allows any values in the column that will become
+  the new header, converting them to strings with `string` when needed
+  ([#3004](https://github.com/JuliaData/DataFrames.jl/issues/3004))
+
 # DataFrames.jl v1.3.2 Patch Release Notes
 
 ## Bug fixes

From cbf5b2e708338b6fe448d5e5d5d9b3c23edaffa9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Thu, 17 Feb 2022 09:26:10 +0100
Subject: [PATCH 3/6] Update src/abstractdataframe/reshape.jl

---
 src/abstractdataframe/reshape.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/abstractdataframe/reshape.jl b/src/abstractdataframe/reshape.jl
index a4bbcc4310..3f2c80a669 100644
--- a/src/abstractdataframe/reshape.jl
+++ b/src/abstractdataframe/reshape.jl
@@ -656,7 +656,7 @@ function Base.permutedims(df::AbstractDataFrame, src_namescol::ColumnIndex,
         new_col_names = string.(src_col_names)
     else
         try
-            new_col_names = collect(AbstractString, src_col_names)
+            new_col_names = collect(String, src_col_names)
         catch e
             if e isa MethodError && e.f === convert
                 throw(ArgumentError("all elements of src_namescol must support " *

From 7b36bda2cbc4dcdda8d78c2af1cad603905f6cce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Thu, 17 Feb 2022 09:29:32 +0100
Subject: [PATCH 4/6] Apply suggestions from code review

Co-authored-by: Milan Bouchet-Valat <nalimilan@club.fr>
---
 src/abstractdataframe/reshape.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/abstractdataframe/reshape.jl b/src/abstractdataframe/reshape.jl
index 3f2c80a669..75d168bd9c 100644
--- a/src/abstractdataframe/reshape.jl
+++ b/src/abstractdataframe/reshape.jl
@@ -589,9 +589,9 @@ with name specified by `dest_namescol`.
   with `_i` (`i` starting at 1 for the first duplicate).
 - `strict` : if `true` (the default), an error will be raised if the values
   contained in the `src_namescol` are not all `Symbol` or all `AbstractString`,
-  or can all be converted to `AbstractString` using `convert`. If `false`
+  or can all be converted to `String` using `convert`. If `false`
   then any values are accepted and the will be changed to strings using
-  the `string` function.
+  the [`string`](@ref) function.
 
 Note: The element types of columns in resulting `DataFrame`
 (other than the first column, which always has element type `String`)

From 4586f90f81e631f27e5b81de0516d4891745ea22 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Thu, 17 Feb 2022 09:29:40 +0100
Subject: [PATCH 5/6] Update src/abstractdataframe/reshape.jl

---
 src/abstractdataframe/reshape.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/abstractdataframe/reshape.jl b/src/abstractdataframe/reshape.jl
index 75d168bd9c..4cf0aaa5cc 100644
--- a/src/abstractdataframe/reshape.jl
+++ b/src/abstractdataframe/reshape.jl
@@ -660,7 +660,7 @@ function Base.permutedims(df::AbstractDataFrame, src_namescol::ColumnIndex,
         catch e
             if e isa MethodError && e.f === convert
                 throw(ArgumentError("all elements of src_namescol must support " *
-                                    "conversion to AbstractString"))
+                                    "conversion to String"))
             else
                 rethrow(e)
             end

From 14150be239fdb3b5d7bf2fc9c55c87ef220baf1d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Thu, 17 Feb 2022 12:20:18 +0100
Subject: [PATCH 6/6] Update src/abstractdataframe/reshape.jl

---
 src/abstractdataframe/reshape.jl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/abstractdataframe/reshape.jl b/src/abstractdataframe/reshape.jl
index 4cf0aaa5cc..174680d8bf 100644
--- a/src/abstractdataframe/reshape.jl
+++ b/src/abstractdataframe/reshape.jl
@@ -591,7 +591,7 @@ with name specified by `dest_namescol`.
   contained in the `src_namescol` are not all `Symbol` or all `AbstractString`,
   or can all be converted to `String` using `convert`. If `false`
   then any values are accepted and the will be changed to strings using
-  the [`string`](@ref) function.
+  the `string` function.
 
 Note: The element types of columns in resulting `DataFrame`
 (other than the first column, which always has element type `String`)