diff --git a/NEWS.md b/NEWS.md index 0f45f54ebd..df3638ba7c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -57,6 +57,13 @@ * `leftjoin!` performing a left join of two data frame objects by updating the left data frame with the joined columns from right data frame. ([#2843](https://github.com/JuliaData/DataFrames.jl/pull/2843)) +* the `DataFrame` constructor when column names are passed to it as a second + argument now determines if a passed vector of column names is valid based on + its contents and not element type + ([#2829](https://github.com/JuliaData/DataFrames.jl/pull/2859)) +* the `DataFrame` constructor when matrix is passed to it as a first + argument now allows `copycols` keyword argument + ([#2829](https://github.com/JuliaData/DataFrames.jl/pull/2859)) ## Bug fixes diff --git a/src/dataframe/dataframe.jl b/src/dataframe/dataframe.jl index e5a3360c2e..2e5f137de0 100755 --- a/src/dataframe/dataframe.jl +++ b/src/dataframe/dataframe.jl @@ -14,7 +14,7 @@ DataFrame(ds::AbstractDict; copycols::Bool=true) DataFrame(kwargs..., copycols::Bool=true) DataFrame(columns::AbstractVecOrMat, - names::AbstractVector{<:Union{AbstractVector, Symbol}}; + names::AbstractVector; makeunique::Bool=false, copycols::Bool=true) DataFrame(table; copycols::Union{Bool, Nothing}=nothing) @@ -56,7 +56,9 @@ the appropriate length. As a particular rule values stored in a `Ref` or a It is also allowed to pass a vector of vectors or a matrix as as the first argument. In this case the second argument must be a vector of `Symbol`s or strings specifying column names, or the symbol `:auto` -to generate column names `x1`, `x2`, ... automatically. +to generate column names `x1`, `x2`, ... automatically. Note that in this case +if the first argument is a matrix and `copycols=false` the columns of the created +`DataFrame` will be views of columns the source matrix. If a single positional argument is passed to a `DataFrame` constructor then it is assumed to be of type that implements the @@ -322,9 +324,16 @@ function DataFrame(columns::AbstractVector, cnames::AbstractVector{Symbol}; copycols=copycols) end -DataFrame(columns::AbstractVector, cnames::AbstractVector{<:AbstractString}; +function _name2symbol(str::AbstractVector) + if !(all(x -> x isa AbstractString, str) || all(x -> x isa Symbol, str)) + throw(ArgumentError("All passed column names must be strings or Symbols")) + end + return Symbol[Symbol(s) for s in str] +end + +DataFrame(columns::AbstractVector, cnames::AbstractVector; makeunique::Bool=false, copycols::Bool=true) = - DataFrame(columns, Symbol.(cnames), makeunique=makeunique, copycols=copycols) + DataFrame(columns, _name2symbol(cnames), makeunique=makeunique, copycols=copycols) DataFrame(columns::AbstractVector{<:AbstractVector}, cnames::AbstractVector{Symbol}; makeunique::Bool=false, copycols::Bool=true)::DataFrame = @@ -332,9 +341,9 @@ DataFrame(columns::AbstractVector{<:AbstractVector}, cnames::AbstractVector{Symb Index(convert(Vector{Symbol}, cnames), makeunique=makeunique), copycols=copycols) -DataFrame(columns::AbstractVector{<:AbstractVector}, cnames::AbstractVector{<:AbstractString}; +DataFrame(columns::AbstractVector{<:AbstractVector}, cnames::AbstractVector; makeunique::Bool=false, copycols::Bool=true) = - DataFrame(columns, Symbol.(cnames); makeunique=makeunique, copycols=copycols) + DataFrame(columns, _name2symbol(cnames); makeunique=makeunique, copycols=copycols) function DataFrame(columns::AbstractVector, cnames::Symbol; copycols::Bool=true) if cnames !== :auto @@ -346,22 +355,25 @@ function DataFrame(columns::AbstractVector, cnames::Symbol; copycols::Bool=true) return DataFrame(columns, gennames(length(columns)), copycols=copycols) end -DataFrame(columns::AbstractMatrix, cnames::AbstractVector{Symbol}; makeunique::Bool=false) = - DataFrame(AbstractVector[columns[:, i] for i in 1:size(columns, 2)], cnames, - makeunique=makeunique, copycols=false) +function DataFrame(columns::AbstractMatrix, cnames::AbstractVector{Symbol}; + makeunique::Bool=false, copycols::Bool=true) + getter = copycols ? getindex : view + return DataFrame(AbstractVector[getter(columns, :, i) for i in 1:size(columns, 2)], + cnames, makeunique=makeunique, copycols=false) +end -DataFrame(columns::AbstractMatrix, cnames::AbstractVector{<:AbstractString}; - makeunique::Bool=false) = - DataFrame(columns, Symbol.(cnames); makeunique=makeunique) +DataFrame(columns::AbstractMatrix, cnames::AbstractVector; + makeunique::Bool=false, copycols::Bool=true) = + DataFrame(columns, _name2symbol(cnames); makeunique=makeunique, copycols=copycols) -function DataFrame(columns::AbstractMatrix, cnames::Symbol) +function DataFrame(columns::AbstractMatrix, cnames::Symbol; copycols::Bool=true) if cnames !== :auto throw(ArgumentError("if the first positional argument to DataFrame " * "constructor is a matrix and a second " * "positional argument is passed then the second " * "argument must be a vector of column names or :auto")) end - return DataFrame(columns, gennames(size(columns, 2)), makeunique=false) + return DataFrame(columns, gennames(size(columns, 2)), makeunique=false, copycols=copycols) end # Discontinued constructors @@ -388,7 +400,6 @@ DataFrame(column_eltypes::AbstractVector{<:Type}, cnames::AbstractVector{<:Abstr "not supported. Pass explicitly created columns to a " * "`DataFrame` constructor instead.")) - ############################################################################## ## ## AbstractDataFrame interface diff --git a/test/constructors.jl b/test/constructors.jl index 9a4b575eb5..521c07baea 100644 --- a/test/constructors.jl +++ b/test/constructors.jl @@ -315,8 +315,6 @@ end @test_throws ArgumentError DataFrame([1:3, 1], ["x1", "x2"], copycols=copycolsarg) @test_throws ErrorException DataFrame([1:3, 1], copycols=copycolsarg) end - - @test_throws MethodError DataFrame([1 2; 3 4], :auto, copycols=false) end @testset "column types" begin @@ -363,6 +361,8 @@ end @test_throws ArgumentError DataFrame([Int, Float64], [:a, :b], 2) @test_throws ArgumentError DataFrame([Int, Float64], ["a", "b"]) @test_throws ArgumentError DataFrame([Int, Float64], ["a", "b"], 2) + @test_throws ArgumentError DataFrame([Int, Float64], ["a", :b]) + @test_throws MethodError DataFrame([Int, Float64], ["a", :b], 2) end @testset "threading correctness tests" begin @@ -372,4 +372,42 @@ end end end +@testset "non-specific vector of column names" begin + ref = DataFrame(a=1:2, b=3:4) + for x in ([1 3; 2 4], [[1, 2], [3, 4]], [1:2, 3:4], Any[[1, 2], [3, 4]], Any[1:2, 3:4]) + @test DataFrame(x, Any[:a, :b]) == ref + @test DataFrame(x, Any["a", "b"]) == ref + @test DataFrame(x, Union{String, Symbol}[:a, :b]) == ref + @test DataFrame(x, Union{String, Symbol}["a", "b"]) == ref + @test_throws ArgumentError DataFrame(x, Any["a", :b]) + @test_throws ArgumentError DataFrame(x, Union{String, Symbol}["a", :b]) + end + @test DataFrame([], []) == DataFrame() + @test DataFrame(fill(0, 0, 0), []) == DataFrame() + @test_throws ArgumentError DataFrame(Type[], Symbol[]) + @test_throws ArgumentError DataFrame(Type[], String[]) + @test_throws MethodError DataFrame(Type[], []) +end + +@testset "DataFrame matrix constructor copycols kwarg" begin + m = [1 4; 2 5; 3 6] + refdf = DataFrame(x1=1:3, x2=4:6) + for cnames in ([:x1, :x2], ["x1", "x2"], Any[:x1, :x2], Any["x1", "x2"], :auto) + df = DataFrame(m, cnames) + @test df == refdf + @test df.x1 isa Vector{Int} + @test df.x2 isa Vector{Int} + df = DataFrame(m, cnames, copycols=true) + @test df == refdf + @test df.x1 isa Vector{Int} + @test df.x2 isa Vector{Int} + df = DataFrame(m, cnames, copycols=false) + @test df == refdf + @test df.x1 isa SubArray{Int, 1, Matrix{Int}} + @test df.x2 isa SubArray{Int, 1, Matrix{Int}} + @test parent(df.x1) === m + @test parent(df.x2) === m + end +end + end # module