diff --git a/src/abstractdataframe/io.jl b/src/abstractdataframe/io.jl
index db10902af9..2262f36e4c 100755
--- a/src/abstractdataframe/io.jl
+++ b/src/abstractdataframe/io.jl
@@ -1,3 +1,91 @@
+"""
+    DataFrames.getmaxwidths(df::AbstractDataFrame,
+                            io::IO,
+                            rowindices1::AbstractVector{Int},
+                            rowindices2::AbstractVector{Int},
+                            rowlabel::Symbol,
+                            rowid::Union{Integer, Nothing},
+                            show_eltype::Bool,
+                            buffer::IOBuffer,
+                            truncstring::Int)
+
+Calculate, for each column of an `AbstractDataFrame`, the maximum
+string width needed to render the name of that column, its element type
+(when `show_eltype` is `true`), and the longest entry in that column
+among the rows of the data frame that will be rendered to `io`.
+
+Return a `Vector{Int}` of length `size(df, 2) + 1` giving the maximum string
+widths required to render each column.
+
+NOTE: The last entry of the result vector is the string width of the
+implicit row ID column contained in every `AbstractDataFrame`.
+
+# Arguments
+- `df::AbstractDataFrame`: The data frame whose columns will be printed.
+- `io::IO`: The `IO` to which `df` is to be printed.
+- `rowindices1::AbstractVector{Int}`: A set of indices of the first
+  chunk of the `AbstractDataFrame` that would be rendered to `io`.
+- `rowindices2::AbstractVector{Int}`: A set of indices of the second
+  chunk of the `AbstractDataFrame` that would be rendered to `io`. Can
+  be empty if the `AbstractDataFrame` would be printed without any
+  ellipses.
+- `rowlabel::Symbol`: The label that will be used when rendering the
+  numeric IDs of each row. Typically, this will be set to `:Row`.
+- `rowid`: Used to handle showing `DataFrameRow`. When it is not `nothing`,
+  the row ID column is sized from `ndigits(rowid)` instead of from the
+  largest index in `rowindices1` and `rowindices2`.
+- `show_eltype`: Whether to print the column type
+  under the column name in the heading.
+- `buffer`: buffer passed around to avoid reallocations in `ourstrwidth`.
+- `truncstring`: truncation setting forwarded to `ourstrwidth` when measuring
+  cell entries and the `#undef` placeholder. Column names, element type names
+  and the row label are always measured with `0`, i.e. without truncation.
+"""
+function getmaxwidths(df::AbstractDataFrame,
+                      io::IO,
+                      rowindices1::AbstractVector{Int},
+                      rowindices2::AbstractVector{Int},
+                      rowlabel::Symbol,
+                      rowid::Union{Integer, Nothing},
+                      show_eltype::Bool,
+                      buffer::IOBuffer,
+                      truncstring::Int)
+    # One slot per data column plus a trailing slot for the row ID column.
+    maxwidths = Vector{Int}(undef, size(df, 2) + 1)
+
+    undefstrwidth = ourstrwidth(io, "#undef", buffer, truncstring)
+
+    # Compute all element type labels in one batch so that repeated element
+    # types are only stringified once.
+    ct = show_eltype ? batch_compacttype(Any[eltype(c) for c in eachcol(df)]) : String[]
+    for (col_idx, (name, col)) in enumerate(pairs(eachcol(df)))
+        # (1) Consider length of column name
+        # do not truncate column name
+        maxwidth = ourstrwidth(io, name, buffer, 0)
+
+        # (2) Consider length of longest entry in that column
+        for indices in (rowindices1, rowindices2), i in indices
+            if isassigned(col, i)
+                maxwidth = max(maxwidth, ourstrwidth(io, col[i], buffer, truncstring))
+            else
+                maxwidth = max(maxwidth, undefstrwidth)
+            end
+        end
+
+        # (3) Consider length of the element type label
+        # do not truncate eltype name
+        if show_eltype
+            maxwidth = max(maxwidth, ourstrwidth(io, ct[col_idx], buffer, 0))
+        end
+        maxwidths[col_idx] = maxwidth
+    end
+
+    # do not truncate rowlabel
+    rowlabelwidth = ourstrwidth(io, rowlabel, buffer, 0)
+    if rowid === nothing
+        rowmaxwidth1 = isempty(rowindices1) ? 0 : ndigits(maximum(rowindices1))
+        rowmaxwidth2 = isempty(rowindices2) ? 0 : ndigits(maximum(rowindices2))
+        maxwidths[end] = max(rowmaxwidth1, rowmaxwidth2, rowlabelwidth)
+    else
+        maxwidths[end] = max(ndigits(rowid), rowlabelwidth)
+    end
+
+    return maxwidths
+end
+
"""
show(io::IO, mime::MIME, df::AbstractDataFrame)
@@ -107,8 +195,9 @@ function _show(io::IO, ::MIME"text/html", df::AbstractDataFrame;
if eltypes
write(io, "
")
write(io, " | ")
+ ct = batch_compacttype(Any[eltype(df[!, idx]) for idx in 1:mxcol])
for j in 1:mxcol
- s = html_escape(compacttype(eltype(df[!, j])))
+ s = html_escape(ct[j])
write(io, "$s | ")
end
write(io, "
")
@@ -281,8 +370,8 @@ function _show(io::IO, ::MIME"text/latex", df::AbstractDataFrame;
write(io, "\t\\hline\n")
if eltypes
write(io, "\t& ")
- header = join(map(c -> latex_escape(string(compacttype(c))),
- eltype.(eachcol(df)[1:mxcol])), " & ")
+ ct = batch_compacttype(Any[eltype(df[!, idx]) for idx in 1:mxcol])
+ header = join(latex_escape.(ct), " & ")
write(io, header)
mxcol < size(df, 2) && write(io, " & ")
write(io, "\\\\\n")
diff --git a/src/abstractdataframe/show.jl b/src/abstractdataframe/show.jl
index 0ea9b8aaaf..a2fde5a092 100644
--- a/src/abstractdataframe/show.jl
+++ b/src/abstractdataframe/show.jl
@@ -68,8 +68,39 @@ if VERSION < v"1.5.0-DEV.261" || VERSION < v"1.5.0-DEV.266"
end
end
-"""Return compact string representation of type T"""
-function compacttype(T::Type, maxwidth::Int=8, initial::Bool=true)
+# For most data frames, especially wide ones, columns having the same element
+# type occur multiple times. batch_compacttype ensures that the string
+# representation of a given element type is computed only once per requested
+# width and then reused.
+
+"""
+    batch_compacttype(types::Vector{Any}, maxwidths::Vector{Int})
+
+Return a vector whose `i`-th entry is `compacttype(types[i], maxwidths[i])`.
+
+`types` and `maxwidths` must have the same length. Results are memoized on the
+`(type, maxwidth)` pair, so a type that occurs several times with the same
+width is only converted to a string once.
+"""
+function batch_compacttype(types::Vector{Any}, maxwidths::Vector{Int})
+    @assert length(types) == length(maxwidths)
+    # The width must be part of the cache key: the output of `compacttype`
+    # depends on it, so keying on the type alone would hand a column the
+    # string cropped for an earlier column with a different width.
+    cache = Dict{Tuple{Any, Int}, String}()
+    return map(types, maxwidths) do T, maxwidth
+        get!(cache, (T, maxwidth)) do
+            compacttype(T, maxwidth)
+        end
+    end
+end
+
+"""
+    batch_compacttype(types::Vector{Any}, maxwidth::Int=8)
+
+Return the `compacttype(T, maxwidth)` string for every `T` in `types`, using
+the same `maxwidth` for all of them. Each distinct type is converted only once;
+later occurrences reuse the stored string.
+"""
+function batch_compacttype(types::Vector{Any}, maxwidth::Int=8)
+    seen = Dict{Type, String}()
+    return [get!(() -> compacttype(T, maxwidth), seen, T) for T in types]
+end
+
+"""
+    compacttype(T::Type, maxwidth::Int=8)
+
+Return compact string representation of type `T`.
+
+When displaying a data frame we do not want the string representation of a type
+to be longer than `maxwidth` (values of `maxwidth` below 8 are treated as 8).
+This function implements the rules for cropping type names that are longer
+than `maxwidth`.
+"""
+function compacttype(T::Type, maxwidth::Int=8)
maxwidth = max(8, maxwidth)
T === Any && return "Any"
@@ -82,8 +113,6 @@ function compacttype(T::Type, maxwidth::Int=8, initial::Bool=true)
T = nonmissingtype(T)
sT = string(T)
suffix = "?"
- # ignore "?" for initial width counting but respect it for display
- initial || (maxwidth -= 1)
textwidth(sT) ≤ maxwidth && return sT * suffix
else
suffix = ""
@@ -119,93 +148,6 @@ function compacttype(T::Type, maxwidth::Int=8, initial::Bool=true)
return first(sT, stop) * "…" * suffix
end
-"""
- DataFrames.getmaxwidths(df::AbstractDataFrame,
- io::IO,
- rowindices1::AbstractVector{Int},
- rowindices2::AbstractVector{Int},
- rowlabel::Symbol,
- rowid::Union{Integer, Nothing},
- show_eltype::Bool,
- buffer::IOBuffer)
-
-Calculate, for each column of an AbstractDataFrame, the maximum
-string width used to render the name of that column, its type, and the
-longest entry in that column -- among the rows of the data frame
-will be rendered to IO. The widths for all columns are returned as a
-vector.
-
-Return a `Vector{Int}` giving the maximum string widths required to render
-each column, including that column's name and type.
-
-NOTE: The last entry of the result vector is the string width of the
-implicit row ID column contained in every `AbstractDataFrame`.
-
-# Arguments
-- `df::AbstractDataFrame`: The data frame whose columns will be printed.
-- `io::IO`: The `IO` to which `df` is to be printed
-- `rowindices1::AbstractVector{Int}: A set of indices of the first
- chunk of the AbstractDataFrame that would be rendered to IO.
-- `rowindices2::AbstractVector{Int}: A set of indices of the second
- chunk of the AbstractDataFrame that would be rendered to IO. Can
- be empty if the AbstractDataFrame would be printed without any
- ellipses.
-- `rowlabel::AbstractString`: The label that will be used when rendered the
- numeric ID's of each row. Typically, this will be set to "Row".
-- `rowid`: Used to handle showing `DataFrameRow`.
-- `show_eltype`: Whether to print the column type
- under the column name in the heading.
-- `buffer`: buffer passed around to avoid reallocations in `ourstrwidth`
-"""
-function getmaxwidths(df::AbstractDataFrame,
- io::IO,
- rowindices1::AbstractVector{Int},
- rowindices2::AbstractVector{Int},
- rowlabel::Symbol,
- rowid::Union{Integer, Nothing},
- show_eltype::Bool,
- buffer::IOBuffer,
- truncstring::Int)
- maxwidths = Vector{Int}(undef, size(df, 2) + 1)
-
- undefstrwidth = ourstrwidth(io, "#undef", buffer, truncstring)
-
- j = 1
- for (name, col) in pairs(eachcol(df))
- # (1) Consider length of column name
- # do not truncate column name
- maxwidth = ourstrwidth(io, name, buffer, 0)
-
- # (2) Consider length of longest entry in that column
- for indices in (rowindices1, rowindices2), i in indices
- if isassigned(col, i)
- maxwidth = max(maxwidth, ourstrwidth(io, col[i], buffer, truncstring))
- else
- maxwidth = max(maxwidth, undefstrwidth)
- end
- end
- if show_eltype
- # do not truncate eltype name
- maxwidths[j] = max(maxwidth, ourstrwidth(io, compacttype(eltype(col)), buffer, 0))
- else
- maxwidths[j] = maxwidth
- end
- j += 1
- end
-
- # do not truncate rowlabel
- if rowid isa Nothing
- rowmaxwidth1 = isempty(rowindices1) ? 0 : ndigits(maximum(rowindices1))
- rowmaxwidth2 = isempty(rowindices2) ? 0 : ndigits(maximum(rowindices2))
- maxwidths[j] = max(max(rowmaxwidth1, rowmaxwidth2),
- ourstrwidth(io, rowlabel, buffer, 0))
- else
- maxwidths[j] = max(ndigits(rowid), ourstrwidth(io, rowlabel, buffer, 0))
- end
-
- return maxwidths
-end
-
function _show(io::IO,
df::AbstractDataFrame;
allrows::Bool = !get(io, :limit, false),
@@ -220,13 +162,10 @@ function _show(io::IO,
_check_consistency(df)
names_str = names(df)
- names_len = textwidth.(names_str)
- maxwidth = max.(9, names_len)
- types = eltype.(eachcol(df))
-
- # NOTE: If we reuse `types` here, the time to print the first table is 2x
- # more. This should be something related to type inference.
- types_str = compacttype.(eltype.(eachcol(df)), maxwidth)
+ names_len = Int[textwidth(n) for n in names_str]
+ maxwidth = Int[max(9, nl) for nl in names_len]
+ types = Any[eltype(c) for c in eachcol(df)]
+ types_str = batch_compacttype(types, maxwidth)
if allcols && allrows
crop = :none