Skip to content

Commit

Permalink
use global pool of column eltype names
Browse files Browse the repository at this point in the history
  • Loading branch information
bkamins committed May 5, 2021
1 parent c3083fc commit 6685c73
Show file tree
Hide file tree
Showing 2 changed files with 51 additions and 9 deletions.
4 changes: 4 additions & 0 deletions src/abstractdataframe/io.jl
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,12 @@ function _show(io::IO, ::MIME"text/html", df::AbstractDataFrame;
if eltypes
write(io, "<tr>")
write(io, "<th></th>")
Threads.lock(TYPE2STRING_LOCK)
for j in 1:mxcol
s = html_escape(compacttype(eltype(df[!, j])))
write(io, "<th>$s</th>")
end
Threads.unlock(TYPE2STRING_LOCK)
write(io, "</tr>")
end
write(io, "</thead>")
Expand Down Expand Up @@ -281,8 +283,10 @@ function _show(io::IO, ::MIME"text/latex", df::AbstractDataFrame;
write(io, "\t\\hline\n")
if eltypes
write(io, "\t& ")
Threads.lock(TYPE2STRING_LOCK)
header = join(map(c -> latex_escape(string(compacttype(c))),
eltype.(eachcol(df)[1:mxcol])), " & ")
Threads.unlock(TYPE2STRING_LOCK)
write(io, header)
mxcol < size(df, 2) && write(io, " & ")
write(io, "\\\\\n")
Expand Down
56 changes: 47 additions & 9 deletions src/abstractdataframe/show.jl
Original file line number Diff line number Diff line change
Expand Up @@ -68,23 +68,46 @@ if VERSION < v"1.5.0-DEV.261" || VERSION < v"1.5.0-DEV.266"
end
end

"""Return compact string representation of type T"""
function compacttype(T::Type, maxwidth::Int=8, initial::Bool=true)
maxwidth = max(8, maxwidth)
# Memoization cache for `compacttype`: maps a `(T, maxwidth, initial)` key
# to the compact string previously computed for it.
# NOTE(review): `compacttype` looks entries up with the `maxwidth` it was called
# with, but stores them after `maxwidth` has been adjusted (`max(8, maxwidth)`,
# `maxwidth -= 1`) — confirm that lookup and store keys are meant to agree.
const TYPE2STRING_DICT = Dict{Any, String}()

# Lock to be held while calling `compacttype`. `compacttype` does not take it
# itself; callers acquire it around their `compacttype` calls to ensure thread safety.
# NOTE(review): the visible callers pair `Threads.lock`/`Threads.unlock` without
# `try`/`finally`, so an exception in between would leave the lock held.
const TYPE2STRING_LOCK = Threads.ReentrantLock()

"""
    compacttype(T::Type, maxwidth::Int=8, initial::Bool=true)

Return a compact string representation of type `T`.

This function is not thread safe. Callers must hold `TYPE2STRING_LOCK` while calling it.
"""
function compacttype(T::Type, maxwidth::Int=8, initial::Bool=true)
T === Any && return "Any"
T === Missing && return "Missing"

if haskey(TYPE2STRING_DICT, (T, maxwidth, initial))
return TYPE2STRING_DICT[(T, maxwidth, initial)]
end

refT = T
maxwidth = max(8, maxwidth)

sT = string(T)
textwidth(sT) ≤ maxwidth && return sT
if textwidth(sT) ≤ maxwidth
TYPE2STRING_DICT[(refT, maxwidth, initial)] = sT
return sT
end

if T >: Missing
T = nonmissingtype(T)
sT = string(T)
suffix = "?"
# ignore "?" for initial width counting but respect it for display
initial || (maxwidth -= 1)
textwidth(sT) ≤ maxwidth && return sT * suffix
if textwidth(sT) ≤ maxwidth
strrepr = sT * suffix
TYPE2STRING_DICT[(refT, maxwidth, initial)] = strrepr
return strrepr
end
else
suffix = ""
end
Expand All @@ -96,12 +119,18 @@ function compacttype(T::Type, maxwidth::Int=8, initial::Bool=true)
if startswith(sT, "CategoricalValue") || startswith(sT, "CategoricalArrays.CategoricalValue")
sT = string(nameof(T))
if textwidth(sT) ≤ maxwidth
return sT * "…" * suffix
strrepr = sT * "…" * suffix
TYPE2STRING_DICT[(refT, maxwidth, initial)] = strrepr
return strrepr
else
return (maxwidth ≥ 11 ? "Categorical…" : "Cat…") * suffix
strrepr = (maxwidth ≥ 11 ? "Categorical…" : "Cat…") * suffix
TYPE2STRING_DICT[(refT, maxwidth, initial)] = strrepr
return strrepr
end
elseif T isa Union
return "Union…" * suffix
strrepr = "Union…" * suffix
TYPE2STRING_DICT[(refT, maxwidth, initial)] = strrepr
return strrepr
else
sT = string(nameof(T))
end
Expand All @@ -116,7 +145,11 @@ function compacttype(T::Type, maxwidth::Int=8, initial::Bool=true)
break
end
end
return first(sT, stop) * "…" * suffix

strrepr = first(sT, stop) * "…" * suffix

TYPE2STRING_DICT[(refT, maxwidth, initial)] = strrepr
return strrepr
end

"""
Expand Down Expand Up @@ -171,6 +204,7 @@ function getmaxwidths(df::AbstractDataFrame,
undefstrwidth = ourstrwidth(io, "#undef", buffer, truncstring)

j = 1
Threads.lock(TYPE2STRING_LOCK)
for (name, col) in pairs(eachcol(df))
# (1) Consider length of column name
# do not truncate column name
Expand All @@ -186,12 +220,14 @@ function getmaxwidths(df::AbstractDataFrame,
end
if show_eltype
# do not truncate eltype name

maxwidths[j] = max(maxwidth, ourstrwidth(io, compacttype(eltype(col)), buffer, 0))
else
maxwidths[j] = maxwidth
end
j += 1
end
Threads.unlock(TYPE2STRING_LOCK)

# do not truncate rowlabel
if rowid isa Nothing
Expand Down Expand Up @@ -226,7 +262,9 @@ function _show(io::IO,

# NOTE: If we reuse `types` here, the time to print the first table is 2x
# more. This should be something related to type inference.
Threads.lock(TYPE2STRING_LOCK)
types_str = compacttype.(eltype.(eachcol(df)), maxwidth)
Threads.unlock(TYPE2STRING_LOCK)

if allcols && allrows
crop = :none
Expand Down

0 comments on commit 6685c73

Please sign in to comment.