Skip to content

Commit

Permalink
ENH: set render limits on Styler to automatically trim dataframes (p…
Browse files Browse the repository at this point in the history
  • Loading branch information
attack68 authored and JulianWgs committed Jul 3, 2021
1 parent ff2e57e commit b94642d
Show file tree
Hide file tree
Showing 5 changed files with 208 additions and 13 deletions.
2 changes: 2 additions & 0 deletions doc/source/user_guide/options.rst
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,8 @@ styler.sparse.index True "Sparsify" MultiIndex displ
elements in outer levels within groups).
styler.sparse.columns True "Sparsify" MultiIndex display for columns
in Styler output.
styler.render.max_elements 262144 Maximum number of datapoints that Styler will render,
trimming either rows, columns or both to fit.
======================================= ============ ==================================


Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ properly format HTML and eliminate some inconsistencies (:issue:`39942` :issue:`

:class:`.Styler` has also been made compatible with non-unique index or columns, at least for as many features as are fully compatible, with others made only partially compatible (:issue:`41269`).
One also has greater control of the display through separate sparsification of the index or columns, using the new 'styler' options context (:issue:`41142`).
Render trimming has also been added for large numbers of data elements to avoid browser overload (:issue:`40712`).

We have added an extension to allow LaTeX styling as an alternative to CSS styling and a method :meth:`.Styler.to_latex`
which renders the necessary LaTeX format including built-up styles. An additional file io function :meth:`.Styler.to_html` has been added for convenience (:issue:`40312`).
Expand Down
13 changes: 13 additions & 0 deletions pandas/core/config_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -752,9 +752,22 @@ def register_converter_cb(key):
display each explicit level element in a hierarchical key for each column.
"""

# Description text passed to ``register_option`` for "styler.render.max_elements".
styler_max_elements = """
: int
The maximum number of data-cell (<td>) elements that will be rendered before
trimming will occur over columns, rows or both if needed.
"""

# Every option registered inside this context is created under the "styler" prefix.
with cf.config_prefix("styler"):
    cf.register_option("sparse.index", True, styler_sparse_index_doc, validator=bool)

    cf.register_option(
        "sparse.columns", True, styler_sparse_columns_doc, validator=bool
    )

    # Default element budget is 2 ** 18 (262144); values are validated by
    # ``is_nonnegative_int``.
    cf.register_option(
        "render.max_elements",
        2 ** 18,
        styler_max_elements,
        validator=is_nonnegative_int,
    )
156 changes: 147 additions & 9 deletions pandas/io/formats/style_render.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,8 @@ def _translate(self, sparse_index: bool, sparse_cols: bool, blank: str = "&nbsp;
ROW_HEADING_CLASS = "row_heading"
COL_HEADING_CLASS = "col_heading"
INDEX_NAME_CLASS = "index_name"
TRIMMED_COL_CLASS = "col_trim"
TRIMMED_ROW_CLASS = "row_trim"

DATA_CLASS = "data"
BLANK_CLASS = "blank"
Expand All @@ -188,15 +190,34 @@ def _translate(self, sparse_index: bool, sparse_cols: bool, blank: str = "&nbsp;
"caption": self.caption,
}

max_elements = get_option("styler.render.max_elements")
max_rows, max_cols = _get_trimming_maximums(
len(self.data.index), len(self.data.columns), max_elements
)

head = self._translate_header(
BLANK_CLASS, BLANK_VALUE, INDEX_NAME_CLASS, COL_HEADING_CLASS, sparse_cols
BLANK_CLASS,
BLANK_VALUE,
INDEX_NAME_CLASS,
COL_HEADING_CLASS,
sparse_cols,
max_cols,
TRIMMED_COL_CLASS,
)
d.update({"head": head})

self.cellstyle_map: DefaultDict[tuple[CSSPair, ...], list[str]] = defaultdict(
list
)
body = self._translate_body(DATA_CLASS, ROW_HEADING_CLASS, sparse_index)
body = self._translate_body(
DATA_CLASS,
ROW_HEADING_CLASS,
sparse_index,
max_rows,
max_cols,
TRIMMED_ROW_CLASS,
TRIMMED_COL_CLASS,
)
d.update({"body": body})

cellstyle: list[dict[str, CSSList | list[str]]] = [
Expand Down Expand Up @@ -227,6 +248,8 @@ def _translate_header(
index_name_class: str,
col_heading_class: str,
sparsify_cols: bool,
max_cols: int,
trimmed_col_class: str,
):
"""
Build each <tr> within table <head> as a list
Expand All @@ -252,6 +275,10 @@ def _translate_header(
CSS class added to elements within the column_names section of structure.
sparsify_cols : bool
Whether column_headers section will add colspan attributes (>1) to elements.
max_cols : int
Maximum number of columns to render. If exceeded will contain `...` filler.
trimmed_col_class : str
CSS class added to elements within a column including `...` trimmed vals.
Returns
-------
Expand All @@ -260,10 +287,10 @@ def _translate_header(
"""
# for sparsifying a MultiIndex
col_lengths = _get_level_lengths(
self.columns, sparsify_cols, self.hidden_columns
self.columns, sparsify_cols, max_cols, self.hidden_columns
)

clabels = self.data.columns.tolist()
clabels = self.data.columns.tolist()[:max_cols] # slice to allow trimming
if self.data.columns.nlevels == 1:
clabels = [[x] for x in clabels]
clabels = list(zip(*clabels))
Expand Down Expand Up @@ -300,6 +327,18 @@ def _translate_header(
)
for c, value in enumerate(clabels[r])
]

if len(self.data.columns) > max_cols:
# add an extra column with `...` value to indicate trimming
column_headers.append(
_element(
"th",
f"{col_heading_class} level{r} {trimmed_col_class}",
"...",
True,
attributes="",
)
)
head.append(index_blanks + column_name + column_headers)

# 2) index names
Expand All @@ -318,21 +357,33 @@ def _translate_header(
for c, name in enumerate(self.data.index.names)
]

if len(self.data.columns) <= max_cols:
blank_len = len(clabels[0])
else:
blank_len = len(clabels[0]) + 1 # to allow room for `...` trim col

column_blanks = [
_element(
"th",
f"{blank_class} col{c}",
blank_value,
c not in self.hidden_columns,
)
for c in range(len(clabels[0]))
for c in range(blank_len)
]
head.append(index_names + column_blanks)

return head

def _translate_body(
self, data_class: str, row_heading_class: str, sparsify_index: bool
self,
data_class: str,
row_heading_class: str,
sparsify_index: bool,
max_rows: int,
max_cols: int,
trimmed_row_class: str,
trimmed_col_class: str,
):
"""
Build each <tr> within table <body> as a list
Expand Down Expand Up @@ -360,14 +411,52 @@ def _translate_body(
The associated HTML elements needed for template rendering.
"""
# for sparsifying a MultiIndex
idx_lengths = _get_level_lengths(self.index, sparsify_index)
idx_lengths = _get_level_lengths(self.index, sparsify_index, max_rows)

rlabels = self.data.index.tolist()
rlabels = self.data.index.tolist()[:max_rows] # slice to allow trimming
if self.data.index.nlevels == 1:
rlabels = [[x] for x in rlabels]

body = []
for r, row_tup in enumerate(self.data.itertuples()):
if r >= max_rows: # used only to add a '...' trimmed row:
index_headers = [
_element(
"th",
f"{row_heading_class} level{c} {trimmed_row_class}",
"...",
not self.hidden_index,
attributes="",
)
for c in range(self.data.index.nlevels)
]

data = [
_element(
"td",
f"{data_class} col{c} {trimmed_row_class}",
"...",
(c not in self.hidden_columns),
attributes="",
)
for c in range(max_cols)
]

if len(self.data.columns) > max_cols:
# columns are also trimmed so we add the final element
data.append(
_element(
"td",
f"{data_class} {trimmed_row_class} {trimmed_col_class}",
"...",
True,
attributes="",
)
)

body.append(index_headers + data)
break

index_headers = [
_element(
"th",
Expand All @@ -386,6 +475,18 @@ def _translate_body(

data = []
for c, value in enumerate(row_tup[1:]):
if c >= max_cols:
data.append(
_element(
"td",
f"{data_class} row{r} {trimmed_col_class}",
"...",
True,
attributes="",
)
)
break

# add custom classes from cell context
cls = ""
if (r, c) in self.cell_context:
Expand Down Expand Up @@ -655,8 +756,40 @@ def _element(
}


def _get_trimming_maximums(rn, cn, max_elements, scaling_factor=0.8):
"""
Recursively reduce the number of rows and columns to satisfy max elements.
Parameters
----------
rn, cn : int
The number of input rows / columns
max_elements : int
The number of allowable elements
Returns
-------
rn, cn : tuple
New rn and cn values that satisfy the max_elements constraint
"""

def scale_down(rn, cn):
if cn >= rn:
return rn, int(cn * scaling_factor)
else:
return int(rn * scaling_factor), cn

while rn * cn > max_elements:
rn, cn = scale_down(rn, cn)

return rn, cn


def _get_level_lengths(
index: Index, sparsify: bool, hidden_elements: Sequence[int] | None = None
index: Index,
sparsify: bool,
max_index: int,
hidden_elements: Sequence[int] | None = None,
):
"""
Given an index, find the level length for each element.
Expand All @@ -667,6 +800,8 @@ def _get_level_lengths(
Index or columns to determine lengths of each element
sparsify : bool
Whether to hide or show each distinct element in a MultiIndex
max_index : int
The maximum number of elements to analyse along the index due to trimming
hidden_elements : sequence of int
Index positions of elements hidden from display in the index affecting
length
Expand All @@ -693,6 +828,9 @@ def _get_level_lengths(

for i, lvl in enumerate(levels):
for j, row in enumerate(lvl):
if j >= max_index:
# stop the loop due to display trimming
break
if not sparsify:
lengths[(i, j)] = 1
elif (row is not lib.no_default) and (j not in hidden_elements):
Expand Down
49 changes: 45 additions & 4 deletions pandas/tests/io/formats/style/test_style.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
)
from pandas.io.formats.style_render import (
_get_level_lengths,
_get_trimming_maximums,
maybe_convert_css_to_tuples,
non_reducing_slice,
)
Expand Down Expand Up @@ -115,6 +116,46 @@ def test_mi_styler_sparsify_options(mi_styler):
assert html1 != html2


def test_trimming_maximum():
    # (rows, cols, max_elements) -> expected trimmed (rows, cols) when each
    # reduction step halves the larger dimension
    cases = [
        ((100, 100, 100), (12, 6)),
        ((1000, 3, 750), (250, 3)),
    ]
    for (n_rows, n_cols, limit), expected in cases:
        rows, cols = _get_trimming_maximums(
            n_rows, n_cols, limit, scaling_factor=0.5
        )
        assert (rows, cols) == expected


def test_render_trimming():
    # Each scenario pairs a frame shape with the expected number of cells in a
    # rendered row; a budget of 6 elements trims both shapes to 3 rows x 2 cols.
    scenarios = [
        ((60, 2), 3),  # index + 2 data cols (columns already fit)
        ((12, 10), 4),  # index + 2 data cols + trimming col
    ]
    for shape, n_cells in scenarios:
        frame = DataFrame(np.arange(120).reshape(shape))
        with pd.option_context("styler.render.max_elements", 6):
            translated = frame.style._translate(True, True)
        assert len(translated["head"][0]) == n_cells
        assert len(translated["body"]) == 4  # 3 data rows + trimming row
        assert len(translated["body"][0]) == n_cells


def test_render_trimming_mi():
    def contains(cell, expected):
        # True when every key/value pair of `expected` is present in `cell`
        return expected.items() <= cell.items()

    labels = MultiIndex.from_product([[1, 2], [1, 2, 3]])
    frame = DataFrame(np.arange(36).reshape(6, 6), columns=labels, index=labels)
    with pd.option_context("styler.render.max_elements", 4):
        translated = frame.style._translate(True, True)

    # body: 2 index levels + 2 data cols + trimming col
    assert len(translated["body"][0]) == 5
    assert contains(translated["body"][0][0], {"attributes": 'rowspan="2"'})
    assert contains(translated["body"][0][4], {"class": "data row0 col_trim"})
    assert contains(translated["body"][2][4], {"class": "data row_trim col_trim"})
    assert len(translated["body"]) == 3  # 2 data rows + trimming row

    # head: 2 index blanks + 2 column headers + trimming col
    assert len(translated["head"][0]) == 5
    assert contains(translated["head"][0][2], {"attributes": 'colspan="2"'})


class TestStyler:
def setup_method(self, method):
np.random.seed(24)
Expand Down Expand Up @@ -939,7 +980,7 @@ def test_get_level_lengths(self):
(1, 4): 1,
(1, 5): 1,
}
result = _get_level_lengths(index, sparsify=True)
result = _get_level_lengths(index, sparsify=True, max_index=100)
tm.assert_dict_equal(result, expected)

expected = {
Expand All @@ -956,7 +997,7 @@ def test_get_level_lengths(self):
(1, 4): 1,
(1, 5): 1,
}
result = _get_level_lengths(index, sparsify=False)
result = _get_level_lengths(index, sparsify=False, max_index=100)
tm.assert_dict_equal(result, expected)

def test_get_level_lengths_un_sorted(self):
Expand All @@ -970,7 +1011,7 @@ def test_get_level_lengths_un_sorted(self):
(1, 2): 1,
(1, 3): 1,
}
result = _get_level_lengths(index, sparsify=True)
result = _get_level_lengths(index, sparsify=True, max_index=100)
tm.assert_dict_equal(result, expected)

expected = {
Expand All @@ -983,7 +1024,7 @@ def test_get_level_lengths_un_sorted(self):
(1, 2): 1,
(1, 3): 1,
}
result = _get_level_lengths(index, sparsify=False)
result = _get_level_lengths(index, sparsify=False, max_index=100)
tm.assert_dict_equal(result, expected)

def test_mi_sparse_index_names(self):
Expand Down

0 comments on commit b94642d

Please sign in to comment.