Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: set render limits on Styler to automatically trim dataframes #41635

Merged
merged 15 commits into from
Jun 4, 2021
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/user_guide/options.rst
Original file line number Diff line number Diff line change
Expand Up @@ -487,6 +487,8 @@ styler.sparse.index True "Sparsify" MultiIndex displ
elements in outer levels within groups).
styler.sparse.columns True "Sparsify" MultiIndex display for columns
in Styler output.
styler.max.elements 262144 Maximum number of datapoints that Styler will render
jreback marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

styler.display.max_elements ? i think something like that is more instructive no?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

or
styler.render.max_elements (as all things are display based)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

agreed, even maybe styler.max_elements, but changed to yours

trimming either rows, columns or both to fit.
======================================= ============ ==================================


Expand Down
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,9 @@ The :meth:`.Styler.format` has had upgrades to easily format missing data,
precision, and perform HTML escaping (:issue:`40437` :issue:`40134`). There have been numerous other bug fixes to
properly format HTML and eliminate some inconsistencies (:issue:`39942` :issue:`40356` :issue:`39807` :issue:`39889` :issue:`39627`)

:class:`.Styler` has also been compatible with non-unique index or columns, at least for as many features as are fully compatible, others made only partially compatible (:issue:`41269`).
:class:`.Styler` has also been made compatible with non-unique index or columns, with as many features as possible made fully compatible and the others made only partially compatible (:issue:`41269`).
One also has greater control of the display through separate sparsification of the index or columns, using the new 'styler' options context (:issue:`41142`).
Render trimming has also been added for large numbers of data elements to avoid browser overload (:issue:`40712`).

We have added an extension to allow LaTeX styling as an alternative to CSS styling and a method :meth:`.Styler.to_latex`
which renders the necessary LaTeX format including built-up styles. An additional file io function :meth:`.Styler.to_html` has been added for convenience (:issue:`40312`).
Expand Down
10 changes: 10 additions & 0 deletions pandas/core/config_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -743,9 +743,19 @@ def register_converter_cb(key):
display each explicit level element in a hierarchical key for each column.
"""

# Description shown to users for the ``styler.max.elements`` option.
styler_max_elements = """
: int
    The maximum number of data-cell (<td>) elements that will be rendered before
    trimming will occur over columns, rows or both.
"""

with cf.config_prefix("styler"):
    cf.register_option("sparse.index", True, styler_sparse_index_doc, validator=bool)

    cf.register_option(
        "sparse.columns", True, styler_sparse_columns_doc, validator=bool
    )

    # ``max.elements`` is registered with its own description (not the sparse
    # columns one) and is validated as a non-negative integer, because the
    # Styler trimming loop compares it against a rows * columns product.
    cf.register_option(
        "max.elements",
        2 ** 18,
        styler_max_elements,
        validator=cf.is_nonnegative_int,
    )
156 changes: 147 additions & 9 deletions pandas/io/formats/style_render.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,8 @@ def _translate(self, sparse_index: bool, sparse_cols: bool, blank: str = "&nbsp;
ROW_HEADING_CLASS = "row_heading"
COL_HEADING_CLASS = "col_heading"
INDEX_NAME_CLASS = "index_name"
TRIMMED_COL_CLASS = "col_trim"
TRIMMED_ROW_CLASS = "row_trim"

DATA_CLASS = "data"
BLANK_CLASS = "blank"
Expand All @@ -188,15 +190,34 @@ def _translate(self, sparse_index: bool, sparse_cols: bool, blank: str = "&nbsp;
"caption": self.caption,
}

max_elements = get_option("styler.max.elements")
max_rows, max_cols = _get_trimming_maximums(
len(self.data.index), len(self.data.columns), max_elements
)

head = self._translate_header(
BLANK_CLASS, BLANK_VALUE, INDEX_NAME_CLASS, COL_HEADING_CLASS, sparse_cols
BLANK_CLASS,
BLANK_VALUE,
INDEX_NAME_CLASS,
COL_HEADING_CLASS,
sparse_cols,
max_cols,
TRIMMED_COL_CLASS,
)
d.update({"head": head})

self.cellstyle_map: DefaultDict[tuple[CSSPair, ...], list[str]] = defaultdict(
list
)
body = self._translate_body(DATA_CLASS, ROW_HEADING_CLASS, sparse_index)
body = self._translate_body(
DATA_CLASS,
ROW_HEADING_CLASS,
sparse_index,
max_rows,
max_cols,
TRIMMED_ROW_CLASS,
TRIMMED_COL_CLASS,
)
d.update({"body": body})

cellstyle: list[dict[str, CSSList | list[str]]] = [
Expand Down Expand Up @@ -227,6 +248,8 @@ def _translate_header(
index_name_class: str,
col_heading_class: str,
sparsify_cols: bool,
max_cols: int,
trimmed_col_class: str,
):
"""
Build each <tr> within table <head> as a list
Expand All @@ -252,6 +275,10 @@ def _translate_header(
CSS class added to elements within the column_names section of structure.
sparsify_cols : bool
Whether column_headers section will add colspan attributes (>1) to elements.
max_cols : int
Maximum number of columns to render. If exceeded will contain `...` filler.
trimmed_col_class : str
CSS class added to elements within a column including `...` trimmed vals.

Returns
-------
Expand All @@ -260,10 +287,10 @@ def _translate_header(
"""
# for sparsifying a MultiIndex
col_lengths = _get_level_lengths(
self.columns, sparsify_cols, self.hidden_columns
self.columns, sparsify_cols, max_cols, self.hidden_columns
)

clabels = self.data.columns.tolist()
clabels = self.data.columns.tolist()[:max_cols] # slice to allow trimming
if self.data.columns.nlevels == 1:
clabels = [[x] for x in clabels]
clabels = list(zip(*clabels))
Expand Down Expand Up @@ -300,6 +327,18 @@ def _translate_header(
)
for c, value in enumerate(clabels[r])
]

if len(self.data.columns) > max_cols:
# add an extra column with `...` value to indicate trimming
column_headers.append(
_element(
"th",
f"{col_heading_class} level{r} {trimmed_col_class}",
"...",
True,
attributes="",
)
)
head.append(index_blanks + column_name + column_headers)

# 2) index names
Expand All @@ -318,21 +357,33 @@ def _translate_header(
for c, name in enumerate(self.data.index.names)
]

if len(self.data.columns) <= max_cols:
blank_len = len(clabels[0])
else:
blank_len = len(clabels[0]) + 1 # to allow room for `...` trim col

column_blanks = [
_element(
"th",
f"{blank_class} col{c}",
blank_value,
c not in self.hidden_columns,
)
for c in range(len(clabels[0]))
for c in range(blank_len)
]
head.append(index_names + column_blanks)

return head

def _translate_body(
self, data_class: str, row_heading_class: str, sparsify_index: bool
self,
data_class: str,
row_heading_class: str,
sparsify_index: bool,
max_rows: int,
max_cols: int,
trimmed_row_class: str,
trimmed_col_class: str,
):
"""
Build each <tr> within table <body> as a list
Expand Down Expand Up @@ -360,14 +411,52 @@ def _translate_body(
The associated HTML elements needed for template rendering.
"""
# for sparsifying a MultiIndex
idx_lengths = _get_level_lengths(self.index, sparsify_index)
idx_lengths = _get_level_lengths(self.index, sparsify_index, max_rows)

rlabels = self.data.index.tolist()
rlabels = self.data.index.tolist()[:max_rows] # slice to allow trimming
if self.data.index.nlevels == 1:
rlabels = [[x] for x in rlabels]

body = []
for r, row_tup in enumerate(self.data.itertuples()):
if r >= max_rows: # used only to add a '...' trimmed row:
index_headers = [
_element(
"th",
f"{row_heading_class} level{c} {trimmed_row_class}",
"...",
not self.hidden_index,
attributes="",
)
for c in range(self.data.index.nlevels)
]

data = [
_element(
"td",
f"{data_class} col{c} {trimmed_row_class}",
"...",
(c not in self.hidden_columns),
attributes="",
)
for c in range(max_cols)
]

if len(self.data.columns) > max_cols:
# columns are also trimmed so we add the final element
data.append(
_element(
"td",
f"{data_class} {trimmed_row_class} {trimmed_col_class}",
"...",
True,
attributes="",
)
)

body.append(index_headers + data)
break

index_headers = [
_element(
"th",
Expand All @@ -386,6 +475,18 @@ def _translate_body(

data = []
for c, value in enumerate(row_tup[1:]):
if c >= max_cols:
data.append(
_element(
"td",
f"{data_class} row{r} {trimmed_col_class}",
"...",
True,
attributes="",
)
)
break

# add custom classes from cell context
cls = ""
if (r, c) in self.cell_context:
Expand Down Expand Up @@ -638,8 +739,40 @@ def _element(
}


def _get_trimming_maximums(rn, cn, max_elements, scaling_factor=0.8):
"""
Recursively reduce the number of rows and columns to satisfy max elements.

Parameters
----------
rn, cn : int
The number of input rows / columns
max_elements : int
The number of allowable elements

Returns
-------
rn, cn : tuple
New rn and cn values that satisfy the max_elements constraint
"""

def scale_down(rn, cn):
if cn >= rn:
return rn, int(cn * scaling_factor)
else:
return int(rn * scaling_factor), cn

while rn * cn > max_elements:
rn, cn = scale_down(rn, cn)

return rn, cn


def _get_level_lengths(
index: Index, sparsify: bool, hidden_elements: Sequence[int] | None = None
index: Index,
sparsify: bool,
max_index: int,
hidden_elements: Sequence[int] | None = None,
):
"""
Given an index, find the level length for each element.
Expand All @@ -650,6 +783,8 @@ def _get_level_lengths(
Index or columns to determine lengths of each element
sparsify : bool
Whether to hide or show each distinct element in a MultiIndex
max_index : int
The maximum number of elements to analyse along the index due to trimming
hidden_elements : sequence of int
Index positions of elements hidden from display in the index affecting
length
Expand All @@ -676,6 +811,9 @@ def _get_level_lengths(

for i, lvl in enumerate(levels):
for j, row in enumerate(lvl):
if j >= max_index:
# stop the loop due to display trimming
break
if not sparsify:
lengths[(i, j)] = 1
elif (row is not lib.no_default) and (j not in hidden_elements):
Expand Down
49 changes: 45 additions & 4 deletions pandas/tests/io/formats/style/test_style.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
)
from pandas.io.formats.style_render import (
_get_level_lengths,
_get_trimming_maximums,
maybe_convert_css_to_tuples,
non_reducing_slice,
)
Expand Down Expand Up @@ -115,6 +116,46 @@ def test_mi_styler_sparsify_options(mi_styler):
assert html1 != html2


def test_trimming_maximum():
    # each case pairs (rows, cols, max_elements) with the expected trimmed
    # (rows, cols) when the larger dimension is halved at every step
    cases = [
        ((100, 100, 100), (12, 6)),
        ((1000, 3, 750), (250, 3)),
    ]
    for (n_rows, n_cols, limit), expected in cases:
        trimmed = _get_trimming_maximums(n_rows, n_cols, limit, scaling_factor=0.5)
        assert trimmed == expected


def test_render_trimming():
    # tall frame (60 x 2) limited to 6 elements: only the rows get trimmed
    tall = DataFrame(np.arange(120).reshape(60, 2))
    with pd.option_context("styler.max.elements", 6):
        translated = tall.style._translate(True, True)
    assert len(translated["head"][0]) == 3  # index + 2 data cols
    assert len(translated["body"]) == 4  # 3 data rows + trimming row
    assert len(translated["body"][0]) == 3  # index + 2 data cols

    # wider frame (12 x 10) limited to 6 elements: rows and columns both trimmed
    wide = DataFrame(np.arange(120).reshape(12, 10))
    with pd.option_context("styler.max.elements", 6):
        translated = wide.style._translate(True, True)
    assert len(translated["head"][0]) == 4  # index + 2 data cols + trimming col
    assert len(translated["body"]) == 4  # 3 data rows + trimming row
    assert len(translated["body"][0]) == 4  # index + 2 data cols + trimming col


def test_render_trimming_mi():
    # 6 x 6 frame with the same two-level MultiIndex on both axes, limited to
    # 4 rendered elements so that rows and columns are both trimmed
    levels = MultiIndex.from_product([[1, 2], [1, 2, 3]])
    values = np.arange(36).reshape(6, 6)
    frame = DataFrame(values, columns=levels, index=levels)
    with pd.option_context("styler.max.elements", 4):
        translated = frame.style._translate(True, True)

    body = translated["body"]
    first_row = body[0]
    assert len(first_row) == 5  # 2 index levels + 2 data cols + trimming col
    assert {"attributes": 'rowspan="2"'}.items() <= first_row[0].items()
    assert {"class": "data row0 col_trim"}.items() <= first_row[4].items()
    assert {"class": "data row_trim col_trim"}.items() <= body[2][4].items()
    assert len(body) == 3  # 2 data rows + trimming row

    first_head_row = translated["head"][0]
    assert len(first_head_row) == 5  # 2 index blanks + 2 column headers + trimming col
    assert {"attributes": 'colspan="2"'}.items() <= first_head_row[2].items()


class TestStyler:
def setup_method(self, method):
np.random.seed(24)
Expand Down Expand Up @@ -939,7 +980,7 @@ def test_get_level_lengths(self):
(1, 4): 1,
(1, 5): 1,
}
result = _get_level_lengths(index, sparsify=True)
result = _get_level_lengths(index, sparsify=True, max_index=100)
tm.assert_dict_equal(result, expected)

expected = {
Expand All @@ -956,7 +997,7 @@ def test_get_level_lengths(self):
(1, 4): 1,
(1, 5): 1,
}
result = _get_level_lengths(index, sparsify=False)
result = _get_level_lengths(index, sparsify=False, max_index=100)
tm.assert_dict_equal(result, expected)

def test_get_level_lengths_un_sorted(self):
Expand All @@ -970,7 +1011,7 @@ def test_get_level_lengths_un_sorted(self):
(1, 2): 1,
(1, 3): 1,
}
result = _get_level_lengths(index, sparsify=True)
result = _get_level_lengths(index, sparsify=True, max_index=100)
tm.assert_dict_equal(result, expected)

expected = {
Expand All @@ -983,7 +1024,7 @@ def test_get_level_lengths_un_sorted(self):
(1, 2): 1,
(1, 3): 1,
}
result = _get_level_lengths(index, sparsify=False)
result = _get_level_lengths(index, sparsify=False, max_index=100)
tm.assert_dict_equal(result, expected)

def test_mi_sparse_index_names(self):
Expand Down