diff --git a/doc/source/user_guide/options.rst b/doc/source/user_guide/options.rst index aa8a8fae417be..62a347acdaa34 100644 --- a/doc/source/user_guide/options.rst +++ b/doc/source/user_guide/options.rst @@ -487,6 +487,8 @@ styler.sparse.index True "Sparsify" MultiIndex displ elements in outer levels within groups). styler.sparse.columns True "Sparsify" MultiIndex display for columns in Styler output. +styler.render.max_elements 262144 Maximum number of datapoints that Styler will render + trimming either rows, columns or both to fit. ======================================= ============ ================================== diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 88c69335b39f4..6526947a674ce 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -140,6 +140,7 @@ properly format HTML and eliminate some inconsistencies (:issue:`39942` :issue:` :class:`.Styler` has also been compatible with non-unique index or columns, at least for as many features as are fully compatible, others made only partially compatible (:issue:`41269`). One also has greater control of the display through separate sparsification of the index or columns, using the new 'styler' options context (:issue:`41142`). +Render trimming has also been added for large numbers of data elements to avoid browser overload (:issue:`40712`). We have added an extension to allow LaTeX styling as an alternative to CSS styling and a method :meth:`.Styler.to_latex` which renders the necessary LaTeX format including built-up styles. An additional file io function :meth:`Styler.to_html` has been added for convenience (:issue:`40312`). diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py index a88bc8900ccdd..0db0c5a57207d 100644 --- a/pandas/core/config_init.py +++ b/pandas/core/config_init.py @@ -743,9 +743,22 @@ def register_converter_cb(key): display each explicit level element in a hierarchical key for each column. 
""" +styler_max_elements = """ +: int + The maximum number of data-cell () elements that will be rendered before + trimming will occur over columns, rows or both if needed. +""" + with cf.config_prefix("styler"): cf.register_option("sparse.index", True, styler_sparse_index_doc, validator=bool) cf.register_option( "sparse.columns", True, styler_sparse_columns_doc, validator=bool ) + + cf.register_option( + "render.max_elements", + 2 ** 18, + styler_max_elements, + validator=is_nonnegative_int, + ) diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 41733b77cbbd3..7af8802673f80 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -176,6 +176,8 @@ def _translate(self, sparse_index: bool, sparse_cols: bool, blank: str = "  ROW_HEADING_CLASS = "row_heading" COL_HEADING_CLASS = "col_heading" INDEX_NAME_CLASS = "index_name" + TRIMMED_COL_CLASS = "col_trim" + TRIMMED_ROW_CLASS = "row_trim" DATA_CLASS = "data" BLANK_CLASS = "blank" @@ -188,15 +190,34 @@ def _translate(self, sparse_index: bool, sparse_cols: bool, blank: str = "  "caption": self.caption, } + max_elements = get_option("styler.render.max_elements") + max_rows, max_cols = _get_trimming_maximums( + len(self.data.index), len(self.data.columns), max_elements + ) + head = self._translate_header( - BLANK_CLASS, BLANK_VALUE, INDEX_NAME_CLASS, COL_HEADING_CLASS, sparse_cols + BLANK_CLASS, + BLANK_VALUE, + INDEX_NAME_CLASS, + COL_HEADING_CLASS, + sparse_cols, + max_cols, + TRIMMED_COL_CLASS, ) d.update({"head": head}) self.cellstyle_map: DefaultDict[tuple[CSSPair, ...], list[str]] = defaultdict( list ) - body = self._translate_body(DATA_CLASS, ROW_HEADING_CLASS, sparse_index) + body = self._translate_body( + DATA_CLASS, + ROW_HEADING_CLASS, + sparse_index, + max_rows, + max_cols, + TRIMMED_ROW_CLASS, + TRIMMED_COL_CLASS, + ) d.update({"body": body}) cellstyle: list[dict[str, CSSList | list[str]]] = [ @@ -227,6 +248,8 @@ def _translate_header( 
index_name_class: str, col_heading_class: str, sparsify_cols: bool, + max_cols: int, + trimmed_col_class: str, ): """ Build each <tr> within table <head> as a list @@ -252,6 +275,10 @@ def _translate_header( CSS class added to elements within the column_names section of structure. sparsify_cols : bool Whether column_headers section will add colspan attributes (>1) to elements. + max_cols : int + Maximum number of columns to render. If exceeded will contain `...` filler. + trimmed_col_class : str + CSS class added to elements within a column including `...` trimmed vals. Returns ------- @@ -260,10 +287,10 @@ def _translate_header( """ # for sparsifying a MultiIndex col_lengths = _get_level_lengths( - self.columns, sparsify_cols, self.hidden_columns + self.columns, sparsify_cols, max_cols, self.hidden_columns ) - clabels = self.data.columns.tolist() + clabels = self.data.columns.tolist()[:max_cols] # slice to allow trimming if self.data.columns.nlevels == 1: clabels = [[x] for x in clabels] clabels = list(zip(*clabels)) @@ -300,6 +327,18 @@ def _translate_header( ) for c, value in enumerate(clabels[r]) ] + + if len(self.data.columns) > max_cols: + # add an extra column with `...` value to indicate trimming + column_headers.append( + _element( + "th", + f"{col_heading_class} level{r} {trimmed_col_class}", + "...", + True, + attributes="", + ) + ) head.append(index_blanks + column_name + column_headers) # 2) index names @@ -318,6 +357,11 @@ def _translate_header( for c, name in enumerate(self.data.index.names) ] + if len(self.data.columns) <= max_cols: + blank_len = len(clabels[0]) + else: + blank_len = len(clabels[0]) + 1 # to allow room for `...` trim col + column_blanks = [ _element( "th", @@ -325,14 +369,21 @@ def _translate_header( blank_value, c not in self.hidden_columns, ) - for c in range(len(clabels[0])) + for c in range(blank_len) ] head.append(index_names + column_blanks) return head def _translate_body( - self, data_class: str, row_heading_class: str, sparsify_index: 
bool + self, + data_class: str, + row_heading_class: str, + sparsify_index: bool, + max_rows: int, + max_cols: int, + trimmed_row_class: str, + trimmed_col_class: str, ): """ Build each <tr> within table <body> as a list @@ -360,14 +411,52 @@ def _translate_body( The associated HTML elements needed for template rendering. """ # for sparsifying a MultiIndex - idx_lengths = _get_level_lengths(self.index, sparsify_index) + idx_lengths = _get_level_lengths(self.index, sparsify_index, max_rows) - rlabels = self.data.index.tolist() + rlabels = self.data.index.tolist()[:max_rows] # slice to allow trimming if self.data.index.nlevels == 1: rlabels = [[x] for x in rlabels] body = [] for r, row_tup in enumerate(self.data.itertuples()): + if r >= max_rows: # used only to add a '...' trimmed row: + index_headers = [ + _element( + "th", + f"{row_heading_class} level{c} {trimmed_row_class}", + "...", + not self.hidden_index, + attributes="", + ) + for c in range(self.data.index.nlevels) + ] + + data = [ + _element( + "td", + f"{data_class} col{c} {trimmed_row_class}", + "...", + (c not in self.hidden_columns), + attributes="", + ) + for c in range(max_cols) + ] + + if len(self.data.columns) > max_cols: + # columns are also trimmed so we add the final element + data.append( + _element( + "td", + f"{data_class} {trimmed_row_class} {trimmed_col_class}", + "...", + True, + attributes="", + ) + ) + + body.append(index_headers + data) + break + index_headers = [ _element( "th", @@ -386,6 +475,18 @@ def _translate_body( data = [] for c, value in enumerate(row_tup[1:]): + if c >= max_cols: + data.append( + _element( + "td", + f"{data_class} row{r} {trimmed_col_class}", + "...", + True, + attributes="", + ) + ) + break + # add custom classes from cell context cls = "" if (r, c) in self.cell_context: @@ -655,8 +756,40 @@ def _element( } +def _get_trimming_maximums(rn, cn, max_elements, scaling_factor=0.8): + """ + Iteratively reduce the number of rows and columns to satisfy max elements. 
+ + Parameters + ---------- + rn, cn : int + The number of input rows / columns + max_elements : int + The number of allowable elements + + Returns + ------- + rn, cn : tuple + New rn and cn values that satisfy the max_elements constraint + """ + + def scale_down(rn, cn): + if cn >= rn: + return rn, int(cn * scaling_factor) + else: + return int(rn * scaling_factor), cn + + while rn * cn > max_elements: + rn, cn = scale_down(rn, cn) + + return rn, cn + + def _get_level_lengths( - index: Index, sparsify: bool, hidden_elements: Sequence[int] | None = None + index: Index, + sparsify: bool, + max_index: int, + hidden_elements: Sequence[int] | None = None, ): """ Given an index, find the level length for each element. @@ -667,6 +800,8 @@ def _get_level_lengths( Index or columns to determine lengths of each element sparsify : bool Whether to hide or show each distinct element in a MultiIndex + max_index : int + The maximum number of elements to analyse along the index due to trimming hidden_elements : sequence of int Index positions of elements hidden from display in the index affecting length @@ -693,6 +828,9 @@ def _get_level_lengths( for i, lvl in enumerate(levels): for j, row in enumerate(lvl): + if j >= max_index: + # stop the loop due to display trimming + break if not sparsify: lengths[(i, j)] = 1 elif (row is not lib.no_default) and (j not in hidden_elements): diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index 12b4a13ade271..281170ab6c7cb 100644 --- a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -17,6 +17,7 @@ ) from pandas.io.formats.style_render import ( _get_level_lengths, + _get_trimming_maximums, maybe_convert_css_to_tuples, non_reducing_slice, ) @@ -115,6 +116,46 @@ def test_mi_styler_sparsify_options(mi_styler): assert html1 != html2 +def test_trimming_maximum(): + rn, cn = _get_trimming_maximums(100, 100, 100, scaling_factor=0.5) + assert (rn, cn) 
== (12, 6) + + rn, cn = _get_trimming_maximums(1000, 3, 750, scaling_factor=0.5) + assert (rn, cn) == (250, 3) + + +def test_render_trimming(): + df = DataFrame(np.arange(120).reshape(60, 2)) + with pd.option_context("styler.render.max_elements", 6): + ctx = df.style._translate(True, True) + assert len(ctx["head"][0]) == 3 # index + 2 data cols + assert len(ctx["body"]) == 4 # 3 data rows + trimming row + assert len(ctx["body"][0]) == 3 # index + 2 data cols + + df = DataFrame(np.arange(120).reshape(12, 10)) + with pd.option_context("styler.render.max_elements", 6): + ctx = df.style._translate(True, True) + assert len(ctx["head"][0]) == 4 # index + 2 data cols + trimming col + assert len(ctx["body"]) == 4 # 3 data rows + trimming row + assert len(ctx["body"][0]) == 4 # index + 2 data cols + trimming col + + +def test_render_trimming_mi(): + midx = MultiIndex.from_product([[1, 2], [1, 2, 3]]) + df = DataFrame(np.arange(36).reshape(6, 6), columns=midx, index=midx) + with pd.option_context("styler.render.max_elements", 4): + ctx = df.style._translate(True, True) + + assert len(ctx["body"][0]) == 5 # 2 indexes + 2 data cols + trimming col + assert {"attributes": 'rowspan="2"'}.items() <= ctx["body"][0][0].items() + assert {"class": "data row0 col_trim"}.items() <= ctx["body"][0][4].items() + assert {"class": "data row_trim col_trim"}.items() <= ctx["body"][2][4].items() + assert len(ctx["body"]) == 3 # 2 data rows + trimming row + + assert len(ctx["head"][0]) == 5 # 2 indexes + 2 column headers + trimming col + assert {"attributes": 'colspan="2"'}.items() <= ctx["head"][0][2].items() + + class TestStyler: def setup_method(self, method): np.random.seed(24) @@ -939,7 +980,7 @@ def test_get_level_lengths(self): (1, 4): 1, (1, 5): 1, } - result = _get_level_lengths(index, sparsify=True) + result = _get_level_lengths(index, sparsify=True, max_index=100) tm.assert_dict_equal(result, expected) expected = { @@ -956,7 +997,7 @@ def test_get_level_lengths(self): (1, 4): 1, (1, 
5): 1, } - result = _get_level_lengths(index, sparsify=False) + result = _get_level_lengths(index, sparsify=False, max_index=100) tm.assert_dict_equal(result, expected) def test_get_level_lengths_un_sorted(self): @@ -970,7 +1011,7 @@ def test_get_level_lengths_un_sorted(self): (1, 2): 1, (1, 3): 1, } - result = _get_level_lengths(index, sparsify=True) + result = _get_level_lengths(index, sparsify=True, max_index=100) tm.assert_dict_equal(result, expected) expected = { @@ -983,7 +1024,7 @@ def test_get_level_lengths_un_sorted(self): (1, 2): 1, (1, 3): 1, } - result = _get_level_lengths(index, sparsify=False) + result = _get_level_lengths(index, sparsify=False, max_index=100) tm.assert_dict_equal(result, expected) def test_mi_sparse_index_names(self):