ENH: set render limits on Styler to automatically trim dataframes (pandas-dev#41635)
JulianWgs · Jul 3, 2021 · b94642d · b94642d
1 parent ff2e57e
commit b94642d
Show file tree

Hide file tree

Showing 5 changed files with 208 additions and 13 deletions.
diff --git a/doc/source/user_guide/options.rst b/doc/source/user_guide/options.rst
@@ -487,6 +487,8 @@ styler.sparse.index                     True         "Sparsify" MultiIndex displ
                                                      elements in outer levels within groups).
 styler.sparse.columns                   True         "Sparsify" MultiIndex display for columns
                                                      in Styler output.
+styler.render.max_elements              262144       Maximum number of datapoints that Styler will render
+                                                     trimming either rows, columns or both to fit.
 ======================================= ============ ==================================
 
 

diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
@@ -140,6 +140,7 @@ properly format HTML and eliminate some inconsistencies (:issue:`39942` :issue:`
 
 :class:`.Styler` has also been compatible with non-unique index or columns, at least for as many features as are fully compatible, others made only partially compatible (:issue:`41269`).
 One also has greater control of the display through separate sparsification of the index or columns, using the new 'styler' options context (:issue:`41142`).
+Render trimming has also been added for large numbers of data elements to avoid browser overload (:issue:`40712`).
 
 We have added an extension to allow LaTeX styling as an alternative to CSS styling and a method :meth:`.Styler.to_latex`
 which renders the necessary LaTeX format including built-up styles. An additional file io function :meth:`.Styler.to_html` has been added for convenience (:issue:`40312`).

diff --git a/pandas/core/config_init.py b/pandas/core/config_init.py
@@ -752,9 +752,22 @@ def register_converter_cb(key):
     display each explicit level element in a hierarchical key for each column.
 """
 
+styler_max_elements = """
+: int
+    The maximum number of data-cell (<td>) elements that will be rendered before
+    trimming will occur over columns, rows or both if needed.
+"""
+
 with cf.config_prefix("styler"):
     cf.register_option("sparse.index", True, styler_sparse_index_doc, validator=bool)
 
     cf.register_option(
         "sparse.columns", True, styler_sparse_columns_doc, validator=bool
     )
+
+    cf.register_option(
+        "render.max_elements",
+        2 ** 18,
+        styler_max_elements,
+        validator=is_nonnegative_int,
+    )
diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py
@@ -176,6 +176,8 @@ def _translate(self, sparse_index: bool, sparse_cols: bool, blank: str = "&nbsp;
         ROW_HEADING_CLASS = "row_heading"
         COL_HEADING_CLASS = "col_heading"
         INDEX_NAME_CLASS = "index_name"
+        TRIMMED_COL_CLASS = "col_trim"
+        TRIMMED_ROW_CLASS = "row_trim"
 
         DATA_CLASS = "data"
         BLANK_CLASS = "blank"
@@ -188,15 +190,34 @@ def _translate(self, sparse_index: bool, sparse_cols: bool, blank: str = "&nbsp;
             "caption": self.caption,
         }
 
+        max_elements = get_option("styler.render.max_elements")
+        max_rows, max_cols = _get_trimming_maximums(
+            len(self.data.index), len(self.data.columns), max_elements
+        )
+
         head = self._translate_header(
-            BLANK_CLASS, BLANK_VALUE, INDEX_NAME_CLASS, COL_HEADING_CLASS, sparse_cols
+            BLANK_CLASS,
+            BLANK_VALUE,
+            INDEX_NAME_CLASS,
+            COL_HEADING_CLASS,
+            sparse_cols,
+            max_cols,
+            TRIMMED_COL_CLASS,
         )
         d.update({"head": head})
 
         self.cellstyle_map: DefaultDict[tuple[CSSPair, ...], list[str]] = defaultdict(
             list
         )
-        body = self._translate_body(DATA_CLASS, ROW_HEADING_CLASS, sparse_index)
+        body = self._translate_body(
+            DATA_CLASS,
+            ROW_HEADING_CLASS,
+            sparse_index,
+            max_rows,
+            max_cols,
+            TRIMMED_ROW_CLASS,
+            TRIMMED_COL_CLASS,
+        )
         d.update({"body": body})
 
         cellstyle: list[dict[str, CSSList | list[str]]] = [
@@ -227,6 +248,8 @@ def _translate_header(
         index_name_class: str,
         col_heading_class: str,
         sparsify_cols: bool,
+        max_cols: int,
+        trimmed_col_class: str,
     ):
         """
         Build each <tr> within table <head> as a list
@@ -252,6 +275,10 @@ def _translate_header(
             CSS class added to elements within the column_names section of structure.
         sparsify_cols : bool
             Whether column_headers section will add colspan attributes (>1) to elements.
+        max_cols : int
+            Maximum number of columns to render. If exceeded will contain `...` filler.
+        trimmed_col_class : str
+            CSS class added to elements within a column including `...` trimmed vals.
 
         Returns
         -------
@@ -260,10 +287,10 @@ def _translate_header(
         """
         # for sparsifying a MultiIndex
         col_lengths = _get_level_lengths(
-            self.columns, sparsify_cols, self.hidden_columns
+            self.columns, sparsify_cols, max_cols, self.hidden_columns
         )
 
-        clabels = self.data.columns.tolist()
+        clabels = self.data.columns.tolist()[:max_cols]  # slice to allow trimming
         if self.data.columns.nlevels == 1:
             clabels = [[x] for x in clabels]
         clabels = list(zip(*clabels))
@@ -300,6 +327,18 @@ def _translate_header(
                     )
                     for c, value in enumerate(clabels[r])
                 ]
+
+                if len(self.data.columns) > max_cols:
+                    # add an extra column with `...` value to indicate trimming
+                    column_headers.append(
+                        _element(
+                            "th",
+                            f"{col_heading_class} level{r} {trimmed_col_class}",
+                            "...",
+                            True,
+                            attributes="",
+                        )
+                    )
                 head.append(index_blanks + column_name + column_headers)
 
         # 2) index names
@@ -318,21 +357,33 @@ def _translate_header(
                 for c, name in enumerate(self.data.index.names)
             ]
 
+            if len(self.data.columns) <= max_cols:
+                blank_len = len(clabels[0])
+            else:
+                blank_len = len(clabels[0]) + 1  # to allow room for `...` trim col
+
             column_blanks = [
                 _element(
                     "th",
                     f"{blank_class} col{c}",
                     blank_value,
                     c not in self.hidden_columns,
                 )
-                for c in range(len(clabels[0]))
+                for c in range(blank_len)
             ]
             head.append(index_names + column_blanks)
 
         return head
 
     def _translate_body(
-        self, data_class: str, row_heading_class: str, sparsify_index: bool
+        self,
+        data_class: str,
+        row_heading_class: str,
+        sparsify_index: bool,
+        max_rows: int,
+        max_cols: int,
+        trimmed_row_class: str,
+        trimmed_col_class: str,
     ):
         """
         Build each <tr> within table <body> as a list
@@ -360,14 +411,52 @@ def _translate_body(
             The associated HTML elements needed for template rendering.
         """
         # for sparsifying a MultiIndex
-        idx_lengths = _get_level_lengths(self.index, sparsify_index)
+        idx_lengths = _get_level_lengths(self.index, sparsify_index, max_rows)
 
-        rlabels = self.data.index.tolist()
+        rlabels = self.data.index.tolist()[:max_rows]  # slice to allow trimming
         if self.data.index.nlevels == 1:
             rlabels = [[x] for x in rlabels]
 
         body = []
         for r, row_tup in enumerate(self.data.itertuples()):
+            if r >= max_rows:  # used only to add a '...' trimmed row:
+                index_headers = [
+                    _element(
+                        "th",
+                        f"{row_heading_class} level{c} {trimmed_row_class}",
+                        "...",
+                        not self.hidden_index,
+                        attributes="",
+                    )
+                    for c in range(self.data.index.nlevels)
+                ]
+
+                data = [
+                    _element(
+                        "td",
+                        f"{data_class} col{c} {trimmed_row_class}",
+                        "...",
+                        (c not in self.hidden_columns),
+                        attributes="",
+                    )
+                    for c in range(max_cols)
+                ]
+
+                if len(self.data.columns) > max_cols:
+                    # columns are also trimmed so we add the final element
+                    data.append(
+                        _element(
+                            "td",
+                            f"{data_class} {trimmed_row_class} {trimmed_col_class}",
+                            "...",
+                            True,
+                            attributes="",
+                        )
+                    )
+
+                body.append(index_headers + data)
+                break
+
             index_headers = [
                 _element(
                     "th",
@@ -386,6 +475,18 @@ def _translate_body(
 
             data = []
             for c, value in enumerate(row_tup[1:]):
+                if c >= max_cols:
+                    data.append(
+                        _element(
+                            "td",
+                            f"{data_class} row{r} {trimmed_col_class}",
+                            "...",
+                            True,
+                            attributes="",
+                        )
+                    )
+                    break
+
                 # add custom classes from cell context
                 cls = ""
                 if (r, c) in self.cell_context:
@@ -655,8 +756,40 @@ def _element(
     }
 
 
+def _get_trimming_maximums(rn, cn, max_elements, scaling_factor=0.8):
+    """
+    Iteratively reduce the number of rows and columns to satisfy max elements.
+
+    Parameters
+    ----------
+    rn, cn : int
+        The number of input rows / columns
+    max_elements : int
+        The number of allowable elements
+
+    Returns
+    -------
+    rn, cn : tuple
+        New rn and cn values that satisfy the max_elements constraint
+    """
+
+    def scale_down(rn, cn):
+        if cn >= rn:
+            return rn, int(cn * scaling_factor)
+        else:
+            return int(rn * scaling_factor), cn
+
+    while rn * cn > max_elements:
+        rn, cn = scale_down(rn, cn)
+
+    return rn, cn
+
+
 def _get_level_lengths(
-    index: Index, sparsify: bool, hidden_elements: Sequence[int] | None = None
+    index: Index,
+    sparsify: bool,
+    max_index: int,
+    hidden_elements: Sequence[int] | None = None,
 ):
     """
     Given an index, find the level length for each element.
@@ -667,6 +800,8 @@ def _get_level_lengths(
         Index or columns to determine lengths of each element
     sparsify : bool
         Whether to hide or show each distinct element in a MultiIndex
+    max_index : int
+        The maximum number of elements to analyse along the index due to trimming
     hidden_elements : sequence of int
         Index positions of elements hidden from display in the index affecting
         length
@@ -693,6 +828,9 @@ def _get_level_lengths(
 
     for i, lvl in enumerate(levels):
         for j, row in enumerate(lvl):
+            if j >= max_index:
+                # stop the loop due to display trimming
+                break
             if not sparsify:
                 lengths[(i, j)] = 1
             elif (row is not lib.no_default) and (j not in hidden_elements):

diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py
@@ -17,6 +17,7 @@
 )
 from pandas.io.formats.style_render import (
     _get_level_lengths,
+    _get_trimming_maximums,
     maybe_convert_css_to_tuples,
     non_reducing_slice,
 )
@@ -115,6 +116,46 @@ def test_mi_styler_sparsify_options(mi_styler):
     assert html1 != html2
 
 
+def test_trimming_maximum():
+    rn, cn = _get_trimming_maximums(100, 100, 100, scaling_factor=0.5)
+    assert (rn, cn) == (12, 6)
+
+    rn, cn = _get_trimming_maximums(1000, 3, 750, scaling_factor=0.5)
+    assert (rn, cn) == (250, 3)
+
+
+def test_render_trimming():
+    df = DataFrame(np.arange(120).reshape(60, 2))
+    with pd.option_context("styler.render.max_elements", 6):
+        ctx = df.style._translate(True, True)
+    assert len(ctx["head"][0]) == 3  # index + 2 data cols
+    assert len(ctx["body"]) == 4  # 3 data rows + trimming row
+    assert len(ctx["body"][0]) == 3  # index + 2 data cols
+
+    df = DataFrame(np.arange(120).reshape(12, 10))
+    with pd.option_context("styler.render.max_elements", 6):
+        ctx = df.style._translate(True, True)
+    assert len(ctx["head"][0]) == 4  # index + 2 data cols + trimming col
+    assert len(ctx["body"]) == 4  # 3 data rows + trimming row
+    assert len(ctx["body"][0]) == 4  # index + 2 data cols + trimming col
+
+
+def test_render_trimming_mi():
+    midx = MultiIndex.from_product([[1, 2], [1, 2, 3]])
+    df = DataFrame(np.arange(36).reshape(6, 6), columns=midx, index=midx)
+    with pd.option_context("styler.render.max_elements", 4):
+        ctx = df.style._translate(True, True)
+
+    assert len(ctx["body"][0]) == 5  # 2 indexes + 2 data cols + trimming col
+    assert {"attributes": 'rowspan="2"'}.items() <= ctx["body"][0][0].items()
+    assert {"class": "data row0 col_trim"}.items() <= ctx["body"][0][4].items()
+    assert {"class": "data row_trim col_trim"}.items() <= ctx["body"][2][4].items()
+    assert len(ctx["body"]) == 3  # 2 data rows + trimming row
+
+    assert len(ctx["head"][0]) == 5  # 2 indexes + 2 column headers + trimming col
+    assert {"attributes": 'colspan="2"'}.items() <= ctx["head"][0][2].items()
+
+
 class TestStyler:
     def setup_method(self, method):
         np.random.seed(24)
@@ -939,7 +980,7 @@ def test_get_level_lengths(self):
             (1, 4): 1,
             (1, 5): 1,
         }
-        result = _get_level_lengths(index, sparsify=True)
+        result = _get_level_lengths(index, sparsify=True, max_index=100)
         tm.assert_dict_equal(result, expected)
 
         expected = {
@@ -956,7 +997,7 @@ def test_get_level_lengths(self):
             (1, 4): 1,
             (1, 5): 1,
         }
-        result = _get_level_lengths(index, sparsify=False)
+        result = _get_level_lengths(index, sparsify=False, max_index=100)
         tm.assert_dict_equal(result, expected)
 
     def test_get_level_lengths_un_sorted(self):
@@ -970,7 +1011,7 @@ def test_get_level_lengths_un_sorted(self):
             (1, 2): 1,
             (1, 3): 1,
         }
-        result = _get_level_lengths(index, sparsify=True)
+        result = _get_level_lengths(index, sparsify=True, max_index=100)
         tm.assert_dict_equal(result, expected)
 
         expected = {
@@ -983,7 +1024,7 @@ def test_get_level_lengths_un_sorted(self):
             (1, 2): 1,
             (1, 3): 1,
         }
-        result = _get_level_lengths(index, sparsify=False)
+        result = _get_level_lengths(index, sparsify=False, max_index=100)
         tm.assert_dict_equal(result, expected)
 
     def test_mi_sparse_index_names(self):