diff --git a/doc/source/reference/style.rst b/doc/source/reference/style.rst index 9dcdcb86c2ea4..6c6b24b4b9136 100644 --- a/doc/source/reference/style.rst +++ b/doc/source/reference/style.rst @@ -35,7 +35,6 @@ Style application Styler.applymap Styler.where Styler.format - Styler.set_precision Styler.set_td_classes Styler.set_table_styles Styler.set_table_attributes @@ -44,7 +43,6 @@ Style application Styler.set_caption Styler.set_properties Styler.set_uuid - Styler.set_na_rep Styler.clear Styler.pipe diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 4cce967e63f21..fb8eecdaa275e 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -364,6 +364,7 @@ Deprecations - Deprecated :meth:`core.window.ewm.ExponentialMovingWindow.vol` (:issue:`39220`) - Using ``.astype`` to convert between ``datetime64[ns]`` dtype and :class:`DatetimeTZDtype` is deprecated and will raise in a future version, use ``obj.tz_localize`` or ``obj.dt.tz_localize`` instead (:issue:`38622`) - Deprecated casting ``datetime.date`` objects to ``datetime64`` when used as ``fill_value`` in :meth:`DataFrame.unstack`, :meth:`DataFrame.shift`, :meth:`Series.shift`, and :meth:`DataFrame.reindex`, pass ``pd.Timestamp(dateobj)`` instead (:issue:`39767`) +- Deprecated :meth:`.Styler.set_na_rep` and :meth:`.Styler.set_precision` in favour of :meth:`.Styler.format` with ``na_rep`` and ``precision`` as existing and new input arguments respectively (:issue:`40134`) - Deprecated allowing partial failure in :meth:`Series.transform` and :meth:`DataFrame.transform` when ``func`` is list-like or dict-like; will raise if any function fails on a column in a future version (:issue:`40211`) .. 
--------------------------------------------------------------------------- diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 1744a7c8970a6..619b71611674f 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -7,7 +7,6 @@ from contextlib import contextmanager import copy from functools import partial -from itertools import product from typing import ( Any, Callable, @@ -22,6 +21,7 @@ cast, ) from uuid import uuid4 +import warnings import numpy as np @@ -37,14 +37,10 @@ from pandas.compat._optional import import_optional_dependency from pandas.util._decorators import doc -from pandas.core.dtypes.common import is_float from pandas.core.dtypes.generic import ABCSeries import pandas as pd -from pandas.api.types import ( - is_dict_like, - is_list_like, -) +from pandas.api.types import is_list_like from pandas.core import generic import pandas.core.common as com from pandas.core.frame import DataFrame @@ -53,6 +49,8 @@ jinja2 = import_optional_dependency("jinja2", extra="DataFrame.style requires jinja2.") +BaseFormatter = Union[str, Callable] +ExtFormatter = Union[BaseFormatter, Dict[Any, Optional[BaseFormatter]]] CSSPair = Tuple[str, Union[str, int, float]] CSSList = List[CSSPair] CSSProperties = Union[str, CSSList] @@ -182,9 +180,6 @@ def __init__( self.data: DataFrame = data self.index: pd.Index = data.index self.columns: pd.Index = data.columns - if precision is None: - precision = get_option("display.precision") - self.precision = precision self.table_styles = table_styles if not isinstance(uuid_len, int) or not uuid_len >= 0: raise TypeError("``uuid_len`` must be an integer in range [0, 32].") @@ -193,7 +188,6 @@ def __init__( self.caption = caption self.table_attributes = table_attributes self.cell_ids = cell_ids - self.na_rep = na_rep # assign additional default vars self.hidden_index: bool = False @@ -204,7 +198,10 @@ def __init__( self.tooltips: Optional[_Tooltips] = None self._display_funcs: DefaultDict[ # maps 
(row, col) -> formatting function Tuple[int, int], Callable[[Any], str] - ] = defaultdict(lambda: self._default_display_func) + ] = defaultdict(lambda: partial(_default_formatter, precision=None)) + self.precision = precision # can be removed on set_precision depr cycle + self.na_rep = na_rep # can be removed on set_na_rep depr cycle + self.format(formatter=None, precision=precision, na_rep=na_rep) def _repr_html_(self) -> str: """ @@ -225,15 +222,6 @@ def _init_tooltips(self): if self.tooltips is None: self.tooltips = _Tooltips() - def _default_display_func(self, x): - if self.na_rep is not None and pd.isna(x): - return self.na_rep - elif is_float(x): - display_format = f"{x:.{self.precision}f}" - return display_format - else: - return x - def set_tooltips(self, ttips: DataFrame) -> Styler: """ Add string based tooltips that will appear in the `Styler` HTML result. These @@ -389,7 +377,6 @@ def _translate(self): table_styles = self.table_styles or [] caption = self.caption ctx = self.ctx - precision = self.precision hidden_index = self.hidden_index hidden_columns = self.hidden_columns uuid = self.uuid @@ -569,7 +556,6 @@ def _translate(self): "cellstyle": cellstyle, "body": body, "uuid": uuid, - "precision": precision, "table_styles": _format_table_styles(table_styles), "caption": caption, "table_attributes": table_attr, @@ -579,14 +565,20 @@ def _translate(self): return d - def format(self, formatter, subset=None, na_rep: Optional[str] = None) -> Styler: + def format( + self, + formatter: Optional[ExtFormatter] = None, + subset: Optional[Union[slice, Sequence[Any]]] = None, + na_rep: Optional[str] = None, + precision: Optional[int] = None, + ) -> Styler: """ Format the text display value of cells. Parameters ---------- formatter : str, callable, dict or None - If ``formatter`` is None, the default formatter is used. + Object to define how values are displayed. See notes. 
subset : IndexSlice An argument to ``DataFrame.loc`` that restricts which elements ``formatter`` is applied to. @@ -596,58 +588,107 @@ def format(self, formatter, subset=None, na_rep: Optional[str] = None) -> Styler .. versionadded:: 1.0.0 + precision : int, optional + Floating point precision to use for display purposes, if not determined by + the specified ``formatter``. + + .. versionadded:: 1.3.0 + Returns ------- self : Styler Notes ----- - ``formatter`` is either an ``a`` or a dict ``{column name: a}`` where - ``a`` is one of - - - str: this will be wrapped in: ``a.format(x)`` - - callable: called with the value of an individual cell - - The default display value for numeric values is the "general" (``g``) - format with ``pd.options.display.precision`` precision. + This method assigns a formatting function, ``formatter``, to each cell in the + DataFrame. If ``formatter`` is ``None``, then the default formatter is used. + If a callable then that function should take a data value as input and return + a displayable representation, such as a string. If ``formatter`` is + given as a string this is assumed to be a valid Python format specification + and is wrapped to a callable as ``string.format(x)``. If a ``dict`` is given, + keys should correspond to column names, and values should be string or + callable, as above. + + The default formatter currently expresses floats and complex numbers with the + pandas display precision unless using the ``precision`` argument here. The + default formatter does not adjust the representation of missing values unless + the ``na_rep`` argument is used. + + The ``subset`` argument defines which region to apply the formatting function + to. If the ``formatter`` argument is given in dict form but does not include + all columns within the subset then these columns will have the default formatter + applied. Any columns in the formatter dict excluded from the subset will + be ignored.
+ + When using a ``formatter`` string the dtypes must be compatible, otherwise a + `ValueError` will be raised. Examples -------- - >>> df = pd.DataFrame(np.random.randn(4, 2), columns=['a', 'b']) - >>> df.style.format("{:.2%}") - >>> df['c'] = ['a', 'b', 'c', 'd'] - >>> df.style.format({'c': str.upper}) + Using ``na_rep`` and ``precision`` with the default ``formatter`` + + >>> df = pd.DataFrame([[np.nan, 1.0, 'A'], [2.0, np.nan, 3.0]]) + >>> df.style.format(na_rep='MISS', precision=3) + 0 1 2 + 0 MISS 1.000 A + 1 2.000 MISS 3.000 + + Using a format specification on consistent column dtypes + + >>> df.style.format('{:.2f}', na_rep='MISS', subset=[0,1]) + 0 1 2 + 0 MISS 1.00 A + 1 2.00 MISS 3.000000 + + Using the default ``formatter`` for unspecified columns + + >>> df.style.format({0: '{:.2f}', 1: '£ {:.1f}'}, na_rep='MISS', precision=1) + 0 1 2 + 0 MISS £ 1.0 A + 1 2.00 MISS 3.0 + + Multiple ``na_rep`` or ``precision`` specifications under the default + ``formatter``. + + >>> df.style.format(na_rep='MISS', precision=1, subset=[0]) + ... 
.format(na_rep='PASS', precision=2, subset=[1, 2]) + 0 1 2 + 0 MISS 1.00 A + 1 2.0 PASS 3.00 + + Using a callable formatting function + + >>> func = lambda s: 'STRING' if isinstance(s, str) else 'FLOAT' + >>> df.style.format({0: '{:.1f}', 2: func}, precision=4, na_rep='MISS') + 0 1 2 + 0 MISS 1.0000 STRING + 1 2.0 MISS FLOAT """ - if formatter is None: - assert self._display_funcs.default_factory is not None - formatter = self._display_funcs.default_factory() + if all((formatter is None, subset is None, precision is None, na_rep is None)): + self._display_funcs.clear() + return self # clear the formatter / revert to default and avoid looping + + subset = slice(None) if subset is None else subset + subset = _non_reducing_slice(subset) + data = self.data.loc[subset] + + columns = data.columns + if not isinstance(formatter, dict): + formatter = {col: formatter for col in columns} + + for col in columns: + try: + format_func = formatter[col] + except KeyError: + format_func = None + format_func = _maybe_wrap_formatter( + format_func, na_rep=na_rep, precision=precision + ) + + for row, value in data[[col]].itertuples(): + i, j = self.index.get_loc(row), self.columns.get_loc(col) + self._display_funcs[(i, j)] = format_func - if subset is None: - row_locs = range(len(self.data)) - col_locs = range(len(self.data.columns)) - else: - subset = _non_reducing_slice(subset) - if len(subset) == 1: - subset = subset, self.data.columns - - sub_df = self.data.loc[subset] - row_locs = self.data.index.get_indexer_for(sub_df.index) - col_locs = self.data.columns.get_indexer_for(sub_df.columns) - - if is_dict_like(formatter): - for col, col_formatter in formatter.items(): - # formatter must be callable, so '{}' are converted to lambdas - col_formatter = _maybe_wrap_formatter(col_formatter, na_rep) - col_num = self.data.columns.get_indexer_for([col])[0] - - for row_num in row_locs: - self._display_funcs[(row_num, col_num)] = col_formatter - else: - # single scalar to format all cells 
with - formatter = _maybe_wrap_formatter(formatter, na_rep) - locs = product(*(row_locs, col_locs)) - for i, j in locs: - self._display_funcs[(i, j)] = formatter return self def set_td_classes(self, classes: DataFrame) -> Styler: @@ -748,7 +789,6 @@ def render(self, **kwargs) -> str: * cellstyle * body * uuid - * precision * table_styles * caption * table_attributes @@ -1048,7 +1088,9 @@ def where( def set_precision(self, precision: int) -> Styler: """ - Set the precision used to render. + Set the precision used to display values. + + .. deprecated:: 1.3.0 Parameters ---------- @@ -1057,9 +1099,18 @@ def set_precision(self, precision: int) -> Styler: Returns ------- self : Styler + + Notes + ----- + This method is deprecated; see `Styler.format`. """ + warnings.warn( + "this method is deprecated in favour of `Styler.format(precision=..)`", + FutureWarning, + stacklevel=2, + ) self.precision = precision - return self + return self.format(precision=precision, na_rep=self.na_rep) def set_table_attributes(self, attributes: str) -> Styler: """ @@ -1270,6 +1321,8 @@ def set_na_rep(self, na_rep: str) -> Styler: .. versionadded:: 1.0.0 + .. deprecated:: 1.3.0 + Parameters ---------- na_rep : str @@ -1277,9 +1330,18 @@ def set_na_rep(self, na_rep: str) -> Styler: Returns ------- self : Styler + + Notes + ----- + This method is deprecated.
See `Styler.format()` + """ + warnings.warn( + "this method is deprecated in favour of `Styler.format(na_rep=..)`", + FutureWarning, + stacklevel=2, + ) self.na_rep = na_rep - return self + return self.format(na_rep=na_rep, precision=self.precision) def hide_index(self) -> Styler: """ @@ -2077,24 +2139,52 @@ def _get_level_lengths(index, hidden_elements=None): return non_zero_lengths +def _default_formatter(x: Any, precision: Optional[int] = None) -> Any: + """ + Format the display of a value + + Parameters + ---------- + x : Any + Input variable to be formatted + precision : int, optional + Floating point precision used if ``x`` is float or complex. + + Returns + ------- + value : Any + Matches input type, or string if input is float or complex. + """ + if precision is None: + precision = get_option("display.precision") + if isinstance(x, (float, complex)): + return f"{x:.{precision}f}" + return x + + def _maybe_wrap_formatter( - formatter: Union[Callable, str], na_rep: Optional[str] + formatter: Optional[BaseFormatter] = None, + na_rep: Optional[str] = None, + precision: Optional[int] = None, ) -> Callable: + """ + Allows formatters to be expressed as str, callable or None, where None returns + a default formatting function. Wraps with na_rep and precision where they are + available.
+ """ if isinstance(formatter, str): formatter_func = lambda x: formatter.format(x) elif callable(formatter): formatter_func = formatter + elif formatter is None: + formatter_func = partial(_default_formatter, precision=precision) else: - msg = f"Expected a template string or callable, got {formatter} instead" - raise TypeError(msg) + raise TypeError(f"'formatter' expected str or callable, got {type(formatter)}") if na_rep is None: return formatter_func - elif isinstance(na_rep, str): - return lambda x: na_rep if pd.isna(x) else formatter_func(x) else: - msg = f"Expected a string, got {na_rep} instead" - raise TypeError(msg) + return lambda x: na_rep if pd.isna(x) else formatter_func(x) def _maybe_convert_css_to_tuples(style: CSSProperties) -> CSSList: diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index 09e14d06f4d9b..b938495ca9e31 100644 --- a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -605,15 +605,17 @@ def test_set_na_rep(self): # GH 21527 28358 df = DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"]) - ctx = df.style.set_na_rep("NA")._translate() + with tm.assert_produces_warning(FutureWarning): + ctx = df.style.set_na_rep("NA")._translate() assert ctx["body"][0][1]["display_value"] == "NA" assert ctx["body"][0][2]["display_value"] == "NA" - ctx = ( - df.style.set_na_rep("NA") - .format(None, na_rep="-", subset=["B"]) - ._translate() - ) + with tm.assert_produces_warning(FutureWarning): + ctx = ( + df.style.set_na_rep("NA") + .format(None, na_rep="-", subset=["B"]) + ._translate() + ) assert ctx["body"][0][1]["display_value"] == "NA" assert ctx["body"][0][2]["display_value"] == "-" @@ -626,7 +628,8 @@ def test_format_non_numeric_na(self): } ) - ctx = df.style.set_na_rep("NA")._translate() + with tm.assert_produces_warning(FutureWarning): + ctx = df.style.set_na_rep("NA")._translate() assert ctx["body"][0][1]["display_value"] == "NA" assert 
ctx["body"][0][2]["display_value"] == "NA" assert ctx["body"][1][1]["display_value"] == "NA" @@ -638,12 +641,12 @@ def test_format_non_numeric_na(self): assert ctx["body"][1][1]["display_value"] == "-" assert ctx["body"][1][2]["display_value"] == "-" - def test_format_with_bad_na_rep(self): - # GH 21527 28358 - df = DataFrame([[None, None], [1.1, 1.2]], columns=["A", "B"]) - msg = "Expected a string, got -1 instead" - with pytest.raises(TypeError, match=msg): - df.style.format(None, na_rep=-1) + def test_format_clear(self): + assert (0, 0) not in self.styler._display_funcs # using default + self.styler.format("{:.2f") + assert (0, 0) in self.styler._display_funcs # formatter is specified + self.styler.format() + assert (0, 0) not in self.styler._display_funcs # formatter cleared to default def test_nonunique_raises(self): df = DataFrame([[1, 2]], columns=["A", "A"]) @@ -734,13 +737,11 @@ def test_table_attributes(self): assert 'class="foo" data-bar' in result def test_precision(self): - with pd.option_context("display.precision", 10): - s = Styler(self.df) - assert s.precision == 10 s = Styler(self.df, precision=2) assert s.precision == 2 - s2 = s.set_precision(4) + with tm.assert_produces_warning(FutureWarning): + s2 = s.set_precision(4) assert s is s2 assert s.precision == 4 @@ -783,44 +784,35 @@ def test_display_format(self): ) assert len(ctx["body"][0][1]["display_value"].lstrip("-")) <= 3 - def test_display_format_raises(self): - df = DataFrame(np.random.randn(2, 2)) - msg = "Expected a template string or callable, got 5 instead" - with pytest.raises(TypeError, match=msg): - df.style.format(5) - - msg = "Expected a template string or callable, got True instead" - with pytest.raises(TypeError, match=msg): - df.style.format(True) + @pytest.mark.parametrize("formatter", [5, True, [2.0]]) + def test_format_raises(self, formatter): + with pytest.raises(TypeError, match="expected str or callable"): + self.df.style.format(formatter) - def 
test_display_set_precision(self): + def test_format_with_precision(self): # Issue #13257 df = DataFrame(data=[[1.0, 2.0090], [3.2121, 4.566]], columns=["a", "b"]) s = Styler(df) - ctx = s.set_precision(1)._translate() - - assert s.precision == 1 + ctx = s.format(precision=1)._translate() assert ctx["body"][0][1]["display_value"] == "1.0" assert ctx["body"][0][2]["display_value"] == "2.0" assert ctx["body"][1][1]["display_value"] == "3.2" assert ctx["body"][1][2]["display_value"] == "4.6" - ctx = s.set_precision(2)._translate() - assert s.precision == 2 + ctx = s.format(precision=2)._translate() assert ctx["body"][0][1]["display_value"] == "1.00" assert ctx["body"][0][2]["display_value"] == "2.01" assert ctx["body"][1][1]["display_value"] == "3.21" assert ctx["body"][1][2]["display_value"] == "4.57" - ctx = s.set_precision(3)._translate() - assert s.precision == 3 + ctx = s.format(precision=3)._translate() assert ctx["body"][0][1]["display_value"] == "1.000" assert ctx["body"][0][2]["display_value"] == "2.009" assert ctx["body"][1][1]["display_value"] == "3.212" assert ctx["body"][1][2]["display_value"] == "4.566" - def test_display_subset(self): + def test_format_subset(self): df = DataFrame([[0.1234, 0.1234], [1.1234, 1.1234]], columns=["a", "b"]) ctx = df.style.format( {"a": "{:0.1f}", "b": "{0:.2%}"}, subset=pd.IndexSlice[0, :] @@ -851,7 +843,7 @@ def test_display_subset(self): assert ctx["body"][0][2]["display_value"] == "0.123400" assert ctx["body"][1][2]["display_value"] == raw_11 - def test_display_dict(self): + def test_format_dict(self): df = DataFrame([[0.1234, 0.1234], [1.1234, 1.1234]], columns=["a", "b"]) ctx = df.style.format({"a": "{:0.1f}", "b": "{0:.2%}"})._translate() assert ctx["body"][0][1]["display_value"] == "0.1"