From 1cc569f5725ec629e45451419f753b2b336d9d79 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sun, 2 May 2021 20:36:40 +0200 Subject: [PATCH 01/18] ENH: make `Styler.format` compatible with non-unique indexes (with Tests) --- pandas/io/formats/style_render.py | 13 ++++++----- pandas/tests/io/formats/style/test_format.py | 23 ++++++++++++++++++++ 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 4aaf1eecde5e8..0386b9678314c 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -82,8 +82,8 @@ def __init__( data = data.to_frame() if not isinstance(data, DataFrame): raise TypeError("``data`` must be a Series or DataFrame") - if not data.index.is_unique or not data.columns.is_unique: - raise ValueError("style is not supported for non-unique indices.") + # if not data.index.is_unique or not data.columns.is_unique: + # raise ValueError("style is not supported for non-unique indices.") self.data: DataFrame = data self.index: Index = data.index self.columns: Index = data.columns @@ -495,9 +495,12 @@ def format( escape=escape, ) - for row, value in data[[col]].itertuples(): - i, j = self.index.get_loc(row), self.columns.get_loc(col) - self._display_funcs[(i, j)] = format_func + for row in data[[col]].itertuples(): + i_ = self.index.get_indexer_for([row[0]]) # handle duplicate keys in + j_ = self.columns.get_indexer_for([col]) # non-unique indexes + for i in i_: + for j in j_: + self._display_funcs[(i, j)] = format_func return self diff --git a/pandas/tests/io/formats/style/test_format.py b/pandas/tests/io/formats/style/test_format.py index 0f3e5863a4a99..bf4d4bb981a35 100644 --- a/pandas/tests/io/formats/style/test_format.py +++ b/pandas/tests/io/formats/style/test_format.py @@ -237,3 +237,26 @@ def test_format_decimal(formatter, thousands, precision): decimal="_", formatter=formatter, thousands=thousands, precision=precision )._translate() assert "000_123" in result["body"][0][1]["display_value"] + + +def test_format_non_unique(): + df = DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + index=["i", "j", "j"], + columns=["c", "d", "d"], + dtype=float, + ) + + # test dict + html = df.style.format({"d": "{:.1f}"}).render() + for val in ["1.000000<", "4.000000<", "7.000000<"]: + assert val in html + for val in ["2.0<", "3.0<", "5.0<", "6.0<", "8.0<", "9.0<"]: + assert val in html + + # test subset + html = df.style.format(precision=1, subset=IndexSlice["j", "d"]).render() + for val in ["1.000000<", "4.000000<", "7.000000<", "2.000000<", "3.000000<"]: + assert val in html + for val in ["5.0<", "6.0<", "8.0<", "9.0<"]: + assert val in html From 6982554045b3a8228f1000138b7a64fb18a36d42 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sun, 2 May 2021 21:07:44 +0200 Subject: [PATCH 02/18] ENH: error when using a non-unique subset with .apply and .applymap (inc Tests) --- pandas/io/formats/style.py | 19 +++++++++++++------ pandas/tests/io/formats/style/test_style.py | 17 +++++++++++++++++ 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 02e1369a05b93..dd90ead095b40 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -465,12 +465,19 @@ def _update_ctx(self, attrs: DataFrame) -> None: matter. """ for cn in attrs.columns: - for rn, c in attrs[[cn]].itertuples(): - if not c: - continue - css_list = maybe_convert_css_to_tuples(c) - i, j = self.index.get_loc(rn), self.columns.get_loc(cn) - self.ctx[(i, j)].extend(css_list) + try: + for rn, c in attrs[[cn]].itertuples(): + if not c: + continue + css_list = maybe_convert_css_to_tuples(c) + i, j = self.index.get_loc(rn), self.columns.get_loc(cn) + self.ctx[(i, j)].extend(css_list) + except (ValueError, TypeError): + raise KeyError( + "`Styler.apply` and `.applymap` are not " + "compatible with subset slices containing " + "non-unique index or column keys." + ) def _copy(self, deepcopy: bool = False) -> Styler: styler = Styler( diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index 3b614be770bc5..88feb5ab7cc2d 100644 --- a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -1467,3 +1467,20 @@ def test_from_custom_template(tmpdir): assert result.template_html is not Styler.template_html styler = result(DataFrame({"A": [1, 2]})) assert styler.render() + + @pytest.mark.parametrize("func", ["apply", "applymap"]) + def test_apply_applymap_non_unique_raises(func): + df = DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + index=["i", "j", "j"], + columns=["c", "d", "d"], + ) + with pytest.raises(match="`Styler.apply` and `.applymap` are not compatible"): + # slice is non-unique on columns + getattr(df.style, func)(lambda x: x, subset=("i", "d")) + + with pytest.raises(match="`Styler.apply` and `.applymap` are not compatible"): + # slice is non-unique on rows + getattr(df.style, func)(lambda x: x, subset=("j", "c")) + + getattr(df.style, func)(lambda x: x, subset=("i", "c")) # unique subset OK From a3694db8f1def701a15198a3a8e803063aeee8fb Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sun, 2 May 2021 21:33:51 +0200 Subject: [PATCH 03/18] ENH: error when using a non-unique subset with .apply and .applymap: fix Tests --- pandas/tests/io/formats/style/test_format.py | 1 + pandas/tests/io/formats/style/test_style.py | 41 ++++++++++++-------- 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/pandas/tests/io/formats/style/test_format.py b/pandas/tests/io/formats/style/test_format.py index bf4d4bb981a35..af637d879a600 100644 --- a/pandas/tests/io/formats/style/test_format.py +++ b/pandas/tests/io/formats/style/test_format.py @@ -240,6 +240,7 @@ def test_format_decimal(formatter, thousands, precision): def test_format_non_unique(): + # GH 41269 df = DataFrame( [[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=["i", "j", "j"], diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index 88feb5ab7cc2d..18c3148fe20fa 100644 --- a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -534,6 +534,30 @@ def color_negative_red(val): df.loc[pct_subset] df.style.applymap(color_negative_red, subset=pct_subset) + @pytest.mark.parametrize("func", ["apply", "applymap"]) + def test_apply_applymap_non_unique_raises(self, func): + # GH 41269 + if func == "apply": + op = lambda s: ["color: red;"] * len(s) + else: + op = lambda v: "color: red;" + + df = DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + index=["i", "j", "j"], + columns=["c", "d", "d"], + ) + with pytest.raises(KeyError, match="`Styler.apply` and `.applymap` are not"): + # slice is non-unique on columns + getattr(df.style, func)(op, subset=("i", "d"))._compute() + + with pytest.raises(KeyError, match="`Styler.apply` and `.applymap` are not"): + # slice is non-unique on rows + getattr(df.style, func)(op, subset=("j", "c"))._compute() + + # unique subset OK + getattr(df.style, func)(op, subset=("i", "c"))._compute() + def test_where_with_one_style(self): # GH 17474 def f(x): @@ -1467,20 +1491,3 @@ def test_from_custom_template(tmpdir): assert result.template_html is not Styler.template_html styler = result(DataFrame({"A": [1, 2]})) assert styler.render() - - @pytest.mark.parametrize("func", ["apply", "applymap"]) - def test_apply_applymap_non_unique_raises(func): - df = DataFrame( - [[1, 2, 3], [4, 5, 6], [7, 8, 9]], - index=["i", "j", "j"], - columns=["c", "d", "d"], - ) - with pytest.raises(match="`Styler.apply` and `.applymap` are not compatible"): - # slice is non-unique on columns - getattr(df.style, func)(lambda x: x, subset=("i", "d")) - - with pytest.raises(match="`Styler.apply` and `.applymap` are not compatible"): - # slice is non-unique on rows - getattr(df.style, func)(lambda x: x, subset=("j", "c")) - - getattr(df.style, func)(lambda x: x, subset=("i", "c")) # unique subset OK From 5c6669c35b9503331adf92539bd6d101247b39ae Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sun, 2 May 2021 22:02:15 +0200 Subject: [PATCH 04/18] ENH: make table_styles work with non-unique + TST: refactor to own file --- pandas/io/formats/style.py | 3 +- pandas/tests/io/formats/style/test_format.py | 24 ------ .../tests/io/formats/style/test_non_unique.py | 83 +++++++++++++++++++ pandas/tests/io/formats/style/test_style.py | 33 -------- 4 files changed, 85 insertions(+), 58 deletions(-) create mode 100644 pandas/tests/io/formats/style/test_non_unique.py diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index dd90ead095b40..09e315841d4a0 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -993,10 +993,11 @@ def set_table_styles( table_styles = [ { - "selector": str(s["selector"]) + idf + str(obj.get_loc(key)), + "selector": str(s["selector"]) + idf + str(idx), "props": maybe_convert_css_to_tuples(s["props"]), } for key, styles in table_styles.items() + for idx in obj.get_indexer_for([key]) for s in styles ] else: diff --git a/pandas/tests/io/formats/style/test_format.py b/pandas/tests/io/formats/style/test_format.py index af637d879a600..0f3e5863a4a99 100644 --- a/pandas/tests/io/formats/style/test_format.py +++ b/pandas/tests/io/formats/style/test_format.py @@ -237,27 +237,3 @@ def test_format_decimal(formatter, thousands, precision): decimal="_", formatter=formatter, thousands=thousands, precision=precision )._translate() assert "000_123" in result["body"][0][1]["display_value"] - - -def test_format_non_unique(): - # GH 41269 - df = DataFrame( - [[1, 2, 3], [4, 5, 6], [7, 8, 9]], - index=["i", "j", "j"], - columns=["c", "d", "d"], - dtype=float, - ) - - # test dict - html = df.style.format({"d": "{:.1f}"}).render() - for val in ["1.000000<", "4.000000<", "7.000000<"]: - assert val in html - for val in ["2.0<", "3.0<", "5.0<", "6.0<", "8.0<", "9.0<"]: - assert val in html - - # test subset - html = df.style.format(precision=1, subset=IndexSlice["j", "d"]).render() - for val in ["1.000000<", "4.000000<", "7.000000<", "2.000000<", "3.000000<"]: - assert val in html - for val in ["5.0<", "6.0<", "8.0<", "9.0<"]: - assert val in html diff --git a/pandas/tests/io/formats/style/test_non_unique.py b/pandas/tests/io/formats/style/test_non_unique.py new file mode 100644 index 0000000000000..ab80e93c84b33 --- /dev/null +++ b/pandas/tests/io/formats/style/test_non_unique.py @@ -0,0 +1,83 @@ +import pytest + +from pandas import ( + DataFrame, + IndexSlice, +) + +pytest.importorskip("jinja2") + +from pandas.io.formats.style import Styler + + +@pytest.fixture +def df(): + return DataFrame( + [[1, 2, 3], [4, 5, 6], [7, 8, 9]], + index=["i", "j", "j"], + columns=["c", "d", "d"], + dtype=float, + ) + + +@pytest.fixture +def styler(df): + return Styler(df, uuid_len=0) + + +def test_format_non_unique(df): + # GH 41269 + + # test dict + html = df.style.format({"d": "{:.1f}"}).render() + for val in ["1.000000<", "4.000000<", "7.000000<"]: + assert val in html + for val in ["2.0<", "3.0<", "5.0<", "6.0<", "8.0<", "9.0<"]: + assert val in html + + # test subset + html = df.style.format(precision=1, subset=IndexSlice["j", "d"]).render() + for val in ["1.000000<", "4.000000<", "7.000000<", "2.000000<", "3.000000<"]: + assert val in html + for val in ["5.0<", "6.0<", "8.0<", "9.0<"]: + assert val in html + + +@pytest.mark.parametrize("func", ["apply", "applymap"]) +def test_apply_applymap_non_unique_raises(df, func): + # GH 41269 + if func == "apply": + op = lambda s: ["color: red;"] * len(s) + else: + op = lambda v: "color: red;" + + with pytest.raises(KeyError, match="`Styler.apply` and `.applymap` are not"): + # slice is non-unique on columns + getattr(df.style, func)(op, subset=("i", "d"))._compute() + + with pytest.raises(KeyError, match="`Styler.apply` and `.applymap` are not"): + # slice is non-unique on rows + getattr(df.style, func)(op, subset=("j", "c"))._compute() + + # unique subset OK + getattr(df.style, func)(op, subset=("i", "c"))._compute() + + +def test_table_styles_dict_non_unique_index(styler): + styles = styler.set_table_styles( + {"j": [{"selector": "td", "props": "a: v;"}]}, axis=1 + ).table_styles + assert styles == [ + {"selector": "td.row1", "props": [("a", "v")]}, + {"selector": "td.row2", "props": [("a", "v")]}, + ] + + +def test_table_styles_dict_non_unique_columns(styler): + styles = styler.set_table_styles( + {"d": [{"selector": "td", "props": "a: v;"}]}, axis=0 + ).table_styles + assert styles == [ + {"selector": "td.col1", "props": [("a", "v")]}, + {"selector": "td.col2", "props": [("a", "v")]}, + ] diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index 18c3148fe20fa..855def916c2cd 100644 --- a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -534,30 +534,6 @@ def color_negative_red(val): df.loc[pct_subset] df.style.applymap(color_negative_red, subset=pct_subset) - @pytest.mark.parametrize("func", ["apply", "applymap"]) - def test_apply_applymap_non_unique_raises(self, func): - # GH 41269 - if func == "apply": - op = lambda s: ["color: red;"] * len(s) - else: - op = lambda v: "color: red;" - - df = DataFrame( - [[1, 2, 3], [4, 5, 6], [7, 8, 9]], - index=["i", "j", "j"], - columns=["c", "d", "d"], - ) - with pytest.raises(KeyError, match="`Styler.apply` and `.applymap` are not"): - # slice is non-unique on columns - getattr(df.style, func)(op, subset=("i", "d"))._compute() - - with pytest.raises(KeyError, match="`Styler.apply` and `.applymap` are not"): - # slice is non-unique on rows - getattr(df.style, func)(op, subset=("j", "c"))._compute() - - # unique subset OK - getattr(df.style, func)(op, subset=("i", "c"))._compute() - def test_where_with_one_style(self): # GH 17474 def f(x): @@ -695,15 +671,6 @@ def test_set_na_rep(self): assert ctx["body"][0][1]["display_value"] == "NA" assert ctx["body"][0][2]["display_value"] == "-" - def test_nonunique_raises(self): - df = DataFrame([[1, 2]], columns=["A", "A"]) - msg = "style is not supported for non-unique indices." - with pytest.raises(ValueError, match=msg): - df.style - - with pytest.raises(ValueError, match=msg): - Styler(df) - def test_caption(self): styler = Styler(self.df, caption="foo") result = styler.render() From 732c7d5a97abd399d8ab424b800d810aa8fb7b6d Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sun, 2 May 2021 22:39:44 +0200 Subject: [PATCH 05/18] ENH: error catching --- pandas/io/formats/style.py | 18 ++++++++++++------ .../tests/io/formats/style/test_non_unique.py | 5 +++++ 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 09e315841d4a0..4b977efda9721 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -464,6 +464,11 @@ def _update_ctx(self, attrs: DataFrame) -> None: Whitespace shouldn't matter and the final trailing ';' shouldn't matter. """ + err = KeyError( + "`Styler.apply` and `.applymap` are not compatible with subset slices " + "containing non-unique index or column keys." + ) + for cn in attrs.columns: try: for rn, c in attrs[[cn]].itertuples(): @@ -472,12 +477,13 @@ def _update_ctx(self, attrs: DataFrame) -> None: css_list = maybe_convert_css_to_tuples(c) i, j = self.index.get_loc(rn), self.columns.get_loc(cn) self.ctx[(i, j)].extend(css_list) - except (ValueError, TypeError): - raise KeyError( - "`Styler.apply` and `.applymap` are not " - "compatible with subset slices containing " - "non-unique index or column keys." - ) + except ValueError as ve: + if "Styles supplied as string must follow CSS rule formats" in str(ve): + raise ve # error is from maybe_convert_css_to_tuples() + else: + raise err # 'too many values to unpack': caught non-unique column + except TypeError: + raise err # 'unhashable type: slice' caught a non-unique index def _copy(self, deepcopy: bool = False) -> Styler: styler = Styler( diff --git a/pandas/tests/io/formats/style/test_non_unique.py b/pandas/tests/io/formats/style/test_non_unique.py index ab80e93c84b33..ff33b2a58b8e0 100644 --- a/pandas/tests/io/formats/style/test_non_unique.py +++ b/pandas/tests/io/formats/style/test_non_unique.py @@ -81,3 +81,8 @@ def test_table_styles_dict_non_unique_columns(styler): {"selector": "td.col1", "props": [("a", "v")]}, {"selector": "td.col2", "props": [("a", "v")]}, ] + + +def test_maybe_convert_css_raises(styler): + with pytest.raises(ValueError, match="Styles supplied as string must follow CSS"): + styler.applymap(lambda x: "bad-css;")._compute() From 4c991301948fc07b67722a9c4e43c9d7456de6d0 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sun, 2 May 2021 22:43:56 +0200 Subject: [PATCH 06/18] ENH: error catching --- pandas/tests/io/formats/style/test_non_unique.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/io/formats/style/test_non_unique.py b/pandas/tests/io/formats/style/test_non_unique.py index ff33b2a58b8e0..c2a442b2b19ac 100644 --- a/pandas/tests/io/formats/style/test_non_unique.py +++ b/pandas/tests/io/formats/style/test_non_unique.py @@ -84,5 +84,6 @@ def test_table_styles_dict_non_unique_columns(styler): def test_maybe_convert_css_raises(styler): + # test _update_ctx() detects the right ValueError where non-unique columns present with pytest.raises(ValueError, match="Styles supplied as string must follow CSS"): styler.applymap(lambda x: "bad-css;")._compute() From a7a29667b46a41d0cf2e915ac00b7b9702535e4c Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Mon, 3 May 2021 08:03:32 +0200 Subject: [PATCH 07/18] ENH: deal with tooltips and raise (inc Tests) --- pandas/io/formats/style.py | 4 ++++ pandas/tests/io/formats/style/test_non_unique.py | 16 ++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 4b977efda9721..6bb7afb150751 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -322,6 +322,10 @@ def set_tooltips( raise NotImplementedError( "Tooltips can only render with 'cell_ids' is True." ) + if not ttips.index.is_unique or not ttips.columns.is_unique: + raise KeyError( + "Tooltips renders only if `ttips` has unique index and columns." + ) if self.tooltips is None: # create a default instance if necessary self.tooltips = Tooltips() self.tooltips.tt_data = ttips diff --git a/pandas/tests/io/formats/style/test_non_unique.py b/pandas/tests/io/formats/style/test_non_unique.py index c2a442b2b19ac..e92ff4d630579 100644 --- a/pandas/tests/io/formats/style/test_non_unique.py +++ b/pandas/tests/io/formats/style/test_non_unique.py @@ -87,3 +87,19 @@ def test_maybe_convert_css_raises(styler): # test _update_ctx() detects the right ValueError where non-unique columns present with pytest.raises(ValueError, match="Styles supplied as string must follow CSS"): styler.applymap(lambda x: "bad-css;")._compute() + + +def test_tooltips_non_unique_raises(styler): + # ttips has unique keys + ttips = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "d"], index=["a", "b"]) + styler.set_tooltips(ttips=ttips) # OK + + # ttips has non-unique columns + ttips = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "c"], index=["a", "b"]) + with pytest.raises(KeyError, match="Tooltips renders only if `ttips` has unique"): + styler.set_tooltips(ttips=ttips) + + # ttips has non-unique index + ttips = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "d"], index=["a", "a"]) + with pytest.raises(KeyError, match="Tooltips renders only if `ttips` has unique"): + styler.set_tooltips(ttips=ttips) From 19fb7f9c3ae72bdc9c0cc978469ae63b2e56c743 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Mon, 3 May 2021 08:04:11 +0200 Subject: [PATCH 08/18] ENH: deal with tooltips and raise (inc Tests) --- pandas/io/formats/style.py | 2 +- pandas/tests/io/formats/style/test_non_unique.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 6bb7afb150751..f0d95818eb060 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -324,7 +324,7 @@ def set_tooltips( ) if not ttips.index.is_unique or not ttips.columns.is_unique: raise KeyError( - "Tooltips renders only if `ttips` has unique index and columns." + "Tooltips render only if `ttips` has unique index and columns." ) if self.tooltips is None: # create a default instance if necessary self.tooltips = Tooltips() diff --git a/pandas/tests/io/formats/style/test_non_unique.py b/pandas/tests/io/formats/style/test_non_unique.py index e92ff4d630579..c4b84873f08af 100644 --- a/pandas/tests/io/formats/style/test_non_unique.py +++ b/pandas/tests/io/formats/style/test_non_unique.py @@ -96,10 +96,10 @@ def test_tooltips_non_unique_raises(styler): # ttips has non-unique columns ttips = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "c"], index=["a", "b"]) - with pytest.raises(KeyError, match="Tooltips renders only if `ttips` has unique"): + with pytest.raises(KeyError, match="Tooltips render only if `ttips` has unique"): styler.set_tooltips(ttips=ttips) # ttips has non-unique index ttips = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "d"], index=["a", "a"]) - with pytest.raises(KeyError, match="Tooltips renders only if `ttips` has unique"): + with pytest.raises(KeyError, match="Tooltips render only if `ttips` has unique"): styler.set_tooltips(ttips=ttips) From 4ce559ee24d71320e39501285406726d83063532 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Mon, 3 May 2021 08:13:07 +0200 Subject: [PATCH 09/18] ENH: deal with tset_td_classes and raise (inc Tests) --- pandas/io/formats/style.py | 4 ++++ pandas/tests/io/formats/style/test_non_unique.py | 16 ++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index f0d95818eb060..f56ce40efb280 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -446,6 +446,10 @@ def set_td_classes(self, classes: DataFrame) -> Styler: ' ' '' """ + if not classes.index.is_unique or not classes.columns.is_unique: + raise KeyError( + "Classes render only if `classes` has unique index and columns." + ) classes = classes.reindex_like(self.data) for r, row_tup in enumerate(classes.itertuples()): diff --git a/pandas/tests/io/formats/style/test_non_unique.py b/pandas/tests/io/formats/style/test_non_unique.py index c4b84873f08af..7175c683205da 100644 --- a/pandas/tests/io/formats/style/test_non_unique.py +++ b/pandas/tests/io/formats/style/test_non_unique.py @@ -103,3 +103,19 @@ def test_tooltips_non_unique_raises(styler): ttips = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "d"], index=["a", "a"]) with pytest.raises(KeyError, match="Tooltips render only if `ttips` has unique"): styler.set_tooltips(ttips=ttips) + + +def test_set_td_classes_non_unique_raises(styler): + # classes has unique keys + classes = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "d"], index=["a", "b"]) + styler.set_td_classes(classes=classes) # OK + + # classes has non-unique columns + classes = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "c"], index=["a", "b"]) + with pytest.raises(KeyError, match="Classes render only if `classes` has unique"): + styler.set_td_classes(classes=classes) + + # classes has non-unique index + classes = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "d"], index=["a", "a"]) + with pytest.raises(KeyError, match="Classes render only if `classes` has unique"): + styler.set_td_classes(classes=classes) From 7f2811142f52be5d70cb440037dd181dcc155eb6 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Mon, 3 May 2021 08:46:12 +0200 Subject: [PATCH 10/18] ENH: tests for hide_columns --- .../tests/io/formats/style/test_non_unique.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/pandas/tests/io/formats/style/test_non_unique.py b/pandas/tests/io/formats/style/test_non_unique.py index 7175c683205da..c57442978a4f9 100644 --- a/pandas/tests/io/formats/style/test_non_unique.py +++ b/pandas/tests/io/formats/style/test_non_unique.py @@ -119,3 +119,20 @@ def test_set_td_classes_non_unique_raises(styler): classes = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "d"], index=["a", "a"]) with pytest.raises(KeyError, match="Classes render only if `classes` has unique"): styler.set_td_classes(classes=classes) + + +def test_hide_columns_non_unique(styler): + ctx = styler.hide_columns(["d"])._translate() + + assert ctx["head"][0][1]["display_value"] == "c" + assert ctx["head"][0][1]["is_visible"] is True + + assert ctx["head"][0][2]["display_value"] == "d" + assert ctx["head"][0][2]["is_visible"] is False + + assert ctx["head"][0][3]["display_value"] == "d" + assert ctx["head"][0][3]["is_visible"] is False + + assert ctx["body"][0][1]["is_visible"] is True + assert ctx["body"][0][2]["is_visible"] is False + assert ctx["body"][0][3]["is_visible"] is False From 5043c0149aec81a15a04550bf9121baeba692fa8 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Mon, 3 May 2021 08:47:55 +0200 Subject: [PATCH 11/18] ENH: remove style ValueError --- pandas/io/formats/style_render.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 0386b9678314c..2ca65bca424c3 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -82,8 +82,6 @@ def __init__( data = data.to_frame() if not isinstance(data, DataFrame): raise TypeError("``data`` must be a Series or DataFrame") - # if not data.index.is_unique or not data.columns.is_unique: - # raise ValueError("style is not supported for non-unique indices.") self.data: DataFrame = data self.index: Index = data.index self.columns: Index = data.columns From 09764ba8a874eed311d66dff20bdcac0ce6832cc Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Tue, 4 May 2021 10:22:41 +0200 Subject: [PATCH 12/18] whats new --- doc/source/whatsnew/v1.3.0.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 61bea198e42db..0f84099a70b41 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -138,6 +138,8 @@ The :meth:`.Styler.format` has had upgrades to easily format missing data, precision, and perform HTML escaping (:issue:`40437` :issue:`40134`). There have been numerous other bug fixes to properly format HTML and eliminate some inconsistencies (:issue:`39942` :issue:`40356` :issue:`39807` :issue:`39889` :issue:`39627`) +:class:`.Styler` has also been compatible with non-unique index or columns, at least for as many features as are fully compatible, others made only partially compatible (:issue:`41269`). + Documentation has also seen major revisions in light of new features (:issue:`39720` :issue:`39317` :issue:`40493`) .. _whatsnew_130.dataframe_honors_copy_with_dict: From 4faeb29e1bc2085231bfa6915760fee9898335fc Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Wed, 5 May 2021 16:32:31 +0200 Subject: [PATCH 13/18] prohibit apply and applymap in non-unique case --- pandas/io/formats/style.py | 29 +++++++------------ .../tests/io/formats/style/test_non_unique.py | 16 +--------- 2 files changed, 12 insertions(+), 33 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index f56ce40efb280..8fc2825ffcfc5 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -472,26 +472,19 @@ def _update_ctx(self, attrs: DataFrame) -> None: Whitespace shouldn't matter and the final trailing ';' shouldn't matter. """ - err = KeyError( - "`Styler.apply` and `.applymap` are not compatible with subset slices " - "containing non-unique index or column keys." - ) + if not self.index.is_unique or not self.columns.is_unique: + raise KeyError( + "`Styler.apply` and `.applymap` are not compatible " + "with non-unique index or columns." + ) for cn in attrs.columns: - try: - for rn, c in attrs[[cn]].itertuples(): - if not c: - continue - css_list = maybe_convert_css_to_tuples(c) - i, j = self.index.get_loc(rn), self.columns.get_loc(cn) - self.ctx[(i, j)].extend(css_list) - except ValueError as ve: - if "Styles supplied as string must follow CSS rule formats" in str(ve): - raise ve # error is from maybe_convert_css_to_tuples() - else: - raise err # 'too many values to unpack': caught non-unique column - except TypeError: - raise err # 'unhashable type: slice' caught a non-unique index + for rn, c in attrs[[cn]].itertuples(): + if not c: + continue + css_list = maybe_convert_css_to_tuples(c) + i, j = self.index.get_loc(rn), self.columns.get_loc(cn) + self.ctx[(i, j)].extend(css_list) def _copy(self, deepcopy: bool = False) -> Styler: styler = Styler( diff --git a/pandas/tests/io/formats/style/test_non_unique.py b/pandas/tests/io/formats/style/test_non_unique.py index c57442978a4f9..2dc7433009368 100644 --- a/pandas/tests/io/formats/style/test_non_unique.py +++ b/pandas/tests/io/formats/style/test_non_unique.py @@ -52,15 +52,7 @@ def test_apply_applymap_non_unique_raises(df, func): op = lambda v: "color: red;" with pytest.raises(KeyError, match="`Styler.apply` and `.applymap` are not"): - # slice is non-unique on columns - getattr(df.style, func)(op, subset=("i", "d"))._compute() - - with pytest.raises(KeyError, match="`Styler.apply` and `.applymap` are not"): - # slice is non-unique on rows - getattr(df.style, func)(op, subset=("j", "c"))._compute() - - # unique subset OK - getattr(df.style, func)(op, subset=("i", "c"))._compute() + getattr(df.style, func)(op)._compute() def test_table_styles_dict_non_unique_index(styler): @@ -83,12 +75,6 @@ def test_table_styles_dict_non_unique_columns(styler): ] -def test_maybe_convert_css_raises(styler): - # test _update_ctx() detects the right ValueError where non-unique columns present - with pytest.raises(ValueError, match="Styles supplied as string must follow CSS"): - styler.applymap(lambda x: "bad-css;")._compute() - - def test_tooltips_non_unique_raises(styler): # ttips has unique keys ttips = DataFrame([["1", "2"], ["3", "4"]], columns=["c", "d"], index=["a", "b"]) From 3a8f11e56dcb4ee39ff2e229ea9b0b1c7197a0d4 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Thu, 6 May 2021 08:10:07 +0200 Subject: [PATCH 14/18] move outside loop --- pandas/io/formats/style_render.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 2ca65bca424c3..3b320c7f74e33 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -493,9 +493,9 @@ def format( escape=escape, ) + j_ = self.columns.get_indexer_for([col]) # handle non-unique columns for row in data[[col]].itertuples(): - i_ = self.index.get_indexer_for([row[0]]) # handle duplicate keys in - j_ = self.columns.get_indexer_for([col]) # non-unique indexes + i_ = self.index.get_indexer_for([row[0]]) # handle non-unique index for i in i_: for j in j_: self._display_funcs[(i, j)] = format_func From 51233beade0714b09698d2775aefd03a946a6a1e Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Thu, 6 May 2021 08:25:54 +0200 Subject: [PATCH 15/18] create conditional for performance --- pandas/io/formats/style_render.py | 46 +++++++++++++++++++++---------- 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 3b320c7f74e33..2f3441a840a59 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -483,22 +483,38 @@ def format( if not isinstance(formatter, dict): formatter = {col: formatter for col in columns} - for col in columns: - format_func = _maybe_wrap_formatter( - formatter.get(col), - na_rep=na_rep, - precision=precision, - decimal=decimal, - thousands=thousands, - escape=escape, - ) + if self.index.is_unique and self.columns.is_unique: + for col in columns: + format_func = _maybe_wrap_formatter( + formatter.get(col), + na_rep=na_rep, + precision=precision, + decimal=decimal, + thousands=thousands, + escape=escape, + ) + + j = self.columns.get_loc(col) # single value + for row in data[[col]].itertuples(): + i = self.index.get_loc(row) # single value + self._display_funcs[(i, j)] = format_func + else: # will get multiple index value locs to loop over at performance cost + for col in columns: + format_func = _maybe_wrap_formatter( + formatter.get(col), + na_rep=na_rep, + precision=precision, + decimal=decimal, + thousands=thousands, + escape=escape, + ) - j_ = self.columns.get_indexer_for([col]) # handle non-unique columns - for row in data[[col]].itertuples(): - i_ = self.index.get_indexer_for([row[0]]) # handle non-unique index - for i in i_: - for j in j_: - self._display_funcs[(i, j)] = format_func + j_ = self.columns.get_indexer_for([col]) # handle non-unique columns + for row in data[[col]].itertuples(): + i_ = self.index.get_indexer_for([row[0]]) # handle non-unique index + for i in i_: + for j in j_: + self._display_funcs[(i, j)] = format_func return self From 8454c5e2565277b318400b57c09ee1b462112a91 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Thu, 6 May 2021 08:51:49 +0200 Subject: [PATCH 16/18] create conditional for performance --- pandas/io/formats/style_render.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 2f3441a840a59..8c76a7f6d9296 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -495,7 +495,7 @@ def format( ) j = self.columns.get_loc(col) # single value - for row in data[[col]].itertuples(): + for row, value in data[[col]].itertuples(): i = self.index.get_loc(row) # single value self._display_funcs[(i, j)] = format_func else: # will get multiple index value locs to loop over at performance cost From c3b7af8214cae0681a4bd37062346ac3a29c9b6a Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Thu, 6 May 2021 20:51:23 +0200 Subject: [PATCH 17/18] take indexing out of loops --- pandas/io/formats/style_render.py | 83 ++++++++++++++++++------------- 1 file changed, 48 insertions(+), 35 deletions(-) diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index 8c76a7f6d9296..da6d885c38acb 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -479,42 +479,55 @@ def format( subset = non_reducing_slice(subset) data = self.data.loc[subset] - columns = data.columns if not isinstance(formatter, dict): - formatter = {col: formatter for col in columns} - - if self.index.is_unique and self.columns.is_unique: - for col in columns: - format_func = _maybe_wrap_formatter( - formatter.get(col), - na_rep=na_rep, - precision=precision, - decimal=decimal, - thousands=thousands, - escape=escape, - ) - - j = self.columns.get_loc(col) # single value - for row, value in data[[col]].itertuples(): - i = self.index.get_loc(row) # single value - self._display_funcs[(i, j)] = format_func - else: # will get multiple index value locs to loop over at performance cost - for col in columns: - format_func = _maybe_wrap_formatter( - formatter.get(col), - na_rep=na_rep, - precision=precision, - decimal=decimal, - thousands=thousands, - escape=escape, - ) - - j_ = self.columns.get_indexer_for([col]) # handle non-unique columns - for row in data[[col]].itertuples(): - i_ = self.index.get_indexer_for([row[0]]) # handle non-unique index - for i in i_: - for j in j_: - self._display_funcs[(i, j)] = format_func + formatter = {col: formatter for col in data.columns} + + cis = self.columns.get_indexer_for(data.columns) + ris = self.index.get_indexer_for(data.index) + for ci in cis: + format_func = _maybe_wrap_formatter( + formatter.get(self.columns[ci]), + na_rep=na_rep, + precision=precision, + decimal=decimal, + thousands=thousands, + escape=escape, + ) + for ri in ris: + self._display_funcs[(ri, ci)] = format_func + + # if self.index.is_unique and self.columns.is_unique: + # for col in columns: + # format_func = _maybe_wrap_formatter( + # formatter.get(col), + # na_rep=na_rep, + # precision=precision, + # decimal=decimal, + # thousands=thousands, + # escape=escape, + # ) + # + # j = self.columns.get_loc(col) # single value + # for row, value in data[[col]].itertuples(): + # i = self.index.get_loc(row) # single value + # self._display_funcs[(i, j)] = format_func + # else: # will get multiple index value locs to loop over at performance cost + # for col in columns: + # format_func = _maybe_wrap_formatter( + # formatter.get(col), + # na_rep=na_rep, + # precision=precision, + # decimal=decimal, + # thousands=thousands, + # escape=escape, + # ) + # + # j_ = self.columns.get_indexer_for([col]) # handle non-unique columns + # for row in data[[col]].itertuples(): + # i_ = self.index.get_indexer_for([row[0]]) # handle non-unique index + # for i in i_: + # for j in j_: + # self._display_funcs[(i, j)] = format_func return self From 20cd19fc2029ec94e771b2527f592d19f7459f01 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Thu, 6 May 2021 21:01:26 +0200 Subject: [PATCH 18/18] take indexing out of loops --- pandas/io/formats/style_render.py | 33 ------------------------------- 1 file changed, 33 deletions(-) diff --git a/pandas/io/formats/style_render.py b/pandas/io/formats/style_render.py index da6d885c38acb..bd768f4f0a1d4 100644 --- a/pandas/io/formats/style_render.py +++ b/pandas/io/formats/style_render.py @@ -496,39 +496,6 @@ def format( for ri in ris: self._display_funcs[(ri, ci)] = format_func - # if self.index.is_unique and self.columns.is_unique: - # for col in columns: - # format_func = _maybe_wrap_formatter( - # formatter.get(col), - # na_rep=na_rep, - # precision=precision, - # decimal=decimal, - # thousands=thousands, - # escape=escape, - # ) - # - # j = self.columns.get_loc(col) # single value - # for row, value in data[[col]].itertuples(): - # i = self.index.get_loc(row) # single value - # self._display_funcs[(i, j)] = format_func - # else: # will get multiple index value locs to loop over at performance cost - # for col in columns: - # format_func = _maybe_wrap_formatter( - # formatter.get(col), - # na_rep=na_rep, - # precision=precision, - # decimal=decimal, - # thousands=thousands, - # escape=escape, - # ) - # - # j_ = self.columns.get_indexer_for([col]) # handle non-unique columns - # for row in data[[col]].itertuples(): - # i_ = self.index.get_indexer_for([row[0]]) # handle non-unique index - # for i in i_: - # for j in j_: - # self._display_funcs[(i, j)] = format_func - return self