Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: make Styler compatible with non-unique indexes #41269

Merged
merged 22 commits into from
May 6, 2021
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
1cc569f
ENH: make `Styler.format` compatible with non-unique indexes (with Te…
attack68 May 2, 2021
6982554
ENH: error when using a non-unique subset with .apply and .applymap (…
attack68 May 2, 2021
a3694db
ENH: error when using a non-unique subset with .apply and .applymap: …
attack68 May 2, 2021
5c6669c
ENH: make table_styles work with non-unique + TST: refactor to own file
attack68 May 2, 2021
732c7d5
ENH: error catching
attack68 May 2, 2021
4c99130
ENH: error catching
attack68 May 2, 2021
57e8bef
Merge remote-tracking branch 'upstream/master' into styler_non_unique
attack68 May 3, 2021
a7a2966
ENH: deal with tooltips and raise (inc Tests)
attack68 May 3, 2021
19fb7f9
ENH: deal with tooltips and raise (inc Tests)
attack68 May 3, 2021
4ce559e
ENH: deal with tset_td_classes and raise (inc Tests)
attack68 May 3, 2021
7f28111
ENH: tests for hide_columns
attack68 May 3, 2021
5043c01
ENH: remove style ValueError
attack68 May 3, 2021
9fc6cd3
Merge remote-tracking branch 'upstream/master' into styler_non_unique
attack68 May 4, 2021
09764ba
whats new
attack68 May 4, 2021
9451aae
Merge remote-tracking branch 'upstream/master' into styler_non_unique
attack68 May 5, 2021
4faeb29
prohibit apply and applymap in non-unique case
attack68 May 5, 2021
aed0536
Merge remote-tracking branch 'upstream/master' into styler_non_unique
attack68 May 6, 2021
3a8f11e
move outside loop
attack68 May 6, 2021
51233be
create conditional for performance
attack68 May 6, 2021
8454c5e
create conditional for performance
attack68 May 6, 2021
c3b7af8
take indexing out of loops
attack68 May 6, 2021
20cd19f
take indexing out of loops
attack68 May 6, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,8 @@ The :meth:`.Styler.format` has had upgrades to easily format missing data,
precision, and perform HTML escaping (:issue:`40437` :issue:`40134`). There have been numerous other bug fixes to
properly format HTML and eliminate some inconsistencies (:issue:`39942` :issue:`40356` :issue:`39807` :issue:`39889` :issue:`39627`)

:class:`.Styler` has also been made compatible with non-unique index or columns, at least for as many features as are fully compatible; others have been made only partially compatible (:issue:`41269`).

Documentation has also seen major revisions in light of new features (:issue:`39720` :issue:`39317` :issue:`40493`)

.. _whatsnew_130.dataframe_honors_copy_with_dict:
Expand Down
17 changes: 16 additions & 1 deletion pandas/io/formats/style.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,10 @@ def set_tooltips(
raise NotImplementedError(
"Tooltips can only render with 'cell_ids' is True."
)
if not ttips.index.is_unique or not ttips.columns.is_unique:
raise KeyError(
jreback marked this conversation as resolved.
Show resolved Hide resolved
"Tooltips render only if `ttips` has unique index and columns."
)
if self.tooltips is None: # create a default instance if necessary
self.tooltips = Tooltips()
self.tooltips.tt_data = ttips
Expand Down Expand Up @@ -442,6 +446,10 @@ def set_td_classes(self, classes: DataFrame) -> Styler:
' </tbody>'
'</table>'
"""
if not classes.index.is_unique or not classes.columns.is_unique:
raise KeyError(
"Classes render only if `classes` has unique index and columns."
)
classes = classes.reindex_like(self.data)

for r, row_tup in enumerate(classes.itertuples()):
Expand All @@ -464,6 +472,12 @@ def _update_ctx(self, attrs: DataFrame) -> None:
Whitespace shouldn't matter and the final trailing ';' shouldn't
matter.
"""
if not self.index.is_unique or not self.columns.is_unique:
raise KeyError(
"`Styler.apply` and `.applymap` are not compatible "
"with non-unique index or columns."
)

for cn in attrs.columns:
for rn, c in attrs[[cn]].itertuples():
if not c:
Expand Down Expand Up @@ -986,10 +1000,11 @@ def set_table_styles(

table_styles = [
{
"selector": str(s["selector"]) + idf + str(obj.get_loc(key)),
"selector": str(s["selector"]) + idf + str(idx),
"props": maybe_convert_css_to_tuples(s["props"]),
}
for key, styles in table_styles.items()
for idx in obj.get_indexer_for([key])
for s in styles
]
else:
Expand Down
11 changes: 6 additions & 5 deletions pandas/io/formats/style_render.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,6 @@ def __init__(
data = data.to_frame()
if not isinstance(data, DataFrame):
raise TypeError("``data`` must be a Series or DataFrame")
if not data.index.is_unique or not data.columns.is_unique:
raise ValueError("style is not supported for non-unique indices.")
self.data: DataFrame = data
self.index: Index = data.index
self.columns: Index = data.columns
Expand Down Expand Up @@ -495,9 +493,12 @@ def format(
escape=escape,
)

for row, value in data[[col]].itertuples():
i, j = self.index.get_loc(row), self.columns.get_loc(col)
self._display_funcs[(i, j)] = format_func
for row in data[[col]].itertuples():
i_ = self.index.get_indexer_for([row[0]]) # handle duplicate keys in
j_ = self.columns.get_indexer_for([col]) # non-unique indexes
Copy link
Contributor

@jreback jreback May 6, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you can do this outside of the loop right? (as col doesn't change), for j_

does this change perf at all? (I don't think so, but checking).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch.. the multiple loops really killed it for the unique case (which is benchmarked)..

       before           after         ratio
     [4af3eed5]       [3a8f11e5]
     <styler_non_unique~1^2>       <styler_non_unique>
+        57.8±2ms          163±4ms     2.82  io.style.Render.time_format_render(24, 120)
+        87.1±5ms          242±4ms     2.78  io.style.Render.time_format_render(36, 120)
+      30.5±0.9ms       82.2±0.4ms     2.69  io.style.Render.time_format_render(12, 120)
+     8.53±0.08ms       14.7±0.2ms     1.72  io.style.Render.time_format_render(12, 12)
+      16.2±0.3ms       27.7±0.7ms     1.71  io.style.Render.time_format_render(24, 12)
+        25.3±1ms       40.9±0.7ms     1.62  io.style.Render.time_format_render(36, 12)
+      16.6±0.2ms       18.5±0.2ms     1.11  io.style.Render.time_classes_render(36, 12)

SOME BENCHMARKS HAVE CHANGED SIGNIFICANTLY.
PERFORMANCE DECREASED.

So I had to separate out the non-unique and unique cases with a conditional, then performance was the same...

BENCHMARKS NOT SIGNIFICANTLY CHANGED.

for i in i_:
for j in j_:
self._display_funcs[(i, j)] = format_func

return self

Expand Down
124 changes: 124 additions & 0 deletions pandas/tests/io/formats/style/test_non_unique.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
import pytest

from pandas import (
DataFrame,
IndexSlice,
)

pytest.importorskip("jinja2")

from pandas.io.formats.style import Styler


@pytest.fixture
def df():
    """Provide a 3x3 float frame with duplicate labels on both axes."""
    values = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
    row_labels = ["i", "j", "j"]  # "j" appears twice
    col_labels = ["c", "d", "d"]  # "d" appears twice
    return DataFrame(values, index=row_labels, columns=col_labels, dtype=float)


@pytest.fixture
def styler(df):
    """Wrap the non-unique ``df`` fixture in a ``Styler`` built with ``uuid_len=0``."""
    non_unique_styler = Styler(df, uuid_len=0)
    return non_unique_styler


def test_format_non_unique(df):
    """Check ``Styler.format`` reaches every cell addressed by a duplicated label."""
    # GH 41269

    # test dict: the formatter keyed on "d" must hit both "d" columns, while
    # column "c" keeps its default rendering
    rendered = df.style.format({"d": "{:.1f}"}).render()
    default_cells = ["1.000000<", "4.000000<", "7.000000<"]
    formatted_cells = ["2.0<", "3.0<", "5.0<", "6.0<", "8.0<", "9.0<"]
    for fragment in default_cells + formatted_cells:
        assert fragment in rendered

    # test subset: only the cells in rows "j" and columns "d" are reformatted
    cell_subset = IndexSlice["j", "d"]
    rendered = df.style.format(precision=1, subset=cell_subset).render()
    default_cells = ["1.000000<", "4.000000<", "7.000000<", "2.000000<", "3.000000<"]
    formatted_cells = ["5.0<", "6.0<", "8.0<", "9.0<"]
    for fragment in default_cells + formatted_cells:
        assert fragment in rendered


@pytest.mark.parametrize("func", ["apply", "applymap"])
def test_apply_applymap_non_unique_raises(df, func):
    """Check ``apply``/``applymap`` raise ``KeyError`` on non-unique labels."""
    # GH 41269
    # one styling callable per method: ``apply`` receives a whole Series/column,
    # ``applymap`` receives a single value
    style_ops = {
        "apply": lambda s: ["color: red;"] * len(s),
        "applymap": lambda v: "color: red;",
    }
    op = style_ops[func]

    with pytest.raises(KeyError, match="`Styler.apply` and `.applymap` are not"):
        pending = getattr(df.style, func)(op)
        pending._compute()


def test_table_styles_dict_non_unique_index(styler):
    """Check a duplicated index key expands to one selector per matching row."""
    per_label_styles = {"j": [{"selector": "td", "props": "a: v;"}]}
    result = styler.set_table_styles(per_label_styles, axis=1).table_styles

    # "j" labels the rows at positions 1 and 2 of the fixture frame
    expected = [
        {"selector": f"td.row{pos}", "props": [("a", "v")]} for pos in (1, 2)
    ]
    assert result == expected


def test_table_styles_dict_non_unique_columns(styler):
    """Check a duplicated column key expands to one selector per matching column."""
    per_label_styles = {"d": [{"selector": "td", "props": "a: v;"}]}
    result = styler.set_table_styles(per_label_styles, axis=0).table_styles

    # "d" labels the columns at positions 1 and 2 of the fixture frame
    expected = [
        {"selector": f"td.col{pos}", "props": [("a", "v")]} for pos in (1, 2)
    ]
    assert result == expected


def test_tooltips_non_unique_raises(styler):
    """Check ``set_tooltips`` takes unique ``ttips`` labels and raises otherwise."""
    cells = [["1", "2"], ["3", "4"]]

    # ttips has unique keys
    unique_ttips = DataFrame(cells, columns=["c", "d"], index=["a", "b"])
    styler.set_tooltips(ttips=unique_ttips)  # OK

    duplicated_labels = [
        {"columns": ["c", "c"], "index": ["a", "b"]},  # non-unique columns
        {"columns": ["c", "d"], "index": ["a", "a"]},  # non-unique index
    ]
    for labels in duplicated_labels:
        ttips = DataFrame(cells, **labels)
        with pytest.raises(
            KeyError, match="Tooltips render only if `ttips` has unique"
        ):
            styler.set_tooltips(ttips=ttips)


def test_set_td_classes_non_unique_raises(styler):
    """Check ``set_td_classes`` takes unique ``classes`` labels and raises otherwise."""
    cells = [["1", "2"], ["3", "4"]]

    # classes has unique keys
    unique_classes = DataFrame(cells, columns=["c", "d"], index=["a", "b"])
    styler.set_td_classes(classes=unique_classes)  # OK

    duplicated_labels = [
        {"columns": ["c", "c"], "index": ["a", "b"]},  # non-unique columns
        {"columns": ["c", "d"], "index": ["a", "a"]},  # non-unique index
    ]
    for labels in duplicated_labels:
        classes = DataFrame(cells, **labels)
        with pytest.raises(
            KeyError, match="Classes render only if `classes` has unique"
        ):
            styler.set_td_classes(classes=classes)


def test_hide_columns_non_unique(styler):
    """Check hiding the duplicated label "d" hides both columns in head and body."""
    ctx = styler.hide_columns(["d"])._translate()

    # position 0 of each row is skipped; data columns start at position 1
    expected_columns = [("c", True), ("d", False), ("d", False)]

    header_row = ctx["head"][0]
    for pos, (label, visible) in enumerate(expected_columns, start=1):
        assert header_row[pos]["display_value"] == label
        assert header_row[pos]["is_visible"] is visible

    first_body_row = ctx["body"][0]
    for pos, (_, visible) in enumerate(expected_columns, start=1):
        assert first_body_row[pos]["is_visible"] is visible
9 changes: 0 additions & 9 deletions pandas/tests/io/formats/style/test_style.py
Original file line number Diff line number Diff line change
Expand Up @@ -671,15 +671,6 @@ def test_set_na_rep(self):
assert ctx["body"][0][1]["display_value"] == "NA"
assert ctx["body"][0][2]["display_value"] == "-"

def test_nonunique_raises(self):
df = DataFrame([[1, 2]], columns=["A", "A"])
msg = "style is not supported for non-unique indices."
with pytest.raises(ValueError, match=msg):
df.style

with pytest.raises(ValueError, match=msg):
Styler(df)

def test_caption(self):
styler = Styler(self.df, caption="foo")
result = styler.render()
Expand Down