diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 9faef9b15bfb4..8da3cbb2c11f8 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -21,7 +21,7 @@ The pandas I/O API is a set of top level ``reader`` functions accessed like text;`CSV `__;:ref:`read_csv`;:ref:`to_csv` text;Fixed-Width Text File;:ref:`read_fwf` text;`JSON `__;:ref:`read_json`;:ref:`to_json` - text;`HTML `__;:ref:`read_html`;:ref:`to_html` + text;`HTML `__;:ref:`read_html`;:ref:`Styler.to_html` text;`LaTeX `__;;:ref:`Styler.to_latex` text;`XML `__;:ref:`read_xml`;:ref:`to_xml` text; Local clipboard;:ref:`read_clipboard`;:ref:`to_clipboard` @@ -2682,8 +2682,8 @@ Read in pandas ``to_html`` output (with some loss of floating point precision): .. code-block:: python df = pd.DataFrame(np.random.randn(2, 2)) - s = df.to_html(float_format="{0:.40g}".format) - dfin = pd.read_html(s, index_col=0) + s = df.style.format("{0:.40g}").to_html() + dfin = pd.read_html(s, index_col=0)[0] The ``lxml`` backend will raise an error on a failed parse if that is the only parser you provide. If you only have a single parser you can provide just a @@ -2714,156 +2714,34 @@ succeeds, the function will return*. Writing to HTML files '''''''''''''''''''''' -``DataFrame`` objects have an instance method ``to_html`` which renders the -contents of the ``DataFrame`` as an HTML table. The function arguments are as -in the method ``to_string`` described above. - .. note:: - Not all of the possible options for ``DataFrame.to_html`` are shown here for - brevity's sake. See :func:`~pandas.core.frame.DataFrame.to_html` for the - full set of options. + DataFrame *and* Styler objects currently have a ``to_html`` method. 
We recommend + using the `Styler.to_html() <../reference/api/pandas.io.formats.style.Styler.to_html.rst>`__ method + over `DataFrame.to_html() <../reference/api/pandas.DataFrame.to_html.rst>`__ due to the former's greater flexibility with + conditional styling, and the latter's possible future deprecation. -.. ipython:: python - :suppress: +Review the documentation for `Styler.to_html <../reference/api/pandas.io.formats.style.Styler.to_html.rst>`__, +which gives examples of conditional styling and explains the operation of its keyword +arguments. The ``to_html`` methods render the contents of the ``DataFrame`` as an HTML table. - def write_html(df, filename, *args, **kwargs): - static = os.path.abspath(os.path.join("source", "_static")) - with open(os.path.join(static, filename + ".html"), "w") as f: - df.to_html(f, *args, **kwargs) +For simple applications the following pattern is sufficient: .. ipython:: python df = pd.DataFrame(np.random.randn(2, 2)) df - print(df.to_html()) # raw html - -.. ipython:: python - :suppress: - - write_html(df, "basic") - -HTML: - -.. raw:: html - :file: ../_static/basic.html - -The ``columns`` argument will limit the columns shown: - -.. ipython:: python - - print(df.to_html(columns=[0])) - -.. ipython:: python - :suppress: - - write_html(df, "columns", columns=[0]) - -HTML: - -.. raw:: html - :file: ../_static/columns.html - -``float_format`` takes a Python callable to control the precision of floating -point values: - -.. ipython:: python - - print(df.to_html(float_format="{0:.10f}".format)) - -.. ipython:: python - :suppress: - - write_html(df, "float_format", float_format="{0:.10f}".format) - -HTML: - -.. raw:: html - :file: ../_static/float_format.html - -``bold_rows`` will make the row labels bold by default, but you can turn that -off: - -.. ipython:: python - - print(df.to_html(bold_rows=False)) - -.. ipython:: python - :suppress: + print(df.style.to_html()) # raw html - write_html(df, "nobold", bold_rows=False) - -.. 
raw:: html - :file: ../_static/nobold.html - -The ``classes`` argument provides the ability to give the resulting HTML -table CSS classes. Note that these classes are *appended* to the existing -``'dataframe'`` class. - -.. ipython:: python - - print(df.to_html(classes=["awesome_table_class", "even_more_awesome_class"])) - -The ``render_links`` argument provides the ability to add hyperlinks to cells -that contain URLs. - -.. ipython:: python - - url_df = pd.DataFrame( - { - "name": ["Python", "pandas"], - "url": ["https://www.python.org/", "https://pandas.pydata.org"], - } - ) - print(url_df.to_html(render_links=True)) - -.. ipython:: python - :suppress: - - write_html(url_df, "render_links", render_links=True) - -HTML: - -.. raw:: html - :file: ../_static/render_links.html - -Finally, the ``escape`` argument allows you to control whether the -"<", ">" and "&" characters escaped in the resulting HTML (by default it is -``True``). So to get the HTML without escaped characters pass ``escape=False`` - -.. ipython:: python - - df = pd.DataFrame({"a": list("&<>"), "b": np.random.randn(3)}) - - -.. ipython:: python - :suppress: - - write_html(df, "escape") - write_html(df, "noescape", escape=False) - -Escaped: - -.. ipython:: python - - print(df.to_html()) - -.. raw:: html - :file: ../_static/escape.html - -Not escaped: +To format values before output, chain the `Styler.format <../reference/api/pandas.io.formats.style.Styler.format.rst>`__ +method. .. ipython:: python - print(df.to_html(escape=False)) - -.. raw:: html - :file: ../_static/noescape.html - -.. note:: + print(df.style.format("€ {}").to_html()) - Some browsers may not show a difference in the rendering of the previous two - HTML tables. +Some browsers or browser applications, such as Jupyter Notebook and Google Colab, may process the output and add +CSS class styling by default, which alters the appearance of HTML tables. .. 
_io.html.gotchas: diff --git a/doc/source/user_guide/scale.rst b/doc/source/user_guide/scale.rst index 71aef4fdd75f6..edeebe2e8678c 100644 --- a/doc/source/user_guide/scale.rst +++ b/doc/source/user_guide/scale.rst @@ -275,6 +275,7 @@ column names and dtypes. That's because Dask hasn't actually read the data yet. Rather than executing immediately, doing operations build up a **task graph**. .. ipython:: python + :okwarning: ddf ddf["name"] @@ -333,6 +334,7 @@ known automatically. In this case, since we created the parquet files manually, we need to supply the divisions manually. .. ipython:: python + :okwarning: N = 12 starts = [f"20{i:>02d}-01-01" for i in range(N)] diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 4c3e53ddcfa26..878250cd8288f 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -615,7 +615,7 @@ Other Deprecations - Deprecated the behavior of :func:`to_datetime` with the string "now" with ``utc=False``; in a future version this will match ``Timestamp("now")``, which in turn matches :meth:`Timestamp.now` returning the local time (:issue:`18705`) - Deprecated :meth:`DateOffset.apply`, use ``offset + other`` instead (:issue:`44522`) - Deprecated parameter ``names`` in :meth:`Index.copy` (:issue:`44916`) -- A deprecation warning is now shown for :meth:`DataFrame.to_latex` indicating the arguments signature may change and emulate more the arguments to :meth:`.Styler.to_latex` in future versions (:issue:`44411`) +- A deprecation warning is now shown for both :meth:`DataFrame.to_html` and :meth:`DataFrame.to_latex` indicating that the argument signature may change to more closely emulate the arguments of :meth:`.Styler.to_html` and :meth:`.Styler.to_latex`, respectively, in future versions (:issue:`44411`, :issue:`44451`) - Deprecated behavior of :func:`concat` between objects with bool-dtype and numeric-dtypes; in a future version these will cast to object dtype instead of coercing bools to numeric 
values (:issue:`39817`) - Deprecated :meth:`Categorical.replace`, use :meth:`Series.replace` instead (:issue:`44929`) - Deprecated passing ``set`` or ``dict`` as indexer for :meth:`DataFrame.loc.__setitem__`, :meth:`DataFrame.loc.__getitem__`, :meth:`Series.loc.__setitem__`, :meth:`Series.loc.__getitem__`, :meth:`DataFrame.__getitem__`, :meth:`Series.__getitem__` and :meth:`Series.__setitem__` (:issue:`42825`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a39c1b0bf43f2..6f556f592c477 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -2903,8 +2903,76 @@ def to_html( %(returns)s See Also -------- + Styler.to_html : Render a DataFrame to HTML with conditional formatting. to_string : Convert DataFrame to a string. - """ + + Notes + ----- + As of version 1.4.0, a warning is shown for the following use of keyword + arguments if they are non-default values, in order to align this function with + the formatting structure akin to ``Styler.to_html``. + + - ``classes``, likely replaced by ``table_attributes``. + - ``sparsify``, likely separated to ``sparse_index``, ``sparse_columns``. + - ``max_cols``, likely replaced with ``max_columns`` consistently. + - ``formatters``, likely replaced with ``formatter``. + - ``float_format``, likely replaced by ``precision``, ``decimal``, + ``thousands``. + - ``border``, likely removed due to deprecated HTML specification. + - ``col_space``, likely removed in favour of CSS solutions. + - ``justify``, likely removed in favour of CSS solutions. + - ``render_links``, likely removed due to limited functionality. + - ``notebook``, likely removed due to legacy impact. + - ``show_dimensions``, likely removed in favour of caption solution. + - ``bold_rows``, likely replaced with ``bold_headers`` with index and column + control. 
+ """ + # Warnings are shown in 1.4.0 in preparation for signature changes in 2.0.0 + warnings_default_none = { + "classes": "replaced by `table_attributes`", + "sparsify": "replaced by `sparse_index` and `sparse_columns`", + "max_cols": "replaced by `max_columns` for consistency", + "formatters": "replaced by `formatter` accepted by `Styler.format`", + "float_format": "replaced by `precision`, `decimal`, and `thousands`", + "border": "removed as deprecated HTML, suggested to use CSS", + "col_space": "removed, suggested to use CSS `min-width: 100px;`", + "justify": "removed, suggested to use CSS", + } + + warnings_default_false = { + "render_links": "removed due to limited functionality", + "notebook": "removed as a legacy argument", + "show_dimensions": "removed, suggested to use `caption=f'{df.shape}'`", + } + + warnings_default_true = { + "bold_rows": "replaced by `bold_headers` controlling index and columns", + } + + warning_msg = ( + "In future versions `DataFrame.to_html` is expected to utilise the base " + "implementation of `Styler.to_html` for formatting and rendering. " + "The arguments signature may therefore change. You are specifically using " + "the following arguments: " + ) + warning_flag = False + + for kwarg, msg in warnings_default_none.items(): + if locals()[kwarg] is not None: + warning_flag = True + warning_msg += f"\n `{kwarg}`, which may be {msg}." + for kwarg, msg in warnings_default_false.items(): + if locals()[kwarg] is True: + warning_flag = True + warning_msg += f"\n `{kwarg}`, which may be {msg}." + for kwarg, msg in warnings_default_true.items(): + if locals()[kwarg] is False: + warning_flag = True + warning_msg += f"\n `{kwarg}`, which may be {msg}." 
+ + if warning_flag: + warnings.warn(warning_msg, FutureWarning, stacklevel=find_stack_level()) + if justify is not None and justify not in fmt._VALID_JUSTIFY_PARAMETERS: raise ValueError("Invalid value for justify parameter") diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index bf0a10fa702a5..e454b3621a2d2 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -3323,7 +3323,7 @@ def test_filepath_or_buffer_arg( ): getattr(df, method)(buf=filepath_or_buffer, encoding=encoding) elif encoding == "foo": - expected_warning = FutureWarning if method == "to_latex" else None + expected_warning = FutureWarning if method in ["to_latex"] else None with tm.assert_produces_warning(expected_warning): with pytest.raises(LookupError, match="unknown encoding"): getattr(df, method)(buf=filepath_or_buffer, encoding=encoding) diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index aa8508d8e8942..e512cc714f96b 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -16,6 +16,8 @@ import pandas.io.formats.format as fmt +pytestmark = pytest.mark.filterwarnings("ignore::FutureWarning") + lorem_ipsum = ( "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod " "tempor incididunt ut labore et dolore magna aliqua. 
Ut enim ad minim " @@ -888,3 +890,50 @@ def test_to_html_float_format_object_col(datapath): result = df.to_html(float_format=lambda x: f"{x:,.0f}") expected = expected_html(datapath, "gh40024_expected_output") assert result == expected + + +@pytest.mark.parametrize( + "kw1, kw1_val, kw1_msg", + [ + ("classes", "text", "replaced by `table_attributes`"), + ("sparsify", True, "replaced by `sparse_index` and `sparse_columns`"), + ("max_cols", 1, "replaced by `max_columns` for consistency"), + ("formatters", [None], "replaced by `formatter` accepted by `Styler.format`"), + ("float_format", "txt", "replaced by `precision`, `decimal`, and `thousands`"), + ("border", 10, "removed as deprecated HTML, suggested to use CSS"), + ("col_space", 1, "removed, suggested to use CSS `min-width: 100px;`"), + ("justify", "right", "removed, suggested to use CSS"), + ], +) +@pytest.mark.parametrize( + "kw2, kw2_val, kw2_msg", + [ + ("render_links", True, "removed due to limited functionality"), + ("notebook", True, "removed as a legacy argument"), + ("show_dimensions", True, "removed, suggested to use `caption=f'{df.shape}'`"), + ( + "bold_rows", + False, + "replaced by `bold_headers` controlling index and columns", + ), + ], +) +def test_future_warning(kw1, kw1_val, kw1_msg, kw2, kw2_val, kw2_msg): + df = DataFrame([[1]]) + msg = ( + "In future versions `DataFrame.to_html` is expected to utilise the base " + "implementation of `Styler.to_html` for formatting and rendering. " + "The arguments signature may therefore change. You are specifically using " + "the following arguments: " + ) + msg += f"\\n `{kw1}`, which may be {kw1_msg}." + msg += f"\\n `{kw2}`, which may be {kw2_msg}." + + with tm.assert_produces_warning(FutureWarning, match=msg): + df.to_html(**{kw1: kw1_val, kw2: kw2_val}) + + +def test_no_future_warning(): + df = DataFrame([[1]]) + with tm.assert_produces_warning(None): + df.to_html()