From 8853bbc36e4b1d3374c4773a69ab9c38f239132a Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Fri, 27 Dec 2019 16:58:05 +0000 Subject: [PATCH] [ENH] Add to_markdown method (#30350) --- ci/deps/travis-37.yaml | 1 + ci/deps/travis-38.yaml | 1 + doc/source/getting_started/install.rst | 5 +- doc/source/reference/frame.rst | 1 + doc/source/reference/series.rst | 1 + doc/source/whatsnew/v1.0.0.rst | 1 + environment.yml | 1 + pandas/compat/_optional.py | 1 + pandas/core/frame.py | 33 +++++++++++++ pandas/core/generic.py | 24 +++++++++ pandas/core/series.py | 24 ++++++++- pandas/tests/io/formats/test_to_markdown.py | 55 +++++++++++++++++++++ requirements-dev.txt | 1 + 13 files changed, 147 insertions(+), 2 deletions(-) create mode 100644 pandas/tests/io/formats/test_to_markdown.py diff --git a/ci/deps/travis-37.yaml b/ci/deps/travis-37.yaml index 6826a9d072ff32..73e2c20b314388 100644 --- a/ci/deps/travis-37.yaml +++ b/ci/deps/travis-37.yaml @@ -20,6 +20,7 @@ dependencies: - pyarrow - pytz - s3fs + - tabulate - pyreadstat - pip - pip: diff --git a/ci/deps/travis-38.yaml b/ci/deps/travis-38.yaml index 828f02596a70e7..a627b7edc175f8 100644 --- a/ci/deps/travis-38.yaml +++ b/ci/deps/travis-38.yaml @@ -17,3 +17,4 @@ dependencies: - nomkl - pytz - pip + - tabulate==0.8.3 diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index 62a39fb5176f90..03514bf63d93ce 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -234,7 +234,8 @@ Optional dependencies ~~~~~~~~~~~~~~~~~~~~~ Pandas has many optional dependencies that are only used for specific methods. -For example, :func:`pandas.read_hdf` requires the ``pytables`` package. If the +For example, :func:`pandas.read_hdf` requires the ``pytables`` package, while +:meth:`DataFrame.to_markdown` requires the ``tabulate`` package. 
If the optional dependency is not installed, pandas will raise an ``ImportError`` when the method requiring that dependency is called. @@ -264,6 +265,7 @@ pyreadstat SPSS files (.sav) reading pytables 3.4.2 HDF5 reading / writing qtpy Clipboard I/O s3fs 0.3.0 Amazon S3 access +tabulate 0.8.3 Printing in Markdown-friendly format (see `tabulate`_) xarray 0.8.2 pandas-like API for N-dimensional data xclip Clipboard I/O on linux xlrd 1.1.0 Excel reading @@ -301,3 +303,4 @@ top-level :func:`~pandas.read_html` function: .. _html5lib: https://github.com/html5lib/html5lib-python .. _BeautifulSoup4: http://www.crummy.com/software/BeautifulSoup .. _lxml: http://lxml.de +.. _tabulate: https://github.com/astanin/python-tabulate diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst index 815f3f9c19d49d..4c9df35ea8d9de 100644 --- a/doc/source/reference/frame.rst +++ b/doc/source/reference/frame.rst @@ -361,4 +361,5 @@ Serialization / IO / conversion DataFrame.to_records DataFrame.to_string DataFrame.to_clipboard + DataFrame.to_markdown DataFrame.style diff --git a/doc/source/reference/series.rst b/doc/source/reference/series.rst index 6e1ee303135d88..0639730e2dcde8 100644 --- a/doc/source/reference/series.rst +++ b/doc/source/reference/series.rst @@ -578,3 +578,4 @@ Serialization / IO / conversion Series.to_string Series.to_clipboard Series.to_latex + Series.to_markdown diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index c7d5dbb78d4047..5d647e3ab56e3c 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -209,6 +209,7 @@ Other enhancements - :func:`to_parquet` now appropriately handles the ``schema`` argument for user defined schemas in the pyarrow engine. 
(:issue: `30270`) - DataFrame constructor preserve `ExtensionArray` dtype with `ExtensionArray` (:issue:`11363`) - :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` have gained ``ignore_index`` keyword to be able to reset index after sorting (:issue:`30114`) +- :meth:`DataFrame.to_markdown` and :meth:`Series.to_markdown` added (:issue:`11052`) - :meth:`DataFrame.drop_duplicates` has gained ``ignore_index`` keyword to reset index (:issue:`30114`) diff --git a/environment.yml b/environment.yml index f930458d0a855c..7119fb5ab1b9e9 100644 --- a/environment.yml +++ b/environment.yml @@ -100,5 +100,6 @@ dependencies: - sqlalchemy # pandas.read_sql, DataFrame.to_sql - xarray # DataFrame.to_xarray - pyreadstat # pandas.read_spss + - tabulate>=0.8.3 # DataFrame.to_markdown - pip: - git+https://github.com/pandas-dev/pandas-sphinx-theme.git@master diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py index 412293f029fa5d..c8cf639fcd15ff 100644 --- a/pandas/compat/_optional.py +++ b/pandas/compat/_optional.py @@ -23,6 +23,7 @@ "scipy": "0.19.0", "sqlalchemy": "1.1.4", "tables": "3.4.2", + "tabulate": "0.8.3", "xarray": "0.8.2", "xlrd": "1.1.0", "xlwt": "1.2.0", diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4ff3cb7d4f02d5..fc39b264d1598c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -15,6 +15,7 @@ import sys from textwrap import dedent from typing import ( + IO, Any, FrozenSet, Hashable, @@ -37,6 +38,7 @@ from pandas._libs import algos as libalgos, lib from pandas._typing import Axes, Dtype, FilePathOrBuffer +from pandas.compat._optional import import_optional_dependency from pandas.compat.numpy import function as nv from pandas.util._decorators import ( Appender, @@ -118,6 +120,7 @@ from pandas.core.ops.missing import dispatch_fill_zeros from pandas.core.series import Series +from pandas.io.common import get_filepath_or_buffer from pandas.io.formats import console, format as fmt from pandas.io.formats.printing import 
pprint_thing
 import pandas.plotting
@@ -1964,6 +1967,36 @@ def to_feather(self, path):
 
         to_feather(self, path)
 
+    @Appender(
+        """
+        Examples
+        --------
+        >>> df = pd.DataFrame(
+        ...     data={"animal_1": ["elk", "pig"], "animal_2": ["dog", "quetzal"]}
+        ... )
+        >>> print(df.to_markdown())
+        |    | animal_1   | animal_2   |
+        |---:|:-----------|:-----------|
+        |  0 | elk        | dog        |
+        |  1 | pig        | quetzal    |
+        """
+    )
+    @Substitution(klass="DataFrame")
+    @Appender(_shared_docs["to_markdown"])
+    def to_markdown(
+        self, buf: Optional[IO[str]] = None, mode: Optional[str] = None, **kwargs,
+    ) -> Optional[str]:
+        kwargs.setdefault("headers", "keys")  # caller-supplied values win
+        kwargs.setdefault("tablefmt", "pipe")
+        tabulate = import_optional_dependency("tabulate")
+        result = tabulate.tabulate(self, **kwargs)
+        if buf is None:
+            return result
+        buf, _, _, _ = get_filepath_or_buffer(buf, mode=mode)
+        assert buf is not None  # Help mypy.
+        buf.write(result)  # result is one str, not a list of lines
+        return None
+
     @deprecate_kwarg(old_arg_name="fname", new_arg_name="path")
     def to_parquet(
         self,
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 06a38448843f4a..c4461a9530e5c7 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -1970,6 +1970,30 @@ def _repr_data_resource_(self):
     # ----------------------------------------------------------------------
     # I/O Methods
 
+    _shared_docs[
+        "to_markdown"
+    ] = """
+    Print %(klass)s in Markdown-friendly format.
+
+    .. versionadded:: 1.0.0
+
+    Parameters
+    ----------
+    buf : writable buffer, optional
+        Buffer to write the output to. If not given, nothing is written
+        and the Markdown table is returned as a string instead, e.g. for
+        use with ``print``.
+    mode : str, optional
+        Mode in which file is opened.
+    **kwargs
+        These parameters will be passed to `tabulate`.
+
+    Returns
+    -------
+    str or None
+        %(klass)s in Markdown-friendly format, or None if `buf` is given.
+    """
+
     _shared_docs[
         "to_excel"
     ] = """
diff --git a/pandas/core/series.py b/pandas/core/series.py
index b52ab4c248498d..14826e0a1d5a49 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -4,7 +4,7 @@
 from io import StringIO
 from shutil import get_terminal_size
 from textwrap import dedent
-from typing import Any, Callable, Hashable, List, Optional
+from typing import IO, Any, Callable, Hashable, List, Optional
 import warnings
 
 import numpy as np
@@ -59,6 +59,7 @@
     is_empty_data,
     sanitize_array,
 )
+from pandas.core.generic import _shared_docs
 from pandas.core.indexers import maybe_convert_indices
 from pandas.core.indexes.accessors import CombinedDatetimelikeProperties
 from pandas.core.indexes.api import (
@@ -1439,6 +1440,27 @@ def to_string(
                 with open(buf, "w") as f:
                     f.write(result)
 
+    @Appender(
+        """
+        Examples
+        --------
+        >>> s = pd.Series(["elk", "pig", "dog", "quetzal"], name="animal")
+        >>> print(s.to_markdown())
+        |    | animal   |
+        |---:|:---------|
+        |  0 | elk      |
+        |  1 | pig      |
+        |  2 | dog      |
+        |  3 | quetzal  |
+        """
+    )
+    @Substitution(klass="Series")
+    @Appender(_shared_docs["to_markdown"])
+    def to_markdown(
+        self, buf: Optional[IO[str]] = None, mode: Optional[str] = None, **kwargs,
+    ) -> Optional[str]:
+        return self.to_frame().to_markdown(buf, mode, **kwargs)
+
     # ----------------------------------------------------------------------
 
     def items(self):
diff --git a/pandas/tests/io/formats/test_to_markdown.py b/pandas/tests/io/formats/test_to_markdown.py
new file mode 100644
index 00000000000000..8893e4294353ff
--- /dev/null
+++ b/pandas/tests/io/formats/test_to_markdown.py
@@ -0,0 +1,55 @@
+from io import StringIO
+
+import pytest
+
+import pandas as pd
+
+pytest.importorskip("tabulate")
+
+
+def test_simple():
+    buf = StringIO()
+    df = pd.DataFrame([1, 2, 3])
+    df.to_markdown(buf=buf)
+    result = buf.getvalue()
+    assert (
+        result == "|    |   0 |\n|---:|----:|\n|  0 |   1 |\n|  1 |   2 |\n|  2 |   3 |"
+    )
+
+
+def test_other_tablefmt():
+    buf = StringIO()
+    df
= pd.DataFrame([1, 2, 3])
+    df.to_markdown(buf=buf, tablefmt="jira")
+    result = buf.getvalue()
+    assert result == "||    ||   0 ||\n|  0 |   1 |\n|  1 |   2 |\n|  2 |   3 |"
+
+
+def test_other_headers():
+    buf = StringIO()
+    df = pd.DataFrame([1, 2, 3])
+    df.to_markdown(buf=buf, headers=["foo", "bar"])
+    result = buf.getvalue()
+    assert result == (
+        "|   foo |   bar |\n|------:|------:|\n|     0 "
+        "|     1 |\n|     1 |     2 |\n|     2 |     3 |"
+    )
+
+
+def test_series():
+    buf = StringIO()
+    s = pd.Series([1, 2, 3], name="foo")
+    s.to_markdown(buf=buf)
+    result = buf.getvalue()
+    assert result == (
+        "|    |   foo |\n|---:|------:|\n|  0 |     1 "
+        "|\n|  1 |     2 |\n|  2 |     3 |"
+    )
+
+
+def test_no_buf():
+    df = pd.DataFrame([1, 2, 3])
+    result = df.to_markdown()
+    assert (
+        result == "|    |   0 |\n|---:|----:|\n|  0 |   1 |\n|  1 |   2 |\n|  2 |   3 |"
+    )
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 827bb809d46e49..a2c43bb6be73a2 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -67,4 +67,5 @@ s3fs
 sqlalchemy
 xarray
 pyreadstat
+tabulate>=0.8.3
 git+https://github.com/pandas-dev/pandas-sphinx-theme.git@master
\ No newline at end of file