[ENH] Add to_markdown method (pandas-dev#30350)

AlexKirko · Dec 29, 2019 · 8853bbc · 8853bbc
1 parent 52d5520
commit 8853bbc
Show file tree

Hide file tree

Showing 13 changed files with 147 additions and 2 deletions.
diff --git a/ci/deps/travis-37.yaml b/ci/deps/travis-37.yaml
@@ -20,6 +20,7 @@ dependencies:
   - pyarrow
   - pytz
   - s3fs
+  - tabulate
   - pyreadstat
   - pip
   - pip:

diff --git a/ci/deps/travis-38.yaml b/ci/deps/travis-38.yaml
@@ -17,3 +17,4 @@ dependencies:
   - nomkl
   - pytz
   - pip
+  - tabulate==0.8.3
diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst
@@ -234,7 +234,8 @@ Optional dependencies
 ~~~~~~~~~~~~~~~~~~~~~
 
 Pandas has many optional dependencies that are only used for specific methods.
-For example, :func:`pandas.read_hdf` requires the ``pytables`` package. If the
+For example, :func:`pandas.read_hdf` requires the ``pytables`` package, while
+:meth:`DataFrame.to_markdown` requires the ``tabulate`` package. If the
 optional dependency is not installed, pandas will raise an ``ImportError`` when
 the method requiring that dependency is called.
 
@@ -264,6 +265,7 @@ pyreadstat                                   SPSS files (.sav) reading
 pytables                  3.4.2              HDF5 reading / writing
 qtpy                                         Clipboard I/O
 s3fs                      0.3.0              Amazon S3 access
+tabulate                  0.8.3              Printing in Markdown-friendly format (see `tabulate`_)
 xarray                    0.8.2              pandas-like API for N-dimensional data
 xclip                                        Clipboard I/O on linux
 xlrd                      1.1.0              Excel reading
@@ -301,3 +303,4 @@ top-level :func:`~pandas.read_html` function:
 .. _html5lib: https://github.com/html5lib/html5lib-python
 .. _BeautifulSoup4: http://www.crummy.com/software/BeautifulSoup
 .. _lxml: http://lxml.de
+.. _tabulate: https://github.com/astanin/python-tabulate
diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst
@@ -361,4 +361,5 @@ Serialization / IO / conversion
    DataFrame.to_records
    DataFrame.to_string
    DataFrame.to_clipboard
+   DataFrame.to_markdown
    DataFrame.style
diff --git a/doc/source/reference/series.rst b/doc/source/reference/series.rst
@@ -578,3 +578,4 @@ Serialization / IO / conversion
    Series.to_string
    Series.to_clipboard
    Series.to_latex
+   Series.to_markdown
diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst
@@ -209,6 +209,7 @@ Other enhancements
 - :func:`to_parquet` now appropriately handles the ``schema`` argument for user defined schemas in the pyarrow engine. (:issue: `30270`)
 - DataFrame constructor preserve `ExtensionArray` dtype with `ExtensionArray` (:issue:`11363`)
 - :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` have gained ``ignore_index`` keyword to be able to reset index after sorting (:issue:`30114`)
+- Added :meth:`DataFrame.to_markdown` and :meth:`Series.to_markdown` to render objects as Markdown tables; requires the optional ``tabulate`` package (:issue:`11052`)
 
 - :meth:`DataFrame.drop_duplicates` has gained ``ignore_index`` keyword to reset index (:issue:`30114`)
 

diff --git a/environment.yml b/environment.yml
@@ -100,5 +100,6 @@ dependencies:
   - sqlalchemy  # pandas.read_sql, DataFrame.to_sql
   - xarray  # DataFrame.to_xarray
   - pyreadstat  # pandas.read_spss
+  - tabulate>=0.8.3  # DataFrame.to_markdown
   - pip:
     - git+https://github.com/pandas-dev/pandas-sphinx-theme.git@master
diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py
@@ -23,6 +23,7 @@
     "scipy": "0.19.0",
     "sqlalchemy": "1.1.4",
     "tables": "3.4.2",
+    "tabulate": "0.8.3",
     "xarray": "0.8.2",
     "xlrd": "1.1.0",
     "xlwt": "1.2.0",

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -15,6 +15,7 @@
 import sys
 from textwrap import dedent
 from typing import (
+    IO,
     Any,
     FrozenSet,
     Hashable,
@@ -37,6 +38,7 @@
 
 from pandas._libs import algos as libalgos, lib
 from pandas._typing import Axes, Dtype, FilePathOrBuffer
+from pandas.compat._optional import import_optional_dependency
 from pandas.compat.numpy import function as nv
 from pandas.util._decorators import (
     Appender,
@@ -118,6 +120,7 @@
 from pandas.core.ops.missing import dispatch_fill_zeros
 from pandas.core.series import Series
 
+from pandas.io.common import get_filepath_or_buffer
 from pandas.io.formats import console, format as fmt
 from pandas.io.formats.printing import pprint_thing
 import pandas.plotting
@@ -1964,6 +1967,36 @@ def to_feather(self, path):
 
         to_feather(self, path)
 
+    @Appender(
+        """
+        Examples
+        --------
+        >>> df = pd.DataFrame(
+        ...     data={"animal_1": ["elk", "pig"], "animal_2": ["dog", "quetzal"]}
+        ... )
+        >>> print(df.to_markdown())
+        |    | animal_1   | animal_2   |
+        |---:|:-----------|:-----------|
+        |  0 | elk        | dog        |
+        |  1 | pig        | quetzal    |
+        """
+    )
+    @Substitution(klass="DataFrame")
+    @Appender(_shared_docs["to_markdown"])
+    def to_markdown(
+        self, buf: Optional[IO[str]] = None, mode: Optional[str] = None, **kwargs,
+    ) -> Optional[str]:
+        # Default to a pipe table with the column labels as headers; setdefault
+        # keeps any value the caller passed explicitly.
+        kwargs.setdefault("headers", "keys")
+        kwargs.setdefault("tablefmt", "pipe")
+        tabulate = import_optional_dependency("tabulate")
+        result = tabulate.tabulate(self, **kwargs)
+        if buf is None:
+            return result
+        buf, _, _, _ = get_filepath_or_buffer(buf, mode=mode)
+        assert buf is not None  # Help mypy.
+        # ``result`` is one string: write it in a single call instead of letting
+        # ``writelines`` iterate over it character by character.
+        buf.write(result)
+        return None
+
     @deprecate_kwarg(old_arg_name="fname", new_arg_name="path")
     def to_parquet(
         self,

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -1970,6 +1970,30 @@ def _repr_data_resource_(self):
     # ----------------------------------------------------------------------
     # I/O Methods
 
+    _shared_docs[
+        "to_markdown"
+    ] = """
+    Print %(klass)s in Markdown-friendly format.
+
+    .. versionadded:: 1.0.0
+
+    Parameters
+    ----------
+    buf : writable buffer, optional
+        Buffer to write the output to. If None (the default), nothing is
+        written and the output is returned as a string instead.
+    mode : str, optional
+        Mode in which file is opened.
+    **kwargs
+        These parameters will be passed to `tabulate`.
+
+    Returns
+    -------
+    str or None
+        %(klass)s in Markdown-friendly format if ``buf`` is None,
+        otherwise None.
+    """
+
     _shared_docs[
         "to_excel"
     ] = """

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -4,7 +4,7 @@
 from io import StringIO
 from shutil import get_terminal_size
 from textwrap import dedent
-from typing import Any, Callable, Hashable, List, Optional
+from typing import IO, Any, Callable, Hashable, List, Optional
 import warnings
 
 import numpy as np
@@ -59,6 +59,7 @@
     is_empty_data,
     sanitize_array,
 )
+from pandas.core.generic import _shared_docs
 from pandas.core.indexers import maybe_convert_indices
 from pandas.core.indexes.accessors import CombinedDatetimelikeProperties
 from pandas.core.indexes.api import (
@@ -1439,6 +1440,27 @@ def to_string(
                 with open(buf, "w") as f:
                     f.write(result)
 
+    @Appender(
+        """
+        Examples
+        --------
+        >>> s = pd.Series(["elk", "pig", "dog", "quetzal"], name="animal")
+        >>> print(s.to_markdown())
+        |    | animal   |
+        |---:|:---------|
+        |  0 | elk      |
+        |  1 | pig      |
+        |  2 | dog      |
+        |  3 | quetzal  |
+        """
+    )
+    @Substitution(klass="Series")
+    @Appender(_shared_docs["to_markdown"])
+    def to_markdown(
+        self, buf: Optional[IO[str]] = None, mode: Optional[str] = None, **kwargs,
+    ) -> Optional[str]:
+        # Convert to a frame and let DataFrame.to_markdown do the formatting
+        # and the buffer handling.
+        as_frame = self.to_frame()
+        return as_frame.to_markdown(buf, mode, **kwargs)
+
     # ----------------------------------------------------------------------
 
     def items(self):

diff --git a/pandas/tests/io/formats/test_to_markdown.py b/pandas/tests/io/formats/test_to_markdown.py
@@ -0,0 +1,55 @@
+from io import StringIO
+
+import pytest
+
+import pandas as pd
+
+pytest.importorskip("tabulate")
+
+
+def test_simple():
+    """Default settings write a pipe-format table into the given buffer."""
+    frame = pd.DataFrame([1, 2, 3])
+    sink = StringIO()
+    frame.to_markdown(buf=sink)
+    expected = "\n".join(
+        [
+            "|    |   0 |",
+            "|---:|----:|",
+            "|  0 |   1 |",
+            "|  1 |   2 |",
+            "|  2 |   3 |",
+        ]
+    )
+    assert sink.getvalue() == expected
+
+
+def test_other_tablefmt():
+    """A caller-supplied ``tablefmt`` overrides the default pipe format."""
+    frame = pd.DataFrame([1, 2, 3])
+    sink = StringIO()
+    frame.to_markdown(buf=sink, tablefmt="jira")
+    expected = "\n".join(
+        [
+            "||    ||   0 ||",
+            "|  0 |   1 |",
+            "|  1 |   2 |",
+            "|  2 |   3 |",
+        ]
+    )
+    assert sink.getvalue() == expected
+
+
+def test_other_headers():
+    """Caller-supplied ``headers`` replace the default column labels."""
+    frame = pd.DataFrame([1, 2, 3])
+    sink = StringIO()
+    frame.to_markdown(buf=sink, headers=["foo", "bar"])
+    expected = "\n".join(
+        [
+            "|   foo |   bar |",
+            "|------:|------:|",
+            "|     0 |     1 |",
+            "|     1 |     2 |",
+            "|     2 |     3 |",
+        ]
+    )
+    assert sink.getvalue() == expected
+
+
+def test_series():
+    """A named Series renders as a one-column table headed by its name."""
+    ser = pd.Series([1, 2, 3], name="foo")
+    sink = StringIO()
+    ser.to_markdown(buf=sink)
+    expected = "\n".join(
+        [
+            "|    |   foo |",
+            "|---:|------:|",
+            "|  0 |     1 |",
+            "|  1 |     2 |",
+            "|  2 |     3 |",
+        ]
+    )
+    assert sink.getvalue() == expected
+
+
+def test_no_buf(capsys):
+    """With no buffer, the table is returned and nothing is printed."""
+    df = pd.DataFrame([1, 2, 3])
+    result = df.to_markdown()
+    assert (
+        result == "|    |   0 |\n|---:|----:|\n|  0 |   1 |\n|  1 |   2 |\n|  2 |   3 |"
+    )
+    # Use the captured stdout to pin that the default path returns the string
+    # rather than writing it to sys.stdout.
+    assert capsys.readouterr().out == ""
diff --git a/requirements-dev.txt b/requirements-dev.txt
@@ -67,4 +67,5 @@ s3fs
 sqlalchemy
 xarray
 pyreadstat
+tabulate>=0.8.3
 git+https://github.com/pandas-dev/pandas-sphinx-theme.git@master