DEPR: msgpack (pandas-dev#30112)
jbrockmendel authored and proost committed Dec 19, 2019
1 parent ad6283c commit 6e56979
Showing 58 changed files with 22 additions and 6,390 deletions.
13 changes: 0 additions & 13 deletions LICENSES/MSGPACK_LICENSE

This file was deleted.

33 changes: 0 additions & 33 deletions LICENSES/MSGPACK_NUMPY_LICENSE

This file was deleted.

1 change: 0 additions & 1 deletion MANIFEST.in
@@ -20,7 +20,6 @@ global-exclude *.gz
 global-exclude *.h5
 global-exclude *.html
 global-exclude *.json
-global-exclude *.msgpack
 global-exclude *.pickle
 global-exclude *.png
 global-exclude *.pyc
32 changes: 0 additions & 32 deletions asv_bench/benchmarks/io/msgpack.py

This file was deleted.

2 changes: 1 addition & 1 deletion asv_bench/benchmarks/io/sas.py
@@ -26,5 +26,5 @@ def setup(self, format):
         ]
         self.f = os.path.join(*paths)

-    def time_read_msgpack(self, format):
+    def time_read_sas(self, format):
         read_sas(self.f, format=format)
4 changes: 2 additions & 2 deletions ci/code_checks.sh
@@ -94,10 +94,10 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then

     # We don't lint all C files because we don't want to lint any that are built
     # from Cython files nor do we want to lint C files that we didn't modify for
-    # this particular codebase (e.g. src/headers, src/klib, src/msgpack). However,
+    # this particular codebase (e.g. src/headers, src/klib). However,
     # we can lint all header files since they aren't "generated" like C files are.
     MSG='Linting .c and .h' ; echo $MSG
-    cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime pandas/io/msgpack pandas/_libs/*.cpp pandas/util
+    cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime pandas/_libs/*.cpp
     RET=$(($RET + $?)) ; echo $MSG "DONE"

     echo "isort --version-number"
3 changes: 0 additions & 3 deletions doc/redirects.csv
@@ -491,7 +491,6 @@ generated/pandas.DataFrame.to_hdf,../reference/api/pandas.DataFrame.to_hdf
 generated/pandas.DataFrame.to_html,../reference/api/pandas.DataFrame.to_html
 generated/pandas.DataFrame.to_json,../reference/api/pandas.DataFrame.to_json
 generated/pandas.DataFrame.to_latex,../reference/api/pandas.DataFrame.to_latex
-generated/pandas.DataFrame.to_msgpack,../reference/api/pandas.DataFrame.to_msgpack
 generated/pandas.DataFrame.to_numpy,../reference/api/pandas.DataFrame.to_numpy
 generated/pandas.DataFrame.to_panel,../reference/api/pandas.DataFrame.to_panel
 generated/pandas.DataFrame.to_parquet,../reference/api/pandas.DataFrame.to_parquet
@@ -889,7 +888,6 @@ generated/pandas.read_gbq,../reference/api/pandas.read_gbq
 generated/pandas.read_hdf,../reference/api/pandas.read_hdf
 generated/pandas.read_html,../reference/api/pandas.read_html
 generated/pandas.read_json,../reference/api/pandas.read_json
-generated/pandas.read_msgpack,../reference/api/pandas.read_msgpack
 generated/pandas.read_parquet,../reference/api/pandas.read_parquet
 generated/pandas.read_pickle,../reference/api/pandas.read_pickle
 generated/pandas.read_sas,../reference/api/pandas.read_sas
@@ -1230,7 +1228,6 @@ generated/pandas.Series.to_json,../reference/api/pandas.Series.to_json
 generated/pandas.Series.to_latex,../reference/api/pandas.Series.to_latex
 generated/pandas.Series.to_list,../reference/api/pandas.Series.to_list
 generated/pandas.Series.tolist,../reference/api/pandas.Series.tolist
-generated/pandas.Series.to_msgpack,../reference/api/pandas.Series.to_msgpack
 generated/pandas.Series.to_numpy,../reference/api/pandas.Series.to_numpy
 generated/pandas.Series.to_period,../reference/api/pandas.Series.to_period
 generated/pandas.Series.to_pickle,../reference/api/pandas.Series.to_pickle
1 change: 0 additions & 1 deletion doc/source/development/developer.rst
@@ -125,7 +125,6 @@ The ``metadata`` field is ``None`` except for:
   in ``BYTE_ARRAY`` Parquet columns. The encoding can be one of:

   * ``'pickle'``
-  * ``'msgpack'``
   * ``'bson'``
   * ``'json'``

4 changes: 2 additions & 2 deletions doc/source/getting_started/install.rst
@@ -249,7 +249,7 @@ PyTables 3.4.2 HDF5-based reading / writing
 SQLAlchemy                1.1.4              SQL support for databases other than sqlite
 SciPy                     0.19.0             Miscellaneous statistical functions
 XLsxWriter                0.9.8              Excel writing
-blosc                                        Compression for msgpack
+blosc                                        Compression for HDF5
 fastparquet               0.3.2              Parquet reading / writing
 gcsfs                     0.2.2              Google Cloud Storage access
 html5lib                                     HTML parser for read_html (see :ref:`note <optional_html>`)
@@ -269,7 +269,7 @@ xclip Clipboard I/O on linux
 xlrd                      1.1.0              Excel reading
 xlwt                      1.2.0              Excel writing
 xsel                                         Clipboard I/O on linux
-zlib                                         Compression for msgpack
+zlib                                         Compression for HDF5
 ========================= ================== =============================================================

 .. _optional_html:
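The two "Compression for HDF5" rows above refer to the optional compression libraries PyTables can use when pandas writes HDF5 files. As a minimal sketch of how they come into play (editorial, not part of the commit; assumes PyTables plus the blosc bindings are installed, and the file name is a placeholder):

```python
import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.rand(1000, 3), columns=list("ABC"))

# complib/complevel are forwarded to PyTables; "blosc" and "zlib" are two of
# the compressors it accepts for the on-disk HDF5 store.
df.to_hdf("store.h5", key="df", mode="w", complib="blosc", complevel=9)
round_tripped = pd.read_hdf("store.h5", "df")
```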
1 change: 0 additions & 1 deletion doc/source/reference/frame.rst
@@ -357,7 +357,6 @@ Serialization / IO / conversion
    DataFrame.to_feather
    DataFrame.to_latex
    DataFrame.to_stata
-   DataFrame.to_msgpack
    DataFrame.to_gbq
    DataFrame.to_records
    DataFrame.to_string
1 change: 0 additions & 1 deletion doc/source/reference/io.rst
@@ -22,7 +22,6 @@ Flat file
    read_table
    read_csv
    read_fwf
-   read_msgpack

 Clipboard
 ~~~~~~~~~
1 change: 0 additions & 1 deletion doc/source/reference/series.rst
@@ -574,7 +574,6 @@ Serialization / IO / conversion
    Series.to_xarray
    Series.to_hdf
    Series.to_sql
-   Series.to_msgpack
    Series.to_json
    Series.to_string
    Series.to_clipboard
2 changes: 1 addition & 1 deletion doc/source/user_guide/cookbook.rst
@@ -1229,7 +1229,7 @@ in the frame:
 The offsets of the structure elements may be different depending on the
 architecture of the machine on which the file was created. Using a raw
 binary file format like this for general data storage is not recommended, as
-it is not cross platform. We recommended either HDF5 or msgpack, both of
+it is not cross platform. We recommended either HDF5 or parquet, both of
 which are supported by pandas' IO facilities.

 Computation
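As a concrete illustration of the cookbook's new recommendation, here is a minimal cross-platform round-trip sketch (editorial, not part of the commit; assumes pyarrow or fastparquet for Parquet and PyTables for HDF5, and the file names are placeholders):

```python
import numpy as np
import pandas as pd

df = pd.DataFrame(np.random.rand(5, 2), columns=list("AB"))

# Parquet round-trip (needs pyarrow or fastparquet)
df.to_parquet("data.parquet")
df_pq = pd.read_parquet("data.parquet")

# HDF5 round-trip (needs PyTables)
df.to_hdf("data.h5", key="df", mode="w")
df_h5 = pd.read_hdf("data.h5", "df")

assert df_pq.equals(df) and df_h5.equals(df)
```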
86 changes: 9 additions & 77 deletions doc/source/user_guide/io.rst
@@ -3382,87 +3382,19 @@ The default is to 'infer':

 msgpack
 -------

-pandas supports the ``msgpack`` format for
-object serialization. This is a lightweight portable binary format, similar
-to binary JSON, that is highly space efficient, and provides good performance
-both on the writing (serialization), and reading (deserialization).
+pandas support for ``msgpack`` has been removed in version 1.0.0. It is recommended to use pyarrow for on-the-wire transmission of pandas objects.

-.. warning::
-
-   The msgpack format is deprecated as of 0.25 and will be removed in a future version.
-   It is recommended to use pyarrow for on-the-wire transmission of pandas objects.
-
-.. warning::
-
-   :func:`read_msgpack` is only guaranteed backwards compatible back to pandas version 0.20.3
-
-.. ipython:: python
-   :okwarning:
-
-   df = pd.DataFrame(np.random.rand(5, 2), columns=list('AB'))
-   df.to_msgpack('foo.msg')
-   pd.read_msgpack('foo.msg')
-   s = pd.Series(np.random.rand(5), index=pd.date_range('20130101', periods=5))

-You can pass a list of objects and you will receive them back on deserialization.
-
-.. ipython:: python
-   :okwarning:
-
-   pd.to_msgpack('foo.msg', df, 'foo', np.array([1, 2, 3]), s)
-   pd.read_msgpack('foo.msg')
-
-You can pass ``iterator=True`` to iterate over the unpacked results:
-
-.. ipython:: python
-   :okwarning:
-
-   for o in pd.read_msgpack('foo.msg', iterator=True):
-       print(o)
-
-You can pass ``append=True`` to the writer to append to an existing pack:
-
-.. ipython:: python
-   :okwarning:
+Example pyarrow usage:

-   df.to_msgpack('foo.msg', append=True)
-   pd.read_msgpack('foo.msg')
-
-Unlike other io methods, ``to_msgpack`` is available on both a per-object basis,
-``df.to_msgpack()`` and using the top-level ``pd.to_msgpack(...)`` where you
-can pack arbitrary collections of Python lists, dicts, scalars, while intermixing
-pandas objects.
-
-.. ipython:: python
-   :okwarning:
-
-   pd.to_msgpack('foo2.msg', {'dict': [{'df': df}, {'string': 'foo'},
-                              {'scalar': 1.}, {'s': s}]})
-   pd.read_msgpack('foo2.msg')
-
-.. ipython:: python
-   :suppress:
-   :okexcept:
-
-   os.remove('foo.msg')
-   os.remove('foo2.msg')
-
-Read/write API
-''''''''''''''
-
-Msgpacks can also be read from and written to strings.
-
-.. ipython:: python
-   :okwarning:
-
-   df.to_msgpack()
-
-Furthermore you can concatenate the strings to produce a list of the original objects.
+.. code-block:: python

-.. ipython:: python
-   :okwarning:
+   >>> import pandas as pd
+   >>> import pyarrow as pa
+   >>> df = pd.DataFrame({'A': [1, 2, 3]})
+   >>> context = pa.default_serialization_context()
+   >>> df_bytestring = context.serialize(df).to_buffer().to_pybytes()

-   pd.read_msgpack(df.to_msgpack() + s.to_msgpack())
+For documentation on pyarrow, see `here <https://arrow.apache.org/docs/python/index.html>`__.

 .. _io.hdf5:

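The pyarrow snippet added above only shows the serialize direction. For completeness, a sketch of the corresponding round-trip with the same API (editorial, not part of the commit; note that later pyarrow releases deprecate the serialization-context API in favour of pickle protocol 5):

```python
import pandas as pd
import pyarrow as pa

df = pd.DataFrame({'A': [1, 2, 3]})

# Serialize to a plain byte string, as in the documentation above ...
context = pa.default_serialization_context()
df_bytestring = context.serialize(df).to_buffer().to_pybytes()

# ... then reconstruct an equivalent DataFrame from those bytes.
df_roundtrip = context.deserialize(df_bytestring)
assert df_roundtrip.equals(df)
```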
6 changes: 2 additions & 4 deletions doc/source/whatsnew/v0.13.0.rst
@@ -828,8 +828,7 @@ Experimental

   Since this is an EXPERIMENTAL LIBRARY, the storage format may not be stable until a future release.

-  .. ipython:: python
-     :okwarning:
+  .. code-block:: python

      df = pd.DataFrame(np.random.rand(5, 2), columns=list('AB'))
      df.to_msgpack('foo.msg')
@@ -841,8 +840,7 @@ Experimental

   You can pass ``iterator=True`` to iterator over the unpacked results

-  .. ipython:: python
-     :okwarning:
+  .. code-block:: python

      for o in pd.read_msgpack('foo.msg', iterator=True):
          print(o)
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.0.rst
@@ -628,6 +628,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more.
 - Removed previously deprecated ``errors`` argument in :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` (:issue:`22644`)
 - Changed the default value for ``ordered`` in :class:`CategoricalDtype` from ``None`` to ``False`` (:issue:`26336`)
 - :meth:`Series.set_axis` and :meth:`DataFrame.set_axis` now require "labels" as the first argument and "axis" as an optional named parameter (:issue:`30089`)
+- Removed the previously deprecated :func:`to_msgpack`, :func:`read_msgpack`, :meth:`DataFrame.to_msgpack`, :meth:`Series.to_msgpack` (:issue:`27103`)
 -
 - Removed the previously deprecated keyword "fill_value" from :meth:`Categorical.fillna`, use "value" instead (:issue:`19269`)
 - Removed the previously deprecated keyword "data" from :func:`andrews_curves`, use "frame" instead (:issue:`6956`)
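For readers hitting this removal, a hedged migration sketch (editorial, not part of the commit) showing the kind of replacement the release notes point toward: pickle for local persistence, pyarrow (as in the io.rst example above) for wire transfer.

```python
import pandas as pd

df = pd.DataFrame({'A': [1, 2, 3]})

# pandas < 1.0 (removed in 1.0.0):
# df.to_msgpack('frame.msg')
# df2 = pd.read_msgpack('frame.msg')

# pandas >= 1.0: pickle is the built-in replacement for local persistence.
df.to_pickle('frame.pkl')
df2 = pd.read_pickle('frame.pkl')
assert df2.equals(df)
```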
3 changes: 0 additions & 3 deletions pandas/__init__.py
@@ -148,9 +148,6 @@
     ExcelFile,
     ExcelWriter,
     read_excel,
-    # packers
-    read_msgpack,
-    to_msgpack,
     # parsers
     read_csv,
     read_fwf,