From 6e569792c5c60bf484c5e40e3a2f3c5e06b4a885 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Thu, 12 Dec 2019 08:17:00 -0800 Subject: [PATCH] DEPR: msgpack (#30112) --- LICENSES/MSGPACK_LICENSE | 13 - LICENSES/MSGPACK_NUMPY_LICENSE | 33 - MANIFEST.in | 1 - asv_bench/benchmarks/io/msgpack.py | 32 - asv_bench/benchmarks/io/sas.py | 2 +- ci/code_checks.sh | 4 +- doc/redirects.csv | 3 - doc/source/development/developer.rst | 1 - doc/source/getting_started/install.rst | 4 +- doc/source/reference/frame.rst | 1 - doc/source/reference/io.rst | 1 - doc/source/reference/series.rst | 1 - doc/source/user_guide/cookbook.rst | 2 +- doc/source/user_guide/io.rst | 86 +- doc/source/whatsnew/v0.13.0.rst | 6 +- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/__init__.py | 3 - pandas/_libs/src/msgpack/pack.h | 103 -- pandas/_libs/src/msgpack/pack_template.h | 785 --------------- pandas/_libs/src/msgpack/sysdep.h | 194 ---- pandas/_libs/src/msgpack/unpack.h | 278 ------ pandas/_libs/src/msgpack/unpack_define.h | 95 -- pandas/_libs/src/msgpack/unpack_template.h | 475 --------- pandas/core/generic.py | 42 - pandas/io/api.py | 1 - pandas/io/msgpack/__init__.py | 56 -- pandas/io/msgpack/_packer.pyi | 22 - pandas/io/msgpack/_packer.pyx | 312 ------ pandas/io/msgpack/_unpacker.pyi | 59 -- pandas/io/msgpack/_unpacker.pyx | 494 ---------- pandas/io/msgpack/_version.py | 1 - pandas/io/msgpack/exceptions.py | 31 - pandas/io/packers.py | 865 ----------------- pandas/tests/api/test_api.py | 3 +- .../tests/io/generate_legacy_storage_files.py | 53 +- pandas/tests/io/msgpack/__init__.py | 0 pandas/tests/io/msgpack/common.py | 2 - pandas/tests/io/msgpack/data/frame.mp | Bin 309 -> 0 bytes pandas/tests/io/msgpack/test_buffer.py | 22 - pandas/tests/io/msgpack/test_case.py | 149 --- pandas/tests/io/msgpack/test_except.py | 38 - pandas/tests/io/msgpack/test_extension.py | 63 -- pandas/tests/io/msgpack/test_format.py | 84 -- pandas/tests/io/msgpack/test_limits.py | 107 -- 
pandas/tests/io/msgpack/test_newspec.py | 90 -- pandas/tests/io/msgpack/test_obj.py | 71 -- pandas/tests/io/msgpack/test_pack.py | 171 ---- pandas/tests/io/msgpack/test_read_size.py | 71 -- pandas/tests/io/msgpack/test_seq.py | 47 - pandas/tests/io/msgpack/test_sequnpack.py | 102 -- pandas/tests/io/msgpack/test_subtype.py | 26 - pandas/tests/io/msgpack/test_unpack.py | 64 -- pandas/tests/io/msgpack/test_unpack_raw.py | 30 - pandas/tests/io/test_common.py | 5 - pandas/tests/io/test_packers.py | 911 ------------------ pandas/tests/util/test_move.py | 44 - pandas/util/move.c | 212 ---- setup.py | 40 - 58 files changed, 22 insertions(+), 6390 deletions(-) delete mode 100644 LICENSES/MSGPACK_LICENSE delete mode 100644 LICENSES/MSGPACK_NUMPY_LICENSE delete mode 100644 asv_bench/benchmarks/io/msgpack.py delete mode 100644 pandas/_libs/src/msgpack/pack.h delete mode 100644 pandas/_libs/src/msgpack/pack_template.h delete mode 100644 pandas/_libs/src/msgpack/sysdep.h delete mode 100644 pandas/_libs/src/msgpack/unpack.h delete mode 100644 pandas/_libs/src/msgpack/unpack_define.h delete mode 100644 pandas/_libs/src/msgpack/unpack_template.h delete mode 100644 pandas/io/msgpack/__init__.py delete mode 100644 pandas/io/msgpack/_packer.pyi delete mode 100644 pandas/io/msgpack/_packer.pyx delete mode 100644 pandas/io/msgpack/_unpacker.pyi delete mode 100644 pandas/io/msgpack/_unpacker.pyx delete mode 100644 pandas/io/msgpack/_version.py delete mode 100644 pandas/io/msgpack/exceptions.py delete mode 100644 pandas/io/packers.py delete mode 100644 pandas/tests/io/msgpack/__init__.py delete mode 100644 pandas/tests/io/msgpack/common.py delete mode 100644 pandas/tests/io/msgpack/data/frame.mp delete mode 100644 pandas/tests/io/msgpack/test_buffer.py delete mode 100644 pandas/tests/io/msgpack/test_case.py delete mode 100644 pandas/tests/io/msgpack/test_except.py delete mode 100644 pandas/tests/io/msgpack/test_extension.py delete mode 100644 pandas/tests/io/msgpack/test_format.py delete 
mode 100644 pandas/tests/io/msgpack/test_limits.py delete mode 100644 pandas/tests/io/msgpack/test_newspec.py delete mode 100644 pandas/tests/io/msgpack/test_obj.py delete mode 100644 pandas/tests/io/msgpack/test_pack.py delete mode 100644 pandas/tests/io/msgpack/test_read_size.py delete mode 100644 pandas/tests/io/msgpack/test_seq.py delete mode 100644 pandas/tests/io/msgpack/test_sequnpack.py delete mode 100644 pandas/tests/io/msgpack/test_subtype.py delete mode 100644 pandas/tests/io/msgpack/test_unpack.py delete mode 100644 pandas/tests/io/msgpack/test_unpack_raw.py delete mode 100644 pandas/tests/io/test_packers.py delete mode 100644 pandas/tests/util/test_move.py delete mode 100644 pandas/util/move.c diff --git a/LICENSES/MSGPACK_LICENSE b/LICENSES/MSGPACK_LICENSE deleted file mode 100644 index ae1b0f2f32f06..0000000000000 --- a/LICENSES/MSGPACK_LICENSE +++ /dev/null @@ -1,13 +0,0 @@ -Copyright (C) 2008-2011 INADA Naoki - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. \ No newline at end of file diff --git a/LICENSES/MSGPACK_NUMPY_LICENSE b/LICENSES/MSGPACK_NUMPY_LICENSE deleted file mode 100644 index e570011efac73..0000000000000 --- a/LICENSES/MSGPACK_NUMPY_LICENSE +++ /dev/null @@ -1,33 +0,0 @@ -.. -*- rst -*- - -License -======= - -Copyright (c) 2013, Lev Givon. -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. -* Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. -* Neither the name of Lev Givon nor the names of any - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/MANIFEST.in b/MANIFEST.in index adaad1dc1c864..cf6a1835433a4 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -20,7 +20,6 @@ global-exclude *.gz global-exclude *.h5 global-exclude *.html global-exclude *.json -global-exclude *.msgpack global-exclude *.pickle global-exclude *.png global-exclude *.pyc diff --git a/asv_bench/benchmarks/io/msgpack.py b/asv_bench/benchmarks/io/msgpack.py deleted file mode 100644 index a5b8b81bed85b..0000000000000 --- a/asv_bench/benchmarks/io/msgpack.py +++ /dev/null @@ -1,32 +0,0 @@ -import warnings - -import numpy as np - -from pandas import DataFrame, date_range, read_msgpack -import pandas.util.testing as tm - -from ..pandas_vb_common import BaseIO - - -class MSGPack(BaseIO): - def setup(self): - self.fname = "__test__.msg" - N = 100000 - C = 5 - self.df = DataFrame( - np.random.randn(N, C), - columns=[f"float{i}" for i in range(C)], - index=date_range("20000101", periods=N, freq="H"), - ) - self.df["object"] = tm.makeStringIndex(N) - with warnings.catch_warnings(record=True): - self.df.to_msgpack(self.fname) - - def time_read_msgpack(self): - read_msgpack(self.fname) - - def time_write_msgpack(self): - self.df.to_msgpack(self.fname) - - -from ..pandas_vb_common import setup # noqa: F401 isort:skip diff --git a/asv_bench/benchmarks/io/sas.py b/asv_bench/benchmarks/io/sas.py index 7ce8ef8c12639..5eaeb231b031b 100644 --- a/asv_bench/benchmarks/io/sas.py +++ b/asv_bench/benchmarks/io/sas.py @@ -26,5 +26,5 @@ def setup(self, format): ] self.f = os.path.join(*paths) - def time_read_msgpack(self, format): + def time_read_sas(self, format): read_sas(self.f, format=format) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 2b9ea7dc220d7..8c4f3740ef8fa 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -94,10 +94,10 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then # We don't lint all C files because we don't want to lint any that are built # from Cython files nor do we want to lint C files that we didn't modify for 
- # this particular codebase (e.g. src/headers, src/klib, src/msgpack). However, + # this particular codebase (e.g. src/headers, src/klib). However, # we can lint all header files since they aren't "generated" like C files are. MSG='Linting .c and .h' ; echo $MSG - cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime pandas/io/msgpack pandas/_libs/*.cpp pandas/util + cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime pandas/_libs/*.cpp RET=$(($RET + $?)) ; echo $MSG "DONE" echo "isort --version-number" diff --git a/doc/redirects.csv b/doc/redirects.csv index 02c0af9be3739..587a5e9f65b38 100644 --- a/doc/redirects.csv +++ b/doc/redirects.csv @@ -491,7 +491,6 @@ generated/pandas.DataFrame.to_hdf,../reference/api/pandas.DataFrame.to_hdf generated/pandas.DataFrame.to,../reference/api/pandas.DataFrame.to generated/pandas.DataFrame.to_json,../reference/api/pandas.DataFrame.to_json generated/pandas.DataFrame.to_latex,../reference/api/pandas.DataFrame.to_latex -generated/pandas.DataFrame.to_msgpack,../reference/api/pandas.DataFrame.to_msgpack generated/pandas.DataFrame.to_numpy,../reference/api/pandas.DataFrame.to_numpy generated/pandas.DataFrame.to_panel,../reference/api/pandas.DataFrame.to_panel generated/pandas.DataFrame.to_parquet,../reference/api/pandas.DataFrame.to_parquet @@ -889,7 +888,6 @@ generated/pandas.read_gbq,../reference/api/pandas.read_gbq generated/pandas.read_hdf,../reference/api/pandas.read_hdf generated/pandas.read,../reference/api/pandas.read generated/pandas.read_json,../reference/api/pandas.read_json -generated/pandas.read_msgpack,../reference/api/pandas.read_msgpack 
generated/pandas.read_parquet,../reference/api/pandas.read_parquet generated/pandas.read_pickle,../reference/api/pandas.read_pickle generated/pandas.read_sas,../reference/api/pandas.read_sas @@ -1230,7 +1228,6 @@ generated/pandas.Series.to_json,../reference/api/pandas.Series.to_json generated/pandas.Series.to_latex,../reference/api/pandas.Series.to_latex generated/pandas.Series.to_list,../reference/api/pandas.Series.to_list generated/pandas.Series.tolist,../reference/api/pandas.Series.tolist -generated/pandas.Series.to_msgpack,../reference/api/pandas.Series.to_msgpack generated/pandas.Series.to_numpy,../reference/api/pandas.Series.to_numpy generated/pandas.Series.to_period,../reference/api/pandas.Series.to_period generated/pandas.Series.to_pickle,../reference/api/pandas.Series.to_pickle diff --git a/doc/source/development/developer.rst b/doc/source/development/developer.rst index 923ef005d5926..33646e5d74757 100644 --- a/doc/source/development/developer.rst +++ b/doc/source/development/developer.rst @@ -125,7 +125,6 @@ The ``metadata`` field is ``None`` except for: in ``BYTE_ARRAY`` Parquet columns. 
The encoding can be one of: * ``'pickle'`` - * ``'msgpack'`` * ``'bson'`` * ``'json'`` diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index 2347b10b242e6..62a39fb5176f9 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -249,7 +249,7 @@ PyTables 3.4.2 HDF5-based reading / writing SQLAlchemy 1.1.4 SQL support for databases other than sqlite SciPy 0.19.0 Miscellaneous statistical functions XLsxWriter 0.9.8 Excel writing -blosc Compression for msgpack +blosc Compression for HDF5 fastparquet 0.3.2 Parquet reading / writing gcsfs 0.2.2 Google Cloud Storage access html5lib HTML parser for read_html (see :ref:`note <optional_html>`) @@ -269,7 +269,7 @@ xclip Clipboard I/O on linux xlrd 1.1.0 Excel reading xlwt 1.2.0 Excel writing xsel Clipboard I/O on linux -zlib Compression for msgpack +zlib Compression for HDF5 ========================= ================== ============================================================= ..
_optional_html: diff --git a/doc/source/reference/frame.rst b/doc/source/reference/frame.rst index 8eeca1ec28054..815f3f9c19d49 100644 --- a/doc/source/reference/frame.rst +++ b/doc/source/reference/frame.rst @@ -357,7 +357,6 @@ Serialization / IO / conversion DataFrame.to_feather DataFrame.to_latex DataFrame.to_stata - DataFrame.to_msgpack DataFrame.to_gbq DataFrame.to_records DataFrame.to_string diff --git a/doc/source/reference/io.rst b/doc/source/reference/io.rst index 3a57c8a487304..50168dec928ab 100644 --- a/doc/source/reference/io.rst +++ b/doc/source/reference/io.rst @@ -22,7 +22,6 @@ Flat file read_table read_csv read_fwf - read_msgpack Clipboard ~~~~~~~~~ diff --git a/doc/source/reference/series.rst b/doc/source/reference/series.rst index 807dc151dac4e..6e1ee303135d8 100644 --- a/doc/source/reference/series.rst +++ b/doc/source/reference/series.rst @@ -574,7 +574,6 @@ Serialization / IO / conversion Series.to_xarray Series.to_hdf Series.to_sql - Series.to_msgpack Series.to_json Series.to_string Series.to_clipboard diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst index c9d3bc3a28c70..37637bbdb38e6 100644 --- a/doc/source/user_guide/cookbook.rst +++ b/doc/source/user_guide/cookbook.rst @@ -1229,7 +1229,7 @@ in the frame: The offsets of the structure elements may be different depending on the architecture of the machine on which the file was created. Using a raw binary file format like this for general data storage is not recommended, as - it is not cross platform. We recommended either HDF5 or msgpack, both of + it is not cross platform. We recommend either HDF5 or parquet, both of which are supported by pandas' IO facilities.
Computation diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 972f36aecad24..4302497eb00cd 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -3382,87 +3382,19 @@ The default is to 'infer': msgpack ------- -pandas supports the ``msgpack`` format for -object serialization. This is a lightweight portable binary format, similar -to binary JSON, that is highly space efficient, and provides good performance -both on the writing (serialization), and reading (deserialization). +pandas support for ``msgpack`` has been removed in version 1.0.0. It is recommended to use pyarrow for on-the-wire transmission of pandas objects. -.. warning:: - - The msgpack format is deprecated as of 0.25 and will be removed in a future version. - It is recommended to use pyarrow for on-the-wire transmission of pandas objects. - -.. warning:: - - :func:`read_msgpack` is only guaranteed backwards compatible back to pandas version 0.20.3 - -.. ipython:: python - :okwarning: - - df = pd.DataFrame(np.random.rand(5, 2), columns=list('AB')) - df.to_msgpack('foo.msg') - pd.read_msgpack('foo.msg') - s = pd.Series(np.random.rand(5), index=pd.date_range('20130101', periods=5)) - -You can pass a list of objects and you will receive them back on deserialization. - -.. ipython:: python - :okwarning: - - pd.to_msgpack('foo.msg', df, 'foo', np.array([1, 2, 3]), s) - pd.read_msgpack('foo.msg') - -You can pass ``iterator=True`` to iterate over the unpacked results: - -.. ipython:: python - :okwarning: - - for o in pd.read_msgpack('foo.msg', iterator=True): - print(o) - -You can pass ``append=True`` to the writer to append to an existing pack: - -.. 
ipython:: python - :okwarning: +Example pyarrow usage: - df.to_msgpack('foo.msg', append=True) - pd.read_msgpack('foo.msg') - -Unlike other io methods, ``to_msgpack`` is available on both a per-object basis, -``df.to_msgpack()`` and using the top-level ``pd.to_msgpack(...)`` where you -can pack arbitrary collections of Python lists, dicts, scalars, while intermixing -pandas objects. - -.. ipython:: python - :okwarning: - - pd.to_msgpack('foo2.msg', {'dict': [{'df': df}, {'string': 'foo'}, - {'scalar': 1.}, {'s': s}]}) - pd.read_msgpack('foo2.msg') - -.. ipython:: python - :suppress: - :okexcept: - - os.remove('foo.msg') - os.remove('foo2.msg') - -Read/write API -'''''''''''''' - -Msgpacks can also be read from and written to strings. - -.. ipython:: python - :okwarning: - - df.to_msgpack() - -Furthermore you can concatenate the strings to produce a list of the original objects. +.. code-block:: python -.. ipython:: python - :okwarning: + >>> import pandas as pd + >>> import pyarrow as pa + >>> df = pd.DataFrame({'A': [1, 2, 3]}) + >>> context = pa.default_serialization_context() + >>> df_bytestring = context.serialize(df).to_buffer().to_pybytes() - pd.read_msgpack(df.to_msgpack() + s.to_msgpack()) +For documentation on pyarrow, see `here <https://arrow.apache.org/docs/python/index.html>`__. .. _io.hdf5: diff --git a/doc/source/whatsnew/v0.13.0.rst b/doc/source/whatsnew/v0.13.0.rst index ab48594ddadab..43c6083fdce8f 100644 --- a/doc/source/whatsnew/v0.13.0.rst +++ b/doc/source/whatsnew/v0.13.0.rst @@ -828,8 +828,7 @@ Experimental Since this is an EXPERIMENTAL LIBRARY, the storage format may not be stable until a future release. - .. ipython:: python - :okwarning: + .. code-block:: python df = pd.DataFrame(np.random.rand(5, 2), columns=list('AB')) df.to_msgpack('foo.msg') @@ -841,8 +840,7 @@ Experimental You can pass ``iterator=True`` to iterator over the unpacked results - .. ipython:: python - :okwarning: + ..
code-block:: python for o in pd.read_msgpack('foo.msg', iterator=True): print(o) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index edc69f1ab8e23..d0dc3f58379a0 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -628,6 +628,7 @@ or ``matplotlib.Axes.plot``. See :ref:`plotting.formatters` for more. - Removed previously deprecated ``errors`` argument in :meth:`Timestamp.tz_localize`, :meth:`DatetimeIndex.tz_localize`, and :meth:`Series.tz_localize` (:issue:`22644`) - Changed the default value for ``ordered`` in :class:`CategoricalDtype` from ``None`` to ``False`` (:issue:`26336`) - :meth:`Series.set_axis` and :meth:`DataFrame.set_axis` now require "labels" as the first argument and "axis" as an optional named parameter (:issue:`30089`) +- Removed the previously deprecated :func:`to_msgpack`, :func:`read_msgpack`, :meth:`DataFrame.to_msgpack`, :meth:`Series.to_msgpack` (:issue:`27103`) - - Removed the previously deprecated keyword "fill_value" from :meth:`Categorical.fillna`, use "value" instead (:issue:`19269`) - Removed the previously deprecated keyword "data" from :func:`andrews_curves`, use "frame" instead (:issue:`6956`) diff --git a/pandas/__init__.py b/pandas/__init__.py index f72a12b58edcb..d68fc2f48624b 100644 --- a/pandas/__init__.py +++ b/pandas/__init__.py @@ -148,9 +148,6 @@ ExcelFile, ExcelWriter, read_excel, - # packers - read_msgpack, - to_msgpack, # parsers read_csv, read_fwf, diff --git a/pandas/_libs/src/msgpack/pack.h b/pandas/_libs/src/msgpack/pack.h deleted file mode 100644 index 02379c9188424..0000000000000 --- a/pandas/_libs/src/msgpack/pack.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * MessagePack for Python packing routine - * - * Copyright (C) 2009 Naoki INADA - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include "sysdep.h" -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -#if defined(_MSC_VER) && (_MSC_VER < 1900) -#define inline __inline -#endif - -typedef struct msgpack_packer { - char *buf; - size_t length; - size_t buf_size; - bool use_bin_type; -} msgpack_packer; - -typedef struct Packer Packer; - -static inline int msgpack_pack_int(msgpack_packer* pk, int d); -static inline int msgpack_pack_long(msgpack_packer* pk, long d); -static inline int msgpack_pack_long_long(msgpack_packer* pk, long long d); -static inline int msgpack_pack_unsigned_short(msgpack_packer* pk, unsigned short d); -static inline int msgpack_pack_unsigned_int(msgpack_packer* pk, unsigned int d); -static inline int msgpack_pack_unsigned_long(msgpack_packer* pk, unsigned long d); -//static inline int msgpack_pack_unsigned_long_long(msgpack_packer* pk, unsigned long long d); - -static inline int msgpack_pack_uint8(msgpack_packer* pk, uint8_t d); -static inline int msgpack_pack_uint16(msgpack_packer* pk, uint16_t d); -static inline int msgpack_pack_uint32(msgpack_packer* pk, uint32_t d); -static inline int msgpack_pack_uint64(msgpack_packer* pk, uint64_t d); -static inline int msgpack_pack_int8(msgpack_packer* pk, int8_t d); -static inline int msgpack_pack_int16(msgpack_packer* pk, int16_t d); -static inline int msgpack_pack_int32(msgpack_packer* pk, int32_t d); -static inline int msgpack_pack_int64(msgpack_packer* pk, int64_t d); - -static inline int msgpack_pack_float(msgpack_packer* pk, float d); -static inline int 
msgpack_pack_double(msgpack_packer* pk, double d); - -static inline int msgpack_pack_nil(msgpack_packer* pk); -static inline int msgpack_pack_true(msgpack_packer* pk); -static inline int msgpack_pack_false(msgpack_packer* pk); - -static inline int msgpack_pack_array(msgpack_packer* pk, unsigned int n); - -static inline int msgpack_pack_map(msgpack_packer* pk, unsigned int n); - -static inline int msgpack_pack_raw(msgpack_packer* pk, size_t l); -static inline int msgpack_pack_bin(msgpack_packer* pk, size_t l); -static inline int msgpack_pack_raw_body(msgpack_packer* pk, const void* b, size_t l); - -static inline int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l); - -static inline int msgpack_pack_write(msgpack_packer* pk, const char *data, size_t l) -{ - char* buf = pk->buf; - size_t bs = pk->buf_size; - size_t len = pk->length; - - if (len + l > bs) { - bs = (len + l) * 2; - buf = (char*)realloc(buf, bs); - if (!buf) return -1; - } - memcpy(buf + len, data, l); - len += l; - - pk->buf = buf; - pk->buf_size = bs; - pk->length = len; - return 0; -} - -#define msgpack_pack_append_buffer(user, buf, len) \ - return msgpack_pack_write(user, (const char*)buf, len) - -#include "pack_template.h" - -#ifdef __cplusplus -} -#endif diff --git a/pandas/_libs/src/msgpack/pack_template.h b/pandas/_libs/src/msgpack/pack_template.h deleted file mode 100644 index 5d1088f4b7d78..0000000000000 --- a/pandas/_libs/src/msgpack/pack_template.h +++ /dev/null @@ -1,785 +0,0 @@ -/* - * MessagePack packing routine template - * - * Copyright (C) 2008-2010 FURUHASHI Sadayuki - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#if defined(__LITTLE_ENDIAN__) -#define TAKE8_8(d) ((uint8_t*)&d)[0] -#define TAKE8_16(d) ((uint8_t*)&d)[0] -#define TAKE8_32(d) ((uint8_t*)&d)[0] -#define TAKE8_64(d) ((uint8_t*)&d)[0] -#elif defined(__BIG_ENDIAN__) -#define TAKE8_8(d) ((uint8_t*)&d)[0] -#define TAKE8_16(d) ((uint8_t*)&d)[1] -#define TAKE8_32(d) ((uint8_t*)&d)[3] -#define TAKE8_64(d) ((uint8_t*)&d)[7] -#endif - -#ifndef msgpack_pack_append_buffer -#error msgpack_pack_append_buffer callback is not defined -#endif - - -/* - * Integer - */ - -#define msgpack_pack_real_uint8(x, d) \ -do { \ - if(d < (1<<7)) { \ - /* fixnum */ \ - msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); \ - } else { \ - /* unsigned 8 */ \ - unsigned char buf[2] = {0xcc, TAKE8_8(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } \ -} while(0) - -#define msgpack_pack_real_uint16(x, d) \ -do { \ - if(d < (1<<7)) { \ - /* fixnum */ \ - msgpack_pack_append_buffer(x, &TAKE8_16(d), 1); \ - } else if(d < (1<<8)) { \ - /* unsigned 8 */ \ - unsigned char buf[2] = {0xcc, TAKE8_16(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } else { \ - /* unsigned 16 */ \ - unsigned char buf[3]; \ - buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \ - msgpack_pack_append_buffer(x, buf, 3); \ - } \ -} while(0) - -#define msgpack_pack_real_uint32(x, d) \ -do { \ - if(d < (1<<8)) { \ - if(d < (1<<7)) { \ - /* fixnum */ \ - msgpack_pack_append_buffer(x, &TAKE8_32(d), 1); \ - } else { \ - /* unsigned 8 */ \ - unsigned char buf[2] = {0xcc, TAKE8_32(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } \ - } else { \ - 
if(d < (1<<16)) { \ - /* unsigned 16 */ \ - unsigned char buf[3]; \ - buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \ - msgpack_pack_append_buffer(x, buf, 3); \ - } else { \ - /* unsigned 32 */ \ - unsigned char buf[5]; \ - buf[0] = 0xce; _msgpack_store32(&buf[1], (uint32_t)d); \ - msgpack_pack_append_buffer(x, buf, 5); \ - } \ - } \ -} while(0) - -#define msgpack_pack_real_uint64(x, d) \ -do { \ - if(d < (1ULL<<8)) { \ - if(d < (1ULL<<7)) { \ - /* fixnum */ \ - msgpack_pack_append_buffer(x, &TAKE8_64(d), 1); \ - } else { \ - /* unsigned 8 */ \ - unsigned char buf[2] = {0xcc, TAKE8_64(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } \ - } else { \ - if(d < (1ULL<<16)) { \ - /* unsigned 16 */ \ - unsigned char buf[3]; \ - buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \ - msgpack_pack_append_buffer(x, buf, 3); \ - } else if(d < (1ULL<<32)) { \ - /* unsigned 32 */ \ - unsigned char buf[5]; \ - buf[0] = 0xce; _msgpack_store32(&buf[1], (uint32_t)d); \ - msgpack_pack_append_buffer(x, buf, 5); \ - } else { \ - /* unsigned 64 */ \ - unsigned char buf[9]; \ - buf[0] = 0xcf; _msgpack_store64(&buf[1], d); \ - msgpack_pack_append_buffer(x, buf, 9); \ - } \ - } \ -} while(0) - -#define msgpack_pack_real_int8(x, d) \ -do { \ - if(d < -(1<<5)) { \ - /* signed 8 */ \ - unsigned char buf[2] = {0xd0, TAKE8_8(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } else { \ - /* fixnum */ \ - msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); \ - } \ -} while(0) - -#define msgpack_pack_real_int16(x, d) \ -do { \ - if(d < -(1<<5)) { \ - if(d < -(1<<7)) { \ - /* signed 16 */ \ - unsigned char buf[3]; \ - buf[0] = 0xd1; _msgpack_store16(&buf[1], (int16_t)d); \ - msgpack_pack_append_buffer(x, buf, 3); \ - } else { \ - /* signed 8 */ \ - unsigned char buf[2] = {0xd0, TAKE8_16(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } \ - } else if(d < (1<<7)) { \ - /* fixnum */ \ - msgpack_pack_append_buffer(x, &TAKE8_16(d), 1); \ - } else { \ - if(d < (1<<8)) { \ - /* unsigned 
8 */ \ - unsigned char buf[2] = {0xcc, TAKE8_16(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } else { \ - /* unsigned 16 */ \ - unsigned char buf[3]; \ - buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \ - msgpack_pack_append_buffer(x, buf, 3); \ - } \ - } \ -} while(0) - -#define msgpack_pack_real_int32(x, d) \ -do { \ - if(d < -(1<<5)) { \ - if(d < -(1<<15)) { \ - /* signed 32 */ \ - unsigned char buf[5]; \ - buf[0] = 0xd2; _msgpack_store32(&buf[1], (int32_t)d); \ - msgpack_pack_append_buffer(x, buf, 5); \ - } else if(d < -(1<<7)) { \ - /* signed 16 */ \ - unsigned char buf[3]; \ - buf[0] = 0xd1; _msgpack_store16(&buf[1], (int16_t)d); \ - msgpack_pack_append_buffer(x, buf, 3); \ - } else { \ - /* signed 8 */ \ - unsigned char buf[2] = {0xd0, TAKE8_32(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } \ - } else if(d < (1<<7)) { \ - /* fixnum */ \ - msgpack_pack_append_buffer(x, &TAKE8_32(d), 1); \ - } else { \ - if(d < (1<<8)) { \ - /* unsigned 8 */ \ - unsigned char buf[2] = {0xcc, TAKE8_32(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } else if(d < (1<<16)) { \ - /* unsigned 16 */ \ - unsigned char buf[3]; \ - buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \ - msgpack_pack_append_buffer(x, buf, 3); \ - } else { \ - /* unsigned 32 */ \ - unsigned char buf[5]; \ - buf[0] = 0xce; _msgpack_store32(&buf[1], (uint32_t)d); \ - msgpack_pack_append_buffer(x, buf, 5); \ - } \ - } \ -} while(0) - -#define msgpack_pack_real_int64(x, d) \ -do { \ - if(d < -(1LL<<5)) { \ - if(d < -(1LL<<15)) { \ - if(d < -(1LL<<31)) { \ - /* signed 64 */ \ - unsigned char buf[9]; \ - buf[0] = 0xd3; _msgpack_store64(&buf[1], d); \ - msgpack_pack_append_buffer(x, buf, 9); \ - } else { \ - /* signed 32 */ \ - unsigned char buf[5]; \ - buf[0] = 0xd2; _msgpack_store32(&buf[1], (int32_t)d); \ - msgpack_pack_append_buffer(x, buf, 5); \ - } \ - } else { \ - if(d < -(1<<7)) { \ - /* signed 16 */ \ - unsigned char buf[3]; \ - buf[0] = 0xd1; _msgpack_store16(&buf[1], 
(int16_t)d); \ - msgpack_pack_append_buffer(x, buf, 3); \ - } else { \ - /* signed 8 */ \ - unsigned char buf[2] = {0xd0, TAKE8_64(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } \ - } \ - } else if(d < (1<<7)) { \ - /* fixnum */ \ - msgpack_pack_append_buffer(x, &TAKE8_64(d), 1); \ - } else { \ - if(d < (1LL<<16)) { \ - if(d < (1<<8)) { \ - /* unsigned 8 */ \ - unsigned char buf[2] = {0xcc, TAKE8_64(d)}; \ - msgpack_pack_append_buffer(x, buf, 2); \ - } else { \ - /* unsigned 16 */ \ - unsigned char buf[3]; \ - buf[0] = 0xcd; _msgpack_store16(&buf[1], (uint16_t)d); \ - msgpack_pack_append_buffer(x, buf, 3); \ - } \ - } else { \ - if(d < (1LL<<32)) { \ - /* unsigned 32 */ \ - unsigned char buf[5]; \ - buf[0] = 0xce; _msgpack_store32(&buf[1], (uint32_t)d); \ - msgpack_pack_append_buffer(x, buf, 5); \ - } else { \ - /* unsigned 64 */ \ - unsigned char buf[9]; \ - buf[0] = 0xcf; _msgpack_store64(&buf[1], d); \ - msgpack_pack_append_buffer(x, buf, 9); \ - } \ - } \ - } \ -} while(0) - - -static inline int msgpack_pack_uint8(msgpack_packer* x, uint8_t d) -{ - msgpack_pack_real_uint8(x, d); -} - -static inline int msgpack_pack_uint16(msgpack_packer* x, uint16_t d) -{ - msgpack_pack_real_uint16(x, d); -} - -static inline int msgpack_pack_uint32(msgpack_packer* x, uint32_t d) -{ - msgpack_pack_real_uint32(x, d); -} - -static inline int msgpack_pack_uint64(msgpack_packer* x, uint64_t d) -{ - msgpack_pack_real_uint64(x, d); -} - -static inline int msgpack_pack_int8(msgpack_packer* x, int8_t d) -{ - msgpack_pack_real_int8(x, d); -} - -static inline int msgpack_pack_int16(msgpack_packer* x, int16_t d) -{ - msgpack_pack_real_int16(x, d); -} - -static inline int msgpack_pack_int32(msgpack_packer* x, int32_t d) -{ - msgpack_pack_real_int32(x, d); -} - -static inline int msgpack_pack_int64(msgpack_packer* x, int64_t d) -{ - msgpack_pack_real_int64(x, d); -} - - -//#ifdef msgpack_pack_inline_func_cint - -static inline int msgpack_pack_short(msgpack_packer* x, short d) -{ -#if 
defined(SIZEOF_SHORT) -#if SIZEOF_SHORT == 2 - msgpack_pack_real_int16(x, d); -#elif SIZEOF_SHORT == 4 - msgpack_pack_real_int32(x, d); -#else - msgpack_pack_real_int64(x, d); -#endif - -#elif defined(SHRT_MAX) -#if SHRT_MAX == 0x7fff - msgpack_pack_real_int16(x, d); -#elif SHRT_MAX == 0x7fffffff - msgpack_pack_real_int32(x, d); -#else - msgpack_pack_real_int64(x, d); -#endif - -#else -if(sizeof(short) == 2) { - msgpack_pack_real_int16(x, d); -} else if(sizeof(short) == 4) { - msgpack_pack_real_int32(x, d); -} else { - msgpack_pack_real_int64(x, d); -} -#endif -} - -static inline int msgpack_pack_int(msgpack_packer* x, int d) -{ -#if defined(SIZEOF_INT) -#if SIZEOF_INT == 2 - msgpack_pack_real_int16(x, d); -#elif SIZEOF_INT == 4 - msgpack_pack_real_int32(x, d); -#else - msgpack_pack_real_int64(x, d); -#endif - -#elif defined(INT_MAX) -#if INT_MAX == 0x7fff - msgpack_pack_real_int16(x, d); -#elif INT_MAX == 0x7fffffff - msgpack_pack_real_int32(x, d); -#else - msgpack_pack_real_int64(x, d); -#endif - -#else -if(sizeof(int) == 2) { - msgpack_pack_real_int16(x, d); -} else if(sizeof(int) == 4) { - msgpack_pack_real_int32(x, d); -} else { - msgpack_pack_real_int64(x, d); -} -#endif -} - -static inline int msgpack_pack_long(msgpack_packer* x, long d) -{ -#if defined(SIZEOF_LONG) -#if SIZEOF_LONG == 2 - msgpack_pack_real_int16(x, d); -#elif SIZEOF_LONG == 4 - msgpack_pack_real_int32(x, d); -#else - msgpack_pack_real_int64(x, d); -#endif - -#elif defined(LONG_MAX) -#if LONG_MAX == 0x7fffL - msgpack_pack_real_int16(x, d); -#elif LONG_MAX == 0x7fffffffL - msgpack_pack_real_int32(x, d); -#else - msgpack_pack_real_int64(x, d); -#endif - -#else -if(sizeof(long) == 2) { - msgpack_pack_real_int16(x, d); -} else if(sizeof(long) == 4) { - msgpack_pack_real_int32(x, d); -} else { - msgpack_pack_real_int64(x, d); -} -#endif -} - -static inline int msgpack_pack_long_long(msgpack_packer* x, long long d) -{ -#if defined(SIZEOF_LONG_LONG) -#if SIZEOF_LONG_LONG == 2 - 
msgpack_pack_real_int16(x, d); -#elif SIZEOF_LONG_LONG == 4 - msgpack_pack_real_int32(x, d); -#else - msgpack_pack_real_int64(x, d); -#endif - -#elif defined(LLONG_MAX) -#if LLONG_MAX == 0x7fffL - msgpack_pack_real_int16(x, d); -#elif LLONG_MAX == 0x7fffffffL - msgpack_pack_real_int32(x, d); -#else - msgpack_pack_real_int64(x, d); -#endif - -#else -if(sizeof(long long) == 2) { - msgpack_pack_real_int16(x, d); -} else if(sizeof(long long) == 4) { - msgpack_pack_real_int32(x, d); -} else { - msgpack_pack_real_int64(x, d); -} -#endif -} - -static inline int msgpack_pack_unsigned_short(msgpack_packer* x, unsigned short d) -{ -#if defined(SIZEOF_SHORT) -#if SIZEOF_SHORT == 2 - msgpack_pack_real_uint16(x, d); -#elif SIZEOF_SHORT == 4 - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#elif defined(USHRT_MAX) -#if USHRT_MAX == 0xffffU - msgpack_pack_real_uint16(x, d); -#elif USHRT_MAX == 0xffffffffU - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#else -if(sizeof(unsigned short) == 2) { - msgpack_pack_real_uint16(x, d); -} else if(sizeof(unsigned short) == 4) { - msgpack_pack_real_uint32(x, d); -} else { - msgpack_pack_real_uint64(x, d); -} -#endif -} - -static inline int msgpack_pack_unsigned_int(msgpack_packer* x, unsigned int d) -{ -#if defined(SIZEOF_INT) -#if SIZEOF_INT == 2 - msgpack_pack_real_uint16(x, d); -#elif SIZEOF_INT == 4 - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#elif defined(UINT_MAX) -#if UINT_MAX == 0xffffU - msgpack_pack_real_uint16(x, d); -#elif UINT_MAX == 0xffffffffU - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#else -if(sizeof(unsigned int) == 2) { - msgpack_pack_real_uint16(x, d); -} else if(sizeof(unsigned int) == 4) { - msgpack_pack_real_uint32(x, d); -} else { - msgpack_pack_real_uint64(x, d); -} -#endif -} - -static inline int msgpack_pack_unsigned_long(msgpack_packer* x, unsigned long d) 
-{ -#if defined(SIZEOF_LONG) -#if SIZEOF_LONG == 2 - msgpack_pack_real_uint16(x, d); -#elif SIZEOF_LONG == 4 - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#elif defined(ULONG_MAX) -#if ULONG_MAX == 0xffffUL - msgpack_pack_real_uint16(x, d); -#elif ULONG_MAX == 0xffffffffUL - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#else -if(sizeof(unsigned long) == 2) { - msgpack_pack_real_uint16(x, d); -} else if(sizeof(unsigned long) == 4) { - msgpack_pack_real_uint32(x, d); -} else { - msgpack_pack_real_uint64(x, d); -} -#endif -} - -static inline int msgpack_pack_unsigned_long_long(msgpack_packer* x, unsigned long long d) -{ -#if defined(SIZEOF_LONG_LONG) -#if SIZEOF_LONG_LONG == 2 - msgpack_pack_real_uint16(x, d); -#elif SIZEOF_LONG_LONG == 4 - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#elif defined(ULLONG_MAX) -#if ULLONG_MAX == 0xffffUL - msgpack_pack_real_uint16(x, d); -#elif ULLONG_MAX == 0xffffffffUL - msgpack_pack_real_uint32(x, d); -#else - msgpack_pack_real_uint64(x, d); -#endif - -#else -if(sizeof(unsigned long long) == 2) { - msgpack_pack_real_uint16(x, d); -} else if(sizeof(unsigned long long) == 4) { - msgpack_pack_real_uint32(x, d); -} else { - msgpack_pack_real_uint64(x, d); -} -#endif -} - -//#undef msgpack_pack_inline_func_cint -//#endif - - - -/* - * Float - */ - -static inline int msgpack_pack_float(msgpack_packer* x, float d) -{ - union { float f; uint32_t i; } mem; - mem.f = d; - unsigned char buf[5]; - buf[0] = 0xca; _msgpack_store32(&buf[1], mem.i); - msgpack_pack_append_buffer(x, buf, 5); -} - -static inline int msgpack_pack_double(msgpack_packer* x, double d) -{ - union { double f; uint64_t i; } mem; - mem.f = d; - unsigned char buf[9]; - buf[0] = 0xcb; -#if defined(__arm__) && !(__ARM_EABI__) // arm-oabi - // https://github.com/msgpack/msgpack-perl/pull/1 - mem.i = (mem.i & 0xFFFFFFFFUL) << 32UL | (mem.i >> 32UL); -#endif - 
_msgpack_store64(&buf[1], mem.i); - msgpack_pack_append_buffer(x, buf, 9); -} - - -/* - * Nil - */ - -static inline int msgpack_pack_nil(msgpack_packer* x) -{ - static const unsigned char d = 0xc0; - msgpack_pack_append_buffer(x, &d, 1); -} - - -/* - * Boolean - */ - -static inline int msgpack_pack_true(msgpack_packer* x) -{ - static const unsigned char d = 0xc3; - msgpack_pack_append_buffer(x, &d, 1); -} - -static inline int msgpack_pack_false(msgpack_packer* x) -{ - static const unsigned char d = 0xc2; - msgpack_pack_append_buffer(x, &d, 1); -} - - -/* - * Array - */ - -static inline int msgpack_pack_array(msgpack_packer* x, unsigned int n) -{ - if(n < 16) { - unsigned char d = 0x90 | n; - msgpack_pack_append_buffer(x, &d, 1); - } else if(n < 65536) { - unsigned char buf[3]; - buf[0] = 0xdc; _msgpack_store16(&buf[1], (uint16_t)n); - msgpack_pack_append_buffer(x, buf, 3); - } else { - unsigned char buf[5]; - buf[0] = 0xdd; _msgpack_store32(&buf[1], (uint32_t)n); - msgpack_pack_append_buffer(x, buf, 5); - } -} - - -/* - * Map - */ - -static inline int msgpack_pack_map(msgpack_packer* x, unsigned int n) -{ - if(n < 16) { - unsigned char d = 0x80 | n; - msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); - } else if(n < 65536) { - unsigned char buf[3]; - buf[0] = 0xde; _msgpack_store16(&buf[1], (uint16_t)n); - msgpack_pack_append_buffer(x, buf, 3); - } else { - unsigned char buf[5]; - buf[0] = 0xdf; _msgpack_store32(&buf[1], (uint32_t)n); - msgpack_pack_append_buffer(x, buf, 5); - } -} - - -/* - * Raw - */ - -static inline int msgpack_pack_raw(msgpack_packer* x, size_t l) -{ - if (l < 32) { - unsigned char d = 0xa0 | (uint8_t)l; - msgpack_pack_append_buffer(x, &TAKE8_8(d), 1); - } else if (x->use_bin_type && l < 256) { // str8 is new format introduced with bin. 
- unsigned char buf[2] = {0xd9, (uint8_t)l}; - msgpack_pack_append_buffer(x, buf, 2); - } else if (l < 65536) { - unsigned char buf[3]; - buf[0] = 0xda; _msgpack_store16(&buf[1], (uint16_t)l); - msgpack_pack_append_buffer(x, buf, 3); - } else { - unsigned char buf[5]; - buf[0] = 0xdb; _msgpack_store32(&buf[1], (uint32_t)l); - msgpack_pack_append_buffer(x, buf, 5); - } -} - -/* - * bin - */ -static inline int msgpack_pack_bin(msgpack_packer *x, size_t l) -{ - if (!x->use_bin_type) { - return msgpack_pack_raw(x, l); - } - if (l < 256) { - unsigned char buf[2] = {0xc4, (unsigned char)l}; - msgpack_pack_append_buffer(x, buf, 2); - } else if (l < 65536) { - unsigned char buf[3] = {0xc5}; - _msgpack_store16(&buf[1], (uint16_t)l); - msgpack_pack_append_buffer(x, buf, 3); - } else { - unsigned char buf[5] = {0xc6}; - _msgpack_store32(&buf[1], (uint32_t)l); - msgpack_pack_append_buffer(x, buf, 5); - } -} - -static inline int msgpack_pack_raw_body(msgpack_packer* x, const void* b, size_t l) -{ - if (l > 0) msgpack_pack_append_buffer(x, (const unsigned char*)b, l); - return 0; -} - -/* - * Ext - */ -static inline int msgpack_pack_ext(msgpack_packer* x, char typecode, size_t l) -{ - if (l == 1) { - unsigned char buf[2]; - buf[0] = 0xd4; - buf[1] = (unsigned char)typecode; - msgpack_pack_append_buffer(x, buf, 2); - } - else if(l == 2) { - unsigned char buf[2]; - buf[0] = 0xd5; - buf[1] = (unsigned char)typecode; - msgpack_pack_append_buffer(x, buf, 2); - } - else if(l == 4) { - unsigned char buf[2]; - buf[0] = 0xd6; - buf[1] = (unsigned char)typecode; - msgpack_pack_append_buffer(x, buf, 2); - } - else if(l == 8) { - unsigned char buf[2]; - buf[0] = 0xd7; - buf[1] = (unsigned char)typecode; - msgpack_pack_append_buffer(x, buf, 2); - } - else if(l == 16) { - unsigned char buf[2]; - buf[0] = 0xd8; - buf[1] = (unsigned char)typecode; - msgpack_pack_append_buffer(x, buf, 2); - } - else if(l < 256) { - unsigned char buf[3]; - buf[0] = 0xc7; - buf[1] = l; - buf[2] = (unsigned 
char)typecode; - msgpack_pack_append_buffer(x, buf, 3); - } else if(l < 65536) { - unsigned char buf[4]; - buf[0] = 0xc8; - _msgpack_store16(&buf[1], (uint16_t)l); - buf[3] = (unsigned char)typecode; - msgpack_pack_append_buffer(x, buf, 4); - } else { - unsigned char buf[6]; - buf[0] = 0xc9; - _msgpack_store32(&buf[1], (uint32_t)l); - buf[5] = (unsigned char)typecode; - msgpack_pack_append_buffer(x, buf, 6); - } - -} - - - -#undef msgpack_pack_append_buffer - -#undef TAKE8_8 -#undef TAKE8_16 -#undef TAKE8_32 -#undef TAKE8_64 - -#undef msgpack_pack_real_uint8 -#undef msgpack_pack_real_uint16 -#undef msgpack_pack_real_uint32 -#undef msgpack_pack_real_uint64 -#undef msgpack_pack_real_int8 -#undef msgpack_pack_real_int16 -#undef msgpack_pack_real_int32 -#undef msgpack_pack_real_int64 diff --git a/pandas/_libs/src/msgpack/sysdep.h b/pandas/_libs/src/msgpack/sysdep.h deleted file mode 100644 index ed9c1bc0b8031..0000000000000 --- a/pandas/_libs/src/msgpack/sysdep.h +++ /dev/null @@ -1,194 +0,0 @@ -/* - * MessagePack system dependencies - * - * Copyright (C) 2008-2010 FURUHASHI Sadayuki - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#ifndef MSGPACK_SYSDEP_H__ -#define MSGPACK_SYSDEP_H__ - -#include -#include -#if defined(_MSC_VER) && _MSC_VER < 1600 -typedef __int8 int8_t; -typedef unsigned __int8 uint8_t; -typedef __int16 int16_t; -typedef unsigned __int16 uint16_t; -typedef __int32 int32_t; -typedef unsigned __int32 uint32_t; -typedef __int64 int64_t; -typedef unsigned __int64 uint64_t; -#elif defined(_MSC_VER) // && _MSC_VER >= 1600 -#include -#else -#include -#include -#endif - -#ifdef _WIN32 -#define _msgpack_atomic_counter_header -typedef long _msgpack_atomic_counter_t; -#define _msgpack_sync_decr_and_fetch(ptr) InterlockedDecrement(ptr) -#define _msgpack_sync_incr_and_fetch(ptr) InterlockedIncrement(ptr) -#elif defined(__GNUC__) && ((__GNUC__*10 + __GNUC_MINOR__) < 41) -#define _msgpack_atomic_counter_header "gcc_atomic.h" -#else -typedef unsigned int _msgpack_atomic_counter_t; -#define _msgpack_sync_decr_and_fetch(ptr) __sync_sub_and_fetch(ptr, 1) -#define _msgpack_sync_incr_and_fetch(ptr) __sync_add_and_fetch(ptr, 1) -#endif - -#ifdef _WIN32 - -#ifdef __cplusplus -/* numeric_limits::min,max */ -#ifdef max -#undef max -#endif -#ifdef min -#undef min -#endif -#endif - -#else -#include /* __BYTE_ORDER */ -#endif - -#if !defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__) -#if __BYTE_ORDER == __LITTLE_ENDIAN -#define __LITTLE_ENDIAN__ -#elif __BYTE_ORDER == __BIG_ENDIAN -#define __BIG_ENDIAN__ -#elif _WIN32 -#define __LITTLE_ENDIAN__ -#endif -#endif - - -#ifdef __LITTLE_ENDIAN__ - -#ifdef _WIN32 -# if defined(ntohs) -# define _msgpack_be16(x) ntohs(x) -# elif defined(_byteswap_ushort) || (defined(_MSC_VER) && _MSC_VER >= 1400) -# define _msgpack_be16(x) ((uint16_t)_byteswap_ushort((unsigned short)x)) -# else -# define _msgpack_be16(x) ( \ - ((((uint16_t)x) << 8) ) | \ - ((((uint16_t)x) >> 8) ) ) -# endif -#else -# define _msgpack_be16(x) ntohs(x) -#endif - -#ifdef _WIN32 -# if defined(ntohl) -# define _msgpack_be32(x) ntohl(x) -# elif defined(_byteswap_ulong) || 
(defined(_MSC_VER) && _MSC_VER >= 1400) -# define _msgpack_be32(x) ((uint32_t)_byteswap_ulong((unsigned long)x)) -# else -# define _msgpack_be32(x) \ - ( ((((uint32_t)x) << 24) ) | \ - ((((uint32_t)x) << 8) & 0x00ff0000U ) | \ - ((((uint32_t)x) >> 8) & 0x0000ff00U ) | \ - ((((uint32_t)x) >> 24) ) ) -# endif -#else -# define _msgpack_be32(x) ntohl(x) -#endif - -#if defined(_byteswap_uint64) || (defined(_MSC_VER) && _MSC_VER >= 1400) -# define _msgpack_be64(x) (_byteswap_uint64(x)) -#elif defined(bswap_64) -# define _msgpack_be64(x) bswap_64(x) -#elif defined(__DARWIN_OSSwapInt64) -# define _msgpack_be64(x) __DARWIN_OSSwapInt64(x) -#else -#define _msgpack_be64(x) \ - ( ((((uint64_t)x) << 56) ) | \ - ((((uint64_t)x) << 40) & 0x00ff000000000000ULL ) | \ - ((((uint64_t)x) << 24) & 0x0000ff0000000000ULL ) | \ - ((((uint64_t)x) << 8) & 0x000000ff00000000ULL ) | \ - ((((uint64_t)x) >> 8) & 0x00000000ff000000ULL ) | \ - ((((uint64_t)x) >> 24) & 0x0000000000ff0000ULL ) | \ - ((((uint64_t)x) >> 40) & 0x000000000000ff00ULL ) | \ - ((((uint64_t)x) >> 56) ) ) -#endif - -#define _msgpack_load16(cast, from) ((cast)( \ - (((uint16_t)((uint8_t*)(from))[0]) << 8) | \ - (((uint16_t)((uint8_t*)(from))[1]) ) )) - -#define _msgpack_load32(cast, from) ((cast)( \ - (((uint32_t)((uint8_t*)(from))[0]) << 24) | \ - (((uint32_t)((uint8_t*)(from))[1]) << 16) | \ - (((uint32_t)((uint8_t*)(from))[2]) << 8) | \ - (((uint32_t)((uint8_t*)(from))[3]) ) )) - -#define _msgpack_load64(cast, from) ((cast)( \ - (((uint64_t)((uint8_t*)(from))[0]) << 56) | \ - (((uint64_t)((uint8_t*)(from))[1]) << 48) | \ - (((uint64_t)((uint8_t*)(from))[2]) << 40) | \ - (((uint64_t)((uint8_t*)(from))[3]) << 32) | \ - (((uint64_t)((uint8_t*)(from))[4]) << 24) | \ - (((uint64_t)((uint8_t*)(from))[5]) << 16) | \ - (((uint64_t)((uint8_t*)(from))[6]) << 8) | \ - (((uint64_t)((uint8_t*)(from))[7]) ) )) - -#else - -#define _msgpack_be16(x) (x) -#define _msgpack_be32(x) (x) -#define _msgpack_be64(x) (x) - -#define 
_msgpack_load16(cast, from) ((cast)( \ - (((uint16_t)((uint8_t*)from)[0]) << 8) | \ - (((uint16_t)((uint8_t*)from)[1]) ) )) - -#define _msgpack_load32(cast, from) ((cast)( \ - (((uint32_t)((uint8_t*)from)[0]) << 24) | \ - (((uint32_t)((uint8_t*)from)[1]) << 16) | \ - (((uint32_t)((uint8_t*)from)[2]) << 8) | \ - (((uint32_t)((uint8_t*)from)[3]) ) )) - -#define _msgpack_load64(cast, from) ((cast)( \ - (((uint64_t)((uint8_t*)from)[0]) << 56) | \ - (((uint64_t)((uint8_t*)from)[1]) << 48) | \ - (((uint64_t)((uint8_t*)from)[2]) << 40) | \ - (((uint64_t)((uint8_t*)from)[3]) << 32) | \ - (((uint64_t)((uint8_t*)from)[4]) << 24) | \ - (((uint64_t)((uint8_t*)from)[5]) << 16) | \ - (((uint64_t)((uint8_t*)from)[6]) << 8) | \ - (((uint64_t)((uint8_t*)from)[7]) ) )) -#endif - - -#define _msgpack_store16(to, num) \ - do { uint16_t val = _msgpack_be16(num); memcpy(to, &val, 2); } while(0) -#define _msgpack_store32(to, num) \ - do { uint32_t val = _msgpack_be32(num); memcpy(to, &val, 4); } while(0) -#define _msgpack_store64(to, num) \ - do { uint64_t val = _msgpack_be64(num); memcpy(to, &val, 8); } while(0) - -/* -#define _msgpack_load16(cast, from) \ - ({ cast val; memcpy(&val, (char*)from, 2); _msgpack_be16(val); }) -#define _msgpack_load32(cast, from) \ - ({ cast val; memcpy(&val, (char*)from, 4); _msgpack_be32(val); }) -#define _msgpack_load64(cast, from) \ - ({ cast val; memcpy(&val, (char*)from, 8); _msgpack_be64(val); }) -*/ - - -#endif /* msgpack/sysdep.h */ diff --git a/pandas/_libs/src/msgpack/unpack.h b/pandas/_libs/src/msgpack/unpack.h deleted file mode 100644 index 591fad1ae4661..0000000000000 --- a/pandas/_libs/src/msgpack/unpack.h +++ /dev/null @@ -1,278 +0,0 @@ -/* - * MessagePack for Python unpacking routine - * - * Copyright (C) 2009 Naoki INADA - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#define MSGPACK_EMBED_STACK_SIZE (1024) -#include "unpack_define.h" - -typedef struct unpack_user { - int use_list; - PyObject *object_hook; - bool has_pairs_hook; - PyObject *list_hook; - PyObject *ext_hook; - const char *encoding; - const char *unicode_errors; - Py_ssize_t max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len; -} unpack_user; - -typedef PyObject* msgpack_unpack_object; -struct unpack_context; -typedef struct unpack_context unpack_context; -typedef int (*execute_fn)(unpack_context *ctx, const char* data, size_t len, size_t* off); - -static inline msgpack_unpack_object unpack_callback_root(unpack_user* u) -{ - return NULL; -} - -static inline int unpack_callback_uint16(unpack_user* u, uint16_t d, msgpack_unpack_object* o) -{ - PyObject *p = PyInt_FromLong((long)d); - if (!p) - return -1; - *o = p; - return 0; -} -static inline int unpack_callback_uint8(unpack_user* u, uint8_t d, msgpack_unpack_object* o) -{ - return unpack_callback_uint16(u, d, o); -} - - -static inline int unpack_callback_uint32(unpack_user* u, uint32_t d, msgpack_unpack_object* o) -{ - PyObject *p = PyInt_FromSize_t((size_t)d); - if (!p) - return -1; - *o = p; - return 0; -} - -static inline int unpack_callback_uint64(unpack_user* u, uint64_t d, msgpack_unpack_object* o) -{ - PyObject *p; - if (d > LONG_MAX) { - p = PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG)d); - } else { - p = PyInt_FromSize_t((size_t)d); - } - if (!p) - return -1; - *o = p; - return 0; -} - -static inline int unpack_callback_int32(unpack_user* u, int32_t d, 
msgpack_unpack_object* o) -{ - PyObject *p = PyInt_FromLong(d); - if (!p) - return -1; - *o = p; - return 0; -} - -static inline int unpack_callback_int16(unpack_user* u, int16_t d, msgpack_unpack_object* o) -{ - return unpack_callback_int32(u, d, o); -} - -static inline int unpack_callback_int8(unpack_user* u, int8_t d, msgpack_unpack_object* o) -{ - return unpack_callback_int32(u, d, o); -} - -static inline int unpack_callback_int64(unpack_user* u, int64_t d, msgpack_unpack_object* o) -{ - PyObject *p; - if (d > LONG_MAX || d < LONG_MIN) { - p = PyLong_FromLongLong((unsigned PY_LONG_LONG)d); - } else { - p = PyInt_FromLong((long)d); - } - *o = p; - return 0; -} - -static inline int unpack_callback_double(unpack_user* u, double d, msgpack_unpack_object* o) -{ - PyObject *p = PyFloat_FromDouble(d); - if (!p) - return -1; - *o = p; - return 0; -} - -static inline int unpack_callback_float(unpack_user* u, float d, msgpack_unpack_object* o) -{ - return unpack_callback_double(u, d, o); -} - -static inline int unpack_callback_nil(unpack_user* u, msgpack_unpack_object* o) -{ Py_INCREF(Py_None); *o = Py_None; return 0; } - -static inline int unpack_callback_true(unpack_user* u, msgpack_unpack_object* o) -{ Py_INCREF(Py_True); *o = Py_True; return 0; } - -static inline int unpack_callback_false(unpack_user* u, msgpack_unpack_object* o) -{ Py_INCREF(Py_False); *o = Py_False; return 0; } - -static inline int unpack_callback_array(unpack_user* u, unsigned int n, msgpack_unpack_object* o) -{ - if (n > u->max_array_len) { - PyErr_Format(PyExc_ValueError, "%u exceeds max_array_len(%zd)", n, u->max_array_len); - return -1; - } - PyObject *p = u->use_list ? 
PyList_New(n) : PyTuple_New(n); - - if (!p) - return -1; - *o = p; - return 0; -} - -static inline int unpack_callback_array_item(unpack_user* u, unsigned int current, msgpack_unpack_object* c, msgpack_unpack_object o) -{ - if (u->use_list) - PyList_SET_ITEM(*c, current, o); - else - PyTuple_SET_ITEM(*c, current, o); - return 0; -} - -static inline int unpack_callback_array_end(unpack_user* u, msgpack_unpack_object* c) -{ - if (u->list_hook) { - PyObject *new_c = PyObject_CallFunctionObjArgs(u->list_hook, *c, NULL); - if (!new_c) - return -1; - Py_DECREF(*c); - *c = new_c; - } - return 0; -} - -static inline int unpack_callback_map(unpack_user* u, unsigned int n, msgpack_unpack_object* o) -{ - if (n > u->max_map_len) { - PyErr_Format(PyExc_ValueError, "%u exceeds max_map_len(%zd)", n, u->max_map_len); - return -1; - } - PyObject *p; - if (u->has_pairs_hook) { - p = PyList_New(n); // Or use tuple? - } - else { - p = PyDict_New(); - } - if (!p) - return -1; - *o = p; - return 0; -} - -static inline int unpack_callback_map_item(unpack_user* u, unsigned int current, msgpack_unpack_object* c, msgpack_unpack_object k, msgpack_unpack_object v) -{ - if (u->has_pairs_hook) { - msgpack_unpack_object item = PyTuple_Pack(2, k, v); - if (!item) - return -1; - Py_DECREF(k); - Py_DECREF(v); - PyList_SET_ITEM(*c, current, item); - return 0; - } - else if (PyDict_SetItem(*c, k, v) == 0) { - Py_DECREF(k); - Py_DECREF(v); - return 0; - } - return -1; -} - -static inline int unpack_callback_map_end(unpack_user* u, msgpack_unpack_object* c) -{ - if (u->object_hook) { - PyObject *new_c = PyObject_CallFunctionObjArgs(u->object_hook, *c, NULL); - if (!new_c) - return -1; - - Py_DECREF(*c); - *c = new_c; - } - return 0; -} - -static inline int unpack_callback_raw(unpack_user* u, const char* b, const char* p, unsigned int l, msgpack_unpack_object* o) -{ - if (l > u->max_str_len) { - PyErr_Format(PyExc_ValueError, "%u exceeds max_str_len(%zd)", l, u->max_str_len); - return -1; - } - - 
PyObject *py; - if(u->encoding) { - py = PyUnicode_Decode(p, l, u->encoding, u->unicode_errors); - } else { - py = PyBytes_FromStringAndSize(p, l); - } - if (!py) - return -1; - *o = py; - return 0; -} - -static inline int unpack_callback_bin(unpack_user* u, const char* b, const char* p, unsigned int l, msgpack_unpack_object* o) -{ - if (l > u->max_bin_len) { - PyErr_Format(PyExc_ValueError, "%u exceeds max_bin_len(%zd)", l, u->max_bin_len); - return -1; - } - - PyObject *py = PyBytes_FromStringAndSize(p, l); - if (!py) - return -1; - *o = py; - return 0; -} - -static inline int unpack_callback_ext(unpack_user* u, const char* base, const char* pos, - unsigned int length, msgpack_unpack_object* o) -{ - PyObject *py; - int8_t typecode = (int8_t)*pos++; - if (!u->ext_hook) { - PyErr_SetString(PyExc_AssertionError, "u->ext_hook cannot be NULL"); - return -1; - } - if (length-1 > u->max_ext_len) { - PyErr_Format(PyExc_ValueError, "%u exceeds max_ext_len(%zd)", length, u->max_ext_len); - return -1; - } - // length also includes the typecode, so the actual data is length-1 -#if PY_MAJOR_VERSION == 2 - py = PyObject_CallFunction(u->ext_hook, (char*)"(is#)", typecode, pos, (Py_ssize_t)length-1); -#else - py = PyObject_CallFunction(u->ext_hook, (char*)"(iy#)", typecode, pos, (Py_ssize_t)length-1); -#endif - if (!py) - return -1; - *o = py; - return 0; -} - -#include "unpack_template.h" diff --git a/pandas/_libs/src/msgpack/unpack_define.h b/pandas/_libs/src/msgpack/unpack_define.h deleted file mode 100644 index 0dd708d17c3d4..0000000000000 --- a/pandas/_libs/src/msgpack/unpack_define.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * MessagePack unpacking routine template - * - * Copyright (C) 2008-2010 FURUHASHI Sadayuki - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#ifndef MSGPACK_UNPACK_DEFINE_H__ -#define MSGPACK_UNPACK_DEFINE_H__ - -#include "msgpack/sysdep.h" -#include -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - - -#ifndef MSGPACK_EMBED_STACK_SIZE -#define MSGPACK_EMBED_STACK_SIZE 32 -#endif - - -// CS is first byte & 0x1f -typedef enum { - CS_HEADER = 0x00, // nil - - //CS_ = 0x01, - //CS_ = 0x02, // false - //CS_ = 0x03, // true - - CS_BIN_8 = 0x04, - CS_BIN_16 = 0x05, - CS_BIN_32 = 0x06, - - CS_EXT_8 = 0x07, - CS_EXT_16 = 0x08, - CS_EXT_32 = 0x09, - - CS_FLOAT = 0x0a, - CS_DOUBLE = 0x0b, - CS_UINT_8 = 0x0c, - CS_UINT_16 = 0x0d, - CS_UINT_32 = 0x0e, - CS_UINT_64 = 0x0f, - CS_INT_8 = 0x10, - CS_INT_16 = 0x11, - CS_INT_32 = 0x12, - CS_INT_64 = 0x13, - - //CS_FIXEXT1 = 0x14, - //CS_FIXEXT2 = 0x15, - //CS_FIXEXT4 = 0x16, - //CS_FIXEXT8 = 0x17, - //CS_FIXEXT16 = 0x18, - - CS_RAW_8 = 0x19, - CS_RAW_16 = 0x1a, - CS_RAW_32 = 0x1b, - CS_ARRAY_16 = 0x1c, - CS_ARRAY_32 = 0x1d, - CS_MAP_16 = 0x1e, - CS_MAP_32 = 0x1f, - - ACS_RAW_VALUE, - ACS_BIN_VALUE, - ACS_EXT_VALUE, -} msgpack_unpack_state; - - -typedef enum { - CT_ARRAY_ITEM, - CT_MAP_KEY, - CT_MAP_VALUE, -} msgpack_container_type; - - -#ifdef __cplusplus -} -#endif - -#endif /* msgpack/unpack_define.h */ diff --git a/pandas/_libs/src/msgpack/unpack_template.h b/pandas/_libs/src/msgpack/unpack_template.h deleted file mode 100644 index 402dcd48cb35a..0000000000000 --- a/pandas/_libs/src/msgpack/unpack_template.h +++ /dev/null @@ -1,475 +0,0 @@ -/* - * MessagePack unpacking routine template - * - * Copyright (C) 2008-2010 
FURUHASHI Sadayuki - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef USE_CASE_RANGE -#ifdef __GNUC__ -#define USE_CASE_RANGE -#endif -#endif - -typedef struct unpack_stack { - PyObject* obj; - size_t size; - size_t count; - unsigned int ct; - PyObject* map_key; -} unpack_stack; - -struct unpack_context { - unpack_user user; - unsigned int cs; - unsigned int trail; - unsigned int top; - /* - unpack_stack* stack; - unsigned int stack_size; - unpack_stack embed_stack[MSGPACK_EMBED_STACK_SIZE]; - */ - unpack_stack stack[MSGPACK_EMBED_STACK_SIZE]; -}; - - -static inline void unpack_init(unpack_context* ctx) -{ - ctx->cs = CS_HEADER; - ctx->trail = 0; - ctx->top = 0; - /* - ctx->stack = ctx->embed_stack; - ctx->stack_size = MSGPACK_EMBED_STACK_SIZE; - */ - ctx->stack[0].obj = unpack_callback_root(&ctx->user); -} - -/* -static inline void unpack_destroy(unpack_context* ctx) -{ - if(ctx->stack_size != MSGPACK_EMBED_STACK_SIZE) { - free(ctx->stack); - } -} -*/ - -static inline PyObject* unpack_data(unpack_context* ctx) -{ - return (ctx)->stack[0].obj; -} - - -template -static inline int unpack_execute(unpack_context* ctx, const char* data, size_t len, size_t* off) -{ - assert(len >= *off); - - const unsigned char* p = (unsigned char*)data + *off; - const unsigned char* const pe = (unsigned char*)data + len; - const void* n = NULL; - - unsigned int trail = ctx->trail; - unsigned int cs = ctx->cs; - unsigned int top = ctx->top; - unpack_stack* stack = 
ctx->stack; - /* - unsigned int stack_size = ctx->stack_size; - */ - unpack_user* user = &ctx->user; - - PyObject* obj = NULL; - unpack_stack* c = NULL; - - int ret; - -#define construct_cb(name) \ - construct && unpack_callback ## name - -#define push_simple_value(func) \ - if(construct_cb(func)(user, &obj) < 0) { goto _failed; } \ - goto _push -#define push_fixed_value(func, arg) \ - if(construct_cb(func)(user, arg, &obj) < 0) { goto _failed; } \ - goto _push -#define push_variable_value(func, base, pos, len) \ - if(construct_cb(func)(user, \ - (const char*)base, (const char*)pos, len, &obj) < 0) { goto _failed; } \ - goto _push - -#define again_fixed_trail(_cs, trail_len) \ - trail = trail_len; \ - cs = _cs; \ - goto _fixed_trail_again -#define again_fixed_trail_if_zero(_cs, trail_len, ifzero) \ - trail = trail_len; \ - if(trail == 0) { goto ifzero; } \ - cs = _cs; \ - goto _fixed_trail_again - -#define start_container(func, count_, ct_) \ - if(top >= MSGPACK_EMBED_STACK_SIZE) { goto _failed; } /* FIXME */ \ - if(construct_cb(func)(user, count_, &stack[top].obj) < 0) { goto _failed; } \ - if((count_) == 0) { obj = stack[top].obj; \ - if (construct_cb(func##_end)(user, &obj) < 0) { goto _failed; } \ - goto _push; } \ - stack[top].ct = ct_; \ - stack[top].size = count_; \ - stack[top].count = 0; \ - ++top; \ - /*printf("container %d count %d stack %d\n",stack[top].obj,count_,top);*/ \ - /*printf("stack push %d\n", top);*/ \ - /* FIXME \ - if(top >= stack_size) { \ - if(stack_size == MSGPACK_EMBED_STACK_SIZE) { \ - size_t csize = sizeof(unpack_stack) * MSGPACK_EMBED_STACK_SIZE; \ - size_t nsize = csize * 2; \ - unpack_stack* tmp = (unpack_stack*)malloc(nsize); \ - if(tmp == NULL) { goto _failed; } \ - memcpy(tmp, ctx->stack, csize); \ - ctx->stack = stack = tmp; \ - ctx->stack_size = stack_size = MSGPACK_EMBED_STACK_SIZE * 2; \ - } else { \ - size_t nsize = sizeof(unpack_stack) * ctx->stack_size * 2; \ - unpack_stack* tmp = (unpack_stack*)realloc(ctx->stack, 
nsize); \ - if(tmp == NULL) { goto _failed; } \ - ctx->stack = stack = tmp; \ - ctx->stack_size = stack_size = stack_size * 2; \ - } \ - } \ - */ \ - goto _header_again - -#define NEXT_CS(p) ((unsigned int)*p & 0x1f) - -#ifdef USE_CASE_RANGE -#define SWITCH_RANGE_BEGIN switch(*p) { -#define SWITCH_RANGE(FROM, TO) case FROM ... TO: -#define SWITCH_RANGE_DEFAULT default: -#define SWITCH_RANGE_END } -#else -#define SWITCH_RANGE_BEGIN { if(0) { -#define SWITCH_RANGE(FROM, TO) } else if(FROM <= *p && *p <= TO) { -#define SWITCH_RANGE_DEFAULT } else { -#define SWITCH_RANGE_END } } -#endif - - if(p == pe) { goto _out; } - do { - switch(cs) { - case CS_HEADER: - SWITCH_RANGE_BEGIN - SWITCH_RANGE(0x00, 0x7f) // Positive Fixnum - push_fixed_value(_uint8, *(uint8_t*)p); - SWITCH_RANGE(0xe0, 0xff) // Negative Fixnum - push_fixed_value(_int8, *(int8_t*)p); - SWITCH_RANGE(0xc0, 0xdf) // Variable - switch(*p) { - case 0xc0: // nil - push_simple_value(_nil); - //case 0xc1: // never used - case 0xc2: // false - push_simple_value(_false); - case 0xc3: // true - push_simple_value(_true); - case 0xc4: // bin 8 - again_fixed_trail(NEXT_CS(p), 1); - case 0xc5: // bin 16 - again_fixed_trail(NEXT_CS(p), 2); - case 0xc6: // bin 32 - again_fixed_trail(NEXT_CS(p), 4); - case 0xc7: // ext 8 - again_fixed_trail(NEXT_CS(p), 1); - case 0xc8: // ext 16 - again_fixed_trail(NEXT_CS(p), 2); - case 0xc9: // ext 32 - again_fixed_trail(NEXT_CS(p), 4); - case 0xca: // float - case 0xcb: // double - case 0xcc: // unsigned int 8 - case 0xcd: // unsigned int 16 - case 0xce: // unsigned int 32 - case 0xcf: // unsigned int 64 - case 0xd0: // signed int 8 - case 0xd1: // signed int 16 - case 0xd2: // signed int 32 - case 0xd3: // signed int 64 - again_fixed_trail(NEXT_CS(p), 1 << (((unsigned int)*p) & 0x03)); - case 0xd4: // fixext 1 - case 0xd5: // fixext 2 - case 0xd6: // fixext 4 - case 0xd7: // fixext 8 - again_fixed_trail_if_zero(ACS_EXT_VALUE, - (1 << (((unsigned int)*p) & 0x03))+1, - _ext_zero); - case 
0xd8: // fixext 16 - again_fixed_trail_if_zero(ACS_EXT_VALUE, 16+1, _ext_zero); - case 0xd9: // str 8 - again_fixed_trail(NEXT_CS(p), 1); - case 0xda: // raw 16 - case 0xdb: // raw 32 - case 0xdc: // array 16 - case 0xdd: // array 32 - case 0xde: // map 16 - case 0xdf: // map 32 - again_fixed_trail(NEXT_CS(p), 2 << (((unsigned int)*p) & 0x01)); - default: - goto _failed; - } - SWITCH_RANGE(0xa0, 0xbf) // FixRaw - again_fixed_trail_if_zero(ACS_RAW_VALUE, ((unsigned int)*p & 0x1f), _raw_zero); - SWITCH_RANGE(0x90, 0x9f) // FixArray - start_container(_array, ((unsigned int)*p) & 0x0f, CT_ARRAY_ITEM); - SWITCH_RANGE(0x80, 0x8f) // FixMap - start_container(_map, ((unsigned int)*p) & 0x0f, CT_MAP_KEY); - - SWITCH_RANGE_DEFAULT - goto _failed; - SWITCH_RANGE_END - // end CS_HEADER - - - _fixed_trail_again: - ++p; - - default: - if((size_t)(pe - p) < trail) { goto _out; } - n = p; p += trail - 1; - switch(cs) { - case CS_EXT_8: - again_fixed_trail_if_zero(ACS_EXT_VALUE, *(uint8_t*)n+1, _ext_zero); - case CS_EXT_16: - again_fixed_trail_if_zero(ACS_EXT_VALUE, - _msgpack_load16(uint16_t,n)+1, - _ext_zero); - case CS_EXT_32: - again_fixed_trail_if_zero(ACS_EXT_VALUE, - _msgpack_load32(uint32_t,n)+1, - _ext_zero); - case CS_FLOAT: { - union { uint32_t i; float f; } mem; - mem.i = _msgpack_load32(uint32_t,n); - push_fixed_value(_float, mem.f); } - case CS_DOUBLE: { - union { uint64_t i; double f; } mem; - mem.i = _msgpack_load64(uint64_t,n); -#if defined(__arm__) && !(__ARM_EABI__) // arm-oabi - // https://github.com/msgpack/msgpack-perl/pull/1 - mem.i = (mem.i & 0xFFFFFFFFUL) << 32UL | (mem.i >> 32UL); -#endif - push_fixed_value(_double, mem.f); } - case CS_UINT_8: - push_fixed_value(_uint8, *(uint8_t*)n); - case CS_UINT_16: - push_fixed_value(_uint16, _msgpack_load16(uint16_t,n)); - case CS_UINT_32: - push_fixed_value(_uint32, _msgpack_load32(uint32_t,n)); - case CS_UINT_64: - push_fixed_value(_uint64, _msgpack_load64(uint64_t,n)); - - case CS_INT_8: - push_fixed_value(_int8, 
*(int8_t*)n); - case CS_INT_16: - push_fixed_value(_int16, _msgpack_load16(int16_t,n)); - case CS_INT_32: - push_fixed_value(_int32, _msgpack_load32(int32_t,n)); - case CS_INT_64: - push_fixed_value(_int64, _msgpack_load64(int64_t,n)); - - case CS_BIN_8: - again_fixed_trail_if_zero(ACS_BIN_VALUE, *(uint8_t*)n, _bin_zero); - case CS_BIN_16: - again_fixed_trail_if_zero(ACS_BIN_VALUE, _msgpack_load16(uint16_t,n), _bin_zero); - case CS_BIN_32: - again_fixed_trail_if_zero(ACS_BIN_VALUE, _msgpack_load32(uint32_t,n), _bin_zero); - case ACS_BIN_VALUE: - _bin_zero: - push_variable_value(_bin, data, n, trail); - - case CS_RAW_8: - again_fixed_trail_if_zero(ACS_RAW_VALUE, *(uint8_t*)n, _raw_zero); - case CS_RAW_16: - again_fixed_trail_if_zero(ACS_RAW_VALUE, _msgpack_load16(uint16_t,n), _raw_zero); - case CS_RAW_32: - again_fixed_trail_if_zero(ACS_RAW_VALUE, _msgpack_load32(uint32_t,n), _raw_zero); - case ACS_RAW_VALUE: - _raw_zero: - push_variable_value(_raw, data, n, trail); - - case ACS_EXT_VALUE: - _ext_zero: - push_variable_value(_ext, data, n, trail); - - case CS_ARRAY_16: - start_container(_array, _msgpack_load16(uint16_t,n), CT_ARRAY_ITEM); - case CS_ARRAY_32: - /* FIXME security guard */ - start_container(_array, _msgpack_load32(uint32_t,n), CT_ARRAY_ITEM); - - case CS_MAP_16: - start_container(_map, _msgpack_load16(uint16_t,n), CT_MAP_KEY); - case CS_MAP_32: - /* FIXME security guard */ - start_container(_map, _msgpack_load32(uint32_t,n), CT_MAP_KEY); - - default: - goto _failed; - } - } - -_push: - if(top == 0) { goto _finish; } - c = &stack[top-1]; - switch(c->ct) { - case CT_ARRAY_ITEM: - if(construct_cb(_array_item)(user, c->count, &c->obj, obj) < 0) { goto _failed; } - if(++c->count == c->size) { - obj = c->obj; - if (construct_cb(_array_end)(user, &obj) < 0) { goto _failed; } - --top; - /*printf("stack pop %d\n", top);*/ - goto _push; - } - goto _header_again; - case CT_MAP_KEY: - c->map_key = obj; - c->ct = CT_MAP_VALUE; - goto _header_again; - case 
CT_MAP_VALUE: - if(construct_cb(_map_item)(user, c->count, &c->obj, c->map_key, obj) < 0) { goto _failed; } - if(++c->count == c->size) { - obj = c->obj; - if (construct_cb(_map_end)(user, &obj) < 0) { goto _failed; } - --top; - /*printf("stack pop %d\n", top);*/ - goto _push; - } - c->ct = CT_MAP_KEY; - goto _header_again; - - default: - goto _failed; - } - -_header_again: - cs = CS_HEADER; - ++p; - } while(p != pe); - goto _out; - - -_finish: - if (!construct) - unpack_callback_nil(user, &obj); - stack[0].obj = obj; - ++p; - ret = 1; - /*printf("-- finish --\n"); */ - goto _end; - -_failed: - /*printf("** FAILED **\n"); */ - ret = -1; - goto _end; - -_out: - ret = 0; - goto _end; - -_end: - ctx->cs = cs; - ctx->trail = trail; - ctx->top = top; - *off = p - (const unsigned char*)data; - - return ret; -#undef construct_cb -} - -#undef SWITCH_RANGE_BEGIN -#undef SWITCH_RANGE -#undef SWITCH_RANGE_DEFAULT -#undef SWITCH_RANGE_END -#undef push_simple_value -#undef push_fixed_value -#undef push_variable_value -#undef again_fixed_trail -#undef again_fixed_trail_if_zero -#undef start_container - -template -static inline int unpack_container_header(unpack_context* ctx, const char* data, size_t len, size_t* off) -{ - assert(len >= *off); - uint32_t size; - const unsigned char *const p = (unsigned char*)data + *off; - -#define inc_offset(inc) \ - if (len - *off < inc) \ - return 0; \ - *off += inc; - - switch (*p) { - case var_offset: - inc_offset(3); - size = _msgpack_load16(uint16_t, p + 1); - break; - case var_offset + 1: - inc_offset(5); - size = _msgpack_load32(uint32_t, p + 1); - break; -#ifdef USE_CASE_RANGE - case fixed_offset + 0x0 ... 
fixed_offset + 0xf: -#else - case fixed_offset + 0x0: - case fixed_offset + 0x1: - case fixed_offset + 0x2: - case fixed_offset + 0x3: - case fixed_offset + 0x4: - case fixed_offset + 0x5: - case fixed_offset + 0x6: - case fixed_offset + 0x7: - case fixed_offset + 0x8: - case fixed_offset + 0x9: - case fixed_offset + 0xa: - case fixed_offset + 0xb: - case fixed_offset + 0xc: - case fixed_offset + 0xd: - case fixed_offset + 0xe: - case fixed_offset + 0xf: -#endif - ++*off; - size = ((unsigned int)*p) & 0x0f; - break; - default: - PyErr_SetString(PyExc_ValueError, "Unexpected type header on stream"); - return -1; - } - unpack_callback_uint32(&ctx->user, size, &ctx->stack[0].obj); - return 1; -} - -#undef SWITCH_RANGE_BEGIN -#undef SWITCH_RANGE -#undef SWITCH_RANGE_DEFAULT -#undef SWITCH_RANGE_END - -static const execute_fn unpack_construct = &unpack_execute; -static const execute_fn unpack_skip = &unpack_execute; -static const execute_fn read_array_header = &unpack_container_header<0x90, 0xdc>; -static const execute_fn read_map_header = &unpack_container_header<0x80, 0xde>; - -#undef NEXT_CS - -/* vim: set ts=4 sw=4 sts=4 expandtab */ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f88f37fac7157..cf2244b4df4d2 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2504,48 +2504,6 @@ def to_hdf( encoding=encoding, ) - def to_msgpack(self, path_or_buf=None, encoding="utf-8", **kwargs): - """ - Serialize object to input file path using msgpack format. - - .. deprecated:: 0.25.0 - - to_msgpack is deprecated and will be removed in a future version. - It is recommended to use pyarrow for on-the-wire transmission of - pandas objects. - - Example pyarrow usage: - - >>> import pandas as pd - >>> import pyarrow as pa - >>> df = pd.DataFrame({'A': [1, 2, 3]}) - >>> context = pa.default_serialization_context() - >>> df_bytestring = context.serialize(df).to_buffer().to_pybytes() - - For documentation on pyarrow, see `here - `__. 
- - Parameters - ---------- - path : str, buffer-like, or None - Destination for the serialized object. - If None, return generated bytes. - append : bool, default False - Whether to append to an existing msgpack. - compress : str, default None - Type of compressor (zlib, blosc or None). - - Returns - ------- - None or bytes - If path_or_buf is None, returns the resulting msgpack format as a - byte string. Otherwise returns None. - """ - - from pandas.io import packers - - return packers.to_msgpack(path_or_buf, self, encoding=encoding, **kwargs) - def to_sql( self, name: str, diff --git a/pandas/io/api.py b/pandas/io/api.py index e20aa18324a34..2d25ffe5f8a6b 100644 --- a/pandas/io/api.py +++ b/pandas/io/api.py @@ -11,7 +11,6 @@ from pandas.io.html import read_html from pandas.io.json import read_json from pandas.io.orc import read_orc -from pandas.io.packers import read_msgpack, to_msgpack from pandas.io.parquet import read_parquet from pandas.io.parsers import read_csv, read_fwf, read_table from pandas.io.pickle import read_pickle, to_pickle diff --git a/pandas/io/msgpack/__init__.py b/pandas/io/msgpack/__init__.py deleted file mode 100644 index 11407c8282660..0000000000000 --- a/pandas/io/msgpack/__init__.py +++ /dev/null @@ -1,56 +0,0 @@ -# coding: utf-8 - -from collections import namedtuple - -from pandas.io.msgpack.exceptions import * # noqa: F401,F403 isort:skip -from pandas.io.msgpack._version import version # noqa: F401 isort:skip - - -class ExtType(namedtuple("ExtType", "code data")): - """ExtType represents ext type in msgpack.""" - - def __new__(cls, code, data): - if not isinstance(code, int): - raise TypeError("code must be int") - if not isinstance(data, bytes): - raise TypeError("data must be bytes") - if not 0 <= code <= 127: - raise ValueError("code must be 0~127") - return super().__new__(cls, code, data) - - -import os # noqa: F401,E402 isort:skip - -from pandas.io.msgpack._unpacker import ( # noqa: F401,E402 isort:skip - Unpacker, - unpack, - 
unpackb, -) -from pandas.io.msgpack._packer import Packer # noqa: E402 isort:skip - - -def pack(o, stream, **kwargs): - """ - Pack object `o` and write it to `stream` - - See :class:`Packer` for options. - """ - packer = Packer(**kwargs) - stream.write(packer.pack(o)) - - -def packb(o, **kwargs): - """ - Pack object `o` and return packed bytes - - See :class:`Packer` for options. - """ - return Packer(**kwargs).pack(o) - - -# alias for compatibility to json/marshal/pickle. -load = unpack -loads = unpackb - -dump = pack -dumps = packb diff --git a/pandas/io/msgpack/_packer.pyi b/pandas/io/msgpack/_packer.pyi deleted file mode 100644 index e95a1622c5615..0000000000000 --- a/pandas/io/msgpack/_packer.pyi +++ /dev/null @@ -1,22 +0,0 @@ -# flake8: noqa - -class Packer: - def __cinit__(self): ... - def __init__( - self, - default=..., - encoding=..., - unicode_errors=..., - use_single_float=..., - autoreset: int = ..., - use_bin_type: int = ..., - ): ... - def __dealloc__(self): ... - def _pack(self, o, nest_limit: int = ...) -> int: ... - def pack(self, obj): ... - def pack_ext_type(self, typecode, data): ... - def pack_array_header(self, size): ... - def pack_map_header(self, size): ... - def pack_map_pairs(self, pairs): ... - def reset(self) -> None: ... - def bytes(self): ... 
diff --git a/pandas/io/msgpack/_packer.pyx b/pandas/io/msgpack/_packer.pyx deleted file mode 100644 index 5c0499b489110..0000000000000 --- a/pandas/io/msgpack/_packer.pyx +++ /dev/null @@ -1,312 +0,0 @@ -# coding: utf-8 -# cython: embedsignature=True - -from cpython.bytes cimport (PyBytes_Check, PyBytes_AsString, - PyBytes_FromStringAndSize) -from cpython.dict cimport PyDict_Check, PyDict_CheckExact -from cpython.float cimport PyFloat_Check -from cpython.int cimport PyInt_Check -from cpython.list cimport PyList_Check -from cpython.long cimport PyLong_Check -from cpython.object cimport PyCallable_Check -from cpython.tuple cimport PyTuple_Check -from cpython.unicode cimport PyUnicode_Check, PyUnicode_AsEncodedString - -from libc.stdlib cimport free, malloc - -from pandas.io.msgpack.exceptions import PackValueError -from pandas.io.msgpack import ExtType -import numpy as np - - -cdef extern from "../../src/msgpack/pack.h": - struct msgpack_packer: - char* buf - size_t length - size_t buf_size - bint use_bin_type - - int msgpack_pack_int(msgpack_packer* pk, int d) - int msgpack_pack_nil(msgpack_packer* pk) - int msgpack_pack_true(msgpack_packer* pk) - int msgpack_pack_false(msgpack_packer* pk) - int msgpack_pack_long(msgpack_packer* pk, long d) - int msgpack_pack_long_long(msgpack_packer* pk, long long d) - int msgpack_pack_unsigned_long_long(msgpack_packer* pk, - unsigned long long d) - int msgpack_pack_float(msgpack_packer* pk, float d) - int msgpack_pack_double(msgpack_packer* pk, double d) - int msgpack_pack_array(msgpack_packer* pk, size_t l) - int msgpack_pack_map(msgpack_packer* pk, size_t l) - int msgpack_pack_raw(msgpack_packer* pk, size_t l) - int msgpack_pack_bin(msgpack_packer* pk, size_t l) - int msgpack_pack_raw_body(msgpack_packer* pk, char* body, size_t l) - int msgpack_pack_ext(msgpack_packer* pk, char typecode, size_t l) - -cdef int DEFAULT_RECURSE_LIMIT=511 - - -cdef class Packer: - """ - MessagePack Packer - - usage:: - - packer = Packer() - 
astream.write(packer.pack(a)) - astream.write(packer.pack(b)) - - Packer's constructor has some keyword arguments: - - :param callable default: - Convert user type to builtin type that Packer supports. - :param str encoding: - Convert unicode to bytes with this encoding. (default: 'utf-8') - :param str unicode_errors: - Error handler for encoding unicode. (default: 'strict') - :param bool use_single_float: - Use single precision float type for float. (default: False) - :param bool autoreset: - Reset buffer after each pack and return it's - content as `bytes`. (default: True). - If set this to false, use `bytes()` to get - content and `.reset()` to clear buffer. - :param bool use_bin_type: - Use bin type introduced in msgpack spec 2.0 for bytes. - It also enable str8 type for unicode. - """ - cdef: - msgpack_packer pk - object _default - object _bencoding - object _berrors - char *encoding - char *unicode_errors - bint use_float - bint autoreset - - def __cinit__(self): - cdef int buf_size = 1024 * 1024 - self.pk.buf = malloc(buf_size) - if self.pk.buf == NULL: - raise MemoryError("Unable to allocate internal buffer.") - self.pk.buf_size = buf_size - self.pk.length = 0 - - def __init__(self, default=None, encoding='utf-8', - unicode_errors='strict', use_single_float=False, - bint autoreset=1, bint use_bin_type=0): - """ - """ - self.use_float = use_single_float - self.autoreset = autoreset - self.pk.use_bin_type = use_bin_type - if default is not None: - if not PyCallable_Check(default): - raise TypeError("default must be a callable.") - self._default = default - if encoding is None: - self.encoding = NULL - self.unicode_errors = NULL - else: - if isinstance(encoding, unicode): - self._bencoding = encoding.encode('ascii') - else: - self._bencoding = encoding - self.encoding = PyBytes_AsString(self._bencoding) - if isinstance(unicode_errors, unicode): - self._berrors = unicode_errors.encode('ascii') - else: - self._berrors = unicode_errors - self.unicode_errors = 
PyBytes_AsString(self._berrors) - - def __dealloc__(self): - free(self.pk.buf); - - cdef int _pack(self, object o, - int nest_limit=DEFAULT_RECURSE_LIMIT) except -1: - cdef: - long long llval - unsigned long long ullval - long longval - float fval - double dval - char* rawval - int ret - dict d - size_t L - int default_used = 0 - - if nest_limit < 0: - raise PackValueError("recursion limit exceeded.") - - while True: - if o is None: - ret = msgpack_pack_nil(&self.pk) - elif isinstance(o, (bool, np.bool_)): - if o: - ret = msgpack_pack_true(&self.pk) - else: - ret = msgpack_pack_false(&self.pk) - elif PyLong_Check(o): - # PyInt_Check(long) is True for Python 3. - # Sow we should test long before int. - if o > 0: - ullval = o - ret = msgpack_pack_unsigned_long_long(&self.pk, ullval) - else: - llval = o - ret = msgpack_pack_long_long(&self.pk, llval) - elif PyInt_Check(o): - longval = o - ret = msgpack_pack_long(&self.pk, longval) - elif PyFloat_Check(o): - if self.use_float: - fval = o - ret = msgpack_pack_float(&self.pk, fval) - else: - dval = o - ret = msgpack_pack_double(&self.pk, dval) - elif PyBytes_Check(o): - L = len(o) - if L > (2**32) - 1: - raise ValueError("bytes is too large") - rawval = o - ret = msgpack_pack_bin(&self.pk, L) - if ret == 0: - ret = msgpack_pack_raw_body(&self.pk, rawval, L) - elif PyUnicode_Check(o): - if not self.encoding: - raise TypeError("Can't encode unicode string: " - "no encoding is specified") - o = PyUnicode_AsEncodedString(o, self.encoding, - self.unicode_errors) - L = len(o) - if L > (2**32) - 1: - raise ValueError("dict is too large") - rawval = o - ret = msgpack_pack_raw(&self.pk, len(o)) - if ret == 0: - ret = msgpack_pack_raw_body(&self.pk, rawval, len(o)) - elif PyDict_CheckExact(o): - d = o - L = len(d) - if L > (2**32) - 1: - raise ValueError("dict is too large") - ret = msgpack_pack_map(&self.pk, L) - if ret == 0: - for k, v in d.items(): - ret = self._pack(k, nest_limit - 1) - if ret != 0: break - ret = self._pack(v, 
nest_limit - 1) - if ret != 0: break - elif PyDict_Check(o): - L = len(o) - if L > (2**32) - 1: - raise ValueError("dict is too large") - ret = msgpack_pack_map(&self.pk, L) - if ret == 0: - for k, v in o.items(): - ret = self._pack(k, nest_limit - 1) - if ret != 0: break - ret = self._pack(v, nest_limit - 1) - if ret != 0: break - elif isinstance(o, ExtType): - # This should be before Tuple because ExtType is namedtuple. - longval = o.code - rawval = o.data - L = len(o.data) - if L > (2**32) - 1: - raise ValueError("EXT data is too large") - ret = msgpack_pack_ext(&self.pk, longval, L) - ret = msgpack_pack_raw_body(&self.pk, rawval, L) - elif PyTuple_Check(o) or PyList_Check(o): - L = len(o) - if L > (2**32) - 1: - raise ValueError("list is too large") - ret = msgpack_pack_array(&self.pk, L) - if ret == 0: - for v in o: - ret = self._pack(v, nest_limit - 1) - if ret != 0: break - elif not default_used and self._default: - o = self._default(o) - default_used = 1 - continue - else: - raise TypeError(f"can't serialize {repr(o)}") - break - return ret - - cpdef pack(self, object obj): - cdef int ret - ret = self._pack(obj, DEFAULT_RECURSE_LIMIT) - if ret == -1: - raise MemoryError - elif ret: # should not happen. 
- raise TypeError - if self.autoreset: - buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) - self.pk.length = 0 - return buf - - def pack_ext_type(self, typecode, data): - msgpack_pack_ext(&self.pk, typecode, len(data)) - msgpack_pack_raw_body(&self.pk, data, len(data)) - - def pack_array_header(self, size_t size): - if size > (2**32) - 1: - raise ValueError - cdef int ret = msgpack_pack_array(&self.pk, size) - if ret == -1: - raise MemoryError - elif ret: # should not happen - raise TypeError - if self.autoreset: - buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) - self.pk.length = 0 - return buf - - def pack_map_header(self, size_t size): - if size > (2**32) - 1: - raise ValueError - cdef int ret = msgpack_pack_map(&self.pk, size) - if ret == -1: - raise MemoryError - elif ret: # should not happen - raise TypeError - if self.autoreset: - buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) - self.pk.length = 0 - return buf - - def pack_map_pairs(self, object pairs): - """ - Pack *pairs* as msgpack map type. - - *pairs* should sequence of pair. - (`len(pairs)` and `for k, v in pairs:` should be supported.) 
- """ - cdef int ret = msgpack_pack_map(&self.pk, len(pairs)) - if ret == 0: - for k, v in pairs: - ret = self._pack(k) - if ret != 0: break - ret = self._pack(v) - if ret != 0: break - if ret == -1: - raise MemoryError - elif ret: # should not happen - raise TypeError - if self.autoreset: - buf = PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) - self.pk.length = 0 - return buf - - def reset(self): - """Clear internal buffer.""" - self.pk.length = 0 - - def bytes(self): - """Return buffer content.""" - return PyBytes_FromStringAndSize(self.pk.buf, self.pk.length) diff --git a/pandas/io/msgpack/_unpacker.pyi b/pandas/io/msgpack/_unpacker.pyi deleted file mode 100644 index 9910895947fb6..0000000000000 --- a/pandas/io/msgpack/_unpacker.pyi +++ /dev/null @@ -1,59 +0,0 @@ -# flake8: noqa - -def unpackb( - packed, - object_hook=..., - list_hook=..., - use_list=..., - encoding=..., - unicode_errors=..., - object_pairs_hook=..., - ext_hook=..., - max_str_len=..., - max_bin_len=..., - max_array_len=..., - max_map_len=..., - max_ext_len=..., -): ... -def unpack( - stream, - object_hook=..., - list_hook=..., - use_list=..., - encoding=..., - unicode_errors=..., - object_pairs_hook=..., -): ... - -class Unpacker: - def __cinit__(self): ... - def __dealloc__(self): ... - def __init__( - self, - file_like=..., - read_size=..., - use_list=..., - object_hook=..., - object_pairs_hook=..., - list_hook=..., - encoding=..., - unicode_errors=..., - max_buffer_size: int = ..., - ext_hook=..., - max_str_len=..., - max_bin_len=..., - max_array_len=..., - max_map_len=..., - max_ext_len=..., - ): ... - def feed(self, next_bytes): ... - def append_buffer(self, _buf, _buf_len): ... - def read_from_file(self): ... - def _unpack(self, execute, write_bytes, iter=...): ... - def read_bytes(self, nbytes): ... - def unpack(self, write_bytes=...): ... - def skip(self, write_bytes=...): ... - def read_array_header(self, write_bytes=...): ... - def read_map_header(self, write_bytes=...): ... 
- def __iter__(self): ... - def __next__(self): ... diff --git a/pandas/io/msgpack/_unpacker.pyx b/pandas/io/msgpack/_unpacker.pyx deleted file mode 100644 index f1817f29bd42a..0000000000000 --- a/pandas/io/msgpack/_unpacker.pyx +++ /dev/null @@ -1,494 +0,0 @@ -# coding: utf-8 -# cython: embedsignature=True - -from cython cimport Py_ssize_t - -from cpython.buffer cimport (PyBUF_SIMPLE, PyObject_GetBuffer, - PyBuffer_Release, Py_buffer) -from cpython.bytes cimport (PyBytes_Size, PyBytes_AsString, - PyBytes_FromStringAndSize) -from cpython.object cimport PyCallable_Check - -cdef extern from "Python.h": - ctypedef struct PyObject - -from libc.stdlib cimport free, malloc -from libc.string cimport memcpy, memmove -from libc.limits cimport INT_MAX - -from pandas.io.msgpack.exceptions import (BufferFull, OutOfData, - UnpackValueError, ExtraData) -from pandas.io.msgpack import ExtType - - -cdef extern from "../../src/msgpack/unpack.h": - ctypedef struct msgpack_user: - bint use_list - PyObject* object_hook - bint has_pairs_hook # call object_hook with k-v pairs - PyObject* list_hook - PyObject* ext_hook - char *encoding - char *unicode_errors - Py_ssize_t max_str_len - Py_ssize_t max_bin_len - Py_ssize_t max_array_len - Py_ssize_t max_map_len - Py_ssize_t max_ext_len - - ctypedef struct unpack_context: - msgpack_user user - PyObject* obj - size_t count - - ctypedef int (*execute_fn)(unpack_context* ctx, const char* data, - size_t len, size_t* off) except? 
-1 - execute_fn unpack_construct - execute_fn unpack_skip - execute_fn read_array_header - execute_fn read_map_header - void unpack_init(unpack_context* ctx) - object unpack_data(unpack_context* ctx) - -cdef inline init_ctx(unpack_context *ctx, - object object_hook, object object_pairs_hook, - object list_hook, object ext_hook, - bint use_list, char* encoding, char* unicode_errors, - Py_ssize_t max_str_len, Py_ssize_t max_bin_len, - Py_ssize_t max_array_len, Py_ssize_t max_map_len, - Py_ssize_t max_ext_len): - unpack_init(ctx) - ctx.user.use_list = use_list - ctx.user.object_hook = ctx.user.list_hook = NULL - ctx.user.max_str_len = max_str_len - ctx.user.max_bin_len = max_bin_len - ctx.user.max_array_len = max_array_len - ctx.user.max_map_len = max_map_len - ctx.user.max_ext_len = max_ext_len - - if object_hook is not None and object_pairs_hook is not None: - raise TypeError("object_pairs_hook and object_hook " - "are mutually exclusive.") - - if object_hook is not None: - if not PyCallable_Check(object_hook): - raise TypeError("object_hook must be a callable.") - ctx.user.object_hook = object_hook - - if object_pairs_hook is None: - ctx.user.has_pairs_hook = False - else: - if not PyCallable_Check(object_pairs_hook): - raise TypeError("object_pairs_hook must be a callable.") - ctx.user.object_hook = object_pairs_hook - ctx.user.has_pairs_hook = True - - if list_hook is not None: - if not PyCallable_Check(list_hook): - raise TypeError("list_hook must be a callable.") - ctx.user.list_hook = list_hook - - if ext_hook is not None: - if not PyCallable_Check(ext_hook): - raise TypeError("ext_hook must be a callable.") - ctx.user.ext_hook = ext_hook - - ctx.user.encoding = encoding - ctx.user.unicode_errors = unicode_errors - - -def default_read_extended_type(typecode, data): - raise NotImplementedError("Cannot decode extended type " - f"with typecode={typecode}") - - -def unpackb(object packed, object object_hook=None, object list_hook=None, - bint use_list=1, 
encoding=None, unicode_errors="strict", - object_pairs_hook=None, ext_hook=ExtType, - Py_ssize_t max_str_len=2147483647, # 2**32-1 - Py_ssize_t max_bin_len=2147483647, - Py_ssize_t max_array_len=2147483647, - Py_ssize_t max_map_len=2147483647, - Py_ssize_t max_ext_len=2147483647): - """ - Unpack packed_bytes to object. Returns an unpacked object. - - Raises `ValueError` when `packed` contains extra bytes. - - See :class:`Unpacker` for options. - """ - cdef: - unpack_context ctx - size_t off = 0 - int ret - - char* buf - Py_ssize_t buf_len - char* cenc = NULL - char* cerr = NULL - Py_buffer view - bytes extra_bytes - - # GH#26769 Effectively re-implement deprecated PyObject_AsReadBuffer; - # based on https://xpra.org/trac/ticket/1884 - PyObject_GetBuffer(packed, &view, PyBUF_SIMPLE) - buf = view.buf - buf_len = view.len - - if encoding is not None: - if isinstance(encoding, unicode): - encoding = encoding.encode('ascii') - cenc = PyBytes_AsString(encoding) - - if unicode_errors is not None: - if isinstance(unicode_errors, unicode): - unicode_errors = unicode_errors.encode('ascii') - cerr = PyBytes_AsString(unicode_errors) - - init_ctx(&ctx, object_hook, object_pairs_hook, list_hook, ext_hook, - use_list, cenc, cerr, - max_str_len, max_bin_len, max_array_len, max_map_len, max_ext_len) - ret = unpack_construct(&ctx, buf, buf_len, &off) - if ret == 1: - obj = unpack_data(&ctx) - if off < buf_len: - extra_bytes = PyBytes_FromStringAndSize(buf + off, buf_len - off) - PyBuffer_Release(&view) - raise ExtraData(obj, extra_bytes) - PyBuffer_Release(&view) - return obj - else: - PyBuffer_Release(&view) - raise UnpackValueError(f"Unpack failed: error = {ret}") - - -def unpack(object stream, object object_hook=None, object list_hook=None, - bint use_list=1, encoding=None, unicode_errors="strict", - object_pairs_hook=None, - ): - """ - Unpack an object from `stream`. - - Raises `ValueError` when `stream` has extra bytes. - - See :class:`Unpacker` for options. 
- """ - return unpackb(stream.read(), use_list=use_list, - object_hook=object_hook, - object_pairs_hook=object_pairs_hook, list_hook=list_hook, - encoding=encoding, unicode_errors=unicode_errors) - - -cdef class Unpacker: - """Streaming unpacker. - - arguments: - - :param file_like: - File-like object having `.read(n)` method. - If specified, unpacker reads serialized data from it and - :meth:`feed()` is not usable. - - :param int read_size: - Used as `file_like.read(read_size)`. (default: - `min(1024**2, max_buffer_size)`) - - :param bool use_list: - If true, unpack msgpack array to Python list. - Otherwise, unpack to Python tuple. (default: True) - - :param callable object_hook: - When specified, it should be callable. - Unpacker calls it with a dict argument after unpacking msgpack map. - - :param callable object_pairs_hook: - When specified, it should be callable. Unpacker calls it with a list - of key-value pairs after unpacking msgpack map. - - :param str encoding: - Encoding used for decoding msgpack raw. - If it is None (default), msgpack raw is deserialized to Python bytes. - - :param str unicode_errors: - Used for decoding msgpack raw with *encoding*. - (default: `'strict'`) - - :param int max_buffer_size: - Limits size of data waiting unpacked. 0 means system's - INT_MAX (default). Raises `BufferFull` exception when it - is insufficient. You should set this parameter when unpacking - data from untrasted source. - - :param int max_str_len: - Limits max length of str. (default: 2**31-1) - - :param int max_bin_len: - Limits max length of bin. (default: 2**31-1) - - :param int max_array_len: - Limits max length of array. (default: 2**31-1) - - :param int max_map_len: - Limits max length of map. 
(default: 2**31-1) - - - example of streaming deserialize from file-like object:: - - unpacker = Unpacker(file_like) - for o in unpacker: - process(o) - - example of streaming deserialize from socket:: - - unpacker = Unpacker() - while True: - buf = sock.recv(1024**2) - if not buf: - break - unpacker.feed(buf) - for o in unpacker: - process(o) - """ - cdef: - unpack_context ctx - char* buf - size_t buf_size, buf_head, buf_tail - object file_like - object file_like_read - Py_ssize_t read_size - # To maintain refcnt. - object object_hook, object_pairs_hook, list_hook, ext_hook - object encoding, unicode_errors - size_t max_buffer_size - - def __cinit__(self): - self.buf = NULL - - def __dealloc__(self): - free(self.buf) - self.buf = NULL - - def __init__(self, file_like=None, Py_ssize_t read_size=0, bint use_list=1, - object object_hook=None, object object_pairs_hook=None, - object list_hook=None, encoding=None, unicode_errors='strict', - int max_buffer_size=0, object ext_hook=ExtType, - Py_ssize_t max_str_len=2147483647, # 2**32-1 - Py_ssize_t max_bin_len=2147483647, - Py_ssize_t max_array_len=2147483647, - Py_ssize_t max_map_len=2147483647, - Py_ssize_t max_ext_len=2147483647): - cdef: - char *cenc=NULL, - char *cerr=NULL - - self.object_hook = object_hook - self.object_pairs_hook = object_pairs_hook - self.list_hook = list_hook - self.ext_hook = ext_hook - - self.file_like = file_like - if file_like: - self.file_like_read = file_like.read - if not PyCallable_Check(self.file_like_read): - raise TypeError("`file_like.read` must be a callable.") - if not max_buffer_size: - max_buffer_size = INT_MAX - if read_size > max_buffer_size: - raise ValueError("read_size should be less or " - "equal to max_buffer_size") - if not read_size: - read_size = min(max_buffer_size, 1024**2) - self.max_buffer_size = max_buffer_size - self.read_size = read_size - self.buf = malloc(read_size) - if self.buf == NULL: - raise MemoryError("Unable to allocate internal buffer.") - 
self.buf_size = read_size - self.buf_head = 0 - self.buf_tail = 0 - - if encoding is not None: - if isinstance(encoding, unicode): - self.encoding = encoding.encode('ascii') - elif isinstance(encoding, bytes): - self.encoding = encoding - else: - raise TypeError("encoding should be bytes or unicode") - cenc = PyBytes_AsString(self.encoding) - - if unicode_errors is not None: - if isinstance(unicode_errors, unicode): - self.unicode_errors = unicode_errors.encode('ascii') - elif isinstance(unicode_errors, bytes): - self.unicode_errors = unicode_errors - else: - raise TypeError("unicode_errors should be bytes or unicode") - cerr = PyBytes_AsString(self.unicode_errors) - - init_ctx(&self.ctx, object_hook, object_pairs_hook, list_hook, - ext_hook, use_list, cenc, cerr, - max_str_len, max_bin_len, max_array_len, - max_map_len, max_ext_len) - - def feed(self, object next_bytes): - """Append `next_bytes` to internal buffer.""" - cdef Py_buffer pybuff - if self.file_like is not None: - raise AssertionError("unpacker.feed() is not be able " - "to use with `file_like`.") - PyObject_GetBuffer(next_bytes, &pybuff, PyBUF_SIMPLE) - try: - self.append_buffer(pybuff.buf, pybuff.len) - finally: - PyBuffer_Release(&pybuff) - - cdef append_buffer(self, void* _buf, Py_ssize_t _buf_len): - cdef: - char* buf = self.buf - char* new_buf - size_t head = self.buf_head - size_t tail = self.buf_tail - size_t buf_size = self.buf_size - size_t new_size - - if tail + _buf_len > buf_size: - if ((tail - head) + _buf_len) <= buf_size: - # move to front. - memmove(buf, buf + head, tail - head) - tail -= head - head = 0 - else: - # expand buffer. 
- new_size = (tail - head) + _buf_len - if new_size > self.max_buffer_size: - raise BufferFull - new_size = min(new_size * 2, self.max_buffer_size) - new_buf = malloc(new_size) - if new_buf == NULL: - # self.buf still holds old buffer and will be freed during - # obj destruction - raise MemoryError("Unable to enlarge internal buffer.") - memcpy(new_buf, buf + head, tail - head) - free(buf) - - buf = new_buf - buf_size = new_size - tail -= head - head = 0 - - memcpy(buf + tail, (_buf), _buf_len) - self.buf = buf - self.buf_head = head - self.buf_size = buf_size - self.buf_tail = tail + _buf_len - - cdef read_from_file(self): - # Assume self.max_buffer_size - (self.buf_tail - self.buf_head) >= 0 - next_bytes = self.file_like_read( - min(self.read_size, - (self.max_buffer_size - - (self.buf_tail - self.buf_head)))) - if next_bytes: - self.append_buffer(PyBytes_AsString(next_bytes), - PyBytes_Size(next_bytes)) - else: - self.file_like = None - - cdef object _unpack(self, execute_fn execute, - object write_bytes, bint iter=0): - cdef: - int ret - object obj - size_t prev_head - - if self.buf_head >= self.buf_tail and self.file_like is not None: - self.read_from_file() - - while 1: - prev_head = self.buf_head - if prev_head >= self.buf_tail: - if iter: - raise StopIteration("No more data to unpack.") - else: - raise OutOfData("No more data to unpack.") - - ret = execute(&self.ctx, self.buf, self.buf_tail, &self.buf_head) - if write_bytes is not None: - write_bytes(PyBytes_FromStringAndSize( - self.buf + prev_head, self.buf_head - prev_head)) - - if ret == 1: - obj = unpack_data(&self.ctx) - unpack_init(&self.ctx) - return obj - elif ret == 0: - if self.file_like is not None: - self.read_from_file() - continue - if iter: - raise StopIteration("No more data to unpack.") - else: - raise OutOfData("No more data to unpack.") - else: - raise ValueError(f"Unpack failed: error = {ret}") - - def read_bytes(self, Py_ssize_t nbytes): - """Read a specified number of raw bytes from 
the stream""" - cdef size_t nread - - # Assume that self.buf_tail - self.buf_head >= 0 - nread = min((self.buf_tail - self.buf_head), nbytes) - ret = PyBytes_FromStringAndSize(self.buf + self.buf_head, nread) - self.buf_head += nread - if len(ret) < nbytes and self.file_like is not None: - ret += self.file_like.read(nbytes - len(ret)) - return ret - - def unpack(self, object write_bytes=None): - """Unpack one object - - If write_bytes is not None, it will be called with parts of the raw - message as it is unpacked. - - Raises `OutOfData` when there are no more bytes to unpack. - """ - return self._unpack(unpack_construct, write_bytes) - - def skip(self, object write_bytes=None): - """Read and ignore one object, returning None - - If write_bytes is not None, it will be called with parts of the raw - message as it is unpacked. - - Raises `OutOfData` when there are no more bytes to unpack. - """ - return self._unpack(unpack_skip, write_bytes) - - def read_array_header(self, object write_bytes=None): - """assuming the next object is an array, return its size n, such that - the next n unpack() calls will iterate over its contents. - - Raises `OutOfData` when there are no more bytes to unpack. - """ - return self._unpack(read_array_header, write_bytes) - - def read_map_header(self, object write_bytes=None): - """assuming the next object is a map, return its size n, such that the - next n * 2 unpack() calls will iterate over its key-value pairs. - - Raises `OutOfData` when there are no more bytes to unpack. - """ - return self._unpack(read_map_header, write_bytes) - - def __iter__(self): - return self - - def __next__(self): - return self._unpack(unpack_construct, None, 1) - - # for debug. 
- # def _buf(self): - # return PyString_FromStringAndSize(self.buf, self.buf_tail) - - # def _off(self): - # return self.buf_head diff --git a/pandas/io/msgpack/_version.py b/pandas/io/msgpack/_version.py deleted file mode 100644 index 2c1c96c0759a1..0000000000000 --- a/pandas/io/msgpack/_version.py +++ /dev/null @@ -1 +0,0 @@ -version = (0, 4, 6) diff --git a/pandas/io/msgpack/exceptions.py b/pandas/io/msgpack/exceptions.py deleted file mode 100644 index 2966f69920930..0000000000000 --- a/pandas/io/msgpack/exceptions.py +++ /dev/null @@ -1,31 +0,0 @@ -class UnpackException(Exception): - pass - - -class BufferFull(UnpackException): - pass - - -class OutOfData(UnpackException): - pass - - -class UnpackValueError(UnpackException, ValueError): - pass - - -class ExtraData(ValueError): - def __init__(self, unpacked, extra): - self.unpacked = unpacked - self.extra = extra - - def __str__(self) -> str: - return "unpack(b) received extra data." - - -class PackException(Exception): - pass - - -class PackValueError(PackException, ValueError): - pass diff --git a/pandas/io/packers.py b/pandas/io/packers.py deleted file mode 100644 index bb7b00571b0df..0000000000000 --- a/pandas/io/packers.py +++ /dev/null @@ -1,865 +0,0 @@ -""" -Msgpack serializer support for reading and writing pandas data structures -to disk - -portions of msgpack_numpy package, by Lev Givon were incorporated -into this module (and tests_packers.py) - -License -======= - -Copyright (c) 2013, Lev Givon. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. -* Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following - disclaimer in the documentation and/or other materials provided - with the distribution. 
-* Neither the name of Lev Givon nor the names of any - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -""" - -from datetime import date, datetime, timedelta -from io import BytesIO -import os -import warnings - -from dateutil.parser import parse -import numpy as np - -from pandas.compat._optional import import_optional_dependency -from pandas.errors import PerformanceWarning -from pandas.util._move import ( - BadMove as _BadMove, - move_into_mutable_buffer as _move_into_mutable_buffer, -) - -from pandas.core.dtypes.common import ( - is_categorical_dtype, - is_datetime64tz_dtype, - is_object_dtype, - needs_i8_conversion, - pandas_dtype, -) - -from pandas import ( # noqa:F401 - Categorical, - CategoricalIndex, - DataFrame, - DatetimeIndex, - Float64Index, - Index, - Int64Index, - Interval, - IntervalIndex, - MultiIndex, - NaT, - Period, - PeriodIndex, - RangeIndex, - Series, - TimedeltaIndex, - Timestamp, -) -from pandas.core import internals -from pandas.core.arrays import DatetimeArray, IntervalArray, PeriodArray -from pandas.core.arrays.sparse import BlockIndex, IntIndex -from pandas.core.generic import NDFrame -from 
pandas.core.internals import BlockManager, _safe_reshape, make_block - -from pandas.io.common import _stringify_path, get_filepath_or_buffer -from pandas.io.msgpack import ExtType, Packer as _Packer, Unpacker as _Unpacker - -# until we can pass this into our conversion functions, -# this is pretty hacky -compressor = None - - -def to_msgpack(path_or_buf, *args, **kwargs): - """ - msgpack (serialize) object to input file path - - .. deprecated:: 0.25.0 - - to_msgpack is deprecated and will be removed in a future version. - It is recommended to use pyarrow for on-the-wire transmission of - pandas objects. - - Example pyarrow usage: - - >>> import pandas as pd - >>> import pyarrow as pa - >>> df = pd.DataFrame({'A': [1, 2, 3]}) - >>> context = pa.default_serialization_context() - >>> df_bytestring = context.serialize(df).to_buffer().to_pybytes() - - For documentation on pyarrow, see `here - `__. - - Parameters - ---------- - path_or_buf : string File path, buffer-like, or None - if None, return generated bytes - args : an object or objects to serialize - encoding : encoding for unicode objects - append : boolean whether to append to an existing msgpack - (default is False) - compress : type of compressor (zlib or blosc), default to None (no - compression) - """ - warnings.warn( - "to_msgpack is deprecated and will be removed in a " - "future version.\n" - "It is recommended to use pyarrow for on-the-wire " - "transmission of pandas objects.\n" - "For a full example, check\n" - "https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.to_msgpack.html", # noqa: E501 - FutureWarning, - stacklevel=3, - ) - - global compressor - compressor = kwargs.pop("compress", None) - append = kwargs.pop("append", None) - if append: - mode = "a+b" - else: - mode = "wb" - - def writer(fh): - for a in args: - fh.write(pack(a, **kwargs)) - - path_or_buf = _stringify_path(path_or_buf) - if isinstance(path_or_buf, str): - try: - with open(path_or_buf, mode) as fh: - 
writer(fh) - except FileNotFoundError: - msg = "File b'{}' does not exist".format(path_or_buf) - raise FileNotFoundError(msg) - elif path_or_buf is None: - buf = BytesIO() - writer(buf) - return buf.getvalue() - else: - writer(path_or_buf) - - -def read_msgpack(path_or_buf, encoding="utf-8", iterator=False, **kwargs): - """ - Load msgpack pandas object from the specified - file path. - - .. deprecated:: 0.25.0 - - read_msgpack is deprecated and will be removed in a future version. - It is recommended to use pyarrow for on-the-wire transmission of - pandas objects. - - Parameters - ---------- - path_or_buf : str, path object or file-like object - Any valid string path is acceptable. The string could be a URL. Valid - URL schemes include http, ftp, s3, and file. For file URLs, a host is - expected. - - If you want to pass in a path object, pandas accepts any - ``os.PathLike``. - - By file-like object, we refer to objects with a ``read()`` method, - such as a file handler (e.g. via builtin ``open`` function) or - ``StringIO``. - encoding : Encoding for decoding msgpack str type - iterator : boolean, if True, return an iterator to the unpacker - (default is False). - - Returns - ------- - obj : same type as object stored in file - - Notes - ----- - read_msgpack is only guaranteed to be backwards compatible to pandas - 0.20.3. 
- """ - warnings.warn( - "The read_msgpack is deprecated and will be removed in a " - "future version.\n" - "It is recommended to use pyarrow for on-the-wire " - "transmission of pandas objects.", - FutureWarning, - stacklevel=3, - ) - - path_or_buf, _, _, should_close = get_filepath_or_buffer(path_or_buf) - if iterator: - return Iterator(path_or_buf) - - def read(fh): - unpacked_obj = list(unpack(fh, encoding=encoding, **kwargs)) - if len(unpacked_obj) == 1: - return unpacked_obj[0] - - if should_close: - try: - path_or_buf.close() - except IOError: - pass - return unpacked_obj - - # see if we have an actual file - if isinstance(path_or_buf, str): - try: - with open(path_or_buf, "rb") as fh: - return read(fh) - except FileNotFoundError: - msg = "File b'{}' does not exist".format(path_or_buf) - raise FileNotFoundError(msg) - - if isinstance(path_or_buf, bytes): - # treat as a binary-like - fh = None - try: - fh = BytesIO(path_or_buf) - return read(fh) - finally: - if fh is not None: - fh.close() - elif hasattr(path_or_buf, "read") and callable(path_or_buf.read): - # treat as a buffer like - return read(path_or_buf) - - raise ValueError("path_or_buf needs to be a string file path or file-like") - - -dtype_dict = { - 21: np.dtype("M8[ns]"), - "datetime64[ns]": np.dtype("M8[ns]"), - "datetime64[us]": np.dtype("M8[us]"), - 22: np.dtype("m8[ns]"), - "timedelta64[ns]": np.dtype("m8[ns]"), - "timedelta64[us]": np.dtype("m8[us]"), - # this is platform int, which we need to remap to np.int64 - # for compat on windows platforms - 7: np.dtype("int64"), - "category": "category", -} - - -def dtype_for(t): - """ return my dtype mapping, whether number or name """ - if t in dtype_dict: - return dtype_dict[t] - return np.typeDict.get(t, t) - - -c2f_dict = {"complex": np.float64, "complex128": np.float64, "complex64": np.float32} - -# windows (32 bit) compat -if hasattr(np, "float128"): - c2f_dict["complex256"] = np.float128 - - -def c2f(r, i, ctype_name): - """ - Convert strings 
to complex number instance with specified numpy type. - """ - - ftype = c2f_dict[ctype_name] - return np.typeDict[ctype_name](ftype(r) + 1j * ftype(i)) - - -def convert(values): - """ convert the numpy values to a list """ - - dtype = values.dtype - - if is_categorical_dtype(values): - return values - - elif is_object_dtype(dtype): - return values.ravel().tolist() - - if needs_i8_conversion(dtype): - values = values.view("i8") - v = values.ravel() - - if compressor == "zlib": - zlib = import_optional_dependency( - "zlib", extra="zlib is required when `compress='zlib'`." - ) - - # return string arrays like they are - if dtype == np.object_: - return v.tolist() - - # convert to a bytes array - v = v.tostring() - return ExtType(0, zlib.compress(v)) - - elif compressor == "blosc": - blosc = import_optional_dependency( - "blosc", extra="zlib is required when `compress='blosc'`." - ) - - # return string arrays like they are - if dtype == np.object_: - return v.tolist() - - # convert to a bytes array - v = v.tostring() - return ExtType(0, blosc.compress(v, typesize=dtype.itemsize)) - - # ndarray (on original dtype) - return ExtType(0, v.tostring()) - - -def unconvert(values, dtype, compress=None): - - as_is_ext = isinstance(values, ExtType) and values.code == 0 - - if as_is_ext: - values = values.data - - if is_categorical_dtype(dtype): - return values - - elif is_object_dtype(dtype): - return np.array(values, dtype=object) - - dtype = pandas_dtype(dtype).base - - if not as_is_ext: - values = values.encode("latin1") - - if compress: - if compress == "zlib": - zlib = import_optional_dependency( - "zlib", extra="zlib is required when `compress='zlib'`." - ) - decompress = zlib.decompress - elif compress == "blosc": - blosc = import_optional_dependency( - "blosc", extra="zlib is required when `compress='blosc'`." 
- ) - decompress = blosc.decompress - else: - raise ValueError("compress must be one of 'zlib' or 'blosc'") - - try: - return np.frombuffer( - _move_into_mutable_buffer(decompress(values)), dtype=dtype - ) - except _BadMove as e: - # Pull the decompressed data off of the `_BadMove` exception. - # We don't just store this in the locals because we want to - # minimize the risk of giving users access to a `bytes` object - # whose data is also given to a mutable buffer. - values = e.args[0] - if len(values) > 1: - # The empty string and single characters are memoized in many - # string creating functions in the capi. This case should not - # warn even though we need to make a copy because we are only - # copying at most 1 byte. - warnings.warn( - "copying data after decompressing; this may mean that" - " decompress is caching its result", - PerformanceWarning, - ) - # fall through to copying `np.fromstring` - - # Copy the bytes into a numpy array. - buf = np.frombuffer(values, dtype=dtype) - buf = buf.copy() # required to not mutate the original data - buf.flags.writeable = True - return buf - - -def encode(obj): - """ - Data encoder - """ - tobj = type(obj) - if isinstance(obj, Index): - if isinstance(obj, RangeIndex): - return { - "typ": "range_index", - "klass": type(obj).__name__, - "name": getattr(obj, "name", None), - "start": obj._range.start, - "stop": obj._range.stop, - "step": obj._range.step, - } - elif isinstance(obj, PeriodIndex): - return { - "typ": "period_index", - "klass": type(obj).__name__, - "name": getattr(obj, "name", None), - "freq": getattr(obj, "freqstr", None), - "dtype": obj.dtype.name, - "data": convert(obj.asi8), - "compress": compressor, - } - elif isinstance(obj, DatetimeIndex): - tz = getattr(obj, "tz", None) - - # store tz info and data as UTC - if tz is not None: - tz = tz.zone - obj = obj.tz_convert("UTC") - return { - "typ": "datetime_index", - "klass": type(obj).__name__, - "name": getattr(obj, "name", None), - "dtype": 
obj.dtype.name, - "data": convert(obj.asi8), - "freq": getattr(obj, "freqstr", None), - "tz": tz, - "compress": compressor, - } - elif isinstance(obj, (IntervalIndex, IntervalArray)): - if isinstance(obj, IntervalIndex): - typ = "interval_index" - else: - typ = "interval_array" - return { - "typ": typ, - "klass": type(obj).__name__, - "name": getattr(obj, "name", None), - "left": getattr(obj, "left", None), - "right": getattr(obj, "right", None), - "closed": getattr(obj, "closed", None), - } - elif isinstance(obj, MultiIndex): - return { - "typ": "multi_index", - "klass": type(obj).__name__, - "names": getattr(obj, "names", None), - "dtype": obj.dtype.name, - "data": convert(obj.values), - "compress": compressor, - } - else: - return { - "typ": "index", - "klass": type(obj).__name__, - "name": getattr(obj, "name", None), - "dtype": obj.dtype.name, - "data": convert(obj.values), - "compress": compressor, - } - - elif isinstance(obj, Categorical): - return { - "typ": "category", - "klass": type(obj).__name__, - "name": getattr(obj, "name", None), - "codes": obj.codes, - "categories": obj.categories, - "ordered": obj.ordered, - "compress": compressor, - } - - elif isinstance(obj, Series): - return { - "typ": "series", - "klass": type(obj).__name__, - "name": getattr(obj, "name", None), - "index": obj.index, - "dtype": obj.dtype.name, - "data": convert(obj.values), - "compress": compressor, - } - elif issubclass(tobj, NDFrame): - data = obj._data - if not data.is_consolidated(): - data = data.consolidate() - - # the block manager - return { - "typ": "block_manager", - "klass": type(obj).__name__, - "axes": data.axes, - "blocks": [ - { - "locs": b.mgr_locs.as_array, - "values": convert(b.values), - "shape": b.values.shape, - "dtype": b.dtype.name, - "klass": type(b).__name__, - "compress": compressor, - } - for b in data.blocks - ], - } - - elif ( - isinstance(obj, (datetime, date, np.datetime64, timedelta, np.timedelta64)) - or obj is NaT - ): - if isinstance(obj, 
Timestamp): - tz = obj.tzinfo - if tz is not None: - tz = tz.zone - freq = obj.freq - if freq is not None: - freq = freq.freqstr - return {"typ": "timestamp", "value": obj.value, "freq": freq, "tz": tz} - if obj is NaT: - return {"typ": "nat"} - elif isinstance(obj, np.timedelta64): - return {"typ": "timedelta64", "data": obj.view("i8")} - elif isinstance(obj, timedelta): - return { - "typ": "timedelta", - "data": (obj.days, obj.seconds, obj.microseconds), - } - elif isinstance(obj, np.datetime64): - return {"typ": "datetime64", "data": str(obj)} - elif isinstance(obj, datetime): - return {"typ": "datetime", "data": obj.isoformat()} - elif isinstance(obj, date): - return {"typ": "date", "data": obj.isoformat()} - raise Exception("cannot encode this datetimelike object: {obj}".format(obj=obj)) - elif isinstance(obj, Period): - return {"typ": "period", "ordinal": obj.ordinal, "freq": obj.freqstr} - elif isinstance(obj, Interval): - return { - "typ": "interval", - "left": obj.left, - "right": obj.right, - "closed": obj.closed, - } - elif isinstance(obj, BlockIndex): - return { - "typ": "block_index", - "klass": type(obj).__name__, - "blocs": obj.blocs, - "blengths": obj.blengths, - "length": obj.length, - } - elif isinstance(obj, IntIndex): - return { - "typ": "int_index", - "klass": type(obj).__name__, - "indices": obj.indices, - "length": obj.length, - } - elif isinstance(obj, np.ndarray): - return { - "typ": "ndarray", - "shape": obj.shape, - "ndim": obj.ndim, - "dtype": obj.dtype.name, - "data": convert(obj), - "compress": compressor, - } - elif isinstance(obj, np.number): - if np.iscomplexobj(obj): - return { - "typ": "np_scalar", - "sub_typ": "np_complex", - "dtype": obj.dtype.name, - "real": np.real(obj).__repr__(), - "imag": np.imag(obj).__repr__(), - } - else: - return {"typ": "np_scalar", "dtype": obj.dtype.name, "data": obj.__repr__()} - elif isinstance(obj, complex): - return { - "typ": "np_complex", - "real": np.real(obj).__repr__(), - "imag": 
np.imag(obj).__repr__(), - } - - return obj - - -def decode(obj): - """ - Decoder for deserializing numpy data types. - """ - - typ = obj.get("typ") - if typ is None: - return obj - elif typ == "timestamp": - freq = obj["freq"] if "freq" in obj else obj["offset"] - return Timestamp(obj["value"], tz=obj["tz"], freq=freq) - elif typ == "nat": - return NaT - elif typ == "period": - return Period(ordinal=obj["ordinal"], freq=obj["freq"]) - elif typ == "index": - dtype = dtype_for(obj["dtype"]) - data = unconvert(obj["data"], dtype, obj.get("compress")) - return Index(data, dtype=dtype, name=obj["name"]) - elif typ == "range_index": - return RangeIndex(obj["start"], obj["stop"], obj["step"], name=obj["name"]) - elif typ == "multi_index": - dtype = dtype_for(obj["dtype"]) - data = unconvert(obj["data"], dtype, obj.get("compress")) - data = [tuple(x) for x in data] - return MultiIndex.from_tuples(data, names=obj["names"]) - elif typ == "period_index": - data = unconvert(obj["data"], np.int64, obj.get("compress")) - d = dict(name=obj["name"], freq=obj["freq"]) - freq = d.pop("freq", None) - return PeriodIndex(PeriodArray(data, freq), **d) - - elif typ == "datetime_index": - data = unconvert(obj["data"], np.int64, obj.get("compress")) - d = dict(name=obj["name"], freq=obj["freq"]) - result = DatetimeIndex(data, **d) - tz = obj["tz"] - - # reverse tz conversion - if tz is not None: - result = result.tz_localize("UTC").tz_convert(tz) - return result - - elif typ in ("interval_index", "interval_array"): - return globals()[obj["klass"]].from_arrays( - obj["left"], obj["right"], obj["closed"], name=obj["name"] - ) - elif typ == "category": - from_codes = globals()[obj["klass"]].from_codes - return from_codes( - codes=obj["codes"], categories=obj["categories"], ordered=obj["ordered"] - ) - - elif typ == "interval": - return Interval(obj["left"], obj["right"], obj["closed"]) - elif typ == "series": - dtype = dtype_for(obj["dtype"]) - index = obj["index"] - data = 
unconvert(obj["data"], dtype, obj["compress"]) - return Series(data, index=index, dtype=dtype, name=obj["name"]) - - elif typ == "block_manager": - axes = obj["axes"] - - def create_block(b): - values = _safe_reshape( - unconvert(b["values"], dtype_for(b["dtype"]), b["compress"]), b["shape"] - ) - - # locs handles duplicate column names, and should be used instead - # of items; see GH 9618 - if "locs" in b: - placement = b["locs"] - else: - placement = axes[0].get_indexer(b["items"]) - - if is_datetime64tz_dtype(b["dtype"]): - assert isinstance(values, np.ndarray), type(values) - assert values.dtype == "M8[ns]", values.dtype - values = DatetimeArray(values, dtype=b["dtype"]) - - return make_block( - values=values, - klass=getattr(internals, b["klass"]), - placement=placement, - dtype=b["dtype"], - ) - - blocks = [create_block(b) for b in obj["blocks"]] - return globals()[obj["klass"]](BlockManager(blocks, axes)) - elif typ == "datetime": - return parse(obj["data"]) - elif typ == "datetime64": - return np.datetime64(parse(obj["data"])) - elif typ == "date": - return parse(obj["data"]).date() - elif typ == "timedelta": - return timedelta(*obj["data"]) - elif typ == "timedelta64": - return np.timedelta64(int(obj["data"])) - elif typ == "block_index": - return globals()[obj["klass"]](obj["length"], obj["blocs"], obj["blengths"]) - elif typ == "int_index": - return globals()[obj["klass"]](obj["length"], obj["indices"]) - elif typ == "ndarray": - return unconvert( - obj["data"], np.typeDict[obj["dtype"]], obj.get("compress") - ).reshape(obj["shape"]) - elif typ == "np_scalar": - if obj.get("sub_typ") == "np_complex": - return c2f(obj["real"], obj["imag"], obj["dtype"]) - else: - dtype = dtype_for(obj["dtype"]) - try: - return dtype(obj["data"]) - except (ValueError, TypeError): - return dtype.type(obj["data"]) - elif typ == "np_complex": - return complex(obj["real"] + "+" + obj["imag"] + "j") - elif isinstance(obj, (dict, list, set)): - return obj - else: - return obj - 
- -def pack( - o, - default=encode, - encoding="utf-8", - unicode_errors="strict", - use_single_float=False, - autoreset=1, - use_bin_type=1, -): - """ - Pack an object and return the packed bytes. - """ - - return Packer( - default=default, - encoding=encoding, - unicode_errors=unicode_errors, - use_single_float=use_single_float, - autoreset=autoreset, - use_bin_type=use_bin_type, - ).pack(o) - - -def unpack( - packed, - object_hook=decode, - list_hook=None, - use_list=False, - encoding="utf-8", - unicode_errors="strict", - object_pairs_hook=None, - max_buffer_size=0, - ext_hook=ExtType, -): - """ - Unpack a packed object, return an iterator - Note: packed lists will be returned as tuples - """ - - return Unpacker( - packed, - object_hook=object_hook, - list_hook=list_hook, - use_list=use_list, - encoding=encoding, - unicode_errors=unicode_errors, - object_pairs_hook=object_pairs_hook, - max_buffer_size=max_buffer_size, - ext_hook=ext_hook, - ) - - -class Packer(_Packer): - def __init__( - self, - default=encode, - encoding="utf-8", - unicode_errors="strict", - use_single_float=False, - autoreset=1, - use_bin_type=1, - ): - super().__init__( - default=default, - encoding=encoding, - unicode_errors=unicode_errors, - use_single_float=use_single_float, - autoreset=autoreset, - use_bin_type=use_bin_type, - ) - - -class Unpacker(_Unpacker): - def __init__( - self, - file_like=None, - read_size=0, - use_list=False, - object_hook=decode, - object_pairs_hook=None, - list_hook=None, - encoding="utf-8", - unicode_errors="strict", - max_buffer_size=0, - ext_hook=ExtType, - ): - super().__init__( - file_like=file_like, - read_size=read_size, - use_list=use_list, - object_hook=object_hook, - object_pairs_hook=object_pairs_hook, - list_hook=list_hook, - encoding=encoding, - unicode_errors=unicode_errors, - max_buffer_size=max_buffer_size, - ext_hook=ext_hook, - ) - - -class Iterator: - """ manage the unpacking iteration, - close the file on completion """ - - def __init__(self, 
path, **kwargs): - self.path = path - self.kwargs = kwargs - - def __iter__(self): - - needs_closing = True - try: - - # see if we have an actual file - if isinstance(self.path, str): - - try: - path_exists = os.path.exists(self.path) - except TypeError: - path_exists = False - - if path_exists: - fh = open(self.path, "rb") - else: - fh = BytesIO(self.path) - - else: - - if not hasattr(self.path, "read"): - fh = BytesIO(self.path) - - else: - - # a file-like - needs_closing = False - fh = self.path - - unpacker = unpack(fh) - for o in unpacker: - yield o - finally: - if needs_closing: - fh.close() diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 870d7fd6e44c1..b832440aca99c 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -157,7 +157,6 @@ class TestPDApi(Base): "read_hdf", "read_html", "read_json", - "read_msgpack", "read_pickle", "read_sas", "read_sql", @@ -172,7 +171,7 @@ class TestPDApi(Base): ] # top-level to_* funcs - funcs_to = ["to_datetime", "to_msgpack", "to_numeric", "to_pickle", "to_timedelta"] + funcs_to = ["to_datetime", "to_numeric", "to_pickle", "to_timedelta"] # top-level to deprecate in the future deprecated_funcs_in_future: List[str] = [] diff --git a/pandas/tests/io/generate_legacy_storage_files.py b/pandas/tests/io/generate_legacy_storage_files.py index e63644a44a81f..6ef0e0457e2e2 100755 --- a/pandas/tests/io/generate_legacy_storage_files.py +++ b/pandas/tests/io/generate_legacy_storage_files.py @@ -1,7 +1,7 @@ #!/usr/bin/env python """ -self-contained to write legacy storage (pickle/msgpack) files +self-contained to write legacy storage pickle files To use this script. 
Create an environment where you want generate pickles, say its for 0.20.3, with your pandas clone @@ -58,7 +58,6 @@ date_range, period_range, timedelta_range, - to_msgpack, ) from pandas.tseries.offsets import ( @@ -136,7 +135,7 @@ def _create_sp_frame(): def create_data(): - """ create the pickle/msgpack data """ + """ create the pickle data """ data = { "A": [0.0, 1.0, 2.0, 3.0, np.nan], @@ -306,28 +305,6 @@ def create_pickle_data(): return data -def _u(x): - return {k: _u(x[k]) for k in x} if isinstance(x, dict) else x - - -def create_msgpack_data(): - data = create_data() - # Not supported - del data["sp_series"] - del data["sp_frame"] - del data["series"]["cat"] - del data["series"]["period"] - del data["frame"]["cat_onecol"] - del data["frame"]["cat_and_float"] - del data["scalars"]["period"] - if _loose_version >= LooseVersion("0.21") and ( - _loose_version < LooseVersion("0.23.0") - ): - del data["index"]["interval"] - del data["offsets"] - return _u(data) - - def platform_name(): return "_".join( [ @@ -360,23 +337,6 @@ def write_legacy_pickles(output_dir): print("created pickle file: {pth}".format(pth=pth)) -def write_legacy_msgpack(output_dir, compress): - - version = pandas.__version__ - - print( - "This script generates a storage file for the current arch, " - "system, and python version" - ) - print(" pandas version: {0}".format(version)) - print(" output dir : {0}".format(output_dir)) - print(" storage format: msgpack") - pth = "{0}.msgpack".format(platform_name()) - to_msgpack(os.path.join(output_dir, pth), create_msgpack_data(), compress=compress) - - print("created msgpack file: {pth}".format(pth=pth)) - - def write_legacy_file(): # force our cwd to be the first searched sys.path.insert(0, ".") @@ -385,22 +345,15 @@ def write_legacy_file(): exit( "Specify output directory and storage type: generate_legacy_" "storage_files.py " - "" ) output_dir = str(sys.argv[1]) storage_type = str(sys.argv[2]) - try: - compress_type = str(sys.argv[3]) - except 
IndexError: - compress_type = None if storage_type == "pickle": write_legacy_pickles(output_dir=output_dir) - elif storage_type == "msgpack": - write_legacy_msgpack(output_dir=output_dir, compress=compress_type) else: - exit("storage_type must be one of {'pickle', 'msgpack'}") + exit("storage_type must be one of {'pickle'}") if __name__ == "__main__": diff --git a/pandas/tests/io/msgpack/__init__.py b/pandas/tests/io/msgpack/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/pandas/tests/io/msgpack/common.py b/pandas/tests/io/msgpack/common.py deleted file mode 100644 index 60c1c0db18de8..0000000000000 --- a/pandas/tests/io/msgpack/common.py +++ /dev/null @@ -1,2 +0,0 @@ -frombytes = lambda obj, data: obj.frombytes(data) -tobytes = lambda obj: obj.tobytes() diff --git a/pandas/tests/io/msgpack/data/frame.mp b/pandas/tests/io/msgpack/data/frame.mp deleted file mode 100644 index 21e20d262b26c1a4835bdb4c00109a371e7e46f1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 309 zcmYk2O%8%E5Jo9yGVV3T#H|+~Bc1pIl%`V+>`BSsB9Di(QO7A<_Z!tkRhFlHfXnkW7Y@kqOFZ^iNWKsK=wu;N|_+U!*!fyhnO_?A!CdXU{q@e~9VdSucd1w*T|Td;pDWe-{7% diff --git a/pandas/tests/io/msgpack/test_buffer.py b/pandas/tests/io/msgpack/test_buffer.py deleted file mode 100644 index fe1f4e73eba24..0000000000000 --- a/pandas/tests/io/msgpack/test_buffer.py +++ /dev/null @@ -1,22 +0,0 @@ -# coding: utf-8 - -from pandas.io.msgpack import packb, unpackb - -from .common import frombytes - - -def test_unpack_buffer(): - from array import array - - buf = array("b") - frombytes(buf, packb((b"foo", b"bar"))) - obj = unpackb(buf, use_list=1) - assert [b"foo", b"bar"] == obj - - -def test_unpack_bytearray(): - buf = bytearray(packb(("foo", "bar"))) - obj = unpackb(buf, use_list=1) - assert [b"foo", b"bar"] == obj - expected_type = bytes - assert all(type(s) == expected_type for s in obj) diff --git a/pandas/tests/io/msgpack/test_case.py 
b/pandas/tests/io/msgpack/test_case.py deleted file mode 100644 index 7e13e0dd1e8ca..0000000000000 --- a/pandas/tests/io/msgpack/test_case.py +++ /dev/null @@ -1,149 +0,0 @@ -# coding: utf-8 - -from pandas.io.msgpack import packb, unpackb - - -def check(length, obj): - v = packb(obj) - assert ( - len(v) == length - ), f"{repr(obj)} length should be {length} but got {repr(len(v))}" - assert unpackb(v, use_list=0) == obj - - -def test_1(): - for o in [ - None, - True, - False, - 0, - 1, - (1 << 6), - (1 << 7) - 1, - -1, - -((1 << 5) - 1), - -(1 << 5), - ]: - check(1, o) - - -def test_2(): - for o in [1 << 7, (1 << 8) - 1, -((1 << 5) + 1), -(1 << 7)]: - check(2, o) - - -def test_3(): - for o in [1 << 8, (1 << 16) - 1, -((1 << 7) + 1), -(1 << 15)]: - check(3, o) - - -def test_5(): - for o in [1 << 16, (1 << 32) - 1, -((1 << 15) + 1), -(1 << 31)]: - check(5, o) - - -def test_9(): - for o in [ - 1 << 32, - (1 << 64) - 1, - -((1 << 31) + 1), - -(1 << 63), - 1.0, - 0.1, - -0.1, - -1.0, - ]: - check(9, o) - - -def check_raw(overhead, num): - check(num + overhead, b" " * num) - - -def test_fixraw(): - check_raw(1, 0) - check_raw(1, (1 << 5) - 1) - - -def test_raw16(): - check_raw(3, 1 << 5) - check_raw(3, (1 << 16) - 1) - - -def test_raw32(): - check_raw(5, 1 << 16) - - -def check_array(overhead, num): - check(num + overhead, (None,) * num) - - -def test_fixarray(): - check_array(1, 0) - check_array(1, (1 << 4) - 1) - - -def test_array16(): - check_array(3, 1 << 4) - check_array(3, (1 << 16) - 1) - - -def test_array32(): - check_array(5, (1 << 16)) - - -def match(obj, buf): - assert packb(obj) == buf - assert unpackb(buf, use_list=0) == obj - - -def test_match(): - cases = [ - (None, b"\xc0"), - (False, b"\xc2"), - (True, b"\xc3"), - (0, b"\x00"), - (127, b"\x7f"), - (128, b"\xcc\x80"), - (256, b"\xcd\x01\x00"), - (-1, b"\xff"), - (-33, b"\xd0\xdf"), - (-129, b"\xd1\xff\x7f"), - ({1: 1}, b"\x81\x01\x01"), - (1.0, b"\xcb\x3f\xf0\x00\x00\x00\x00\x00\x00"), - ((), b"\x90"), - ( 
- tuple(range(15)), - (b"\x9f\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09" b"\x0a\x0b\x0c\x0d\x0e"), - ), - ( - tuple(range(16)), - ( - b"\xdc\x00\x10\x00\x01\x02\x03\x04\x05\x06\x07" - b"\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" - ), - ), - ({}, b"\x80"), - ( - {x: x for x in range(15)}, - ( - b"\x8f\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06\x06\x07" - b"\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e" - ), - ), - ( - {x: x for x in range(16)}, - ( - b"\xde\x00\x10\x00\x00\x01\x01\x02\x02\x03\x03\x04\x04\x05\x05\x06" - b"\x06\x07\x07\x08\x08\t\t\n\n\x0b\x0b\x0c\x0c\r\r\x0e\x0e" - b"\x0f\x0f" - ), - ), - ] - - for v, p in cases: - match(v, p) - - -def test_unicode(): - assert unpackb(packb("foobar"), use_list=1) == b"foobar" diff --git a/pandas/tests/io/msgpack/test_except.py b/pandas/tests/io/msgpack/test_except.py deleted file mode 100644 index 60c1dcca162a9..0000000000000 --- a/pandas/tests/io/msgpack/test_except.py +++ /dev/null @@ -1,38 +0,0 @@ -# coding: utf-8 - -from datetime import datetime - -import pytest - -from pandas.io.msgpack import packb, unpackb - - -class DummyException(Exception): - pass - - -class TestExceptions: - def test_raise_on_find_unsupported_value(self): - msg = "can't serialize datetime" - with pytest.raises(TypeError, match=msg): - packb(datetime.now()) - - def test_raise_from_object_hook(self): - def hook(_): - raise DummyException() - - with pytest.raises(DummyException): - unpackb(packb({}), object_hook=hook) - with pytest.raises(DummyException): - unpackb(packb({"fizz": "buzz"}), object_hook=hook) - with pytest.raises(DummyException): - unpackb(packb({"fizz": "buzz"}), object_pairs_hook=hook) - with pytest.raises(DummyException): - unpackb(packb({"fizz": {"buzz": "spam"}}), object_hook=hook) - with pytest.raises(DummyException): - unpackb(packb({"fizz": {"buzz": "spam"}}), object_pairs_hook=hook) - - def test_invalid_value(self): - msg = "Unpack failed: error" - with pytest.raises(ValueError, match=msg): - unpackb(b"\xd9\x97#DL_") 
diff --git a/pandas/tests/io/msgpack/test_extension.py b/pandas/tests/io/msgpack/test_extension.py deleted file mode 100644 index 5390c8aecada7..0000000000000 --- a/pandas/tests/io/msgpack/test_extension.py +++ /dev/null @@ -1,63 +0,0 @@ -import array - -import pandas.io.msgpack as msgpack -from pandas.io.msgpack import ExtType - -from .common import frombytes, tobytes - - -def test_pack_ext_type(): - def p(s): - packer = msgpack.Packer() - packer.pack_ext_type(0x42, s) - return packer.bytes() - - assert p(b"A") == b"\xd4\x42A" # fixext 1 - assert p(b"AB") == b"\xd5\x42AB" # fixext 2 - assert p(b"ABCD") == b"\xd6\x42ABCD" # fixext 4 - assert p(b"ABCDEFGH") == b"\xd7\x42ABCDEFGH" # fixext 8 - assert p(b"A" * 16) == b"\xd8\x42" + b"A" * 16 # fixext 16 - assert p(b"ABC") == b"\xc7\x03\x42ABC" # ext 8 - assert p(b"A" * 0x0123) == b"\xc8\x01\x23\x42" + b"A" * 0x0123 # ext 16 - assert ( - p(b"A" * 0x00012345) == b"\xc9\x00\x01\x23\x45\x42" + b"A" * 0x00012345 - ) # ext 32 - - -def test_unpack_ext_type(): - def check(b, expected): - assert msgpack.unpackb(b) == expected - - check(b"\xd4\x42A", ExtType(0x42, b"A")) # fixext 1 - check(b"\xd5\x42AB", ExtType(0x42, b"AB")) # fixext 2 - check(b"\xd6\x42ABCD", ExtType(0x42, b"ABCD")) # fixext 4 - check(b"\xd7\x42ABCDEFGH", ExtType(0x42, b"ABCDEFGH")) # fixext 8 - check(b"\xd8\x42" + b"A" * 16, ExtType(0x42, b"A" * 16)) # fixext 16 - check(b"\xc7\x03\x42ABC", ExtType(0x42, b"ABC")) # ext 8 - check(b"\xc8\x01\x23\x42" + b"A" * 0x0123, ExtType(0x42, b"A" * 0x0123)) # ext 16 - check( - b"\xc9\x00\x01\x23\x45\x42" + b"A" * 0x00012345, - ExtType(0x42, b"A" * 0x00012345), - ) # ext 32 - - -def test_extension_type(): - def default(obj): - print("default called", obj) - if isinstance(obj, array.array): - typecode = 123 # application specific typecode - data = tobytes(obj) - return ExtType(typecode, data) - raise TypeError(f"Unknown type object {repr(obj)}") - - def ext_hook(code, data): - print("ext_hook called", code, data) - assert 
code == 123 - obj = array.array("d") - frombytes(obj, data) - return obj - - obj = [42, b"hello", array.array("d", [1.1, 2.2, 3.3])] - s = msgpack.packb(obj, default=default) - obj2 = msgpack.unpackb(s, ext_hook=ext_hook) - assert obj == obj2 diff --git a/pandas/tests/io/msgpack/test_format.py b/pandas/tests/io/msgpack/test_format.py deleted file mode 100644 index 46d0116bc3926..0000000000000 --- a/pandas/tests/io/msgpack/test_format.py +++ /dev/null @@ -1,84 +0,0 @@ -# coding: utf-8 - -from pandas.io.msgpack import unpackb - - -def check(src, should, use_list=0): - assert unpackb(src, use_list=use_list) == should - - -def testSimpleValue(): - check(b"\x93\xc0\xc2\xc3", (None, False, True)) - - -def testFixnum(): - check(b"\x92\x93\x00\x40\x7f\x93\xe0\xf0\xff", ((0, 64, 127), (-32, -16, -1))) - - -def testFixArray(): - check(b"\x92\x90\x91\x91\xc0", ((), ((None,),))) - - -def testFixRaw(): - check(b"\x94\xa0\xa1a\xa2bc\xa3def", (b"", b"a", b"bc", b"def")) - - -def testFixMap(): - check( - b"\x82\xc2\x81\xc0\xc0\xc3\x81\xc0\x80", {False: {None: None}, True: {None: {}}} - ) - - -def testUnsignedInt(): - check( - b"\x99\xcc\x00\xcc\x80\xcc\xff\xcd\x00\x00\xcd\x80\x00" - b"\xcd\xff\xff\xce\x00\x00\x00\x00\xce\x80\x00\x00\x00" - b"\xce\xff\xff\xff\xff", - (0, 128, 255, 0, 32768, 65535, 0, 2147483648, 4294967295), - ) - - -def testSignedInt(): - check( - b"\x99\xd0\x00\xd0\x80\xd0\xff\xd1\x00\x00\xd1\x80\x00" - b"\xd1\xff\xff\xd2\x00\x00\x00\x00\xd2\x80\x00\x00\x00" - b"\xd2\xff\xff\xff\xff", - (0, -128, -1, 0, -32768, -1, 0, -2147483648, -1), - ) - - -def testRaw(): - check( - b"\x96\xda\x00\x00\xda\x00\x01a\xda\x00\x02ab\xdb\x00\x00" - b"\x00\x00\xdb\x00\x00\x00\x01a\xdb\x00\x00\x00\x02ab", - (b"", b"a", b"ab", b"", b"a", b"ab"), - ) - - -def testArray(): - check( - b"\x96\xdc\x00\x00\xdc\x00\x01\xc0\xdc\x00\x02\xc2\xc3\xdd\x00" - b"\x00\x00\x00\xdd\x00\x00\x00\x01\xc0\xdd\x00\x00\x00\x02" - b"\xc2\xc3", - ((), (None,), (False, True), (), (None,), (False, True)), - ) - 
- -def testMap(): - check( - b"\x96" - b"\xde\x00\x00" - b"\xde\x00\x01\xc0\xc2" - b"\xde\x00\x02\xc0\xc2\xc3\xc2" - b"\xdf\x00\x00\x00\x00" - b"\xdf\x00\x00\x00\x01\xc0\xc2" - b"\xdf\x00\x00\x00\x02\xc0\xc2\xc3\xc2", - ( - {}, - {None: False}, - {True: False, None: False}, - {}, - {None: False}, - {True: False, None: False}, - ), - ) diff --git a/pandas/tests/io/msgpack/test_limits.py b/pandas/tests/io/msgpack/test_limits.py deleted file mode 100644 index 4c0697f8faf64..0000000000000 --- a/pandas/tests/io/msgpack/test_limits.py +++ /dev/null @@ -1,107 +0,0 @@ -# coding: utf-8 -import pytest - -from pandas.io.msgpack import ExtType, Packer, Unpacker, packb, unpackb - - -class TestLimits: - def test_integer(self): - x = -(2 ** 63) - assert unpackb(packb(x)) == x - msg = ( - r"((long |Python )?(int )?too (big|large) to convert" - r"( to C (unsigned )?long))?" - ) - with pytest.raises((OverflowError, ValueError), match=msg): - packb(x - 1) - x = 2 ** 64 - 1 - assert unpackb(packb(x)) == x - with pytest.raises((OverflowError, ValueError), match=msg): - packb(x + 1) - - def test_array_header(self): - packer = Packer() - packer.pack_array_header(2 ** 32 - 1) - with pytest.raises((OverflowError, ValueError)): - packer.pack_array_header(2 ** 32) - - def test_map_header(self): - packer = Packer() - packer.pack_map_header(2 ** 32 - 1) - with pytest.raises((OverflowError, ValueError)): - packer.pack_array_header(2 ** 32) - - def test_max_str_len(self): - d = "x" * 3 - packed = packb(d) - - unpacker = Unpacker(max_str_len=3, encoding="utf-8") - unpacker.feed(packed) - assert unpacker.unpack() == d - - unpacker = Unpacker(max_str_len=2, encoding="utf-8") - unpacker.feed(packed) - - msg = "3 exceeds max_str_len" - with pytest.raises(ValueError, match=msg): - unpacker.unpack() - - def test_max_bin_len(self): - d = b"x" * 3 - packed = packb(d, use_bin_type=True) - - unpacker = Unpacker(max_bin_len=3) - unpacker.feed(packed) - assert unpacker.unpack() == d - - unpacker = 
Unpacker(max_bin_len=2) - unpacker.feed(packed) - - msg = "3 exceeds max_bin_len" - with pytest.raises(ValueError, match=msg): - unpacker.unpack() - - def test_max_array_len(self): - d = [1, 2, 3] - packed = packb(d) - - unpacker = Unpacker(max_array_len=3) - unpacker.feed(packed) - assert unpacker.unpack() == d - - unpacker = Unpacker(max_array_len=2) - unpacker.feed(packed) - - msg = "3 exceeds max_array_len" - with pytest.raises(ValueError, match=msg): - unpacker.unpack() - - def test_max_map_len(self): - d = {1: 2, 3: 4, 5: 6} - packed = packb(d) - - unpacker = Unpacker(max_map_len=3) - unpacker.feed(packed) - assert unpacker.unpack() == d - - unpacker = Unpacker(max_map_len=2) - unpacker.feed(packed) - - msg = "3 exceeds max_map_len" - with pytest.raises(ValueError, match=msg): - unpacker.unpack() - - def test_max_ext_len(self): - d = ExtType(42, b"abc") - packed = packb(d) - - unpacker = Unpacker(max_ext_len=3) - unpacker.feed(packed) - assert unpacker.unpack() == d - - unpacker = Unpacker(max_ext_len=2) - unpacker.feed(packed) - - msg = "4 exceeds max_ext_len" - with pytest.raises(ValueError, match=msg): - unpacker.unpack() diff --git a/pandas/tests/io/msgpack/test_newspec.py b/pandas/tests/io/msgpack/test_newspec.py deleted file mode 100644 index a1cf966b9d253..0000000000000 --- a/pandas/tests/io/msgpack/test_newspec.py +++ /dev/null @@ -1,90 +0,0 @@ -# coding: utf-8 - -from pandas.io.msgpack import ExtType, packb, unpackb - - -def test_str8(): - header = b"\xd9" - data = b"x" * 32 - b = packb(data.decode(), use_bin_type=True) - assert len(b) == len(data) + 2 - assert b[0:2] == header + b"\x20" - assert b[2:] == data - assert unpackb(b) == data - - data = b"x" * 255 - b = packb(data.decode(), use_bin_type=True) - assert len(b) == len(data) + 2 - assert b[0:2] == header + b"\xff" - assert b[2:] == data - assert unpackb(b) == data - - -def test_bin8(): - header = b"\xc4" - data = b"" - b = packb(data, use_bin_type=True) - assert len(b) == len(data) + 2 - 
assert b[0:2] == header + b"\x00" - assert b[2:] == data - assert unpackb(b) == data - - data = b"x" * 255 - b = packb(data, use_bin_type=True) - assert len(b) == len(data) + 2 - assert b[0:2] == header + b"\xff" - assert b[2:] == data - assert unpackb(b) == data - - -def test_bin16(): - header = b"\xc5" - data = b"x" * 256 - b = packb(data, use_bin_type=True) - assert len(b) == len(data) + 3 - assert b[0:1] == header - assert b[1:3] == b"\x01\x00" - assert b[3:] == data - assert unpackb(b) == data - - data = b"x" * 65535 - b = packb(data, use_bin_type=True) - assert len(b) == len(data) + 3 - assert b[0:1] == header - assert b[1:3] == b"\xff\xff" - assert b[3:] == data - assert unpackb(b) == data - - -def test_bin32(): - header = b"\xc6" - data = b"x" * 65536 - b = packb(data, use_bin_type=True) - assert len(b) == len(data) + 5 - assert b[0:1] == header - assert b[1:5] == b"\x00\x01\x00\x00" - assert b[5:] == data - assert unpackb(b) == data - - -def test_ext(): - def check(ext, packed): - assert packb(ext) == packed - assert unpackb(packed) == ext - - check(ExtType(0x42, b"Z"), b"\xd4\x42Z") # fixext 1 - check(ExtType(0x42, b"ZZ"), b"\xd5\x42ZZ") # fixext 2 - check(ExtType(0x42, b"Z" * 4), b"\xd6\x42" + b"Z" * 4) # fixext 4 - check(ExtType(0x42, b"Z" * 8), b"\xd7\x42" + b"Z" * 8) # fixext 8 - check(ExtType(0x42, b"Z" * 16), b"\xd8\x42" + b"Z" * 16) # fixext 16 - # ext 8 - check(ExtType(0x42, b""), b"\xc7\x00\x42") - check(ExtType(0x42, b"Z" * 255), b"\xc7\xff\x42" + b"Z" * 255) - # ext 16 - check(ExtType(0x42, b"Z" * 256), b"\xc8\x01\x00\x42" + b"Z" * 256) - check(ExtType(0x42, b"Z" * 0xFFFF), b"\xc8\xff\xff\x42" + b"Z" * 0xFFFF) - # ext 32 - check(ExtType(0x42, b"Z" * 0x10000), b"\xc9\x00\x01\x00\x00\x42" + b"Z" * 0x10000) - # needs large memory - # check(ExtType(0x42, b'Z'*0xffffffff), - # b'\xc9\xff\xff\xff\xff\x42' + b'Z'*0xffffffff) diff --git a/pandas/tests/io/msgpack/test_obj.py b/pandas/tests/io/msgpack/test_obj.py deleted file mode 100644 index 
03d8807c0922c..0000000000000 --- a/pandas/tests/io/msgpack/test_obj.py +++ /dev/null @@ -1,71 +0,0 @@ -# coding: utf-8 - -import pytest - -from pandas.io.msgpack import packb, unpackb - - -class DecodeError(Exception): - pass - - -class TestObj: - def _arr_to_str(self, arr): - return "".join(str(c) for c in arr) - - def bad_complex_decoder(self, o): - raise DecodeError("Ooops!") - - def _decode_complex(self, obj): - if b"__complex__" in obj: - return complex(obj[b"real"], obj[b"imag"]) - return obj - - def _encode_complex(self, obj): - if isinstance(obj, complex): - return {b"__complex__": True, b"real": 1, b"imag": 2} - return obj - - def test_encode_hook(self): - packed = packb([3, 1 + 2j], default=self._encode_complex) - unpacked = unpackb(packed, use_list=1) - assert unpacked[1] == {b"__complex__": True, b"real": 1, b"imag": 2} - - def test_decode_hook(self): - packed = packb([3, {b"__complex__": True, b"real": 1, b"imag": 2}]) - unpacked = unpackb(packed, object_hook=self._decode_complex, use_list=1) - assert unpacked[1] == 1 + 2j - - def test_decode_pairs_hook(self): - packed = packb([3, {1: 2, 3: 4}]) - prod_sum = 1 * 2 + 3 * 4 - unpacked = unpackb( - packed, object_pairs_hook=lambda l: sum(k * v for k, v in l), use_list=1 - ) - assert unpacked[1] == prod_sum - - def test_only_one_obj_hook(self): - msg = "object_pairs_hook and object_hook are mutually exclusive" - with pytest.raises(TypeError, match=msg): - unpackb(b"", object_hook=lambda x: x, object_pairs_hook=lambda x: x) - - def test_bad_hook(self): - msg = r"can't serialize \(1\+2j\)" - with pytest.raises(TypeError, match=msg): - packed = packb([3, 1 + 2j], default=lambda o: o) - unpacked = unpackb(packed, use_list=1) # noqa - - def test_array_hook(self): - packed = packb([1, 2, 3]) - unpacked = unpackb(packed, list_hook=self._arr_to_str, use_list=1) - assert unpacked == "123" - - def test_an_exception_in_objecthook1(self): - with pytest.raises(DecodeError, match="Ooops!"): - packed = packb({1: 
{"__complex__": True, "real": 1, "imag": 2}}) - unpackb(packed, object_hook=self.bad_complex_decoder) - - def test_an_exception_in_objecthook2(self): - with pytest.raises(DecodeError, match="Ooops!"): - packed = packb({1: [{"__complex__": True, "real": 1, "imag": 2}]}) - unpackb(packed, list_hook=self.bad_complex_decoder, use_list=1) diff --git a/pandas/tests/io/msgpack/test_pack.py b/pandas/tests/io/msgpack/test_pack.py deleted file mode 100644 index 5fc24027589cb..0000000000000 --- a/pandas/tests/io/msgpack/test_pack.py +++ /dev/null @@ -1,171 +0,0 @@ -# coding: utf-8 -from collections import OrderedDict -from io import BytesIO -import struct - -import pytest - -from pandas.io.msgpack import Packer, Unpacker, packb, unpackb - - -class TestPack: - def check(self, data, use_list=False): - re = unpackb(packb(data), use_list=use_list) - assert re == data - - def testPack(self): - test_data = [ - 0, - 1, - 127, - 128, - 255, - 256, - 65535, - 65536, - -1, - -32, - -33, - -128, - -129, - -32768, - -32769, - 1.0, - b"", - b"a", - b"a" * 31, - b"a" * 32, - None, - True, - False, - (), - ((),), - ((), None), - {None: 0}, - (1 << 23), - ] - for td in test_data: - self.check(td) - - def testPackUnicode(self): - test_data = ["", "abcd", ["defgh"], "Русский текст"] - for td in test_data: - re = unpackb(packb(td, encoding="utf-8"), use_list=1, encoding="utf-8") - assert re == td - packer = Packer(encoding="utf-8") - data = packer.pack(td) - re = Unpacker(BytesIO(data), encoding="utf-8", use_list=1).unpack() - assert re == td - - def testPackUTF32(self): - test_data = ["", "abcd", ["defgh"], "Русский текст"] - for td in test_data: - re = unpackb(packb(td, encoding="utf-32"), use_list=1, encoding="utf-32") - assert re == td - - def testPackBytes(self): - test_data = [b"", b"abcd", (b"defgh",)] - for td in test_data: - self.check(td) - - def testIgnoreUnicodeErrors(self): - re = unpackb( - packb(b"abc\xeddef"), encoding="utf-8", unicode_errors="ignore", use_list=1 - ) - assert re 
== "abcdef" - - def testStrictUnicodeUnpack(self): - msg = ( - r"'utf-*8' codec can't decode byte 0xed in position 3:" - " invalid continuation byte" - ) - with pytest.raises(UnicodeDecodeError, match=msg): - unpackb(packb(b"abc\xeddef"), encoding="utf-8", use_list=1) - - def testStrictUnicodePack(self): - msg = ( - r"'ascii' codec can't encode character '\\xed' in position 3:" - r" ordinal not in range\(128\)" - ) - with pytest.raises(UnicodeEncodeError, match=msg): - packb("abc\xeddef", encoding="ascii", unicode_errors="strict") - - def testIgnoreErrorsPack(self): - re = unpackb( - packb("abcФФФdef", encoding="ascii", unicode_errors="ignore"), - encoding="utf-8", - use_list=1, - ) - assert re == "abcdef" - - def testNoEncoding(self): - msg = "Can't encode unicode string: no encoding is specified" - with pytest.raises(TypeError, match=msg): - packb("abc", encoding=None) - - def testDecodeBinary(self): - re = unpackb(packb("abc"), encoding=None, use_list=1) - assert re == b"abc" - - def testPackFloat(self): - assert packb(1.0, use_single_float=True) == b"\xca" + struct.pack(">f", 1.0) - assert packb(1.0, use_single_float=False) == b"\xcb" + struct.pack(">d", 1.0) - - def testArraySize(self, sizes=[0, 5, 50, 1000]): - bio = BytesIO() - packer = Packer() - for size in sizes: - bio.write(packer.pack_array_header(size)) - for i in range(size): - bio.write(packer.pack(i)) - - bio.seek(0) - unpacker = Unpacker(bio, use_list=1) - for size in sizes: - assert unpacker.unpack() == list(range(size)) - - def test_manualreset(self, sizes=[0, 5, 50, 1000]): - packer = Packer(autoreset=False) - for size in sizes: - packer.pack_array_header(size) - for i in range(size): - packer.pack(i) - - bio = BytesIO(packer.bytes()) - unpacker = Unpacker(bio, use_list=1) - for size in sizes: - assert unpacker.unpack() == list(range(size)) - - packer.reset() - assert packer.bytes() == b"" - - def testMapSize(self, sizes=[0, 5, 50, 1000]): - bio = BytesIO() - packer = Packer() - for size in 
sizes: - bio.write(packer.pack_map_header(size)) - for i in range(size): - bio.write(packer.pack(i)) # key - bio.write(packer.pack(i * 2)) # value - - bio.seek(0) - unpacker = Unpacker(bio) - for size in sizes: - assert unpacker.unpack() == {i: i * 2 for i in range(size)} - - def test_odict(self): - seq = [(b"one", 1), (b"two", 2), (b"three", 3), (b"four", 4)] - od = OrderedDict(seq) - assert unpackb(packb(od), use_list=1) == dict(seq) - - def pair_hook(seq): - return list(seq) - - assert unpackb(packb(od), object_pairs_hook=pair_hook, use_list=1) == seq - - def test_pairlist(self): - pairlist = [(b"a", 1), (2, b"b"), (b"foo", b"bar")] - packer = Packer() - packed = packer.pack_map_pairs(pairlist) - unpacked = unpackb(packed, object_pairs_hook=list) - assert pairlist == unpacked diff --git a/pandas/tests/io/msgpack/test_read_size.py b/pandas/tests/io/msgpack/test_read_size.py deleted file mode 100644 index 7d2b539f12085..0000000000000 --- a/pandas/tests/io/msgpack/test_read_size.py +++ /dev/null @@ -1,71 +0,0 @@ -"""Test Unpacker's read_array_header and read_map_header methods""" -from pandas.io.msgpack import OutOfData, Unpacker, packb - -UnexpectedTypeException = ValueError - - -def test_read_array_header(): - unpacker = Unpacker() - unpacker.feed(packb(["a", "b", "c"])) - assert unpacker.read_array_header() == 3 - assert unpacker.unpack() == b"a" - assert unpacker.unpack() == b"b" - assert unpacker.unpack() == b"c" - try: - unpacker.unpack() - assert 0, "should raise exception" - except OutOfData: - assert 1, "okay" - - -def test_read_map_header(): - unpacker = Unpacker() - unpacker.feed(packb({"a": "A"})) - assert unpacker.read_map_header() == 1 - assert unpacker.unpack() == b"a" - assert unpacker.unpack() == b"A" - try: - unpacker.unpack() - assert 0, "should raise exception" - except OutOfData: - assert 1, "okay" - - -def test_incorrect_type_array(): - unpacker = Unpacker() - unpacker.feed(packb(1)) - try: - unpacker.read_array_header() - assert 0, "should 
raise exception" - except UnexpectedTypeException: - assert 1, "okay" - - -def test_incorrect_type_map(): - unpacker = Unpacker() - unpacker.feed(packb(1)) - try: - unpacker.read_map_header() - assert 0, "should raise exception" - except UnexpectedTypeException: - assert 1, "okay" - - -def test_correct_type_nested_array(): - unpacker = Unpacker() - unpacker.feed(packb({"a": ["b", "c", "d"]})) - try: - unpacker.read_array_header() - assert 0, "should raise exception" - except UnexpectedTypeException: - assert 1, "okay" - - -def test_incorrect_type_nested_map(): - unpacker = Unpacker() - unpacker.feed(packb([{"a": "b"}])) - try: - unpacker.read_map_header() - assert 0, "should raise exception" - except UnexpectedTypeException: - assert 1, "okay" diff --git a/pandas/tests/io/msgpack/test_seq.py b/pandas/tests/io/msgpack/test_seq.py deleted file mode 100644 index c4ac13980bc67..0000000000000 --- a/pandas/tests/io/msgpack/test_seq.py +++ /dev/null @@ -1,47 +0,0 @@ -# coding: utf-8 - -import io - -import pandas.io.msgpack as msgpack - -binarydata = bytes(bytearray(range(256))) - - -def gen_binary_data(idx): - return binarydata[: idx % 300] - - -def test_exceeding_unpacker_read_size(): - dumpf = io.BytesIO() - - packer = msgpack.Packer() - - NUMBER_OF_STRINGS = 6 - read_size = 16 - - # 5 ok for read_size=16, while 6 glibc detected *** python: double free or - # corruption (fasttop): - - # 20 ok for read_size=256, while 25 segfaults / glibc detected *** python: - # double free or corruption (!prev) - - # 40 ok for read_size=1024, while 50 introduces errors - # 7000 ok for read_size=1024*1024, while 8000 leads to glibc detected *** - # python: double free or corruption (!prev): - - for idx in range(NUMBER_OF_STRINGS): - data = gen_binary_data(idx) - dumpf.write(packer.pack(data)) - - f = io.BytesIO(dumpf.getvalue()) - dumpf.close() - - unpacker = msgpack.Unpacker(f, read_size=read_size, use_list=1) - - read_count = 0 - for idx, o in enumerate(unpacker): - assert type(o) == 
bytes - assert o == gen_binary_data(idx) - read_count += 1 - - assert read_count == NUMBER_OF_STRINGS diff --git a/pandas/tests/io/msgpack/test_sequnpack.py b/pandas/tests/io/msgpack/test_sequnpack.py deleted file mode 100644 index 79feb78b3b013..0000000000000 --- a/pandas/tests/io/msgpack/test_sequnpack.py +++ /dev/null @@ -1,102 +0,0 @@ -# coding: utf-8 -from io import BytesIO - -import pytest - -from pandas.io.msgpack import BufferFull, OutOfData, Unpacker - - -class TestPack: - def test_partial_data(self): - unpacker = Unpacker() - msg = "No more data to unpack" - - for data in [b"\xa5", b"h", b"a", b"l", b"l"]: - unpacker.feed(data) - with pytest.raises(StopIteration, match=msg): - next(iter(unpacker)) - - unpacker.feed(b"o") - assert next(iter(unpacker)) == b"hallo" - - def test_foobar(self): - unpacker = Unpacker(read_size=3, use_list=1) - unpacker.feed(b"foobar") - assert unpacker.unpack() == ord(b"f") - assert unpacker.unpack() == ord(b"o") - assert unpacker.unpack() == ord(b"o") - assert unpacker.unpack() == ord(b"b") - assert unpacker.unpack() == ord(b"a") - assert unpacker.unpack() == ord(b"r") - msg = "No more data to unpack" - with pytest.raises(OutOfData, match=msg): - unpacker.unpack() - - unpacker.feed(b"foo") - unpacker.feed(b"bar") - - k = 0 - for o, e in zip(unpacker, "foobarbaz"): - assert o == ord(e) - k += 1 - assert k == len(b"foobar") - - def test_foobar_skip(self): - unpacker = Unpacker(read_size=3, use_list=1) - unpacker.feed(b"foobar") - assert unpacker.unpack() == ord(b"f") - unpacker.skip() - assert unpacker.unpack() == ord(b"o") - unpacker.skip() - assert unpacker.unpack() == ord(b"a") - unpacker.skip() - msg = "No more data to unpack" - with pytest.raises(OutOfData, match=msg): - unpacker.unpack() - - def test_maxbuffersize_read_size_exceeds_max_buffer_size(self): - msg = "read_size should be less or equal to max_buffer_size" - with pytest.raises(ValueError, match=msg): - Unpacker(read_size=5, max_buffer_size=3) - - def 
test_maxbuffersize_bufferfull(self): - unpacker = Unpacker(read_size=3, max_buffer_size=3, use_list=1) - unpacker.feed(b"foo") - with pytest.raises(BufferFull, match=r"^$"): - unpacker.feed(b"b") - - def test_maxbuffersize(self): - unpacker = Unpacker(read_size=3, max_buffer_size=3, use_list=1) - unpacker.feed(b"foo") - assert ord("f") == next(unpacker) - unpacker.feed(b"b") - assert ord("o") == next(unpacker) - assert ord("o") == next(unpacker) - assert ord("b") == next(unpacker) - - def test_readbytes(self): - unpacker = Unpacker(read_size=3) - unpacker.feed(b"foobar") - assert unpacker.unpack() == ord(b"f") - assert unpacker.read_bytes(3) == b"oob" - assert unpacker.unpack() == ord(b"a") - assert unpacker.unpack() == ord(b"r") - - # Test buffer refill - unpacker = Unpacker(BytesIO(b"foobar"), read_size=3) - assert unpacker.unpack() == ord(b"f") - assert unpacker.read_bytes(3) == b"oob" - assert unpacker.unpack() == ord(b"a") - assert unpacker.unpack() == ord(b"r") - - def test_issue124(self): - unpacker = Unpacker() - unpacker.feed(b"\xa1?\xa1!") - assert tuple(unpacker) == (b"?", b"!") - assert tuple(unpacker) == () - unpacker.feed(b"\xa1?\xa1") - assert tuple(unpacker) == (b"?",) - assert tuple(unpacker) == () - unpacker.feed(b"!") - assert tuple(unpacker) == (b"!",) - assert tuple(unpacker) == () diff --git a/pandas/tests/io/msgpack/test_subtype.py b/pandas/tests/io/msgpack/test_subtype.py deleted file mode 100644 index c82f6f6d3bf4e..0000000000000 --- a/pandas/tests/io/msgpack/test_subtype.py +++ /dev/null @@ -1,26 +0,0 @@ -# coding: utf-8 - -from collections import namedtuple - -from pandas.io.msgpack import packb - - -class MyList(list): - pass - - -class MyDict(dict): - pass - - -class MyTuple(tuple): - pass - - -MyNamedTuple = namedtuple("MyNamedTuple", "x y") - - -def test_types(): - assert packb(MyDict()) == packb(dict()) - assert packb(MyList()) == packb(list()) - assert packb(MyNamedTuple(1, 2)) == packb((1, 2)) diff --git 
a/pandas/tests/io/msgpack/test_unpack.py b/pandas/tests/io/msgpack/test_unpack.py deleted file mode 100644 index 483e09efe6bb8..0000000000000 --- a/pandas/tests/io/msgpack/test_unpack.py +++ /dev/null @@ -1,64 +0,0 @@ -from io import BytesIO -import sys - -import pytest - -from pandas.io.msgpack import ExtType, OutOfData, Unpacker, packb - - -class TestUnpack: - def test_unpack_array_header_from_file(self): - f = BytesIO(packb([1, 2, 3, 4])) - unpacker = Unpacker(f) - assert unpacker.read_array_header() == 4 - assert unpacker.unpack() == 1 - assert unpacker.unpack() == 2 - assert unpacker.unpack() == 3 - assert unpacker.unpack() == 4 - msg = "No more data to unpack" - with pytest.raises(OutOfData, match=msg): - unpacker.unpack() - - def test_unpacker_hook_refcnt(self): - if not hasattr(sys, "getrefcount"): - pytest.skip("no sys.getrefcount()") - result = [] - - def hook(x): - result.append(x) - return x - - basecnt = sys.getrefcount(hook) - - up = Unpacker(object_hook=hook, list_hook=hook) - - assert sys.getrefcount(hook) >= basecnt + 2 - - up.feed(packb([{}])) - up.feed(packb([{}])) - assert up.unpack() == [{}] - assert up.unpack() == [{}] - assert result == [{}, [{}], {}, [{}]] - - del up - - assert sys.getrefcount(hook) == basecnt - - def test_unpacker_ext_hook(self): - class MyUnpacker(Unpacker): - def __init__(self): - super().__init__(ext_hook=self._hook, encoding="utf-8") - - def _hook(self, code, data): - if code == 1: - return int(data) - else: - return ExtType(code, data) - - unpacker = MyUnpacker() - unpacker.feed(packb({"a": 1}, encoding="utf-8")) - assert unpacker.unpack() == {"a": 1} - unpacker.feed(packb({"a": ExtType(1, b"123")}, encoding="utf-8")) - assert unpacker.unpack() == {"a": 123} - unpacker.feed(packb({"a": ExtType(2, b"321")}, encoding="utf-8")) - assert unpacker.unpack() == {"a": ExtType(2, b"321")} diff --git a/pandas/tests/io/msgpack/test_unpack_raw.py b/pandas/tests/io/msgpack/test_unpack_raw.py deleted file mode 100644 index 
f844553bfc34a..0000000000000 --- a/pandas/tests/io/msgpack/test_unpack_raw.py +++ /dev/null @@ -1,30 +0,0 @@ -"""Tests for cases where the user seeks to obtain packed msgpack objects""" - -import io - -from pandas.io.msgpack import Unpacker, packb - - -def test_write_bytes(): - unpacker = Unpacker() - unpacker.feed(b"abc") - f = io.BytesIO() - assert unpacker.unpack(f.write) == ord("a") - assert f.getvalue() == b"a" - f = io.BytesIO() - assert unpacker.skip(f.write) is None - assert f.getvalue() == b"b" - f = io.BytesIO() - assert unpacker.skip() is None - assert f.getvalue() == b"" - - -def test_write_bytes_multi_buffer(): - long_val = (5) * 100 - expected = packb(long_val) - unpacker = Unpacker(io.BytesIO(expected), read_size=3, max_buffer_size=3) - - f = io.BytesIO() - unpacked = unpacker.unpack(f.write) - assert unpacked == long_val - assert f.getvalue() == expected diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 2af370a696860..a15eac89ecedb 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -48,7 +48,6 @@ def __fspath__(self): # https://github.com/cython/cython/issues/1720 @pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning") -@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") class TestCommonIOCapabilities: data1 = """index,A,B,C,D foo,2,3,4,5 @@ -142,7 +141,6 @@ def test_iterator(self): (pd.read_stata, "os", FileNotFoundError, "dta"), (pd.read_sas, "os", FileNotFoundError, "sas7bdat"), (pd.read_json, "os", ValueError, "json"), - (pd.read_msgpack, "os", FileNotFoundError, "mp"), (pd.read_pickle, "os", FileNotFoundError, "pickle"), ], ) @@ -177,7 +175,6 @@ def test_read_non_existant(self, reader, module, error_class, fn_ext): (pd.read_stata, "os", FileNotFoundError, "dta"), (pd.read_sas, "os", FileNotFoundError, "sas7bdat"), (pd.read_json, "os", ValueError, "json"), - (pd.read_msgpack, "os", FileNotFoundError, "mp"), (pd.read_pickle, "os", FileNotFoundError, 
"pickle"), ], ) @@ -229,7 +226,6 @@ def test_read_expands_user_home_dir( (pd.read_stata, "os", ("io", "data", "stata", "stata10_115.dta")), (pd.read_sas, "os", ("io", "sas", "data", "test1.sas7bdat")), (pd.read_json, "os", ("io", "json", "data", "tsframe_v012.json")), - (pd.read_msgpack, "os", ("io", "msgpack", "data", "frame.mp")), ( pd.read_pickle, "os", @@ -260,7 +256,6 @@ def test_read_fspath_all(self, reader, module, path, datapath): ("to_html", {}, "os"), ("to_json", {}, "os"), ("to_latex", {}, "os"), - ("to_msgpack", {}, "os"), ("to_pickle", {}, "os"), ("to_stata", {"time_stamp": pd.to_datetime("2019-01-01 00:00")}, "os"), ], diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py deleted file mode 100644 index f8005273319e0..0000000000000 --- a/pandas/tests/io/test_packers.py +++ /dev/null @@ -1,911 +0,0 @@ -import datetime -import glob -from io import BytesIO -import os -from warnings import catch_warnings, filterwarnings - -import numpy as np -import pytest - -from pandas._libs.tslib import iNaT -from pandas.errors import PerformanceWarning - -import pandas -from pandas import ( - Categorical, - DataFrame, - Index, - Interval, - MultiIndex, - NaT, - Period, - Series, - Timestamp, - bdate_range, - date_range, - period_range, -) -import pandas.util.testing as tm - -from pandas.io.packers import read_msgpack, to_msgpack - -nan = np.nan - -try: - import blosc # NOQA -except ImportError: - _BLOSC_INSTALLED = False -else: - _BLOSC_INSTALLED = True - -try: - import zlib # NOQA -except ImportError: - _ZLIB_INSTALLED = False -else: - _ZLIB_INSTALLED = True - - -@pytest.fixture(scope="module") -def current_packers_data(): - # our current version packers data - from pandas.tests.io.generate_legacy_storage_files import create_msgpack_data - - return create_msgpack_data() - - -@pytest.fixture(scope="module") -def all_packers_data(): - # our all of our current version packers data - from pandas.tests.io.generate_legacy_storage_files import 
create_data - - return create_data() - - -def check_arbitrary(a, b): - - if isinstance(a, (list, tuple)) and isinstance(b, (list, tuple)): - assert len(a) == len(b) - for a_, b_ in zip(a, b): - check_arbitrary(a_, b_) - elif isinstance(a, DataFrame): - tm.assert_frame_equal(a, b) - elif isinstance(a, Series): - tm.assert_series_equal(a, b) - elif isinstance(a, Index): - tm.assert_index_equal(a, b) - elif isinstance(a, Categorical): - # Temp, - # Categorical.categories is changed from str to bytes in PY3 - # maybe the same as GH 13591 - if b.categories.inferred_type == "string": - pass - else: - tm.assert_categorical_equal(a, b) - elif a is NaT: - assert b is NaT - elif isinstance(a, Timestamp): - assert a == b - assert a.freq == b.freq - else: - assert a == b - - -@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") -class TestPackers: - def setup_method(self, method): - self.path = "__{}__.msg".format(tm.rands(10)) - - def teardown_method(self, method): - pass - - def encode_decode(self, x, compress=None, **kwargs): - with tm.ensure_clean(self.path) as p: - to_msgpack(p, x, compress=compress, **kwargs) - return read_msgpack(p, **kwargs) - - -@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") -class TestAPI(TestPackers): - def test_string_io(self): - - df = DataFrame(np.random.randn(10, 2)) - s = df.to_msgpack(None) - result = read_msgpack(s) - tm.assert_frame_equal(result, df) - - s = df.to_msgpack() - result = read_msgpack(s) - tm.assert_frame_equal(result, df) - - s = df.to_msgpack() - result = read_msgpack(BytesIO(s)) - tm.assert_frame_equal(result, df) - - s = to_msgpack(None, df) - result = read_msgpack(s) - tm.assert_frame_equal(result, df) - - with tm.ensure_clean(self.path) as p: - - s = df.to_msgpack() - with open(p, "wb") as fh: - fh.write(s) - result = read_msgpack(p) - tm.assert_frame_equal(result, df) - - def test_path_pathlib(self): - df = tm.makeDataFrame() - result = tm.round_trip_pathlib(df.to_msgpack, read_msgpack) - 
tm.assert_frame_equal(df, result) - - def test_path_localpath(self): - df = tm.makeDataFrame() - result = tm.round_trip_localpath(df.to_msgpack, read_msgpack) - tm.assert_frame_equal(df, result) - - def test_iterator_with_string_io(self): - - dfs = [DataFrame(np.random.randn(10, 2)) for i in range(5)] - s = to_msgpack(None, *dfs) - for i, result in enumerate(read_msgpack(s, iterator=True)): - tm.assert_frame_equal(result, dfs[i]) - - def test_invalid_arg(self): - # GH10369 - class A: - def __init__(self): - self.read = 0 - - msg = "Invalid file path or buffer object type: " - invalid_path = os.path.join("nonexistent_dir", "df.msgpack") - with pytest.raises(ValueError, match=msg.format("NoneType")): - read_msgpack(path_or_buf=None) - with pytest.raises(ValueError, match=msg.format("dict")): - read_msgpack(path_or_buf={}) - with pytest.raises(ValueError, match=msg.format(r".*\.A")): - read_msgpack(path_or_buf=A()) - with pytest.raises(FileNotFoundError, match="does not exist"): - read_msgpack(path_or_buf=invalid_path) - - -@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") -class TestNumpy(TestPackers): - def test_numpy_scalar_float(self): - x = np.float32(np.random.rand()) - x_rec = self.encode_decode(x) - tm.assert_almost_equal(x, x_rec) - - def test_numpy_scalar_complex(self): - x = np.complex64(np.random.rand() + 1j * np.random.rand()) - x_rec = self.encode_decode(x) - assert np.allclose(x, x_rec) - - def test_scalar_float(self): - x = np.random.rand() - x_rec = self.encode_decode(x) - tm.assert_almost_equal(x, x_rec) - - def test_scalar_bool(self): - x = np.bool_(1) - x_rec = self.encode_decode(x) - tm.assert_almost_equal(x, x_rec) - - x = np.bool_(0) - x_rec = self.encode_decode(x) - tm.assert_almost_equal(x, x_rec) - - def test_scalar_complex(self): - x = np.random.rand() + 1j * np.random.rand() - x_rec = self.encode_decode(x) - assert np.allclose(x, x_rec) - - def test_list_numpy_float(self): - x = [np.float32(np.random.rand()) for i in range(5)] - 
x_rec = self.encode_decode(x) - # current msgpack cannot distinguish list/tuple - tm.assert_almost_equal(tuple(x), x_rec) - - x_rec = self.encode_decode(tuple(x)) - tm.assert_almost_equal(tuple(x), x_rec) - - def test_list_numpy_float_complex(self): - if not hasattr(np, "complex128"): - pytest.skip("numpy can not handle complex128") - - x = [np.float32(np.random.rand()) for i in range(5)] + [ - np.complex128(np.random.rand() + 1j * np.random.rand()) for i in range(5) - ] - x_rec = self.encode_decode(x) - assert np.allclose(x, x_rec) - - def test_list_float(self): - x = [np.random.rand() for i in range(5)] - x_rec = self.encode_decode(x) - # current msgpack cannot distinguish list/tuple - tm.assert_almost_equal(tuple(x), x_rec) - - x_rec = self.encode_decode(tuple(x)) - tm.assert_almost_equal(tuple(x), x_rec) - - def test_list_float_complex(self): - x = [np.random.rand() for i in range(5)] + [ - (np.random.rand() + 1j * np.random.rand()) for i in range(5) - ] - x_rec = self.encode_decode(x) - assert np.allclose(x, x_rec) - - def test_dict_float(self): - x = {"foo": 1.0, "bar": 2.0} - x_rec = self.encode_decode(x) - tm.assert_almost_equal(x, x_rec) - - def test_dict_complex(self): - x = {"foo": 1.0 + 1.0j, "bar": 2.0 + 2.0j} - x_rec = self.encode_decode(x) - tm.assert_dict_equal(x, x_rec) - - for key in x: - tm.assert_class_equal(x[key], x_rec[key], obj="complex value") - - def test_dict_numpy_float(self): - x = {"foo": np.float32(1.0), "bar": np.float32(2.0)} - x_rec = self.encode_decode(x) - tm.assert_almost_equal(x, x_rec) - - def test_dict_numpy_complex(self): - x = {"foo": np.complex128(1.0 + 1.0j), "bar": np.complex128(2.0 + 2.0j)} - x_rec = self.encode_decode(x) - tm.assert_dict_equal(x, x_rec) - - for key in x: - tm.assert_class_equal(x[key], x_rec[key], obj="numpy complex128") - - def test_numpy_array_float(self): - - # run multiple times - for n in range(10): - x = np.random.rand(10) - for dtype in ["float32", "float64"]: - x = x.astype(dtype) - x_rec = 
self.encode_decode(x) - tm.assert_almost_equal(x, x_rec) - - def test_numpy_array_complex(self): - x = (np.random.rand(5) + 1j * np.random.rand(5)).astype(np.complex128) - x_rec = self.encode_decode(x) - assert all(map(lambda x, y: x == y, x, x_rec)) and x.dtype == x_rec.dtype - - def test_list_mixed(self): - x = [1.0, np.float32(3.5), np.complex128(4.25), "foo", np.bool_(1)] - x_rec = self.encode_decode(x) - # current msgpack cannot distinguish list/tuple - tm.assert_almost_equal(tuple(x), x_rec) - - x_rec = self.encode_decode(tuple(x)) - tm.assert_almost_equal(tuple(x), x_rec) - - -@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") -class TestBasic(TestPackers): - def test_timestamp(self): - - for i in [ - Timestamp("20130101"), - Timestamp("20130101", tz="US/Eastern"), - Timestamp("201301010501"), - ]: - i_rec = self.encode_decode(i) - assert i == i_rec - - def test_nat(self): - nat_rec = self.encode_decode(NaT) - assert NaT is nat_rec - - def test_datetimes(self): - - for i in [ - datetime.datetime(2013, 1, 1), - datetime.datetime(2013, 1, 1, 5, 1), - datetime.date(2013, 1, 1), - np.datetime64(datetime.datetime(2013, 1, 5, 2, 15)), - ]: - i_rec = self.encode_decode(i) - assert i == i_rec - - def test_timedeltas(self): - - for i in [ - datetime.timedelta(days=1), - datetime.timedelta(days=1, seconds=10), - np.timedelta64(1000000), - ]: - i_rec = self.encode_decode(i) - assert i == i_rec - - def test_periods(self): - # 13463 - for i in [Period("2010-09", "M"), Period("2014-Q1", "Q")]: - i_rec = self.encode_decode(i) - assert i == i_rec - - def test_intervals(self): - # 19967 - for i in [Interval(0, 1), Interval(0, 1, "left"), Interval(10, 25.0, "right")]: - i_rec = self.encode_decode(i) - assert i == i_rec - - -@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") -class TestIndex(TestPackers): - def setup_method(self, method): - super().setup_method(method) - - self.d = { - "string": tm.makeStringIndex(100), - "date": tm.makeDateIndex(100), - 
"int": tm.makeIntIndex(100), - "rng": tm.makeRangeIndex(100), - "float": tm.makeFloatIndex(100), - "empty": Index([]), - "tuple": Index(zip(["foo", "bar", "baz"], [1, 2, 3])), - "period": Index(period_range("2012-1-1", freq="M", periods=3)), - "date2": Index(date_range("2013-01-1", periods=10)), - "bdate": Index(bdate_range("2013-01-02", periods=10)), - "cat": tm.makeCategoricalIndex(100), - "interval": tm.makeIntervalIndex(100), - "timedelta": tm.makeTimedeltaIndex(100, "H"), - } - - self.mi = { - "reg": MultiIndex.from_tuples( - [ - ("bar", "one"), - ("baz", "two"), - ("foo", "two"), - ("qux", "one"), - ("qux", "two"), - ], - names=["first", "second"], - ) - } - - def test_basic_index(self): - - for s, i in self.d.items(): - i_rec = self.encode_decode(i) - tm.assert_index_equal(i, i_rec) - - # datetime with no freq (GH5506) - i = Index([Timestamp("20130101"), Timestamp("20130103")]) - i_rec = self.encode_decode(i) - tm.assert_index_equal(i, i_rec) - - # datetime with timezone - i = Index( - [Timestamp("20130101 9:00:00"), Timestamp("20130103 11:00:00")] - ).tz_localize("US/Eastern") - i_rec = self.encode_decode(i) - tm.assert_index_equal(i, i_rec) - - def test_multi_index(self): - - for s, i in self.mi.items(): - i_rec = self.encode_decode(i) - tm.assert_index_equal(i, i_rec) - - def test_unicode(self): - i = tm.makeUnicodeIndex(100) - - i_rec = self.encode_decode(i) - tm.assert_index_equal(i, i_rec) - - def categorical_index(self): - # GH15487 - df = DataFrame(np.random.randn(10, 2)) - df = df.astype({0: "category"}).set_index(0) - result = self.encode_decode(df) - tm.assert_frame_equal(result, df) - - -@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") -class TestSeries(TestPackers): - def setup_method(self, method): - super().setup_method(method) - - self.d = {} - - s = tm.makeStringSeries() - s.name = "string" - self.d["string"] = s - - s = tm.makeObjectSeries() - s.name = "object" - self.d["object"] = s - - s = Series(iNaT, dtype="M8[ns]", 
index=range(5)) - self.d["date"] = s - - data = { - "A": [0.0, 1.0, 2.0, 3.0, np.nan], - "B": [0, 1, 0, 1, 0], - "C": ["foo1", "foo2", "foo3", "foo4", "foo5"], - "D": date_range("1/1/2009", periods=5), - "E": [0.0, 1, Timestamp("20100101"), "foo", 2.0], - "F": [Timestamp("20130102", tz="US/Eastern")] * 2 - + [Timestamp("20130603", tz="CET")] * 3, - "G": [Timestamp("20130102", tz="US/Eastern")] * 5, - "H": Categorical([1, 2, 3, 4, 5]), - "I": Categorical([1, 2, 3, 4, 5], ordered=True), - "J": (np.bool_(1), 2, 3, 4, 5), - } - - self.d["float"] = Series(data["A"]) - self.d["int"] = Series(data["B"]) - self.d["mixed"] = Series(data["E"]) - self.d["dt_tz_mixed"] = Series(data["F"]) - self.d["dt_tz"] = Series(data["G"]) - self.d["cat_ordered"] = Series(data["H"]) - self.d["cat_unordered"] = Series(data["I"]) - self.d["numpy_bool_mixed"] = Series(data["J"]) - - def test_basic(self): - - # run multiple times here - for n in range(10): - for s, i in self.d.items(): - i_rec = self.encode_decode(i) - tm.assert_series_equal(i, i_rec) - - -@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") -class TestCategorical(TestPackers): - def setup_method(self, method): - super().setup_method(method) - - self.d = {} - - self.d["plain_str"] = Categorical(["a", "b", "c", "d", "e"]) - self.d["plain_str_ordered"] = Categorical( - ["a", "b", "c", "d", "e"], ordered=True - ) - - self.d["plain_int"] = Categorical([5, 6, 7, 8]) - self.d["plain_int_ordered"] = Categorical([5, 6, 7, 8], ordered=True) - - def test_basic(self): - - # run multiple times here - for n in range(10): - for s, i in self.d.items(): - i_rec = self.encode_decode(i) - tm.assert_categorical_equal(i, i_rec) - - -@pytest.mark.filterwarnings("ignore:msgpack:FutureWarning") -class TestNDFrame(TestPackers): - def setup_method(self, method): - super().setup_method(method) - - data = { - "A": [0.0, 1.0, 2.0, 3.0, np.nan], - "B": [0, 1, 0, 1, 0], - "C": ["foo1", "foo2", "foo3", "foo4", "foo5"], - "D": date_range("1/1/2009", 
periods=5), - "E": [0.0, 1, Timestamp("20100101"), "foo", 2.0], - "F": [Timestamp("20130102", tz="US/Eastern")] * 5, - "G": [Timestamp("20130603", tz="CET")] * 5, - "H": Categorical(["a", "b", "c", "d", "e"]), - "I": Categorical(["a", "b", "c", "d", "e"], ordered=True), - } - - self.frame = { - "float": DataFrame(dict(A=data["A"], B=Series(data["A"]) + 1)), - "int": DataFrame(dict(A=data["B"], B=Series(data["B"]) + 1)), - "mixed": DataFrame(data), - } - - def test_basic_frame(self): - - for s, i in self.frame.items(): - i_rec = self.encode_decode(i) - tm.assert_frame_equal(i, i_rec) - - def test_multi(self): - - i_rec = self.encode_decode(self.frame) - for k in self.frame.keys(): - tm.assert_frame_equal(self.frame[k], i_rec[k]) - - packed_items = tuple( - [self.frame["float"], self.frame["float"].A, self.frame["float"].B, None] - ) - l_rec = self.encode_decode(packed_items) - check_arbitrary(packed_items, l_rec) - - # this is an oddity in that packed lists will be returned as tuples - packed_items = [ - self.frame["float"], - self.frame["float"].A, - self.frame["float"].B, - None, - ] - l_rec = self.encode_decode(packed_items) - assert isinstance(l_rec, tuple) - check_arbitrary(packed_items, l_rec) - - def test_iterator(self): - - packed_items = [ - self.frame["float"], - self.frame["float"].A, - self.frame["float"].B, - None, - ] - - with tm.ensure_clean(self.path) as path: - to_msgpack(path, *packed_items) - for i, packed in enumerate(read_msgpack(path, iterator=True)): - check_arbitrary(packed, packed_items[i]) - - def tests_datetimeindex_freq_issue(self): - - # GH 5947 - # inferring freq on the datetimeindex - df = DataFrame([1, 2, 3], index=date_range("1/1/2013", "1/3/2013")) - result = self.encode_decode(df) - tm.assert_frame_equal(result, df) - - df = DataFrame([1, 2], index=date_range("1/1/2013", "1/2/2013")) - result = self.encode_decode(df) - tm.assert_frame_equal(result, df) - - def test_dataframe_duplicate_column_names(self): - - # GH 9618 - expected_1 
= DataFrame(columns=["a", "a"]) - expected_2 = DataFrame(columns=[1] * 100) - expected_2.loc[0] = np.random.randn(100) - expected_3 = DataFrame(columns=[1, 1]) - expected_3.loc[0] = ["abc", np.nan] - - result_1 = self.encode_decode(expected_1) - result_2 = self.encode_decode(expected_2) - result_3 = self.encode_decode(expected_3) - - tm.assert_frame_equal(result_1, expected_1) - tm.assert_frame_equal(result_2, expected_2) - tm.assert_frame_equal(result_3, expected_3) - - -@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") -class TestCompression(TestPackers): - """See https://github.com/pandas-dev/pandas/pull/9783 - """ - - def setup_method(self, method): - try: - from sqlalchemy import create_engine - - self._create_sql_engine = create_engine - except ImportError: - self._SQLALCHEMY_INSTALLED = False - else: - self._SQLALCHEMY_INSTALLED = True - - super().setup_method(method) - data = { - "A": np.arange(1000, dtype=np.float64), - "B": np.arange(1000, dtype=np.int32), - "C": list(100 * "abcdefghij"), - "D": date_range(datetime.datetime(2015, 4, 1), periods=1000), - "E": [datetime.timedelta(days=x) for x in range(1000)], - } - self.frame = { - "float": DataFrame({k: data[k] for k in ["A", "A"]}), - "int": DataFrame({k: data[k] for k in ["B", "B"]}), - "mixed": DataFrame(data), - } - - def test_plain(self): - i_rec = self.encode_decode(self.frame) - for k in self.frame.keys(): - tm.assert_frame_equal(self.frame[k], i_rec[k]) - - def _test_compression(self, compress): - i_rec = self.encode_decode(self.frame, compress=compress) - for k in self.frame.keys(): - value = i_rec[k] - expected = self.frame[k] - tm.assert_frame_equal(value, expected) - # make sure that we can write to the new frames - for block in value._data.blocks: - assert block.values.flags.writeable - - def test_compression_zlib(self): - if not _ZLIB_INSTALLED: - pytest.skip("no zlib") - self._test_compression("zlib") - - def test_compression_blosc(self): - if not _BLOSC_INSTALLED: - 
pytest.skip("no blosc") - self._test_compression("blosc") - - def _test_compression_warns_when_decompress_caches(self, monkeypatch, compress): - not_garbage = [] - control = [] # copied data - - compress_module = globals()[compress] - real_decompress = compress_module.decompress - - def decompress(ob): - """mock decompress function that delegates to the real - decompress but caches the result and a copy of the result. - """ - res = real_decompress(ob) - not_garbage.append(res) # hold a reference to this bytes object - control.append(bytearray(res)) # copy the data here to check later - return res - - # types mapped to values to add in place. - rhs = { - np.dtype("float64"): 1.0, - np.dtype("int32"): 1, - np.dtype("object"): "a", - np.dtype("datetime64[ns]"): np.timedelta64(1, "ns"), - np.dtype("timedelta64[ns]"): np.timedelta64(1, "ns"), - } - - with monkeypatch.context() as m, tm.assert_produces_warning( - PerformanceWarning - ) as ws: - m.setattr(compress_module, "decompress", decompress) - - with catch_warnings(): - filterwarnings("ignore", category=FutureWarning) - i_rec = self.encode_decode(self.frame, compress=compress) - for k in self.frame.keys(): - - value = i_rec[k] - expected = self.frame[k] - tm.assert_frame_equal(value, expected) - # make sure that we can write to the new frames even though - # we needed to copy the data - for block in value._data.blocks: - assert block.values.flags.writeable - # mutate the data in some way - block.values[0] += rhs[block.dtype] - - for w in ws: - # check the messages from our warnings - assert str(w.message) == ( - "copying data after decompressing; " - "this may mean that decompress is " - "caching its result" - ) - - for buf, control_buf in zip(not_garbage, control): - # make sure none of our mutations above affected the - # original buffers - assert buf == control_buf - - def test_compression_warns_when_decompress_caches_zlib(self, monkeypatch): - if not _ZLIB_INSTALLED: - pytest.skip("no zlib") - 
self._test_compression_warns_when_decompress_caches(monkeypatch, "zlib") - - def test_compression_warns_when_decompress_caches_blosc(self, monkeypatch): - if not _BLOSC_INSTALLED: - pytest.skip("no blosc") - self._test_compression_warns_when_decompress_caches(monkeypatch, "blosc") - - def _test_small_strings_no_warn(self, compress): - empty = np.array([], dtype="uint8") - with tm.assert_produces_warning(None): - with catch_warnings(): - filterwarnings("ignore", category=FutureWarning) - empty_unpacked = self.encode_decode(empty, compress=compress) - - tm.assert_numpy_array_equal(empty_unpacked, empty) - assert empty_unpacked.flags.writeable - - char = np.array([ord(b"a")], dtype="uint8") - with tm.assert_produces_warning(None): - with catch_warnings(): - filterwarnings("ignore", category=FutureWarning) - char_unpacked = self.encode_decode(char, compress=compress) - - tm.assert_numpy_array_equal(char_unpacked, char) - assert char_unpacked.flags.writeable - # if this test fails I am sorry because the interpreter is now in a - # bad state where b'a' points to 98 == ord(b'b'). - char_unpacked[0] = ord(b"b") - - # we compare the ord of bytes b'a' with unicode 'a' because the should - # always be the same (unless we were able to mutate the shared - # character singleton in which case ord(b'a') == ord(b'b'). 
- assert ord(b"a") == ord("a") - tm.assert_numpy_array_equal(char_unpacked, np.array([ord(b"b")], dtype="uint8")) - - def test_small_strings_no_warn_zlib(self): - if not _ZLIB_INSTALLED: - pytest.skip("no zlib") - self._test_small_strings_no_warn("zlib") - - def test_small_strings_no_warn_blosc(self): - if not _BLOSC_INSTALLED: - pytest.skip("no blosc") - self._test_small_strings_no_warn("blosc") - - def test_readonly_axis_blosc(self): - # GH11880 - if not _BLOSC_INSTALLED: - pytest.skip("no blosc") - df1 = DataFrame({"A": list("abcd")}) - df2 = DataFrame(df1, index=[1.0, 2.0, 3.0, 4.0]) - assert 1 in self.encode_decode(df1["A"], compress="blosc") - assert 1.0 in self.encode_decode(df2["A"], compress="blosc") - - def test_readonly_axis_zlib(self): - # GH11880 - df1 = DataFrame({"A": list("abcd")}) - df2 = DataFrame(df1, index=[1.0, 2.0, 3.0, 4.0]) - assert 1 in self.encode_decode(df1["A"], compress="zlib") - assert 1.0 in self.encode_decode(df2["A"], compress="zlib") - - def test_readonly_axis_blosc_to_sql(self): - # GH11880 - if not _BLOSC_INSTALLED: - pytest.skip("no blosc") - if not self._SQLALCHEMY_INSTALLED: - pytest.skip("no sqlalchemy") - expected = DataFrame({"A": list("abcd")}) - df = self.encode_decode(expected, compress="blosc") - eng = self._create_sql_engine("sqlite:///:memory:") - df.to_sql("test", eng, if_exists="append") - result = pandas.read_sql_table("test", eng, index_col="index") - result.index.names = [None] - tm.assert_frame_equal(expected, result) - - def test_readonly_axis_zlib_to_sql(self): - # GH11880 - if not _ZLIB_INSTALLED: - pytest.skip("no zlib") - if not self._SQLALCHEMY_INSTALLED: - pytest.skip("no sqlalchemy") - expected = DataFrame({"A": list("abcd")}) - df = self.encode_decode(expected, compress="zlib") - eng = self._create_sql_engine("sqlite:///:memory:") - df.to_sql("test", eng, if_exists="append") - result = pandas.read_sql_table("test", eng, index_col="index") - result.index.names = [None] - tm.assert_frame_equal(expected, 
result) - - -@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") -class TestEncoding(TestPackers): - def setup_method(self, method): - super().setup_method(method) - data = { - "A": ["\u2019"] * 1000, - "B": np.arange(1000, dtype=np.int32), - "C": list(100 * "abcdefghij"), - "D": date_range(datetime.datetime(2015, 4, 1), periods=1000), - "E": [datetime.timedelta(days=x) for x in range(1000)], - "G": [400] * 1000, - } - self.frame = { - "float": DataFrame({k: data[k] for k in ["A", "A"]}), - "int": DataFrame({k: data[k] for k in ["B", "B"]}), - "mixed": DataFrame(data), - } - self.utf_encodings = ["utf8", "utf16", "utf32"] - - def test_utf(self): - # GH10581 - for encoding in self.utf_encodings: - for frame in self.frame.values(): - result = self.encode_decode(frame, encoding=encoding) - tm.assert_frame_equal(result, frame) - - def test_default_encoding(self): - for frame in self.frame.values(): - result = frame.to_msgpack() - expected = frame.to_msgpack(encoding="utf8") - assert result == expected - result = self.encode_decode(frame) - tm.assert_frame_equal(result, frame) - - -files = glob.glob( - os.path.join(os.path.dirname(__file__), "data", "legacy_msgpack", "*", "*.msgpack") -) - - -@pytest.fixture(params=files) -def legacy_packer(request, datapath): - return datapath(request.param) - - -@pytest.mark.filterwarnings("ignore:.*msgpack:FutureWarning") -class TestMsgpack: - """ - How to add msgpack tests: - - 1. Install pandas version intended to output the msgpack. - 2. Execute "generate_legacy_storage_files.py" to create the msgpack. - $ python generate_legacy_storage_files.py msgpack - - 3. Move the created pickle to "data/legacy_msgpack/" directory. 
- """ - - minimum_structure = { - "series": ["float", "int", "mixed", "ts", "mi", "dup"], - "frame": ["float", "int", "mixed", "mi"], - "index": ["int", "date", "period"], - "mi": ["reg2"], - } - - def check_min_structure(self, data, version): - for typ, v in self.minimum_structure.items(): - - assert typ in data, '"{0}" not found in unpacked data'.format(typ) - for kind in v: - msg = '"{0}" not found in data["{1}"]'.format(kind, typ) - assert kind in data[typ], msg - - def compare(self, current_data, all_data, vf, version): - data = read_msgpack(vf) - - self.check_min_structure(data, version) - for typ, dv in data.items(): - assert typ in all_data, "unpacked data contains " 'extra key "{0}"'.format( - typ - ) - for dt, result in dv.items(): - assert ( - dt in current_data[typ] - ), 'data["{0}"] contains extra ' 'key "{1}"'.format(typ, dt) - try: - expected = current_data[typ][dt] - except KeyError: - continue - - # use a specific comparator - # if available - comp_method = "compare_{typ}_{dt}".format(typ=typ, dt=dt) - comparator = getattr(self, comp_method, None) - if comparator is not None: - comparator(result, expected, typ, version) - else: - check_arbitrary(result, expected) - - return data - - def compare_series_dt_tz(self, result, expected, typ, version): - tm.assert_series_equal(result, expected) - - def compare_frame_dt_mixed_tzs(self, result, expected, typ, version): - tm.assert_frame_equal(result, expected) - - def test_msgpacks_legacy( - self, current_packers_data, all_packers_data, legacy_packer, datapath - ): - - version = os.path.basename(os.path.dirname(legacy_packer)) - - try: - with catch_warnings(record=True): - self.compare( - current_packers_data, all_packers_data, legacy_packer, version - ) - except ImportError: - # blosc not installed - pass - - def test_msgpack_period_freq(self): - # https://github.com/pandas-dev/pandas/issues/24135 - s = Series(np.random.rand(5), index=date_range("20130101", periods=5)) - r = read_msgpack(s.to_msgpack()) - 
repr(r) diff --git a/pandas/tests/util/test_move.py b/pandas/tests/util/test_move.py deleted file mode 100644 index 0e28dd2dd9d71..0000000000000 --- a/pandas/tests/util/test_move.py +++ /dev/null @@ -1,44 +0,0 @@ -import pytest - -from pandas.util._move import BadMove, move_into_mutable_buffer, stolenbuf - - -def test_cannot_create_instance_of_stolen_buffer(): - # Stolen buffers need to be created through the smart constructor - # "move_into_mutable_buffer," which has a bunch of checks in it. - - msg = "cannot create 'pandas.util._move.stolenbuf' instances" - with pytest.raises(TypeError, match=msg): - stolenbuf() - - -def test_more_than_one_ref(): - # Test case for when we try to use "move_into_mutable_buffer" - # when the object being moved has other references. - - b = b"testing" - - with pytest.raises(BadMove, match="testing") as e: - - def handle_success(type_, value, tb): - assert value.args[0] is b - return type(e).handle_success(e, type_, value, tb) # super - - e.handle_success = handle_success - move_into_mutable_buffer(b) - - -def test_exactly_one_ref(): - # Test case for when the object being moved has exactly one reference. - - b = b"testing" - - # We need to pass an expression on the stack to ensure that there are - # not extra references hanging around. We cannot rewrite this test as - # buf = b[:-3] - # as_stolen_buf = move_into_mutable_buffer(buf) - # because then we would have more than one reference to buf. - as_stolen_buf = move_into_mutable_buffer(b[:-3]) - - # Materialize as byte-array to show that it is mutable. - assert bytearray(as_stolen_buf) == b"test" diff --git a/pandas/util/move.c b/pandas/util/move.c deleted file mode 100644 index 1c29a4c214909..0000000000000 --- a/pandas/util/move.c +++ /dev/null @@ -1,212 +0,0 @@ -/* -Copyright (c) 2019, PyData Development Team -All rights reserved. - -Distributed under the terms of the BSD Simplified License. - -The full license is in the LICENSE file, distributed with this software. 
-*/ - -#define PY_SSIZE_T_CLEAN -#include - -#ifndef Py_TPFLAGS_HAVE_GETCHARBUFFER -#define Py_TPFLAGS_HAVE_GETCHARBUFFER 0 -#endif // Py_TPFLAGS_HAVE_GETCHARBUFFER - -#ifndef Py_TPFLAGS_HAVE_NEWBUFFER -#define Py_TPFLAGS_HAVE_NEWBUFFER 0 -#endif // Py_TPFLAGS_HAVE_NEWBUFFER - -static PyObject *badmove; /* bad move exception class */ - -typedef struct { - PyObject_HEAD - /* the bytes that own the buffer we are mutating */ - PyObject *invalid_bytes; -} stolenbufobject; - -static PyTypeObject stolenbuf_type; /* forward declare type */ - -static void -stolenbuf_dealloc(stolenbufobject *self) { - Py_DECREF(self->invalid_bytes); - PyObject_Del(self); -} - -static int -stolenbuf_getbuffer(stolenbufobject *self, Py_buffer *view, int flags) { - return PyBuffer_FillInfo(view, - (PyObject*) self, - (void*) PyBytes_AS_STRING(self->invalid_bytes), - PyBytes_GET_SIZE(self->invalid_bytes), - 0, /* not readonly */ - flags); -} - -static PyBufferProcs stolenbuf_as_buffer = { - (getbufferproc) stolenbuf_getbuffer, - NULL, -}; - -PyDoc_STRVAR(stolenbuf_doc, - "A buffer that is wrapping a stolen bytes object's buffer."); - -static PyTypeObject stolenbuf_type = { - PyVarObject_HEAD_INIT(NULL, 0) - "pandas.util._move.stolenbuf", /* tp_name */ - sizeof(stolenbufobject), /* tp_basicsize */ - 0, /* tp_itemsize */ - (destructor) stolenbuf_dealloc, /* tp_dealloc */ - 0, /* tp_print */ - 0, /* tp_getattr */ - 0, /* tp_setattr */ - 0, /* tp_reserved */ - 0, /* tp_repr */ - 0, /* tp_as_number */ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ - 0, /* tp_hash */ - 0, /* tp_call */ - 0, /* tp_str */ - 0, /* tp_getattro */ - 0, /* tp_setattro */ - &stolenbuf_as_buffer, /* tp_as_buffer */ - Py_TPFLAGS_DEFAULT | - Py_TPFLAGS_HAVE_NEWBUFFER | - Py_TPFLAGS_HAVE_GETCHARBUFFER, /* tp_flags */ - stolenbuf_doc, /* tp_doc */ -}; - -PyDoc_STRVAR( - move_into_mutable_buffer_doc, - "Moves a bytes object that is about to be destroyed into a mutable buffer\n" - "without copying the data.\n" - "\n" - 
"Parameters\n" - "----------\n" - "bytes_rvalue : bytes with 1 refcount.\n" - " The bytes object that you want to move into a mutable buffer. This\n" - " cannot be a named object. It must only have a single reference.\n" - "\n" - "Returns\n" - "-------\n" - "buf : stolenbuf\n" - " An object that supports the buffer protocol which can give a mutable\n" - " view of the data that was previously owned by ``bytes_rvalue``.\n" - "\n" - "Raises\n" - "------\n" - "BadMove\n" - " Raised when a move is attempted on an object with more than one\n" - " reference.\n" - "\n" - "Notes\n" - "-----\n" - "If you want to use this function you are probably wrong.\n" - "\n" - "Warning: Do not call this function through *unpacking. This can\n" - "potentially trick the reference checks which may allow you to get a\n" - "mutable reference to a shared string!\n" - "\n"); - -/* This is implemented as a standalone function instead of the ``tp_new`` of - ``stolenbuf`` because we need to create a function using the METH_O flag - to support Python 3.6. In python 3.6, PyCFunction calls from python code now - count the reference owned by the argument tuple. This would cause the object - to have 2 references if used with a direct call like: ``stolenbuf(a)``; - however, if called through *unpacking like ``stolenbuf(*(a,))`` it would - only have the one reference (the tuple). 
*/ -static PyObject* -move_into_mutable_buffer(PyObject *self, PyObject *bytes_rvalue) { - stolenbufobject *ret; - - if (!PyBytes_CheckExact(bytes_rvalue)) { - PyErr_SetString(PyExc_TypeError, - "stolenbuf can only steal from bytes objects"); - return NULL; - } - - if (Py_REFCNT(bytes_rvalue) != 1) { - // there is a reference other than the caller's stack - PyErr_SetObject(badmove, bytes_rvalue); - return NULL; - } - - if (!(ret = PyObject_New(stolenbufobject, &stolenbuf_type))) { - return NULL; - } - - /* store the original bytes object in a field that is not - exposed to python */ - Py_INCREF(bytes_rvalue); - ret->invalid_bytes = bytes_rvalue; - return (PyObject*) ret; -} - -static PyMethodDef methods[] = { - {"move_into_mutable_buffer", - (PyCFunction) move_into_mutable_buffer, - METH_O, - move_into_mutable_buffer_doc}, - {NULL}, -}; - -#define MODULE_NAME "pandas.util._move" - -static PyModuleDef move_module = { - PyModuleDef_HEAD_INIT, - MODULE_NAME, - NULL, - -1, - methods, -}; - -PyDoc_STRVAR( - badmove_doc, - "Exception used to indicate that a move was attempted on a value with\n" - "more than a single reference.\n" - "\n" - "Parameters\n" - "----------\n" - "data : any\n" - " The data which was passed to ``move_into_mutable_buffer``.\n" - "\n" - "See Also\n" - "--------\n" - "pandas.util._move.move_into_mutable_buffer\n"); - -PyMODINIT_FUNC -#define ERROR_RETURN NULL -PyInit__move(void) { - PyObject *m; - - if (!(badmove = PyErr_NewExceptionWithDoc("pandas.util._move.BadMove", - badmove_doc, - NULL, - NULL))) { - return ERROR_RETURN; - } - - if (PyType_Ready(&stolenbuf_type)) { - return ERROR_RETURN; - } - - if (!(m = PyModule_Create(&move_module))) { - return ERROR_RETURN; - } - - if (PyModule_AddObject(m, - "stolenbuf", - (PyObject*) &stolenbuf_type)) { - Py_DECREF(m); - return ERROR_RETURN; - } - - if (PyModule_AddObject(m, "BadMove", badmove)) { - Py_DECREF(m); - return ERROR_RETURN; - } - - return m; -} diff --git a/setup.py b/setup.py index 
f7eb467cca8bc..618cad331c25a 100755 --- a/setup.py +++ b/setup.py @@ -347,8 +347,6 @@ class CheckSDist(sdist_class): _cpp_pyxfiles = [ "pandas/_libs/window/aggregations.pyx", - "pandas/io/msgpack/_packer.pyx", - "pandas/io/msgpack/_unpacker.pyx", ] def initialize_options(self): @@ -700,31 +698,6 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "_libs.window.indexers": {"pyxfile": "_libs/window/indexers"}, "_libs.writers": {"pyxfile": "_libs/writers"}, "io.sas._sas": {"pyxfile": "io/sas/sas"}, - "io.msgpack._packer": { - "macros": endian_macro + macros, - "depends": [ - "pandas/_libs/src/msgpack/pack.h", - "pandas/_libs/src/msgpack/pack_template.h", - ], - "include": ["pandas/_libs/src/msgpack"] + common_include, - "language": "c++", - "suffix": ".cpp", - "pyxfile": "io/msgpack/_packer", - "subdir": "io/msgpack", - }, - "io.msgpack._unpacker": { - "depends": [ - "pandas/_libs/src/msgpack/unpack.h", - "pandas/_libs/src/msgpack/unpack_define.h", - "pandas/_libs/src/msgpack/unpack_template.h", - ], - "macros": endian_macro + macros, - "include": ["pandas/_libs/src/msgpack"] + common_include, - "language": "c++", - "suffix": ".cpp", - "pyxfile": "io/msgpack/_unpacker", - "subdir": "io/msgpack", - }, } extensions = [] @@ -787,19 +760,6 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): extensions.append(ujson_ext) -# ---------------------------------------------------------------------- -# util -# extension for pseudo-safely moving bytes into mutable buffers -_move_ext = Extension( - "pandas.util._move", - depends=[], - sources=["pandas/util/move.c"], - define_macros=macros, - extra_compile_args=extra_compile_args, - extra_link_args=extra_link_args, -) -extensions.append(_move_ext) - # ----------------------------------------------------------------------