From 045a341077b73045376938080dea5c13b54ead08 Mon Sep 17 00:00:00 2001 From: mattip Date: Fri, 11 Aug 2017 15:30:44 +0300 Subject: [PATCH] COMPAT: avoid calling getsizeof() on PyPy --- pandas/core/base.py | 7 ++++--- pandas/core/indexes/multi.py | 3 ++- pandas/core/indexes/range.py | 7 +++++-- pandas/tests/frame/test_repr_info.py | 22 ++++++++++++---------- pandas/tests/test_base.py | 2 ++ pandas/tests/test_categorical.py | 9 +++++---- pandas/util/testing.py | 2 ++ 7 files changed, 32 insertions(+), 20 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 8f21e3125a27e6..0836f13437e919 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -23,6 +23,8 @@ _indexops_doc_kwargs = dict(klass='IndexOpsMixin', inplace='', unique='IndexOpsMixin', duplicated='IndexOpsMixin') +import platform +IS_PYPY = platform.python_implementation() == 'PyPy' class StringMixin(object): """implements string methods so long as object defines a `__unicode__` @@ -1061,7 +1063,7 @@ def memory_usage(self, deep=False): Notes ----- Memory usage does not include memory consumed by elements that - are not components of the array if deep=False + are not components of the array if deep=False or if used on PyPy See Also -------- @@ -1071,9 +1073,8 @@ def memory_usage(self, deep=False): return self.values.memory_usage(deep=deep) v = self.values.nbytes - if deep and is_object_dtype(self): + if deep and is_object_dtype(self) and not IS_PYPY: v += lib.memory_usage_of_objects(self.values) - return v def factorize(self, sort=False, na_sentinel=-1): diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 420788f9008cd7..9eda0b8dd917ba 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -467,7 +467,8 @@ def _nbytes(self, deep=False): """ level_nbytes = sum((i.memory_usage(deep=deep) for i in self.levels)) label_nbytes = sum((i.nbytes for i in self.labels)) - names_nbytes = sum((getsizeof(i) for i in self.names)) + objsize = 24 # 
for implementations with no meaningful getsizeof (PyPy) + names_nbytes = sum((getsizeof(i, 24) for i in self.names)) result = level_nbytes + label_nbytes + names_nbytes # include our engine hashtable diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 5071b50bbebdf8..671d581b33583b 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -194,8 +194,11 @@ def _format_data(self): @cache_readonly def nbytes(self): - """ return the number of bytes in the underlying data """ - return sum([getsizeof(getattr(self, v)) for v in + """ return the number of bytes in the underlying data + On implementations where this is problematic (PyPy) + assume 24 bytes for each value + """ + return sum([getsizeof(getattr(self, v), 24) for v in ['_start', '_stop', '_step']]) def memory_usage(self, deep=False): diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index c317ad542659a9..8e2bbc9d1d1b49 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -332,13 +332,14 @@ def test_info_memory_usage(self): res = buf.getvalue().splitlines() assert re.match(r"memory usage: [^+]+$", res[-1]) - assert (df_with_object_index.memory_usage( - index=True, deep=True).sum() > df_with_object_index.memory_usage( - index=True).sum()) + if not tm.IS_PYPY: + assert (df_with_object_index.memory_usage( + index=True, deep=True).sum() > df_with_object_index.memory_usage( + index=True).sum()) - df_object = pd.DataFrame({'a': ['a']}) - assert (df_object.memory_usage(deep=True).sum() > - df_object.memory_usage().sum()) + df_object = pd.DataFrame({'a': ['a']}) + assert (df_object.memory_usage(deep=True).sum() > + df_object.memory_usage().sum()) # Test a DataFrame with duplicate columns dtypes = ['int64', 'int64', 'int64', 'float64'] @@ -377,10 +378,11 @@ def test_info_memory_usage(self): df.memory_usage(index=True) df.index.values.nbytes - # sys.getsizeof will call the .memory_usage with 
- # deep=True, and add on some GC overhead - diff = df.memory_usage(deep=True).sum() - sys.getsizeof(df) - assert abs(diff) < 100 + if not tm.IS_PYPY: + # sys.getsizeof will call the .memory_usage with + # deep=True, and add on some GC overhead + diff = df.memory_usage(deep=True).sum() - sys.getsizeof(df) + assert abs(diff) < 100 def test_info_memory_usage_qualified(self): diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 9af4a9edeb8b17..e7f359b9cab738 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -144,6 +144,7 @@ def f(): pytest.raises(TypeError, f) + @pytest.mark.skipif(tm.IS_PYPY, "not relevant for PyPy") def test_memory_usage(self): # Delegate does not implement memory_usage. # Check that we fall back to in-built `__sizeof__` @@ -941,6 +942,7 @@ def test_fillna(self): # check shallow_copied assert o is not result + @pytest.mark.skipif(tm.IS_PYPY, "not relevant for PyPy") def test_memory_usage(self): for o in self.objs: res = o.memory_usage() diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index eecdd672095b0c..2476d10e8ef3e1 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -1448,10 +1448,11 @@ def test_memory_usage(self): cat = pd.Categorical(['foo', 'foo', 'bar']) assert cat.memory_usage(deep=True) > cat.nbytes - # sys.getsizeof will call the .memory_usage with - # deep=True, and add on some GC overhead - diff = cat.memory_usage(deep=True) - sys.getsizeof(cat) - assert abs(diff) < 100 + if not tm.IS_PYPY: + # sys.getsizeof will call the .memory_usage with + # deep=True, and add on some GC overhead + diff = cat.memory_usage(deep=True) - sys.getsizeof(cat) + assert abs(diff) < 100 def test_searchsorted(self): # https://github.com/pandas-dev/pandas/issues/8420 diff --git a/pandas/util/testing.py b/pandas/util/testing.py index d6ba9561340cc5..197fd494c1eceb 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -56,6 +56,8 @@ K = 
4 _RAISE_NETWORK_ERROR_DEFAULT = False +import platform +IS_PYPY = platform.python_implementation() == 'PyPy' # set testing_mode _testing_mode_warnings = (DeprecationWarning, compat.ResourceWarning)