diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 33b41d61aa9786..b367fda002b74a 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -31,6 +31,7 @@ from distutils.version import LooseVersion from itertools import product import sys +import platform import types from unicodedata import east_asian_width import struct @@ -41,6 +42,7 @@ PY3 = (sys.version_info[0] >= 3) PY35 = (sys.version_info >= (3, 5)) PY36 = (sys.version_info >= (3, 6)) +PYPY = (platform.python_implementation() == 'PyPy') try: import __builtin__ as builtins diff --git a/pandas/core/base.py b/pandas/core/base.py index 8f21e3125a27e6..4ae47360357933 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -15,6 +15,7 @@ import pandas.core.nanops as nanops import pandas._libs.lib as lib from pandas.compat.numpy import function as nv +from pandas.compat import PYPY from pandas.util._decorators import (Appender, cache_readonly, deprecate_kwarg, Substitution) from pandas.core.common import AbstractMethodError @@ -1061,7 +1062,7 @@ def memory_usage(self, deep=False): Notes ----- Memory usage does not include memory consumed by elements that - are not components of the array if deep=False + are not components of the array if deep=False or if used on PyPy See Also -------- @@ -1071,9 +1072,8 @@ def memory_usage(self, deep=False): return self.values.memory_usage(deep=deep) v = self.values.nbytes - if deep and is_object_dtype(self): + if deep and is_object_dtype(self) and not PYPY: v += lib.memory_usage_of_objects(self.values) - return v def factorize(self, sort=False, na_sentinel=-1): diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 420788f9008cd7..4d917407d4aaeb 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -467,7 +467,8 @@ def _nbytes(self, deep=False): """ level_nbytes = sum((i.memory_usage(deep=deep) for i in self.levels)) label_nbytes = sum((i.nbytes for i in self.labels)) - names_nbytes = sum((getsizeof(i) for i in self.names)) + objsize = 24 # for implementations with no useful getsizeof (PyPy) + names_nbytes = sum((getsizeof(i, objsize) for i in self.names)) result = level_nbytes + label_nbytes + names_nbytes # include our engine hashtable diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 5071b50bbebdf8..671d581b33583b 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -194,8 +194,11 @@ def _format_data(self): @cache_readonly def nbytes(self): - """ return the number of bytes in the underlying data """ - return sum([getsizeof(getattr(self, v)) for v in + """ return the number of bytes in the underlying data + On implementations where this is problematic (PyPy) + assume 24 bytes for each value + """ + return sum([getsizeof(getattr(self, v), 24) for v in ['_start', '_stop', '_step']]) def memory_usage(self, deep=False): diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index c317ad542659a9..d4ca980ca77606 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -332,13 +332,25 @@ def test_info_memory_usage(self): res = buf.getvalue().splitlines() assert re.match(r"memory usage: [^+]+$", res[-1]) - assert (df_with_object_index.memory_usage( - index=True, deep=True).sum() > df_with_object_index.memory_usage( - index=True).sum()) + if tm.IS_PYPY: + assert (df_with_object_index.memory_usage( + index=True, deep=True).sum() == + df_with_object_index.memory_usage( + index=True).sum()) - df_object = pd.DataFrame({'a': ['a']}) - assert (df_object.memory_usage(deep=True).sum() > - df_object.memory_usage().sum()) + df_object = pd.DataFrame({'a': ['a']}) + assert (df_object.memory_usage(deep=True).sum() == + df_object.memory_usage().sum()) + + else: + assert (df_with_object_index.memory_usage( + index=True, deep=True).sum() > + df_with_object_index.memory_usage( + index=True).sum()) + + df_object = pd.DataFrame({'a': ['a']}) + assert (df_object.memory_usage(deep=True).sum() > + df_object.memory_usage().sum()) # Test a DataFrame with duplicate columns dtypes = ['int64', 'int64', 'int64', 'float64'] @@ -377,10 +389,12 @@ def test_info_memory_usage(self): df.memory_usage(index=True) df.index.values.nbytes - # sys.getsizeof will call the .memory_usage with - # deep=True, and add on some GC overhead - diff = df.memory_usage(deep=True).sum() - sys.getsizeof(df) - assert abs(diff) < 100 + mem = df.memory_usage(deep=True).sum() + if not tm.IS_PYPY: + # sys.getsizeof will call the .memory_usage with + # deep=True, and add on some GC overhead + diff = mem - sys.getsizeof(df) + assert abs(diff) < 100 def test_info_memory_usage_qualified(self): diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 9af4a9edeb8b17..474c340be094a3 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -144,6 +144,7 @@ def f(): pytest.raises(TypeError, f) + @pytest.mark.skipif(tm.IS_PYPY, reason="not relevant for PyPy") def test_memory_usage(self): # Delegate does not implement memory_usage. # Check that we fall back to in-built `__sizeof__` @@ -941,6 +942,7 @@ def test_fillna(self): # check shallow_copied assert o is not result + @pytest.mark.skipif(tm.IS_PYPY, reason="not relevant for PyPy") def test_memory_usage(self): for o in self.objs: res = o.memory_usage() diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index eecdd672095b0c..2476d10e8ef3e1 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -1448,10 +1448,11 @@ def test_memory_usage(self): cat = pd.Categorical(['foo', 'foo', 'bar']) assert cat.memory_usage(deep=True) > cat.nbytes - # sys.getsizeof will call the .memory_usage with - # deep=True, and add on some GC overhead - diff = cat.memory_usage(deep=True) - sys.getsizeof(cat) - assert abs(diff) < 100 + if not tm.IS_PYPY: + # sys.getsizeof will call the .memory_usage with + # deep=True, and add on some GC overhead + diff = cat.memory_usage(deep=True) - sys.getsizeof(cat) + assert abs(diff) < 100 def test_searchsorted(self): # https://github.com/pandas-dev/pandas/issues/8420 diff --git a/pandas/util/testing.py b/pandas/util/testing.py index d6ba9561340cc5..197fd494c1eceb 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -56,6 +56,8 @@ K = 4 _RAISE_NETWORK_ERROR_DEFAULT = False +import platform +IS_PYPY = platform.python_implementation() == 'PyPy' # set testing_mode _testing_mode_warnings = (DeprecationWarning, compat.ResourceWarning)