FIX-#4570: Replace np.bool -> np.bool_ (#4571)

Signed-off-by: Nick Crews <nicholas.b.crews@gmail.com>
Co-authored-by: Devin Petersohn <devin-petersohn@users.noreply.github.com>
Co-authored-by: Rehan Durrani <rehan@ponder.io>
modin-project · Jun 16, 2022 · dbc78a9
1 parent 8f35ab5
commit dbc78a9
Show file tree

Hide file tree

Showing 6 changed files with 28 additions and 30 deletions.
diff --git a/docs/release_notes/release_notes-0.16.0.rst b/docs/release_notes/release_notes-0.16.0.rst
@@ -6,6 +6,7 @@ Key Features and Updates
 ------------------------
 
 * Stability and Bugfixes
+  * FIX-#4570: Replace ``np.bool`` -> ``np.bool_`` (#4571)
   * FIX-#4543: Fix `read_csv` in case skiprows=<0, []> (#4544)
   * FIX-#4059: Add cell-wise execution for binary ops, fix bin ops for empty dataframes (#4391)
 * Performance enhancements
@@ -34,4 +35,5 @@ Key Features and Updates
 Contributors
 ------------
 @mvashishtha
+@NickCrews
 @prutskov
diff --git a/modin/core/storage_formats/pandas/query_compiler.py b/modin/core/storage_formats/pandas/query_compiler.py
@@ -1347,13 +1347,13 @@ def stack(self, level, dropna):
     applymap = Map.register(pandas.DataFrame.applymap)
     conj = Map.register(lambda df, *args, **kwargs: pandas.DataFrame(np.conj(df)))
     invert = Map.register(pandas.DataFrame.__invert__)
-    isin = Map.register(pandas.DataFrame.isin, dtypes=np.bool)
-    isna = Map.register(pandas.DataFrame.isna, dtypes=np.bool)
+    isin = Map.register(pandas.DataFrame.isin, dtypes=np.bool_)
+    isna = Map.register(pandas.DataFrame.isna, dtypes=np.bool_)
     _isfinite = Map.register(
         lambda df, *args, **kwargs: pandas.DataFrame(np.isfinite(df))
     )
     negative = Map.register(pandas.DataFrame.__neg__)
-    notna = Map.register(pandas.DataFrame.notna, dtypes=np.bool)
+    notna = Map.register(pandas.DataFrame.notna, dtypes=np.bool_)
     round = Map.register(pandas.DataFrame.round)
     replace = Map.register(pandas.DataFrame.replace)
     series_view = Map.register(
@@ -1373,22 +1373,22 @@ def stack(self, level, dropna):
 
     str_capitalize = Map.register(_str_map("capitalize"), dtypes="copy")
     str_center = Map.register(_str_map("center"), dtypes="copy")
-    str_contains = Map.register(_str_map("contains"), dtypes=np.bool)
+    str_contains = Map.register(_str_map("contains"), dtypes=np.bool_)
     str_count = Map.register(_str_map("count"), dtypes=int)
-    str_endswith = Map.register(_str_map("endswith"), dtypes=np.bool)
+    str_endswith = Map.register(_str_map("endswith"), dtypes=np.bool_)
     str_find = Map.register(_str_map("find"), dtypes="copy")
     str_findall = Map.register(_str_map("findall"), dtypes="copy")
     str_get = Map.register(_str_map("get"), dtypes="copy")
     str_index = Map.register(_str_map("index"), dtypes="copy")
-    str_isalnum = Map.register(_str_map("isalnum"), dtypes=np.bool)
-    str_isalpha = Map.register(_str_map("isalpha"), dtypes=np.bool)
-    str_isdecimal = Map.register(_str_map("isdecimal"), dtypes=np.bool)
-    str_isdigit = Map.register(_str_map("isdigit"), dtypes=np.bool)
-    str_islower = Map.register(_str_map("islower"), dtypes=np.bool)
-    str_isnumeric = Map.register(_str_map("isnumeric"), dtypes=np.bool)
-    str_isspace = Map.register(_str_map("isspace"), dtypes=np.bool)
-    str_istitle = Map.register(_str_map("istitle"), dtypes=np.bool)
-    str_isupper = Map.register(_str_map("isupper"), dtypes=np.bool)
+    str_isalnum = Map.register(_str_map("isalnum"), dtypes=np.bool_)
+    str_isalpha = Map.register(_str_map("isalpha"), dtypes=np.bool_)
+    str_isdecimal = Map.register(_str_map("isdecimal"), dtypes=np.bool_)
+    str_isdigit = Map.register(_str_map("isdigit"), dtypes=np.bool_)
+    str_islower = Map.register(_str_map("islower"), dtypes=np.bool_)
+    str_isnumeric = Map.register(_str_map("isnumeric"), dtypes=np.bool_)
+    str_isspace = Map.register(_str_map("isspace"), dtypes=np.bool_)
+    str_istitle = Map.register(_str_map("istitle"), dtypes=np.bool_)
+    str_isupper = Map.register(_str_map("isupper"), dtypes=np.bool_)
     str_join = Map.register(_str_map("join"), dtypes="copy")
     str_len = Map.register(_str_map("len"), dtypes=int)
     str_ljust = Map.register(_str_map("ljust"), dtypes="copy")
@@ -1409,7 +1409,7 @@ def stack(self, level, dropna):
     str_slice = Map.register(_str_map("slice"), dtypes="copy")
     str_slice_replace = Map.register(_str_map("slice_replace"), dtypes="copy")
     str_split = Map.register(_str_map("split"), dtypes="copy")
-    str_startswith = Map.register(_str_map("startswith"), dtypes=np.bool)
+    str_startswith = Map.register(_str_map("startswith"), dtypes=np.bool_)
     str_strip = Map.register(_str_map("strip"), dtypes="copy")
     str_swapcase = Map.register(_str_map("swapcase"), dtypes="copy")
     str_title = Map.register(_str_map("title"), dtypes="copy")

diff --git a/modin/pandas/base.py b/modin/pandas/base.py
@@ -25,6 +25,7 @@
     is_datetime_or_timedelta_dtype,
     is_dtype_equal,
     is_object_dtype,
+    pandas_dtype,
 )
 from pandas.core.indexes.api import ensure_index
 import pandas.core.window.rolling
@@ -739,7 +740,7 @@ def all(
                             type(self).__name__, "all"
                         )
                     )
-                data_for_compute = self[self.columns[self.dtypes == np.bool]]
+                data_for_compute = self[self.columns[self.dtypes == np.bool_]]
                 return data_for_compute.all(
                     axis=axis, bool_only=False, skipna=skipna, level=level, **kwargs
                 )
@@ -802,7 +803,7 @@ def any(
                             type(self).__name__, "all"
                         )
                     )
-                data_for_compute = self[self.columns[self.dtypes == np.bool]]
+                data_for_compute = self[self.columns[self.dtypes == np.bool_]]
                 return data_for_compute.any(
                     axis=axis, bool_only=False, skipna=skipna, level=level, **kwargs
                 )
@@ -1105,7 +1106,7 @@ def count(self, axis=0, level=None, numeric_only=False):  # noqa: PR01, RT01, D2
         Count non-NA cells for `BasePandasDataset`.
         """
         axis = self._get_axis_number(axis)
-        frame = self.select_dtypes([np.number, np.bool]) if numeric_only else self
+        frame = self.select_dtypes([np.number, np.bool_]) if numeric_only else self
 
         if level is not None:
             if not frame._query_compiler.has_multiindex(axis=axis):
@@ -1186,12 +1187,7 @@ def describe(
         if include is not None and (isinstance(include, np.dtype) or include != "all"):
             if not is_list_like(include):
                 include = [include]
-            include = [
-                np.dtype(i)
-                if not (isinstance(i, type) and i.__module__ == "numpy")
-                else i
-                for i in include
-            ]
+            include = [pandas_dtype(i) if i != np.number else i for i in include]
             if not any(
                 (isinstance(inc, np.dtype) and inc == d)
                 or (
@@ -1206,7 +1202,7 @@ def describe(
         if exclude is not None:
             if not is_list_like(exclude):
                 exclude = [exclude]
-            exclude = [np.dtype(e) for e in exclude]
+            exclude = [pandas_dtype(e) if e != np.number else e for e in exclude]
             if all(
                 (isinstance(exc, np.dtype) and exc == d)
                 or (

diff --git a/modin/pandas/series.py b/modin/pandas/series.py
@@ -2441,7 +2441,7 @@ def _getitem(self, key):
             Series with retrieved data.
         """
         key = apply_if_callable(key, self)
-        if isinstance(key, Series) and key.dtype == np.bool:
+        if isinstance(key, Series) and key.dtype == np.bool_:
             # This ends up being significantly faster than looping through and getting
             # each item individually.
             key = key._to_pandas()

diff --git a/modin/pandas/test/dataframe/test_reduce.py b/modin/pandas/test/dataframe/test_reduce.py
@@ -186,7 +186,7 @@ def test_2195(datetime_is_numeric, has_numeric_column):
     [
         ([np.float64], None),
         (np.float64, None),
-        (None, [np.timedelta64, np.datetime64, np.object, np.bool]),
+        (None, [np.timedelta64, np.datetime64, np.object, np.bool_]),
         (None, "all"),
         (None, np.number),
     ],

diff --git a/modin/pandas/test/test_series.py b/modin/pandas/test/test_series.py
@@ -1393,16 +1393,16 @@ def test_describe(data):
 
     try:
         pandas_result = pandas_series.describe(
-            include=[np.timedelta64, np.datetime64, np.object, np.bool]
+            include=[np.timedelta64, np.datetime64, np.object, np.bool_]
         )
     except Exception as e:
         with pytest.raises(type(e)):
             modin_series.describe(
-                include=[np.timedelta64, np.datetime64, np.object, np.bool]
+                include=[np.timedelta64, np.datetime64, np.object, np.bool_]
             )
     else:
         modin_result = modin_series.describe(
-            include=[np.timedelta64, np.datetime64, np.object, np.bool]
+            include=[np.timedelta64, np.datetime64, np.object, np.bool_]
         )
         df_equals(modin_result, pandas_result)