rapidsai · rapids-bot · Nov 8, 2021 · Sep 8, 2021 · Sep 8, 2021 · Sep 9, 2021
@@ -22,7 +22,7 @@
 import warnings
 
 from scipy import sparse as sp_sparse
-from cupy import sparse as cu_sparse
+from cupyx.scipy import sparse as cu_sparse
 import numpy as cpu_np
 import cupy as np
 import numba

@@ -22,7 +22,7 @@
 
 import numpy as cpu_np
 import cupy as np
-from cupy import sparse
+from cupyx.scipy import sparse
 from scipy import stats
 from scipy import optimize
 from scipy.special import boxcox
@@ -1238,7 +1238,7 @@ def fit(self, X, y=None) -> "RobustScaler":
                 else:
                     column_data = X[:, feature_idx]
 
-                is_not_nan = ~np.isnan(column_data).astype(np.bool)
+                is_not_nan = ~np.isnan(column_data).astype(bool)
                 column_data = column_data[is_not_nan]
                 quantiles.append(np.percentile(column_data,
                                                self.quantile_range))

@@ -258,7 +258,7 @@ def _validate_n_bins(self, n_features):
                 raise ValueError("{} received an invalid number "
                                  "of bins. Received {}, expected at least 2."
                                  .format(KBinsDiscretizer.__name__, orig_bins))
-            return np.full(n_features, orig_bins, dtype=np.int)
+            return np.full(n_features, orig_bins, dtype=int)
 
         n_bins = check_array(orig_bins, dtype=np.int, copy=True,
                              ensure_2d=False)

@@ -16,7 +16,7 @@
 import numpy
 import cupy as np
 import cuml
-from cupy import sparse
+from cupyx.scipy import sparse
 
 from ....thirdparty_adapters import (_get_mask,
                                      _masked_column_median,
@@ -447,7 +447,7 @@ def transform(self, X) -> SparseCumlArray:
             else:
                 mask = _get_mask(X.data, self.missing_values)
                 indexes = np.repeat(
-                    np.arange(len(X.indptr) - 1, dtype=np.int),
+                    np.arange(len(X.indptr) - 1, dtype=int),
                     np.diff(X.indptr).tolist())[mask]
 
                 X.data[mask] = valid_statistics[indexes].astype(X.dtype,

@@ -18,7 +18,7 @@
 
 import cupy as np
 import cupyx
-from cupy import sparse
+from cupyx.scipy import sparse
 
 
 def row_norms(X, squared=False):

@@ -13,7 +13,7 @@
 
 
 from scipy import sparse as cpu_sp
-from cupy import sparse as gpu_sp
+from cupyx.scipy import sparse as gpu_sp
 import cupy as np
 import numpy as cpu_np
 

@@ -18,7 +18,7 @@
 import numbers
 import numpy as np
 import cupy as cp
-import cupy.sparse as sp
+import cupyx.scipy.sparse as sp
 from inspect import isclass
 
 from ....common.exceptions import NotFittedError

@@ -135,9 +135,9 @@ class AgglomerativeClustering(Base, ClusterMixin, CMajorInputTagMixin):
                  handle=None, verbose=False, connectivity='knn',
                  n_neighbors=10, output_type=None):
 
-        super().__init__(handle,
-                         verbose,
-                         output_type)
+        super().__init__(handle=handle,
+                         verbose=verbose,
+                         output_type=output_type)
 
         if linkage is not "single":
             raise ValueError("Only single linkage clustering is "

@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -22,7 +22,7 @@
 from cuml.common.import_utils import has_scipy
 import cuml.internals
 from cuml.common.kernel_utils import cuda_kernel_factory
-from cupy.sparse import csr_matrix as cp_csr_matrix,\
+from cupyx.scipy.sparse import csr_matrix as cp_csr_matrix,\
     coo_matrix as cp_coo_matrix, csc_matrix as cp_csc_matrix
 
 
@@ -191,7 +191,7 @@ def extract_knn_graph(knn_graph, convert_dtype=True, sparse=False):
         csc_matrix = DummyClass
 
     if isinstance(knn_graph, (csc_matrix, cp_csc_matrix)):
-        knn_graph = cp.sparse.csr_matrix(knn_graph)
+        knn_graph = cupyx.scipy.sparse.csr_matrix(knn_graph)
         n_samples = knn_graph.shape[0]
         reordering = knn_graph.data.reshape((n_samples, -1))
         reordering = reordering.argsort()

@@ -158,7 +158,7 @@ class PCA(BaseDecomposition,
 
     def __init__(self, *, client=None, verbose=False, **kwargs):
 
-        super().__init__(PCA._create_pca,
+        super().__init__(model_func=PCA._create_pca,
                          client=client,
                          verbose=verbose,
                          **kwargs)

@@ -121,8 +121,8 @@ def __init__(self, *, client=None, **kwargs):
         """
         Constructor for distributed TruncatedSVD model
         """
-        super().__init__(TruncatedSVD._create_tsvd,
-                         client,
+        super().__init__(model_func=TruncatedSVD._create_tsvd,
+                         client=client,
                          **kwargs)
 
     def fit(self, X, _transform=False):

@@ -102,7 +102,7 @@ def _fit(self, model, dataset, convert_dtype, broadcast_data):
         self.active_workers = data.workers
         self.datatype = data.datatype
         if self.datatype == 'cudf':
-            has_float64 = (dataset[0].dtypes.any() == np.float64)
+            has_float64 = (dataset[0].dtypes == np.float64).any()
         else:
             has_float64 = (dataset[0].dtype == np.float64)
         if has_float64:

@@ -93,7 +93,7 @@ class UMAP(BaseEstimator,
 
     """
     def __init__(self, *, model, client=None, **kwargs):
-        super().__init__(client, **kwargs)
+        super().__init__(client=client, **kwargs)
 
         self._set_internal_model(model)
 

@@ -317,7 +317,7 @@ class Lars(Base, RegressorMixin):
             X = cp.copy(X)
 
         if self.eps is None:
-            self.eps = np.finfo(np.float).eps
+            self.eps = np.finfo(float).eps
 
         self._fit_cpp(X, y, Gram, x_scale)
 

@@ -161,7 +161,7 @@ def _set_idf_diag(self):
         # log+1 instead of log makes sure terms with zero idf don't get
         # suppressed entirely.
         idf = cp.log(n_samples / df) + 1
-        self._idf_diag = cp.sparse.dia_matrix(
+        self._idf_diag = cupyx.scipy.sparse.dia_matrix(
             (idf, 0),
             shape=(self.__n_features, self.__n_features),
             dtype=df.dtype

@@ -280,10 +280,6 @@ class TSNE(Base,
         if n_components < 0:
             raise ValueError("n_components = {} should be more "
                              "than 0.".format(n_components))
-        if n_components != 2 and (method == 'barnes_hut' or method == 'fft'):
-            warnings.warn("Barnes Hut and FFT only work when "
-                          "n_components == 2. Switching to exact.")
-            method = 'exact'
         if n_components != 2:
             raise ValueError("Currently TSNE supports n_components = 2; "
                              "but got n_components = {}".format(n_components))

@@ -54,8 +54,8 @@ def _get_array_ptr(obj):
 
 @cuml.internals.api_return_any()
 def trustworthiness(X, X_embedded, handle=None, n_neighbors=5,
-                    metric='euclidean', should_downcast=True,
-                    convert_dtype=False, batch_size=512) -> double:
+                    metric='euclidean',
+                    convert_dtype=True, batch_size=512) -> double:
     """
     Expresses to what extent the local structure is retained in embedding.
     The score is defined in the range [0, 1].
@@ -83,10 +83,8 @@ def trustworthiness(X, X_embedded, handle=None, n_neighbors=5,
             Trustworthiness of the low-dimensional embedding
     """
 
-    if should_downcast:
-        convert_dtype = True
-        warnings.warn("Parameter should_downcast is deprecated, use "
-                      "convert_dtype instead. ")
+    if n_neighbors > X.shape[0]:
+        raise ValueError("n_neighbors must be <= the number of rows.")
 
     if n_neighbors > X.shape[0]:
         raise ValueError("n_neighbors must be <= the number of rows.")

@@ -222,7 +222,7 @@ def _approximate_mode(class_counts, n_draws, rng):
             need_to_add -= add_now
             if need_to_add == 0:
                 break
-    return floored.astype(cp.int)
+    return floored.astype(int)
 
 
 def train_test_split(X,

@@ -363,7 +363,7 @@ def _partial_fit(self, X, y, _classes=None, _refit=False,
             raise ValueError("classes must be passed on the first call "
                              "to partial_fit.")
 
-        if scipy_sparse_isspmatrix(X) or cp.sparse.isspmatrix(X):
+        if scipy_sparse_isspmatrix(X) or cupyx.scipy.sparse.isspmatrix(X):
             X = _convert_x_sparse(X)
         else:
             X = input_to_cupy_array(X, order='K',
@@ -513,7 +513,7 @@ def _update_mean_variance(self, X, Y, sample_weight=None):
         new_var = cp.zeros((self.n_classes_, self.n_features_), order="F",
                            dtype=X.dtype)
         class_counts = cp.zeros(self.n_classes_, order="F", dtype=X.dtype)
-        if cp.sparse.isspmatrix(X):
+        if cupyx.scipy.sparse.isspmatrix(X):
             X = X.tocoo()
 
             count_features_coo = count_features_coo_kernel(X.dtype,
@@ -741,7 +741,7 @@ def _partial_fit(self, X, y, sample_weight=None,
                 as scipy_sparse_isspmatrix
 
         # TODO: use SparseCumlArray
-        if scipy_sparse_isspmatrix(X) or cp.sparse.isspmatrix(X):
+        if scipy_sparse_isspmatrix(X) or cupyx.scipy.sparse.isspmatrix(X):
             X = _convert_x_sparse(X)
         else:
             X = input_to_cupy_array(X, order='K',
@@ -780,7 +780,7 @@ def _partial_fit(self, X, y, sample_weight=None,
         else:
             check_labels(Y, self.classes_)
 
-        if cp.sparse.isspmatrix(X):
+        if cupyx.scipy.sparse.isspmatrix(X):
             # X is assumed to be a COO here
             self._count_sparse(X.row, X.col, X.data, X.shape, Y, self.classes_)
         else:
@@ -832,7 +832,7 @@ def _count(self, X, Y, classes):
         Sum feature counts & class prior counts and add to current model.
         Parameters
         ----------
-        X : cupy.ndarray or cupy.sparse matrix of size
+        X : cupy.ndarray or cupyx.scipy.sparse matrix of size
                   (n_rows, n_features)
         Y : cupy.array of monotonic class labels
         """
@@ -1035,7 +1035,7 @@ class MultinomialNB(_BaseDiscreteNB):
 
         # Put feature vectors and labels on the GPU
 
-        X = cp.sparse.csr_matrix(features.tocsr(), dtype=cp.float32)
+        X = cupyx.scipy.sparse.csr_matrix(features.tocsr(), dtype=cp.float32)
         y = cp.asarray(twenty_train.target, dtype=cp.int32)
 
         # Train model
@@ -1196,7 +1196,7 @@ def __init__(self, *, alpha=1.0, binarize=.0, fit_prior=True,
     def _check_X(self, X):
         X = super()._check_X(X)
         if self.binarize is not None:
-            if cp.sparse.isspmatrix(X):
+            if cupyx.scipy.sparse.isspmatrix(X):
                 X.data = binarize(X.data, threshold=self.binarize)
             else:
                 X = binarize(X, threshold=self.binarize)
@@ -1205,7 +1205,7 @@ def _check_X(self, X):
     def _check_X_y(self, X, y):
         X, y = super()._check_X_y(X, y)
         if self.binarize is not None:
-            if cp.sparse.isspmatrix(X):
+            if cupyx.scipy.sparse.isspmatrix(X):
                 X.data = binarize(X.data, threshold=self.binarize)
             else:
                 X = binarize(X, threshold=self.binarize)

@@ -107,7 +107,7 @@ def __init__(self, *,
                  categories='auto',
                  drop=None,
                  sparse=True,
-                 dtype=np.float,
+                 dtype=np.float32,
                  handle_unknown='error',
                  handle=None,
                  verbose=False,
@@ -347,7 +347,7 @@ def transform(self, X):
 
                 if self.drop_idx_ is not None:
                     drop_idx = self.drop_idx_[feature] + j
-                    mask = cp.ones(col_idx.shape, dtype=cp.bool)
+                    mask = cp.ones(col_idx.shape, dtype=bool)
                     mask[col_idx == drop_idx] = False
                     col_idx = col_idx[mask]
                     row_idx = row_idx[mask]
@@ -454,8 +454,9 @@ def inverse_transform(self, X):
                 result = cp.asarray(result.as_gpu_matrix())
             except ValueError:
                 warnings.warn("The input one hot encoding contains rows with "
-                              "unknown categories. Arrays do not support null "
-                              "values. Returning output as a DataFrame "
+                              "unknown categories. Since device arrays do not "
+                              "support null values, the output will be "
+                              "returned as a DataFrame "
                               "instead.")
         return result
 

@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -15,7 +15,6 @@
 #
 
 import cudf
-import numpy as np
 import cupy as cp
 from .porter_stemmer_utils.suffix_utils import (
     get_stem_series,
@@ -734,7 +733,7 @@ def get_condition_flag(word_str_ser, condition):
         return a bool series where flag is valid
     """
     if condition is None:
-        return cudf.Series(cp.ones(len(word_str_ser), np.bool))
+        return cudf.Series(cp.ones(len(word_str_ser), bool))
     else:
         return condition(word_str_ser)
 
@@ -812,5 +811,5 @@ def build_can_replace_mask(len_mask, mask):
       if mask is None else returns mask
     """
     if mask is None:
-        mask = cudf.Series(cp.ones(len_mask, dtype=cp.bool))
+        mask = cudf.Series(cp.ones(len_mask, dtype=bool))
     return mask
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -73,8 +73,8 @@ def imp():
     from cuml.dask.linear_model import LinearRegression as cumlOLS_dask
 
     n_info = 5
-    nrows = np.int(nrows)
-    ncols = np.int(ncols)
+    nrows = int(nrows)
+    ncols = int(ncols)
     X, y = make_regression_dataset(datatype, nrows, ncols, n_info)
 
     X_df, y_df = _prep_training_data(client, X, y, n_parts)

@@ -1,4 +1,4 @@
-# Copyright (c) 2019, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -68,8 +68,8 @@ def test_ridge(nrows, ncols, n_parts, fit_intercept,
     from cuml.dask.linear_model import Ridge as cumlRidge_dask
 
     n_info = 5
-    nrows = np.int(nrows)
-    ncols = np.int(ncols)
+    nrows = int(nrows)
+    ncols = int(ncols)
     X, y = make_regression_dataset(datatype, nrows, ncols, n_info)
 
     X_df, y_df = _prep_training_data(client, X, y, n_parts)