Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove most warnings from pytest suite #4196

Merged
merged 19 commits into from
Nov 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import warnings

from scipy import sparse as sp_sparse
from cupy import sparse as cu_sparse
from cupyx.scipy import sparse as cu_sparse
import numpy as cpu_np
import cupy as np
import numba
Expand Down
4 changes: 2 additions & 2 deletions python/cuml/_thirdparty/sklearn/preprocessing/_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

import numpy as cpu_np
import cupy as np
from cupy import sparse
from cupyx.scipy import sparse
from scipy import stats
from scipy import optimize
from scipy.special import boxcox
Expand Down Expand Up @@ -1238,7 +1238,7 @@ def fit(self, X, y=None) -> "RobustScaler":
else:
column_data = X[:, feature_idx]

is_not_nan = ~np.isnan(column_data).astype(np.bool)
is_not_nan = ~np.isnan(column_data).astype(bool)
column_data = column_data[is_not_nan]
quantiles.append(np.percentile(column_data,
self.quantile_range))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ def _validate_n_bins(self, n_features):
raise ValueError("{} received an invalid number "
"of bins. Received {}, expected at least 2."
.format(KBinsDiscretizer.__name__, orig_bins))
return np.full(n_features, orig_bins, dtype=np.int)
return np.full(n_features, orig_bins, dtype=int)

n_bins = check_array(orig_bins, dtype=np.int, copy=True,
ensure_2d=False)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import numpy
import cupy as np
import cuml
from cupy import sparse
from cupyx.scipy import sparse

from ....thirdparty_adapters import (_get_mask,
_masked_column_median,
Expand Down Expand Up @@ -447,7 +447,7 @@ def transform(self, X) -> SparseCumlArray:
else:
mask = _get_mask(X.data, self.missing_values)
indexes = np.repeat(
np.arange(len(X.indptr) - 1, dtype=np.int),
np.arange(len(X.indptr) - 1, dtype=int),
np.diff(X.indptr).tolist())[mask]

X.data[mask] = valid_statistics[indexes].astype(X.dtype,
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/_thirdparty/sklearn/utils/extmath.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

import cupy as np
import cupyx
from cupy import sparse
from cupyx.scipy import sparse


def row_norms(X, squared=False):
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/_thirdparty/sklearn/utils/sparsefuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@


from scipy import sparse as cpu_sp
from cupy import sparse as gpu_sp
from cupyx.scipy import sparse as gpu_sp
import cupy as np
import numpy as cpu_np

Expand Down
2 changes: 1 addition & 1 deletion python/cuml/_thirdparty/sklearn/utils/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import numbers
import numpy as np
import cupy as cp
import cupy.sparse as sp
import cupyx.scipy.sparse as sp
from inspect import isclass

from ....common.exceptions import NotFittedError
Expand Down
6 changes: 3 additions & 3 deletions python/cuml/cluster/agglomerative.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -135,9 +135,9 @@ class AgglomerativeClustering(Base, ClusterMixin, CMajorInputTagMixin):
handle=None, verbose=False, connectivity='knn',
n_neighbors=10, output_type=None):

super().__init__(handle,
verbose,
output_type)
super().__init__(handle=handle,
verbose=verbose,
output_type=output_type)

if linkage is not "single":
raise ValueError("Only single linkage clustering is "
Expand Down
6 changes: 3 additions & 3 deletions python/cuml/common/sparsefuncs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2020, NVIDIA CORPORATION.
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -22,7 +22,7 @@
from cuml.common.import_utils import has_scipy
import cuml.internals
from cuml.common.kernel_utils import cuda_kernel_factory
from cupy.sparse import csr_matrix as cp_csr_matrix,\
from cupyx.scipy.sparse import csr_matrix as cp_csr_matrix,\
coo_matrix as cp_coo_matrix, csc_matrix as cp_csc_matrix


Expand Down Expand Up @@ -191,7 +191,7 @@ def extract_knn_graph(knn_graph, convert_dtype=True, sparse=False):
csc_matrix = DummyClass

if isinstance(knn_graph, (csc_matrix, cp_csc_matrix)):
knn_graph = cp.sparse.csr_matrix(knn_graph)
knn_graph = cupyx.scipy.sparse.csr_matrix(knn_graph)
n_samples = knn_graph.shape[0]
reordering = knn_graph.data.reshape((n_samples, -1))
reordering = reordering.argsort()
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/dask/decomposition/pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ class PCA(BaseDecomposition,

def __init__(self, *, client=None, verbose=False, **kwargs):

super().__init__(PCA._create_pca,
super().__init__(model_func=PCA._create_pca,
client=client,
verbose=verbose,
**kwargs)
Expand Down
4 changes: 2 additions & 2 deletions python/cuml/dask/decomposition/tsvd.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,8 @@ def __init__(self, *, client=None, **kwargs):
"""
Constructor for distributed TruncatedSVD model
"""
super().__init__(TruncatedSVD._create_tsvd,
client,
super().__init__(model_func=TruncatedSVD._create_tsvd,
client=client,
**kwargs)

def fit(self, X, _transform=False):
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/dask/ensemble/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def _fit(self, model, dataset, convert_dtype, broadcast_data):
self.active_workers = data.workers
self.datatype = data.datatype
if self.datatype == 'cudf':
has_float64 = (dataset[0].dtypes.any() == np.float64)
has_float64 = (dataset[0].dtypes == np.float64).any()
else:
has_float64 = (dataset[0].dtype == np.float64)
if has_float64:
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/dask/manifold/umap.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ class UMAP(BaseEstimator,

"""
def __init__(self, *, model, client=None, **kwargs):
super().__init__(client, **kwargs)
super().__init__(client=client, **kwargs)

self._set_internal_model(model)

Expand Down
2 changes: 1 addition & 1 deletion python/cuml/experimental/linear_model/lars.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@ class Lars(Base, RegressorMixin):
X = cp.copy(X)

if self.eps is None:
self.eps = np.finfo(np.float).eps
self.eps = np.finfo(float).eps

self._fit_cpp(X, y, Gram, x_scale)

Expand Down
2 changes: 1 addition & 1 deletion python/cuml/feature_extraction/_tfidf.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ def _set_idf_diag(self):
# log+1 instead of log makes sure terms with zero idf don't get
# suppressed entirely.
idf = cp.log(n_samples / df) + 1
self._idf_diag = cp.sparse.dia_matrix(
self._idf_diag = cupyx.scipy.sparse.dia_matrix(
(idf, 0),
shape=(self.__n_features, self.__n_features),
dtype=df.dtype
Expand Down
4 changes: 0 additions & 4 deletions python/cuml/manifold/t_sne.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -280,10 +280,6 @@ class TSNE(Base,
if n_components < 0:
raise ValueError("n_components = {} should be more "
"than 0.".format(n_components))
if n_components != 2 and (method == 'barnes_hut' or method == 'fft'):
warnings.warn("Barnes Hut and FFT only work when "
"n_components == 2. Switching to exact.")
method = 'exact'
if n_components != 2:
raise ValueError("Currently TSNE supports n_components = 2; "
"but got n_components = {}".format(n_components))
Expand Down
10 changes: 4 additions & 6 deletions python/cuml/metrics/trustworthiness.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ def _get_array_ptr(obj):

@cuml.internals.api_return_any()
def trustworthiness(X, X_embedded, handle=None, n_neighbors=5,
metric='euclidean', should_downcast=True,
convert_dtype=False, batch_size=512) -> double:
metric='euclidean',
convert_dtype=True, batch_size=512) -> double:
"""
Expresses to what extent the local structure is retained in embedding.
The score is defined in the range [0, 1].
Expand Down Expand Up @@ -83,10 +83,8 @@ def trustworthiness(X, X_embedded, handle=None, n_neighbors=5,
Trustworthiness of the low-dimensional embedding
"""

if should_downcast:
convert_dtype = True
warnings.warn("Parameter should_downcast is deprecated, use "
"convert_dtype instead. ")
if n_neighbors > X.shape[0]:
raise ValueError("n_neighbors must be <= the number of rows.")

if n_neighbors > X.shape[0]:
raise ValueError("n_neighbors must be <= the number of rows.")
Expand Down
2 changes: 1 addition & 1 deletion python/cuml/model_selection/_split.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ def _approximate_mode(class_counts, n_draws, rng):
need_to_add -= add_now
if need_to_add == 0:
break
return floored.astype(cp.int)
return floored.astype(int)


def train_test_split(X,
Expand Down
16 changes: 8 additions & 8 deletions python/cuml/naive_bayes/naive_bayes.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ def _partial_fit(self, X, y, _classes=None, _refit=False,
raise ValueError("classes must be passed on the first call "
"to partial_fit.")

if scipy_sparse_isspmatrix(X) or cp.sparse.isspmatrix(X):
if scipy_sparse_isspmatrix(X) or cupyx.scipy.sparse.isspmatrix(X):
X = _convert_x_sparse(X)
else:
X = input_to_cupy_array(X, order='K',
Expand Down Expand Up @@ -513,7 +513,7 @@ def _update_mean_variance(self, X, Y, sample_weight=None):
new_var = cp.zeros((self.n_classes_, self.n_features_), order="F",
dtype=X.dtype)
class_counts = cp.zeros(self.n_classes_, order="F", dtype=X.dtype)
if cp.sparse.isspmatrix(X):
if cupyx.scipy.sparse.isspmatrix(X):
X = X.tocoo()

count_features_coo = count_features_coo_kernel(X.dtype,
Expand Down Expand Up @@ -741,7 +741,7 @@ def _partial_fit(self, X, y, sample_weight=None,
as scipy_sparse_isspmatrix

# TODO: use SparseCumlArray
if scipy_sparse_isspmatrix(X) or cp.sparse.isspmatrix(X):
if scipy_sparse_isspmatrix(X) or cupyx.scipy.sparse.isspmatrix(X):
X = _convert_x_sparse(X)
else:
X = input_to_cupy_array(X, order='K',
Expand Down Expand Up @@ -780,7 +780,7 @@ def _partial_fit(self, X, y, sample_weight=None,
else:
check_labels(Y, self.classes_)

if cp.sparse.isspmatrix(X):
if cupyx.scipy.sparse.isspmatrix(X):
# X is assumed to be a COO here
self._count_sparse(X.row, X.col, X.data, X.shape, Y, self.classes_)
else:
Expand Down Expand Up @@ -832,7 +832,7 @@ def _count(self, X, Y, classes):
Sum feature counts & class prior counts and add to current model.
Parameters
----------
X : cupy.ndarray or cupy.sparse matrix of size
X : cupy.ndarray or cupyx.scipy.sparse matrix of size
(n_rows, n_features)
Y : cupy.array of monotonic class labels
"""
Expand Down Expand Up @@ -1035,7 +1035,7 @@ class MultinomialNB(_BaseDiscreteNB):

# Put feature vectors and labels on the GPU

X = cp.sparse.csr_matrix(features.tocsr(), dtype=cp.float32)
X = cupyx.scipy.sparse.csr_matrix(features.tocsr(), dtype=cp.float32)
y = cp.asarray(twenty_train.target, dtype=cp.int32)

# Train model
Expand Down Expand Up @@ -1196,7 +1196,7 @@ def __init__(self, *, alpha=1.0, binarize=.0, fit_prior=True,
def _check_X(self, X):
X = super()._check_X(X)
if self.binarize is not None:
if cp.sparse.isspmatrix(X):
if cupyx.scipy.sparse.isspmatrix(X):
X.data = binarize(X.data, threshold=self.binarize)
else:
X = binarize(X, threshold=self.binarize)
Expand All @@ -1205,7 +1205,7 @@ def _check_X(self, X):
def _check_X_y(self, X, y):
X, y = super()._check_X_y(X, y)
if self.binarize is not None:
if cp.sparse.isspmatrix(X):
if cupyx.scipy.sparse.isspmatrix(X):
X.data = binarize(X.data, threshold=self.binarize)
else:
X = binarize(X, threshold=self.binarize)
Expand Down
9 changes: 5 additions & 4 deletions python/cuml/preprocessing/encoders.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def __init__(self, *,
categories='auto',
drop=None,
sparse=True,
dtype=np.float,
dtype=np.float32,
handle_unknown='error',
handle=None,
verbose=False,
Expand Down Expand Up @@ -347,7 +347,7 @@ def transform(self, X):

if self.drop_idx_ is not None:
drop_idx = self.drop_idx_[feature] + j
mask = cp.ones(col_idx.shape, dtype=cp.bool)
mask = cp.ones(col_idx.shape, dtype=bool)
mask[col_idx == drop_idx] = False
col_idx = col_idx[mask]
row_idx = row_idx[mask]
Expand Down Expand Up @@ -454,8 +454,9 @@ def inverse_transform(self, X):
result = cp.asarray(result.as_gpu_matrix())
except ValueError:
warnings.warn("The input one hot encoding contains rows with "
"unknown categories. Arrays do not support null "
"values. Returning output as a DataFrame "
"unknown categories. Since device arrays do not "
"support null values, the output will be "
"returned as a DataFrame "
"instead.")
return result

Expand Down
7 changes: 3 additions & 4 deletions python/cuml/preprocessing/text/stem/porter_stemmer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (c) 2020, NVIDIA CORPORATION.
# Copyright (c) 2020-2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -15,7 +15,6 @@
#

import cudf
import numpy as np
import cupy as cp
from .porter_stemmer_utils.suffix_utils import (
get_stem_series,
Expand Down Expand Up @@ -734,7 +733,7 @@ def get_condition_flag(word_str_ser, condition):
return a bool series where flag is valid
"""
if condition is None:
return cudf.Series(cp.ones(len(word_str_ser), np.bool))
return cudf.Series(cp.ones(len(word_str_ser), bool))
else:
return condition(word_str_ser)

Expand Down Expand Up @@ -812,5 +811,5 @@ def build_can_replace_mask(len_mask, mask):
if mask is None else returns mask
"""
if mask is None:
mask = cudf.Series(cp.ones(len_mask, dtype=cp.bool))
mask = cudf.Series(cp.ones(len_mask, dtype=bool))
return mask
6 changes: 3 additions & 3 deletions python/cuml/test/dask/test_linear_regression.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019-2020, NVIDIA CORPORATION.
# Copyright (c) 2019-2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -73,8 +73,8 @@ def imp():
from cuml.dask.linear_model import LinearRegression as cumlOLS_dask

n_info = 5
nrows = np.int(nrows)
ncols = np.int(ncols)
nrows = int(nrows)
ncols = int(ncols)
X, y = make_regression_dataset(datatype, nrows, ncols, n_info)

X_df, y_df = _prep_training_data(client, X, y, n_parts)
Expand Down
6 changes: 3 additions & 3 deletions python/cuml/test/dask/test_ridge_regression.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019, NVIDIA CORPORATION.
# Copyright (c) 2019-2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -68,8 +68,8 @@ def test_ridge(nrows, ncols, n_parts, fit_intercept,
from cuml.dask.linear_model import Ridge as cumlRidge_dask

n_info = 5
nrows = np.int(nrows)
ncols = np.int(ncols)
nrows = int(nrows)
ncols = int(ncols)
X, y = make_regression_dataset(datatype, nrows, ncols, n_info)

X_df, y_df = _prep_training_data(client, X, y, n_parts)
Expand Down
Loading