Skip to content

Commit

Permalink
fix cuml
Browse files Browse the repository at this point in the history
  • Loading branch information
tvdboom committed Aug 28, 2023
1 parent c17443e commit 91090b4
Show file tree
Hide file tree
Showing 14 changed files with 163 additions and 161 deletions.
30 changes: 15 additions & 15 deletions atom/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,18 +229,18 @@ class ATOMClassifier(BaseTransformer, ATOM):
engine: dict or None, default=None
Execution engine to use for [data][data-acceleration] and
[models][model-acceleration]. The value should be a dictionary
with keys `data` and/or `models`, with their corresponding
choice as values. If None, the default options are selected.
Choose from:
[estimators][estimator-acceleration]. The value should be a
dictionary with keys `data` and/or `estimator`, with their
corresponding choice as values. If None, the default options
are selected. Choose from:
- "data":
- "numpy" (default)
- "pyarrow"
- "modin"
- "models":
- "estimator":
- "sklearn" (default)
- "sklearnex"
Expand Down Expand Up @@ -455,18 +455,18 @@ class ATOMForecaster(BaseTransformer, ATOM):
engine: dict or None, default=None
Execution engine to use for [data][data-acceleration] and
[models][model-acceleration]. The value should be a dictionary
with keys `data` and/or `models`, with their corresponding
choice as values. If None, the default options are selected.
Choose from:
[estimators][estimator-acceleration]. The value should be a
dictionary with keys `data` and/or `estimator`, with their
corresponding choice as values. If None, the default options
are selected. Choose from:
- "data":
- "numpy" (default)
- "pyarrow"
- "modin"
- "models":
- "estimator":
- "sklearn" (default)
- "sklearnex"
Expand Down Expand Up @@ -684,18 +684,18 @@ class ATOMRegressor(BaseTransformer, ATOM):
engine: dict or None, default=None
Execution engine to use for [data][data-acceleration] and
[models][model-acceleration]. The value should be a dictionary
with keys `data` and/or `models`, with their corresponding
choice as values. If None, the default options are selected.
Choose from:
[estimators][estimator-acceleration]. The value should be a
dictionary with keys `data` and/or `estimator`, with their
corresponding choice as values. If None, the default options
are selected. Choose from:
- "data":
- "numpy" (default)
- "pyarrow"
- "modin"
- "models":
- "estimator":
- "sklearn" (default)
- "sklearnex"
Expand Down
4 changes: 2 additions & 2 deletions atom/atom.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,8 @@ def __init__(
self.log("GPU training enabled.", 1)
if (data := self.engine.get("data")) != "numpy":
self.log(f"Data engine: {data}.", 1)
if (models := self.engine.get("models")) != "sklearn":
self.log(f"Models engine: {models}.", 1)
if (models := self.engine.get("estimator")) != "sklearn":
self.log(f"Estimator engine: {models}.", 1)
if self.backend == "ray" or self.n_jobs > 1:
self.log(f"Parallelization backend: {self.backend}", 1)
if self.experiment:
Expand Down
10 changes: 5 additions & 5 deletions atom/basemodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,18 +109,18 @@ class BaseModel(BaseTransformer, BaseTracker, HTPlot, PredictionPlot, ShapPlot):
engine: dict or None, default=None
Execution engine to use for [data][data-acceleration] and
[models][model-acceleration]. The value should be a dictionary
with keys `data` and/or `models`, with their corresponding
choice as values. If None, the default options are selected.
Choose from:
[estimators][estimator-acceleration]. The value should be a
dictionary with keys `data` and/or `estimator`, with their
corresponding choice as values. If None, the default options
are selected. Choose from:
- "data":
- "numpy" (default)
- "pyarrow"
- "modin"
- "models":
- "estimator":
- "sklearn" (default)
- "sklearnex"
Expand Down
10 changes: 5 additions & 5 deletions atom/basetransformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ class BaseTransformer:
- backend: Parallelization backend.
- verbose: Verbosity level of the output.
- warnings: Whether to show or suppress encountered warnings.
- logger: Name of the log file or Logger object.
- logger: Name of the log file, Logger object or None.
- experiment: Name of the mlflow experiment used for tracking.
- random_state: Seed used by the random number generator.
Expand Down Expand Up @@ -120,8 +120,8 @@ def engine(self) -> dict:
@engine.setter
def engine(self, value: dict | None):
if not value:
value = {"data": "numpy", "models": "sklearn"}
elif "data" not in value and "models" not in value:
value = {"data": "numpy", "estimator": "sklearn"}
elif "data" not in value and "estimator" not in value:
raise ValueError(
f"Invalid value for the engine parameter, got {value}. "
"The value should be a dict with keys 'data' and/or 'estimator'."
Expand All @@ -145,7 +145,7 @@ def engine(self, value: dict | None):
# Update env variable to use for PandasModin in utils.py
os.environ["ATOM_DATA_ENGINE"] = value["data"].lower()

if models := value.get("models"):
if models := value.get("estimator"):
if models.lower() == "sklearnex":
if not find_spec("sklearnex"):
raise ModuleNotFoundError(
Expand All @@ -171,7 +171,7 @@ def engine(self, value: dict | None):
f"got {models}. Choose from: sklearn, sklearnex, cuml."
)
else:
value["models"] = "sklearn"
value["estimator"] = "sklearn"

self._engine = value

Check notice on line 176 in atom/basetransformer.py

View workflow job for this annotation

GitHub Actions / Qodana Community for Python

An instance attribute is defined outside `__init__`

Instance attribute _engine defined outside __init__

Expand Down
62 changes: 31 additions & 31 deletions atom/data_cleaning.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,18 +546,18 @@ class Cleaner(BaseEstimator, TransformerMixin, BaseTransformer):
engine: dict or None, default=None
Execution engine to use for [data][data-acceleration] and
[models][model-acceleration]. The value should be a dictionary
with keys `data` and/or `models`, with their corresponding
choice as values. If None, the default options are selected.
Choose from:
[estimators][estimator-acceleration]. The value should be a
dictionary with keys `data` and/or `estimator`, with their
corresponding choice as values. If None, the default options
are selected. Choose from:
- "data":
- "numpy" (default)
- "pyarrow"
- "modin"
- "models":
- "estimator":
- "sklearn" (default)
- "cuml"
Expand Down Expand Up @@ -979,18 +979,18 @@ class Discretizer(BaseEstimator, TransformerMixin, BaseTransformer):
engine: dict or None, default=None
Execution engine to use for [data][data-acceleration] and
[models][model-acceleration]. The value should be a dictionary
with keys `data` and/or `models`, with their corresponding
choice as values. If None, the default options are selected.
Choose from:
[estimators][estimator-acceleration]. The value should be a
dictionary with keys `data` and/or `estimator`, with their
corresponding choice as values. If None, the default options
are selected. Choose from:
- "data":
- "numpy" (default)
- "pyarrow"
- "modin"
- "models":
- "estimator":
- "sklearn" (default)
- "cuml"
Expand Down Expand Up @@ -1701,18 +1701,18 @@ class Imputer(BaseEstimator, TransformerMixin, BaseTransformer):
engine: dict or None, default=None
Execution engine to use for [data][data-acceleration] and
[models][model-acceleration]. The value should be a dictionary
with keys `data` and/or `models`, with their corresponding
choice as values. If None, the default options are selected.
Choose from:
[estimators][estimator-acceleration]. The value should be a
dictionary with keys `data` and/or `estimator`, with their
corresponding choice as values. If None, the default options
are selected. Choose from:
- "data":
- "numpy" (default)
- "pyarrow"
- "modin"
- "models":
- "estimator":
- "sklearn" (default)
- "cuml"
Expand Down Expand Up @@ -1896,7 +1896,7 @@ def fit(self, X: FEATURES, y: TARGET | None = None) -> Imputer:
# Load the imputer class from sklearn or cuml (different modules)
estimator = self._get_est_class(
name="SimpleImputer",
module="preprocessing" if self.engine["models"] == "cuml" else "impute",
module="preprocessing" if self.engine["estimator"] == "cuml" else "impute",
)

# Assign an imputer to each column
Expand Down Expand Up @@ -2102,18 +2102,18 @@ class Normalizer(BaseEstimator, TransformerMixin, BaseTransformer):
engine: dict or None, default=None
Execution engine to use for [data][data-acceleration] and
[models][model-acceleration]. The value should be a dictionary
with keys `data` and/or `models`, with their corresponding
choice as values. If None, the default options are selected.
Choose from:
[estimators][estimator-acceleration]. The value should be a
dictionary with keys `data` and/or `estimator`, with their
corresponding choice as values. If None, the default options
are selected. Choose from:
- "data":
- "numpy" (default)
- "pyarrow"
- "modin"
- "models":
- "estimator":
- "sklearn" (default)
- "cuml"
Expand Down Expand Up @@ -2401,18 +2401,18 @@ class Pruner(BaseEstimator, TransformerMixin, BaseTransformer):
engine: dict or None, default=None
Execution engine to use for [data][data-acceleration] and
[models][model-acceleration]. The value should be a dictionary
with keys `data` and/or `models`, with their corresponding
choice as values. If None, the default options are selected.
Choose from:
[estimators][estimator-acceleration]. The value should be a
dictionary with keys `data` and/or `estimator`, with their
corresponding choice as values. If None, the default options
are selected. Choose from:
- "data":
- "numpy" (default)
- "pyarrow"
- "modin"
- "models":
- "estimator":
- "sklearn" (default)
- "sklearnex"
Expand Down Expand Up @@ -2706,18 +2706,18 @@ class Scaler(BaseEstimator, TransformerMixin, BaseTransformer):
engine: dict or None, default=None
Execution engine to use for [data][data-acceleration] and
[models][model-acceleration]. The value should be a dictionary
with keys `data` and/or `models`, with their corresponding
choice as values. If None, the default options are selected.
Choose from:
[estimators][estimator-acceleration]. The value should be a
dictionary with keys `data` and/or `estimator`, with their
corresponding choice as values. If None, the default options
are selected. Choose from:
- "data":
- "numpy" (default)
- "pyarrow"
- "modin"
- "models":
- "estimator":
- "sklearn" (default)
- "cuml"
Expand Down
50 changes: 23 additions & 27 deletions atom/feature_engineering.py
Original file line number Diff line number Diff line change
Expand Up @@ -998,18 +998,18 @@ class FeatureSelector(
engine: dict or None, default=None
Execution engine to use for [data][data-acceleration] and
[models][model-acceleration]. The value should be a dictionary
with keys `data` and/or `models`, with their corresponding
choice as values. If None, the default options are selected.
Choose from:
[estimators][estimator-acceleration]. The value should be a
dictionary with keys `data` and/or `estimator`, with their
corresponding choice as values. If None, the default options
are selected. Choose from:
- "data":
- "numpy" (default)
- "pyarrow"
- "modin"
- "models":
- "estimator":
- "sklearn" (default)
- "sklearnex"
Expand Down Expand Up @@ -1411,35 +1411,31 @@ def objective_function(model, X_train, y_train, X_valid, y_valid, scoring):
self._estimator = SelectKBest(solver, k=self._n_features).fit(X, y)

elif self.strategy.lower() == "pca":
# The PCA and TruncatedSVD both get all possible components to use
# for the plots (n_components must be < n_features and <= n_rows)
if is_sparse(X):
estimator = self._get_est_class("TruncatedSVD", "decomposition")

self._estimator = estimator(
n_components=min(len(X), X.shape[1] - 1),
algorithm="randomized" if self.solver is None else self.solver,
random_state=self.random_state,
**self.kwargs,
)
else:
if not is_sparse(X):
# PCA requires the features to be scaled
if not check_scaling(X):
self.scaler = Scaler()
X = self.scaler.fit_transform(X)

Check notice on line 1418 in atom/feature_engineering.py

View workflow job for this annotation

GitHub Actions / Qodana Community for Python

PEP 8 naming convention violation

Variable in function should be lowercase

estimator = self._get_est_class("PCA", "decomposition")
solver_param = "svd_solver"
else:
estimator = self._get_est_class("TruncatedSVD", "decomposition")
solver_param = "algorithm"

if self.solver is None:
solver = sign(estimator)["svd_solver"].default
else:
solver = self.solver
if self.solver is None:
solver = sign(estimator)[solver_param].default
else:
solver = self.solver

self._estimator = estimator(
n_components=min(len(X), X.shape[1] - 1),
svd_solver=solver,
random_state=self.random_state,
**self.kwargs,
)
# The PCA and TruncatedSVD both get all possible components to use
# for the plots (n_components must be < n_features and <= n_rows)
self._estimator = estimator(

Check warning on line 1433 in atom/feature_engineering.py

View workflow job for this annotation

GitHub Actions / Qodana Community for Python

Attempt to call a non-callable object

'Predictor' object is not callable
n_components=min(len(X), X.shape[1] - 1),
**{solver_param: solver},
random_state=self.random_state,
**self.kwargs,
)

self._estimator.fit(X)
self._estimator._comps = min(
Expand Down
Loading

0 comments on commit 91090b4

Please sign in to comment.