diff --git a/sklego/dummy.py b/sklego/dummy.py index 4886a8b6..9b385ffc 100644 --- a/sklego/dummy.py +++ b/sklego/dummy.py @@ -72,7 +72,7 @@ def fit(self, X: np.array, y: np.array) -> "RandomRegressor": """ if self.strategy not in self._ALLOWED_STRATEGIES: raise ValueError(f"strategy {self.strategy} is not in {self._ALLOWED_STRATEGIES}") - X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES) + X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, y_required=True) self.n_features_in_ = X.shape[1] self.min_ = np.min(y) diff --git a/sklego/feature_selection/mrmr.py b/sklego/feature_selection/mrmr.py index 64f436bb..f08a3989 100644 --- a/sklego/feature_selection/mrmr.py +++ b/sklego/feature_selection/mrmr.py @@ -203,7 +203,7 @@ def fit(self, X, y): k parameter is not integer type or is < n_features_in (X.shape[1]) or < 1 """ - X, y = validate_data(self, X, y, dtype="numeric", y_numeric=True) + X, y = validate_data(self, X, y, dtype="numeric", y_numeric=True, y_required=True) self._y_dtype = y.dtype diff --git a/sklego/linear_model.py b/sklego/linear_model.py index a1a10b71..97310ef7 100644 --- a/sklego/linear_model.py +++ b/sklego/linear_model.py @@ -98,7 +98,7 @@ def fit(self, X, y): - If `span` is not between 0 and 1. - If `sigma` is negative. """ - X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES) + X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, y_required=True) if self.span is not None: if not 0 <= self.span <= 1: raise ValueError(f"Param `span` must be 0 <= span <= 1, got: {self.span}") @@ -225,7 +225,7 @@ def fit(self, X, y): self : ProbWeightRegression The fitted estimator. """ - X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES) + X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, y_required=True) # Construct the problem. betas = cp.Variable(X.shape[1]) @@ -373,7 +373,7 @@ def fit(self, X, y): ValueError If `effect` is not one of "linear", "quadratic" or "constant". """ - X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES) + X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, y_required=True) if self.effect not in self._ALLOWED_EFFECTS: raise ValueError(f"effect {self.effect} must be in {self._ALLOWED_EFFECTS}") @@ -1054,7 +1054,7 @@ def _prepare_inputs(self, X, sample_weight, y): This method is called by `fit` to prepare the inputs for the optimization problem. It adds an intercept column to `X` if `fit_intercept=True`, and returns the loss function and its gradient. """ - X, y = validate_data(self, X, y, y_numeric=True) + X, y = validate_data(self, X, y, y_numeric=True, y_required=True) sample_weight = _check_sample_weight(sample_weight, X) self.n_features_in_ = X.shape[1] diff --git a/sklego/meta/confusion_balancer.py b/sklego/meta/confusion_balancer.py index d9b389d6..06edcf3d 100644 --- a/sklego/meta/confusion_balancer.py +++ b/sklego/meta/confusion_balancer.py @@ -64,7 +64,7 @@ def fit(self, X, y): If the underlying estimator does not have a `predict_proba` method. """ - X, y = validate_data(self.estimator, X, y, dtype=FLOAT_DTYPES) + X, y = validate_data(self.estimator, X, y, dtype=FLOAT_DTYPES, y_required=True) if not isinstance(self.estimator, ProbabilisticClassifier): raise ValueError( "The ConfusionBalancer meta model only works on classification models with .predict_proba." diff --git a/sklego/meta/decay_estimator.py b/sklego/meta/decay_estimator.py index b8be234a..37a8fac6 100644 --- a/sklego/meta/decay_estimator.py +++ b/sklego/meta/decay_estimator.py @@ -126,7 +126,7 @@ def fit(self, X, y): """ if self.check_input: - X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, ensure_min_features=0) + X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, ensure_min_features=0, y_required=True) if self.decay_func in self._ALLOWED_DECAYS.keys(): self.decay_func_ = self._ALLOWED_DECAYS[self.decay_func] diff --git a/sklego/meta/estimator_transformer.py b/sklego/meta/estimator_transformer.py index 4a0c5b52..2cf0b8c9 100644 --- a/sklego/meta/estimator_transformer.py +++ b/sklego/meta/estimator_transformer.py @@ -54,7 +54,7 @@ def fit(self, X, y, **kwargs): """ if self.check_input: - X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, multi_output=True) + X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, multi_output=True, y_required=True) self.multi_output_ = len(y.shape) > 1 self.estimator_ = clone(self.estimator) diff --git a/sklego/meta/ordinal_classification.py b/sklego/meta/ordinal_classification.py index e5628103..9404b430 100644 --- a/sklego/meta/ordinal_classification.py +++ b/sklego/meta/ordinal_classification.py @@ -131,7 +131,7 @@ def fit(self, X, y): if not hasattr(self.estimator, "predict_proba"): raise ValueError("The estimator must implement `.predict_proba()` method.") - X, y = validate_data(self, X, y, ensure_min_samples=2, ensure_2d=True) + X, y = validate_data(self, X, y, ensure_min_samples=2, ensure_2d=True, y_required=True) self.classes_ = np.sort(np.unique(y)) self.n_features_in_ = X.shape[1] diff --git a/sklego/meta/outlier_classifier.py b/sklego/meta/outlier_classifier.py index 0d69d94c..4e1c1357 100644 --- a/sklego/meta/outlier_classifier.py +++ b/sklego/meta/outlier_classifier.py @@ -88,7 +88,10 @@ def fit(self, X, y=None): f"Passed model {self.model} does not have a `decision_function` " f"method. This is required for `predict_proba` estimation." ) - X, y = validate_data(self, X, y) + if y is not None: + X, y = validate_data(self, X, y) + else: + X = validate_data(self, X) self.estimator_ = clone(self.model).fit(X, y) self.n_features_in_ = self.estimator_.n_features_in_ self.classes_ = np.array([0, 1]) diff --git a/sklego/meta/subjective_classifier.py b/sklego/meta/subjective_classifier.py index ade527ed..ae72981f 100644 --- a/sklego/meta/subjective_classifier.py +++ b/sklego/meta/subjective_classifier.py @@ -111,7 +111,7 @@ def fit(self, X, y): if self.evidence not in self._ALLOWED_EVIDENCE: raise ValueError(f"Invalid evidence: the provided evidence should be one of {self._ALLOWED_EVIDENCE}") - X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES) + X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, y_required=True) if set(y) - set(self.prior.keys()): raise ValueError( f"Training data is inconsistent with prior: no prior defined for classes " diff --git a/sklego/meta/zero_inflated_regressor.py b/sklego/meta/zero_inflated_regressor.py index 775d6f84..353e74a7 100644 --- a/sklego/meta/zero_inflated_regressor.py +++ b/sklego/meta/zero_inflated_regressor.py @@ -92,7 +92,7 @@ def fit(self, X, y, sample_weight=None): ValueError If `classifier` is not a classifier or `regressor` is not a regressor. """ - X, y = validate_data(self, X, y) + X, y = validate_data(self, X, y, y_required=True) self.n_features_in_ = X.shape[1] diff --git a/sklego/mixture/bayesian_gmm_classifier.py b/sklego/mixture/bayesian_gmm_classifier.py index eb37d83e..c4c2d173 100644 --- a/sklego/mixture/bayesian_gmm_classifier.py +++ b/sklego/mixture/bayesian_gmm_classifier.py @@ -78,7 +78,7 @@ def fit(self, X: np.ndarray, y: np.ndarray) -> "BayesianGMMClassifier": self : BayesianGMMClassifier The fitted estimator. """ - X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES) + X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, y_required=True) if X.ndim == 1: X = np.expand_dims(X, 1) diff --git a/sklego/mixture/gmm_classifier.py b/sklego/mixture/gmm_classifier.py index a4333a3c..9c5cf02b 100644 --- a/sklego/mixture/gmm_classifier.py +++ b/sklego/mixture/gmm_classifier.py @@ -73,7 +73,7 @@ def fit(self, X: np.ndarray, y: np.ndarray) -> "GMMClassifier": self : GMMClassifier The fitted estimator. """ - X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES) + X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, y_required=True) if X.ndim == 1: X = np.expand_dims(X, 1) diff --git a/sklego/naive_bayes.py b/sklego/naive_bayes.py index 6c01268d..5a9f1462 100644 --- a/sklego/naive_bayes.py +++ b/sklego/naive_bayes.py @@ -74,7 +74,7 @@ def fit(self, X, y) -> "GaussianMixtureNB": self : GaussianMixtureNB The fitted estimator. """ - X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES) + X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, y_required=True) if X.ndim == 1: X = np.expand_dims(X, 1) @@ -239,7 +239,7 @@ def fit(self, X, y) -> "BayesianGaussianMixtureNB": self : BayesianGaussianMixtureNB The fitted estimator. """ - X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES) + X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, y_required=True) if X.ndim == 1: X = np.expand_dims(X, 1) diff --git a/sklego/neighbors.py b/sklego/neighbors.py index 0adc14cf..d7d40527 100644 --- a/sklego/neighbors.py +++ b/sklego/neighbors.py @@ -63,7 +63,7 @@ def fit(self, X: np.ndarray, y: np.ndarray): self : BayesianKernelDensityClassifier The fitted estimator. """ - X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES) + X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES, y_required=True) self.classes_ = unique_labels(y) self.models_, self.priors_logp_ = {}, {} diff --git a/sklego/preprocessing/intervalencoder.py b/sklego/preprocessing/intervalencoder.py index ee51c8a6..6d16c380 100644 --- a/sklego/preprocessing/intervalencoder.py +++ b/sklego/preprocessing/intervalencoder.py @@ -157,7 +157,7 @@ def fit(self, X, y): # these two matrices will have shape (columns, quantiles) # quantiles indicate where the interval split occurs - X, y = validate_data(self, X, y) + X, y = validate_data(self, X, y, y_required=True) self.quantiles_ = np.zeros((X.shape[1], self.n_chunks)) # heights indicate what heights these intervals will have self.heights_ = np.zeros((X.shape[1], self.n_chunks)) diff --git a/sklego/preprocessing/randomadder.py b/sklego/preprocessing/randomadder.py index a3a690ad..66de92e2 100644 --- a/sklego/preprocessing/randomadder.py +++ b/sklego/preprocessing/randomadder.py @@ -68,7 +68,7 @@ def fit(self, X, y): The fitted transformer. """ super().fit(X, y) - X, y = validate_data(self, X, y, dtype=FLOAT_DTYPES) + X = validate_data(self, X, dtype=FLOAT_DTYPES) self.n_features_in_ = X.shape[1] return self