Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: class inheritance order #725

Merged
merged 2 commits into from
Dec 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ maintainers = [
]

dependencies = [
"narwhals>=1.2.0",
"narwhals>=1.5.0",
"pandas>=1.1.5",
"scikit-learn>=1.0",
"importlib-metadata >= 1.0; python_version < '3.8'",
Expand Down
4 changes: 2 additions & 2 deletions sklego/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@

import numpy as np
import pandas as pd
from sklearn.base import TransformerMixin
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_array, check_is_fitted, check_X_y


class TrainOnlyTransformerMixin(TransformerMixin):
class TrainOnlyTransformerMixin(TransformerMixin, BaseEstimator):
"""Mixin class for transformers that can handle training and test data differently.

This mixin allows using a separate function for transforming training and test data.
Expand Down
2 changes: 1 addition & 1 deletion sklego/decomposition/pca_reconstruction.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from sklearn.utils.validation import FLOAT_DTYPES, check_array, check_is_fitted


class PCAOutlierDetection(BaseEstimator, OutlierMixin):
class PCAOutlierDetection(OutlierMixin, BaseEstimator):
"""`PCAOutlierDetection` is an outlier detector based on the reconstruction error from PCA.

If the difference between original and reconstructed data is larger than the `threshold`, the point is
Expand Down
2 changes: 1 addition & 1 deletion sklego/decomposition/umap_reconstruction.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from sklearn.utils.validation import FLOAT_DTYPES, check_array, check_is_fitted


class UMAPOutlierDetection(BaseEstimator, OutlierMixin):
class UMAPOutlierDetection(OutlierMixin, BaseEstimator):
"""`UMAPOutlierDetection` is an outlier detector based on the reconstruction error from UMAP.

If the difference between original and reconstructed data is larger than the `threshold`, the point is
Expand Down
2 changes: 1 addition & 1 deletion sklego/dummy.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
)


class RandomRegressor(BaseEstimator, RegressorMixin):
class RandomRegressor(RegressorMixin, BaseEstimator):
"""A `RandomRegressor` makes random predictions only based on the `y` value that is seen.

The goal is that such a regressor can be used for benchmarking. It _should be_ easily beatable.
Expand Down
14 changes: 7 additions & 7 deletions sklego/linear_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
)


class LowessRegression(BaseEstimator, RegressorMixin):
class LowessRegression(RegressorMixin, BaseEstimator):
"""`LowessRegression` estimator: LOWESS (Locally Weighted Scatterplot Smoothing) is a type of
[local regression](https://en.wikipedia.org/wiki/Local_regression).

Expand Down Expand Up @@ -155,7 +155,7 @@ def predict(self, X):
return results


class ProbWeightRegression(BaseEstimator, RegressorMixin):
class ProbWeightRegression(RegressorMixin, BaseEstimator):
"""`ProbWeightRegression` assumes that all input signals in `X` need to be reweighted with weights that sum up to
one in order to predict `y`.

Expand Down Expand Up @@ -276,7 +276,7 @@ def coefs_(self):
return self.coef_


class DeadZoneRegressor(BaseEstimator, RegressorMixin):
class DeadZoneRegressor(RegressorMixin, BaseEstimator):
r"""The `DeadZoneRegressor` estimator implements a regression model that incorporates a _dead zone effect_ for
improving the robustness of regression predictions.

Expand Down Expand Up @@ -480,7 +480,7 @@ def allowed_effects(self):
return self._ALLOWED_EFFECTS


class _FairClassifier(BaseEstimator, LinearClassifierMixin):
class _FairClassifier(LinearClassifierMixin, BaseEstimator):
"""Base class for fair classifiers that address sensitive attribute fairness.

This base class provides a foundation for fair classifiers that aim to mitigate bias and discrimination by taking
Expand Down Expand Up @@ -682,7 +682,7 @@ def _more_tags(self):
return {"poor_score": True}


class DemographicParityClassifier(BaseEstimator, LinearClassifierMixin):
class DemographicParityClassifier(LinearClassifierMixin, BaseEstimator):
r"""`DemographicParityClassifier` is a logistic regression classifier which can be constrained on demographic
parity (p% score).

Expand Down Expand Up @@ -800,7 +800,7 @@ def constraints(self, y_hat, y_true, sensitive, n_obs):
return []


class EqualOpportunityClassifier(BaseEstimator, LinearClassifierMixin):
class EqualOpportunityClassifier(LinearClassifierMixin, BaseEstimator):
r"""`EqualOpportunityClassifier` is a logistic regression classifier which can be constrained on equal opportunity
score.

Expand Down Expand Up @@ -914,7 +914,7 @@ def constraints(self, y_hat, y_true, sensitive, n_obs):
return []


class BaseScipyMinimizeRegressor(BaseEstimator, RegressorMixin, ABC):
class BaseScipyMinimizeRegressor(RegressorMixin, BaseEstimator, ABC):
"""Abstract base class for regressors relying on Scipy's
[minimize method](https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html) to minimize a
(custom) loss function.
Expand Down
6 changes: 3 additions & 3 deletions sklego/meta/_grouped_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,9 @@ def parse_X_y(X, y, groups, check_X=True, **kwargs) -> nw.DataFrame:

# Convert y and assign it to the frame
n_samples = X.shape[0]
y_series = nw.from_dict(
data={"tmp": [None] * n_samples if y is None else y}, native_namespace=nw.get_native_namespace(X)
)["tmp"]
y_series = nw.new_series(
name="tmp", values=[None] * n_samples if y is None else y, native_namespace=nw.get_native_namespace(X)
)

return X.with_columns(__sklego_target__=y_series)

Expand Down
2 changes: 1 addition & 1 deletion sklego/meta/confusion_balancer.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from sklego.base import ProbabilisticClassifier


class ConfusionBalancer(BaseEstimator, MetaEstimatorMixin, ClassifierMixin):
class ConfusionBalancer(ClassifierMixin, MetaEstimatorMixin, BaseEstimator):
r"""The `ConfusionBalancer` estimator attempts to give its child estimator a more balanced output by learning from
the confusion matrix during training.

Expand Down
4 changes: 2 additions & 2 deletions sklego/meta/grouped_predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,7 @@ def _more_tags(self):
return {"allow_nan": True}


class GroupedRegressor(GroupedPredictor, RegressorMixin):
class GroupedRegressor(RegressorMixin, GroupedPredictor):
"""`GroupedRegressor` is a meta-estimator that fits a separate regressor for each group in the input data.

Its spec is the same as [`GroupedPredictor`][sklego.meta.grouped_predictor.GroupedPredictor] but it is available
Expand Down Expand Up @@ -439,7 +439,7 @@ def fit(self, X, y):
return super().fit(X, y)


class GroupedClassifier(GroupedPredictor, ClassifierMixin):
class GroupedClassifier(ClassifierMixin, GroupedPredictor):
"""`GroupedClassifier` is a meta-estimator that fits a separate classifier for each group in the input data.

It's equivalent to [`GroupedPredictor`][sklego.meta.grouped_predictor.GroupedPredictor] with `shrinkage=None`
Expand Down
18 changes: 9 additions & 9 deletions sklego/meta/hierarchical_predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,10 +282,10 @@ def fit(self, X, y=None):
raise ValueError(msg)

native_namespace = nw.get_native_namespace(X)
target_series = nw.from_dict({self._TARGET_NAME: y}, native_namespace=native_namespace)[self._TARGET_NAME]
global_series = nw.from_dict({self._GLOBAL_NAME: np.ones(n_samples)}, native_namespace=native_namespace)[
self._GLOBAL_NAME
]
target_series = nw.new_series(name=self._TARGET_NAME, values=y, native_namespace=native_namespace)
global_series = nw.new_series(
name=self._GLOBAL_NAME, values=np.ones(n_samples), native_namespace=native_namespace
)
frame = X.with_columns(
**{
self._TARGET_NAME: target_series,
Expand Down Expand Up @@ -322,9 +322,9 @@ def _predict_estimators(self, X, method_name):

n_samples = X.shape[0]
native_namespace = nw.get_native_namespace(X)
global_series = nw.from_dict({self._GLOBAL_NAME: np.ones(n_samples)}, native_namespace=native_namespace)[
self._GLOBAL_NAME
]
global_series = nw.new_series(
name=self._GLOBAL_NAME, values=np.ones(n_samples), native_namespace=native_namespace
)

frame = X.with_columns(
**{
Expand Down Expand Up @@ -424,7 +424,7 @@ def _more_tags(self):
return {"allow_nan": True}


class HierarchicalRegressor(HierarchicalPredictor, RegressorMixin):
class HierarchicalRegressor(RegressorMixin, HierarchicalPredictor):
"""A hierarchical regressor that predicts values using hierarchical grouping.

This class extends [`HierarchicalPredictor`][sklego.meta.hierarchical_predictor.HierarchicalPredictor] and adds
Expand Down Expand Up @@ -537,7 +537,7 @@ def predict(self, X):
return self._predict_estimators(X, "predict")


class HierarchicalClassifier(HierarchicalPredictor, ClassifierMixin):
class HierarchicalClassifier(ClassifierMixin, HierarchicalPredictor):
"""A hierarchical classifier that predicts labels using hierarchical grouping.

This class extends [`HierarchicalPredictor`][sklego.meta.hierarchical_predictor.HierarchicalPredictor] and adds
Expand Down
2 changes: 1 addition & 1 deletion sklego/meta/outlier_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from sklego.base import OutlierModel


class OutlierClassifier(BaseEstimator, ClassifierMixin, MetaEstimatorMixin):
class OutlierClassifier(ClassifierMixin, MetaEstimatorMixin, BaseEstimator):
"""Morphs an outlier detection model into a classifier.

When an outlier is detected it will output 1 and 0 otherwise. This way you can use familiar metrics again and this
Expand Down
2 changes: 1 addition & 1 deletion sklego/meta/regression_outlier_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from sklearn.utils.validation import check_array, check_is_fitted


class RegressionOutlierDetector(BaseEstimator, OutlierMixin):
class RegressionOutlierDetector(OutlierMixin, BaseEstimator):
"""Morphs a regression estimator into one that can detect outliers. We will try to predict `column` in X.

Parameters
Expand Down
2 changes: 1 addition & 1 deletion sklego/meta/subjective_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from sklearn.utils.validation import FLOAT_DTYPES, check_array, check_is_fitted, check_X_y


class SubjectiveClassifier(BaseEstimator, ClassifierMixin, MetaEstimatorMixin):
class SubjectiveClassifier(ClassifierMixin, MetaEstimatorMixin, BaseEstimator):
"""Corrects predictions of the inner classifier by taking into account a (subjective) prior distribution of the
classes.

Expand Down
2 changes: 1 addition & 1 deletion sklego/meta/thresholder.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from sklego.base import ProbabilisticClassifier


class Thresholder(BaseEstimator, ClassifierMixin):
class Thresholder(ClassifierMixin, BaseEstimator):
"""Takes a binary classifier and moves the threshold. This way you might design the algorithm to only accept a
certain class if the probability for it is larger than, say, 90% instead of 50%.

Expand Down
2 changes: 1 addition & 1 deletion sklego/meta/zero_inflated_regressor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from sklearn.utils.validation import _check_sample_weight, check_array, check_is_fitted, check_X_y


class ZeroInflatedRegressor(BaseEstimator, RegressorMixin, MetaEstimatorMixin):
class ZeroInflatedRegressor(RegressorMixin, BaseEstimator, MetaEstimatorMixin):
"""A meta regressor for zero-inflated datasets, i.e. the targets contain a lot of zeroes.

`ZeroInflatedRegressor` consists of a classifier and a regressor.
Expand Down
2 changes: 1 addition & 1 deletion sklego/mixture/bayesian_gmm_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from sklearn.utils.validation import FLOAT_DTYPES, check_array, check_is_fitted


class BayesianGMMClassifier(BaseEstimator, ClassifierMixin):
class BayesianGMMClassifier(ClassifierMixin, BaseEstimator):
"""The `BayesianGMMClassifier` trains a Gaussian Mixture Model for each class in `y` on a dataset `X`.
Once a density is trained for each class we can evaluate the likelihood scores to see which class is more likely.

Expand Down
2 changes: 1 addition & 1 deletion sklego/mixture/gmm_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from sklearn.utils.validation import FLOAT_DTYPES, check_array, check_is_fitted


class GMMClassifier(BaseEstimator, ClassifierMixin):
class GMMClassifier(ClassifierMixin, BaseEstimator):
"""The `GMMClassifier` trains a Gaussian Mixture Model for each class in `y` on a dataset `X`. Once a density is
trained for each class we can evaluate the likelihood scores to see which class is more likely.

Expand Down
4 changes: 2 additions & 2 deletions sklego/model_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,8 +263,8 @@ def update_split_info(indices, j, part, summary):

j = 0
for i in self.split(nw.to_native(X)):
train_info = nw.to_native(nw.from_dict({"tmp": i[0]}, native_namespace=native_namespace)["tmp"])
valid_info = nw.to_native(nw.from_dict({"tmp": i[1]}, native_namespace=native_namespace)["tmp"])
train_info = nw.to_native(nw.new_series(name="tmp", values=i[0], native_namespace=native_namespace))
valid_info = nw.to_native(nw.new_series(name="tmp", values=i[1], native_namespace=native_namespace))
update_split_info(train_info, j, "train", summary)
update_split_info(valid_info, j, "valid", summary)
j = j + 1
Expand Down
4 changes: 2 additions & 2 deletions sklego/naive_bayes.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from sklearn.utils.validation import FLOAT_DTYPES, check_array, check_is_fitted


class GaussianMixtureNB(BaseEstimator, ClassifierMixin):
class GaussianMixtureNB(ClassifierMixin, BaseEstimator):
"""The `GaussianMixtureNB` estimator is a naive bayes classifier that uses a mixture of gaussians instead of
merely a single one. In particular it trains a `GaussianMixture` model for each class in the target and for each
feature in the data, on the subset of `X` where `y == class`.
Expand Down Expand Up @@ -158,7 +158,7 @@ def num_fit_cols_(self):
return self.n_features_in_


class BayesianGaussianMixtureNB(BaseEstimator, ClassifierMixin):
class BayesianGaussianMixtureNB(ClassifierMixin, BaseEstimator):
"""The `BayesianGaussianMixtureNB` estimator is a naive bayes classifier that uses a bayesian mixture of gaussians
instead of merely a single one. In particular it trains a `BayesianGaussianMixture` model for each class in the
target and for each feature in the data, on the subset of `X` where `y == class`.
Expand Down
2 changes: 1 addition & 1 deletion sklego/neighbors.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from sklearn.utils.validation import FLOAT_DTYPES, check_array, check_is_fitted


class BayesianKernelDensityClassifier(BaseEstimator, ClassifierMixin):
class BayesianKernelDensityClassifier(ClassifierMixin, BaseEstimator):
"""The `BayesianKernelDensityClassifier` estimator trains using Kernel Density estimations to generate the joint
distribution.

Expand Down
6 changes: 3 additions & 3 deletions sklego/preprocessing/pandastransformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def _nw_select_dtypes(include: str | list[str], exclude: str | list[str], schema
return feature_names


class ColumnDropper(BaseEstimator, TransformerMixin):
class ColumnDropper(TransformerMixin, BaseEstimator):
"""The `ColumnDropper` transformer allows dropping specific columns from a DataFrame by name.
Can be useful in a sklearn Pipeline.

Expand Down Expand Up @@ -226,7 +226,7 @@ def _check_column_names(self, X):
raise KeyError(f"{list(non_existent_columns)} column(s) not in DataFrame")


class TypeSelector(BaseEstimator, TransformerMixin):
class TypeSelector(TransformerMixin, BaseEstimator):
"""The `TypeSelector` transformer allows to select columns in a DataFrame based on their type.
Can be useful in a sklearn Pipeline.

Expand Down Expand Up @@ -412,7 +412,7 @@ def __init__(self, include=None, exclude=None):
super().__init__(include=include, exclude=exclude)


class ColumnSelector(BaseEstimator, TransformerMixin):
class ColumnSelector(TransformerMixin, BaseEstimator):
"""The `ColumnSelector` transformer allows selecting specific columns from a DataFrame by name.
Can be useful in a sklearn Pipeline.

Expand Down
1 change: 1 addition & 0 deletions tests/test_meta/test_grouped_predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def test_sklearn_compatible_estimator(estimator, check):
"check_fit2d_predict1d", # custom message
"check_estimators_empty_data_messages", # custom message
"check_supervised_y_2d", # TODO: Is it possible to support multioutput?
"check_requires_y_none",
}:
pytest.skip()

Expand Down
1 change: 1 addition & 0 deletions tests/test_meta/test_hierarchical_predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def test_sklearn_compatible_estimator(estimator, check):
"check_fit2d_1feature", # custom message
"check_supervised_y_2d", # TODO: Is it possible to support multioutput?
"check_estimators_empty_data_messages", # custom message
"check_requires_y_none",
}:
pytest.skip()

Expand Down
Loading