diff --git a/pyproject.toml b/pyproject.toml
index ce7e06e1..c344c895 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,7 +20,7 @@ maintainers = [
 ]
 
 dependencies = [
-    "narwhals>=1.2.0",
+    "narwhals>=1.5.0",
     "pandas>=1.1.5",
     "scikit-learn>=1.0",
     "importlib-metadata >= 1.0; python_version < '3.8'",
diff --git a/sklego/common.py b/sklego/common.py
index 548faea2..d3652277 100644
--- a/sklego/common.py
+++ b/sklego/common.py
@@ -4,11 +4,11 @@
 
 import numpy as np
 import pandas as pd
-from sklearn.base import TransformerMixin
+from sklearn.base import BaseEstimator, TransformerMixin
 from sklearn.utils.validation import check_array, check_is_fitted, check_X_y
 
 
-class TrainOnlyTransformerMixin(TransformerMixin):
+class TrainOnlyTransformerMixin(TransformerMixin, BaseEstimator):
     """Mixin class for transformers that can handle training and test data differently.
 
     This mixin allows using a separate function for transforming training and test data.
diff --git a/sklego/decomposition/pca_reconstruction.py b/sklego/decomposition/pca_reconstruction.py
index 3dcc51aa..cb02ad21 100644
--- a/sklego/decomposition/pca_reconstruction.py
+++ b/sklego/decomposition/pca_reconstruction.py
@@ -4,7 +4,7 @@
 from sklearn.utils.validation import FLOAT_DTYPES, check_array, check_is_fitted
 
 
-class PCAOutlierDetection(BaseEstimator, OutlierMixin):
+class PCAOutlierDetection(OutlierMixin, BaseEstimator):
     """`PCAOutlierDetection` is an outlier detector based on the reconstruction error from PCA.
 
     If the difference between original and reconstructed data is larger than the `threshold`, the point is
diff --git a/sklego/decomposition/umap_reconstruction.py b/sklego/decomposition/umap_reconstruction.py
index 330fe8f8..3859f490 100644
--- a/sklego/decomposition/umap_reconstruction.py
+++ b/sklego/decomposition/umap_reconstruction.py
@@ -11,7 +11,7 @@
 from sklearn.utils.validation import FLOAT_DTYPES, check_array, check_is_fitted
 
 
-class UMAPOutlierDetection(BaseEstimator, OutlierMixin):
+class UMAPOutlierDetection(OutlierMixin, BaseEstimator):
     """`UMAPOutlierDetection` is an outlier detector based on the reconstruction error from UMAP.
 
     If the difference between original and reconstructed data is larger than the `threshold`, the point is
diff --git a/sklego/dummy.py b/sklego/dummy.py
index 35b4d639..03157161 100644
--- a/sklego/dummy.py
+++ b/sklego/dummy.py
@@ -11,7 +11,7 @@
 )
 
 
-class RandomRegressor(BaseEstimator, RegressorMixin):
+class RandomRegressor(RegressorMixin, BaseEstimator):
     """A `RandomRegressor` makes random predictions only based on the `y` value that is seen.
 
     The goal is that such a regressor can be used for benchmarking. It _should be_ easily beatable.
diff --git a/sklego/linear_model.py b/sklego/linear_model.py
index ebe9fc43..4673b608 100644
--- a/sklego/linear_model.py
+++ b/sklego/linear_model.py
@@ -27,7 +27,7 @@
 )
 
 
-class LowessRegression(BaseEstimator, RegressorMixin):
+class LowessRegression(RegressorMixin, BaseEstimator):
     """`LowessRegression` estimator: LOWESS (Locally Weighted Scatterplot Smoothing) is a type of
     [local regression](https://en.wikipedia.org/wiki/Local_regression).
 
@@ -155,7 +155,7 @@ def predict(self, X):
         return results
 
 
-class ProbWeightRegression(BaseEstimator, RegressorMixin):
+class ProbWeightRegression(RegressorMixin, BaseEstimator):
     """`ProbWeightRegression` assumes that all input signals in `X` need to be reweighted with weights that sum up to
     one in order to predict `y`.
 
@@ -276,7 +276,7 @@ def coefs_(self):
         return self.coef_
 
 
-class DeadZoneRegressor(BaseEstimator, RegressorMixin):
+class DeadZoneRegressor(RegressorMixin, BaseEstimator):
     r"""The `DeadZoneRegressor` estimator implements a regression model that incorporates a _dead zone effect_ for
     improving the robustness of regression predictions.
 
@@ -480,7 +480,7 @@ def allowed_effects(self):
         return self._ALLOWED_EFFECTS
 
 
-class _FairClassifier(BaseEstimator, LinearClassifierMixin):
+class _FairClassifier(LinearClassifierMixin, BaseEstimator):
     """Base class for fair classifiers that address sensitive attribute fairness.
 
     This base class provides a foundation for fair classifiers that aim to mitigate bias and discrimination by taking
@@ -682,7 +682,7 @@ def _more_tags(self):
         return {"poor_score": True}
 
 
-class DemographicParityClassifier(BaseEstimator, LinearClassifierMixin):
+class DemographicParityClassifier(LinearClassifierMixin, BaseEstimator):
     r"""`DemographicParityClassifier` is a logistic regression classifier which can be constrained on demographic
     parity (p% score).
 
@@ -800,7 +800,7 @@ def constraints(self, y_hat, y_true, sensitive, n_obs):
             return []
 
 
-class EqualOpportunityClassifier(BaseEstimator, LinearClassifierMixin):
+class EqualOpportunityClassifier(LinearClassifierMixin, BaseEstimator):
     r"""`EqualOpportunityClassifier` is a logistic regression classifier which can be constrained on equal opportunity
     score.
 
@@ -914,7 +914,7 @@ def constraints(self, y_hat, y_true, sensitive, n_obs):
             return []
 
 
-class BaseScipyMinimizeRegressor(BaseEstimator, RegressorMixin, ABC):
+class BaseScipyMinimizeRegressor(RegressorMixin, BaseEstimator, ABC):
     """Abstract base class for regressors relying on Scipy's
     [minimize method](https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html) to minimize a
     (custom) loss function.
diff --git a/sklego/meta/_grouped_utils.py b/sklego/meta/_grouped_utils.py
index c873e0cb..6d65ad3c 100644
--- a/sklego/meta/_grouped_utils.py
+++ b/sklego/meta/_grouped_utils.py
@@ -33,9 +33,9 @@ def parse_X_y(X, y, groups, check_X=True, **kwargs) -> nw.DataFrame:
 
     # Convert y and assign it to the frame
     n_samples = X.shape[0]
-    y_series = nw.from_dict(
-        data={"tmp": [None] * n_samples if y is None else y}, native_namespace=nw.get_native_namespace(X)
-    )["tmp"]
+    y_series = nw.new_series(
+        name="tmp", values=[None] * n_samples if y is None else y, native_namespace=nw.get_native_namespace(X)
+    )
 
     return X.with_columns(__sklego_target__=y_series)
 
diff --git a/sklego/meta/confusion_balancer.py b/sklego/meta/confusion_balancer.py
index 8821d8b0..26b00fdc 100644
--- a/sklego/meta/confusion_balancer.py
+++ b/sklego/meta/confusion_balancer.py
@@ -7,7 +7,7 @@
 from sklego.base import ProbabilisticClassifier
 
 
-class ConfusionBalancer(BaseEstimator, MetaEstimatorMixin, ClassifierMixin):
+class ConfusionBalancer(ClassifierMixin, MetaEstimatorMixin, BaseEstimator):
     r"""The `ConfusionBalancer` estimator attempts to give it's child estimator a more balanced output by learning from
     the confusion matrix during training.
 
diff --git a/sklego/meta/grouped_predictor.py b/sklego/meta/grouped_predictor.py
index 40878201..80eb819f 100644
--- a/sklego/meta/grouped_predictor.py
+++ b/sklego/meta/grouped_predictor.py
@@ -402,7 +402,7 @@ def _more_tags(self):
         return {"allow_nan": True}
 
 
-class GroupedRegressor(GroupedPredictor, RegressorMixin):
+class GroupedRegressor(RegressorMixin, GroupedPredictor):
     """`GroupedRegressor` is a meta-estimator that fits a separate regressor for each group in the input data.
 
     Its spec is the same as [`GroupedPredictor`][sklego.meta.grouped_predictor.GroupedPredictor] but it is available
@@ -439,7 +439,7 @@ def fit(self, X, y):
         return super().fit(X, y)
 
 
-class GroupedClassifier(GroupedPredictor, ClassifierMixin):
+class GroupedClassifier(ClassifierMixin, GroupedPredictor):
     """`GroupedClassifier` is a meta-estimator that fits a separate classifier for each group in the input data.
 
     Its equivalent to [`GroupedPredictor`][sklego.meta.grouped_predictor.GroupedPredictor] with `shrinkage=None`
diff --git a/sklego/meta/hierarchical_predictor.py b/sklego/meta/hierarchical_predictor.py
index 5d71cc5c..058d0f0b 100644
--- a/sklego/meta/hierarchical_predictor.py
+++ b/sklego/meta/hierarchical_predictor.py
@@ -282,10 +282,10 @@ def fit(self, X, y=None):
             raise ValueError(msg)
 
         native_namespace = nw.get_native_namespace(X)
-        target_series = nw.from_dict({self._TARGET_NAME: y}, native_namespace=native_namespace)[self._TARGET_NAME]
-        global_series = nw.from_dict({self._GLOBAL_NAME: np.ones(n_samples)}, native_namespace=native_namespace)[
-            self._GLOBAL_NAME
-        ]
+        target_series = nw.new_series(name=self._TARGET_NAME, values=y, native_namespace=native_namespace)
+        global_series = nw.new_series(
+            name=self._GLOBAL_NAME, values=np.ones(n_samples), native_namespace=native_namespace
+        )
         frame = X.with_columns(
             **{
                 self._TARGET_NAME: target_series,
@@ -322,9 +322,9 @@ def _predict_estimators(self, X, method_name):
 
         n_samples = X.shape[0]
         native_namespace = nw.get_native_namespace(X)
-        global_series = nw.from_dict({self._GLOBAL_NAME: np.ones(n_samples)}, native_namespace=native_namespace)[
-            self._GLOBAL_NAME
-        ]
+        global_series = nw.new_series(
+            name=self._GLOBAL_NAME, values=np.ones(n_samples), native_namespace=native_namespace
+        )
 
         frame = X.with_columns(
             **{
@@ -424,7 +424,7 @@ def _more_tags(self):
         return {"allow_nan": True}
 
 
-class HierarchicalRegressor(HierarchicalPredictor, RegressorMixin):
+class HierarchicalRegressor(RegressorMixin, HierarchicalPredictor):
     """A hierarchical regressor that predicts values using hierarchical grouping.
 
     This class extends [`HierarchicalPredictor`][sklego.meta.hierarchical_predictor.HierarchicalPredictor] and adds
@@ -537,7 +537,7 @@ def predict(self, X):
         return self._predict_estimators(X, "predict")
 
 
-class HierarchicalClassifier(HierarchicalPredictor, ClassifierMixin):
+class HierarchicalClassifier(ClassifierMixin, HierarchicalPredictor):
     """A hierarchical classifier that predicts labels using hierarchical grouping.
 
     This class extends [`HierarchicalPredictor`][sklego.meta.hierarchical_predictor.HierarchicalPredictor] and adds
diff --git a/sklego/meta/outlier_classifier.py b/sklego/meta/outlier_classifier.py
index 09f6d50d..d965e443 100644
--- a/sklego/meta/outlier_classifier.py
+++ b/sklego/meta/outlier_classifier.py
@@ -7,7 +7,7 @@
 from sklego.base import OutlierModel
 
 
-class OutlierClassifier(BaseEstimator, ClassifierMixin, MetaEstimatorMixin):
+class OutlierClassifier(ClassifierMixin, MetaEstimatorMixin, BaseEstimator):
     """Morphs an outlier detection model into a classifier.
 
     When an outlier is detected it will output 1 and 0 otherwise. This way you can use familiar metrics again and this
diff --git a/sklego/meta/regression_outlier_detector.py b/sklego/meta/regression_outlier_detector.py
index 6ef8a8b2..4c51267a 100644
--- a/sklego/meta/regression_outlier_detector.py
+++ b/sklego/meta/regression_outlier_detector.py
@@ -5,7 +5,7 @@
 from sklearn.utils.validation import check_array, check_is_fitted
 
 
-class RegressionOutlierDetector(BaseEstimator, OutlierMixin):
+class RegressionOutlierDetector(OutlierMixin, BaseEstimator):
     """Morphs a regression estimator into one that can detect outliers. We will try to predict `column` in X.
 
     Parameters
diff --git a/sklego/meta/subjective_classifier.py b/sklego/meta/subjective_classifier.py
index 60e72463..b396bddc 100644
--- a/sklego/meta/subjective_classifier.py
+++ b/sklego/meta/subjective_classifier.py
@@ -6,7 +6,7 @@
 from sklearn.utils.validation import FLOAT_DTYPES, check_array, check_is_fitted, check_X_y
 
 
-class SubjectiveClassifier(BaseEstimator, ClassifierMixin, MetaEstimatorMixin):
+class SubjectiveClassifier(ClassifierMixin, MetaEstimatorMixin, BaseEstimator):
     """Corrects predictions of the inner classifier by taking into account a (subjective) prior distribution of the
     classes.
 
diff --git a/sklego/meta/thresholder.py b/sklego/meta/thresholder.py
index b08e76b8..126071f0 100644
--- a/sklego/meta/thresholder.py
+++ b/sklego/meta/thresholder.py
@@ -10,7 +10,7 @@
 from sklego.base import ProbabilisticClassifier
 
 
-class Thresholder(BaseEstimator, ClassifierMixin):
+class Thresholder(ClassifierMixin, BaseEstimator):
     """Takes a binary classifier and moves the threshold. This way you might design the algorithm to only accept a
     certain class if the probability for it is larger than, say, 90% instead of 50%.
 
diff --git a/sklego/meta/zero_inflated_regressor.py b/sklego/meta/zero_inflated_regressor.py
index 18d41a14..3b41626b 100644
--- a/sklego/meta/zero_inflated_regressor.py
+++ b/sklego/meta/zero_inflated_regressor.py
@@ -8,7 +8,7 @@
 from sklearn.utils.validation import _check_sample_weight, check_array, check_is_fitted, check_X_y
 
 
-class ZeroInflatedRegressor(BaseEstimator, RegressorMixin, MetaEstimatorMixin):
+class ZeroInflatedRegressor(RegressorMixin, MetaEstimatorMixin, BaseEstimator):
     """A meta regressor for zero-inflated datasets, i.e. the targets contain a lot of zeroes.
 
     `ZeroInflatedRegressor` consists of a classifier and a regressor.
diff --git a/sklego/mixture/bayesian_gmm_classifier.py b/sklego/mixture/bayesian_gmm_classifier.py
index 66b6b5e0..805420df 100644
--- a/sklego/mixture/bayesian_gmm_classifier.py
+++ b/sklego/mixture/bayesian_gmm_classifier.py
@@ -7,7 +7,7 @@
 from sklearn.utils.validation import FLOAT_DTYPES, check_array, check_is_fitted
 
 
-class BayesianGMMClassifier(BaseEstimator, ClassifierMixin):
+class BayesianGMMClassifier(ClassifierMixin, BaseEstimator):
     """The `BayesianGMMClassifier` trains a Gaussian Mixture Model for each class in `y` on a dataset `X`.
     Once a density is trained for each class we can evaluate the likelihood scores to see which class is more likely.
 
diff --git a/sklego/mixture/gmm_classifier.py b/sklego/mixture/gmm_classifier.py
index 01044325..9b6705a5 100644
--- a/sklego/mixture/gmm_classifier.py
+++ b/sklego/mixture/gmm_classifier.py
@@ -7,7 +7,7 @@
 from sklearn.utils.validation import FLOAT_DTYPES, check_array, check_is_fitted
 
 
-class GMMClassifier(BaseEstimator, ClassifierMixin):
+class GMMClassifier(ClassifierMixin, BaseEstimator):
     """The `GMMClassifier` trains a Gaussian Mixture Model for each class in `y` on a dataset `X`. Once a density is
     trained for each class we can evaluate the likelihood scores to see which class is more likely.
 
diff --git a/sklego/model_selection.py b/sklego/model_selection.py
index 9fa26132..08747492 100644
--- a/sklego/model_selection.py
+++ b/sklego/model_selection.py
@@ -263,8 +263,8 @@ def update_split_info(indices, j, part, summary):
 
         j = 0
         for i in self.split(nw.to_native(X)):
-            train_info = nw.to_native(nw.from_dict({"tmp": i[0]}, native_namespace=native_namespace)["tmp"])
-            valid_info = nw.to_native(nw.from_dict({"tmp": i[1]}, native_namespace=native_namespace)["tmp"])
+            train_info = nw.to_native(nw.new_series(name="tmp", values=i[0], native_namespace=native_namespace))
+            valid_info = nw.to_native(nw.new_series(name="tmp", values=i[1], native_namespace=native_namespace))
             update_split_info(train_info, j, "train", summary)
             update_split_info(valid_info, j, "valid", summary)
             j = j + 1
diff --git a/sklego/naive_bayes.py b/sklego/naive_bayes.py
index a3fab146..2ed87aed 100644
--- a/sklego/naive_bayes.py
+++ b/sklego/naive_bayes.py
@@ -8,7 +8,7 @@
 from sklearn.utils.validation import FLOAT_DTYPES, check_array, check_is_fitted
 
 
-class GaussianMixtureNB(BaseEstimator, ClassifierMixin):
+class GaussianMixtureNB(ClassifierMixin, BaseEstimator):
     """The `GaussianMixtureNB` estimator is a naive bayes classifier that uses a mixture of gaussians instead of
     merely a single one. In particular it trains a `GaussianMixture` model for each class in the target and for each
     feature in the data, on the subset of `X` where `y == class`.
@@ -158,7 +158,7 @@ def num_fit_cols_(self):
         return self.n_features_in_
 
 
-class BayesianGaussianMixtureNB(BaseEstimator, ClassifierMixin):
+class BayesianGaussianMixtureNB(ClassifierMixin, BaseEstimator):
     """The `BayesianGaussianMixtureNB` estimator is a naive bayes classifier that uses a bayesian mixture of gaussians
     instead of merely a single one. In particular it trains a `BayesianGaussianMixture` model for each class in the
     target and for each feature in the data, on the subset of `X` where `y == class`.
diff --git a/sklego/neighbors.py b/sklego/neighbors.py
index 55cdbe19..9a35ba0c 100644
--- a/sklego/neighbors.py
+++ b/sklego/neighbors.py
@@ -6,7 +6,7 @@
 from sklearn.utils.validation import FLOAT_DTYPES, check_array, check_is_fitted
 
 
-class BayesianKernelDensityClassifier(BaseEstimator, ClassifierMixin):
+class BayesianKernelDensityClassifier(ClassifierMixin, BaseEstimator):
     """The `BayesianKernelDensityClassifier` estimator trains using Kernel Density estimations to generate the joint
     distribution.
 
diff --git a/sklego/preprocessing/pandastransformers.py b/sklego/preprocessing/pandastransformers.py
index 2af07cb3..faccfa98 100644
--- a/sklego/preprocessing/pandastransformers.py
+++ b/sklego/preprocessing/pandastransformers.py
@@ -60,7 +60,7 @@ def _nw_select_dtypes(include: str | list[str], exclude: str | list[str], schema
     return feature_names
 
 
-class ColumnDropper(BaseEstimator, TransformerMixin):
+class ColumnDropper(TransformerMixin, BaseEstimator):
     """The `ColumnDropper` transformer allows dropping specific columns from a DataFrame by name.
     Can be useful in a sklearn Pipeline.
 
@@ -226,7 +226,7 @@ def _check_column_names(self, X):
             raise KeyError(f"{list(non_existent_columns)} column(s) not in DataFrame")
 
 
-class TypeSelector(BaseEstimator, TransformerMixin):
+class TypeSelector(TransformerMixin, BaseEstimator):
     """The `TypeSelector` transformer allows to select columns in a DataFrame based on their type.
     Can be useful in a sklearn Pipeline.
 
@@ -412,7 +412,7 @@ def __init__(self, include=None, exclude=None):
         super().__init__(include=include, exclude=exclude)
 
 
-class ColumnSelector(BaseEstimator, TransformerMixin):
+class ColumnSelector(TransformerMixin, BaseEstimator):
     """The `ColumnSelector` transformer allows selecting specific columns from a DataFrame by name.
     Can be useful in a sklearn Pipeline.
 
diff --git a/tests/test_meta/test_grouped_predictor.py b/tests/test_meta/test_grouped_predictor.py
index 93a181f0..cc08a874 100644
--- a/tests/test_meta/test_grouped_predictor.py
+++ b/tests/test_meta/test_grouped_predictor.py
@@ -32,6 +32,7 @@ def test_sklearn_compatible_estimator(estimator, check):
         "check_fit2d_predict1d",  # custom message
         "check_estimators_empty_data_messages",  # custom message
         "check_supervised_y_2d",  # TODO: Is it possible to support multioutput?
+        "check_requires_y_none",
     }:
         pytest.skip()
 
diff --git a/tests/test_meta/test_hierarchical_predictor.py b/tests/test_meta/test_hierarchical_predictor.py
index a6bbc465..02d9d321 100644
--- a/tests/test_meta/test_hierarchical_predictor.py
+++ b/tests/test_meta/test_hierarchical_predictor.py
@@ -31,6 +31,7 @@ def test_sklearn_compatible_estimator(estimator, check):
         "check_fit2d_1feature",  # custom message
         "check_supervised_y_2d",  # TODO: Is it possible to support multioutput?
         "check_estimators_empty_data_messages",  # custom message
+        "check_requires_y_none",
     }:
         pytest.skip()