diff --git a/.gitignore b/.gitignore
index 4c2794f1a..15eba83e1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,7 @@
 __pycache__
 *.egg-info
 *.swp
 *.swo
+*DS_Store
 .tox/
 build/
diff --git a/src/skmatter/_selection.py b/src/skmatter/_selection.py
index ebb25ecb2..867fc7b03 100644
--- a/src/skmatter/_selection.py
+++ b/src/skmatter/_selection.py
@@ -12,9 +12,8 @@
 from scipy.sparse.linalg import eigsh
 from sklearn.base import BaseEstimator, MetaEstimatorMixin
 from sklearn.feature_selection._base import SelectorMixin
-from sklearn.utils import check_array, check_random_state, safe_mask
-from sklearn.utils._tags import _safe_tags
-from sklearn.utils.validation import check_is_fitted
+from sklearn.utils import check_array, check_random_state, check_X_y, safe_mask
+from sklearn.utils.validation import FLOAT_DTYPES, as_float_array, check_is_fitted
 
 from .utils import (
     X_orthogonalizer,
@@ -125,7 +124,6 @@ def fit(self, X, y=None, warm_start=False):
         -------
         self : object
         """
-        tags = self._get_tags()
 
         if self.selection_type == "feature":
             self._axis = 1
@@ -144,28 +142,28 @@ def fit(self, X, y=None, warm_start=False):
         elif self.progress_bar is False:
             self.report_progress_ = no_progress_bar
 
-        params = dict(
-            accept_sparse="csc",
-            force_all_finite=not tags.get("allow_nan", True),
-        )
-        if self._axis == 1:
-            params["ensure_min_features"] = 2
-        else:
-            params["ensure_min_samples"] = 2
+        params = dict(ensure_min_samples=2, ensure_min_features=2, dtype=FLOAT_DTYPES)
 
-        if y is not None:
-            params["multi_output"] = True
+        if hasattr(self, "mixing") or y is not None:
             X, y = self._validate_data(X, y, **params)
+            X, y = check_X_y(X, y, multi_output=True)
 
             if len(y.shape) == 1:
                 # force y to have multi_output 2D format even when it's 1D, since
                 # many functions, most notably PCov routines, assume an array storage
                 # format, most notably to compute (y @ y.T)
                 y = y.reshape((len(y), 1))
+
         else:
             X = check_array(X, **params)
 
+        if self.full and self.score_threshold is not None:
+            raise ValueError(
+                "You cannot specify both `score_threshold` and `full=True`."
+            )
+
         n_to_select_from = X.shape[self._axis]
 
         self.n_samples_in_, self.n_features_in_ = X.shape
@@ -243,22 +241,27 @@ def transform(self, X, y=None):
             The selected subset of the input.
         """
-        if len(X.shape) == 1:
-            X = X.reshape(-1, 1)
+        check_is_fitted(self, ["_axis", "selected_idx_", "n_selected_"])
+
+        if self._axis == 0:
+            raise ValueError(
+                "Transform is not currently supported for sample selection."
+            )
 
         mask = self.get_support()
 
-        # note: we use _safe_tags instead of _get_tags because this is a
-        # public Mixin.
-        X = self._validate_data(
-            X,
-            dtype=None,
-            accept_sparse="csr",
-            force_all_finite=not _safe_tags(self, key="allow_nan"),
-            reset=False,
-            ensure_2d=self._axis,
-        )
+        X = check_array(X)
+
+        if len(X.shape) == 1:
+            if self._axis == 0:
+                X = X.reshape(-1, 1)
+            else:
+                X = X.reshape(1, -1)
+
+        if len(mask) != X.shape[self._axis]:
+            raise ValueError(
+                "X has a different shape than during fitting. Reshape your data."
+            )
 
         if self._axis == 1:
             return X[:, safe_mask(X, mask)]
         else:
@@ -517,7 +520,7 @@ def _init_greedy_search(self, X, y, n_to_select):
         features and computes their initial importance score.
         """
-        self.X_current_ = X.copy()
+        self.X_current_ = as_float_array(X.copy())
         self.pi_ = self._compute_pi(self.X_current_)
 
         super()._init_greedy_search(X, y, n_to_select)
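
With these changes, `fit` routes its input through scikit-learn's standard validation (float dtype, at least two samples and two features) and `transform` is explicitly disabled for sample selection. A minimal sketch of the resulting behavior, using illustrative random data rather than anything from the test suite:

    import numpy as np

    from skmatter.feature_selection import FPS as fFPS
    from skmatter.sample_selection import FPS as sFPS

    X = np.random.RandomState(0).rand(10, 4)

    # feature selection: transform returns the selected columns
    print(fFPS(n_to_select=2).fit(X).transform(X).shape)  # (10, 2)

    # sample selection: transform now raises ValueError;
    # index with selected_idx_ instead
    selector = sFPS(n_to_select=3).fit(X)
    X_selected = X[selector.selected_idx_]  # shape (3, 4)
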
""" - self.X_current_ = X.copy() + self.X_current_ = as_float_array(X.copy()) self.pi_ = self._compute_pi(self.X_current_) super()._init_greedy_search(X, y, n_to_select) diff --git a/src/skmatter/decomposition/_pcovr.py b/src/skmatter/decomposition/_pcovr.py index 4b3bb2b97..7e5afd42d 100644 --- a/src/skmatter/decomposition/_pcovr.py +++ b/src/skmatter/decomposition/_pcovr.py @@ -130,6 +130,8 @@ class PCovR(_BasePCA, LinearModel): Used when the 'arpack' or 'randomized' solvers are used. Pass an int for reproducible results across multiple function calls. + whiten : boolean, deprecated + Attributes ---------- @@ -202,12 +204,13 @@ def __init__( regressor=None, iterated_power="auto", random_state=None, + whiten=False, ): self.mixing = mixing self.n_components = n_components self.space = space - self.whiten = False + self.whiten = whiten self.svd_solver = svd_solver self.tol = tol self.iterated_power = iterated_power diff --git a/src/skmatter/linear_model/_base.py b/src/skmatter/linear_model/_base.py index dacde2fca..800cf67f4 100644 --- a/src/skmatter/linear_model/_base.py +++ b/src/skmatter/linear_model/_base.py @@ -2,6 +2,8 @@ from scipy.linalg import orthogonal_procrustes from sklearn.base import MultiOutputMixin, RegressorMixin from sklearn.linear_model import LinearRegression +from sklearn.utils import check_array, check_X_y +from sklearn.utils.validation import check_is_fitted class OrthogonalRegression(MultiOutputMixin, RegressorMixin): @@ -61,6 +63,15 @@ def fit(self, X, y): and n_targets is the number of target properties. """ + X, y = check_X_y( + X, + y, + y_numeric=True, + ensure_min_features=1, + ensure_min_samples=1, + multi_output=True, + ) + self.n_samples_in_, self.n_features_in_ = X.shape if self.use_orthogonal_projector: # check estimator @@ -71,12 +82,15 @@ def fit(self, X, y): ) # compute orthogonal projectors linear_estimator.fit(X, y) - U, _, Vt = np.linalg.svd(linear_estimator.coef_.T, full_matrices=False) - # project X and y to same dimension - X = X @ U - y = y @ Vt.T + coef = np.reshape(linear_estimator.coef_.T, (X.shape[1], -1)) + U, _, Vt = np.linalg.svd(coef, full_matrices=False) + # compute weights by solving the Procrustes problem - self.coef_ = (U @ orthogonal_procrustes(X, y)[0] @ Vt).T + self.coef_ = ( + U + @ orthogonal_procrustes(X @ U, y.reshape(X.shape[0], -1) @ Vt.T)[0] + @ Vt + ).T else: self.max_components_ = max(X.shape[1], y.shape[1]) X = np.pad(X, [(0, 0), (0, self.max_components_ - X.shape[1])]) @@ -93,6 +107,9 @@ def predict(self, X): Training data, where n_samples is the number of samples and n_features is the number of features. 
""" + X = check_array(X, ensure_min_features=1, ensure_min_samples=1) + check_is_fitted(self, ["coef_"]) + if not (self.use_orthogonal_projector): X = np.pad(X, [(0, 0), (0, self.max_components_ - X.shape[1])]) return X @ self.coef_.T diff --git a/src/skmatter/linear_model/_ridge.py b/src/skmatter/linear_model/_ridge.py index 491ecbc4c..3969b2a33 100644 --- a/src/skmatter/linear_model/_ridge.py +++ b/src/skmatter/linear_model/_ridge.py @@ -1,11 +1,13 @@ import numpy as np from joblib import Parallel, delayed -from sklearn.base import MultiOutputMixin, RegressorMixin +from sklearn.base import BaseEstimator, MultiOutputMixin, RegressorMixin from sklearn.metrics import check_scoring from sklearn.model_selection import KFold +from sklearn.utils import check_array +from sklearn.utils.validation import check_is_fitted -class RidgeRegression2FoldCV(MultiOutputMixin, RegressorMixin): +class RidgeRegression2FoldCV(BaseEstimator, MultiOutputMixin, RegressorMixin): r"""Ridge regression with an efficient 2-fold cross-validation method using the SVD solver. @@ -110,6 +112,9 @@ def __init__( self.shuffle = shuffle self.n_jobs = n_jobs + def _more_tags(self): + return {"multioutput_only": True} + def fit(self, X, y): """ Parameters @@ -138,6 +143,7 @@ def fit(self, X, y): "[0,1)" ) + X, y = self._validate_data(X, y, y_numeric=True, multi_output=True) self.n_samples_in_, self.n_features_in_ = X.shape # check_scoring uses estimators scoring function if the scorer is None, this is @@ -164,6 +170,11 @@ def predict(self, X): Training data, where n_samples is the number of samples and n_features is the number of features. """ + + X = check_array(X) + + check_is_fitted(self, ["coef_"]) + return X @ self.coef_.T def _2fold_cv(self, X, y, fold1_idx, fold2_idx, scorer): diff --git a/src/skmatter/metrics/_reconstruction_measures.py b/src/skmatter/metrics/_reconstruction_measures.py index 02d3d6557..86bab2fab 100644 --- a/src/skmatter/metrics/_reconstruction_measures.py +++ b/src/skmatter/metrics/_reconstruction_measures.py @@ -445,7 +445,7 @@ def pointwise_local_reconstruction_error( scaler.fit(X_train) X_train = scaler.transform(X_train) - X_test = scaler.transform(X_test) + X_test = scaler.transform(X_test).astype(X_train.dtype) scaler.fit(Y_train) Y_train = scaler.transform(Y_train) Y_test = scaler.transform(Y_test) diff --git a/src/skmatter/preprocessing/_data.py b/src/skmatter/preprocessing/_data.py index 94dd0e02e..9e4651466 100644 --- a/src/skmatter/preprocessing/_data.py +++ b/src/skmatter/preprocessing/_data.py @@ -135,6 +135,13 @@ def fit(self, X, y=None, sample_weight=None): Fitted scaler. 
""" + X = self._validate_data( + X, + copy=self.copy, + estimator=self, + dtype=FLOAT_DTYPES, + ensure_min_samples=2, + ) self.n_samples_in_, self.n_features_in_ = X.shape if sample_weight is not None: @@ -157,7 +164,7 @@ def fit(self, X, y=None, sample_weight=None): self.scale_ = np.sqrt(var) else: var_sum = var.sum() - if var_sum < abs(np.mean(X_mean)) * self.rtol + self.atol: + if var_sum < abs(np.average(X_mean)) * self.rtol + self.atol: raise ValueError("Cannot normalize a matrix with zero variance") self.scale_ = np.sqrt(var_sum) @@ -187,11 +194,9 @@ def transform(self, X, y=None, copy=None): X = self._validate_data( X, reset=False, - accept_sparse="csr", copy=copy, estimator=self, dtype=FLOAT_DTYPES, - force_all_finite="allow-nan", ) check_is_fitted( self, attributes=["n_samples_in_", "n_features_in_", "scale_", "mean_"] @@ -288,7 +293,7 @@ def __init__(self, with_center=True, with_trace=True): self.with_trace = with_trace super().__init__() - def fit(self, K=None, y=None, sample_weight=None): + def fit(self, K, y=None, sample_weight=None): """Fit KernelFlexibleCenterer Parameters @@ -310,7 +315,7 @@ def fit(self, K=None, y=None, sample_weight=None): Fitted transformer. """ - Kc = self._validate_data(K, copy=True, dtype=FLOAT_DTYPES, reset=False) + K = self._validate_data(K, copy=True, dtype=FLOAT_DTYPES, reset=False) if sample_weight is not None: self.sample_weight_ = _check_sample_weight(sample_weight, K, dtype=K.dtype) @@ -327,20 +332,20 @@ def fit(self, K=None, y=None, sample_weight=None): else: super().fit(K, y) - K_pred_cols = np.average(Kc, weights=self.sample_weight_, axis=1)[ + K_pred_cols = np.average(K, weights=self.sample_weight_, axis=1)[ :, np.newaxis ] else: - self.K_fit_rows_ = np.zeros(Kc.shape[1]) + self.K_fit_rows_ = np.zeros(K.shape[1]) self.K_fit_all_ = 0.0 - K_pred_cols = np.zeros((Kc.shape[0], 1)) + K_pred_cols = np.zeros((K.shape[0], 1)) if self.with_trace: - Kc -= self.K_fit_rows_ - Kc -= K_pred_cols - Kc += self.K_fit_all_ + K -= self.K_fit_rows_ + K -= K_pred_cols + K += self.K_fit_all_ - self.scale_ = np.trace(Kc) / Kc.shape[0] + self.scale_ = np.trace(K) / K.shape[0] else: self.scale_ = 1.0 @@ -408,7 +413,7 @@ def fit_transform(self, K, y=None, sample_weight=None, copy=True, **fit_params): return self.transform(K, copy) -class SparseKernelCenterer(TransformerMixin, BaseEstimator): +class SparseKernelCenterer(TransformerMixin): r"""Kernel centering method for sparse kernels, similar to KernelFlexibleCenterer. 
diff --git a/src/skmatter/utils/_orthogonalizers.py b/src/skmatter/utils/_orthogonalizers.py
index 00a68949e..4bfba3739 100644
--- a/src/skmatter/utils/_orthogonalizers.py
+++ b/src/skmatter/utils/_orthogonalizers.py
@@ -56,9 +56,9 @@ def X_orthogonalizer(x1, c=None, x2=None, tol=1e-12, copy=False):
         if np.linalg.norm(col) < tol:
             warnings.warn("Column vector contains only zeros.", stacklevel=1)
         else:
-            col /= np.linalg.norm(col, axis=0)
+            col = np.divide(col, np.linalg.norm(col, axis=0))
 
-        xnew -= col @ (col.T @ xnew)
+        xnew -= (col @ (col.T @ xnew)).astype(xnew.dtype)
 
     return xnew
diff --git a/src/skmatter/utils/_pcovr_utils.py b/src/skmatter/utils/_pcovr_utils.py
index 515d5edfc..69ae2e394 100644
--- a/src/skmatter/utils/_pcovr_utils.py
+++ b/src/skmatter/utils/_pcovr_utils.py
@@ -186,7 +186,7 @@
         C_Y = C_Y.reshape((C.shape[0], -1))
         C_Y = np.real(C_Y)
 
-        C += (1 - mixing) * C_Y @ C_Y.T
+        C += (1 - mixing) * np.array(C_Y @ C_Y.T, dtype=np.float64)
 
     if mixing > 0:
         C += (mixing) * (X.T @ X)
diff --git a/tests/test_check_estimators.py b/tests/test_check_estimators.py
new file mode 100644
index 000000000..f744a7a05
--- /dev/null
+++ b/tests/test_check_estimators.py
@@ -0,0 +1,26 @@
+from sklearn.utils.estimator_checks import parametrize_with_checks
+
+from skmatter.decomposition import KernelPCovR, PCovR
+from skmatter.feature_selection import CUR as fCUR
+from skmatter.feature_selection import FPS as fFPS
+from skmatter.feature_selection import PCovCUR as fPCovCUR
+from skmatter.feature_selection import PCovFPS as fPCovFPS
+from skmatter.linear_model import RidgeRegression2FoldCV  # OrthogonalRegression,
+from skmatter.preprocessing import KernelNormalizer, StandardFlexibleScaler
+
+
+@parametrize_with_checks(
+    [
+        KernelPCovR(mixing=0.5),
+        PCovR(mixing=0.5),
+        fCUR(),
+        fFPS(),
+        fPCovCUR(),
+        fPCovFPS(),
+        RidgeRegression2FoldCV(),
+        KernelNormalizer(),
+        StandardFlexibleScaler(),
+    ]
+)
+def test_sklearn_compatible_estimator(estimator, check):
+    check(estimator)
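
`parametrize_with_checks` expands each estimator above into the full battery of scikit-learn API checks at collection time. The same checks can also be run on a single estimator outside of pytest, which is handy when debugging one failure:

    from sklearn.utils.estimator_checks import check_estimator

    from skmatter.preprocessing import StandardFlexibleScaler

    # runs the same sklearn compatibility checks as the parametrized test
    check_estimator(StandardFlexibleScaler())
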
diff --git a/tests/test_feature_simple_cur.py b/tests/test_feature_simple_cur.py
index ba92facd5..72554471d 100644
--- a/tests/test_feature_simple_cur.py
+++ b/tests/test_feature_simple_cur.py
@@ -4,12 +4,13 @@
 from sklearn import exceptions
 
 from skmatter.datasets import load_csd_1000r as load
-from skmatter.feature_selection import CUR
+from skmatter.feature_selection import CUR, FPS
 
 
 class TestCUR(unittest.TestCase):
     def setUp(self):
         self.X, _ = load(return_X_y=True)
+        self.X = FPS(n_to_select=10).fit(self.X).transform(self.X)
 
     def test_bad_transform(self):
         selector = CUR(n_to_select=2)
diff --git a/tests/test_greedy_selector.py b/tests/test_greedy_selector.py
index fe83b71a8..0bfe6de99 100644
--- a/tests/test_greedy_selector.py
+++ b/tests/test_greedy_selector.py
@@ -61,9 +61,10 @@ def test_bad_warm_start(self):
 
     def test_bad_y(self):
         self.X, self.Y = get_dataset(return_X_y=True)
+        Y = self.Y[:2]
         selector = GreedyTester(n_to_select=2)
         with self.assertRaises(ValueError):
-            selector.fit(X=self.X, y=self.Y[:2])
+            selector.fit(X=self.X, y=Y)
 
     def test_bad_transform(self):
         selector = GreedyTester(n_to_select=2)
@@ -72,8 +73,7 @@
             _ = selector.transform(self.X[:, :3])
         self.assertEqual(
             str(cm.exception),
-            "X has 3 features, but GreedyTester is expecting {} features"
-            " as input.".format(self.X.shape[1]),
+            "X has a different shape than during fitting. Reshape your data.",
         )
 
     def test_no_nfeatures(self):
@@ -120,16 +120,16 @@ def test_size_input(self):
         X = np.array([1, 2, 3, 4, 5]).reshape(-1, 1)
         selector_sample = GreedyTester(selection_type="sample")
         selector_feature = GreedyTester(selection_type="feature")
-
         with self.assertRaises(ValueError) as cm:
             selector_feature.fit(X)
         self.assertEqual(
             str(cm.exception),
-            f"Found array with 1 feature(s) (shape={X.shape}) while a minimum of 2 is "
-            "required.",
+            f"Found array with 1 feature(s) (shape={X.shape})"
+            " while a minimum of 2 is required.",
         )
 
         X = X.reshape(1, -1)
+
         with self.assertRaises(ValueError) as cm:
             selector_sample.fit(X)
         self.assertEqual(
diff --git a/tests/test_kernel_normalizer.py b/tests/test_kernel_normalizer.py
index 694b39d0a..d17ddf9f3 100644
--- a/tests/test_kernel_normalizer.py
+++ b/tests/test_kernel_normalizer.py
@@ -41,12 +41,6 @@ def test_invalid_sample_weights(self):
         with self.assertRaises(ValueError):
             model.fit_transform(K, sample_weight=wts_dim)
 
-    def test_NoInputs(self):
-        """Checks that fit cannot be called with zero inputs."""
-        model = KernelNormalizer()
-        with self.assertRaises(ValueError):
-            model.fit()
-
     def test_ValueError(self):
         """Checks that a non-square matrix cannot be normalized."""
         K = self.random_state.uniform(0, 100, size=(3, 4))
diff --git a/tests/test_orthogonalizers.py b/tests/test_orthogonalizers.py
index 899cf73ce..0578141c8 100644
--- a/tests/test_orthogonalizers.py
+++ b/tests/test_orthogonalizers.py
@@ -20,8 +20,8 @@ def __init__(self, *args, **kwargs):
         self.random_state = np.random.RandomState(0)
 
     def setUp(self):
-        self.n_samples = 100
-        self.n_features = 100
+        self.n_samples = 2
+        self.n_features = 4
 
     def test_null_column(self):
         # checks that the column passed to the orthogonalizer
diff --git a/tests/test_sample_simple_cur.py b/tests/test_sample_simple_cur.py
index 9e82c18c3..b3a9437e1 100644
--- a/tests/test_sample_simple_cur.py
+++ b/tests/test_sample_simple_cur.py
@@ -1,23 +1,33 @@
 import unittest
 
 import numpy as np
-from sklearn import exceptions
 from sklearn.datasets import fetch_california_housing as load
 
-from skmatter.sample_selection import CUR
+from skmatter.sample_selection import CUR, FPS
 
 
 class TestCUR(unittest.TestCase):
     def setUp(self):
         self.X, _ = load(return_X_y=True)
-        self.X = self.X[:1000]
+        self.X = self.X[FPS(n_to_select=100).fit(self.X).selected_idx_]
         self.n_select = min(20, min(self.X.shape) // 2)
 
-    def test_bad_transform(self):
-        selector = CUR(n_to_select=2)
-        with self.assertRaises(exceptions.NotFittedError):
+    def test_sample_transform(self):
+        """
+        This test checks that an error is raised when the transform function is
+        used, because sklearn does not handle transformers that change the
+        number of samples well with other classes like Pipeline.
+        """
+        selector = CUR(n_to_select=1)
+        selector.fit(self.X)
+        with self.assertRaises(ValueError) as error:
             _ = selector.transform(self.X)
+        self.assertTrue(
+            "Transform is not currently supported for sample selection."
+            == str(error.exception)
+        )
+
     def test_restart(self):
         """
         This test checks that the model can be restarted with a new instance
diff --git a/tests/test_standard_flexible_scaler.py b/tests/test_standard_flexible_scaler.py
index 5e5108a47..e1d6cc1f6 100644
--- a/tests/test_standard_flexible_scaler.py
+++ b/tests/test_standard_flexible_scaler.py
@@ -188,6 +188,14 @@ def test_ValueError_full(self):
         with self.assertRaises(ValueError):
             model.fit(X)
 
+    def test_not_w_mean(self):
+        """Checks that `mean_` is set to zero
+        when the scaler is fit with `with_mean=False`."""
+        X = np.array([2, 2, 3]).reshape(-1, 1)
+        model = StandardFlexibleScaler(with_mean=False)
+        model.fit(X)
+        self.assertTrue(np.allclose(model.mean_, 0))
+
 
 if __name__ == "__main__":
     unittest.main()
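
Since `RidgeRegression2FoldCV` is now tagged `multioutput_only`, targets should be passed as 2-D column arrays. A minimal sketch on synthetic data:

    import numpy as np

    from skmatter.linear_model import RidgeRegression2FoldCV

    rng = np.random.RandomState(0)
    X = rng.rand(50, 8)
    y = (X @ rng.rand(8)).reshape(-1, 1)  # multioutput_only: y as (n_samples, 1)

    model = RidgeRegression2FoldCV()
    model.fit(X, y)
    print(model.predict(X[:3]).shape)  # (3, 1)
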