From 7b9edfedfac4249b9acb84c22bfaf800f1f50892 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 22 Feb 2024 14:26:54 -0500 Subject: [PATCH 01/14] Fix Signed-off-by: Adam Li --- .pre-commit-config.yaml | 1 + pyproject.toml | 11 +- sktree/_lib/sklearn_fork | 2 +- sktree/ensemble/_extensions.py | 35 +--- sktree/ensemble/_honest_forest.py | 268 ++++++-------------------- sktree/tests/test_honest_forest.py | 51 ++++- sktree/tree/_honest_tree.py | 57 ++++-- sktree/tree/_multiview.py | 4 +- sktree/tree/tests/test_honest_tree.py | 33 ++++ 9 files changed, 186 insertions(+), 276 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9b5825e5c..22194c830 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -63,6 +63,7 @@ repos: hooks: - id: toml-sort files: ^pyproject\.toml$ + args: ['-i'] # mypy - repo: https://github.com/pre-commit/mirrors-mypy diff --git a/pyproject.toml b/pyproject.toml index 37837441a..5e613f7c1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,9 @@ requires = [ "numpy>=1.25; python_version>='3.9'" ] +[lint.per-file-ignores] +'__init__.py' = ['F401'] + [project] name = "scikit-tree" version = "0.7.0dev0" @@ -203,7 +206,9 @@ add_ignore = 'D100,D104,D105,D107' [tool.pytest.ini_options] minversion = '6.0' addopts = '--durations 20 --junit-xml=junit-results.xml --verbose --ignore=sktree/_lib/ -k "not slowtest"' -filterwarnings = [] +filterwarnings = [ + 'ignore:Using sklearn tree so store_leaf_values cannot be set.*' +] [tool.rstcheck] ignore_directives = [ @@ -261,10 +266,10 @@ extend-exclude = [ 'validation' ] line-length = 88 -ignore = ['E731'] +lint.ignore = ['E731'] [tool.ruff.per-file-ignores] -'__init__.py' = ['F401'] +"__init__.py" = ["F401"] [tool.spin] package = 'sktree' diff --git a/sktree/_lib/sklearn_fork b/sktree/_lib/sklearn_fork index db91ddb6f..d48716a6b 160000 --- a/sktree/_lib/sklearn_fork +++ b/sktree/_lib/sklearn_fork @@ -1 +1 @@ -Subproject commit db91ddb6f090ac091588dd844be6d9c60afc341f +Subproject commit d48716a6b2cc6373b9e66bf959f2b43b89f10c5d diff --git a/sktree/ensemble/_extensions.py b/sktree/ensemble/_extensions.py index 99aef5a6c..1933d1415 100644 --- a/sktree/ensemble/_extensions.py +++ b/sktree/ensemble/_extensions.py @@ -1,5 +1,4 @@ import threading -from numbers import Integral, Real import numpy as np from joblib import Parallel, delayed @@ -7,6 +6,7 @@ from sklearn.utils import check_random_state from sklearn.utils.validation import check_is_fitted +from .._lib.sklearn.ensemble._forest import _get_n_samples_bootstrap from ..tree._classes import DTYPE @@ -55,39 +55,6 @@ def _generate_sample_indices(random_state, n_samples, n_samples_bootstrap, boots return sample_indices -def _get_n_samples_bootstrap(n_samples, max_samples): - """ - Get the number of samples in a bootstrap sample. - - XXX: Note this is copied from sklearn. We override the ability - to sample a higher number of bootstrap samples to enable sampling - closer to 80% unique training data points for in-bag computation. - - Parameters - ---------- - n_samples : int - Number of samples in the dataset. - max_samples : int or float - The maximum number of samples to draw from the total available: - - if float, this indicates a fraction of the total; - - if int, this indicates the exact number of samples; - - if None, this indicates the total number of samples. - - Returns - ------- - n_samples_bootstrap : int - The total number of samples to draw for the bootstrap sample. 
- """ - if max_samples is None: - return n_samples - - if isinstance(max_samples, Integral): - return max_samples - - if isinstance(max_samples, Real): - return round(n_samples * max_samples) - - class ForestMixin: """A set of mixin methods to extend forests models API.""" diff --git a/sktree/ensemble/_honest_forest.py b/sktree/ensemble/_honest_forest.py index b9bd9a4bd..7a3b23eb5 100644 --- a/sktree/ensemble/_honest_forest.py +++ b/sktree/ensemble/_honest_forest.py @@ -3,25 +3,19 @@ import threading from numbers import Integral -from warnings import catch_warnings, simplefilter, warn +from warnings import catch_warnings, simplefilter import numpy as np from joblib import Parallel, delayed -from scipy.sparse import issparse -from sklearn.base import _fit_context -from sklearn.ensemble._base import _partition_estimators -from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper -from sklearn.exceptions import DataConversionWarning -from sklearn.utils import check_random_state, compute_sample_weight -from sklearn.utils._openmp_helpers import _openmp_effective_n_threads +from sklearn.base import _fit_context, clone +from sklearn.ensemble._base import _partition_estimators, _set_random_states +from sklearn.utils import compute_sample_weight from sklearn.utils._param_validation import Interval, RealNotInt -from sklearn.utils.multiclass import type_of_target -from sklearn.utils.validation import _check_sample_weight, check_is_fitted +from sklearn.utils.validation import check_is_fitted from .._lib.sklearn.ensemble._forest import ForestClassifier -from .._lib.sklearn.tree._tree import DOUBLE, DTYPE from ..tree import HonestTreeClassifier -from ._extensions import ForestClassifierMixin, _generate_sample_indices, _get_n_samples_bootstrap +from ._extensions import ForestClassifierMixin, _generate_sample_indices def _parallel_build_trees( @@ -419,27 +413,30 @@ def __init__( honest_fraction=0.5, tree_estimator=None, stratify=False, + **tree_estimator_params, ): + estimator_params = ( + "criterion", + "splitter", + "max_depth", + "min_samples_split", + "min_samples_leaf", + "min_weight_fraction_leaf", + "max_features", + "max_leaf_nodes", + "min_impurity_decrease", + "random_state", + "ccp_alpha", + "tree_estimator", + "honest_fraction", + "honest_prior", + "stratify", + ) + super().__init__( estimator=HonestTreeClassifier(), n_estimators=n_estimators, - estimator_params=( - "criterion", - "splitter", - "max_depth", - "min_samples_split", - "min_samples_leaf", - "min_weight_fraction_leaf", - "max_features", - "max_leaf_nodes", - "min_impurity_decrease", - "random_state", - "ccp_alpha", - "tree_estimator", - "honest_fraction", - "honest_prior", - "stratify", - ), + estimator_params=estimator_params, bootstrap=bootstrap, oob_score=oob_score, n_jobs=n_jobs, @@ -463,9 +460,10 @@ def __init__( self.honest_prior = honest_prior self.tree_estimator = tree_estimator self.stratify = stratify + self._tree_estimator_params = tree_estimator_params @_fit_context(prefer_skip_nested_validation=True) - def fit(self, X, y, sample_weight=None, classes=None): + def fit(self, X, y, sample_weight=None, classes=None, **fit_params): """ Build a forest of trees from the training set (X, y). @@ -490,202 +488,50 @@ def fit(self, X, y, sample_weight=None, classes=None): classes : array-like of shape (n_classes,), default=None List of all the classes that can possibly appear in the y vector. + **fit_params : dict + Parameters to pass to the underlying base tree estimators. 
+ + Only available if `enable_metadata_routing=True`, + which can be set by using + ``sklearn.set_config(enable_metadata_routing=True)``. + See :ref:`Metadata Routing User Guide ` for + more details. + Returns ------- self : object Fitted estimator. """ - # XXX: This entire function is a copy of what is in scikit-learn - # with the exception of: - # - _get_n_samples_bootstrap is a re-defined function to allow higher - # max_samples - - MAX_INT = np.iinfo(np.int32).max - # Validate or convert input data - if issparse(y): - raise ValueError("sparse multilabel-indicator for y is not supported.") - - X, y = self._validate_data( - X, - y, - multi_output=True, - accept_sparse="csc", - dtype=DTYPE, - force_all_finite=False, - ) - # _compute_missing_values_in_feature_mask checks if X has missing values and - # will raise an error if the underlying tree base estimator can't handle missing - # values. Only the criterion is required to determine if the tree supports - # missing values. - estimator = type(self.estimator)(criterion=self.criterion) - missing_values_in_feature_mask = estimator._compute_missing_values_in_feature_mask( - X, estimator_name=self.__class__.__name__ - ) - - if sample_weight is not None: - sample_weight = _check_sample_weight(sample_weight, X) - - if issparse(X): - # Pre-sort indices to avoid that each individual tree of the - # ensemble sorts the indices. - X.sort_indices() - - y = np.atleast_1d(y) - if y.ndim == 2 and y.shape[1] == 1: - warn( - ( - "A column-vector y was passed when a 1d array was" - " expected. Please change the shape of y to " - "(n_samples,), for example using ravel()." - ), - DataConversionWarning, - stacklevel=2, - ) - - if y.ndim == 1: - # reshape is necessary to preserve the data contiguity against vs - # [:, np.newaxis] that does not. - y = np.reshape(y, (-1, 1)) - - if self.criterion == "poisson": - if np.any(y < 0): - raise ValueError( - "Some value(s) of y are negative which is " - "not allowed for Poisson regression." - ) - if np.sum(y) <= 0: - raise ValueError( - "Sum of y is not strictly positive which " - "is necessary for Poisson regression." 
- ) - - self._n_samples, self.n_outputs_ = y.shape - - y, expanded_class_weight = self._validate_y_class_weight(y, classes=classes) - - if getattr(y, "dtype", None) != DOUBLE or not y.flags.contiguous: - y = np.ascontiguousarray(y, dtype=DOUBLE) - if expanded_class_weight is not None: - if sample_weight is not None: - sample_weight = sample_weight * expanded_class_weight - else: - sample_weight = expanded_class_weight + super().fit(X, y, sample_weight=sample_weight, classes=classes, **fit_params) - # compute the number of samples we want to sample for each tree - # possibly without replacement - n_samples_bootstrap = _get_n_samples_bootstrap( - n_samples=X.shape[0], max_samples=self.max_samples + # Compute honest decision function + self.honest_decision_function_ = self._predict_proba( + X, indices=self.honest_indices_, impute_missing=np.nan ) - self._n_samples_bootstrap = n_samples_bootstrap - - self._validate_estimator() - - if not self.bootstrap and self.oob_score: - raise ValueError("Out of bag estimation only available if bootstrap=True") - - random_state = check_random_state(self.random_state) - - if not self.warm_start or not hasattr(self, "estimators_"): - # Free allocated memory, if any - self.estimators_ = [] - - n_more_estimators = self.n_estimators - len(self.estimators_) - - if self.max_bins is not None: - # `_openmp_effective_n_threads` is used to take cgroups CPU quotes - # into account when determine the maximum number of threads to use. - n_threads = _openmp_effective_n_threads() - - # Bin the data - # For ease of use of the API, the user-facing GBDT classes accept the - # parameter max_bins, which doesn't take into account the bin for - # missing values (which is always allocated). However, since max_bins - # isn't the true maximal number of bins, all other private classes - # (binmapper, histbuilder...) accept n_bins instead, which is the - # actual total number of bins. Everywhere in the code, the - # convention is that n_bins == max_bins + 1 - n_bins = self.max_bins + 1 # + 1 for missing values - self._bin_mapper = _BinMapper( - n_bins=n_bins, - # is_categorical=self.is_categorical_, - known_categories=None, - random_state=random_state, - n_threads=n_threads, - ) - - # XXX: in order for this to work with the underlying tree submodule's Cython - # code, we need to convert this into the original data's DTYPE because - # the Cython code assumes that `DTYPE` is used. - # The proper implementation will be a lot more complicated and should be - # tackled once scikit-learn has finalized their inclusion of missing data - # and categorical support for decision trees - X = self._bin_data(X, is_training_data=True) # .astype(DTYPE) - else: - self._bin_mapper = None - - if n_more_estimators < 0: - raise ValueError( - "n_estimators=%d must be larger or equal to " - "len(estimators_)=%d when warm_start==True" - % (self.n_estimators, len(self.estimators_)) - ) - - elif n_more_estimators == 0: - warn("Warm-start fitting without increasing n_estimators does not " "fit new trees.") - else: - if self.warm_start and len(self.estimators_) > 0: - # We draw from the random state to get the random state we - # would have got if we hadn't used a warm_start. 
- random_state.randint(MAX_INT, size=len(self.estimators_)) + return self - trees = self._construct_trees( - X, - y, - sample_weight, - random_state, - n_samples_bootstrap, - missing_values_in_feature_mask, - classes, - n_more_estimators, - ) + def _make_estimator(self, append=True, random_state=None): + """Make and configure a copy of the `estimator_` attribute. - # Collect newly grown trees - self.estimators_.extend(trees) - - if self.oob_score and (n_more_estimators > 0 or not hasattr(self, "oob_score_")): - y_type = type_of_target(y) - if y_type == "unknown" or ( - self._estimator_type == "classifier" and y_type == "multiclass-multioutput" - ): - # FIXME: we could consider to support multiclass-multioutput if - # we introduce or reuse a constructor parameter (e.g. - # oob_score) allowing our user to pass a callable defining the - # scoring strategy on OOB sample. - raise ValueError( - "The type of target cannot be used to compute OOB " - f"estimates. Got {y_type} while only the following are " - "supported: continuous, continuous-multioutput, binary, " - "multiclass, multilabel-indicator." - ) + Warning: This method should be used to properly instantiate new + sub-estimators. + """ + estimator = clone(self.estimator_) + estimator.set_params(**{p: getattr(self, p) for p in self.estimator_params}) - if callable(self.oob_score): - self._set_oob_score_and_attributes(X, y, scoring_function=self.oob_score) - else: - self._set_oob_score_and_attributes(X, y) + # XXX: This is the only change compared to scikit-learn's make_estimator + # additionally set the tree_estimator_params + estimator._tree_estimator_params = self._tree_estimator_params - # Decapsulate classes_ attributes - if hasattr(self, "classes_") and self.n_outputs_ == 1: - self.n_classes_ = self.n_classes_[0] - self.classes_ = self.classes_[0] + if random_state is not None: + _set_random_states(estimator, random_state) - # Compute honest decision function - self.honest_decision_function_ = self._predict_proba( - X, indices=self.honest_indices_, impute_missing=np.nan - ) + if append: + self.estimators_.append(estimator) - return self + return estimator def _construct_trees( self, diff --git a/sktree/tests/test_honest_forest.py b/sktree/tests/test_honest_forest.py index 2efc4d438..c06fb716c 100644 --- a/sktree/tests/test_honest_forest.py +++ b/sktree/tests/test_honest_forest.py @@ -12,7 +12,11 @@ from sktree.datasets import make_quadratic_classification from sktree.ensemble import HonestForestClassifier from sktree.stats.utils import _mutual_information -from sktree.tree import ObliqueDecisionTreeClassifier, PatchObliqueDecisionTreeClassifier +from sktree.tree import ( + MultiViewDecisionTreeClassifier, + ObliqueDecisionTreeClassifier, + PatchObliqueDecisionTreeClassifier, +) CLF_CRITERIONS = ("gini", "entropy") @@ -155,16 +159,15 @@ def test_max_samples(): uf = uf.fit(X, y) -@pytest.mark.parametrize("bootstrap", [True, False]) @pytest.mark.parametrize("max_samples", [0.75, 1.0]) -def test_honest_forest_has_deterministic_sampling_for_oob_structure_and_leaves( - bootstrap, max_samples -): +def test_honest_forest_has_deterministic_sampling_for_oob_structure_and_leaves(max_samples): """Test that honest forest can produce the oob, structure and leaf-node samples. When bootstrap is True, oob should be exclusive from structure and leaf-node samples. When bootstrap is False, there is no oob. 
""" + bootstrap = True + n_estimators = 5 est = HonestForestClassifier( n_estimators=n_estimators, @@ -325,7 +328,7 @@ def test_honest_forest_with_sklearn_trees(): https://github.com/neurodata/scikit-tree/pull/157.""" # generate the high-dimensional quadratic data - X, y = make_quadratic_classification(1024, 4096, noise=True, seed=0) + X, y = make_quadratic_classification(256, 2048, noise=True, seed=0) y = y.squeeze() print(X.shape, y.shape) print(np.sum(y) / len(y)) @@ -372,7 +375,7 @@ def test_honest_forest_with_sklearn_trees_with_auc(): scores = [] sk_scores = [] for idx in range(10): - X, y = make_quadratic_classification(1024, 4096, noise=True, seed=idx) + X, y = make_quadratic_classification(256, 2048, noise=True, seed=idx) y = y.squeeze() skForest.fit(X, y) @@ -412,7 +415,7 @@ def test_honest_forest_with_sklearn_trees_with_mi(): scores = [] sk_scores = [] for idx in range(10): - X, y = make_quadratic_classification(1024, 4096, noise=True, seed=idx) + X, y = make_quadratic_classification(256, 2048, noise=True, seed=idx) y = y.squeeze() skForest.fit(X, y) @@ -432,3 +435,35 @@ def test_honest_forest_with_sklearn_trees_with_mi(): print(np.mean(scores), np.mean(sk_scores)) print(np.std(scores), np.std(sk_scores)) assert_allclose(np.mean(sk_scores), np.mean(scores), atol=0.005) + + +def test_honest_forest_with_tree_estimator_params(): + X = np.ones((20, 4)) + X[10:] *= -1 + y = [0] * 10 + [1] * 10 + + # test with a parameter that is a repeat of an init parameter + clf = HonestForestClassifier( + tree_estimator=DecisionTreeClassifier(), + random_state=0, + feature_set_ends=[10, 20], + ) + with pytest.raises(ValueError, match=r"Invalid parameter\(s\)"): + clf.fit(X, y) + + # test with a parameter that is not in any init signature + clf = HonestForestClassifier( + tree_estimator=MultiViewDecisionTreeClassifier(), + random_state=0, + blah=0, + ) + with pytest.raises(ValueError, match=r"Invalid parameter\(s\)"): + clf.fit(X, y) + + # passing in a valid argument to the tree_estimator should work + clf = HonestForestClassifier( + tree_estimator=MultiViewDecisionTreeClassifier(), + random_state=0, + feature_set_ends=[10, 20], + ) + clf.fit(X, y) diff --git a/sktree/tree/_honest_tree.py b/sktree/tree/_honest_tree.py index 2a434daea..a0cb34950 100644 --- a/sktree/tree/_honest_tree.py +++ b/sktree/tree/_honest_tree.py @@ -4,7 +4,7 @@ import numpy as np from sklearn.base import ClassifierMixin, MetaEstimatorMixin, _fit_context, clone from sklearn.model_selection import StratifiedShuffleSplit -from sklearn.utils._param_validation import Interval, RealNotInt, StrOptions +from sklearn.utils._param_validation import HasMethods, Interval, RealNotInt, StrOptions from sklearn.utils.multiclass import _check_partial_fit_first_call, check_classification_targets from sklearn.utils.validation import check_is_fitted, check_X_y @@ -18,6 +18,11 @@ class HonestTreeClassifier(MetaEstimatorMixin, ClassifierMixin, BaseDecisionTree Parameters ---------- + tree_estimator : object, default=None + Instantiated tree of type BaseDecisionTree from sktree. + If None, then sklearn's DecisionTreeClassifier with default parameters will + be used. + criterion : {"gini", "entropy"}, default="gini" The function to measure the quality of a split. Supported criteria are "gini" for the Gini impurity and "entropy" for the information gain. @@ -150,12 +155,6 @@ class HonestTreeClassifier(MetaEstimatorMixin, ClassifierMixin, BaseDecisionTree Read more in the :ref:`User Guide `. 
- tree_estimator : object, default=None - Instantiated tree of type BaseDecisionTree from sktree. - If None, then DecisionTreeClassifier with default parameters will - be used. Note that one MUST use trees imported from the `sktree.tree` - API namespace rather than from `sklearn.tree`. - honest_fraction : float, default=0.5 Fraction of training samples used for estimates in the leaves. The remaining samples will be used to learn the tree structure. A larger @@ -272,6 +271,10 @@ class frequency in the voting subsample. _parameter_constraints: dict = { **BaseDecisionTree._parameter_constraints, + "tree_estimator": [ + HasMethods(["fit", "predict", "predict_proba", "apply"]), + None, + ], "honest_fraction": [Interval(RealNotInt, 0.0, 1.0, closed="neither")], "honest_prior": [StrOptions({"empirical", "uniform", "ignore"})], "stratify": ["boolean"], @@ -279,6 +282,7 @@ class frequency in the voting subsample. def __init__( self, + tree_estimator=None, criterion="gini", splitter="best", max_depth=None, @@ -292,10 +296,10 @@ def __init__( class_weight=None, ccp_alpha=0.0, monotonic_cst=None, - tree_estimator=None, honest_fraction=0.5, honest_prior="empirical", stratify=False, + **tree_estimator_params, ): self.tree_estimator = tree_estimator self.criterion = criterion @@ -310,13 +314,15 @@ def __init__( self.random_state = random_state self.min_impurity_decrease = min_impurity_decrease self.ccp_alpha = ccp_alpha + self.monotonic_cst = monotonic_cst + self.honest_fraction = honest_fraction self.honest_prior = honest_prior - self.monotonic_cst = monotonic_cst self.stratify = stratify # XXX: to enable this, we need to also reset the leaf node samples during `_set_leaf_nodes` self.store_leaf_values = False + self._tree_estimator_params = tree_estimator_params @_fit_context(prefer_skip_nested_validation=True) def fit( @@ -521,6 +527,15 @@ def _partition_honest_indices(self, y, sample_weight): return _sample_weight + def _get_estimator(self): + """Resolve which estimator to return (default is DecisionTreeClassifier)""" + if self.tree_estimator is None: + self.estimator_ = DecisionTreeClassifier(random_state=self.random_state) + else: + # XXX: maybe error out if the base tree estimator is already fitted + self.estimator_ = clone(self.tree_estimator) + return self.estimator_ + def _fit( self, X, @@ -564,14 +579,21 @@ def _fit( if check_input: X, y = check_X_y(X, y, multi_output=True) - if self.tree_estimator is None: - self.estimator_ = DecisionTreeClassifier(random_state=self.random_state) - else: - # XXX: maybe error out if the tree_estimator is already fitted - self.estimator_ = clone(self.tree_estimator) + self.estimator_ = self._get_estimator() - # obtain the structure sample weights - sample_weights_structure = self._partition_honest_indices(y, sample_weight) + # check that all of tree_estimator_params are valid + init_params = self.estimator_.__init__.__code__.co_varnames[1:] # exclude 'self' + honest_tree_init_params = self.__init__.__code__.co_varnames[1:] # exclude 'self' + invalid_params = [] + for param in self._tree_estimator_params.keys(): + if param not in init_params or param in honest_tree_init_params: + invalid_params.append(param) + + if invalid_params: + raise ValueError( + f"Invalid parameter(s) for estimator {self.estimator_.__class__.__name__}: " + f'{", ".join(invalid_params)}' + ) self.estimator_.set_params( **dict( @@ -602,6 +624,9 @@ def _fit( warn("Using sklearn tree so store_leaf_values cannot be set.") + # obtain the structure sample weights + sample_weights_structure = 
self._partition_honest_indices(y, sample_weight) + # Learn structure on subsample # XXX: this allows us to use BaseDecisionTree without partial_fit API try: diff --git a/sktree/tree/_multiview.py b/sktree/tree/_multiview.py index 7d9edda73..ff1fa3a9f 100644 --- a/sktree/tree/_multiview.py +++ b/sktree/tree/_multiview.py @@ -573,8 +573,6 @@ def _fit( # - self.max_features_arr contains a possible array-like setting of max_features self._max_features_arr = self.max_features self.max_features = None - self = super()._fit( - X, y, sample_weight, check_input, missing_values_in_feature_mask, classes - ) + super()._fit(X, y, sample_weight, check_input, missing_values_in_feature_mask, classes) self.max_features = self._max_features_arr return self diff --git a/sktree/tree/tests/test_honest_tree.py b/sktree/tree/tests/test_honest_tree.py index b412faff7..d0b673fea 100644 --- a/sktree/tree/tests/test_honest_tree.py +++ b/sktree/tree/tests/test_honest_tree.py @@ -9,6 +9,7 @@ from sktree._lib.sklearn.tree import DecisionTreeClassifier from sktree.tree import ( HonestTreeClassifier, + MultiViewDecisionTreeClassifier, ObliqueDecisionTreeClassifier, PatchObliqueDecisionTreeClassifier, ) @@ -62,6 +63,38 @@ def test_toy_accuracy(): np.testing.assert_array_equal(clf.predict(X), y) +def test_honest_tree_with_tree_estimator_params(): + X = np.ones((20, 4)) + X[10:] *= -1 + y = [0] * 10 + [1] * 10 + + # test with a parameter that is a repeat of an init parameter + clf = HonestTreeClassifier( + tree_estimator=DecisionTreeClassifier(), + random_state=0, + feature_set_ends=[10, 20], + ) + with pytest.raises(ValueError, match=r"Invalid parameter\(s\)"): + clf.fit(X, y) + + # test with a parameter that is not in any init signature + clf = HonestTreeClassifier( + tree_estimator=MultiViewDecisionTreeClassifier(), + random_state=0, + blah=0, + ) + with pytest.raises(ValueError, match=r"Invalid parameter\(s\)"): + clf.fit(X, y) + + # passing in a valid argument to the tree_estimator should work + clf = HonestTreeClassifier( + tree_estimator=MultiViewDecisionTreeClassifier(), + random_state=0, + feature_set_ends=[10, 20], + ) + clf.fit(X, y) + + @pytest.mark.parametrize( "honest_prior, val", [ From 5e75cf080ba7a582a9f6050bcae91bdb634c4dac Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 22 Feb 2024 14:44:41 -0500 Subject: [PATCH 02/14] Add changelog Signed-off-by: Adam Li --- doc/whats_new/v0.7.rst | 4 ++++ sktree/ensemble/_honest_forest.py | 13 +++++++++---- sktree/tree/_honest_tree.py | 8 +++++++- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/doc/whats_new/v0.7.rst b/doc/whats_new/v0.7.rst index 2d474beca..cc389e17e 100644 --- a/doc/whats_new/v0.7.rst +++ b/doc/whats_new/v0.7.rst @@ -27,6 +27,10 @@ Changelog by `Sambit Panda`_ (:pr:`#203`) - |Feature| :func:`sktree.stats.build_coleman_forest` and :func:`sktree.stats.build_permutation_forest` are added to compute p-values given an estimator and permutation-estimator, `Adam Li`_ (:pr:`#222`) +- |API| :class:`sktree.HonestForestClassifier` and :class:`sktree.tree.HonestTreeClassifier` + now overwrite all parameters set by the underlying ``tree_estimator`` and allow you to directly + pass any extra parameters that ``tree_estimator`` has compared to the original + :class:`~sklearn.tree.DecisionTreeClassifier`, `Adam Li`_ (:pr:`#228`) Code and Documentation Contributors ----------------------------------- diff --git a/sktree/ensemble/_honest_forest.py b/sktree/ensemble/_honest_forest.py index 7a3b23eb5..33833d0a9 100644 --- 
a/sktree/ensemble/_honest_forest.py +++ b/sktree/ensemble/_honest_forest.py @@ -246,15 +246,20 @@ class HonestForestClassifier(ForestClassifier, ForestClassifierMixin): fraction creates shallower trees with lower variance estimates. tree_estimator : object, default=None - Type of decision tree classifier to use. By default `None`, which - defaults to `sktree.tree.DecisionTreeClassifier`. Note - that one MUST use trees imported from the `sktree.tree` - API namespace rather than from `sklearn.tree`. + Instantiated tree of type BaseDecisionTree from sktree. + If None, then sklearn's DecisionTreeClassifier with default parameters will + be used. Note that none of the parameters in ``tree_estimator`` need + to be set. The parameters of the ``tree_estimator`` can be set using + the ``tree_estimator_params`` keyword argument. stratify : bool Whether or not to stratify sample when considering structure and leaf indices. By default False. + **tree_estimator_params : dict + Parameters to pass to the underlying base tree estimators. + These must be parameters for ``tree_estimator``. + Attributes ---------- estimator : sktree.tree.HonestTreeClassifier diff --git a/sktree/tree/_honest_tree.py b/sktree/tree/_honest_tree.py index a0cb34950..5927703f5 100644 --- a/sktree/tree/_honest_tree.py +++ b/sktree/tree/_honest_tree.py @@ -21,7 +21,9 @@ class HonestTreeClassifier(MetaEstimatorMixin, ClassifierMixin, BaseDecisionTree tree_estimator : object, default=None Instantiated tree of type BaseDecisionTree from sktree. If None, then sklearn's DecisionTreeClassifier with default parameters will - be used. + be used. Note that none of the parameters in ``tree_estimator`` need + to be set. The parameters of the ``tree_estimator`` can be set using + the ``tree_estimator_params`` keyword argument. criterion : {"gini", "entropy"}, default="gini" The function to measure the quality of a split. Supported criteria are @@ -171,6 +173,10 @@ class frequency in the voting subsample. Whether or not to stratify sample when considering structure and leaf indices. By default False. + **tree_estimator_params : dict + Parameters to pass to the underlying base tree estimators. + These must be parameters for ``tree_estimator``. + Attributes ---------- estimator_ : object From 7ab70cde5b35e4524c19b35905da8a809687bdf1 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 22 Feb 2024 15:08:49 -0500 Subject: [PATCH 03/14] Fix return Signed-off-by: Adam Li --- sktree/tree/_classes.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/sktree/tree/_classes.py b/sktree/tree/_classes.py index 4763bb53d..1ce5adb34 100644 --- a/sktree/tree/_classes.py +++ b/sktree/tree/_classes.py @@ -302,6 +302,7 @@ def _build_tree( ) builder.build(self.tree_, X, sample_weight) + return self def predict(self, X, check_input=True): """Assign labels based on clustering the affinity matrix. @@ -573,6 +574,7 @@ def _build_tree( ) builder.build(self.tree_, X, sample_weight) + return self class ObliqueDecisionTreeClassifier(SimMatrixMixin, DecisionTreeClassifier): @@ -986,6 +988,8 @@ def _build_tree( self.n_classes_ = self.n_classes_[0] self.classes_ = self.classes_[0] + return self + class ObliqueDecisionTreeRegressor(SimMatrixMixin, DecisionTreeRegressor): """An oblique decision tree Regressor. 
@@ -1364,6 +1368,7 @@ def _build_tree( ) builder.build(self.tree_, X, y, sample_weight, None) + return self class PatchObliqueDecisionTreeClassifier(SimMatrixMixin, DecisionTreeClassifier): @@ -1839,6 +1844,8 @@ def _build_tree( self.n_classes_ = self.n_classes_[0] self.classes_ = self.classes_[0] + return self + def _more_tags(self): # XXX: nans should be supportable in SPORF by just using RF-like splits on missing values # However, for MORF it is not supported @@ -2306,6 +2313,8 @@ def _build_tree( builder.build(self.tree_, X, y, sample_weight, None) + return self + def _more_tags(self): # XXX: nans should be supportable in SPORF by just using RF-like splits on missing values # However, for MORF it is not supported @@ -2736,6 +2745,7 @@ def _build_tree( if self.n_outputs_ == 1: self.n_classes_ = self.n_classes_[0] self.classes_ = self.classes_[0] + return self class ExtraObliqueDecisionTreeRegressor(SimMatrixMixin, DecisionTreeRegressor): @@ -3125,3 +3135,5 @@ def _build_tree( ) builder.build(self.tree_, X, y, sample_weight) + + return self From 7549c1b18db03cc16b5f074607ec37d7f27136d8 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Thu, 22 Feb 2024 18:11:15 -0500 Subject: [PATCH 04/14] Fix unit-testss Signed-off-by: Adam Li --- sktree/ensemble/_extensions.py | 8 ++++---- sktree/ensemble/_honest_forest.py | 9 +++++++-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/sktree/ensemble/_extensions.py b/sktree/ensemble/_extensions.py index 1933d1415..edf301f8a 100644 --- a/sktree/ensemble/_extensions.py +++ b/sktree/ensemble/_extensions.py @@ -47,11 +47,11 @@ def _generate_sample_indices(random_state, n_samples, n_samples_bootstrap, boots XXX: this is copied over from scikit-learn and modified to allow sampling with and without replacement given ``bootstrap``. """ - random_instance = check_random_state(random_state) n_sample_idx = np.arange(0, n_samples, dtype=np.int32) - sample_indices = random_instance.choice(n_sample_idx, n_samples_bootstrap, replace=bootstrap) - + sample_indices = np.atleast_1d( + random_instance.choice(n_sample_idx, n_samples_bootstrap, replace=bootstrap) + ) return sample_indices @@ -65,7 +65,7 @@ def oob_samples_(self): Only utilized if ``bootstrap=True``, otherwise, all samples are "in-bag". """ if self.bootstrap is False and ( - self._n_samples_bootstrap == self._n_samples or self._n_samples_bootstrap is None + self._n_samples_bootstrap is None or (self._n_samples_bootstrap == self._n_samples) ): raise RuntimeError( "Cannot extract out-of-bag samples when bootstrap is False and " diff --git a/sktree/ensemble/_honest_forest.py b/sktree/ensemble/_honest_forest.py index 33833d0a9..768cd31a0 100644 --- a/sktree/ensemble/_honest_forest.py +++ b/sktree/ensemble/_honest_forest.py @@ -663,7 +663,9 @@ def oob_samples_(self): Only utilized if ``bootstrap=True``, otherwise, all samples are "in-bag". 
""" - if self.bootstrap is False and self._n_samples_bootstrap == self._n_samples: + if self.bootstrap is False and ( + self._n_samples_bootstrap is None or self._n_samples_bootstrap == self._n_samples + ): raise RuntimeError( "Cannot extract out-of-bag samples when bootstrap is False and " "n_samples == n_samples_bootstrap" @@ -767,13 +769,16 @@ def get_leaf_node_samples(self, X): def _get_estimators_indices(self): # Get drawn indices along both sample and feature axes for tree in self.estimators_: - if not self.bootstrap and self._n_samples_bootstrap == self._n_samples: + if not self.bootstrap and ( + self._n_samples_bootstrap is None or (self._n_samples_bootstrap == self._n_samples) + ): yield np.arange(self._n_samples, dtype=np.int32) else: # tree.random_state is actually an immutable integer seed rather # than a mutable RandomState instance, so it's safe to use it # repeatedly when calling this property. seed = tree.random_state + # Operations accessing random_state must be performed identically # to those in `_parallel_build_trees()` yield _generate_sample_indices( From d196a6caf284bd491c20740c0620d81a79f2b1e6 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Fri, 23 Feb 2024 09:25:24 -0500 Subject: [PATCH 05/14] Change wrt main Signed-off-by: Adam Li --- sktree/ensemble/_extensions.py | 4 +--- sktree/ensemble/_honest_forest.py | 36 ++++++++++++++--------------- sktree/stats/forestht.py | 2 +- sktree/stats/permuteforest.py | 2 ++ sktree/stats/tests/test_forestht.py | 24 ++++++++++++++----- 5 files changed, 39 insertions(+), 29 deletions(-) diff --git a/sktree/ensemble/_extensions.py b/sktree/ensemble/_extensions.py index edf301f8a..6e8219763 100644 --- a/sktree/ensemble/_extensions.py +++ b/sktree/ensemble/_extensions.py @@ -49,9 +49,7 @@ def _generate_sample_indices(random_state, n_samples, n_samples_bootstrap, boots """ random_instance = check_random_state(random_state) n_sample_idx = np.arange(0, n_samples, dtype=np.int32) - sample_indices = np.atleast_1d( - random_instance.choice(n_sample_idx, n_samples_bootstrap, replace=bootstrap) - ) + sample_indices = random_instance.choice(n_sample_idx, n_samples_bootstrap, replace=bootstrap) return sample_indices diff --git a/sktree/ensemble/_honest_forest.py b/sktree/ensemble/_honest_forest.py index 768cd31a0..4ae2691c7 100644 --- a/sktree/ensemble/_honest_forest.py +++ b/sktree/ensemble/_honest_forest.py @@ -420,28 +420,26 @@ def __init__( stratify=False, **tree_estimator_params, ): - estimator_params = ( - "criterion", - "splitter", - "max_depth", - "min_samples_split", - "min_samples_leaf", - "min_weight_fraction_leaf", - "max_features", - "max_leaf_nodes", - "min_impurity_decrease", - "random_state", - "ccp_alpha", - "tree_estimator", - "honest_fraction", - "honest_prior", - "stratify", - ) - super().__init__( estimator=HonestTreeClassifier(), n_estimators=n_estimators, - estimator_params=estimator_params, + estimator_params=( + "criterion", + "splitter", + "max_depth", + "min_samples_split", + "min_samples_leaf", + "min_weight_fraction_leaf", + "max_features", + "max_leaf_nodes", + "min_impurity_decrease", + "random_state", + "ccp_alpha", + "tree_estimator", + "honest_fraction", + "honest_prior", + "stratify", + ), bootstrap=bootstrap, oob_score=oob_score, n_jobs=n_jobs, diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py index 9b9aef692..eaf616928 100644 --- a/sktree/stats/forestht.py +++ b/sktree/stats/forestht.py @@ -1338,7 +1338,7 @@ def build_coleman_forest( forest_result = ForestTestResult(observe_test_stat, 
permute_stat, observe_stat, pvalue) if return_posteriors: - return forest_result, orig_forest_proba, perm_forest_proba + return forest_result, orig_forest_proba, perm_forest_proba, est, perm_est else: return forest_result diff --git a/sktree/stats/permuteforest.py b/sktree/stats/permuteforest.py index 8c2dfe7b1..42f318c06 100644 --- a/sktree/stats/permuteforest.py +++ b/sktree/stats/permuteforest.py @@ -297,6 +297,7 @@ def __init__( tree_estimator=None, stratify=False, permute_per_tree=False, + **tree_estimator_params, ): super().__init__( n_estimators, @@ -322,6 +323,7 @@ def __init__( honest_fraction, tree_estimator, stratify, + **tree_estimator_params, ) self.permute_per_tree = permute_per_tree diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py index ca9d7a1de..ed9d9e780 100644 --- a/sktree/stats/tests/test_forestht.py +++ b/sktree/stats/tests/test_forestht.py @@ -785,9 +785,9 @@ def test_build_coleman_forest(): Test the function under alternative and null hypothesis for a very simple dataset. """ - n_estimators = 10 + n_estimators = 40 n_samples = 20 - n_features = 3 + n_features = 5 rng = np.random.default_rng(seed) _X = rng.uniform(size=(n_samples, n_features)) @@ -799,19 +799,31 @@ def test_build_coleman_forest(): ) # Binary classification clf = HonestForestClassifier( - n_estimators=n_estimators, random_state=seed, n_jobs=-1, honest_fraction=0.5, bootstrap=True + n_estimators=n_estimators, + random_state=seed, + n_jobs=-1, + honest_fraction=0.5, + bootstrap=True, + max_samples=1.6, ) perm_clf = PermutationHonestForestClassifier( - n_estimators=n_estimators, random_state=seed, n_jobs=-1, honest_fraction=0.5, bootstrap=True + n_estimators=n_estimators, + random_state=seed, + n_jobs=-1, + honest_fraction=0.5, + bootstrap=True, + max_samples=1.6, ) with pytest.raises( RuntimeError, match="Permutation forest must be a PermutationHonestForestClassifier" ): build_coleman_forest(clf, clf, X, y) - forest_result, orig_forest_proba, perm_forest_proba = build_coleman_forest( - clf, perm_clf, X, y, metric="s@98" + forest_result, orig_forest_proba, perm_forest_proba, clf, perm_clf = build_coleman_forest( + clf, perm_clf, X, y, metric="s@98", n_repeats=1000, seed=seed ) + assert clf._n_samples_bootstrap == round(20 * 1.6) + assert forest_result.pvalue <= 0.05, f"{forest_result.pvalue}" assert forest_result.observe_stat > 0.1, f"{forest_result.observe_stat}" assert_array_equal(orig_forest_proba.shape, perm_forest_proba.shape) From 97155f80fe16fdc77ef597a0661d5fc9b3550d76 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Fri, 23 Feb 2024 10:42:31 -0500 Subject: [PATCH 06/14] Fix unit tests Signed-off-by: Adam Li --- sktree/_lib/sklearn_fork | 2 +- sktree/ensemble/_honest_forest.py | 45 ------------------------------- sktree/stats/permuteforest.py | 2 +- 3 files changed, 2 insertions(+), 47 deletions(-) diff --git a/sktree/_lib/sklearn_fork b/sktree/_lib/sklearn_fork index d48716a6b..94517a38a 160000 --- a/sktree/_lib/sklearn_fork +++ b/sktree/_lib/sklearn_fork @@ -1 +1 @@ -Subproject commit d48716a6b2cc6373b9e66bf959f2b43b89f10c5d +Subproject commit 94517a38a6354ee02ef715d1077c8ec6d1713d3b diff --git a/sktree/ensemble/_honest_forest.py b/sktree/ensemble/_honest_forest.py index 4ae2691c7..ec146bda2 100644 --- a/sktree/ensemble/_honest_forest.py +++ b/sktree/ensemble/_honest_forest.py @@ -536,51 +536,6 @@ def _make_estimator(self, append=True, random_state=None): return estimator - def _construct_trees( - self, - X, - y, - sample_weight, - random_state, - 
n_samples_bootstrap, - missing_values_in_feature_mask, - classes, - n_more_estimators, - ): - trees = [ - self._make_estimator(append=False, random_state=random_state) - for i in range(n_more_estimators) - ] - - # Parallel loop: we prefer the threading backend as the Cython code - # for fitting the trees is internally releasing the Python GIL - # making threading more efficient than multiprocessing in - # that case. However, for joblib 0.12+ we respect any - # parallel_backend contexts set at a higher level, - # since correctness does not rely on using threads. - trees = Parallel( - n_jobs=self.n_jobs, - verbose=self.verbose, - prefer="threads", - )( - delayed(_parallel_build_trees)( - t, - self.bootstrap, - X, - y, - sample_weight, - i, - len(trees), - verbose=self.verbose, - class_weight=self.class_weight, - n_samples_bootstrap=n_samples_bootstrap, - missing_values_in_feature_mask=missing_values_in_feature_mask, - classes=classes, - ) - for i, t in enumerate(trees) - ) - return trees - def predict_proba(self, X): """ Predict class probabilities for X. diff --git a/sktree/stats/permuteforest.py b/sktree/stats/permuteforest.py index 42f318c06..fc9aa19a8 100644 --- a/sktree/stats/permuteforest.py +++ b/sktree/stats/permuteforest.py @@ -470,4 +470,4 @@ def _construct_trees( for i, t in enumerate(trees) ) - return trees + self.estimators_.extend(trees) From 7d377c1dfe5313eb66f71dba29d283146948d5d3 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Fri, 23 Feb 2024 10:50:38 -0500 Subject: [PATCH 07/14] Get ci pass Signed-off-by: Adam Li --- sktree/stats/permuteforest.py | 4 ++-- sktree/stats/tests/test_forestht.py | 14 +++++++++----- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/sktree/stats/permuteforest.py b/sktree/stats/permuteforest.py index fc9aa19a8..460361295 100644 --- a/sktree/stats/permuteforest.py +++ b/sktree/stats/permuteforest.py @@ -442,8 +442,8 @@ def _construct_trees( ) ) else: - perm_idx = random_state.choice( - self._n_samples, size=(self._n_samples, 1), replace=False + perm_idx = np.array( + random_state.choice(self._n_samples, size=(self._n_samples, 1), replace=False) ) X[:, self.covariate_index_] = X[perm_idx, self.covariate_index_] self.permutation_indices_ = perm_idx diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py index ed9d9e780..18ea7ec34 100644 --- a/sktree/stats/tests/test_forestht.py +++ b/sktree/stats/tests/test_forestht.py @@ -786,7 +786,7 @@ def test_build_coleman_forest(): Test the function under alternative and null hypothesis for a very simple dataset. 
""" n_estimators = 40 - n_samples = 20 + n_samples = 30 n_features = 5 rng = np.random.default_rng(seed) @@ -819,17 +819,21 @@ def test_build_coleman_forest(): ): build_coleman_forest(clf, clf, X, y) - forest_result, orig_forest_proba, perm_forest_proba, clf, perm_clf = build_coleman_forest( - clf, perm_clf, X, y, metric="s@98", n_repeats=1000, seed=seed + forest_result, orig_forest_proba, perm_forest_proba, clf_fitted, perm_clf_fitted = ( + build_coleman_forest(clf, perm_clf, X, y, metric="s@98", n_repeats=1000, seed=seed) ) - assert clf._n_samples_bootstrap == round(20 * 1.6) + assert clf_fitted._n_samples_bootstrap == round(n_samples * 1.6) + assert perm_clf_fitted._n_samples_bootstrap == round(n_samples * 1.6) + assert_array_equal(perm_clf_fitted.permutation_indices_.shape, (n_samples, 1)) assert forest_result.pvalue <= 0.05, f"{forest_result.pvalue}" assert forest_result.observe_stat > 0.1, f"{forest_result.observe_stat}" assert_array_equal(orig_forest_proba.shape, perm_forest_proba.shape) X = np.vstack([_X, _X]) - forest_result, _, _ = build_coleman_forest(clf, perm_clf, X, y, metric="s@98") + forest_result, _, _, clf_fitted, perm_clf_fitted = build_coleman_forest( + clf, perm_clf, X, y, metric="s@98" + ) assert forest_result.pvalue > 0.05, f"{forest_result.pvalue}" assert forest_result.observe_stat < 0.05, f"{forest_result.observe_stat}" From 87b2182cfa46e97c6eb0de0b363247f8b445e869 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Fri, 23 Feb 2024 10:55:07 -0500 Subject: [PATCH 08/14] Merging Signed-off-by: Adam Li --- sktree/ensemble/_honest_forest.py | 6 +++ sktree/stats/tests/test_forestht.py | 64 +++++++++++++++++++++++++++++ sktree/tree/_honest_tree.py | 9 ++-- 3 files changed, 73 insertions(+), 6 deletions(-) diff --git a/sktree/ensemble/_honest_forest.py b/sktree/ensemble/_honest_forest.py index ec146bda2..eb1dab87a 100644 --- a/sktree/ensemble/_honest_forest.py +++ b/sktree/ensemble/_honest_forest.py @@ -536,6 +536,12 @@ def _make_estimator(self, append=True, random_state=None): return estimator + def _inherit_estimator_attributes(self): + """Initialize necessary attributes from the provided tree estimator""" + for attr_name in dir(self.estimator_[0]): + if not attr_name.startswith("_") and attr_name.endswith("_"): + setattr(self, attr_name, getattr(self.estimator_, attr_name)) + def predict_proba(self, X): """ Predict class probabilities for X. diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py index 18ea7ec34..5a0cfff42 100644 --- a/sktree/stats/tests/test_forestht.py +++ b/sktree/stats/tests/test_forestht.py @@ -838,6 +838,70 @@ def test_build_coleman_forest(): assert forest_result.observe_stat < 0.05, f"{forest_result.observe_stat}" +def test_build_coleman_forest_multiview(): + """Simple test for building a Coleman forest. + + Test the function under alternative and null hypothesis for a very simple dataset. 
+ """ + n_estimators = 40 + n_samples = 30 + n_features = 5 + rng = np.random.default_rng(seed) + + _X = rng.uniform(size=(n_samples, n_features)) + _X = rng.uniform(size=(n_samples // 2, n_features)) + X2 = _X + 3 + X = np.vstack([_X, X2]) + y = np.vstack( + [np.zeros((n_samples // 2, 1)), np.ones((n_samples // 2, 1))] + ) # Binary classification + + clf = HonestForestClassifier( + n_estimators=n_estimators, + random_state=seed, + n_jobs=-1, + honest_fraction=0.5, + bootstrap=True, + max_samples=1.6, + max_features=[1, 2], + tree_estimator=MultiViewDecisionTreeClassifier(), + feature_set_ends=[2, 5], + ) + perm_clf = PermutationHonestForestClassifier( + n_estimators=n_estimators, + random_state=seed, + n_jobs=-1, + honest_fraction=0.5, + bootstrap=True, + max_samples=1.6, + max_features=[1, 2], + tree_estimator=MultiViewDecisionTreeClassifier(), + feature_set_ends=[2, 5], + ) + with pytest.raises( + RuntimeError, match="Permutation forest must be a PermutationHonestForestClassifier" + ): + build_coleman_forest(clf, clf, X, y) + + forest_result, orig_forest_proba, perm_forest_proba, clf_fitted, perm_clf_fitted = ( + build_coleman_forest(clf, perm_clf, X, y, metric="s@98", n_repeats=1000, seed=seed) + ) + assert clf_fitted._n_samples_bootstrap == round(n_samples * 1.6) + assert perm_clf_fitted._n_samples_bootstrap == round(n_samples * 1.6) + assert_array_equal(perm_clf_fitted.permutation_indices_.shape, (n_samples, 1)) + + assert forest_result.pvalue <= 0.05, f"{forest_result.pvalue}" + assert forest_result.observe_stat > 0.1, f"{forest_result.observe_stat}" + assert_array_equal(orig_forest_proba.shape, perm_forest_proba.shape) + + X = np.vstack([_X, _X]) + forest_result, _, _, clf_fitted, perm_clf_fitted = build_coleman_forest( + clf, perm_clf, X, y, metric="s@98" + ) + assert forest_result.pvalue > 0.05, f"{forest_result.pvalue}" + assert forest_result.observe_stat < 0.05, f"{forest_result.observe_stat}" + + def test_build_permutation_forest(): """Simple test for building a permutation forest.""" n_estimators = 30 diff --git a/sktree/tree/_honest_tree.py b/sktree/tree/_honest_tree.py index 5927703f5..7ccaf1f28 100644 --- a/sktree/tree/_honest_tree.py +++ b/sktree/tree/_honest_tree.py @@ -730,12 +730,9 @@ def _set_leaf_nodes(self, leaf_ids, y): def _inherit_estimator_attributes(self): """Initialize necessary attributes from the provided tree estimator""" - self.classes_ = self.estimator_.classes_ - self.max_features_ = self.estimator_.max_features_ - self.n_classes_ = self.estimator_.n_classes_ - self.n_features_in_ = self.estimator_.n_features_in_ - self.n_outputs_ = self.estimator_.n_outputs_ - self.tree_ = self.estimator_.tree_ + for attr_name in dir(self.estimator_): + if not attr_name.startswith("_") and attr_name.endswith("_"): + setattr(self, attr_name, getattr(self.estimator_, attr_name)) def _empty_leaf_correction(self, proba, pos=0): """Leaves with empty posteriors are assigned values. 
From 9530594eb720e9dd290ef9d3f020f8a09fcff820 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Fri, 23 Feb 2024 14:30:28 -0500 Subject: [PATCH 09/14] Merge Signed-off-by: Adam Li --- pyproject.toml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8424d8a42..536364240 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,9 +15,6 @@ requires = [ "numpy>=1.25; python_version>='3.9'" ] -[lint.per-file-ignores] -'__init__.py' = ['F401'] - [project] name = "scikit-tree" version = "0.7.0dev0" @@ -266,7 +263,12 @@ extend-exclude = [ 'validation' ] line-length = 88 -lint.ignore = ['E731'] + +[tool.ruff.lint] +ignore = ['E731'] + +[tool.ruff.lint.per-file-ignores] +'__init__.py' = ['F401'] [tool.spin] package = 'sktree' From 8d68c2a6164392f24dc3e2c10cff8f556fce66ac Mon Sep 17 00:00:00 2001 From: Adam Li Date: Fri, 23 Feb 2024 14:53:34 -0500 Subject: [PATCH 10/14] Try again Signed-off-by: Adam Li --- sktree/ensemble/_honest_forest.py | 37 +++++++++----------------- sktree/tests/test_honest_forest.py | 38 ++++++++++++++++++++------- sktree/tree/_classes.py | 22 ++++++++++++++++ sktree/tree/_honest_tree.py | 20 +++++++------- sktree/tree/_multiview.py | 6 ++++- sktree/tree/tests/test_honest_tree.py | 34 +++++++++++++++--------- 6 files changed, 99 insertions(+), 58 deletions(-) diff --git a/sktree/ensemble/_honest_forest.py b/sktree/ensemble/_honest_forest.py index 525e875b3..f08189a62 100644 --- a/sktree/ensemble/_honest_forest.py +++ b/sktree/ensemble/_honest_forest.py @@ -277,9 +277,6 @@ class labels (multi-output problem). The number of classes (single output problem), or a list containing the number of classes for each output (multi-output problem). - n_features_ : int - The number of features when ``fit`` is performed. - n_features_in_ : int Number of features seen during :term:`fit`. @@ -508,6 +505,9 @@ def fit(self, X, y, sample_weight=None, classes=None, **fit_params): super().fit(X, y, sample_weight=sample_weight, classes=classes, **fit_params) + # Inherit attributes from the tree estimator + self._inherit_estimator_attributes() + # Compute honest decision function self.honest_decision_function_ = self._predict_proba( X, indices=self.honest_indices_, impute_missing=np.nan @@ -538,9 +538,15 @@ def _make_estimator(self, append=True, random_state=None): def _inherit_estimator_attributes(self): """Initialize necessary attributes from the provided tree estimator""" - for attr_name in dir(self.estimator_[0]): - if not attr_name.startswith("_") and attr_name.endswith("_"): - setattr(self, attr_name, getattr(self.estimator_, attr_name)) + if hasattr(self.estimators_[0], "_inheritable_fitted_attribute"): + for attr in self.estimators_[0]._inheritable_fitted_attribute: + setattr(self, attr, getattr(self.estimators_[0], attr)) + + self.classes_ = self.estimators_[0].classes_ + self.max_features_ = self.estimators_[0].max_features_ + self.n_classes_ = self.estimators_[0].n_classes_ + self.n_features_in_ = self.estimators_[0].n_features_in_ + self.n_outputs_ = self.estimators_[0].n_outputs_ def predict_proba(self, X): """ @@ -644,25 +650,6 @@ def oob_samples_(self): def _more_tags(self): return {"multioutput": False} - def apply(self, X): - """ - Apply trees in the forest to X, return leaf indices. - - Parameters - ---------- - X : {array-like, sparse matrix} of shape (n_samples, n_features) - The input samples. Internally, its dtype will be converted to - ``dtype=np.float32``. 
If a sparse matrix is provided, it will be - converted into a sparse ``csr_matrix``. - - Returns - ------- - X_leaves : ndarray of shape (n_samples, n_estimators) - For each datapoint x in X and for each tree in the forest, - return the index of the leaf x ends up in. - """ - return self.estimator_.apply(X) - def decision_path(self, X): """ Return the decision path in the forest. diff --git a/sktree/tests/test_honest_forest.py b/sktree/tests/test_honest_forest.py index c06fb716c..8aa694838 100644 --- a/sktree/tests/test_honest_forest.py +++ b/sktree/tests/test_honest_forest.py @@ -437,23 +437,30 @@ def test_honest_forest_with_sklearn_trees_with_mi(): assert_allclose(np.mean(sk_scores), np.mean(scores), atol=0.005) -def test_honest_forest_with_tree_estimator_params(): +@pytest.mark.parametrize( + "tree, tree_kwargs", + [ + (MultiViewDecisionTreeClassifier(), {"feature_set_ends": [10, 20]}), + (ObliqueDecisionTreeClassifier(), {"feature_combinations": 2}), + (PatchObliqueDecisionTreeClassifier(), {"max_patch_dims": 5}), + ], +) +def test_honest_forest_with_tree_estimator_params(tree, tree_kwargs): + """Test that honest forest inherits all the fitted parameters of the tree estimator.""" X = np.ones((20, 4)) X[10:] *= -1 y = [0] * 10 + [1] * 10 # test with a parameter that is a repeat of an init parameter clf = HonestForestClassifier( - tree_estimator=DecisionTreeClassifier(), - random_state=0, - feature_set_ends=[10, 20], + tree_estimator=DecisionTreeClassifier(), random_state=0, **tree_kwargs ) with pytest.raises(ValueError, match=r"Invalid parameter\(s\)"): clf.fit(X, y) # test with a parameter that is not in any init signature clf = HonestForestClassifier( - tree_estimator=MultiViewDecisionTreeClassifier(), + tree_estimator=tree, random_state=0, blah=0, ) @@ -461,9 +468,20 @@ def test_honest_forest_with_tree_estimator_params(): clf.fit(X, y) # passing in a valid argument to the tree_estimator should work - clf = HonestForestClassifier( - tree_estimator=MultiViewDecisionTreeClassifier(), - random_state=0, - feature_set_ends=[10, 20], - ) + clf = HonestForestClassifier(tree_estimator=tree, random_state=0, **tree_kwargs) clf.fit(X, y) + checked_attrs = [ + "classes_", + "n_classes_", + "n_features_in_", + "n_outputs_", + ] + checked_attrs + getattr(clf.estimator_, "_inheritable_fitted_attribute", []) + for attr_name in checked_attrs: + if not attr_name.startswith("_") and attr_name.endswith("_"): + if isinstance(getattr(clf, attr_name), np.ndarray): + np.testing.assert_array_equal( + getattr(clf, attr_name), getattr(clf.estimators_[0], attr_name) + ) + else: + assert getattr(clf, attr_name) == getattr(clf.estimators_[0], attr_name) diff --git a/sktree/tree/_classes.py b/sktree/tree/_classes.py index 1ce5adb34..67b534a9d 100644 --- a/sktree/tree/_classes.py +++ b/sktree/tree/_classes.py @@ -990,6 +990,12 @@ def _build_tree( return self + @property + def _inheritable_fitted_attribute(self): + return { + "feature_combinations_", + } + class ObliqueDecisionTreeRegressor(SimMatrixMixin, DecisionTreeRegressor): """An oblique decision tree Regressor. @@ -1852,6 +1858,16 @@ def _more_tags(self): allow_nan = False return {"multilabel": True, "allow_nan": allow_nan} + @property + def _inheritable_fitted_attribute(self): + return { + "feature_combinations_", + "min_patch_dims_", + "max_patch_dims_", + "dim_contiguous_", + "data_dims_", + } + class PatchObliqueDecisionTreeRegressor(SimMatrixMixin, DecisionTreeRegressor): """A oblique decision tree regressor that operates over patches of data. 
@@ -2747,6 +2763,12 @@ def _build_tree( self.classes_ = self.classes_[0] return self + @property + def _inheritable_fitted_attribute(self): + return { + "feature_combinations_", + } + class ExtraObliqueDecisionTreeRegressor(SimMatrixMixin, DecisionTreeRegressor): """An oblique decision tree Regressor. diff --git a/sktree/tree/_honest_tree.py b/sktree/tree/_honest_tree.py index 5f181dfd7..86d0a46d4 100644 --- a/sktree/tree/_honest_tree.py +++ b/sktree/tree/_honest_tree.py @@ -456,7 +456,7 @@ def partial_fit(self, X, y, sample_weight=None, check_input=True, classes=None): check_input=check_input, classes=classes, ) - # self._inherit_estimator_attributes() + self._inherit_estimator_attributes() # update the number of classes, unsplit if y.ndim == 1: @@ -652,7 +652,7 @@ def _fit( check_input=check_input, missing_values_in_feature_mask=missing_values_in_feature_mask, ) - # self._inherit_estimator_attributes() + self._inherit_estimator_attributes() # fit the leaves on the non-structure indices not_honest_mask = np.ones(len(y), dtype=bool) @@ -730,17 +730,19 @@ def _set_leaf_nodes(self, leaf_ids, y): def _inherit_estimator_attributes(self): """Initialize necessary attributes from the provided tree estimator""" - # basic_inherited_attrs = [ - # "tree_", - # "n_classes_", - # "classes_", - # "n_outputs_", - # ] - if hasattr(self.estimator_, "_inheritable_fitted_attribute"): for attr in self.estimator_._inheritable_fitted_attribute: setattr(self, attr, getattr(self.estimator_, attr)) + self.classes_ = self.estimator_.classes_ + self.max_features_ = self.estimator_.max_features_ + self.n_classes_ = self.estimator_.n_classes_ + self.n_features_in_ = self.estimator_.n_features_in_ + self.n_outputs_ = self.estimator_.n_outputs_ + self.tree_ = self.estimator_.tree_ + self.builder_ = self.estimator_.builder_ + self.min_samples_split_ = self.estimator_.min_samples_split_ + def _empty_leaf_correction(self, proba, pos=0): """Leaves with empty posteriors are assigned values. diff --git a/sktree/tree/_multiview.py b/sktree/tree/_multiview.py index 0d50a96a6..f408f1e72 100644 --- a/sktree/tree/_multiview.py +++ b/sktree/tree/_multiview.py @@ -476,7 +476,6 @@ def _build_tree( max_features = 0 self.max_features_ = max_features - print(self.max_features_, self.max_features_per_set_) if not isinstance(self.splitter, ObliqueSplitter): splitter = SPLITTERS[self.splitter]( @@ -579,10 +578,15 @@ def _fit( @property def _inheritable_fitted_attribute(self): + """Define additional attributes to pass onto a parent meta tree-estimator. + + Used for passing parameters to HonestTreeClassifier. 
+ """ return { "max_features_", "feature_combinations_", "feature_set_ends_", "n_feature_sets_", + "n_features_in_set_", "max_features_per_set_", } diff --git a/sktree/tree/tests/test_honest_tree.py b/sktree/tree/tests/test_honest_tree.py index e84172a45..907c386f1 100644 --- a/sktree/tree/tests/test_honest_tree.py +++ b/sktree/tree/tests/test_honest_tree.py @@ -63,23 +63,30 @@ def test_toy_accuracy(): np.testing.assert_array_equal(clf.predict(X), y) -def test_honest_tree_with_tree_estimator_params(): +@pytest.mark.parametrize( + "tree, tree_kwargs", + [ + (MultiViewDecisionTreeClassifier(), {"feature_set_ends": [10, 20]}), + (ObliqueDecisionTreeClassifier(), {"feature_combinations": 2}), + (PatchObliqueDecisionTreeClassifier(), {"max_patch_dims": 5}), + ], +) +def test_honest_tree_with_tree_estimator_params(tree, tree_kwargs): + """Test that honest tree inherits all the fitted parameters of the tree estimator.""" X = np.ones((20, 4)) X[10:] *= -1 y = [0] * 10 + [1] * 10 # test with a parameter that is a repeat of an init parameter clf = HonestTreeClassifier( - tree_estimator=DecisionTreeClassifier(), - random_state=0, - feature_set_ends=[10, 20], + tree_estimator=DecisionTreeClassifier(), random_state=0, **tree_kwargs ) with pytest.raises(ValueError, match=r"Invalid parameter\(s\)"): clf.fit(X, y) # test with a parameter that is not in any init signature clf = HonestTreeClassifier( - tree_estimator=MultiViewDecisionTreeClassifier(), + tree_estimator=tree, random_state=0, blah=0, ) @@ -87,15 +94,16 @@ def test_honest_tree_with_tree_estimator_params(): clf.fit(X, y) # passing in a valid argument to the tree_estimator should work - clf = HonestTreeClassifier( - tree_estimator=MultiViewDecisionTreeClassifier(), - random_state=0, - feature_set_ends=[10, 20], - ) + clf = HonestTreeClassifier(tree_estimator=tree, random_state=0, **tree_kwargs) clf.fit(X, y) - # for attr_name in dir(clf.estimators_[0]): - # if not attr_name.startswith("_") and attr_name.endswith("_"): - # assert hasattr(clf, attr_name, getattr(clf.estimators_[0], attr_name)) + for attr_name in dir(clf.estimator_): + if not attr_name.startswith("_") and attr_name.endswith("_"): + if isinstance(getattr(clf, attr_name), np.ndarray): + np.testing.assert_array_equal( + getattr(clf, attr_name), getattr(clf.estimator_, attr_name) + ) + else: + assert getattr(clf, attr_name) == getattr(clf.estimator_, attr_name) @pytest.mark.parametrize( From d3c1d556474f07f5707aefd5cccca3efd51e6846 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Fri, 23 Feb 2024 16:04:47 -0500 Subject: [PATCH 11/14] Fix Signed-off-by: Adam Li --- sktree/tree/_honest_tree.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sktree/tree/_honest_tree.py b/sktree/tree/_honest_tree.py index 86d0a46d4..a35e84f99 100644 --- a/sktree/tree/_honest_tree.py +++ b/sktree/tree/_honest_tree.py @@ -740,8 +740,10 @@ def _inherit_estimator_attributes(self): self.n_features_in_ = self.estimator_.n_features_in_ self.n_outputs_ = self.estimator_.n_outputs_ self.tree_ = self.estimator_.tree_ - self.builder_ = self.estimator_.builder_ - self.min_samples_split_ = self.estimator_.min_samples_split_ + + # XXX: scikit-learn trees do not store their builder, or min_samples_split_ + self.builder_ = getattr(self.estimator_, "builder_", None) + self.min_samples_split_ = getattr(self.estimator_, "min_samples_split_", None) def _empty_leaf_correction(self, proba, pos=0): """Leaves with empty posteriors are assigned values. 
From 7482c28d8946d4e6284a87d4197ed53b3d9652b8 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Fri, 23 Feb 2024 16:30:10 -0500 Subject: [PATCH 12/14] Fix Signed-off-by: Adam Li --- sktree/ensemble/_honest_forest.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/sktree/ensemble/_honest_forest.py b/sktree/ensemble/_honest_forest.py index f08189a62..24d9cecae 100644 --- a/sktree/ensemble/_honest_forest.py +++ b/sktree/ensemble/_honest_forest.py @@ -542,12 +542,6 @@ def _inherit_estimator_attributes(self): for attr in self.estimators_[0]._inheritable_fitted_attribute: setattr(self, attr, getattr(self.estimators_[0], attr)) - self.classes_ = self.estimators_[0].classes_ - self.max_features_ = self.estimators_[0].max_features_ - self.n_classes_ = self.estimators_[0].n_classes_ - self.n_features_in_ = self.estimators_[0].n_features_in_ - self.n_outputs_ = self.estimators_[0].n_outputs_ - def predict_proba(self, X): """ Predict class probabilities for X. From e177338c52c2bc2e8f0d720dcd97c478c7f81e62 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Fri, 23 Feb 2024 17:05:09 -0500 Subject: [PATCH 13/14] Try new test Signed-off-by: Adam Li --- sktree/stats/tests/test_forestht.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py index 5a0cfff42..091eff99a 100644 --- a/sktree/stats/tests/test_forestht.py +++ b/sktree/stats/tests/test_forestht.py @@ -863,7 +863,7 @@ def test_build_coleman_forest_multiview(): honest_fraction=0.5, bootstrap=True, max_samples=1.6, - max_features=[1, 2], + max_features=[1, 1], tree_estimator=MultiViewDecisionTreeClassifier(), feature_set_ends=[2, 5], ) @@ -874,7 +874,7 @@ def test_build_coleman_forest_multiview(): honest_fraction=0.5, bootstrap=True, max_samples=1.6, - max_features=[1, 2], + max_features=[1, 1], tree_estimator=MultiViewDecisionTreeClassifier(), feature_set_ends=[2, 5], ) @@ -899,7 +899,6 @@ def test_build_coleman_forest_multiview(): clf, perm_clf, X, y, metric="s@98" ) assert forest_result.pvalue > 0.05, f"{forest_result.pvalue}" - assert forest_result.observe_stat < 0.05, f"{forest_result.observe_stat}" def test_build_permutation_forest(): From 97dcee4c8bc743c8642da1f70b8bba982f0c80a8 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Fri, 23 Feb 2024 22:46:35 -0500 Subject: [PATCH 14/14] Ensure traits are passed thru Signed-off-by: Adam Li --- sktree/ensemble/_honest_forest.py | 4 ++-- sktree/tests/test_honest_forest.py | 2 +- sktree/tree/_classes.py | 12 ++++++------ sktree/tree/_multiview.py | 4 ++-- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/sktree/ensemble/_honest_forest.py b/sktree/ensemble/_honest_forest.py index 24d9cecae..b1647b9ca 100644 --- a/sktree/ensemble/_honest_forest.py +++ b/sktree/ensemble/_honest_forest.py @@ -538,8 +538,8 @@ def _make_estimator(self, append=True, random_state=None): def _inherit_estimator_attributes(self): """Initialize necessary attributes from the provided tree estimator""" - if hasattr(self.estimators_[0], "_inheritable_fitted_attribute"): - for attr in self.estimators_[0]._inheritable_fitted_attribute: + if hasattr(self.tree_estimator, "_inheritable_fitted_attribute"): + for attr in self.tree_estimator._inheritable_fitted_attribute: setattr(self, attr, getattr(self.estimators_[0], attr)) def predict_proba(self, X): diff --git a/sktree/tests/test_honest_forest.py b/sktree/tests/test_honest_forest.py index 8aa694838..3ad9c065a 100644 --- a/sktree/tests/test_honest_forest.py +++ 
b/sktree/tests/test_honest_forest.py
@@ -476,7 +476,7 @@ def test_honest_forest_with_tree_estimator_params(tree, tree_kwargs):
         "n_features_in_",
         "n_outputs_",
     ]
-    checked_attrs += getattr(clf.estimator_, "_inheritable_fitted_attribute", [])
+    checked_attrs += getattr(tree, "_inheritable_fitted_attribute", [])
     for attr_name in checked_attrs:
         if not attr_name.startswith("_") and attr_name.endswith("_"):
             if isinstance(getattr(clf, attr_name), np.ndarray):
diff --git a/sktree/tree/_classes.py b/sktree/tree/_classes.py
index 67b534a9d..1b0e95cfa 100644
--- a/sktree/tree/_classes.py
+++ b/sktree/tree/_classes.py
@@ -992,9 +992,9 @@ def _build_tree(
 
     @property
     def _inheritable_fitted_attribute(self):
-        return {
+        return [
             "feature_combinations_",
-        }
+        ]
 
 
 class ObliqueDecisionTreeRegressor(SimMatrixMixin, DecisionTreeRegressor):
@@ -1860,13 +1860,13 @@ def _more_tags(self):
 
     @property
     def _inheritable_fitted_attribute(self):
-        return {
+        return [
             "feature_combinations_",
             "min_patch_dims_",
             "max_patch_dims_",
             "dim_contiguous_",
             "data_dims_",
-        }
+        ]
 
 
 class PatchObliqueDecisionTreeRegressor(SimMatrixMixin, DecisionTreeRegressor):
@@ -2765,9 +2765,9 @@ def _build_tree(
 
     @property
     def _inheritable_fitted_attribute(self):
-        return {
+        return [
             "feature_combinations_",
-        }
+        ]
 
 
 class ExtraObliqueDecisionTreeRegressor(SimMatrixMixin, DecisionTreeRegressor):
diff --git a/sktree/tree/_multiview.py b/sktree/tree/_multiview.py
index f408f1e72..52b70c0df 100644
--- a/sktree/tree/_multiview.py
+++ b/sktree/tree/_multiview.py
@@ -582,11 +582,11 @@ def _inheritable_fitted_attribute(self):
 
         Used for passing parameters to HonestTreeClassifier.
         """
-        return {
+        return [
             "max_features_",
             "feature_combinations_",
             "feature_set_ends_",
             "n_feature_sets_",
             "n_features_in_set_",
             "max_features_per_set_",
-        }
+        ]
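
With the series applied, the attribute inheritance can be exercised end to
end. A minimal sketch, assuming the import paths match the package's public
API as used by the test modules above (the toy two-view data and
n_estimators are illustrative):

    import numpy as np
    from sktree import HonestForestClassifier
    from sktree.tree import MultiViewDecisionTreeClassifier

    # Two feature sets: columns 0-9 form view one, columns 10-19 view two.
    rng = np.random.default_rng(0)
    X = rng.standard_normal((100, 20))
    y = (X[:, 0] > 0).astype(int)

    clf = HonestForestClassifier(
        n_estimators=10,
        random_state=0,
        tree_estimator=MultiViewDecisionTreeClassifier(),
        feature_set_ends=[10, 20],  # forwarded to the trees via **tree_estimator_params
    )
    clf.fit(X, y)

    # The forest now mirrors the fitted attributes of its trees, including
    # the multiview-specific ones named in _inheritable_fitted_attribute.
    np.testing.assert_array_equal(
        clf.feature_set_ends_, clf.estimators_[0].feature_set_ends_
    )
    assert clf.n_feature_sets_ == clf.estimators_[0].n_feature_sets_
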