From e0300502a5fef905a05b6a028513f53f3932da73 Mon Sep 17 00:00:00 2001
From: YuxinB <99897042+YuxinB@users.noreply.github.com>
Date: Thu, 12 Oct 2023 12:10:16 -0400
Subject: [PATCH 01/19] Stratify sampling when splitting train/test data

---
 sktree/stats/forestht.py | 33 ++++++++++++++++++---------------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py
index 4d6dc7b77..e57d72dba 100644
--- a/sktree/stats/forestht.py
+++ b/sktree/stats/forestht.py
@@ -161,7 +161,7 @@ def reset(self):
         self._is_fitted = False
         self._seeds = None
 
-    def _get_estimators_indices(self, sample_separate=False):
+    def _get_estimators_indices(self, y, sample_separate=False):
         indices = np.arange(self._n_samples_, dtype=int)
 
         # Get drawn indices along both sample and feature axes
@@ -191,7 +191,7 @@ def _get_estimators_indices(self, sample_separate=False):
                 # Operations accessing random_state must be performed identically
                 # to those in `_parallel_build_trees()`
                 indices_train, indices_test = train_test_split(
-                    indices, test_size=self.test_size, shuffle=True, random_state=seed
+                    indices, test_size=self.test_size, shuffle=True, stratify = y ,random_state=seed
                 )
 
                 yield indices_train, indices_test
@@ -206,13 +206,16 @@ def _get_estimators_indices(self, sample_separate=False):
             indices_train, indices_test = train_test_split(
                 indices,
                 test_size=self.test_size,
+                stratify = y,
                 random_state=self._seeds,
+
             )
+            
             for _ in self.estimator_.estimators_:
                 yield indices_train, indices_test
 
-    @property
-    def train_test_samples_(self):
+    
+    def train_test_samples_(self,y):
         """
         The subset of drawn samples for each base estimator.
 
@@ -229,7 +232,7 @@ def train_test_samples_(self):
 
         return [
             (indices_train, indices_test)
-            for indices_train, indices_test in self._get_estimators_indices()
+            for indices_train, indices_test in self._get_estimators_indices(y)
         ]
 
     def _statistic(
@@ -332,6 +335,9 @@ def statistic(
         if self._type_of_target_ is None:
             self._type_of_target_ = type_of_target(y)
 
+        # if self.sample_dataset_per_tree and not self.permute_per_tree:
+        #     raise ValueError("sample_dataset_per_tree is only valid when permute_per_tree=True")
+
         if covariate_index is None:
             self.estimator_ = self._get_estimator()
             estimator = self.estimator_
@@ -426,8 +432,7 @@ def test(
         y : ArrayLike of shape (n_samples, n_outputs)
             The target matrix.
         covariate_index : ArrayLike, optional of shape (n_covariates,)
-            The index array of covariates to shuffle, will shuffle all columns by
-            default (corresponding to None).
+            The index array of covariates to shuffle, by default None.
         metric : str, optional
             The metric to compute, by default "mse".
         n_repeats : int, optional
@@ -463,9 +468,6 @@ def test(
             observe_stat = self.observe_stat_
 
         # next permute the data
-        if covariate_index is None:
-            covariate_index = np.arange(X.shape[1], dtype=int)
-
         permute_stat, permute_posteriors, permute_samples = self.statistic(
             X,
             y,
@@ -493,7 +495,7 @@ def test(
             # If not sampling a new dataset per tree, then we may either be
             # permuting the covariate index per tree or per forest. If not permuting
             # there is only one train and test split, so we can just use that
-            _, indices_test = self.train_test_samples_[0]
+            _, indices_test = self.train_test_samples_(y)[0]
             indices_test = observe_samples
             y_test = y[indices_test, :]
             y_pred_proba_normal = observe_posteriors[:, indices_test, :]
@@ -725,12 +727,12 @@ def _statistic(
                     self._type_of_target_,
                 )
                 for idx, (indices_train, indices_test) in enumerate(
-                    self._get_estimators_indices(sample_separate=True)
+                    self._get_estimators_indices(y,sample_separate=True)
                 )
             )
         else:
             # fitting a forest will only get one unique train/test split
-            indices_train, indices_test = self.train_test_samples_[0]
+            indices_train, indices_test = self.train_test_samples_(y)[0]
 
             X_train, X_test = X[indices_train, :], X[indices_test, :]
             y_train, y_test = y[indices_train, :], y[indices_test, :]
@@ -946,12 +948,13 @@ def _statistic(
                     self._type_of_target_,
                 )
                 for idx, (indices_train, indices_test) in enumerate(
-                    self._get_estimators_indices(sample_separate=True)
+                    self._get_estimators_indices(y,sample_separate=True)
                 )
             )
         else:
             # fitting a forest will only get one unique train/test split
-            indices_train, indices_test = self.train_test_samples_[0]
+            indices_train, indices_test = self.train_test_samples_(y)[0]
+            
 
             X_train, X_test = X[indices_train, :], X[indices_test, :]
             y_train = y[indices_train, :]

From 5d60959becd120746dcf2498745d3a3a2a31186e Mon Sep 17 00:00:00 2001
From: YuxinB <99897042+YuxinB@users.noreply.github.com>
Date: Thu, 12 Oct 2023 15:48:49 -0400
Subject: [PATCH 02/19] Stratified sampling: let stratify = None for Regressor

---
 sktree/stats/forestht.py | 33 +++++++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py
index e57d72dba..d1beccde6 100644
--- a/sktree/stats/forestht.py
+++ b/sktree/stats/forestht.py
@@ -161,9 +161,26 @@ def reset(self):
         self._is_fitted = False
         self._seeds = None
 
-    def _get_estimators_indices(self, y, sample_separate=False):
+    def _get_estimators_indices(self, stratifier, sample_separate=False):
+        
+        # Check stratifier
+        # if stratifier is None, stratifier is regressor
+        if stratifier is not None:
+            if self._n_samples_ is not None and stratifier.shape[0] != self._n_samples_:
+                raise RuntimeError(
+                    f"stratifier must have {self._n_samples_} samples, got {stratifier.shape[0]}. "
+                    f"If running on a new dataset, call the 'reset' method."
+                )
+            
+            if self._type_of_target_ is not None and type_of_target(stratifier) != self._type_of_target_:
+                raise RuntimeError(
+                    f"stratifier must have type {self._type_of_target_}, got {type_of_target(stratifier)}. "
+                    f"If running on a new dataset, call the 'reset' method."
+                )
+
         indices = np.arange(self._n_samples_, dtype=int)
 
+
         # Get drawn indices along both sample and feature axes
         rng = np.random.default_rng(self.estimator_.random_state)
 
@@ -191,7 +208,7 @@ def _get_estimators_indices(self, y, sample_separate=False):
                 # Operations accessing random_state must be performed identically
                 # to those in `_parallel_build_trees()`
                 indices_train, indices_test = train_test_split(
-                    indices, test_size=self.test_size, shuffle=True, stratify = y ,random_state=seed
+                    indices, test_size=self.test_size, shuffle=True, stratify = stratifier ,random_state=seed
                 )
 
                 yield indices_train, indices_test
@@ -206,7 +223,7 @@ def _get_estimators_indices(self, y, sample_separate=False):
             indices_train, indices_test = train_test_split(
                 indices,
                 test_size=self.test_size,
-                stratify = y,
+                stratify = stratifier,
                 random_state=self._seeds,
 
             )
@@ -215,7 +232,7 @@ def _get_estimators_indices(self, y, sample_separate=False):
                 yield indices_train, indices_test
 
     
-    def train_test_samples_(self,y):
+    def train_test_samples_(self,stratifier):
         """
         The subset of drawn samples for each base estimator.
 
@@ -232,7 +249,7 @@ def train_test_samples_(self,y):
 
         return [
             (indices_train, indices_test)
-            for indices_train, indices_test in self._get_estimators_indices(y)
+            for indices_train, indices_test in self._get_estimators_indices(stratifier = stratifier)
         ]
 
     def _statistic(
@@ -495,7 +512,7 @@ def test(
             # If not sampling a new dataset per tree, then we may either be
             # permuting the covariate index per tree or per forest. If not permuting
             # there is only one train and test split, so we can just use that
-            _, indices_test = self.train_test_samples_(y)[0]
+            _, indices_test = self.train_test_samples_(stratifier=y)[0]
             indices_test = observe_samples
             y_test = y[indices_test, :]
             y_pred_proba_normal = observe_posteriors[:, indices_test, :]
@@ -732,7 +749,7 @@ def _statistic(
             )
         else:
             # fitting a forest will only get one unique train/test split
-            indices_train, indices_test = self.train_test_samples_(y)[0]
+            indices_train, indices_test = self.train_test_samples_(stratifier=None)[0]
 
             X_train, X_test = X[indices_train, :], X[indices_test, :]
             y_train, y_test = y[indices_train, :], y[indices_test, :]
@@ -953,7 +970,7 @@ def _statistic(
             )
         else:
             # fitting a forest will only get one unique train/test split
-            indices_train, indices_test = self.train_test_samples_(y)[0]
+            indices_train, indices_test = self.train_test_samples_(stratifier=y)[0]
             
 
             X_train, X_test = X[indices_train, :], X[indices_test, :]

From 78837d2457d1eb947542048f0fc0842285e65f51 Mon Sep 17 00:00:00 2001
From: Haoyin Xu <haoyinxu@gmail.com>
Date: Tue, 17 Oct 2023 10:21:05 -0400
Subject: [PATCH 03/19] FIX correct changes & black format

---
 sktree/stats/forestht.py | 40 ++++++++++++++++++++++------------------
 1 file changed, 22 insertions(+), 18 deletions(-)

diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py
index d1beccde6..00ae6371f 100644
--- a/sktree/stats/forestht.py
+++ b/sktree/stats/forestht.py
@@ -162,7 +162,7 @@ def reset(self):
         self._seeds = None
 
     def _get_estimators_indices(self, stratifier, sample_separate=False):
-        
+
         # Check stratifier
         # if stratifier is None, stratifier is regressor
         if stratifier is not None:
@@ -171,8 +171,11 @@ def _get_estimators_indices(self, stratifier, sample_separate=False):
                     f"stratifier must have {self._n_samples_} samples, got {stratifier.shape[0]}. "
                     f"If running on a new dataset, call the 'reset' method."
                 )
-            
-            if self._type_of_target_ is not None and type_of_target(stratifier) != self._type_of_target_:
+
+            if (
+                self._type_of_target_ is not None
+                and type_of_target(stratifier) != self._type_of_target_
+            ):
                 raise RuntimeError(
                     f"stratifier must have type {self._type_of_target_}, got {type_of_target(stratifier)}. "
                     f"If running on a new dataset, call the 'reset' method."
@@ -180,7 +183,6 @@ def _get_estimators_indices(self, stratifier, sample_separate=False):
 
         indices = np.arange(self._n_samples_, dtype=int)
 
-
         # Get drawn indices along both sample and feature axes
         rng = np.random.default_rng(self.estimator_.random_state)
 
@@ -208,7 +210,11 @@ def _get_estimators_indices(self, stratifier, sample_separate=False):
                 # Operations accessing random_state must be performed identically
                 # to those in `_parallel_build_trees()`
                 indices_train, indices_test = train_test_split(
-                    indices, test_size=self.test_size, shuffle=True, stratify = stratifier ,random_state=seed
+                    indices,
+                    test_size=self.test_size,
+                    shuffle=True,
+                    stratify=stratifier,
+                    random_state=seed,
                 )
 
                 yield indices_train, indices_test
@@ -223,16 +229,14 @@ def _get_estimators_indices(self, stratifier, sample_separate=False):
             indices_train, indices_test = train_test_split(
                 indices,
                 test_size=self.test_size,
-                stratify = stratifier,
+                stratify=stratifier,
                 random_state=self._seeds,
-
             )
-            
+
             for _ in self.estimator_.estimators_:
                 yield indices_train, indices_test
 
-    
-    def train_test_samples_(self,stratifier):
+    def train_test_samples_(self, stratifier):
         """
         The subset of drawn samples for each base estimator.
 
@@ -249,7 +253,7 @@ def train_test_samples_(self,stratifier):
 
         return [
             (indices_train, indices_test)
-            for indices_train, indices_test in self._get_estimators_indices(stratifier = stratifier)
+            for indices_train, indices_test in self._get_estimators_indices(stratifier=stratifier)
         ]
 
     def _statistic(
@@ -352,9 +356,6 @@ def statistic(
         if self._type_of_target_ is None:
             self._type_of_target_ = type_of_target(y)
 
-        # if self.sample_dataset_per_tree and not self.permute_per_tree:
-        #     raise ValueError("sample_dataset_per_tree is only valid when permute_per_tree=True")
-
         if covariate_index is None:
             self.estimator_ = self._get_estimator()
             estimator = self.estimator_
@@ -449,7 +450,8 @@ def test(
         y : ArrayLike of shape (n_samples, n_outputs)
             The target matrix.
         covariate_index : ArrayLike, optional of shape (n_covariates,)
-            The index array of covariates to shuffle, by default None.
+            The index array of covariates to shuffle, will shuffle all columns by
+            default (corresponding to None).
         metric : str, optional
             The metric to compute, by default "mse".
         n_repeats : int, optional
@@ -484,6 +486,9 @@ def test(
             observe_posteriors = self.observe_posteriors_
             observe_stat = self.observe_stat_
 
+        if covariate_index is None:
+            covariate_index = np.arange(X.shape[1], dtype=int)
+
         # next permute the data
         permute_stat, permute_posteriors, permute_samples = self.statistic(
             X,
@@ -744,7 +749,7 @@ def _statistic(
                     self._type_of_target_,
                 )
                 for idx, (indices_train, indices_test) in enumerate(
-                    self._get_estimators_indices(y,sample_separate=True)
+                    self._get_estimators_indices(y, sample_separate=True)
                 )
             )
         else:
@@ -965,13 +970,12 @@ def _statistic(
                     self._type_of_target_,
                 )
                 for idx, (indices_train, indices_test) in enumerate(
-                    self._get_estimators_indices(y,sample_separate=True)
+                    self._get_estimators_indices(y, sample_separate=True)
                 )
             )
         else:
             # fitting a forest will only get one unique train/test split
             indices_train, indices_test = self.train_test_samples_(stratifier=y)[0]
-            
 
             X_train, X_test = X[indices_train, :], X[indices_test, :]
             y_train = y[indices_train, :]

From 4f88518efc1504fd0b569b13700bf9885a3f1d05 Mon Sep 17 00:00:00 2001
From: Haoyin Xu <haoyinxu@gmail.com>
Date: Tue, 17 Oct 2023 10:24:50 -0400
Subject: [PATCH 04/19] DOC modify warning text

---
 sktree/stats/forestht.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py
index 00ae6371f..371b16199 100644
--- a/sktree/stats/forestht.py
+++ b/sktree/stats/forestht.py
@@ -168,7 +168,8 @@ def _get_estimators_indices(self, stratifier, sample_separate=False):
         if stratifier is not None:
             if self._n_samples_ is not None and stratifier.shape[0] != self._n_samples_:
                 raise RuntimeError(
-                    f"stratifier must have {self._n_samples_} samples, got {stratifier.shape[0]}. "
+                    f"Stratifier must have {self._n_samples_} samples, "
+                    "got {stratifier.shape[0]}. "
                     f"If running on a new dataset, call the 'reset' method."
                 )
 
@@ -177,7 +178,8 @@ def _get_estimators_indices(self, stratifier, sample_separate=False):
                 and type_of_target(stratifier) != self._type_of_target_
             ):
                 raise RuntimeError(
-                    f"stratifier must have type {self._type_of_target_}, got {type_of_target(stratifier)}. "
+                    f"Stratifier must have type {self._type_of_target_}, "
+                    f"got {type_of_target(stratifier)}. "
                     f"If running on a new dataset, call the 'reset' method."
                 )
 

From ffb81368ff1a2fc0c0c7bc8496f170890ffbffaa Mon Sep 17 00:00:00 2001
From: YuxinB <99897042+YuxinB@users.noreply.github.com>
Date: Tue, 17 Oct 2023 13:01:49 -0400
Subject: [PATCH 05/19] Add unit test for verifying stratified sampling

---
 requirements.txt                    |  1 +
 sktree/stats/tests/test_forestht.py | 44 +++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)

diff --git a/requirements.txt b/requirements.txt
index 92f3a6b2b..f3ec2a094 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,4 @@
 numpy>=1.25
 scipy
 scikit-learn>=1.3
+black=22.12.0
diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py
index cecf34b8c..462cbab55 100644
--- a/sktree/stats/tests/test_forestht.py
+++ b/sktree/stats/tests/test_forestht.py
@@ -69,6 +69,50 @@ def test_featureimportance_forest_permute_pertree(sample_dataset_per_tree):
         est.statistic(iris_X[:n_samples], iris_y[:n_samples], [0, 1.0], metric="mi")
 
 
+@pytest.mark.parametrize("sample_dataset_per_tree", [True, False])
+def test_featureimportance_forest_startified(sample_dataset_per_tree):
+    est = FeatureImportanceForestClassifier(
+        estimator=RandomForestClassifier(
+            n_estimators=10,
+            random_state=seed,
+        ),
+        permute_per_tree=True,
+        test_size=0.7,
+        random_state=seed,
+        sample_dataset_per_tree=sample_dataset_per_tree,
+    )
+    n_samples = 100
+    est.statistic(iris_X[:n_samples], iris_y[:n_samples], metric="mse")
+
+
+    iris_X_class0 = iris_X[iris_y==0]
+    iris_X_class1 = iris_X[iris_y==1]
+    iris_y_class0 = iris_y[iris_y==0]
+    iris_y_class1 = iris_y[iris_y==1]
+
+    assert (
+        len(est.train_test_samples_(iris_y[:n_samples])[0][1]) == len(iris_y_class1) * est.test_size + len(iris_y_class0) * est.test_size
+    ), f"{len(est.train_test_samples_(iris_y[:n_samples])[0][1])} {len(iris_y_class1) * est.test_size + len(iris_y_class0) * est.test_size}"
+    assert len(est.train_test_samples_(iris_y[:n_samples])[0][0]) == est._n_samples_ - (len(iris_y_class1) * est.test_size + len(iris_y_class0) * est.test_size)
+
+    est.test(iris_X[:n_samples], iris_y[:n_samples], [0, 1], n_repeats=10, metric="mse")
+    assert (
+        len(est.train_test_samples_(iris_y[:n_samples])[0][1]) == len(iris_y_class1) * est.test_size + len(iris_y_class0) * est.test_size
+    ), f"{len(est.train_test_samples_(iris_y[:n_samples])[0][1])} {len(iris_y_class1) * est.test_size + len(iris_y_class0) * est.test_size}"
+    assert len(est.train_test_samples_(iris_y[:n_samples])[0][0]) == est._n_samples_ - (len(iris_y_class1) * est.test_size + len(iris_y_class0) * est.test_size)
+
+    with pytest.raises(RuntimeError, match="Metric must be"):
+        est.statistic(iris_X[:n_samples], iris_y[:n_samples], metric="mi")
+
+    # covariate index must be an iterable
+    with pytest.raises(RuntimeError, match="covariate_index must be an iterable"):
+        est.statistic(iris_X[:n_samples], iris_y[:n_samples], 0, metric="mi")
+
+    # covariate index must be an iterable of ints
+    with pytest.raises(RuntimeError, match="Not all covariate_index"):
+        est.statistic(iris_X[:n_samples], iris_y[:n_samples], [0, 1.0], metric="mi")
+
+
 def test_featureimportance_forest_errors():
     permute_per_tree = False
     sample_dataset_per_tree = True

From 3ff6340d99828a783f990330e79875638c4f7eca Mon Sep 17 00:00:00 2001
From: YuxinB <99897042+YuxinB@users.noreply.github.com>
Date: Tue, 17 Oct 2023 13:19:27 -0400
Subject: [PATCH 06/19] Correct Typo for Stratified

---
 sktree/stats/tests/test_forestht.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py
index 462cbab55..68ecb8e4d 100644
--- a/sktree/stats/tests/test_forestht.py
+++ b/sktree/stats/tests/test_forestht.py
@@ -70,7 +70,7 @@ def test_featureimportance_forest_permute_pertree(sample_dataset_per_tree):
 
 
 @pytest.mark.parametrize("sample_dataset_per_tree", [True, False])
-def test_featureimportance_forest_startified(sample_dataset_per_tree):
+def test_featureimportance_forest_stratified(sample_dataset_per_tree):
     est = FeatureImportanceForestClassifier(
         estimator=RandomForestClassifier(
             n_estimators=10,

From 70a14a57a89d2f9b7e339f455018c4d1c608c1dd Mon Sep 17 00:00:00 2001
From: Adam Li <adam2392@gmail.com>
Date: Tue, 17 Oct 2023 21:46:52 -0400
Subject: [PATCH 07/19] Fixed example and whatsnew

Signed-off-by: Adam Li <adam2392@gmail.com>
---
 doc/whats_new/v0.3.rst                        |  3 +-
 ...t_MI_gigantic_hypothesis_testing_forest.py | 10 +++---
 sktree/_lib/sklearn_fork                      |  2 +-
 sktree/stats/forestht.py                      | 35 ++++++++++++-------
 sktree/stats/tests/test_forestht.py           | 31 ++++++++++------
 5 files changed, 51 insertions(+), 30 deletions(-)

diff --git a/doc/whats_new/v0.3.rst b/doc/whats_new/v0.3.rst
index fec97bb01..7b163ef19 100644
--- a/doc/whats_new/v0.3.rst
+++ b/doc/whats_new/v0.3.rst
@@ -15,6 +15,7 @@ Changelog
 - |Fix| Fixes a bug in consistency of train/test samples when ``random_state`` is not set in FeatureImportanceForestClassifier and FeatureImportanceForestRegressor, by `Adam Li`_ (:pr:`135`)
 - |Fix| Fixes a bug where covariate indices were not shuffled by default when running FeatureImportanceForestClassifier and FeatureImportanceForestRegressor test methods, by `Sambit Panda`_ (:pr:`140`)
 - |Enhancement| Add multi-view splitter for axis-aligned decision trees, by `Adam Li`_ (:pr:`129`)
+- |Enhancement| Add stratified sampling option to ``FeatureImportance*`` via the ``stratify`` keyword argument, by `Yuxin Bai`_ (:pr:`143`)
 
 Code and Documentation Contributors
 -----------------------------------
@@ -24,4 +25,4 @@ the project since version inception, including:
 
 * `Adam Li`_
 * `Sambit Panda`_
-
+* `Yuxin Bai`_
diff --git a/examples/hypothesis_testing/plot_MI_gigantic_hypothesis_testing_forest.py b/examples/hypothesis_testing/plot_MI_gigantic_hypothesis_testing_forest.py
index 423bc63dc..149580fb5 100644
--- a/examples/hypothesis_testing/plot_MI_gigantic_hypothesis_testing_forest.py
+++ b/examples/hypothesis_testing/plot_MI_gigantic_hypothesis_testing_forest.py
@@ -49,8 +49,8 @@
 # We simulate the two feature sets, and the target variable. We then combine them
 # into a single dataset to perform hypothesis testing.
 
-n_samples = 1000
-n_features_set = 500
+n_samples = 2000
+n_features_set = 20
 mean = 1.0
 sigma = 2.0
 beta = 5.0
@@ -91,7 +91,7 @@
 # computed as the proportion of samples in the null distribution that are less than the
 # observed test statistic.
 
-n_estimators = 200
+n_estimators = 100
 max_features = "sqrt"
 test_size = 0.2
 n_repeats = 1000
@@ -103,12 +103,12 @@
         max_features=max_features,
         tree_estimator=DecisionTreeClassifier(),
         random_state=seed,
-        honest_fraction=0.7,
+        honest_fraction=0.25,
         n_jobs=n_jobs,
     ),
     random_state=seed,
     test_size=test_size,
-    permute_per_tree=True,
+    permute_per_tree=False,
     sample_dataset_per_tree=False,
 )
 
diff --git a/sktree/_lib/sklearn_fork b/sktree/_lib/sklearn_fork
index 6c7a5f44e..1adb20907 160000
--- a/sktree/_lib/sklearn_fork
+++ b/sktree/_lib/sklearn_fork
@@ -1 +1 @@
-Subproject commit 6c7a5f44eb4ec3bea5dd6a9e4d5db748d12b209e
+Subproject commit 1adb209077f12adac8f760196ae5260abab0cbdd
diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py
index 371b16199..140686aff 100644
--- a/sktree/stats/forestht.py
+++ b/sktree/stats/forestht.py
@@ -122,6 +122,7 @@ def __init__(
         test_size=0.2,
         permute_per_tree=True,
         sample_dataset_per_tree=True,
+        stratify=True,
     ):
         self.estimator = estimator
         self.random_state = random_state
@@ -129,6 +130,7 @@ def __init__(
         self.test_size = test_size
         self.permute_per_tree = permute_per_tree
         self.sample_dataset_per_tree = sample_dataset_per_tree
+        self.stratify = stratify
 
         self.n_samples_test_ = None
         self._n_samples_ = None
@@ -160,9 +162,9 @@ def reset(self):
         self.n_features_in_ = None
         self._is_fitted = False
         self._seeds = None
+        self._y = None
 
-    def _get_estimators_indices(self, stratifier, sample_separate=False):
-
+    def _get_estimators_indices(self, stratifier=None, sample_separate=False):
         # Check stratifier
         # if stratifier is None, stratifier is regressor
         if stratifier is not None:
@@ -173,6 +175,8 @@ def _get_estimators_indices(self, stratifier, sample_separate=False):
                     f"If running on a new dataset, call the 'reset' method."
                 )
 
+            # Type of target should be one that fits a classifier as this is
+            # the only instance where stratification is needed.
             if (
                 self._type_of_target_ is not None
                 and type_of_target(stratifier) != self._type_of_target_
@@ -238,7 +242,8 @@ def _get_estimators_indices(self, stratifier, sample_separate=False):
             for _ in self.estimator_.estimators_:
                 yield indices_train, indices_test
 
-    def train_test_samples_(self, stratifier):
+    @property
+    def train_test_samples_(self):
         """
         The subset of drawn samples for each base estimator.
 
@@ -253,6 +258,9 @@ def train_test_samples_(self, stratifier):
         if self._n_samples_ is None:
             raise RuntimeError("The estimator must be fitted before accessing this attribute.")
 
+        # Stratifier uses a cached _y attribute if available
+        stratifier = self._y if is_classifier(self.estimator_) and self.stratify else None
+
         return [
             (indices_train, indices_test)
             for indices_train, indices_test in self._get_estimators_indices(stratifier=stratifier)
@@ -365,6 +373,10 @@ def statistic(
             self.permuted_estimator_ = self._get_estimator()
             estimator = self.permuted_estimator_
 
+        # Store a cache of the y variable
+        if is_classifier(self._get_estimator()):
+            self._y = y.copy()
+
         # Infer type of target y
         if not hasattr(self, "_type_of_target"):
             self._type_of_target_ = type_of_target(y)
@@ -519,7 +531,7 @@ def test(
             # If not sampling a new dataset per tree, then we may either be
             # permuting the covariate index per tree or per forest. If not permuting
             # there is only one train and test split, so we can just use that
-            _, indices_test = self.train_test_samples_(stratifier=y)[0]
+            _, indices_test = self.train_test_samples_[0]
             indices_test = observe_samples
             y_test = y[indices_test, :]
             y_pred_proba_normal = observe_posteriors[:, indices_test, :]
@@ -658,6 +670,7 @@ def __init__(
             test_size=test_size,
             permute_per_tree=permute_per_tree,
             sample_dataset_per_tree=sample_dataset_per_tree,
+            stratify=False,
         )
 
     def _get_estimator(self):
@@ -750,13 +763,11 @@ def _statistic(
                     self.permute_per_tree,
                     self._type_of_target_,
                 )
-                for idx, (indices_train, indices_test) in enumerate(
-                    self._get_estimators_indices(y, sample_separate=True)
-                )
+                for idx, (indices_train, indices_test) in enumerate(self.train_test_samples_)
             )
         else:
             # fitting a forest will only get one unique train/test split
-            indices_train, indices_test = self.train_test_samples_(stratifier=None)[0]
+            indices_train, indices_test = self.train_test_samples_[0]
 
             X_train, X_test = X[indices_train, :], X[indices_test, :]
             y_train, y_test = y[indices_train, :], y[indices_test, :]
@@ -903,6 +914,7 @@ def __init__(
         test_size=0.2,
         permute_per_tree=True,
         sample_dataset_per_tree=True,
+        stratify=True,
     ):
         super().__init__(
             estimator=estimator,
@@ -911,6 +923,7 @@ def __init__(
             test_size=test_size,
             permute_per_tree=permute_per_tree,
             sample_dataset_per_tree=sample_dataset_per_tree,
+            stratify=stratify,
         )
 
     def _get_estimator(self):
@@ -971,13 +984,11 @@ def _statistic(
                     self.permute_per_tree,
                     self._type_of_target_,
                 )
-                for idx, (indices_train, indices_test) in enumerate(
-                    self._get_estimators_indices(y, sample_separate=True)
-                )
+                for idx, (indices_train, indices_test) in enumerate(self.train_test_samples_)
             )
         else:
             # fitting a forest will only get one unique train/test split
-            indices_train, indices_test = self.train_test_samples_(stratifier=y)[0]
+            indices_train, indices_test = self.train_test_samples_[0]
 
             X_train, X_test = X[indices_train, :], X[indices_test, :]
             y_train = y[indices_train, :]
diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py
index 68ecb8e4d..e50ed261c 100644
--- a/sktree/stats/tests/test_forestht.py
+++ b/sktree/stats/tests/test_forestht.py
@@ -84,22 +84,31 @@ def test_featureimportance_forest_stratified(sample_dataset_per_tree):
     n_samples = 100
     est.statistic(iris_X[:n_samples], iris_y[:n_samples], metric="mse")
 
-
-    iris_X_class0 = iris_X[iris_y==0]
-    iris_X_class1 = iris_X[iris_y==1]
-    iris_y_class0 = iris_y[iris_y==0]
-    iris_y_class1 = iris_y[iris_y==1]
+    iris_y_class0 = iris_y[iris_y == 0]
+    iris_y_class1 = iris_y[iris_y == 1]
 
     assert (
-        len(est.train_test_samples_(iris_y[:n_samples])[0][1]) == len(iris_y_class1) * est.test_size + len(iris_y_class0) * est.test_size
-    ), f"{len(est.train_test_samples_(iris_y[:n_samples])[0][1])} {len(iris_y_class1) * est.test_size + len(iris_y_class0) * est.test_size}"
-    assert len(est.train_test_samples_(iris_y[:n_samples])[0][0]) == est._n_samples_ - (len(iris_y_class1) * est.test_size + len(iris_y_class0) * est.test_size)
+        len(est.train_test_samples_[0][1])
+        == len(iris_y_class1) * est.test_size + len(iris_y_class0) * est.test_size
+    ), (
+        f"{len(est.train_test_samples_[0][1])} "
+        f"{len(iris_y_class1) * est.test_size + len(iris_y_class0) * est.test_size}"
+    )
+    assert len(est.train_test_samples_[0][0]) == est._n_samples_ - (
+        len(iris_y_class1) * est.test_size + len(iris_y_class0) * est.test_size
+    )
 
     est.test(iris_X[:n_samples], iris_y[:n_samples], [0, 1], n_repeats=10, metric="mse")
     assert (
-        len(est.train_test_samples_(iris_y[:n_samples])[0][1]) == len(iris_y_class1) * est.test_size + len(iris_y_class0) * est.test_size
-    ), f"{len(est.train_test_samples_(iris_y[:n_samples])[0][1])} {len(iris_y_class1) * est.test_size + len(iris_y_class0) * est.test_size}"
-    assert len(est.train_test_samples_(iris_y[:n_samples])[0][0]) == est._n_samples_ - (len(iris_y_class1) * est.test_size + len(iris_y_class0) * est.test_size)
+        len(est.train_test_samples_[0][1])
+        == len(iris_y_class1) * est.test_size + len(iris_y_class0) * est.test_size
+    ), (
+        f"{len(est.train_test_samples_[0][1])} "
+        f"{len(iris_y_class1) * est.test_size + len(iris_y_class0) * est.test_size}"
+    )
+    assert len(est.train_test_samples_[0][0]) == est._n_samples_ - (
+        len(iris_y_class1) * est.test_size + len(iris_y_class0) * est.test_size
+    )
 
     with pytest.raises(RuntimeError, match="Metric must be"):
         est.statistic(iris_X[:n_samples], iris_y[:n_samples], metric="mi")

From f555e2cc8e2baa9acffaf11784293f9915107e50 Mon Sep 17 00:00:00 2001
From: Haoyin Xu <haoyinxu@gmail.com>
Date: Wed, 18 Oct 2023 14:29:36 -0400
Subject: [PATCH 08/19] ENH correct tests & add coverage

---
 sktree/stats/forestht.py            |  6 ++----
 sktree/stats/tests/test_forestht.py | 32 ++++++++++++++---------------
 2 files changed, 17 insertions(+), 21 deletions(-)

diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py
index 140686aff..d2e44fd0a 100644
--- a/sktree/stats/forestht.py
+++ b/sktree/stats/forestht.py
@@ -363,6 +363,8 @@ def statistic(
 
         if self._n_samples_ is None:
             self._n_samples_, self.n_features_in_ = X.shape
+
+        # Infer type of target y
         if self._type_of_target_ is None:
             self._type_of_target_ = type_of_target(y)
 
@@ -377,10 +379,6 @@ def statistic(
         if is_classifier(self._get_estimator()):
             self._y = y.copy()
 
-        # Infer type of target y
-        if not hasattr(self, "_type_of_target"):
-            self._type_of_target_ = type_of_target(y)
-
         # XXX: this can be improved as an extra fit can be avoided, by just doing error-checking
         # and then setting the internal meta data structures
         # first run a dummy fit on the samples to initialize the
diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py
index e50ed261c..6e226a78f 100644
--- a/sktree/stats/tests/test_forestht.py
+++ b/sktree/stats/tests/test_forestht.py
@@ -82,44 +82,42 @@ def test_featureimportance_forest_stratified(sample_dataset_per_tree):
         sample_dataset_per_tree=sample_dataset_per_tree,
     )
     n_samples = 100
-    est.statistic(iris_X[:n_samples], iris_y[:n_samples], metric="mse")
+    est.statistic(iris_X[:n_samples], iris_y[:n_samples], metric="mi")
 
     iris_y_class0 = iris_y[iris_y == 0]
     iris_y_class1 = iris_y[iris_y == 1]
 
     assert (
         len(est.train_test_samples_[0][1])
-        == len(iris_y_class1) * est.test_size + len(iris_y_class0) * est.test_size
+        == sum(iris_y_class1) * est.test_size + sum(iris_y_class0) * est.test_size
     ), (
         f"{len(est.train_test_samples_[0][1])} "
-        f"{len(iris_y_class1) * est.test_size + len(iris_y_class0) * est.test_size}"
+        f"{sum(iris_y_class1) * est.test_size + sum(iris_y_class0) * est.test_size}"
     )
     assert len(est.train_test_samples_[0][0]) == est._n_samples_ - (
-        len(iris_y_class1) * est.test_size + len(iris_y_class0) * est.test_size
+        sum(iris_y_class1) * est.test_size + sum(iris_y_class0) * est.test_size
     )
 
-    est.test(iris_X[:n_samples], iris_y[:n_samples], [0, 1], n_repeats=10, metric="mse")
+    est.test(iris_X[:n_samples], iris_y[:n_samples], [0, 1], n_repeats=10, metric="mi")
     assert (
         len(est.train_test_samples_[0][1])
-        == len(iris_y_class1) * est.test_size + len(iris_y_class0) * est.test_size
+        == sum(iris_y_class1) * est.test_size + sum(iris_y_class0) * est.test_size
     ), (
         f"{len(est.train_test_samples_[0][1])} "
-        f"{len(iris_y_class1) * est.test_size + len(iris_y_class0) * est.test_size}"
+        f"{sum(iris_y_class1) * est.test_size + sum(iris_y_class0) * est.test_size}"
     )
     assert len(est.train_test_samples_[0][0]) == est._n_samples_ - (
-        len(iris_y_class1) * est.test_size + len(iris_y_class0) * est.test_size
+        sum(iris_y_class1) * est.test_size + sum(iris_y_class0) * est.test_size
     )
 
-    with pytest.raises(RuntimeError, match="Metric must be"):
-        est.statistic(iris_X[:n_samples], iris_y[:n_samples], metric="mi")
+    # Test if y has different shape
+    with pytest.raises(RuntimeError, match="Stratifier must have"):
+        est.statistic(iris_X[: n_samples - 1], iris_y[: n_samples - 1], metric="mi")
 
-    # covariate index must be an iterable
-    with pytest.raises(RuntimeError, match="covariate_index must be an iterable"):
-        est.statistic(iris_X[:n_samples], iris_y[:n_samples], 0, metric="mi")
-
-    # covariate index must be an iterable of ints
-    with pytest.raises(RuntimeError, match="Not all covariate_index"):
-        est.statistic(iris_X[:n_samples], iris_y[:n_samples], [0, 1.0], metric="mi")
+    # Test if y has different type
+    with pytest.raises(RuntimeError, match="Stratifier must have type"):
+        iris_y = np.hstack((iris_y[:n_samples].reshape(-1, 1), iris_y[:n_samples].reshape(-1, 1)))
+        est.statistic(iris_X[:n_samples], iris_y, metric="mi")
 
 
 def test_featureimportance_forest_errors():

From 4595df33b9791b55540c670c1246a19dac0fdbdc Mon Sep 17 00:00:00 2001
From: Haoyin Xu <haoyinxu@gmail.com>
Date: Wed, 18 Oct 2023 14:31:38 -0400
Subject: [PATCH 09/19] FIX change n_samples for test to be valid

---
 sktree/stats/tests/test_forestht.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py
index 6e226a78f..77450cf15 100644
--- a/sktree/stats/tests/test_forestht.py
+++ b/sktree/stats/tests/test_forestht.py
@@ -81,7 +81,7 @@ def test_featureimportance_forest_stratified(sample_dataset_per_tree):
         random_state=seed,
         sample_dataset_per_tree=sample_dataset_per_tree,
     )
-    n_samples = 100
+    n_samples = 80
     est.statistic(iris_X[:n_samples], iris_y[:n_samples], metric="mi")
 
     iris_y_class0 = iris_y[iris_y == 0]

From 30b6d3e006f8a2d56d4bf44c4512898468aabd48 Mon Sep 17 00:00:00 2001
From: Haoyin Xu <haoyinxu@gmail.com>
Date: Wed, 18 Oct 2023 14:36:45 -0400
Subject: [PATCH 10/19] DOC update name for MIGHT & black format

---
 ...rest.py => plot_MI_genuine_hypothesis_testing_forest.py} | 6 +++---
 sktree/stats/tests/test_forestht.py                         | 6 ++++--
 2 files changed, 7 insertions(+), 5 deletions(-)
 rename examples/hypothesis_testing/{plot_MI_gigantic_hypothesis_testing_forest.py => plot_MI_genuine_hypothesis_testing_forest.py} (96%)

diff --git a/examples/hypothesis_testing/plot_MI_gigantic_hypothesis_testing_forest.py b/examples/hypothesis_testing/plot_MI_genuine_hypothesis_testing_forest.py
similarity index 96%
rename from examples/hypothesis_testing/plot_MI_gigantic_hypothesis_testing_forest.py
rename to examples/hypothesis_testing/plot_MI_genuine_hypothesis_testing_forest.py
index 149580fb5..e6831a9e7 100644
--- a/examples/hypothesis_testing/plot_MI_gigantic_hypothesis_testing_forest.py
+++ b/examples/hypothesis_testing/plot_MI_genuine_hypothesis_testing_forest.py
@@ -1,7 +1,7 @@
 """
-===========================================================
-Mutual Information for Gigantic Hypothesis Testing (MIGHT)
-===========================================================
+=========================================================
+Mutual Information for Genuine Hypothesis Testing (MIGHT)
+=========================================================
 
 An example using :class:`~sktree.stats.FeatureImportanceForestClassifier` for nonparametric
 multivariate hypothesis test, on simulated datasets. Here, we present a simulation
diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py
index 77450cf15..3d6015e30 100644
--- a/sktree/stats/tests/test_forestht.py
+++ b/sktree/stats/tests/test_forestht.py
@@ -116,8 +116,10 @@ def test_featureimportance_forest_stratified(sample_dataset_per_tree):
 
     # Test if y has different type
     with pytest.raises(RuntimeError, match="Stratifier must have type"):
-        iris_y = np.hstack((iris_y[:n_samples].reshape(-1, 1), iris_y[:n_samples].reshape(-1, 1)))
-        est.statistic(iris_X[:n_samples], iris_y, metric="mi")
+        iris_y_new = np.hstack(
+            (iris_y[:n_samples].reshape(-1, 1), iris_y[:n_samples].reshape(-1, 1))
+        )
+        est.statistic(iris_X[:n_samples], iris_y_new, metric="mi")
 
 
 def test_featureimportance_forest_errors():

From 9a7459d06d47f3385d567014a31534daa091a7e3 Mon Sep 17 00:00:00 2001
From: Haoyin Xu <haoyinxu@gmail.com>
Date: Wed, 18 Oct 2023 14:48:27 -0400
Subject: [PATCH 11/19] FIX update the test for stratification

---
 sktree/stats/tests/test_forestht.py | 31 +++++++++++------------------
 1 file changed, 12 insertions(+), 19 deletions(-)

diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py
index 3d6015e30..8f9230d5f 100644
--- a/sktree/stats/tests/test_forestht.py
+++ b/sktree/stats/tests/test_forestht.py
@@ -81,33 +81,26 @@ def test_featureimportance_forest_stratified(sample_dataset_per_tree):
         random_state=seed,
         sample_dataset_per_tree=sample_dataset_per_tree,
     )
-    n_samples = 80
+    n_samples = 100
     est.statistic(iris_X[:n_samples], iris_y[:n_samples], metric="mi")
 
     iris_y_class0 = iris_y[iris_y == 0]
     iris_y_class1 = iris_y[iris_y == 1]
 
-    assert (
-        len(est.train_test_samples_[0][1])
-        == sum(iris_y_class1) * est.test_size + sum(iris_y_class0) * est.test_size
-    ), (
-        f"{len(est.train_test_samples_[0][1])} "
-        f"{sum(iris_y_class1) * est.test_size + sum(iris_y_class0) * est.test_size}"
-    )
-    assert len(est.train_test_samples_[0][0]) == est._n_samples_ - (
-        sum(iris_y_class1) * est.test_size + sum(iris_y_class0) * est.test_size
+    _, indices_test = est.train_test_samples_[0]
+    y_test = y[indices_test, :]
+
+    assert sum(y_test[y_test == 0]) == sum(y_test[y_test == 1]), (
+        f"{sum(y_test[y_test==0])} " f"{sum(y_test[y_test==1])}"
     )
 
     est.test(iris_X[:n_samples], iris_y[:n_samples], [0, 1], n_repeats=10, metric="mi")
-    assert (
-        len(est.train_test_samples_[0][1])
-        == sum(iris_y_class1) * est.test_size + sum(iris_y_class0) * est.test_size
-    ), (
-        f"{len(est.train_test_samples_[0][1])} "
-        f"{sum(iris_y_class1) * est.test_size + sum(iris_y_class0) * est.test_size}"
-    )
-    assert len(est.train_test_samples_[0][0]) == est._n_samples_ - (
-        sum(iris_y_class1) * est.test_size + sum(iris_y_class0) * est.test_size
+
+    _, indices_test = est.train_test_samples_[0]
+    y_test = y[indices_test, :]
+
+    assert sum(y_test[y_test == 0]) == sum(y_test[y_test == 1]), (
+        f"{sum(y_test[y_test==0])} " f"{sum(y_test[y_test==1])}"
     )
 
     # Test if y has different shape

From e0cbb60cbf5fe431a499c360508755207e194f3f Mon Sep 17 00:00:00 2001
From: Haoyin Xu <haoyinxu@gmail.com>
Date: Wed, 18 Oct 2023 19:03:13 -0400
Subject: [PATCH 12/19] FIX correct test variables

---
 sktree/stats/tests/test_forestht.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py
index 8f9230d5f..bb0216aff 100644
--- a/sktree/stats/tests/test_forestht.py
+++ b/sktree/stats/tests/test_forestht.py
@@ -84,11 +84,8 @@ def test_featureimportance_forest_stratified(sample_dataset_per_tree):
     n_samples = 100
     est.statistic(iris_X[:n_samples], iris_y[:n_samples], metric="mi")
 
-    iris_y_class0 = iris_y[iris_y == 0]
-    iris_y_class1 = iris_y[iris_y == 1]
-
     _, indices_test = est.train_test_samples_[0]
-    y_test = y[indices_test, :]
+    y_test = iris_y[indices_test, :]
 
     assert sum(y_test[y_test == 0]) == sum(y_test[y_test == 1]), (
         f"{sum(y_test[y_test==0])} " f"{sum(y_test[y_test==1])}"
@@ -97,7 +94,7 @@ def test_featureimportance_forest_stratified(sample_dataset_per_tree):
     est.test(iris_X[:n_samples], iris_y[:n_samples], [0, 1], n_repeats=10, metric="mi")
 
     _, indices_test = est.train_test_samples_[0]
-    y_test = y[indices_test, :]
+    y_test = iris_y[indices_test, :]
 
     assert sum(y_test[y_test == 0]) == sum(y_test[y_test == 1]), (
         f"{sum(y_test[y_test==0])} " f"{sum(y_test[y_test==1])}"

From e248a7c39c8b5f6d8613f86b6bd4602bc04a2163 Mon Sep 17 00:00:00 2001
From: Haoyin Xu <haoyinxu@gmail.com>
Date: Wed, 18 Oct 2023 20:17:18 -0400
Subject: [PATCH 13/19] FIX correct variable shape

---
 sktree/stats/tests/test_forestht.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py
index bb0216aff..692db2c27 100644
--- a/sktree/stats/tests/test_forestht.py
+++ b/sktree/stats/tests/test_forestht.py
@@ -85,7 +85,7 @@ def test_featureimportance_forest_stratified(sample_dataset_per_tree):
     est.statistic(iris_X[:n_samples], iris_y[:n_samples], metric="mi")
 
     _, indices_test = est.train_test_samples_[0]
-    y_test = iris_y[indices_test, :]
+    y_test = iris_y[indices_test]
 
     assert sum(y_test[y_test == 0]) == sum(y_test[y_test == 1]), (
         f"{sum(y_test[y_test==0])} " f"{sum(y_test[y_test==1])}"
@@ -94,7 +94,7 @@ def test_featureimportance_forest_stratified(sample_dataset_per_tree):
     est.test(iris_X[:n_samples], iris_y[:n_samples], [0, 1], n_repeats=10, metric="mi")
 
     _, indices_test = est.train_test_samples_[0]
-    y_test = iris_y[indices_test, :]
+    y_test = iris_y[indices_test]
 
     assert sum(y_test[y_test == 0]) == sum(y_test[y_test == 1]), (
         f"{sum(y_test[y_test==0])} " f"{sum(y_test[y_test==1])}"

From 8ba06ef4643d0ca566d0469e9e712c7680cc4e68 Mon Sep 17 00:00:00 2001
From: Haoyin Xu <haoyinxu@gmail.com>
Date: Thu, 19 Oct 2023 09:11:23 -0400
Subject: [PATCH 14/19] FIX correct test method

---
 sktree/stats/tests/test_forestht.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py
index 692db2c27..351447e62 100644
--- a/sktree/stats/tests/test_forestht.py
+++ b/sktree/stats/tests/test_forestht.py
@@ -87,8 +87,8 @@ def test_featureimportance_forest_stratified(sample_dataset_per_tree):
     _, indices_test = est.train_test_samples_[0]
     y_test = iris_y[indices_test]
 
-    assert sum(y_test[y_test == 0]) == sum(y_test[y_test == 1]), (
-        f"{sum(y_test[y_test==0])} " f"{sum(y_test[y_test==1])}"
+    assert len(y_test[y_test == 0]) == len(y_test[y_test == 1]), (
+        f"{len(y_test[y_test==0])} " f"{len(y_test[y_test==1])}"
     )
 
     est.test(iris_X[:n_samples], iris_y[:n_samples], [0, 1], n_repeats=10, metric="mi")
@@ -96,8 +96,8 @@ def test_featureimportance_forest_stratified(sample_dataset_per_tree):
     _, indices_test = est.train_test_samples_[0]
     y_test = iris_y[indices_test]
 
-    assert sum(y_test[y_test == 0]) == sum(y_test[y_test == 1]), (
-        f"{sum(y_test[y_test==0])} " f"{sum(y_test[y_test==1])}"
+    assert len(y_test[y_test == 0]) == len(y_test[y_test == 1]), (
+        f"{len(y_test[y_test==0])} " f"{len(y_test[y_test==1])}"
     )
 
     # Test if y has different shape

From 5d516a74df29825af7a8c11ab7adf99db05c5bca Mon Sep 17 00:00:00 2001
From: Haoyin Xu <haoyinxu@gmail.com>
Date: Thu, 19 Oct 2023 09:26:51 -0400
Subject: [PATCH 15/19] FIX disable check_input for correct error

---
 sktree/stats/tests/test_forestht.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py
index 351447e62..e810d653c 100644
--- a/sktree/stats/tests/test_forestht.py
+++ b/sktree/stats/tests/test_forestht.py
@@ -102,14 +102,16 @@ def test_featureimportance_forest_stratified(sample_dataset_per_tree):
 
     # Test if y has different shape
     with pytest.raises(RuntimeError, match="Stratifier must have"):
-        est.statistic(iris_X[: n_samples - 1], iris_y[: n_samples - 1], metric="mi")
+        est.statistic(
+            iris_X[: n_samples - 1], iris_y[: n_samples - 1], metric="mi", check_input=False
+        )
 
     # Test if y has different type
     with pytest.raises(RuntimeError, match="Stratifier must have type"):
         iris_y_new = np.hstack(
             (iris_y[:n_samples].reshape(-1, 1), iris_y[:n_samples].reshape(-1, 1))
         )
-        est.statistic(iris_X[:n_samples], iris_y_new, metric="mi")
+        est.statistic(iris_X[:n_samples], iris_y_new, metric="mi", check_input=False)
 
 
 def test_featureimportance_forest_errors():

From 735a10be530a9477d42e7512e2478841d88ded92 Mon Sep 17 00:00:00 2001
From: Haoyin Xu <haoyinxu@gmail.com>
Date: Thu, 19 Oct 2023 09:38:54 -0400
Subject: [PATCH 16/19] FIX remove duplicate checks

---
 sktree/stats/forestht.py            | 23 -----------------------
 sktree/stats/tests/test_forestht.py | 13 -------------
 2 files changed, 36 deletions(-)

diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py
index d2e44fd0a..89c02f5ad 100644
--- a/sktree/stats/forestht.py
+++ b/sktree/stats/forestht.py
@@ -165,28 +165,6 @@ def reset(self):
         self._y = None
 
     def _get_estimators_indices(self, stratifier=None, sample_separate=False):
-        # Check stratifier
-        # if stratifier is None, stratifier is regressor
-        if stratifier is not None:
-            if self._n_samples_ is not None and stratifier.shape[0] != self._n_samples_:
-                raise RuntimeError(
-                    f"Stratifier must have {self._n_samples_} samples, "
-                    "got {stratifier.shape[0]}. "
-                    f"If running on a new dataset, call the 'reset' method."
-                )
-
-            # Type of target should be one that fits a classifier as this is
-            # the only instance where stratification is needed.
-            if (
-                self._type_of_target_ is not None
-                and type_of_target(stratifier) != self._type_of_target_
-            ):
-                raise RuntimeError(
-                    f"Stratifier must have type {self._type_of_target_}, "
-                    f"got {type_of_target(stratifier)}. "
-                    f"If running on a new dataset, call the 'reset' method."
-                )
-
         indices = np.arange(self._n_samples_, dtype=int)
 
         # Get drawn indices along both sample and feature axes
@@ -231,7 +209,6 @@ def _get_estimators_indices(self, stratifier=None, sample_separate=False):
                 else:
                     self._seeds = self.estimator_.random_state
 
-            # TODO: make random_state consistent
             indices_train, indices_test = train_test_split(
                 indices,
                 test_size=self.test_size,
diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py
index e810d653c..e71e5e09b 100644
--- a/sktree/stats/tests/test_forestht.py
+++ b/sktree/stats/tests/test_forestht.py
@@ -100,19 +100,6 @@ def test_featureimportance_forest_stratified(sample_dataset_per_tree):
         f"{len(y_test[y_test==0])} " f"{len(y_test[y_test==1])}"
     )
 
-    # Test if y has different shape
-    with pytest.raises(RuntimeError, match="Stratifier must have"):
-        est.statistic(
-            iris_X[: n_samples - 1], iris_y[: n_samples - 1], metric="mi", check_input=False
-        )
-
-    # Test if y has different type
-    with pytest.raises(RuntimeError, match="Stratifier must have type"):
-        iris_y_new = np.hstack(
-            (iris_y[:n_samples].reshape(-1, 1), iris_y[:n_samples].reshape(-1, 1))
-        )
-        est.statistic(iris_X[:n_samples], iris_y_new, metric="mi", check_input=False)
-
 
 def test_featureimportance_forest_errors():
     permute_per_tree = False

From 47857c32574b8f26423daf91a93856652c81042a Mon Sep 17 00:00:00 2001
From: Haoyin Xu <haoyinxu@gmail.com>
Date: Thu, 19 Oct 2023 10:47:14 -0400
Subject: [PATCH 17/19] DOC add docstring for stratify

---
 sktree/stats/forestht.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py
index 89c02f5ad..56a044c5c 100644
--- a/sktree/stats/forestht.py
+++ b/sktree/stats/forestht.py
@@ -122,7 +122,7 @@ def __init__(
         test_size=0.2,
         permute_per_tree=True,
         sample_dataset_per_tree=True,
-        stratify=True,
+        stratify=False,
     ):
         self.estimator = estimator
         self.random_state = random_state
@@ -645,7 +645,6 @@ def __init__(
             test_size=test_size,
             permute_per_tree=permute_per_tree,
             sample_dataset_per_tree=sample_dataset_per_tree,
-            stratify=False,
         )
 
     def _get_estimator(self):
@@ -837,6 +836,9 @@ class FeatureImportanceForestClassifier(BaseForestHT):
     sample_dataset_per_tree : bool, default=False
         Whether to sample the dataset per tree or per forest.
 
+    stratify : bool, default=True
+        Whether to stratify the samples by class labels.
+
     Attributes
     ----------
     estimator_ : BaseForest

From 35eb7767eb8fb4dce8289fb0f4d675756b1484b9 Mon Sep 17 00:00:00 2001
From: YuxinB <99897042+YuxinB@users.noreply.github.com>
Date: Thu, 19 Oct 2023 13:18:36 -0400
Subject: [PATCH 18/19] Add contributor

---
 doc/whats_new/_contributors.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/whats_new/_contributors.rst b/doc/whats_new/_contributors.rst
index 3e5ca2110..eb441d66d 100644
--- a/doc/whats_new/_contributors.rst
+++ b/doc/whats_new/_contributors.rst
@@ -26,3 +26,4 @@
 .. _SUKI-O : https://github.com/SUKI-O
 .. _Ronan Perry : https://rflperry.github.io/
 .. _Haoyin Xu : https://github.com/PSSF23
+.. _Yuxin Bai : https://github.com/YuxinB

From 3332e9af42ca945d9e3587bfe28f679f69463c3f Mon Sep 17 00:00:00 2001
From: Haoyin Xu <haoyinxu@gmail.com>
Date: Thu, 19 Oct 2023 13:43:14 -0400
Subject: [PATCH 19/19] DOC update reference

---
 .../plot_MI_imbalanced_hyppo_testing.py                   | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/examples/hypothesis_testing/plot_MI_imbalanced_hyppo_testing.py b/examples/hypothesis_testing/plot_MI_imbalanced_hyppo_testing.py
index 882f80c3d..c8a5478a4 100644
--- a/examples/hypothesis_testing/plot_MI_imbalanced_hyppo_testing.py
+++ b/examples/hypothesis_testing/plot_MI_imbalanced_hyppo_testing.py
@@ -1,7 +1,7 @@
 """
-===============================================================================
-Mutual Information for Gigantic Hypothesis Testing (MIGHT) with Imbalanced Data
-===============================================================================
+==============================================================================
+Mutual Information for Genuine Hypothesis Testing (MIGHT) with Imbalanced Data
+==============================================================================
 
 Here, we demonstrate how to do hypothesis testing on highly imbalanced data
 in terms of their feature-set dimensionalities.
@@ -17,7 +17,7 @@
 
 For other examples of hypothesis testing, see the following:
 
-- :ref:`sphx_glr_auto_examples_hypothesis_testing_plot_MI_gigantic_hypothesis_testing_forest.py`
+- :ref:`sphx_glr_auto_examples_hypothesis_testing_plot_MI_genuine_hypothesis_testing_forest.py`
 - :ref:`sphx_glr_auto_examples_hypothesis_testing_plot_might_auc.py`
 
 For more information on the multi-view decision-tree, see