neurodata · adam2392 · Nov 9, 2023 · Oct 16, 2023 · Oct 16, 2023 · Oct 16, 2023
diff --git a/doc/whats_new/v0.4.rst b/doc/whats_new/v0.4.rst
@@ -12,7 +12,8 @@ Version 0.4
 
 Changelog
 ---------
-- 
+
+- |API| ``FeatureImportanceForest*`` now has a hyperparameter, ``permute_forest_fraction``, that controls how many permutations are done per forest, by `Adam Li`_ (:pr:`145`)
 
 Code and Documentation Contributors
 -----------------------------------

diff --git a/examples/hypothesis_testing/plot_MI_genuine_hypothesis_testing_forest.py b/examples/hypothesis_testing/plot_MI_genuine_hypothesis_testing_forest.py
@@ -108,19 +108,13 @@
     ),
     random_state=seed,
     test_size=test_size,
-    permute_per_tree=False,
-    sample_dataset_per_tree=False,
 )
 
-print(
-    f"Permutation per tree: {est.permute_per_tree} and sampling dataset per tree: "
-    f"{est.sample_dataset_per_tree}"
-)
 # we test for the first feature set, which is important and thus should return a pvalue < 0.05
 stat, pvalue = est.test(
     X, y, covariate_index=np.arange(n_features_set, dtype=int), metric="mi", n_repeats=n_repeats
 )
-print(f"Estimated MI difference: {stat} with Pvalue: {pvalue}")
+print(f"Estimated MI difference for the important feature set: {stat} with Pvalue: {pvalue}")
 
 # we test for the second feature set, which is unimportant and thus should return a pvalue > 0.05
 stat, pvalue = est.test(
@@ -130,7 +124,7 @@
     metric="mi",
     n_repeats=n_repeats,
 )
-print(f"Estimated MI difference: {stat} with Pvalue: {pvalue}")
+print(f"Estimated MI difference for the unimportant feature set: {stat} with Pvalue: {pvalue}")
 
 # %%
 # References

diff --git a/examples/hypothesis_testing/plot_MI_imbalanced_hyppo_testing.py b/examples/hypothesis_testing/plot_MI_imbalanced_hyppo_testing.py
@@ -134,16 +134,10 @@ def make_multiview_classification(
     ),
     random_state=seed,
     test_size=test_size,
-    permute_per_tree=False,
-    sample_dataset_per_tree=False,
 )
 
 mv_results = dict()
 
-print(
-    f"Permutation per tree: {est.permute_per_tree} and sampling dataset per tree: "
-    f"{est.sample_dataset_per_tree}"
-)
 # we test for the first feature set, which is important and thus should return a pvalue < 0.05
 stat, pvalue = est.test(
     X, y, covariate_index=np.arange(10, dtype=int), metric="mi", n_repeats=n_repeats
@@ -179,8 +173,6 @@ def make_multiview_classification(
     ),
     random_state=seed,
     test_size=test_size,
-    permute_per_tree=False,
-    sample_dataset_per_tree=False,
 )
 
 rf_results = dict()

diff --git a/examples/hypothesis_testing/plot_might_auc.py b/examples/hypothesis_testing/plot_might_auc.py
@@ -84,7 +84,7 @@
     ),
     random_state=seed,
     test_size=test_size,
-    permute_per_tree=True,
+    permute_forest_fraction=1.0 / n_estimators,
     sample_dataset_per_tree=True,
 )
 

diff --git a/examples/hypothesis_testing/plot_might_mv_auc.py b/examples/hypothesis_testing/plot_might_mv_auc.py
@@ -72,7 +72,7 @@
     ),
     random_state=seed,
     test_size=test_size,
-    permute_per_tree=True,
+    permute_forest_fraction=1.0 / n_estimators,
     sample_dataset_per_tree=True,
 )
 
@@ -104,7 +104,7 @@
     ),
     random_state=seed,
     test_size=test_size,
-    permute_per_tree=True,
+    permute_forest_fraction=1.0 / n_estimators,
     sample_dataset_per_tree=True,
 )
 

diff --git a/sktree/experimental/mutual_info.py b/sktree/experimental/mutual_info.py
@@ -147,7 +147,7 @@ def mutual_info_ksg(
     algorithm="kd_tree",
     n_jobs: int = -1,
     transform: str = "rank",
-    random_seed: int = None,
+    random_seed: Optional[int] = None,
 ):
     """Compute the generalized (conditional) mutual information KSG estimate.