Skip to content

Commit

Permalink
[ENH] Add option to permute per forest fraction (#145)
Browse files Browse the repository at this point in the history
* Add option to permute per forest fraction
* Add sep parallel func for building and predicting
* Add additional pickle test
* Remove any mention of permute_per_tree

---------

Signed-off-by: Adam Li <adam2392@gmail.com>
Co-authored-by: Haoyin Xu <haoyinxu@gmail.com>
  • Loading branch information
adam2392 and PSSF23 committed Nov 9, 2023
1 parent a055049 commit e4728fa
Show file tree
Hide file tree
Showing 17 changed files with 423 additions and 208 deletions.
3 changes: 2 additions & 1 deletion doc/whats_new/v0.4.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ Version 0.4

Changelog
---------
-

- |API| ``FeatureImportanceForest*`` now has a hyperparameter, ``permute_forest_fraction``, to control the number of permutations done per forest, by `Adam Li`_ (:pr:`145`)

Code and Documentation Contributors
-----------------------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -108,19 +108,13 @@
),
random_state=seed,
test_size=test_size,
permute_per_tree=False,
sample_dataset_per_tree=False,
)

print(
f"Permutation per tree: {est.permute_per_tree} and sampling dataset per tree: "
f"{est.sample_dataset_per_tree}"
)
# we test for the first feature set, which is important and thus should return a pvalue < 0.05
stat, pvalue = est.test(
X, y, covariate_index=np.arange(n_features_set, dtype=int), metric="mi", n_repeats=n_repeats
)
print(f"Estimated MI difference: {stat} with Pvalue: {pvalue}")
print(f"Estimated MI difference for the important feature set: {stat} with Pvalue: {pvalue}")

# we test for the second feature set, which is unimportant and thus should return a pvalue > 0.05
stat, pvalue = est.test(
Expand All @@ -130,7 +124,7 @@
metric="mi",
n_repeats=n_repeats,
)
print(f"Estimated MI difference: {stat} with Pvalue: {pvalue}")
print(f"Estimated MI difference for the unimportant feature set: {stat} with Pvalue: {pvalue}")

# %%
# References
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,16 +134,10 @@ def make_multiview_classification(
),
random_state=seed,
test_size=test_size,
permute_per_tree=False,
sample_dataset_per_tree=False,
)

mv_results = dict()

print(
f"Permutation per tree: {est.permute_per_tree} and sampling dataset per tree: "
f"{est.sample_dataset_per_tree}"
)
# we test for the first feature set, which is important and thus should return a pvalue < 0.05
stat, pvalue = est.test(
X, y, covariate_index=np.arange(10, dtype=int), metric="mi", n_repeats=n_repeats
Expand Down Expand Up @@ -179,8 +173,6 @@ def make_multiview_classification(
),
random_state=seed,
test_size=test_size,
permute_per_tree=False,
sample_dataset_per_tree=False,
)

rf_results = dict()
Expand Down
2 changes: 1 addition & 1 deletion examples/hypothesis_testing/plot_might_auc.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@
),
random_state=seed,
test_size=test_size,
permute_per_tree=True,
permute_forest_fraction=1.0 / n_estimators,
sample_dataset_per_tree=True,
)

Expand Down
4 changes: 2 additions & 2 deletions examples/hypothesis_testing/plot_might_mv_auc.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@
),
random_state=seed,
test_size=test_size,
permute_per_tree=True,
permute_forest_fraction=1.0 / n_estimators,
sample_dataset_per_tree=True,
)

Expand Down Expand Up @@ -104,7 +104,7 @@
),
random_state=seed,
test_size=test_size,
permute_per_tree=True,
permute_forest_fraction=1.0 / n_estimators,
sample_dataset_per_tree=True,
)

Expand Down
2 changes: 1 addition & 1 deletion sktree/experimental/mutual_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def mutual_info_ksg(
algorithm="kd_tree",
n_jobs: int = -1,
transform: str = "rank",
random_seed: int = None,
random_seed: Optional[int] = None,
):
"""Compute the generalized (conditional) mutual information KSG estimate.
Expand Down
Loading

0 comments on commit e4728fa

Please sign in to comment.