DOC optimize tutorial & change function name
PSSF23 committed May 31, 2024
1 parent 4a9f653 commit 070161a
Showing 11 changed files with 42 additions and 31 deletions.
8 changes: 4 additions & 4 deletions doc/api.rst
@@ -103,7 +103,7 @@ The trees that comprise those forests are also available as standalone classes.

Outlier Detection
-----------------
Isolation forests are a model implemented in scikit-learn, which is an ensemble of
extremely randomized axis-aligned decision tree models. Extended isolation forests
replace the base tree model with an oblique tree, which allows a more flexible model
for detecting outliers.
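[Editor's note] As context for the paragraph above, here is a minimal sketch of the two model families side by side. It assumes `ExtendedIsolationForest` is importable from the top-level `sktree` package; the import path and constructor arguments are assumptions, not the documented API.

```python
# Hedged sketch: axis-aligned vs. oblique isolation forests on toy data.
import numpy as np
from sklearn.ensemble import IsolationForest  # axis-aligned base trees

from sktree import ExtendedIsolationForest  # assumed export; oblique base trees

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 2))

axis_aligned = IsolationForest(n_estimators=100, random_state=0).fit(X)
oblique = ExtendedIsolationForest(n_estimators=100, random_state=0).fit(X)

# Both follow the scikit-learn outlier API: -1 flags outliers, 1 inliers.
print(axis_aligned.predict(X[:5]))
print(oblique.predict(X[:5]))
```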
@@ -151,10 +151,10 @@ tree models.
PermutationForestRegressor
build_coleman_forest
build_permutation_forest
-build_hyppo_oob_forest
-build_hyppo_cv_forest
+build_oob_forest
+build_cv_forest
PermutationHonestForestClassifier

Datasets
------------------------------
We provide some convenience functions for simulating datasets beyond
4 changes: 2 additions & 2 deletions examples/treeple/treeple_tutorial_1_1a_SA98.py
@@ -11,7 +11,7 @@

from sktree.datasets import make_trunk_classification
from sktree.ensemble import HonestForestClassifier
-from sktree.stats import build_hyppo_oob_forest
+from sktree.stats import build_oob_forest

sns.set(color_codes=True, style="white", context="talk", font_scale=1.5)
PALETTE = sns.color_palette("Set1")
@@ -76,7 +76,7 @@
)

# fit the model and obtain the tree posteriors
-_, observe_proba = build_hyppo_oob_forest(est, X, y)
+_, observe_proba = build_oob_forest(est, X, y)

# generate forest posteriors for the two classes
observe_proba = np.nanmean(observe_proba, axis=0)
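[Editor's note] The hunk above ends just before this tutorial computes its statistic. For reference, a hedged sketch of S@98 (sensitivity at 98% specificity) from the averaged posteriors; the dummy `y` and `observe_proba` values stand in for the simulation, and the thresholding logic is this sketch's, not the tutorial's verbatim code.

```python
# Hedged sketch: S@98 from forest posteriors.
import numpy as np
from sklearn.metrics import roc_curve

# observe_proba: (n_samples, n_classes) averaged posteriors, as in the diff above.
y = np.array([0, 0, 1, 1])
observe_proba = np.array([[0.9, 0.1], [0.6, 0.4], [0.3, 0.7], [0.1, 0.9]])

fpr, tpr, _ = roc_curve(y, observe_proba[:, 1], pos_label=1)

# Sensitivity at 98% specificity = highest TPR among points with FPR <= 0.02.
s98 = max(tpr[fpr <= 0.02])
print(f"S@98 = {s98:.2f}")
```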
4 changes: 2 additions & 2 deletions examples/treeple/treeple_tutorial_1_1b_MI.py
@@ -11,7 +11,7 @@

from sktree.datasets import make_trunk_classification
from sktree.ensemble import HonestForestClassifier
-from sktree.stats import build_hyppo_oob_forest
+from sktree.stats import build_oob_forest

sns.set(color_codes=True, style="white", context="talk", font_scale=1.5)
PALETTE = sns.color_palette("Set1")
@@ -77,7 +77,7 @@
)

# fit the model and obtain the tree posteriors
-_, observe_proba = build_hyppo_oob_forest(est, X, y)
+_, observe_proba = build_oob_forest(est, X, y)

# generate forest posteriors for the two classes
observe_proba = np.nanmean(observe_proba, axis=0)
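[Editor's note] This MI tutorial averages the tree posteriors the same way; the statistic itself is the plug-in estimate of I(X; Y) = H(Y) - H(Y|X). A hedged sketch of that estimate, with dummy values in place of the simulation:

```python
# Hedged sketch: plug-in MI estimate from forest posteriors,
# I(X; Y) ~= H(Y) - mean_i H(posterior_i), mirroring the tutorial's formula.
import numpy as np
from scipy.stats import entropy

y = np.array([0, 0, 1, 1])
observe_proba = np.array([[0.9, 0.1], [0.6, 0.4], [0.3, 0.7], [0.1, 0.9]])

H_Y = entropy(np.bincount(y) / len(y))  # class entropy H(Y)
H_Y_given_X = np.mean(entropy(observe_proba, axis=1))  # mean posterior entropy H(Y|X)
mi = H_Y - H_Y_given_X
print(f"MI estimate = {mi:.3f}")
```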
4 changes: 2 additions & 2 deletions examples/treeple/treeple_tutorial_1_1c_pAUC.py
@@ -11,7 +11,7 @@

from sktree.datasets import make_trunk_classification
from sktree.ensemble import HonestForestClassifier
-from sktree.stats import build_hyppo_oob_forest
+from sktree.stats import build_oob_forest

sns.set(color_codes=True, style="white", context="talk", font_scale=1.5)
PALETTE = sns.color_palette("Set1")
@@ -78,7 +78,7 @@
)

# fit the model and obtain the tree posteriors
-_, observe_proba = build_hyppo_oob_forest(est, X, y)
+_, observe_proba = build_oob_forest(est, X, y)

# generate forest posteriors for the two classes
observe_proba = np.nanmean(observe_proba, axis=0)
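[Editor's note] The pAUC tutorial's statistic is a partial AUC, which scikit-learn computes directly. A hedged sketch using the standardized partial AUC (`max_fpr`); the 0.1 false-positive ceiling is this sketch's choice, not necessarily the tutorial's:

```python
# Hedged sketch: partial AUC at a 10% FPR ceiling. Note that roc_auc_score
# with max_fpr returns the McClish-standardized partial AUC, not the raw area.
import numpy as np
from sklearn.metrics import roc_auc_score

y = np.array([0, 0, 1, 1])
observe_proba = np.array([[0.9, 0.1], [0.6, 0.4], [0.3, 0.7], [0.1, 0.9]])

pauc = roc_auc_score(y, observe_proba[:, 1], max_fpr=0.1)
print(f"pAUC@0.1 = {pauc:.3f}")
```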
4 changes: 2 additions & 2 deletions examples/treeple/treeple_tutorial_1_1d_HD.py
@@ -10,7 +10,7 @@

from sktree.datasets import make_trunk_classification
from sktree.ensemble import HonestForestClassifier
-from sktree.stats import build_hyppo_oob_forest
+from sktree.stats import build_oob_forest

sns.set(color_codes=True, style="white", context="talk", font_scale=1.5)
PALETTE = sns.color_palette("Set1")
@@ -75,7 +75,7 @@
)

# fit the model and obtain the tree posteriors
-_, observe_proba = build_hyppo_oob_forest(est, X, y)
+_, observe_proba = build_oob_forest(est, X, y)

# generate forest posteriors for the two classes
observe_proba = np.nanmean(observe_proba, axis=0)
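[Editor's note] For the HD tutorial, the statistic is a Hellinger distance between class-conditional posteriors. The hunk does not show how the tutorial pairs up the two distributions, so treat the aggregation below (mean posterior per class) as an assumption; the distance formula itself is the standard discrete one.

```python
# Hedged sketch: Hellinger distance between the two classes' mean posteriors.
import numpy as np

y = np.array([0, 0, 1, 1])
observe_proba = np.array([[0.9, 0.1], [0.6, 0.4], [0.3, 0.7], [0.1, 0.9]])

p = observe_proba[y == 0].mean(axis=0)  # mean posterior over class-0 samples
q = observe_proba[y == 1].mean(axis=0)  # mean posterior over class-1 samples

# H(p, q) = (1/sqrt(2)) * || sqrt(p) - sqrt(q) ||_2, bounded in [0, 1].
hd = np.sqrt(np.sum((np.sqrt(p) - np.sqrt(q)) ** 2)) / np.sqrt(2)
print(f"Hellinger distance = {hd:.3f}")
```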
8 changes: 7 additions & 1 deletion examples/treeple/treeple_tutorial_1_2_pvalue.py
@@ -43,8 +43,14 @@
#
# .. math:: I(X; Y) = H(Y) - H(Y\mid X)
#
+# Under the null hypothesis :math:`H_0`, the conditional entropy ``H(Y | X)``
+# is equal to the class entropy ``H(Y)``, so the *MI* becomes zero. Thus, if
+# the *MI* is significantly larger than zero, we can reject the null hypothesis
+# :math:`H_0`.
+#
# With a binary class simulation as an example, this tutorial will show
-# how to use ``treeple`` to use the statistic and the p-value.
+# how to use ``treeple`` to calculate the statistic and test the
+# hypothesis with data.

# %%
# Create a simulation with two gaussians
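[Editor's note] The paragraph added in this hunk is the heart of the test: MI is zero under H_0, so a significantly positive MI rejects it. treeple ships helpers such as `build_coleman_forest` for this, but the underlying logic reduces to a permutation p-value. A self-contained sketch with an illustrative statistic (not treeple's):

```python
# Hedged sketch: generic permutation p-value for "statistic > 0 under H_1".
# The statistic and permutation scheme here are illustrative only.
import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=100)
y = (x + rng.normal(scale=0.5, size=100) > 0).astype(int)

def stat(x, y):
    # Illustrative dependence statistic: absolute mean difference between groups.
    return abs(x[y == 1].mean() - x[y == 0].mean())

observed = stat(x, y)
null = np.array([stat(x, rng.permutation(y)) for _ in range(1000)])

# Add-one smoothing keeps the estimated p-value away from exactly zero.
pvalue = (1 + np.sum(null >= observed)) / (1 + len(null))
print(f"p-value = {pvalue:.4f}")
```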
4 changes: 2 additions & 2 deletions examples/treeple/treeple_tutorial_2_1a_SA98_multiview.py
@@ -12,7 +12,7 @@

from sktree.datasets import make_trunk_classification
from sktree.ensemble import HonestForestClassifier
-from sktree.stats import build_hyppo_oob_forest
+from sktree.stats import build_oob_forest
from sktree.tree import MultiViewDecisionTreeClassifier

sns.set(color_codes=True, style="white", context="talk", font_scale=1.5)
@@ -95,7 +95,7 @@
)

# fit the model and obtain the tree posteriors
-_, observe_proba = build_hyppo_oob_forest(est, Z_X, y)
+_, observe_proba = build_oob_forest(est, Z_X, y)

# generate forest posteriors for the two classes
observe_proba = np.nanmean(observe_proba, axis=0)
6 changes: 3 additions & 3 deletions examples/treeple/treeple_tutorial_2_1b_CMI.py
@@ -12,7 +12,7 @@

from sktree.datasets import make_trunk_classification
from sktree.ensemble import HonestForestClassifier
-from sktree.stats import build_hyppo_oob_forest
+from sktree.stats import build_oob_forest
from sktree.tree import MultiViewDecisionTreeClassifier

sns.set(color_codes=True, style="white", context="talk", font_scale=1.5)
Expand Down Expand Up @@ -95,7 +95,7 @@
)

# fit the model and obtain the tree posteriors
-_, observe_proba = build_hyppo_oob_forest(est, Z_X, y)
+_, observe_proba = build_oob_forest(est, Z_X, y)

# generate forest posteriors for the two classes
observe_proba = np.nanmean(observe_proba, axis=0)
@@ -129,7 +129,7 @@
)

# fit the model and obtain the tree posteriors
-_, single_proba = build_hyppo_oob_forest(est, Z, y)
+_, single_proba = build_oob_forest(est, Z, y)

# generate forest posteriors for the two classes
single_proba = np.nanmean(single_proba, axis=0)
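[Editor's note] This CMI tutorial fits two forests, one on the joint view `Z_X` and one on `Z` alone, exactly as the two `build_oob_forest` calls above show. The statistic is then the difference of the two plug-in MI estimates. A hedged sketch of that final step, with dummy posteriors in place of the two forests' outputs:

```python
# Hedged sketch: plug-in CMI as the difference of two MI estimates,
# I(Y; X | Z) ~= I(Y; [X, Z]) - I(Y; Z).
import numpy as np
from scipy.stats import entropy

def mi_from_posteriors(y, proba):
    """I(features; Y) ~= H(Y) - mean_i H(posterior_i)."""
    H_Y = entropy(np.bincount(y) / len(y))
    return H_Y - np.mean(entropy(proba, axis=1))

y = np.array([0, 0, 1, 1])
observe_proba = np.array([[0.9, 0.1], [0.6, 0.4], [0.3, 0.7], [0.1, 0.9]])  # on [X, Z]
single_proba = np.array([[0.6, 0.4], [0.5, 0.5], [0.5, 0.5], [0.4, 0.6]])  # on Z only

cmi = mi_from_posteriors(y, observe_proba) - mi_from_posteriors(y, single_proba)
print(f"CMI estimate = {cmi:.3f}")
```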
9 changes: 7 additions & 2 deletions examples/treeple/treeple_tutorial_2_2_pvalue_multiview.py
@@ -41,9 +41,14 @@
#
# Conditional mutual information (*CMI*) measures the dependence of *Y* on
# *X* conditioned on *Z*. It can be calculated by the difference between
-# the joint MI (``I([X, Z]; Y)``) and the MI on Z (``I(Y; Z)``):
+# the joint *MI* (``I([X, Z]; Y)``) and the *MI* of Y on Z (``I(Y; Z)``):
#
-# .. math:: I(X; Y | Z) = I([X, Z]; Y) - I(Y; Z)
+# .. math:: I(Y; X \mid Z) = I(Y; [X, Z]) - I(Y; Z)
+#
+# Under the null hypothesis :math:`H_0`, the joint *MI* ``I(Y; [X, Z])``
+# is equal to the *MI* of Y on Z ``I(Y; Z)``, so the *CMI* becomes zero. Thus, if
+# the *CMI* is significantly larger than zero, we can reject the null hypothesis
+# :math:`H_0`.
#
# With a multiview binary class simulation as an example, this tutorial
# will show how to use ``treeple`` to calculate the statistic and test the
8 changes: 4 additions & 4 deletions sktree/stats/__init__.py
@@ -2,8 +2,8 @@
    FeatureImportanceForestClassifier,
    FeatureImportanceForestRegressor,
    build_coleman_forest,
-   build_hyppo_cv_forest,
-   build_hyppo_oob_forest,
+   build_cv_forest,
+   build_oob_forest,
    build_permutation_forest,
)
from .monte_carlo import PermutationTest
@@ -16,8 +16,8 @@
    "PermutationForestClassifier",
    "PermutationForestRegressor",
    "PermutationTest",
-   "build_hyppo_cv_forest",
-   "build_hyppo_oob_forest",
+   "build_cv_forest",
+   "build_oob_forest",
    "build_coleman_forest",
    "build_permutation_forest",
    "PermutationHonestForestClassifier",
14 changes: 7 additions & 7 deletions sktree/stats/forestht.py
@@ -1294,7 +1294,7 @@ def build_coleman_forest(
metric_func: Callable[[ArrayLike, ArrayLike], float] = METRIC_FUNCTIONS[metric]

# build two sets of forests
-est, orig_forest_proba = build_hyppo_oob_forest(est, X, y, verbose=verbose)
+est, orig_forest_proba = build_oob_forest(est, X, y, verbose=verbose)

X_null = np.copy(X)
y_null = np.copy(y)
@@ -1307,7 +1307,7 @@
rng.shuffle(temp_col)
X_null[:, covariate_index] = temp_col

-perm_est, perm_forest_proba = build_hyppo_oob_forest(perm_est, X_null, y_null, verbose=verbose)
+perm_est, perm_forest_proba = build_oob_forest(perm_est, X_null, y_null, verbose=verbose)

# get the number of jobs
n_jobs = est.n_jobs
@@ -1433,7 +1433,7 @@ def build_permutation_forest(
)

# train the original forest on unpermuted data
-est, orig_forest_proba = build_hyppo_oob_forest(est, X, y, verbose=verbose)
+est, orig_forest_proba = build_oob_forest(est, X, y, verbose=verbose)
y_pred_proba_orig = np.nanmean(orig_forest_proba, axis=0)
observe_test_stat = metric_func(y, y_pred_proba_orig, **metric_kwargs)

@@ -1452,7 +1452,7 @@
perm_est = clone(perm_est)
perm_est.set_params(random_state=rng.integers(0, np.iinfo(np.int32).max))

-perm_est, perm_forest_proba = build_hyppo_oob_forest(
+perm_est, perm_forest_proba = build_oob_forest(
perm_est, X_perm, y, verbose=verbose, covariate_index=covariate_index
)

@@ -1474,7 +1474,7 @@
return forest_result


-def build_hyppo_oob_forest(est: ForestClassifier, X, y, verbose=False, **est_kwargs):
+def build_oob_forest(est: ForestClassifier, X, y, verbose=False, **est_kwargs):
"""Build a hypothesis testing forest using oob samples.
Parameters
@@ -1532,7 +1532,7 @@ def build_hyppo_oob_forest(est: ForestClassifier, X, y, verbose=False, **est_kwa
return est, all_proba


-def build_hyppo_cv_forest(
+def build_cv_forest(
est,
X,
y,
@@ -1541,7 +1541,7 @@ def build_hyppo_cv_forest(
verbose=False,
seed=None,
):
"""Build a hypothesis testing forest using oob samples.
"""Build a hypothesis testing forest using using cross-validation.
Parameters
----------
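[Editor's note] The renamed helpers keep their contracts. Judging by the tutorials above, `build_oob_forest` returns the fitted forest plus a per-tree posterior array with NaN entries for in-bag samples, which is why every tutorial averages with `np.nanmean`. An end-to-end sketch under those assumptions; the dataset and forest hyperparameters are illustrative choices, not requirements:

```python
# Hedged sketch: end-to-end use of the renamed helper, assuming the signature
# in the hunk above and a bootstrap forest so that OOB samples exist.
import numpy as np

from sktree.datasets import make_trunk_classification
from sktree.ensemble import HonestForestClassifier
from sktree.stats import build_oob_forest

X, y = make_trunk_classification(n_samples=512, n_dim=10, n_informative=1, seed=0)

est = HonestForestClassifier(n_estimators=100, random_state=0, bootstrap=True)
est, all_proba = build_oob_forest(est, X, y)

# all_proba stacks one posterior per tree; in-bag entries are NaN, so the
# forest-level posterior is the NaN-aware mean over trees (axis 0).
print(all_proba.shape)  # assumed (n_trees, n_samples, n_classes)
forest_proba = np.nanmean(all_proba, axis=0)
```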
