DOC optimize documentation & change function name #280

Merged 1 commit on May 31, 2024
8 changes: 4 additions & 4 deletions doc/api.rst
@@ -103,7 +103,7 @@ The trees that comprise those forests are also available as standalone classes.

Outlier Detection
-----------------
Isolation forests are a model implemented in scikit-learn, which is an ensemble of
extremely randomized axis-aligned decision tree models. Extended isolation forests
replace the base tree model with an oblique tree, which allows a more flexible model
for detecting outliers.
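A minimal sketch of the estimator this section documents, assuming sktree
exposes ExtendedIsolationForest with the usual scikit-learn outlier-detector
API (fit on training data, then predict returning +1 for inliers and -1 for
outliers):

import numpy as np
from sktree import ExtendedIsolationForest

rng = np.random.default_rng(0)
X = rng.standard_normal((100, 2))        # inliers clustered near the origin
X_out = rng.uniform(-6.0, 6.0, (10, 2))  # scattered points to flag as outliers

clf = ExtendedIsolationForest(n_estimators=100, random_state=0)
clf.fit(X)
labels = clf.predict(np.vstack([X, X_out]))  # +1 = inlier, -1 = outlier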
@@ -151,10 +151,10 @@ tree models.
PermutationForestRegressor
build_coleman_forest
build_permutation_forest
build_hyppo_oob_forest
build_hyppo_cv_forest
build_oob_forest
build_cv_forest
PermutationHonestForestClassifier

Datasets
------------------------------
We provide some convenience functions for simulating datasets beyond
4 changes: 2 additions & 2 deletions examples/treeple/treeple_tutorial_1_1a_SA98.py
@@ -11,7 +11,7 @@

from sktree.datasets import make_trunk_classification
from sktree.ensemble import HonestForestClassifier
from sktree.stats import build_hyppo_oob_forest
from sktree.stats import build_oob_forest

sns.set(color_codes=True, style="white", context="talk", font_scale=1.5)
PALETTE = sns.color_palette("Set1")
@@ -76,7 +76,7 @@
)

# fit the model and obtain the tree posteriors
_, observe_proba = build_hyppo_oob_forest(est, X, y)
_, observe_proba = build_oob_forest(est, X, y)

# generate forest posteriors for the two classes
observe_proba = np.nanmean(observe_proba, axis=0)
4 changes: 2 additions & 2 deletions examples/treeple/treeple_tutorial_1_1b_MI.py
@@ -11,7 +11,7 @@

from sktree.datasets import make_trunk_classification
from sktree.ensemble import HonestForestClassifier
from sktree.stats import build_hyppo_oob_forest
from sktree.stats import build_oob_forest

sns.set(color_codes=True, style="white", context="talk", font_scale=1.5)
PALETTE = sns.color_palette("Set1")
@@ -77,7 +77,7 @@
)

# fit the model and obtain the tree posteriors
_, observe_proba = build_hyppo_oob_forest(est, X, y)
_, observe_proba = build_oob_forest(est, X, y)

# generate forest posteriors for the two classes
observe_proba = np.nanmean(observe_proba, axis=0)
4 changes: 2 additions & 2 deletions examples/treeple/treeple_tutorial_1_1c_pAUC.py
@@ -11,7 +11,7 @@

from sktree.datasets import make_trunk_classification
from sktree.ensemble import HonestForestClassifier
from sktree.stats import build_hyppo_oob_forest
from sktree.stats import build_oob_forest

sns.set(color_codes=True, style="white", context="talk", font_scale=1.5)
PALETTE = sns.color_palette("Set1")
@@ -78,7 +78,7 @@
)

# fit the model and obtain the tree posteriors
_, observe_proba = build_hyppo_oob_forest(est, X, y)
_, observe_proba = build_oob_forest(est, X, y)

# generate forest posteriors for the two classes
observe_proba = np.nanmean(observe_proba, axis=0)
4 changes: 2 additions & 2 deletions examples/treeple/treeple_tutorial_1_1d_HD.py
@@ -10,7 +10,7 @@

from sktree.datasets import make_trunk_classification
from sktree.ensemble import HonestForestClassifier
from sktree.stats import build_hyppo_oob_forest
from sktree.stats import build_oob_forest

sns.set(color_codes=True, style="white", context="talk", font_scale=1.5)
PALETTE = sns.color_palette("Set1")
@@ -75,7 +75,7 @@
)

# fit the model and obtain the tree posteriors
_, observe_proba = build_hyppo_oob_forest(est, X, y)
_, observe_proba = build_oob_forest(est, X, y)

# generate forest posteriors for the two classes
observe_proba = np.nanmean(observe_proba, axis=0)
8 changes: 7 additions & 1 deletion examples/treeple/treeple_tutorial_1_2_pvalue.py
@@ -43,8 +43,14 @@
#
# .. math:: I(X; Y) = H(Y) - H(Y\mid X)
#
# Under the null hypothesis :math:`H_0`, the conditional entropy ``H(Y | X)``
# is equal to the class entropy ``H(Y)``, so the *MI* becomes zero. Thus, if
# the *MI* is significantly larger than zero, we can reject the null hypothesis
# :math:`H_0`.
#
# With a binary class simulation as an example, this tutorial will show
# how to use ``treeple`` to use the statistic and the p-value.
# how to use ``treeple`` to calculate the statistic and test the
# hypothesis with data.
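# Not part of this diff: a hypothetical sketch of the plug-in MI estimate this
# tutorial builds toward. It assumes per-sample class posteriors `proba` of
# shape (n_samples, n_classes) and labels `y`; H(Y) comes from the empirical
# class frequencies and H(Y | X) from the mean entropy of the posteriors.
import numpy as np
from scipy.stats import entropy


def mi_estimate(y, proba):
    _, counts = np.unique(y, return_counts=True)
    H_Y = entropy(counts / counts.sum())  # H(Y): entropy of the class priors
    H_Y_given_X = np.mean(entropy(proba, axis=1))  # H(Y | X): posterior entropy
    return H_Y - H_Y_given_X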

# %%
# Create a simulation with two gaussians
4 changes: 2 additions & 2 deletions examples/treeple/treeple_tutorial_2_1a_SA98_multiview.py
@@ -12,7 +12,7 @@

from sktree.datasets import make_trunk_classification
from sktree.ensemble import HonestForestClassifier
from sktree.stats import build_hyppo_oob_forest
from sktree.stats import build_oob_forest
from sktree.tree import MultiViewDecisionTreeClassifier

sns.set(color_codes=True, style="white", context="talk", font_scale=1.5)
@@ -95,7 +95,7 @@
)

# fit the model and obtain the tree posteriors
_, observe_proba = build_hyppo_oob_forest(est, Z_X, y)
_, observe_proba = build_oob_forest(est, Z_X, y)

# generate forest posteriors for the two classes
observe_proba = np.nanmean(observe_proba, axis=0)
6 changes: 3 additions & 3 deletions examples/treeple/treeple_tutorial_2_1b_CMI.py
@@ -12,7 +12,7 @@

from sktree.datasets import make_trunk_classification
from sktree.ensemble import HonestForestClassifier
from sktree.stats import build_hyppo_oob_forest
from sktree.stats import build_oob_forest
from sktree.tree import MultiViewDecisionTreeClassifier

sns.set(color_codes=True, style="white", context="talk", font_scale=1.5)
@@ -95,7 +95,7 @@
)

# fit the model and obtain the tree posteriors
_, observe_proba = build_hyppo_oob_forest(est, Z_X, y)
_, observe_proba = build_oob_forest(est, Z_X, y)

# generate forest posteriors for the two classes
observe_proba = np.nanmean(observe_proba, axis=0)
@@ -129,7 +129,7 @@
)

# fit the model and obtain the tree posteriors
_, single_proba = build_hyppo_oob_forest(est, Z, y)
_, single_proba = build_oob_forest(est, Z, y)

# generate forest posteriors for the two classes
single_proba = np.nanmean(single_proba, axis=0)
9 changes: 7 additions & 2 deletions examples/treeple/treeple_tutorial_2_2_pvalue_multiview.py
@@ -41,9 +41,14 @@
#
# Conditional mutual information (*CMI*) measures the dependence of *Y* on
# *X* conditioned on *Z*. It can be calculated as the difference between
# the joint MI (``I([X, Z]; Y)``) and the MI on Z (``I(Y; Z)``):
# the joint *MI* (``I(Y; [X, Z])``) and the *MI* of *Y* on *Z* (``I(Y; Z)``):
#
# .. math:: I(X; Y | Z) = I([X, Z]; Y) - I(Y; Z)
# .. math:: I(Y; X \mid Z) = I(Y; [X, Z]) - I(Y; Z)
#
# Under the null hypothesis :math:`H_0`, the joint *MI* ``I(Y; [X, Z])``
# is equal to the *MI* of *Y* on *Z* ``I(Y; Z)``, so the *CMI* becomes zero. Thus, if
# the *CMI* is significantly larger than zero, we can reject the null hypothesis
# :math:`H_0`.
#
# With a multiview binary class simulation as an example, this tutorial
# will show how to use ``treeple`` to calculate the statistic and test the
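Not part of this diff: the decomposition above suggests a hypothetical plug-in
CMI estimate, reusing an MI estimator (e.g. the mi_estimate sketch shown after
the previous tutorial) on posteriors from a forest fit on the joint view
[X, Z] and a forest fit on Z alone:

def cmi_estimate(y, proba_joint, proba_z):
    # I(Y; X | Z) = I(Y; [X, Z]) - I(Y; Z)
    return mi_estimate(y, proba_joint) - mi_estimate(y, proba_z)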
8 changes: 4 additions & 4 deletions sktree/stats/__init__.py
@@ -2,8 +2,8 @@
FeatureImportanceForestClassifier,
FeatureImportanceForestRegressor,
build_coleman_forest,
build_hyppo_cv_forest,
build_hyppo_oob_forest,
build_cv_forest,
build_oob_forest,
build_permutation_forest,
)
from .monte_carlo import PermutationTest
@@ -16,8 +16,8 @@
"PermutationForestClassifier",
"PermutationForestRegressor",
"PermutationTest",
"build_hyppo_cv_forest",
"build_hyppo_oob_forest",
"build_cv_forest",
"build_oob_forest",
"build_coleman_forest",
"build_permutation_forest",
"PermutationHonestForestClassifier",
14 changes: 7 additions & 7 deletions sktree/stats/forestht.py
@@ -1294,7 +1294,7 @@ def build_coleman_forest(
metric_func: Callable[[ArrayLike, ArrayLike], float] = METRIC_FUNCTIONS[metric]

# build two sets of forests
est, orig_forest_proba = build_hyppo_oob_forest(est, X, y, verbose=verbose)
est, orig_forest_proba = build_oob_forest(est, X, y, verbose=verbose)

X_null = np.copy(X)
y_null = np.copy(y)
@@ -1307,7 +1307,7 @@
rng.shuffle(temp_col)
X_null[:, covariate_index] = temp_col

perm_est, perm_forest_proba = build_hyppo_oob_forest(perm_est, X_null, y_null, verbose=verbose)
perm_est, perm_forest_proba = build_oob_forest(perm_est, X_null, y_null, verbose=verbose)

# get the number of jobs
n_jobs = est.n_jobs
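# Not part of this diff: a summary of the Coleman-style test structure above,
# assuming that shuffling the tested covariate column(s) destroys any
# dependence of y on them. One forest is fit on the observed X and a second
# on X_null (X with those columns permuted); the test statistic is the
# difference between the two forests' oob metric scores.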
@@ -1433,7 +1433,7 @@ def build_permutation_forest(
)

# train the original forest on unpermuted data
est, orig_forest_proba = build_hyppo_oob_forest(est, X, y, verbose=verbose)
est, orig_forest_proba = build_oob_forest(est, X, y, verbose=verbose)
y_pred_proba_orig = np.nanmean(orig_forest_proba, axis=0)
observe_test_stat = metric_func(y, y_pred_proba_orig, **metric_kwargs)

@@ -1452,7 +1452,7 @@
perm_est = clone(perm_est)
perm_est.set_params(random_state=rng.integers(0, np.iinfo(np.int32).max))

perm_est, perm_forest_proba = build_hyppo_oob_forest(
perm_est, perm_forest_proba = build_oob_forest(
perm_est, X_perm, y, verbose=verbose, covariate_index=covariate_index
)

@@ -1474,7 +1474,7 @@
return forest_result


def build_hyppo_oob_forest(est: ForestClassifier, X, y, verbose=False, **est_kwargs):
def build_oob_forest(est: ForestClassifier, X, y, verbose=False, **est_kwargs):
"""Build a hypothesis testing forest using oob samples.

Parameters
@@ -1532,7 +1532,7 @@ def build_hyppo_oob_forest(est: ForestClassifier, X, y, verbose=False, **est_kwargs):
return est, all_proba


def build_hyppo_cv_forest(
def build_cv_forest(
est,
X,
y,
@@ -1541,7 +1541,7 @@
verbose=False,
seed=None,
):
"""Build a hypothesis testing forest using oob samples.
"""Build a hypothesis testing forest using using cross-validation.

Parameters
----------
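To round out the rename, a hypothetical usage sketch (not part of this diff)
mirroring the tutorial files above; it assumes an HonestForestClassifier with
bootstrap=True so that every tree has out-of-bag (oob) samples:

import numpy as np
from sktree.datasets import make_trunk_classification
from sktree.ensemble import HonestForestClassifier
from sktree.stats import build_oob_forest

X, y = make_trunk_classification(n_samples=1000, n_dim=10)
est = HonestForestClassifier(n_estimators=100, bootstrap=True, random_state=0)

# each tree predicts only its oob samples; in-bag entries are NaN,
# hence the np.nanmean aggregation used throughout the tutorials
est, oob_proba = build_oob_forest(est, X, y)
posterior = np.nanmean(oob_proba, axis=0)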