Skip to content

Commit

Permalink
Update docs for v0.2
Browse files Browse the repository at this point in the history
Signed-off-by: Adam Li <adam2392@gmail.com>
  • Loading branch information
adam2392 committed Oct 9, 2023
1 parent 3e50757 commit 18c67ec
Show file tree
Hide file tree
Showing 105 changed files with 5,497 additions and 1,959 deletions.
Binary file added .pymon
Binary file not shown.
2 changes: 1 addition & 1 deletion v0.2/.buildinfo
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: 08d17cba0db21befacb226b9f7a187ed
config: b0c18907bb177ae895c6d330acbc0e3f
tags: 645f666f9bcd5a90fca523b33c5a78b7
Binary file not shown.
Binary file not shown.
127 changes: 127 additions & 0 deletions v0.2/_downloads/71f52ddc897be17d747e41dffb3b0762/plot_might_auc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
"""
===================================================================================
Compute partial AUC using Mutual Information for Genuine Hypothesis Testing (MIGHT)
===================================================================================
An example using :class:`~sktree.stats.FeatureImportanceForestClassifier` for nonparametric
multivariate hypothesis test, on simulated datasets. Here, we present a simulation
of how MIGHT is used to evaluate how a "feature set is important for predicting the target".
We simulate a dataset with 1000 features, 1000 samples, and a binary class target
variable. The features are split into two feature sets of 500 features each:
one feature set (X) and another feature set (W). One could think of
these for example as different datasets collected on the same patient in a biomedical setting.
The first feature set (X) is strongly correlated with the target, and the second
feature set (W) is weakly correlated with the target (y).
We then use MIGHT to calculate the partial AUC of these sets.
"""

import numpy as np
from scipy.special import expit

from sktree import HonestForestClassifier
from sktree.stats import FeatureImportanceForestClassifier
from sktree.tree import DecisionTreeClassifier

seed = 12345
rng = np.random.default_rng(seed)

# %%
# Simulate data
# -------------
# We simulate the two feature sets, and the target variable. We then combine them
# into a single dataset to perform hypothesis testing.

n_samples = 1000
n_features_set = 500
mean = 1.0
sigma = 2.0
beta = 5.0

unimportant_mean = 0.0
unimportant_sigma = 4.5

# The first feature set starts with 10 informative columns and is padded with
# uninformative noise columns up to ``n_features_set`` columns in total.
# NOTE: the order of the ``rng`` calls below determines the simulated values,
# so the draws must stay in this sequence to keep the example reproducible.
informative_block = rng.normal(loc=mean, scale=sigma, size=(n_samples, 10))
noise_padding = rng.normal(
    loc=unimportant_mean,
    scale=unimportant_sigma,
    size=(n_samples, n_features_set - 10),
)
X_important = np.hstack([informative_block, noise_padding])

# The second feature set consists of uninformative noise columns only.
X_unimportant = rng.normal(
    loc=unimportant_mean,
    scale=unimportant_sigma,
    size=(n_samples, n_features_set),
)

# Binary target: one Bernoulli draw per sample, with success probability given
# by the logistic function of the scaled row-sum of the 10 informative columns.
signal = X_important[:, :10].sum(axis=1)
y = rng.binomial(n=1, p=expit(beta * signal), size=n_samples)

# %%
# Use partial AUC as test statistic
# ---------------------------------
# You can specify the maximum false positive rate (i.e. a minimum specificity of
# ``1 - max_fpr``) by modifying ``max_fpr`` in ``statistic``.

n_estimators = 125
max_features = "sqrt"
metric = "auc"
test_size = 0.2
n_jobs = -1
honest_fraction = 0.7
# Upper bound on the false positive rate used for the partial AUC; 0.1 limits
# the ROC curve to FPR <= 0.1, i.e. a specificity of at least 90%.
max_fpr = 0.1

est = FeatureImportanceForestClassifier(
    estimator=HonestForestClassifier(
        n_estimators=n_estimators,
        max_features=max_features,
        tree_estimator=DecisionTreeClassifier(),
        random_state=seed,
        honest_fraction=honest_fraction,
        n_jobs=n_jobs,
    ),
    random_state=seed,
    test_size=test_size,
    permute_per_tree=True,
    sample_dataset_per_tree=True,
)

# we test for the first feature set, which is important and thus should return a higher AUC
# ``max_fpr`` must be passed explicitly: without it the value defined above is
# never used and the reported statistic is not restricted to the partial AUC.
# NOTE(review): assumes ``statistic`` forwards extra keyword arguments to the
# metric function -- confirm against the installed sktree version.
stat, posterior_arr, samples = est.statistic(
    X_important,
    y,
    metric=metric,
    max_fpr=max_fpr,
    return_posteriors=True,
)

print(f"ASH-90 / Partial AUC: {stat}")
print(f"Shape of Observed Samples: {samples.shape}")
print(f"Shape of Tree Posteriors for the positive class: {posterior_arr.shape}")

# %%
# Repeat for the second feature set
# ---------------------------------
# This feature set has a smaller statistic, which is expected due to its weak correlation.

# Use the same ``max_fpr`` as above so the two statistics are directly comparable.
stat, posterior_arr, samples = est.statistic(
    X_unimportant,
    y,
    metric=metric,
    max_fpr=max_fpr,
    return_posteriors=True,
)

print(f"ASH-90 / Partial AUC: {stat}")
print(f"Shape of Observed Samples: {samples.shape}")
print(f"Shape of Tree Posteriors for the positive class: {posterior_arr.shape}")

# %%
# All posteriors are saved within the model
# -----------------------------------------
# Extract the results from the model variables anytime. You can save the model with ``pickle``.
#
# ASH-90 / Partial AUC: ``est.observe_stat_``
# Observed Samples: ``est.observe_samples_``
# Tree Posteriors for the positive class: ``est.observe_posteriors_`` (n_trees, n_samples_test, 1)
# True Labels: ``est.y_true_final_``
104 changes: 104 additions & 0 deletions v0.2/_downloads/b3e7bddab5998eaeaa7413978232d146/plot_might_auc.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n# Compute partial AUC using Mutual Information for Genuine Hypothesis Testing (MIGHT)\n\nAn example using :class:`~sktree.stats.FeatureImportanceForestClassifier` for nonparametric\nmultivariate hypothesis test, on simulated datasets. Here, we present a simulation\nof how MIGHT is used to evaluate how a \"feature set is important for predicting the target\".\n\nWe simulate a dataset with 1000 features, 1000 samples, and a binary class target\nvariable. The features are split into two feature sets of 500 features each:\none feature set (X) and another feature set (W). One could think of\nthese for example as different datasets collected on the same patient in a biomedical setting.\nThe first feature set (X) is strongly correlated with the target, and the second\nfeature set (W) is weakly correlated with the target (y).\n\nWe then use MIGHT to calculate the partial AUC of these sets.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import numpy as np\nfrom scipy.special import expit\n\nfrom sktree import HonestForestClassifier\nfrom sktree.stats import FeatureImportanceForestClassifier\nfrom sktree.tree import DecisionTreeClassifier\n\nseed = 12345\nrng = np.random.default_rng(seed)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Simulate data\nWe simulate the two feature sets, and the target variable. We then combine them\ninto a single dataset to perform hypothesis testing.\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"n_samples = 1000\nn_features_set = 500\nmean = 1.0\nsigma = 2.0\nbeta = 5.0\n\nunimportant_mean = 0.0\nunimportant_sigma = 4.5\n\n# first sample the informative features, and then the uniformative features\nX_important = rng.normal(loc=mean, scale=sigma, size=(n_samples, 10))\nX_important = np.hstack(\n [\n X_important,\n rng.normal(\n loc=unimportant_mean, scale=unimportant_sigma, size=(n_samples, n_features_set - 10)\n ),\n ]\n)\n\nX_unimportant = rng.normal(\n loc=unimportant_mean, scale=unimportant_sigma, size=(n_samples, n_features_set)\n)\n\n# simulate the binary target variable\ny = rng.binomial(n=1, p=expit(beta * X_important[:, :10].sum(axis=1)), size=n_samples)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Use partial AUC as test statistic\nYou can specify the maximum false positive rate (i.e. a minimum specificity of ``1 - max_fpr``) by modifying ``max_fpr`` in ``statistic``.\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"n_estimators = 125\nmax_features = \"sqrt\"\nmetric = \"auc\"\ntest_size = 0.2\nn_jobs = -1\nhonest_fraction = 0.7\nmax_fpr = 0.1\n\nest = FeatureImportanceForestClassifier(\n estimator=HonestForestClassifier(\n n_estimators=n_estimators,\n max_features=max_features,\n tree_estimator=DecisionTreeClassifier(),\n random_state=seed,\n honest_fraction=honest_fraction,\n n_jobs=n_jobs,\n ),\n random_state=seed,\n test_size=test_size,\n permute_per_tree=True,\n sample_dataset_per_tree=True,\n)\n\n# we test for the first feature set, which is important and thus should return a higher AUC\nstat, posterior_arr, samples = est.statistic(\n X_important,\n y,\n metric=metric,\n return_posteriors=True,\n)\n\nprint(f\"ASH-90 / Partial AUC: {stat}\")\nprint(f\"Shape of Observed Samples: {samples.shape}\")\nprint(f\"Shape of Tree Posteriors for the positive class: {posterior_arr.shape}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Repeat for the second feature set\nThis feature set has a smaller statistic, which is expected due to its weak correlation.\n\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"stat, posterior_arr, samples = est.statistic(\n X_unimportant,\n y,\n metric=metric,\n return_posteriors=True,\n)\n\nprint(f\"ASH-90 / Partial AUC: {stat}\")\nprint(f\"Shape of Observed Samples: {samples.shape}\")\nprint(f\"Shape of Tree Posteriors for the positive class: {posterior_arr.shape}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## All posteriors are saved within the model\nExtract the results from the model variables anytime. You can save the model with ``pickle``.\n\nASH-90 / Partial AUC: ``est.observe_stat_``\nObserved Samples: ``est.observe_samples_``\nTree Posteriors for the positive class: ``est.observe_posteriors_`` (n_trees, n_samples_test, 1)\nTrue Labels: ``est.y_true_final_``\n\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.15"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Binary file modified v0.2/_images/sphx_glr_plot_extended_isolation_forest_006.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified v0.2/_images/sphx_glr_plot_extended_isolation_forest_007.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified v0.2/_images/sphx_glr_plot_extended_isolation_forest_008.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified v0.2/_images/sphx_glr_plot_extended_isolation_forest_009.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified v0.2/_images/sphx_glr_plot_extended_isolation_forest_010.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified v0.2/_images/sphx_glr_plot_extra_oblique_random_forest_001.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified v0.2/_images/sphx_glr_plot_extra_oblique_random_forest_thumb.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified v0.2/_images/sphx_glr_plot_extra_orf_sample_size_001.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified v0.2/_images/sphx_glr_plot_extra_orf_sample_size_thumb.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added v0.2/_images/sphx_glr_plot_might_auc_thumb.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified v0.2/_images/sphx_glr_plot_oblique_random_forest_001.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified v0.2/_images/sphx_glr_plot_oblique_random_forest_thumb.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified v0.2/_images/sphx_glr_plot_overlapping_gaussians_001.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified v0.2/_images/sphx_glr_plot_overlapping_gaussians_thumb.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
80 changes: 56 additions & 24 deletions v0.2/_modules/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,17 @@
<!DOCTYPE html>


<html lang="en" >
<html lang="en" data-theme="auto">

<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Overview: module code &#8212; scikit-tree 0.2.0 documentation</title>
<title>Overview: module code &#8212; scikit-tree 0.2.1 documentation</title>



<script data-cfasync="false">
document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
document.documentElement.dataset.mode = localStorage.getItem("mode") || "auto";
document.documentElement.dataset.theme = localStorage.getItem("theme") || "light";
</script>

Expand Down Expand Up @@ -46,6 +46,10 @@
<script src="../_static/clipboard.min.js"></script>
<script src="../_static/copybutton.js"></script>
<script>DOCUMENTATION_OPTIONS.pagename = '_modules/index';</script>
<script>
DOCUMENTATION_OPTIONS.theme_switcher_json_url = 'https://raw.githubusercontent.com/neurodata/scikit-tree/main/doc/_static/versions.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '0.2.1';
</script>
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
<link rel="canonical" href="https://neurodata.github.io/scikit-tree/stable/index.html" />
Expand All @@ -58,7 +62,7 @@
</head>


<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="auto">



Expand Down Expand Up @@ -114,7 +118,7 @@



<p class="title logo__title">scikit-tree 0.2.0 documentation</p>
<p class="title logo__title">scikit-tree 0.2.1 documentation</p>

</a></div>

Expand Down Expand Up @@ -188,17 +192,31 @@
</div>


<div class="navbar-item"><div class="dropdown">
<button type="button" class="btn btn-primary btn-sm navbar-btn dropdown-toggle" id="dLabelMore" data-toggle="dropdown">
v0.2.0
<span class="caret"></span>
<div class="navbar-item">
<script>
document.write(`
<button class="theme-switch-button btn btn-sm btn-outline-primary navbar-btn rounded-circle" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch" data-mode="light"><i class="fa-solid fa-sun"></i></span>
<span class="theme-switch" data-mode="dark"><i class="fa-solid fa-moon"></i></span>
<span class="theme-switch" data-mode="auto"><i class="fa-solid fa-circle-half-stroke"></i></span>
</button>
`);
</script></div>

<div class="navbar-item">
<script>
document.write(`
<div class="version-switcher__container dropdown">
<button type="button" class="version-switcher__button btn btn-sm navbar-btn dropdown-toggle" data-bs-toggle="dropdown">
0.2.1 <!-- this text may get changed later by javascript -->
<span class="caret"></span>
</button>
<div class="dropdown-menu list-group-flush py-0" aria-labelledby="dLabelMore">
<a class="list-group-item list-group-item-action py-1" href="https:///docs.neurodata.io/scikit-tree/v0.3/index.html">v0.3 (development)</a>
<a class="list-group-item list-group-item-action py-1" href="https:///docs.neurodata.io/scikit-tree/v0.2/index.html">v0.2</a>
<a class="list-group-item list-group-item-action py-1" href="https:///docs.neurodata.io/scikit-tree/v0.1/index.html">v0.1</a>
<div class="version-switcher__menu dropdown-menu list-group-flush py-0">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div></div>
</div>
`);
</script></div>

<div class="navbar-item"><ul class="navbar-icon-links navbar-nav"
aria-label="Icon Links">
Expand Down Expand Up @@ -301,17 +319,31 @@

<div class="sidebar-header-items__end">

<div class="navbar-item"><div class="dropdown">
<button type="button" class="btn btn-primary btn-sm navbar-btn dropdown-toggle" id="dLabelMore" data-toggle="dropdown">
v0.2.0
<span class="caret"></span>
<div class="navbar-item">
<script>
document.write(`
<button class="theme-switch-button btn btn-sm btn-outline-primary navbar-btn rounded-circle" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch" data-mode="light"><i class="fa-solid fa-sun"></i></span>
<span class="theme-switch" data-mode="dark"><i class="fa-solid fa-moon"></i></span>
<span class="theme-switch" data-mode="auto"><i class="fa-solid fa-circle-half-stroke"></i></span>
</button>
`);
</script></div>

<div class="navbar-item">
<script>
document.write(`
<div class="version-switcher__container dropdown">
<button type="button" class="version-switcher__button btn btn-sm navbar-btn dropdown-toggle" data-bs-toggle="dropdown">
0.2.1 <!-- this text may get changed later by javascript -->
<span class="caret"></span>
</button>
<div class="dropdown-menu list-group-flush py-0" aria-labelledby="dLabelMore">
<a class="list-group-item list-group-item-action py-1" href="https:///docs.neurodata.io/scikit-tree/v0.3/index.html">v0.3 (development)</a>
<a class="list-group-item list-group-item-action py-1" href="https:///docs.neurodata.io/scikit-tree/v0.2/index.html">v0.2</a>
<a class="list-group-item list-group-item-action py-1" href="https:///docs.neurodata.io/scikit-tree/v0.1/index.html">v0.1</a>
<div class="version-switcher__menu dropdown-menu list-group-flush py-0">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div></div>
</div>
`);
</script></div>

<div class="navbar-item"><ul class="navbar-icon-links navbar-nav"
aria-label="Icon Links">
Expand Down Expand Up @@ -441,7 +473,7 @@ <h1>All modules for which code is available</h1>
<div class="footer-item">
<p class="copyright">

© Copyright 2022-2023, scikit-tree Developers. Last updated on 2023-10-05.
© Copyright 2022-2023, scikit-tree Developers. Last updated on 2023-10-09.
<br/>

</p>
Expand Down
Loading

0 comments on commit 18c67ec

Please sign in to comment.