From 486e5a21a14062091565a7ad3b2ec833b7ff67c1 Mon Sep 17 00:00:00 2001
From: Adam Li <adam2392@gmail.com>
Date: Mon, 16 Oct 2023 11:28:11 -0400
Subject: [PATCH] [DOC] Example of Multiview MIGHT and Forest (#142)

* Adding multiview dtc example
* Adding imbalanced MIGHT example


---------

Signed-off-by: Adam Li <adam2392@gmail.com>
---
 .gitignore                                    |   1 +
 doc/modules/ensemble.rst                      |   4 +-
 doc/modules/supervised_tree.rst               |   2 +-
 examples/calibration/README.txt               |   6 +
 .../plot_overlapping_gaussians.py             |   0
 .../plot_MI_imbalanced_hyppo_testing.py       | 237 ++++++++++++++++++
 examples/multiview/README.txt                 |   6 +
 examples/multiview/plot_multiview_dtc.py      | 147 +++++++++++
 examples/sklearn_vs_sktree/README.txt         |   6 +
 .../{ => sklearn_vs_sktree}/plot_iris_dtc.py  |   0
 examples/sparse_oblique_trees/README.txt      |   6 +
 .../plot_extra_oblique_random_forest.py       |   2 +-
 .../plot_extra_orf_sample_size.py             |   0
 ...ique_axis_aligned_forests_sparse_parity.py |   0
 .../plot_oblique_forests_iris.py              |   0
 .../plot_oblique_random_forest.py             |   2 +-
 .../plot_multiview_axis_aligned_splitter.py   |   3 +
 .../plot_sparse_projection_matrix.py          |   4 +-
 18 files changed, 419 insertions(+), 7 deletions(-)
 create mode 100644 examples/calibration/README.txt
 rename examples/{ => calibration}/plot_overlapping_gaussians.py (100%)
 create mode 100644 examples/hypothesis_testing/plot_MI_imbalanced_hyppo_testing.py
 create mode 100644 examples/multiview/README.txt
 create mode 100644 examples/multiview/plot_multiview_dtc.py
 create mode 100644 examples/sklearn_vs_sktree/README.txt
 rename examples/{ => sklearn_vs_sktree}/plot_iris_dtc.py (100%)
 create mode 100644 examples/sparse_oblique_trees/README.txt
 rename examples/{ => sparse_oblique_trees}/plot_extra_oblique_random_forest.py (98%)
 rename examples/{ => sparse_oblique_trees}/plot_extra_orf_sample_size.py (100%)
 rename examples/{ => sparse_oblique_trees}/plot_oblique_axis_aligned_forests_sparse_parity.py (100%)
 rename examples/{ => sparse_oblique_trees}/plot_oblique_forests_iris.py (100%)
 rename examples/{ => sparse_oblique_trees}/plot_oblique_random_forest.py (97%)

diff --git a/.gitignore b/.gitignore
index 39a3a651a..db44ed79b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -28,6 +28,7 @@ doc/coverages
 doc/samples
 cover
 examples/*.jpg
+examples/**/*.jpg
 
 env/
 html/
diff --git a/doc/modules/ensemble.rst b/doc/modules/ensemble.rst
index 1f76b888f..38685efad 100644
--- a/doc/modules/ensemble.rst
+++ b/doc/modules/ensemble.rst
@@ -22,8 +22,8 @@ more information and intuition, see
 
 .. topic:: Examples:
 
- * :ref:`sphx_glr_auto_examples_plot_oblique_random_forest.py`
- * :ref:`sphx_glr_auto_examples_plot_oblique_axis_aligned_forests_sparse_parity.py`
+ * :ref:`sphx_glr_auto_examples_sparse_oblique_trees_plot_oblique_random_forest.py`
+ * :ref:`sphx_glr_auto_examples_sparse_oblique_trees_plot_oblique_axis_aligned_forests_sparse_parity.py`
 
 .. topic:: References
 
diff --git a/doc/modules/supervised_tree.rst b/doc/modules/supervised_tree.rst
index 91824e04c..1aa0a602a 100644
--- a/doc/modules/supervised_tree.rst
+++ b/doc/modules/supervised_tree.rst
@@ -41,7 +41,7 @@ as follows:
     >>> clf = tree.ObliqueDecisionTreeClassifier()
     >>> clf = clf.fit(X, y)
 
-.. figure:: ../auto_examples/images/sphx_glr_plot_iris_dtc_002.png
+.. figure:: ../auto_examples/sklearn_vs_sktree/images/sphx_glr_plot_iris_dtc_002.png
    :target: ../auto_examples/plot_iris_dtc.html
    :align: center
 
diff --git a/examples/calibration/README.txt b/examples/calibration/README.txt
new file mode 100644
index 000000000..84d5f5758
--- /dev/null
+++ b/examples/calibration/README.txt
@@ -0,0 +1,6 @@
+.. _calibration_examples:
+
+Calibrated decision trees via honesty
+-------------------------------------
+
+Examples demonstrating the usage of honest decision trees to obtain calibrated predictions.
diff --git a/examples/plot_overlapping_gaussians.py b/examples/calibration/plot_overlapping_gaussians.py
similarity index 100%
rename from examples/plot_overlapping_gaussians.py
rename to examples/calibration/plot_overlapping_gaussians.py
diff --git a/examples/hypothesis_testing/plot_MI_imbalanced_hyppo_testing.py b/examples/hypothesis_testing/plot_MI_imbalanced_hyppo_testing.py
new file mode 100644
index 000000000..882f80c3d
--- /dev/null
+++ b/examples/hypothesis_testing/plot_MI_imbalanced_hyppo_testing.py
@@ -0,0 +1,237 @@
+"""
+===============================================================================
+Mutual Information for Gigantic Hypothesis Testing (MIGHT) with Imbalanced Data
+===============================================================================
+
+Here, we demonstrate how to do hypothesis testing on highly imbalanced data
+in terms of their feature-set dimensionalities.
+using mutual information as a test statistic. We use the framework of
+:footcite:`coleman2022scalable` to estimate pvalues efficiently.
+
+Here, we simulate two feature sets, one of which is important for the target,
+but significantly smaller in dimensionality than the other feature set, which
+is unimportant for the target. We then use the MIGHT framework to test for
+the importance of each feature set. Instead of leveraging a normal honest random
+forest to estimate the posteriors, here we leverage a multi-view honest random
+forest, with knowledge of the multi-view structure of the ``X`` data.
+
+For other examples of hypothesis testing, see the following:
+
+- :ref:`sphx_glr_auto_examples_hypothesis_testing_plot_MI_gigantic_hypothesis_testing_forest.py`
+- :ref:`sphx_glr_auto_examples_hypothesis_testing_plot_might_auc.py`
+
+For more information on the multi-view decision-tree, see
+:ref:`sphx_glr_auto_examples_multiview_plot_multiview_dtc.py`.
+"""
+
+import matplotlib.pyplot as plt
+import numpy as np
+from sklearn.datasets import make_blobs
+
+from sktree import HonestForestClassifier
+from sktree.stats import FeatureImportanceForestClassifier
+from sktree.tree import DecisionTreeClassifier, MultiViewDecisionTreeClassifier
+
+seed = 12345
+rng = np.random.default_rng(seed)
+
+# %%
+# Simulate data
+# -------------
+# We simulate the two feature sets, and the target variable. We then combine them
+# into a single dataset to perform hypothesis testing.
+seed = 12345
+rng = np.random.default_rng(seed)
+
+
+def make_multiview_classification(
+    n_samples=100, n_features_1=10, n_features_2=1000, cluster_std=2.0, seed=None
+):
+    rng = np.random.default_rng(seed=seed)
+
+    # Create a high-dimensional multiview dataset with a low-dimensional informative
+    # subspace in one view of the dataset.
+    X0_first, y0 = make_blobs(
+        n_samples=n_samples,
+        cluster_std=cluster_std,
+        n_features=n_features_1 // 2,
+        random_state=rng.integers(1, 10000),
+        centers=1,
+    )
+
+    X1_first, y1 = make_blobs(
+        n_samples=n_samples,
+        cluster_std=cluster_std,
+        n_features=n_features_1 // 2,
+        random_state=rng.integers(1, 10000),
+        centers=1,
+    )
+
+    # create the first views for y=0 and y=1
+    X0_first = np.concatenate(
+        (X0_first, rng.standard_normal(size=(n_samples, n_features_1 // 2))), axis=1
+    )
+    X1_first = np.concatenate(
+        (X1_first, rng.standard_normal(size=(n_samples, n_features_1 // 2))), axis=1
+    )
+    y1[:] = 1
+
+    # add the second view for y=0 and y=1, which is completely noise
+    X0 = np.concatenate([X0_first, rng.standard_normal(size=(n_samples, n_features_2))], axis=1)
+    X1 = np.concatenate([X1_first, rng.standard_normal(size=(n_samples, n_features_2))], axis=1)
+
+    # combine the views and targets
+    X = np.vstack((X0, X1))
+    y = np.hstack((y0, y1)).T
+
+    # add noise to the data
+    X = X + rng.standard_normal(size=X.shape)
+
+    return X, y
+
+
+n_samples = 100
+n_features = 10000
+n_features_views = [10, n_features]
+
+X, y = make_multiview_classification(
+    n_samples=n_samples,
+    n_features_1=10,
+    n_features_2=n_features,
+    cluster_std=2.0,
+    seed=seed,
+)
+# %%
+# Perform hypothesis testing using Mutual Information
+# ---------------------------------------------------
+# Here, we use :class:`~sktree.stats.FeatureImportanceForestClassifier` to perform the hypothesis
+# test. The test statistic is computed by comparing the metric (i.e. mutual information) estimated
+# between two forests. One forest is trained on the original dataset, and one forest is trained
+# on a permuted dataset, where the rows of the ``covariate_index`` columns are shuffled randomly.
+#
+# The null distribution is then estimated in an efficient manner using the framework of
+# :footcite:`coleman2022scalable`. The sample evaluations of each forest (i.e. the posteriors)
+# are sampled randomly ``n_repeats`` times to generate a null distribution. The pvalue is then
+# computed as the proportion of samples in the null distribution that are less than the
+# observed test statistic.
+
+n_estimators = 200
+max_features = "sqrt"
+test_size = 0.2
+n_repeats = 1000
+n_jobs = -1
+
+est = FeatureImportanceForestClassifier(
+    estimator=HonestForestClassifier(
+        n_estimators=n_estimators,
+        max_features=max_features,
+        tree_estimator=MultiViewDecisionTreeClassifier(feature_set_ends=n_features_views),
+        random_state=seed,
+        honest_fraction=0.5,
+        n_jobs=n_jobs,
+    ),
+    random_state=seed,
+    test_size=test_size,
+    permute_per_tree=False,
+    sample_dataset_per_tree=False,
+)
+
+mv_results = dict()
+
+print(
+    f"Permutation per tree: {est.permute_per_tree} and sampling dataset per tree: "
+    f"{est.sample_dataset_per_tree}"
+)
+# we test for the first feature set, which is important and thus should return a pvalue < 0.05
+stat, pvalue = est.test(
+    X, y, covariate_index=np.arange(10, dtype=int), metric="mi", n_repeats=n_repeats
+)
+mv_results["important_feature_stat"] = stat
+mv_results["important_feature_pvalue"] = pvalue
+print(f"Estimated MI difference: {stat} with Pvalue: {pvalue}")
+
+# we test for the second feature set, which is unimportant and thus should return a pvalue > 0.05
+stat, pvalue = est.test(
+    X,
+    y,
+    covariate_index=np.arange(10, n_features, dtype=int),
+    metric="mi",
+    n_repeats=n_repeats,
+)
+mv_results["unimportant_feature_stat"] = stat
+mv_results["unimportant_feature_pvalue"] = pvalue
+print(f"Estimated MI difference: {stat} with Pvalue: {pvalue}")
+
+# %%
+# Let's investigate what happens when we do not use a multi-view decision tree.
+# All other parameters are kept the same.
+
+est = FeatureImportanceForestClassifier(
+    estimator=HonestForestClassifier(
+        n_estimators=n_estimators,
+        max_features=max_features,
+        tree_estimator=DecisionTreeClassifier(),
+        random_state=seed,
+        honest_fraction=0.5,
+        n_jobs=n_jobs,
+    ),
+    random_state=seed,
+    test_size=test_size,
+    permute_per_tree=False,
+    sample_dataset_per_tree=False,
+)
+
+rf_results = dict()
+
+# we test for the first feature set, which is important and thus should return a pvalue < 0.05
+stat, pvalue = est.test(
+    X, y, covariate_index=np.arange(10, dtype=int), metric="mi", n_repeats=n_repeats
+)
+rf_results["important_feature_stat"] = stat
+rf_results["important_feature_pvalue"] = pvalue
+print(f"Estimated MI difference using regular decision-trees: {stat} with Pvalue: {pvalue}")
+
+# we test for the second feature set, which is unimportant and thus should return a pvalue > 0.05
+stat, pvalue = est.test(
+    X,
+    y,
+    covariate_index=np.arange(10, n_features, dtype=int),
+    metric="mi",
+    n_repeats=n_repeats,
+)
+rf_results["unimportant_feature_stat"] = stat
+rf_results["unimportant_feature_pvalue"] = pvalue
+print(f"Estimated MI difference using regular decision-trees: {stat} with Pvalue: {pvalue}")
+
+fig, ax = plt.subplots(figsize=(5, 3))
+
+# plot pvalues
+ax.bar(0, rf_results["important_feature_pvalue"], label="Important Feature Set (RF)")
+ax.bar(1, rf_results["unimportant_feature_pvalue"], label="Unimportant Feature Set (RF)")
+ax.bar(2, mv_results["important_feature_pvalue"], label="Important Feature Set (MV)")
+ax.bar(3, mv_results["unimportant_feature_pvalue"], label="Unimportant Feature Set (MV)")
+ax.axhline(0.05, color="k", linestyle="--", label="alpha=0.05")
+ax.set(ylabel="Log10(PValue)", xlim=[-0.5, 3.5], yscale="log")
+ax.legend()
+
+fig.tight_layout()
+plt.show()
+
+# %%
+# Discussion
+# ----------
+# We see that the multi-view decision tree is able to detect the important feature set,
+# while the regular decision tree is not. This is because the regular decision tree
+# is not aware of the multi-view structure of the data, and thus is challenged
+# by the imbalanced dimensionality of the feature sets. I.e. it rarely splits on
+# the first low-dimensional feature set, and thus is unable to detect its importance.
+#
+# Note both approaches still fail to reject the null hypothesis (for alpha of 0.05)
+# when testing the unimportant feature set. The difference in the two approaches
+# show the statistical power of the multi-view decision tree is higher than the
+# regular decision tree in this simulation.
+
+# %%
+# References
+# ----------
+# .. footbibliography::
diff --git a/examples/multiview/README.txt b/examples/multiview/README.txt
new file mode 100644
index 000000000..127ad6e57
--- /dev/null
+++ b/examples/multiview/README.txt
@@ -0,0 +1,6 @@
+.. _multiview_examples:
+
+Multi-view learning with Decision-trees
+---------------------------------------
+
+Examples demonstrating multi-view learning using random forest variants.
diff --git a/examples/multiview/plot_multiview_dtc.py b/examples/multiview/plot_multiview_dtc.py
new file mode 100644
index 000000000..37629093c
--- /dev/null
+++ b/examples/multiview/plot_multiview_dtc.py
@@ -0,0 +1,147 @@
+"""
+============================================================
+Analyze a multi-view dataset with a multi-view random forest
+============================================================
+
+An example using :class:`~sktree.MultiViewRandomForestClassifier` for high-dimensional
+classification when there are multiple feature sets that are correlated with the
+target variable, ``y``. The multi-view random forest is a variant of the random forest
+that samples from each feature set uniformly, instead of sampling from all features
+uniformly. This is useful when there are multiple feature sets, and some feature sets
+have vastly different dimensionality from others.
+
+In this case, ``X`` is a matrix of shape ``(n_samples, n_features)``, where
+``n_features`` is the sum of the number of features in each feature set. If the multi-view
+structure is known, then one can pass this to the multi-view random forest via the
+``feature_set_ends`` parameter.
+
+For a visualization of how the multi-view splitter works, see
+:ref:`sphx_glr_auto_examples_splitters_plot_multiview_axis_aligned_splitter.py`.
+"""
+
+from collections import defaultdict
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import seaborn as sns
+from sklearn.datasets import make_blobs
+from sklearn.model_selection import cross_val_score
+
+from sktree import MultiViewRandomForestClassifier, RandomForestClassifier
+
+seed = 12345
+rng = np.random.default_rng(seed)
+
+
+def make_multiview_classification(
+    n_samples=100, n_features_1=5, n_features_2=1000, cluster_std=2.0, seed=None
+):
+    rng = np.random.default_rng(seed=seed)
+
+    # Create a high-dimensional multiview dataset with a low-dimensional informative
+    # subspace in one view of the dataset.
+    X0_first, y0 = make_blobs(
+        n_samples=n_samples,
+        cluster_std=cluster_std,
+        n_features=n_features_1,
+        random_state=rng.integers(1, 10000),
+        centers=1,
+    )
+
+    X1_first, y1 = make_blobs(
+        n_samples=n_samples,
+        cluster_std=cluster_std,
+        n_features=n_features_1,
+        random_state=rng.integers(1, 10000),
+        centers=1,
+    )
+    y1[:] = 1
+    X0 = np.concatenate([X0_first, rng.standard_normal(size=(n_samples, n_features_2))], axis=1)
+    X1 = np.concatenate([X1_first, rng.standard_normal(size=(n_samples, n_features_2))], axis=1)
+    X = np.vstack((X0, X1))
+    y = np.hstack((y0, y1)).T
+
+    X = X + rng.standard_normal(size=X.shape)
+
+    return X, y
+
+
+# %%
+# Simulate data
+# -------------
+# We simulate a 2-view dataset with both views containing informative low-dimensional features.
+# The first view has five dimensions, while the second view will vary from five to a thousand
+# dimensions. The sample-size will be kept fixed, so we can compare the performance of
+# regular Random forests with Multi-view Random Forests.
+
+n_samples = 100
+n_features_views = np.linspace(5, 10000, 5).astype(int)
+
+datasets = []
+for idx, n_features in enumerate(n_features_views):
+    X, y = make_multiview_classification(
+        n_samples=n_samples,
+        n_features_1=5,
+        n_features_2=n_features,
+        cluster_std=2.0,
+        seed=seed + idx,
+    )
+    datasets.append((X, y))
+
+# %%
+# Fit Random Forest and Multi-view Random Forest
+# ----------------------------------------------
+# Here, we fit both forests over all the datasets.
+
+n_estimators = 100
+n_jobs = -1
+
+scores = defaultdict(list)
+
+for idx, ((X, y), n_features) in enumerate(zip(datasets, n_features_views)):
+    feature_set_ends = np.array([5, n_features + 5])
+
+    rf = RandomForestClassifier(
+        n_estimators=n_estimators,
+        n_jobs=n_jobs,
+        random_state=seed,
+    )
+
+    mvrf = MultiViewRandomForestClassifier(
+        n_estimators=n_estimators,
+        n_jobs=n_jobs,
+        feature_set_ends=n_features_views,
+        random_state=seed,
+    )
+
+    # obtain the cross-validation score
+    rf_score = cross_val_score(rf, X, y, cv=2).mean()
+    mvrf_score = cross_val_score(mvrf, X, y, cv=2).mean()
+
+    scores["rf"].append(rf_score)
+    scores["mvrf"].append(mvrf_score)
+
+# %%
+# Visualize scores and compare performance
+# ----------------------------------------
+# Now, we can compare the performance from the cross-validation experiment.
+
+scores["n_features"] = n_features_views
+df = pd.DataFrame(scores)
+
+# melt the dataframe, to make it easier to plot
+df = pd.melt(df, id_vars=["n_features"], var_name="model", value_name="score")
+
+fig, ax = plt.subplots()
+sns.lineplot(data=df, x="n_features", y="score", marker="o", hue="model", ax=ax)
+ax.set_ylabel("CV Score")
+ax.set_xlabel("Number of features in second view")
+ax.set_title("Random Forest vs Multi-view Random Forest")
+plt.show()
+
+# %%
+# As we can see, the multi-view random forest outperforms the regular random forest
+# as the number of features in the second view increases. This is because the multi-view
+# random forest samples from each feature-set uniformly, while the regular random forest
+# samples from all features uniformly. This is a key difference between the two forests.
diff --git a/examples/sklearn_vs_sktree/README.txt b/examples/sklearn_vs_sktree/README.txt
new file mode 100644
index 000000000..d942d71f8
--- /dev/null
+++ b/examples/sklearn_vs_sktree/README.txt
@@ -0,0 +1,6 @@
+.. _sklearn_examples:
+
+Comparing sklearn and sktree decision trees
+-------------------------------------------
+
+Examples demonstrating the difference between sklearn and sktree decision trees.
diff --git a/examples/plot_iris_dtc.py b/examples/sklearn_vs_sktree/plot_iris_dtc.py
similarity index 100%
rename from examples/plot_iris_dtc.py
rename to examples/sklearn_vs_sktree/plot_iris_dtc.py
diff --git a/examples/sparse_oblique_trees/README.txt b/examples/sparse_oblique_trees/README.txt
new file mode 100644
index 000000000..61c596af1
--- /dev/null
+++ b/examples/sparse_oblique_trees/README.txt
@@ -0,0 +1,6 @@
+.. _sporf_examples:
+
+Sparse oblique projections with oblique decision-trees
+------------------------------------------------------
+
+Examples demonstrating learning using oblique random forests.
diff --git a/examples/plot_extra_oblique_random_forest.py b/examples/sparse_oblique_trees/plot_extra_oblique_random_forest.py
similarity index 98%
rename from examples/plot_extra_oblique_random_forest.py
rename to examples/sparse_oblique_trees/plot_extra_oblique_random_forest.py
index 586edd833..fd920cc16 100644
--- a/examples/plot_extra_oblique_random_forest.py
+++ b/examples/sparse_oblique_trees/plot_extra_oblique_random_forest.py
@@ -59,7 +59,7 @@
 range, hence the complexity is `O(n)`. This makes the algorithm more suitable for large datasets.
 
 To see how sample-sizes affect the performance of Extra Oblique Trees vs regular Oblique Trees,
-see :ref:`sphx_glr_auto_examples_plot_extra_orf_sample_size.py`
+see :ref:`sphx_glr_auto_examples_sparse_oblique_trees_plot_extra_orf_sample_size.py`
 
 References
 ----------
diff --git a/examples/plot_extra_orf_sample_size.py b/examples/sparse_oblique_trees/plot_extra_orf_sample_size.py
similarity index 100%
rename from examples/plot_extra_orf_sample_size.py
rename to examples/sparse_oblique_trees/plot_extra_orf_sample_size.py
diff --git a/examples/plot_oblique_axis_aligned_forests_sparse_parity.py b/examples/sparse_oblique_trees/plot_oblique_axis_aligned_forests_sparse_parity.py
similarity index 100%
rename from examples/plot_oblique_axis_aligned_forests_sparse_parity.py
rename to examples/sparse_oblique_trees/plot_oblique_axis_aligned_forests_sparse_parity.py
diff --git a/examples/plot_oblique_forests_iris.py b/examples/sparse_oblique_trees/plot_oblique_forests_iris.py
similarity index 100%
rename from examples/plot_oblique_forests_iris.py
rename to examples/sparse_oblique_trees/plot_oblique_forests_iris.py
diff --git a/examples/plot_oblique_random_forest.py b/examples/sparse_oblique_trees/plot_oblique_random_forest.py
similarity index 97%
rename from examples/plot_oblique_random_forest.py
rename to examples/sparse_oblique_trees/plot_oblique_random_forest.py
index 34cb8c68a..dca3f19da 100644
--- a/examples/plot_oblique_random_forest.py
+++ b/examples/sparse_oblique_trees/plot_oblique_random_forest.py
@@ -18,7 +18,7 @@
 are subsampled due to computational constraints.
 
 For an example of using extra-oblique trees/forests in practice on data, see the following
-example :ref:`sphx_glr_auto_examples_plot_extra_oblique_random_forest.py`.
+example :ref:`sphx_glr_auto_examples_sparse_oblique_trees_plot_extra_oblique_random_forest.py`.
 """
 
 from datetime import datetime
diff --git a/examples/splitters/plot_multiview_axis_aligned_splitter.py b/examples/splitters/plot_multiview_axis_aligned_splitter.py
index 8fc5b32c2..7ceb200da 100644
--- a/examples/splitters/plot_multiview_axis_aligned_splitter.py
+++ b/examples/splitters/plot_multiview_axis_aligned_splitter.py
@@ -122,3 +122,6 @@
 # In contrast, the normal splitter in :class:`sklearn.tree.DecisionTreeClassifier` samples
 # randomly across all ``n_features`` features because it is not aware of the multi-view structure.
 # This is the key difference between the two splitters.
+#
+# For an example of using the multi-view splitter in practice on simulated data, see
+# :ref:`sphx_glr_auto_examples_multiview_plot_multiview_dtc.py`.
diff --git a/examples/splitters/plot_sparse_projection_matrix.py b/examples/splitters/plot_sparse_projection_matrix.py
index 6eb53e0ca..c77c128a0 100644
--- a/examples/splitters/plot_sparse_projection_matrix.py
+++ b/examples/splitters/plot_sparse_projection_matrix.py
@@ -125,5 +125,5 @@
 # For an example of using oblique trees/forests in practice on data, see the following
 # examples:
 #
-# - :ref:`sphx_glr_auto_examples_plot_oblique_forests_iris.py`
-# - :ref:`sphx_glr_auto_examples_plot_oblique_random_forest.py`
+# - :ref:`sphx_glr_auto_examples_sparse_oblique_trees_plot_oblique_forests_iris.py`
+# - :ref:`sphx_glr_auto_examples_sparse_oblique_trees_plot_oblique_random_forest.py`