From cf8e9119a77e3e2d2d45020d6785912dda3854c9 Mon Sep 17 00:00:00 2001 From: Adam Li Date: Sat, 9 Sep 2023 22:01:18 -0400 Subject: [PATCH] Adds benchmarking scripts and pipeline (#124) Signed-off-by: Adam Li --- .gitignore | 6 + asv.conf.json | 180 +++++++++++ benchmarks/__init__.py | 1 + benchmarks/common.py | 282 ++++++++++++++++++ benchmarks/config.json | 33 ++ benchmarks/datasets.py | 161 ++++++++++ benchmarks/ensemble_supervised.py | 74 +++++ benchmarks/utils.py | 46 +++ benchmarks_nonasv/README.md | 2 + benchmarks_nonasv/bench_mnist.py | 185 ++++++++++++ benchmarks_nonasv/bench_oblique_tree.py | 169 +++++++++++ .../bench_plot_urf.py | 0 12 files changed, 1139 insertions(+) create mode 100644 asv.conf.json create mode 100644 benchmarks/__init__.py create mode 100644 benchmarks/common.py create mode 100644 benchmarks/config.json create mode 100644 benchmarks/datasets.py create mode 100644 benchmarks/ensemble_supervised.py create mode 100644 benchmarks/utils.py create mode 100644 benchmarks_nonasv/README.md create mode 100644 benchmarks_nonasv/bench_mnist.py create mode 100644 benchmarks_nonasv/bench_oblique_tree.py rename {benchmarks => benchmarks_nonasv}/bench_plot_urf.py (100%) diff --git a/.gitignore b/.gitignore index b05462bf4..a1ecb917d 100644 --- a/.gitignore +++ b/.gitignore @@ -28,6 +28,12 @@ doc/samples cover examples/*.jpg +env/ +html/ +results/ +scikit-learn/ +benchmarks/cache/ + # Pycharm .idea/ diff --git a/asv.conf.json b/asv.conf.json new file mode 100644 index 000000000..2dad0808a --- /dev/null +++ b/asv.conf.json @@ -0,0 +1,180 @@ +{ + // The version of the config file format. Do not change, unless + // you know what you are doing. + "version": 1, + + // The name of the project being benchmarked + "project": "scikit-tree", + + // The project's homepage + "project_url": "https://docs.neurodata.io/scikit-tree/", + + // The URL or local path of the source code repository for the + // project being benchmarked + "repo": ".", + + // The Python project's subdirectory in your repo. If missing or + // the empty string, the project is assumed to be located at the root + // of the repository. + // "repo_subdir": "", + + // Customizable commands for building, installing, and + // uninstalling the project. See asv.conf.json documentation. + // + // export ASV_ENV_DIR=/Users/adam2392/miniforge3 + "install_command": [ + // "source /Users/adam2392/miniforge3/etc/profile.d/conda.sh", + // "conda activate base" + "spin build -j 6 --clean", + "pip install --no-build-isolation --editable ." + ], + // "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"], + "build_command": [ + "spin build -j 6 --clean", + "pip install --no-build-isolation --editable ." + ], + + // List of branches to benchmark. If not provided, defaults to "master + // (for git) or "default" (for mercurial). + "branches": ["main"], + // "branches": ["default"], // for mercurial + + // The DVCS being used. If not set, it will be automatically + // determined from "repo" by looking at the protocol in the URL + // (if remote), or by looking for special directories, such as + // ".git" (if local). + "dvcs": "git", + + // The tool to use to create environments. May be "conda", + // "virtualenv" or other value depending on the plugins in use. + // If missing or the empty string, the tool will be automatically + // determined by looking for tools on the PATH environment + // variable. 
+ "environment_type": "conda", + + // timeout in seconds for installing any dependencies in environment + // defaults to 10 min + //"install_timeout": 600, + + // the base URL to show a commit for the project. + "show_commit_url": "https://github.com/neurodata/scikit-tree/commit/", + + // The Pythons you'd like to test against. If not provided, defaults + // to the current version of Python used to run `asv`. + // "pythons": ["/Users/adam2392/miniforge3/envs/sktree/bin/python"], + + // The list of conda channel names to be searched for benchmark + // dependency packages in the specified order + // "conda_channels": ["conda-forge", "defaults"] + + // The matrix of dependencies to test. Each key is the name of a + // package (in PyPI) and the values are version numbers. An empty + // list or empty string indicates to just test against the default + // (latest) version. null indicates that the package is to not be + // installed. If the package to be tested is only available from + // PyPi, and the 'environment_type' is conda, then you can preface + // the package name by 'pip+', and the package will be installed via + // pip (with all the conda available packages installed first, + // followed by the pip installed packages). + // + "matrix": { + "numpy": [], + "scipy": [], + "cython": ["0.29.36"], + "joblib": [], + "threadpoolctl": [], + "pandas": [], + "meson": [], + "meson-python": [], + "scikit-learn": [], + "spin": [], + "click": [], + "rich-click": [], + "doit": [], + "pydevtool": [], + "build": [] + }, + + // Combinations of libraries/python versions can be excluded/included + // from the set to test. Each entry is a dictionary containing additional + // key-value pairs to include/exclude. + // + // An exclude entry excludes entries where all values match. The + // values are regexps that should match the whole string. + // + // An include entry adds an environment. Only the packages listed + // are installed. The 'python' key is required. The exclude rules + // do not apply to includes. + // + // In addition to package names, the following keys are available: + // + // - python + // Python version, as in the *pythons* variable above. + // - environment_type + // Environment type, as above. + // - sys_platform + // Platform, as in sys.platform. Possible values for the common + // cases: 'linux2', 'win32', 'cygwin', 'darwin'. + // + // "exclude": [ + // {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows + // {"environment_type": "conda", "six": null}, // don't run without six on conda + // ], + // + // "include": [ + // // additional env for python2.7 + // {"python": "2.7", "numpy": "1.8"}, + // // additional env if run on windows+conda + // {"platform": "win32", "environment_type": "conda", "python": "2.7", "libpython": ""}, + // ], + + // The directory (relative to the current directory) that benchmarks are + // stored in. If not provided, defaults to "benchmarks" + // "benchmark_dir": "benchmarks", + + // The directory (relative to the current directory) to cache the Python + // environments in. If not provided, defaults to "env" + "env_dir": ".asv/env", + "results_dir": ".asv/results", + "html_dir": ".asv/html" + + // The directory (relative to the current directory) that raw benchmark + // results are stored in. If not provided, defaults to "results". + // "results_dir": "results", + + // The directory (relative to the current directory) that the html tree + // should be written to. If not provided, defaults to "html". 
+ // "html_dir": "html", + + // The number of characters to retain in the commit hashes. + // "hash_length": 8, + + // `asv` will cache results of the recent builds in each + // environment, making them faster to install next time. This is + // the number of builds to keep, per environment. + // "build_cache_size": 2, + + // The commits after which the regression search in `asv publish` + // should start looking for regressions. Dictionary whose keys are + // regexps matching to benchmark names, and values corresponding to + // the commit (exclusive) after which to start looking for + // regressions. The default is to start from the first commit + // with results. If the commit is `null`, regression detection is + // skipped for the matching benchmark. + // + // "regressions_first_commits": { + // "some_benchmark": "352cdf", // Consider regressions only after this commit + // "another_benchmark": null, // Skip regression detection altogether + // }, + + // The thresholds for relative change in results, after which `asv + // publish` starts reporting regressions. Dictionary of the same + // form as in ``regressions_first_commits``, with values + // indicating the thresholds. If multiple entries match, the + // maximum is taken. If no entry matches, the default is 5%. + // + // "regressions_thresholds": { + // "some_benchmark": 0.01, // Threshold of 1% + // "another_benchmark": 0.5, // Threshold of 50% + // }, +} \ No newline at end of file diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py new file mode 100644 index 000000000..be81f6c14 --- /dev/null +++ b/benchmarks/__init__.py @@ -0,0 +1 @@ +"""Benchmark suite for scikit-tree using ASV""" diff --git a/benchmarks/common.py b/benchmarks/common.py new file mode 100644 index 000000000..16c9dd724 --- /dev/null +++ b/benchmarks/common.py @@ -0,0 +1,282 @@ +import itertools +import json +import os +import pickle +import timeit +from abc import ABC, abstractmethod +from multiprocessing import cpu_count +from pathlib import Path + +import numpy as np + + +def get_from_config(): + """Get benchmarks configuration from the config.json file""" + current_path = Path(__file__).resolve().parent + + config_path = current_path / "config.json" + with open(config_path, "r") as config_file: + config_file = "".join(line for line in config_file if line and "//" not in line) + config = json.loads(config_file) + + profile = os.getenv("SKLBENCH_PROFILE", config["profile"]) + + n_jobs_vals_env = os.getenv("SKLBENCH_NJOBS") + if n_jobs_vals_env: + n_jobs_vals = eval(n_jobs_vals_env) + else: + n_jobs_vals = config["n_jobs_vals"] + if not n_jobs_vals: + n_jobs_vals = list(range(1, 1 + cpu_count())) + + cache_path = current_path / "cache" + cache_path.mkdir(exist_ok=True) + (cache_path / "estimators").mkdir(exist_ok=True) + (cache_path / "tmp").mkdir(exist_ok=True) + + save_estimators = os.getenv("SKLBENCH_SAVE_ESTIMATORS", config["save_estimators"]) + save_dir = os.getenv("ASV_COMMIT", "new")[:8] + + if save_estimators: + (cache_path / "estimators" / save_dir).mkdir(exist_ok=True) + + base_commit = os.getenv("SKLBENCH_BASE_COMMIT", config["base_commit"]) + + bench_predict = os.getenv("SKLBENCH_PREDICT", config["bench_predict"]) + bench_transform = os.getenv("SKLBENCH_TRANSFORM", config["bench_transform"]) + + return ( + profile, + n_jobs_vals, + save_estimators, + save_dir, + base_commit, + bench_predict, + bench_transform, + ) + + +def get_estimator_path(benchmark, directory, params, save=False): + """Get path of pickled fitted estimator""" + path = 
Path(__file__).resolve().parent / "cache" + path = (path / "estimators" / directory) if save else (path / "tmp") + + filename = ( + benchmark.__class__.__name__ + "_estimator_" + "_".join(list(map(str, params))) + ".pkl" + ) + + return path / filename + + +def clear_tmp(): + """Clean the tmp directory""" + path = Path(__file__).resolve().parent / "cache" / "tmp" + for child in path.iterdir(): + child.unlink() + + +class Benchmark(ABC): + """Abstract base class for all the benchmarks""" + + timer = timeit.default_timer # wall time + processes = 1 + timeout = 500 + + ( + profile, + n_jobs_vals, + save_estimators, + save_dir, + base_commit, + bench_predict, + bench_transform, + ) = get_from_config() + + if profile == "fast": + warmup_time = 0 + repeat = 1 + number = 1 + min_run_count = 1 + data_size = "small" + elif profile == "regular": + warmup_time = 1 + repeat = (3, 100, 30) + data_size = "small" + elif profile == "large_scale": + warmup_time = 1 + repeat = 3 + number = 1 + data_size = "large" + + @property + @abstractmethod + def params(self): + pass + + +class Estimator(ABC): + """Abstract base class for all benchmarks of estimators""" + + @abstractmethod + def make_data(self, params): + """Return the dataset for a combination of parameters""" + # The datasets are cached using joblib.Memory so it's fast and can be + # called for each repeat + pass + + @abstractmethod + def make_estimator(self, params): + """Return an instance of the estimator for a combination of parameters""" + pass + + def skip(self, params): + """Return True if the benchmark should be skipped for these params""" + return False + + def setup_cache(self): + """Pickle a fitted estimator for all combinations of parameters""" + # This is run once per benchmark class. + + clear_tmp() + + param_grid = list(itertools.product(*self.params)) + + for params in param_grid: + if self.skip(params): + continue + + estimator = self.make_estimator(params) + X, _, y, _ = self.make_data(params) + + estimator.fit(X, y) + + est_path = get_estimator_path( + self, Benchmark.save_dir, params, Benchmark.save_estimators + ) + with est_path.open(mode="wb") as f: + pickle.dump(estimator, f) + + def setup(self, *params): + """Generate dataset and load the fitted estimator""" + # This is run once per combination of parameters and per repeat so we + # need to avoid doing expensive operations there. 
+ + if self.skip(params): + raise NotImplementedError + + self.X, self.X_val, self.y, self.y_val = self.make_data(params) + + est_path = get_estimator_path(self, Benchmark.save_dir, params, Benchmark.save_estimators) + with est_path.open(mode="rb") as f: + self.estimator = pickle.load(f) + + self.make_scorers() + + def time_fit(self, *args): + self.estimator.fit(self.X, self.y) + + def peakmem_fit(self, *args): + self.estimator.fit(self.X, self.y) + + def track_train_score(self, *args): + if hasattr(self.estimator, "predict"): + y_pred = self.estimator.predict(self.X) + else: + y_pred = None + return float(self.train_scorer(self.y, y_pred)) + + def track_test_score(self, *args): + if hasattr(self.estimator, "predict"): + y_val_pred = self.estimator.predict(self.X_val) + else: + y_val_pred = None + return float(self.test_scorer(self.y_val, y_val_pred)) + + +class Predictor(ABC): + """Abstract base class for benchmarks of estimators implementing predict""" + + if Benchmark.bench_predict: + + def time_predict(self, *args): + self.estimator.predict(self.X) + + def peakmem_predict(self, *args): + self.estimator.predict(self.X) + + if Benchmark.base_commit is not None: + + def track_same_prediction(self, *args): + est_path = get_estimator_path(self, Benchmark.base_commit, args, True) + with est_path.open(mode="rb") as f: + estimator_base = pickle.load(f) + + y_val_pred_base = estimator_base.predict(self.X_val) + y_val_pred = self.estimator.predict(self.X_val) + + return np.allclose(y_val_pred_base, y_val_pred) + + @property + @abstractmethod + def params(self): + pass + + +class Transformer(ABC): + """Abstract base class for benchmarks of estimators implementing transform""" + + if Benchmark.bench_transform: + + def time_transform(self, *args): + self.estimator.transform(self.X) + + def peakmem_transform(self, *args): + self.estimator.transform(self.X) + + if Benchmark.base_commit is not None: + + def track_same_transform(self, *args): + est_path = get_estimator_path(self, Benchmark.base_commit, args, True) + with est_path.open(mode="rb") as f: + estimator_base = pickle.load(f) + + X_val_t_base = estimator_base.transform(self.X_val) + X_val_t = self.estimator.transform(self.X_val) + + return np.allclose(X_val_t_base, X_val_t) + + @property + @abstractmethod + def params(self): + pass + + +def get_mem_info(): + """Get information about available memory""" + import psutil + + vm = psutil.virtual_memory() + return { + "memtotal": vm.total, + "memavailable": vm.available, + } + + +def set_mem_rlimit(max_mem=None): + """ + Set address space rlimit + """ + import resource + + if max_mem is None: + mem_info = get_mem_info() + max_mem = int(mem_info["memtotal"] * 0.7) + cur_limit = resource.getrlimit(resource.RLIMIT_AS) + if cur_limit[0] > 0: + max_mem = min(max_mem, cur_limit[0]) + + try: + resource.setrlimit(resource.RLIMIT_AS, (max_mem, cur_limit[1])) + except ValueError: + # on macOS may raise: current limit exceeds maximum limit + pass diff --git a/benchmarks/config.json b/benchmarks/config.json new file mode 100644 index 000000000..d1e12dcc3 --- /dev/null +++ b/benchmarks/config.json @@ -0,0 +1,33 @@ +{ + // "regular": Bencharks are run on small to medium datasets. Each benchmark + // is run multiple times and averaged. + // "fast": Benchmarks are run on small to medium datasets. Each benchmark + // is run only once. May provide unstable benchmarks. + // "large_scale": Benchmarks are run on large datasets. Each benchmark is + // run multiple times and averaged. 
This profile is meant to + // benchmark scalability and will take hours on single core. + // Can be overridden by environment variable SKLBENCH_PROFILE. + "profile": "regular", + + // List of values of n_jobs to use for estimators which accept this + // parameter (-1 means all cores). An empty list means all values from 1 to + // the maximum number of available cores. + // Can be overridden by environment variable SKLBENCH_NJOBS. + "n_jobs_vals": [1], + + // If true, fitted estimators are saved in ./cache/estimators/ + // Can be overridden by environment variable SKLBENCH_SAVE_ESTIMATORS. + "save_estimators": false, + + // Commit hash to compare estimator predictions with. + // If null, predictions are not compared. + // Can be overridden by environment variable SKLBENCH_BASE_COMMIT. + "base_commit": null, + + // If false, the predict (resp. transform) method of the estimators won't + // be benchmarked. + // Can be overridden by environment variables SKLBENCH_PREDICT and + // SKLBENCH_TRANSFORM. + "bench_predict": true, + "bench_transform": true +} \ No newline at end of file diff --git a/benchmarks/datasets.py b/benchmarks/datasets.py new file mode 100644 index 000000000..4139fb020 --- /dev/null +++ b/benchmarks/datasets.py @@ -0,0 +1,161 @@ +from pathlib import Path + +import numpy as np +import scipy.sparse as sp +from joblib import Memory +from sklearn.datasets import ( + fetch_20newsgroups, + fetch_olivetti_faces, + fetch_openml, + load_digits, + make_blobs, + make_classification, + make_regression, +) +from sklearn.decomposition import TruncatedSVD +from sklearn.feature_extraction.text import TfidfVectorizer +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import MaxAbsScaler, StandardScaler + +# memory location for caching datasets +M = Memory(location=str(Path(__file__).resolve().parent / "cache")) + + +@M.cache +def _blobs_dataset(n_samples=500000, n_features=3, n_clusters=100, dtype=np.float32): + X, _ = make_blobs( + n_samples=n_samples, n_features=n_features, centers=n_clusters, random_state=0 + ) + X = X.astype(dtype, copy=False) + + X, X_val = train_test_split(X, test_size=0.1, random_state=0) + return X, X_val, None, None + + +@M.cache +def _20newsgroups_highdim_dataset(n_samples=None, ngrams=(1, 1), dtype=np.float32): + newsgroups = fetch_20newsgroups(random_state=0) + vectorizer = TfidfVectorizer(ngram_range=ngrams, dtype=dtype) + X = vectorizer.fit_transform(newsgroups.data[:n_samples]) + y = newsgroups.target[:n_samples] + + X, X_val, y, y_val = train_test_split(X, y, test_size=0.1, random_state=0) + return X, X_val, y, y_val + + +@M.cache +def _20newsgroups_lowdim_dataset(n_components=100, ngrams=(1, 1), dtype=np.float32): + newsgroups = fetch_20newsgroups() + vectorizer = TfidfVectorizer(ngram_range=ngrams) + X = vectorizer.fit_transform(newsgroups.data) + X = X.astype(dtype, copy=False) + svd = TruncatedSVD(n_components=n_components) + X = svd.fit_transform(X) + y = newsgroups.target + + X, X_val, y, y_val = train_test_split(X, y, test_size=0.1, random_state=0) + return X, X_val, y, y_val + + +@M.cache +def _mnist_dataset(dtype=np.float32): + X, y = fetch_openml("mnist_784", version=1, return_X_y=True, as_frame=False, parser="pandas") + X = X.astype(dtype, copy=False) + X = MaxAbsScaler().fit_transform(X) + + X, X_val, y, y_val = train_test_split(X, y, test_size=0.1, random_state=0) + return X, X_val, y, y_val + + +@M.cache +def _digits_dataset(n_samples=None, dtype=np.float32): + X, y = load_digits(return_X_y=True) + X = 
X.astype(dtype, copy=False)
+ X = MaxAbsScaler().fit_transform(X)
+ X = X[:n_samples]
+ y = y[:n_samples]
+
+ X, X_val, y, y_val = train_test_split(X, y, test_size=0.1, random_state=0)
+ return X, X_val, y, y_val
+
+
+@M.cache
+def _synth_regression_dataset(n_samples=100000, n_features=100, dtype=np.float32):
+ X, y = make_regression(
+ n_samples=n_samples,
+ n_features=n_features,
+ n_informative=n_features // 10,
+ noise=50,
+ random_state=0,
+ )
+ X = X.astype(dtype, copy=False)
+ X = StandardScaler().fit_transform(X)
+
+ X, X_val, y, y_val = train_test_split(X, y, test_size=0.1, random_state=0)
+ return X, X_val, y, y_val
+
+
+@M.cache
+def _synth_regression_sparse_dataset(
+ n_samples=10000, n_features=10000, density=0.01, dtype=np.float32
+):
+ X = sp.random(m=n_samples, n=n_features, density=density, format="csr", random_state=0)
+ X.data = np.random.RandomState(0).randn(X.getnnz())
+ X = X.astype(dtype, copy=False)
+ coefs = sp.random(m=n_features, n=1, density=0.5, random_state=0)
+ coefs.data = np.random.RandomState(0).randn(coefs.getnnz())
+ y = X.dot(coefs.toarray()).reshape(-1)
+ y += 0.2 * y.std() * np.random.randn(n_samples)
+
+ X, X_val, y, y_val = train_test_split(X, y, test_size=0.1, random_state=0)
+ return X, X_val, y, y_val
+
+
+@M.cache
+def _synth_classification_dataset(n_samples=1000, n_features=10000, n_classes=2, dtype=np.float32):
+ X, y = make_classification(
+ n_samples=n_samples,
+ n_features=n_features,
+ n_classes=n_classes,
+ random_state=0,
+ n_informative=n_features,
+ n_redundant=0,
+ )
+ X = X.astype(dtype, copy=False)
+ X = StandardScaler().fit_transform(X)
+
+ X, X_val, y, y_val = train_test_split(X, y, test_size=0.1, random_state=0)
+ return X, X_val, y, y_val
+
+
+@M.cache
+def _olivetti_faces_dataset():
+ dataset = fetch_olivetti_faces(shuffle=True, random_state=42)
+ faces = dataset.data
+ n_samples, n_features = faces.shape
+ faces_centered = faces - faces.mean(axis=0)
+ # local centering
+ faces_centered -= faces_centered.mean(axis=1).reshape(n_samples, -1)
+ X = faces_centered
+
+ X, X_val = train_test_split(X, test_size=0.1, random_state=0)
+ return X, X_val, None, None
+
+
+@M.cache
+def _random_dataset(n_samples=1000, n_features=1000, representation="dense", dtype=np.float32):
+ if representation == "dense":
+ X = np.random.RandomState(0).random_sample((n_samples, n_features))
+ X = X.astype(dtype, copy=False)
+ else:
+ X = sp.random(
+ n_samples,
+ n_features,
+ density=0.05,
+ format="csr",
+ dtype=dtype,
+ random_state=0,
+ )
+
+ X, X_val = train_test_split(X, test_size=0.1, random_state=0)
+ return X, X_val, None, None
diff --git a/benchmarks/ensemble_supervised.py b/benchmarks/ensemble_supervised.py
new file mode 100644
index 000000000..cba1e8189
--- /dev/null
+++ b/benchmarks/ensemble_supervised.py
@@ -0,0 +1,74 @@
+from sktree.ensemble import ObliqueRandomForestClassifier
+
+from .common import Benchmark, Estimator, Predictor
+from .datasets import (
+ _20newsgroups_highdim_dataset,
+ _20newsgroups_lowdim_dataset,
+ _synth_classification_dataset,
+)
+from .utils import make_gen_classif_scorers
+
+
+class ObliqueRandomForestClassifierBenchmark(Predictor, Estimator, Benchmark):
+ """
+ Benchmarks for ObliqueRandomForestClassifier.
+ """ + + param_names = ["representation", "n_jobs"] + params = (["dense", "sparse"], Benchmark.n_jobs_vals) + + def setup_cache(self): + super().setup_cache() + + def make_data(self, params): + representation, n_jobs = params + + if representation == "sparse": + data = _20newsgroups_highdim_dataset() + else: + data = _20newsgroups_lowdim_dataset() + + return data + + def make_estimator(self, params): + representation, n_jobs = params + + n_estimators = 500 if Benchmark.data_size == "large" else 100 + + estimator = ObliqueRandomForestClassifier( + n_estimators=n_estimators, + min_samples_split=10, + max_features="log2", + n_jobs=n_jobs, + random_state=0, + ) + + return estimator + + def make_scorers(self): + make_gen_classif_scorers(self) + + +class ObliqueRandomForestClassifierBenchmarkSynth(Predictor, Estimator, Benchmark): + """ + Benchmarks for Oblique RF Classifier using synthetic classification data. + """ + + param_names = [] + params = () + + def setup_cache(self): + super().setup_cache() + + def make_data(self, params): + data = _synth_classification_dataset(n_samples=10000, n_features=100, n_classes=5) + + return data + + def make_estimator(self, params): + estimator = ObliqueRandomForestClassifier(max_leaf_nodes=15, random_state=0) + + return estimator + + def make_scorers(self): + make_gen_classif_scorers(self) diff --git a/benchmarks/utils.py b/benchmarks/utils.py new file mode 100644 index 000000000..f52be99cb --- /dev/null +++ b/benchmarks/utils.py @@ -0,0 +1,46 @@ +import numpy as np +from sklearn.metrics import balanced_accuracy_score, r2_score + + +def neg_mean_inertia(X, labels, centers): + return -(np.asarray(X - centers[labels]) ** 2).sum(axis=1).mean() + + +def make_gen_classif_scorers(caller): + caller.train_scorer = balanced_accuracy_score + caller.test_scorer = balanced_accuracy_score + + +def make_gen_reg_scorers(caller): + caller.test_scorer = r2_score + caller.train_scorer = r2_score + + +def neg_mean_data_error(X, U, V): + return -np.sqrt(((X - U.dot(V)) ** 2).mean()) + + +def make_dict_learning_scorers(caller): + caller.train_scorer = lambda _, __: ( + neg_mean_data_error( + caller.X, caller.estimator.transform(caller.X), caller.estimator.components_ + ) + ) + caller.test_scorer = lambda _, __: ( + neg_mean_data_error( + caller.X_val, + caller.estimator.transform(caller.X_val), + caller.estimator.components_, + ) + ) + + +def explained_variance_ratio(Xt, X): + return np.var(Xt, axis=0).sum() / np.var(X, axis=0).sum() + + +def make_pca_scorers(caller): + caller.train_scorer = lambda _, __: caller.estimator.explained_variance_ratio_.sum() + caller.test_scorer = lambda _, __: ( + explained_variance_ratio(caller.estimator.transform(caller.X_val), caller.X_val) + ) diff --git a/benchmarks_nonasv/README.md b/benchmarks_nonasv/README.md new file mode 100644 index 000000000..e735c0741 --- /dev/null +++ b/benchmarks_nonasv/README.md @@ -0,0 +1,2 @@ +A set of scripts that can be run to analyze runtime and performance of scikit-tree +estimators. diff --git a/benchmarks_nonasv/bench_mnist.py b/benchmarks_nonasv/bench_mnist.py new file mode 100644 index 000000000..0e9adec99 --- /dev/null +++ b/benchmarks_nonasv/bench_mnist.py @@ -0,0 +1,185 @@ +""" +======================= +MNIST dataset benchmark +======================= + +Benchmark on the MNIST dataset. The dataset comprises 70,000 samples +and 784 features. Here, we consider the task of predicting +10 classes - digits from 0 to 9 from their raw images. 
By contrast to the +covertype dataset, the feature space is homogeneous. + +Example of output : + [..] + + Classification performance: + =========================== + Classifier train-time test-time error-rate + ------------------------------------------------------------ + ExtraTrees 42.99s 0.57s 0.0294 + RandomForest 42.70s 0.49s 0.0318 + ObliqueRandomForest 135.81s 0.56s 0.0486 + PatchObliqueRandomForest 16.67s 0.06s 0.0824 + ExtraObliqueRandomForest 20.69s 0.02s 0.1219 + dummy 0.00s 0.01s 0.8973 +""" + +# License: BSD 3 clause + +import argparse +import os +from time import time + +import numpy as np +from joblib import Memory +from sklearn.datasets import fetch_openml, get_data_home +from sklearn.dummy import DummyClassifier +from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier +from sklearn.metrics import zero_one_loss +from sklearn.utils import check_array + +from sktree import ObliqueRandomForestClassifier, PatchObliqueRandomForestClassifier + +# Memoize the data extraction and memory map the resulting +# train / test splits in readonly mode +memory = Memory(os.path.join(get_data_home(), "mnist_benchmark_data"), mmap_mode="r") + + +@memory.cache +def load_data(dtype=np.float32, order="F"): + """Load the data, then cache and memmap the train/test split""" + ###################################################################### + # Load dataset + print("Loading dataset...") + data = fetch_openml("mnist_784", as_frame=True, parser="pandas") + X = check_array(data["data"], dtype=dtype, order=order) + y = data["target"] + + # Normalize features + X = X / 255 + + # Create train-test split (as [Joachims, 2006]) + print("Creating train-test split...") + n_train = 60000 + X_train = X[:n_train] + y_train = y[:n_train] + X_test = X[n_train:] + y_test = y[n_train:] + + return X_train, X_test, y_train, y_test + + +ESTIMATORS = { + "dummy": DummyClassifier(), + "ExtraTrees": ExtraTreesClassifier(), + "RandomForest": RandomForestClassifier(), + "ObliqueRandomForest": ObliqueRandomForestClassifier(), + "PatchObliqueRandomForest": PatchObliqueRandomForestClassifier(), +} + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--classifiers", + nargs="+", + choices=ESTIMATORS, + type=str, + default=["ExtraTrees"], + help="list of classifiers to benchmark.", + ) + parser.add_argument( + "--n-jobs", + nargs="?", + default=1, + type=int, + help=("Number of concurrently running workers for " "models that support parallelism."), + ) + parser.add_argument( + "--order", + nargs="?", + default="C", + type=str, + choices=["F", "C"], + help="Allow to choose between fortran and C ordered data", + ) + parser.add_argument( + "--random-seed", + nargs="?", + default=0, + type=int, + help="Common seed used by random number generator.", + ) + args = vars(parser.parse_args()) + + print(__doc__) + + X_train, X_test, y_train, y_test = load_data(order=args["order"]) + + print("") + print("Dataset statistics:") + print("===================") + print("%s %d" % ("number of features:".ljust(25), X_train.shape[1])) + print("%s %d" % ("number of classes:".ljust(25), np.unique(y_train).size)) + print("%s %s" % ("data type:".ljust(25), X_train.dtype)) + print( + "%s %d (size=%dMB)" + % ( + "number of train samples:".ljust(25), + X_train.shape[0], + int(X_train.nbytes / 1e6), + ) + ) + print( + "%s %d (size=%dMB)" + % ( + "number of test samples:".ljust(25), + X_test.shape[0], + int(X_test.nbytes / 1e6), + ) + ) + + print() + print("Training Classifiers") + 
print("====================") + error, train_time, test_time = {}, {}, {} + for name in sorted(args["classifiers"]): + print("Training %s ... " % name, end="") + estimator = ESTIMATORS[name] + estimator_params = estimator.get_params() + + estimator.set_params( + **{p: args["random_seed"] for p in estimator_params if p.endswith("random_state")} + ) + + if "n_jobs" in estimator_params: + estimator.set_params(n_jobs=args["n_jobs"]) + + time_start = time() + estimator.fit(X_train, y_train) + train_time[name] = time() - time_start + + time_start = time() + y_pred = estimator.predict(X_test) + test_time[name] = time() - time_start + + error[name] = zero_one_loss(y_test, y_pred) + + print("done") + + print() + print("Classification performance:") + print("===========================") + print( + "{0: <24} {1: >10} {2: >11} {3: >12}".format( + "Classifier ", "train-time", "test-time", "error-rate" + ) + ) + print("-" * 60) + for name in sorted(args["classifiers"], key=error.get): + print( + "{0: <23} {1: >10.2f}s {2: >10.2f}s {3: >12.4f}".format( + name, train_time[name], test_time[name], error[name] + ) + ) + + print() diff --git a/benchmarks_nonasv/bench_oblique_tree.py b/benchmarks_nonasv/bench_oblique_tree.py new file mode 100644 index 000000000..246972646 --- /dev/null +++ b/benchmarks_nonasv/bench_oblique_tree.py @@ -0,0 +1,169 @@ +""" +To run this, you'll need to have installed. + + * scikit-learn + * scikit-tree + +Does two benchmarks + +First, we fix a training set, increase the number of +samples to classify and plot number of classified samples as a +function of time. + +In the second benchmark, we increase the number of dimensions of the +training set, classify a sample and plot the time taken as a function +of the number of dimensions. +""" +import gc +from datetime import datetime + +import matplotlib.pyplot as plt +import numpy as np + +# to store the results +scikit_classifier_results = [] +scikit_regressor_results = [] +sklearn_classifier_results = [] +sklearn_regressor_results = [] + +mu_second = 0.0 + 10**6 # number of microseconds in a second + + +def bench_scikitlearn_tree_classifier(X, Y): + """Benchmark with scikit-learn decision tree classifier""" + + from sklearn.tree import DecisionTreeClassifier + + gc.collect() + + # start time + tstart = datetime.now() + clf = DecisionTreeClassifier() + clf.fit(X, Y).predict(X) + delta = datetime.now() - tstart + # stop time + + sklearn_classifier_results.append(delta.seconds + delta.microseconds / mu_second) + + +def bench_scikitlearn_tree_regressor(X, Y): + """Benchmark with scikit-learn decision tree regressor""" + + from sklearn.tree import DecisionTreeRegressor + + gc.collect() + + # start time + tstart = datetime.now() + clf = DecisionTreeRegressor() + clf.fit(X, Y).predict(X) + delta = datetime.now() - tstart + # stop time + + sklearn_regressor_results.append(delta.seconds + delta.microseconds / mu_second) + + +def bench_oblique_tree_classifier(X, Y): + """Benchmark with scikit-learn decision tree classifier""" + + from sktree.tree import ObliqueDecisionTreeClassifier + + gc.collect() + + # start time + tstart = datetime.now() + clf = ObliqueDecisionTreeClassifier() + clf.fit(X, Y).predict(X) + delta = datetime.now() - tstart + # stop time + + scikit_classifier_results.append(delta.seconds + delta.microseconds / mu_second) + + +def bench_oblique_tree_regressor(X, Y): + """Benchmark with scikit-learn decision tree regressor""" + + from sktree.tree import ObliqueDecisionTreeRegressor + + gc.collect() + + # start time + tstart = 
datetime.now() + clf = ObliqueDecisionTreeRegressor() + clf.fit(X, Y).predict(X) + delta = datetime.now() - tstart + # stop time + + scikit_regressor_results.append(delta.seconds + delta.microseconds / mu_second) + + +if __name__ == "__main__": + print("============================================") + print("Warning: this is going to take a looong time") + print("============================================") + + n = 10 + step = 10000 + n_samples = 10000 + dim = 10 + n_classes = 10 + for i in range(n): + print("============================================") + print("Entering iteration %s of %s" % (i, n)) + print("============================================") + n_samples += step + X = np.random.randn(n_samples, dim) + Y = np.random.randint(0, n_classes, (n_samples,)) + bench_oblique_tree_classifier(X, Y) + bench_scikitlearn_tree_classifier(X, Y) + Y = np.random.randn(n_samples) + bench_oblique_tree_regressor(X, Y) + bench_scikitlearn_tree_regressor(X, Y) + + xx = range(0, n * step, step) + plt.figure("scikit-tree oblique tree benchmark results") + plt.subplot(211) + plt.title("Learning with varying number of samples") + plt.plot(xx, scikit_classifier_results, "g-", label="classification") + plt.plot(xx, scikit_regressor_results, "r-", label="regression") + plt.plot(xx, sklearn_regressor_results, "b--", label="sklearn-regression") + plt.plot(xx, sklearn_classifier_results, "o--", label="sklearn-classification") + plt.legend(loc="upper left") + plt.xlabel("number of samples") + plt.ylabel("Time (s)") + + scikit_classifier_results = [] + scikit_regressor_results = [] + sklearn_classifier_results = [] + sklearn_regressor_results = [] + n = 10 + step = 500 + start_dim = 500 + n_classes = 10 + + dim = start_dim + for i in range(0, n): + print("============================================") + print("Entering iteration %s of %s" % (i, n)) + print("============================================") + dim += step + X = np.random.randn(100, dim) + Y = np.random.randint(0, n_classes, (100,)) + bench_oblique_tree_classifier(X, Y) + bench_scikitlearn_tree_classifier(X, Y) + Y = np.random.randn(100) + bench_oblique_tree_regressor(X, Y) + bench_scikitlearn_tree_regressor(X, Y) + + xx = np.arange(start_dim, start_dim + n * step, step) + plt.subplot(212) + plt.title("Learning in high dimensional spaces") + plt.plot(xx, scikit_classifier_results, "g-", label="classification") + plt.plot(xx, scikit_regressor_results, "r-", label="regression") + plt.plot(xx, sklearn_regressor_results, "b--", label="sklearn-regression") + plt.plot(xx, sklearn_classifier_results, "o--", label="sklearn-classification") + plt.legend(loc="upper left") + plt.xlabel("number of dimensions") + plt.ylabel("Time (s)") + plt.axis("tight") + plt.show() diff --git a/benchmarks/bench_plot_urf.py b/benchmarks_nonasv/bench_plot_urf.py similarity index 100% rename from benchmarks/bench_plot_urf.py rename to benchmarks_nonasv/bench_plot_urf.py
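
A minimal sketch of how a further ASV benchmark could be added on top of this patch: a new class subclasses the Benchmark/Estimator/Predictor helpers from benchmarks/common.py and supplies make_data, make_estimator, and make_scorers. The class name, hyperparameter values, and use of PatchObliqueRandomForestClassifier below are illustrative assumptions (mirroring ObliqueRandomForestClassifierBenchmarkSynth above and the import used in bench_mnist.py); they are not part of this changeset.

# Hypothetical example -- not included in the patch above.
from sktree import PatchObliqueRandomForestClassifier

from .common import Benchmark, Estimator, Predictor
from .datasets import _synth_classification_dataset
from .utils import make_gen_classif_scorers


class PatchObliqueRandomForestClassifierBenchmarkSynth(Predictor, Estimator, Benchmark):
    """Benchmarks for PatchObliqueRandomForestClassifier on synthetic classification data."""

    param_names = []
    params = ()

    def setup_cache(self):
        # Fit and pickle the estimator once per benchmark class (see common.Estimator).
        super().setup_cache()

    def make_data(self, params):
        # Dataset helpers are cached with joblib.Memory, so repeated setup() calls stay cheap.
        return _synth_classification_dataset(n_samples=10000, n_features=100, n_classes=5)

    def make_estimator(self, params):
        return PatchObliqueRandomForestClassifier(max_leaf_nodes=15, random_state=0)

    def make_scorers(self):
        make_gen_classif_scorers(self)

Because Predictor and Estimator already define the generic time_*, peakmem_*, and track_* methods, only the three factory methods above are benchmark-specific; ASV discovers the class automatically as long as the module lives in the default benchmarks/ directory referenced by asv.conf.json.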