diff --git a/.circleci/config.yml b/.circleci/config.yml index 06eeee38b..7943c45ab 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -128,7 +128,7 @@ jobs: ls build-install echo "After..." ls build-install/** - export PYTHONPATH=$PWD/build-install/usr/lib/python3.8/site-packages + export PYTHONPATH=$PWD/build-install/usr/lib/python3.9/site-packages ./spin docs - store_artifacts: diff --git a/build_requirements.txt b/build_requirements.txt index 13c52c519..95bc6c98e 100644 --- a/build_requirements.txt +++ b/build_requirements.txt @@ -3,7 +3,7 @@ meson-python>=0.16.0 cython>=3.0.10 ninja numpy -scikit-learn>=1.4.2 +scikit-learn>=1.5.0 click rich-click doit diff --git a/doc/conf.py b/doc/conf.py index 70cbec662..89cc3d732 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -89,6 +89,7 @@ ("py:mod", "sktree.tree"), ("py:mod", "sktree.stats"), ("py:class", "sklearn.utils.metadata_routing.MetadataRequest"), + ("py:obj", "MetadataRouter"), ] # The name of a reST role (builtin or Sphinx extension) to use as the default diff --git a/doc/whats_new/v0.8.rst b/doc/whats_new/v0.8.rst index d2ffd5776..f021d1412 100644 --- a/doc/whats_new/v0.8.rst +++ b/doc/whats_new/v0.8.rst @@ -18,6 +18,9 @@ Changelog encoded as infinity value. This is now fixed, and the estimators will raise an ValueError if missing-values are encountered in ``X`` input array. By `Adam Li`_ (:pr:`#264`) +- |Feature| Simulations in ``sktree.datasets.hyppo`` now throw a warning instead + of an error when ``n_dim`` is less than ``n_informative``, and set ``n_informative`` to ``n_dim``. 
+ By `Sambit Panda`_ (:pr:`#279`) Code and Documentation Contributors ----------------------------------- diff --git a/sktree/datasets/hyppo.py b/sktree/datasets/hyppo.py index 5e5b961dc..1fcaa1d1d 100644 --- a/sktree/datasets/hyppo.py +++ b/sktree/datasets/hyppo.py @@ -1,3 +1,5 @@ +import warnings + import numpy as np from scipy.integrate import nquad from scipy.stats import entropy, multivariate_normal @@ -159,9 +161,11 @@ class distribution approaches the first class distribution by a factor of :math: .. footbibliography:: """ if n_dim < n_informative: - raise ValueError( - f"Number of informative dimensions {n_informative} must be less than number " - f"of dimensions, {n_dim}" + warnings.warn( + f"Number of informative dimensions {n_informative} must be less than number " + f"of dimensions, {n_dim}. Setting n_informative to n_dim.", + RuntimeWarning, ) + n_informative = n_dim if simulation not in MARRON_WAND_SIMS.keys(): raise ValueError( @@ -347,9 +351,11 @@ def make_trunk_mixture_classification( .. footbibliography:: """ if n_dim < n_informative: - raise ValueError( - f"Number of informative dimensions {n_informative} must be less than number " - f"of dimensions, {n_dim}" + warnings.warn( + f"Number of informative dimensions {n_informative} must be less than number " + f"of dimensions, {n_dim}. Setting n_informative to n_dim.", + RuntimeWarning, ) + n_informative = n_dim if mix < 0 or mix > 1: # type: ignore raise ValueError("Mix must be between 0 and 1.") @@ -506,9 +512,11 @@ def make_trunk_classification( .. footbibliography:: """ if n_dim < n_informative: - raise ValueError( - f"Number of informative dimensions {n_informative} must be less than number " - f"of dimensions, {n_dim}" + warnings.warn( + f"Number of informative dimensions {n_informative} must be less than number " + f"of dimensions, {n_dim}. 
Setting n_informative to n_dim.", + RuntimeWarning, ) + n_informative = n_dim rng = np.random.default_rng(seed=seed) diff --git a/sktree/datasets/tests/test_hyppo.py b/sktree/datasets/tests/test_hyppo.py index bc7e20e69..458b8521e 100644 --- a/sktree/datasets/tests/test_hyppo.py +++ b/sktree/datasets/tests/test_hyppo.py @@ -90,7 +90,7 @@ def test_make_trunk_generator_errors(trunk_gen): trunk_gen(n_samples=50, rho=0.5, band_type="invalid_band_type") # Test with an invalid band type - with pytest.raises(ValueError, match="Number of informative dimensions"): + with pytest.warns(RuntimeWarning, match="Number of informative dimensions"): trunk_gen(n_samples=50, n_dim=10, n_informative=11, rho=0.5) diff --git a/sktree/experimental/monte_carlo.py b/sktree/experimental/monte_carlo.py index aab6a25cd..565a4da40 100644 --- a/sktree/experimental/monte_carlo.py +++ b/sktree/experimental/monte_carlo.py @@ -4,7 +4,8 @@ from numpy.typing import ArrayLike from scipy.sparse import issparse from sklearn.neighbors import NearestNeighbors -from sklearn.utils import _approximate_mode, _safe_indexing, check_array, check_consistent_length +from sklearn.utils import _safe_indexing, check_array, check_consistent_length +from sklearn.utils.extmath import _approximate_mode def _conditional_shuffle(nbrs: ArrayLike, replace: bool = False, seed=None) -> ArrayLike: