Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

change ValueError to RuntimeWarning when n_dim < n_informative #279

Merged
merged 9 commits into from
May 31, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ jobs:
ls build-install
echo "After..."
ls build-install/**
export PYTHONPATH=$PWD/build-install/usr/lib/python3.8/site-packages
export PYTHONPATH=$PWD/build-install/usr/lib/python3.9/site-packages
./spin docs

- store_artifacts:
Expand Down
3 changes: 3 additions & 0 deletions doc/whats_new/v0.8.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ Changelog
encoded as an infinity value. This is now fixed, and the estimators will raise a
``ValueError`` if missing values are encountered in the ``X`` input array.
By `Adam Li`_ (:pr:`#264`)
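- |Feature| Simulations in ``sktree.datasets.hyppo`` now emit a ``RuntimeWarning``
instead of raising an error when the number of dimensions is less than the number of
informative dimensions (``n_dim < n_informative``); ``n_informative`` is then set to ``n_dim``.
By `Sambit Panda`_ (:pr:`#279`)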

Code and Documentation Contributors
-----------------------------------
Expand Down
26 changes: 17 additions & 9 deletions sktree/datasets/hyppo.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import warnings

import numpy as np
from scipy.integrate import nquad
from scipy.stats import entropy, multivariate_normal
Expand Down Expand Up @@ -159,9 +161,11 @@ class distribution approaches the first class distribution by a factor of :math:
.. footbibliography::
"""
if n_dim < n_informative:
raise ValueError(
f"Number of informative dimensions {n_informative} must be less than number "
f"of dimensions, {n_dim}"
n_informative = n_dim
warnings.warn(
"Number of informative dimensions {n_informative} must be less than number "
f"of dimensions, {n_dim}. Setting n_informative to n_dim.",
RuntimeWarning,
)
if simulation not in MARRON_WAND_SIMS.keys():
raise ValueError(
Expand Down Expand Up @@ -347,9 +351,11 @@ def make_trunk_mixture_classification(
.. footbibliography::
"""
if n_dim < n_informative:
raise ValueError(
f"Number of informative dimensions {n_informative} must be less than number "
f"of dimensions, {n_dim}"
n_informative = n_dim
warnings.warn(
"Number of informative dimensions {n_informative} must be less than number "
f"of dimensions, {n_dim}. Setting n_informative to n_dim.",
RuntimeWarning,
)
if mix < 0 or mix > 1: # type: ignore
raise ValueError("Mix must be between 0 and 1.")
Expand Down Expand Up @@ -506,9 +512,11 @@ def make_trunk_classification(
.. footbibliography::
"""
if n_dim < n_informative:
raise ValueError(
f"Number of informative dimensions {n_informative} must be less than number "
f"of dimensions, {n_dim}"
n_informative = n_dim
warnings.warn(
"Number of informative dimensions {n_informative} must be less than number "
f"of dimensions, {n_dim}. Setting n_informative to n_dim.",
RuntimeWarning,
)
rng = np.random.default_rng(seed=seed)

Expand Down
2 changes: 1 addition & 1 deletion sktree/datasets/tests/test_hyppo.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def test_make_trunk_generator_errors(trunk_gen):
trunk_gen(n_samples=50, rho=0.5, band_type="invalid_band_type")

# Test with more informative dimensions than total dimensions
with pytest.raises(ValueError, match="Number of informative dimensions"):
with pytest.warns(RuntimeWarning, match="Number of informative dimensions"):
trunk_gen(n_samples=50, n_dim=10, n_informative=11, rho=0.5)


Expand Down
Loading