feat: add alpha index in views

prokolyvakis · Nov 8, 2023 · 3c99f3c · 3c99f3c
1 parent d3de1cb
commit 3c99f3c
Show file tree

Hide file tree

Showing 9 changed files with 61 additions and 22 deletions.
diff --git a/.github/workflows/test_workflow.yml b/.github/workflows/test_workflow.yml
@@ -6,7 +6,7 @@ on: push
 
 jobs:
 
-    test_fast:
+    test_workflow:
 
         runs-on: ubuntu-latest
 
@@ -30,5 +30,5 @@ jobs:
 
         -   name: Run test suite
             run: |
-                pdm run pytest -v --maxfail 2
+                pdm run pytest -v --maxfail 2 tests
             timeout-minutes: 10
diff --git a/mudpod/misc.py b/mudpod/misc.py
@@ -26,14 +26,15 @@ def __post_init__(self):
             f'The type {self.dtype} is not supported!'
         )
 
-    def compute(self, arr: np.ndarray, o: np.ndarray) -> np.ndarray:
+    def compute(self, arr: np.ndarray, o: np.ndarray, a: float) -> np.ndarray:
         """Computes the distances from a given point.
 
         Args:
             arr: A 2D numpy array with the first dimension being the number of different
                     datapoints and the second being the features' size.
             o: A 2D numpy array with the first dimension always equal to 1 and
                     the second being the features' size.
+            a: The exponent (power) to which the computed distances are raised.
         Returns:
             The distances with respect to the observer `o`.
         """
@@ -44,12 +45,17 @@ def dist(x: np.ndarray) -> float:
                 c = np.cov(arr.T)
                 return mahalanobis(x, o, c)
 
-        return np.apply_along_axis(
+        ds = np.apply_along_axis(
             dist,
             0,
             arr.T
         )
 
+        if np.isclose(a, 1.0):
+            return ds
+
+        return np.power(ds, a)
+
 
 def assert_correct_input_size(arr: np.ndarray) -> None:
     """Assert that the tensor is a 2D array.

diff --git a/mudpod/observer.py b/mudpod/observer.py
@@ -34,11 +34,19 @@ class PercentileObserver(Observer):
     dtype: InitVar[str] = 'mahalanobis'
     # The distance type.
 
+    alpha: float = 1.0
+    # The positive \alpha-unimodality index, passed as the exponent of the distances.
+
     distance: Distance = field(init=False, repr=True)
     # The distance.
 
     def __post_init__(self, dtype: str):
         assert 0 < self.percentile < 1, 'The percentile should lie in (0, 1) interval.'
+
+        assert self.alpha > 0, (
+            f'The \alpha-unimodality index should be positive, {self.alpha} was given!'
+        )
+
         self.distance = Distance(dtype=dtype)
 
     def get(self, arr: np.ndarray) -> np.ndarray:
@@ -53,7 +61,7 @@ def get(self, arr: np.ndarray) -> np.ndarray:
         assert_correct_input_size(arr)
         m = np.mean(arr, axis=0)
 
-        ds = self.distance.compute(arr, m)
+        ds = self.distance.compute(arr, m, self.alpha)
 
         t = np.percentile(
             ds,

diff --git a/mudpod/projections.py b/mudpod/projections.py
@@ -80,10 +80,17 @@ class View:
     dtype: InitVar[str] = 'mahalanobis'
     # The distance type.
 
+    alpha: float = 1.0
+    # The positive \alpha-unimodality index, passed as the exponent of the distances.
+
     distance: Distance = field(init=False, repr=True)
     # The distance.
 
     def __post_init__(self, dtype: str):
+        assert self.alpha > 0, (
+            f'The \alpha-unimodality index should be positive, {self.alpha} was given!'
+        )
+
         self.distance = Distance(dtype=dtype)
 
     def project(self, arr: np.ndarray, seeding: bool = False) -> np.ndarray:
@@ -125,4 +132,4 @@ def distances(self, arr: np.ndarray, seeding: bool = False) -> np.ndarray:
         x = self.project(arr, seeding)
         o = self.observer.get(x)
 
-        return self.distance.compute(x, o)
+        return self.distance.compute(x, o, self.alpha)
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "mudpod"
-version = "0.1"
+version = "0.2"
 description = " Testing Unimodality in Multivariate Euclidean Space"
 authors = [
     {name = "Prodromos Kolyvakis", email = "prokolyvakis@gmail.com"},

diff --git a/tests/mudpod/test_clustering.py b/tests/mudpod/test_clustering.py
@@ -13,12 +13,13 @@
 
 
 @pytest.mark.parametrize("projector", [JohnsonLindenstrauss()])
-@pytest.mark.parametrize("observer", [PercentileObserver(0.95)])
+@pytest.mark.parametrize("observer_fn", [lambda a: PercentileObserver(0.95, a)])
+@pytest.mark.parametrize("alpha", [1, 2, 4])
 @pytest.mark.parametrize("n_features", [200, 400])
-def test_dip_means_fit(projector, observer, n_features) -> None:
+def test_dip_means_fit(projector, observer_fn, alpha, n_features) -> None:
     """Test that the dip means class is properly initialized"""
 
-    v = View(projector, observer)
+    v = View(projector, observer_fn(alpha), alpha)
     dp = DipMeans(view=v, pval=0.05, sim_num=10, workers_num=10)
 
     x, _ = make_blobs(

diff --git a/tests/mudpod/test_observer.py b/tests/mudpod/test_observer.py
@@ -9,10 +9,11 @@
 set_seed(42)
 
 
+@pytest.mark.parametrize("alpha", [1, 2, 4])
 @pytest.mark.parametrize("dtype", sorted(['euclidean', 'mahalanobis']))
-def test_percentile_observer(dtype: str) -> None:
+def test_percentile_observer(alpha: float, dtype: str) -> None:
     """Test that the percentile observer works smoothly."""
-    os = PercentileObserver(percentile=0.12, dtype=dtype)
+    os = PercentileObserver(percentile=0.12, alpha=alpha, dtype=dtype)
 
     m = np.array([0, 0])
     c = np.array([[6, -3], [-3, 3.5]])
@@ -21,3 +22,16 @@ def test_percentile_observer(dtype: str) -> None:
     o = os.get(x)
 
     assert o is not None
+
+
+def test_percentile_observer_assertions() -> None:
+    """Test that the assertions of the PercentileObserver work properly."""
+
+    with pytest.raises(AssertionError):
+        _ = PercentileObserver(percentile=-1)
+
+    with pytest.raises(AssertionError):
+        _ = PercentileObserver(percentile=12)
+
+    with pytest.raises(AssertionError):
+        _ = PercentileObserver(percentile=0.42, alpha=-1)
diff --git a/tests/mudpod/test_projections.py b/tests/mudpod/test_projections.py
@@ -35,13 +35,14 @@ def test_johnson_lindenstrauss_dim(samples_num, projection_dim) -> None:
     "projector",
     [IdentityProjector(), JohnsonLindenstrauss()]
 )
-@pytest.mark.parametrize("observer", [PercentileObserver(0.95)])
+@pytest.mark.parametrize("observer_fn", [lambda a: PercentileObserver(0.95, a)])
+@pytest.mark.parametrize("alpha", [1, 2, 4])
 @pytest.mark.parametrize("dtype", sorted(['euclidean', 'mahalanobis']))
 @pytest.mark.parametrize("n_features", [200, 400])
-def test_view(projector, observer, dtype, n_features) -> None:
+def test_view(projector, observer_fn, alpha, dtype, n_features) -> None:
     """A functional test for the View class."""
 
-    v = View(projector, observer, dtype)
+    v = View(projector, observer_fn(alpha), alpha)
 
     x, _ = make_blobs(
         n_samples=1000,

diff --git a/tests/mudpod/test_unimodality.py b/tests/mudpod/test_unimodality.py
@@ -14,12 +14,13 @@
 
 
 @pytest.mark.parametrize("projector", [JohnsonLindenstrauss()])
-@pytest.mark.parametrize("observer", [PercentileObserver(0.95)])
+@pytest.mark.parametrize("observer_fn", [lambda a: PercentileObserver(0.95, a)])
+@pytest.mark.parametrize("alpha", [1, 2, 4])
 @pytest.mark.parametrize("n_features", [200, 400])
-def test_unimodality_tester(projector, observer, n_features) -> None:
+def test_unimodality_tester(projector, observer_fn, alpha, n_features) -> None:
     """Test that the unimodality tester works properly."""
 
-    v = View(projector, observer)
+    v = View(projector, observer_fn(alpha), alpha)
 
     x, _ = make_blobs(
         n_samples=1000,
@@ -35,15 +36,16 @@ def test_unimodality_tester(projector, observer, n_features) -> None:
 
 
 @pytest.mark.parametrize("projector", [JohnsonLindenstrauss()])
-@pytest.mark.parametrize("observer", [PercentileObserver(0.95)])
+@pytest.mark.parametrize("observer_fn", [lambda a: PercentileObserver(0.95, a)])
+@pytest.mark.parametrize("alpha", [1, 2, 4])
 @pytest.mark.parametrize("n_features", [200, 400])
 @pytest.mark.parametrize("workers_num", [0, 1, 10])
 @pytest.mark.parametrize("sim_num", [10, 20])
 def test_monte_carlo_unimodality_tester(
-        projector, observer, n_features, workers_num, sim_num) -> None:
+        projector, observer_fn, alpha, n_features, workers_num, sim_num) -> None:
     """Test that the Monte Carlo unimodality tester works properly."""
 
-    v = View(projector, observer)
+    v = View(projector, observer_fn(alpha), alpha)
 
     x, _ = make_blobs(
         n_samples=1000,
@@ -61,7 +63,7 @@ def test_monte_carlo_unimodality_tester(
 
 
 def test_monte_carlo_unimodality_tester_assertions() -> None:
-    """Test that the assertions of the MonteCarloUnimodalityTest."""
+    """Test that the assertions of the MonteCarloUnimodalityTest work properly."""
 
     o = PercentileObserver(0.95)