Skip to content

Commit

Permalink
feat: add alpha index in views
Browse files Browse the repository at this point in the history
  • Loading branch information
prokolyvakis committed Nov 8, 2023
1 parent d3de1cb commit 3c99f3c
Show file tree
Hide file tree
Showing 9 changed files with 61 additions and 22 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/test_workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ on: push

jobs:

test_fast:
test_workflow:

runs-on: ubuntu-latest

Expand All @@ -30,5 +30,5 @@ jobs:
- name: Run test suite
run: |
pdm run pytest -v --maxfail 2
pdm run pytest -v --maxfail 2 tests
timeout-minutes: 10
10 changes: 8 additions & 2 deletions mudpod/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,15 @@ def __post_init__(self):
f'The type {self.dtype} is not supported!'
)

def compute(self, arr: np.ndarray, o: np.ndarray) -> np.ndarray:
def compute(self, arr: np.ndarray, o: np.ndarray, a: float) -> np.ndarray:
"""Computes the distances from a given point.
Args:
arr: A 2D numpy array with the first dimension being the number of different
datapoints and the second being the features' size.
o: A 2D numpy array with the first dimension always equal to 1 and
the second being the features' size.
a: An exponent power for the computed distances.
Returns:
The distances with respect to the observer `o`.
"""
Expand All @@ -44,12 +45,17 @@ def dist(x: np.ndarray) -> float:
c = np.cov(arr.T)
return mahalanobis(x, o, c)

return np.apply_along_axis(
ds = np.apply_along_axis(
dist,
0,
arr.T
)

if np.isclose(a, 1.0):
return ds

return np.power(ds, a)


def assert_correct_input_size(arr: np.ndarray) -> None:
"""Assert that the tensor is a 2D array.
Expand Down
10 changes: 9 additions & 1 deletion mudpod/observer.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,19 @@ class PercentileObserver(Observer):
dtype: InitVar[str] = 'mahalanobis'
# The distance type.

alpha: float = 1.0
# The \alpha-unimodality positive index.

distance: Distance = field(init=False, repr=True)
# The distance.

def __post_init__(self, dtype: str):
    """Validate the observer's fields and build its distance helper.

    Args:
        dtype: The distance type, forwarded to `Distance`.

    Raises:
        AssertionError: If the percentile does not lie in (0, 1) or if
            alpha is not strictly positive.
    """
    assert 0 < self.percentile < 1, 'The percentile should lie in (0, 1) interval.'

    # The backslash is doubled on purpose: in a non-raw string literal `\a`
    # is the BEL control character, which would garble the message.
    assert self.alpha > 0, (
        f'The \\alpha-unimodality index should be positive, {self.alpha} was given!'
    )

    self.distance = Distance(dtype=dtype)

def get(self, arr: np.ndarray) -> np.ndarray:
Expand All @@ -53,7 +61,7 @@ def get(self, arr: np.ndarray) -> np.ndarray:
assert_correct_input_size(arr)
m = np.mean(arr, axis=0)

ds = self.distance.compute(arr, m)
ds = self.distance.compute(arr, m, self.alpha)

t = np.percentile(
ds,
Expand Down
9 changes: 8 additions & 1 deletion mudpod/projections.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,10 +80,17 @@ class View:
dtype: InitVar[str] = 'mahalanobis'
# The distance type.

alpha: float = 1.0
# The \alpha-unimodality positive index.

distance: Distance = field(init=False, repr=True)
# The distance.

def __post_init__(self, dtype: str):
    """Validate alpha and build the view's distance helper.

    Args:
        dtype: The distance type, forwarded to `Distance`.

    Raises:
        AssertionError: If alpha is not strictly positive.
    """
    # The backslash is doubled on purpose: in a non-raw string literal `\a`
    # is the BEL control character, which would garble the message.
    assert self.alpha > 0, (
        f'The \\alpha-unimodality index should be positive, {self.alpha} was given!'
    )

    self.distance = Distance(dtype=dtype)

def project(self, arr: np.ndarray, seeding: bool = False) -> np.ndarray:
Expand Down Expand Up @@ -125,4 +132,4 @@ def distances(self, arr: np.ndarray, seeding: bool = False) -> np.ndarray:
x = self.project(arr, seeding)
o = self.observer.get(x)

return self.distance.compute(x, o)
return self.distance.compute(x, o, self.alpha)
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "mudpod"
version = "0.1"
version = "0.2"
description = " Testing Unimodality in Multivariate Euclidean Space"
authors = [
{name = "Prodromos Kolyvakis", email = "prokolyvakis@gmail.com"},
Expand Down
7 changes: 4 additions & 3 deletions tests/mudpod/test_clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,13 @@


@pytest.mark.parametrize("projector", [JohnsonLindenstrauss()])
@pytest.mark.parametrize("observer", [PercentileObserver(0.95)])
@pytest.mark.parametrize("observer_fn", [lambda a: PercentileObserver(0.95, a)])
@pytest.mark.parametrize("alpha", [1, 2, 4])
@pytest.mark.parametrize("n_features", [200, 400])
def test_dip_means_fit(projector, observer, n_features) -> None:
def test_dip_means_fit(projector, observer_fn, alpha, n_features) -> None:
"""Test that the dip means class is properly initialized"""

v = View(projector, observer)
v = View(projector, observer_fn(alpha), alpha)
dp = DipMeans(view=v, pval=0.05, sim_num=10, workers_num=10)

x, _ = make_blobs(
Expand Down
18 changes: 16 additions & 2 deletions tests/mudpod/test_observer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@
set_seed(42)


@pytest.mark.parametrize("alpha", [1, 2, 4])
@pytest.mark.parametrize("dtype", sorted(['euclidean', 'mahalanobis']))
def test_percentile_observer(dtype: str) -> None:
def test_percentile_observer(alpha: float, dtype: str) -> None:
"""Test that the percentile observer works smoothly."""
os = PercentileObserver(percentile=0.12, dtype=dtype)
os = PercentileObserver(percentile=0.12, alpha=alpha, dtype=dtype)

m = np.array([0, 0])
c = np.array([[6, -3], [-3, 3.5]])
Expand All @@ -21,3 +22,16 @@ def test_percentile_observer(dtype: str) -> None:
o = os.get(x)

assert o is not None


def test_percentile_observer_assertions() -> None:
    """Check that PercentileObserver rejects invalid constructor arguments."""

    # Each entry is a set of keyword arguments that must trip an assertion:
    # a percentile below the interval, one above it, and a negative alpha.
    invalid_arguments = (
        {'percentile': -1},
        {'percentile': 12},
        {'percentile': 0.42, 'alpha': -1},
    )

    for kwargs in invalid_arguments:
        with pytest.raises(AssertionError):
            PercentileObserver(**kwargs)
7 changes: 4 additions & 3 deletions tests/mudpod/test_projections.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,14 @@ def test_johnson_lindenstrauss_dim(samples_num, projection_dim) -> None:
"projector",
[IdentityProjector(), JohnsonLindenstrauss()]
)
@pytest.mark.parametrize("observer", [PercentileObserver(0.95)])
@pytest.mark.parametrize("observer_fn", [lambda a: PercentileObserver(0.95, a)])
@pytest.mark.parametrize("alpha", [1, 2, 4])
@pytest.mark.parametrize("dtype", sorted(['euclidean', 'mahalanobis']))
@pytest.mark.parametrize("n_features", [200, 400])
def test_view(projector, observer, dtype, n_features) -> None:
def test_view(projector, observer_fn, alpha, dtype, n_features) -> None:
"""A functional test for the View class."""

v = View(projector, observer, dtype)
v = View(projector, observer_fn(alpha), alpha)

x, _ = make_blobs(
n_samples=1000,
Expand Down
16 changes: 9 additions & 7 deletions tests/mudpod/test_unimodality.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,13 @@


@pytest.mark.parametrize("projector", [JohnsonLindenstrauss()])
@pytest.mark.parametrize("observer", [PercentileObserver(0.95)])
@pytest.mark.parametrize("observer_fn", [lambda a: PercentileObserver(0.95, a)])
@pytest.mark.parametrize("alpha", [1, 2, 4])
@pytest.mark.parametrize("n_features", [200, 400])
def test_unimodality_tester(projector, observer, n_features) -> None:
def test_unimodality_tester(projector, observer_fn, alpha, n_features) -> None:
"""Test that the unimodality tester works properly."""

v = View(projector, observer)
v = View(projector, observer_fn(alpha), alpha)

x, _ = make_blobs(
n_samples=1000,
Expand All @@ -35,15 +36,16 @@ def test_unimodality_tester(projector, observer, n_features) -> None:


@pytest.mark.parametrize("projector", [JohnsonLindenstrauss()])
@pytest.mark.parametrize("observer", [PercentileObserver(0.95)])
@pytest.mark.parametrize("observer_fn", [lambda a: PercentileObserver(0.95, a)])
@pytest.mark.parametrize("alpha", [1, 2, 4])
@pytest.mark.parametrize("n_features", [200, 400])
@pytest.mark.parametrize("workers_num", [0, 1, 10])
@pytest.mark.parametrize("sim_num", [10, 20])
def test_monte_carlo_unimodality_tester(
projector, observer, n_features, workers_num, sim_num) -> None:
projector, observer_fn, alpha, n_features, workers_num, sim_num) -> None:
"""Test that the Monte Carlo unimodality tester works properly."""

v = View(projector, observer)
v = View(projector, observer_fn(alpha), alpha)

x, _ = make_blobs(
n_samples=1000,
Expand All @@ -61,7 +63,7 @@ def test_monte_carlo_unimodality_tester(


def test_monte_carlo_unimodality_tester_assertions() -> None:
"""Test that the assertions of the MonteCarloUnimodalityTest."""
"""Test that the assertions of the MonteCarloUnimodalityTest work properly."""

o = PercentileObserver(0.95)

Expand Down

0 comments on commit 3c99f3c

Please sign in to comment.