Skip to content

Commit

Permalink
Extend giotto-ai#428 to Amplitude and PairwiseDistance, add regression test, improve edge case handling in _metrics
Browse files Browse the repository at this point in the history
  • Loading branch information
ulupo committed Sep 7, 2020
1 parent 4d5ec8c commit 690af90
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 29 deletions.
16 changes: 12 additions & 4 deletions gtda/diagrams/_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,8 @@ def persistence_images(diagrams, sampling, step_size, sigma, weights):
# WARNING: modifies `diagrams` in place
persistence_images_ = \
np.zeros((len(diagrams), len(sampling), len(sampling)), dtype=float)
# If both step sizes are zero, we return a trivial image
if (step_size == 0).all():
# If either step size is zero, we return a trivial image
if (step_size == 0).any():
return persistence_images_

# Transform diagrams from (birth, death, dim) to (birth, persistence, dim)
Expand Down Expand Up @@ -310,9 +310,13 @@ def _parallel_pairwise(
none_dict = {dim: None for dim in homology_dimensions}
samplings = effective_metric_params.pop("samplings", none_dict)
step_sizes = effective_metric_params.pop("step_sizes", none_dict)
if metric in ["heat", "persistence_image"]:
parallel_kwargs = {"mmap_mode": "c"}
else:
parallel_kwargs = {}

n_columns = len(X2)
distance_matrices = Parallel(n_jobs=n_jobs)(
distance_matrices = Parallel(n_jobs=n_jobs, **parallel_kwargs)(
delayed(metric_func)(
_subdiagrams(X1, [dim], remove_dim=True),
_subdiagrams(X2[s], [dim], remove_dim=True),
Expand Down Expand Up @@ -416,8 +420,12 @@ def _parallel_amplitude(X, metric, metric_params, homology_dimensions, n_jobs):
none_dict = {dim: None for dim in homology_dimensions}
samplings = effective_metric_params.pop("samplings", none_dict)
step_sizes = effective_metric_params.pop("step_sizes", none_dict)
if metric in ["heat", "persistence_image"]:
parallel_kwargs = {"mmap_mode": "c"}
else:
parallel_kwargs = {}

amplitude_arrays = Parallel(n_jobs=n_jobs)(
amplitude_arrays = Parallel(n_jobs=n_jobs, **parallel_kwargs)(
delayed(amplitude_func)(
_subdiagrams(X[s], [dim], remove_dim=True),
sampling=samplings[dim],
Expand Down
4 changes: 2 additions & 2 deletions gtda/diagrams/distance.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,9 +229,9 @@ def transform(self, X, y=None):
"""
check_is_fitted(self)
X = check_diagrams(X, copy=True)
Xt = check_diagrams(X, copy=True)

Xt = _parallel_pairwise(X, self._X, self.metric,
Xt = _parallel_pairwise(Xt, self._X, self.metric,
self.effective_metric_params_,
self.homology_dimensions_,
self.n_jobs)
Expand Down
6 changes: 3 additions & 3 deletions gtda/diagrams/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@ def transform(self, X, y=None):
self.effective_metric_params_,
self.homology_dimensions_,
self.n_jobs)
if self.order is None:
return Xt
Xt = np.linalg.norm(Xt, axis=1, ord=self.order).reshape(-1, 1)
if self.order is not None:
Xt = np.linalg.norm(Xt, axis=1, ord=self.order).reshape(-1, 1)

return Xt
27 changes: 24 additions & 3 deletions gtda/diagrams/tests/test_distance.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ def test_not_fitted(transformer):

@pytest.mark.parametrize(('metric', 'metric_params'), parameters_distance)
@pytest.mark.parametrize('order', [2., None])
@pytest.mark.parametrize('n_jobs', [1, 2, 4])
@pytest.mark.parametrize('n_jobs', [1, 2, -1])
def test_dd_transform(metric, metric_params, order, n_jobs):
# X_fit == X_transform
dd = PairwiseDistance(metric=metric, metric_params=metric_params,
Expand Down Expand Up @@ -297,7 +297,7 @@ def test_dd_transform(metric, metric_params, order, n_jobs):

@pytest.mark.parametrize(('metric', 'metric_params'), parameters_amplitude)
@pytest.mark.parametrize('order', [None, 2.])
@pytest.mark.parametrize('n_jobs', [1, 2, 4])
@pytest.mark.parametrize('n_jobs', [1, 2, -1])
def test_da_transform(metric, metric_params, order, n_jobs):
n_expected_columns = n_homology_dimensions if order is None else 1

Expand All @@ -315,7 +315,7 @@ def test_da_transform(metric, metric_params, order, n_jobs):

@pytest.mark.parametrize(('metric', 'metric_params', 'order'),
[('bottleneck', None, None)])
@pytest.mark.parametrize('n_jobs', [1, 2, 4])
@pytest.mark.parametrize('n_jobs', [1, 2, -1])
def test_da_transform_bottleneck(metric, metric_params, order, n_jobs):
da = Amplitude(metric=metric, metric_params=metric_params,
order=order, n_jobs=n_jobs)
Expand All @@ -340,3 +340,24 @@ def test_pi_zero_weight_function(transformer_cls, order, Xnew):
X_res = transformer.fit(X1).transform(Xnew)

assert np.array_equal(X_res, np.zeros_like(X_res))


@pytest.mark.parametrize('metric', ['heat', 'persistence_image'])
@pytest.mark.parametrize('transformer_cls', [Amplitude, PairwiseDistance])
def test_large_hk_pi_parallel(metric, transformer_cls):
    """Regression test: no read-only error on large inputs in parallel.

    Amplitude and PairwiseDistance must not fail with a read-only error
    when the input array is at least 1MB, the metric is either 'heat' or
    'persistence_image', and more than 1 process is used (which triggers
    joblib's use of memmaps).
    """
    n_points = 300000
    coords = np.linspace(0, 100, n_points)

    # A single diagram whose first two columns are identical and whose
    # third column is all zeros; a leading sample axis is then added.
    single_diagram = np.stack([coords, coords, np.zeros(n_points)]).T
    diagrams = single_diagram[np.newaxis, ...]

    params = {'sigma': 1, 'n_bins': 10}
    transformer = transformer_cls(
        metric=metric, metric_params=params, n_jobs=2
    )
    result = transformer.fit_transform(diagrams)

    # The test expects an (almost) all-zero output for this input.
    assert_almost_equal(result, np.zeros_like(result))
42 changes: 25 additions & 17 deletions gtda/diagrams/tests/test_features_representations.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,9 @@ def test_fit_transform_plot_wrong_hom_dims(transformer):
transformer.fit_transform_plot(X, sample=0, homology_dimensions=(2,))


def test_pe_transform():
pe = PersistenceEntropy()
@pytest.mark.parametrize('n_jobs', [1, 2, -1])
def test_pe_transform(n_jobs):
pe = PersistenceEntropy(n_jobs=n_jobs)
diagram_res = np.array([[1., 0.91829583405]])

assert_almost_equal(pe.fit_transform(X), diagram_res)
Expand All @@ -82,22 +83,25 @@ def test_pe_transform():


@pytest.mark.parametrize('n_bins', list(range(10, 51, 10)))
def test_bc_transform_shape(n_bins):
bc = BettiCurve(n_bins=n_bins)
@pytest.mark.parametrize('n_jobs', [1, 2, -1])
def test_bc_transform_shape(n_bins, n_jobs):
bc = BettiCurve(n_bins=n_bins, n_jobs=n_jobs)
X_res = bc.fit_transform(X)
assert X_res.shape == (1, bc._n_dimensions, n_bins)


@pytest.mark.parametrize('n_bins', list(range(10, 51, 10)))
@pytest.mark.parametrize('n_layers', list(range(1, 10)))
def test_pl_transform_shape(n_bins, n_layers):
pl = PersistenceLandscape(n_bins=n_bins, n_layers=n_layers)
@pytest.mark.parametrize('n_jobs', [1, 2, -1])
def test_pl_transform_shape(n_bins, n_layers, n_jobs):
pl = PersistenceLandscape(n_bins=n_bins, n_layers=n_layers, n_jobs=n_jobs)
X_res = pl.fit_transform(X)
assert X_res.shape == (1, pl._n_dimensions, n_layers, n_bins)


def test_pi_zero_weight_function():
pi = PersistenceImage(weight_function=lambda x: x * 0.)
@pytest.mark.parametrize('n_jobs', [1, 2, -1])
def test_pi_zero_weight_function(n_jobs):
pi = PersistenceImage(weight_function=lambda x: x * 0., n_jobs=n_jobs)
X_res = pi.fit_transform(X)
assert np.array_equal(X_res, np.zeros_like(X_res))

Expand Down Expand Up @@ -153,18 +157,20 @@ def test_large_pi_null_parallel():
assert_almost_equal(pi.fit_transform(diagrams)[0], 0)


def test_silhouette_transform():
sht = Silhouette(n_bins=31, power=1.)
@pytest.mark.parametrize('n_jobs', [1, 2, -1])
def test_silhouette_transform(n_jobs):
sht = Silhouette(n_bins=31, power=1., n_jobs=n_jobs)
X_sht_res = np.array([0., 0.05, 0.1, 0.15, 0.2, 0.25, 0.2, 0.15, 0.1,
0.05, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0., 0.05,
0.1, 0.15, 0.2, 0.25, 0.2, 0.15, 0.1, 0.05, 0.])

assert_almost_equal(sht.fit_transform(X)[0][0], X_sht_res)


def test_silhouette_big_order():
@pytest.mark.parametrize('n_jobs', [1, 2, -1])
def test_silhouette_big_order(n_jobs):
diagrams = np.array([[[0, 2, 0], [1, 4, 0]]])
sht_10 = Silhouette(n_bins=41, power=10.)
sht_10 = Silhouette(n_bins=41, power=10., n_jobs=n_jobs)
X_sht_res = np.array([0., 0.00170459, 0.00340919, 0.00511378, 0.00681837,
0.00852296, 0.01022756, 0.01193215, 0.01363674,
0.01534133, 0.01704593, 0.11363674, 0.21022756,
Expand All @@ -179,10 +185,11 @@ def test_silhouette_big_order():
assert_almost_equal(sht_10.fit_transform(diagrams)[0][0], X_sht_res)


@pytest.mark.parametrize('transformer', [HeatKernel(), PersistenceImage()])
def test_all_pts_the_same(transformer):
@pytest.mark.parametrize('transformer_cls', [HeatKernel, PersistenceImage])
@pytest.mark.parametrize('n_jobs', [1, 2, -1])
def test_all_pts_the_same(transformer_cls, n_jobs):
X = np.zeros((1, 4, 3))
X_res = transformer.fit_transform(X)
X_res = transformer_cls(n_jobs=n_jobs).fit_transform(X)
assert np.array_equal(X_res, np.zeros_like(X_res))


Expand Down Expand Up @@ -222,13 +229,14 @@ def get_input(pts, dims):
return X


@pytest.mark.parametrize('n_jobs', [1, 2])
@given(pts_gen, dims_gen)
def test_hk_shape(pts, dims):
def test_hk_shape(n_jobs, pts, dims):
n_bins = 10
X = get_input(pts, dims)
sigma = (np.max(X[:, :, :2]) - np.min(X[:, :, :2])) / 2

hk = HeatKernel(sigma=sigma, n_bins=n_bins)
hk = HeatKernel(sigma=sigma, n_bins=n_bins, n_jobs=n_jobs)
num_dimensions = len(np.unique(dims))
X_t = hk.fit_transform(X)

Expand Down

0 comments on commit 690af90

Please sign in to comment.