Skip to content

Commit

Permalink
Enable edge collapser for non-zero vertex weights (#558)
Browse files Browse the repository at this point in the history
* Enable edge collapser for non-zero vertex weights

And hence also for negative vertex and edge weights

* Remove unnecessary warning filters in test

* Add relevant entry in release notes

* Add test of collapser with negative weights
  • Loading branch information
ulupo authored Jan 13, 2021
1 parent 8dc0464 commit 1f4b48e
Show file tree
Hide file tree
Showing 4 changed files with 83 additions and 65 deletions.
1 change: 1 addition & 0 deletions doc/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ Major Features and Improvements
- Wheels for Python 3.9 have been added (`#528 <https://github.com/giotto-ai/giotto-tda/pull/528>`_).
- Weighted Rips filtrations, and in particular distance-to-measure (DTM) based filtrations, are now supported in ``ripser`` and by the new ``WeightedRipsPersistence`` transformer (`#541 <https://github.com/giotto-ai/giotto-tda/pull/541>`_).
- See "Backwards-Incompatible Changes" for major improvements to ``ParallelClustering`` and therefore ``make_mapper_pipeline`` which are also major breaking changes.
- GUDHI's edge collapser can now be used with arbitrary vertex and edge weights (`#558 <https://github.com/giotto-ai/giotto-tda/pull/558>`_).
- ``GraphGeodesicDistance`` can now take rectangular input (the number of vertices is inferred to be ``max(x.shape)``), and ``KNeighborsGraph`` can now take sparse input (`#537 <https://github.com/giotto-ai/giotto-tda/pull/537>`_).
- ``VietorisRipsPersistence`` now takes a ``metric_params`` parameter (`#541 <https://github.com/giotto-ai/giotto-tda/pull/541>`_).

Expand Down
30 changes: 16 additions & 14 deletions gtda/externals/python/ripser_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,19 @@ def _resolve_symmetry_conflicts(coo):
return _row, _col, _data


def _collapse_coo(row, col, data, thresh):
"""Run edge collapser on off-diagonal data and then reinsert diagonal
data."""
diag = row == col
row_diag, col_diag, data_diag = row[diag], col[diag], data[diag]
row, col, data = gtda_collapser. \
flag_complex_collapse_edges_coo(row, col, data, thresh)

return (np.hstack([row_diag, row]),
np.hstack([col_diag, col]),
np.hstack([data_diag, data]))


def _compute_dtm_weights(dm, n_neighbors, weights_r):
with catch_warnings():
simplefilter("ignore", category=EfficiencyWarning)
Expand Down Expand Up @@ -401,7 +414,6 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean",
use_sparse_computer = True
if issparse(dm):
row, col, data = _resolve_symmetry_conflicts(dm.tocoo()) # Upper diag
has_nonzeros_in_diag = (dm.diagonal() != 0).any()

if weights is not None:
if (dm < 0).nnz:
Expand Down Expand Up @@ -437,16 +449,9 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean",
data = _weight_filtration_sparse(row, col, data, weights,
weights_p)

has_nonzeros_in_diag = np.any(weights)

if collapse_edges:
if has_nonzeros_in_diag:
warn("Edge collapses are not supported when any of the "
"diagonal entries are non-zero. Computing persistent "
"homology without using edge collapse.")
else:
row, col, data = gtda_collapser.\
flag_complex_collapse_edges_coo(row, col, data, thresh)
row, col, data = _collapse_coo(row, col, data, thresh)

else:
if weights is not None:
if (dm < 0).any():
Expand Down Expand Up @@ -477,10 +482,7 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean",
(row, col) = np.triu_indices_from(dm)
data = dm[(row, col)]
if collapse_edges:
warn("Edge collapses are not supported when any of the "
"diagonal entries are non-zero. Computing persistent "
"homology without using edge collapse.")

row, col, data = _collapse_coo(row, col, data, thresh)
elif collapse_edges:
row, col, data = gtda_collapser.\
flag_complex_collapse_edges_dense(dm, thresh)
Expand Down
116 changes: 66 additions & 50 deletions gtda/externals/python/tests/test_ripser.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,71 +14,68 @@ def get_dense_distance_matrices(draw):
"""Generate 2d dense square arrays of floats, with zero along the
diagonal."""
shapes = draw(integers(min_value=2, max_value=30))
distance_matrix = draw(arrays(dtype=np.float,
elements=floats(allow_nan=False,
allow_infinity=True,
min_value=0),
shape=(shapes, shapes), unique=False))
np.fill_diagonal(distance_matrix, 0)
return distance_matrix
dm = draw(arrays(dtype=np.float,
elements=floats(allow_nan=False,
allow_infinity=True,
min_value=0),
shape=(shapes, shapes), unique=False))
np.fill_diagonal(dm, 0)
return dm


@composite
def get_sparse_distance_matrices(draw):
"""Generate 2d upper triangular sparse matrices of floats, with zero along
the diagonal."""
shapes = draw(integers(min_value=2, max_value=40))
distance_matrix = draw(arrays(dtype=np.float,
elements=floats(allow_nan=False,
allow_infinity=True,
min_value=0),
shape=(shapes, shapes), unique=False))
distance_matrix = np.triu(distance_matrix, k=1)
distance_matrix = coo_matrix(distance_matrix)
row, col, data = \
distance_matrix.row, distance_matrix.col, distance_matrix.data
dm = draw(arrays(dtype=np.float,
elements=floats(allow_nan=False,
allow_infinity=True,
min_value=0),
shape=(shapes, shapes), unique=False))
dm = np.triu(dm, k=1)
dm = coo_matrix(dm)
row, col, data = dm.row, dm.col, dm.data
not_inf_idx = data != np.inf
row = row[not_inf_idx]
col = col[not_inf_idx]
data = data[not_inf_idx]
shape_kwargs = {} if data.size else {"shape": (0, 0)}
distance_matrix = coo_matrix((data, (row, col)), **shape_kwargs)
return distance_matrix
dm = coo_matrix((data, (row, col)), **shape_kwargs)
return dm


@settings(deadline=500)
@given(distance_matrix=get_sparse_distance_matrices())
def test_coo_below_diagonal_and_mixed_same_as_above(distance_matrix):
@given(dm=get_sparse_distance_matrices())
def test_coo_below_diagonal_and_mixed_same_as_above(dm):
"""Test that if we feed sparse matrices representing the same undirected
weighted graph we obtain the same results regardless of whether all entries
are above the diagonal, all are below the diagonal, or neither.
Furthermore, test that conflicts between stored data in the upper and lower
triangle are resolved in favour of the upper triangle."""
ripser_kwargs = {"maxdim": 2, "metric": "precomputed"}

pd_above = ripser(distance_matrix, **ripser_kwargs)['dgms']
pd_above = ripser(dm, **ripser_kwargs)['dgms']

pd_below = ripser(distance_matrix.T, **ripser_kwargs)['dgms']
pd_below = ripser(dm.T, **ripser_kwargs)['dgms']

_row, _col, _data = (distance_matrix.row, distance_matrix.col,
distance_matrix.data)
_row, _col, _data = dm.row, dm.col, dm.data
coo_shape_kwargs = {} if _data.size else {"shape": (0, 0)}
to_transpose_mask = np.full(len(_row), False)
to_transpose_mask[np.random.choice(np.arange(len(_row)),
size=len(_row) // 2,
replace=False)] = True
row = np.concatenate([_col[to_transpose_mask], _row[~to_transpose_mask]])
col = np.concatenate([_row[to_transpose_mask], _col[~to_transpose_mask]])
distance_matrix_mixed = coo_matrix((_data, (row, col)), **coo_shape_kwargs)
pd_mixed = ripser(distance_matrix_mixed, **ripser_kwargs)['dgms']
dm_mixed = coo_matrix((_data, (row, col)), **coo_shape_kwargs)
pd_mixed = ripser(dm_mixed, **ripser_kwargs)['dgms']

row = np.concatenate([row, _row[to_transpose_mask]])
col = np.concatenate([col, _col[to_transpose_mask]])
data = np.random.random(len(row))
data[:len(_row)] = _data
distance_matrix_conflicts = coo_matrix((data, (row, col)),
**coo_shape_kwargs)
pd_conflicts = ripser(distance_matrix_conflicts, **ripser_kwargs)['dgms']
dm_conflicts = coo_matrix((data, (row, col)), **coo_shape_kwargs)
pd_conflicts = ripser(dm_conflicts, **ripser_kwargs)['dgms']

for i in range(ripser_kwargs["maxdim"] + 1):
pd_above[i] = np.sort(pd_above[i], axis=0)
Expand All @@ -91,17 +88,14 @@ def test_coo_below_diagonal_and_mixed_same_as_above(distance_matrix):
@pytest.mark.parametrize('thresh', [False, True])
@pytest.mark.parametrize('coeff', [2, 7])
@settings(deadline=500)
@given(distance_matrix=get_dense_distance_matrices())
def test_collapse_consistent_with_no_collapse_dense(thresh,
coeff, distance_matrix):
thresh = np.max(distance_matrix) / 2 if thresh else np.inf
@given(dm=get_dense_distance_matrices())
def test_collapse_consistent_with_no_collapse_dense(thresh, coeff, dm):
thresh = np.max(dm) / 2 if thresh else np.inf
maxdim = 3
pd_collapse = ripser(distance_matrix, thresh=thresh, maxdim=maxdim,
coeff=coeff, metric='precomputed',
collapse_edges=True)['dgms']
pd_no_collapse = ripser(distance_matrix, thresh=thresh, maxdim=maxdim,
coeff=coeff, metric='precomputed',
collapse_edges=False)['dgms']
pd_collapse = ripser(dm, thresh=thresh, maxdim=maxdim, coeff=coeff,
metric='precomputed', collapse_edges=True)['dgms']
pd_no_collapse = ripser(dm, thresh=thresh, maxdim=maxdim, coeff=coeff,
metric='precomputed', collapse_edges=False)['dgms']
for i in range(maxdim + 1):
pd_collapse[i] = np.sort(pd_collapse[i], axis=0)
pd_no_collapse[i] = np.sort(pd_no_collapse[i], axis=0)
Expand All @@ -111,26 +105,48 @@ def test_collapse_consistent_with_no_collapse_dense(thresh,
@pytest.mark.parametrize('thresh', [False, True])
@pytest.mark.parametrize('coeff', [2, 7])
@settings(deadline=500)
@given(distance_matrix=get_sparse_distance_matrices())
def test_collapse_consistent_with_no_collapse_coo(thresh,
coeff, distance_matrix):
if thresh and distance_matrix.nnz:
thresh = np.max(distance_matrix) / 2
@given(dm=get_sparse_distance_matrices())
def test_collapse_consistent_with_no_collapse_coo(thresh, coeff, dm):
if thresh and dm.nnz:
thresh = np.max(dm) / 2
else:
thresh = np.inf
maxdim = 3
pd_collapse = ripser(distance_matrix, thresh=thresh, maxdim=maxdim,
coeff=coeff, metric='precomputed',
collapse_edges=True)['dgms']
pd_no_collapse = ripser(distance_matrix, thresh=thresh, maxdim=maxdim,
coeff=coeff, metric='precomputed',
collapse_edges=False)['dgms']
pd_collapse = ripser(dm, thresh=thresh, maxdim=maxdim, coeff=coeff,
metric='precomputed', collapse_edges=True)['dgms']
pd_no_collapse = ripser(dm, thresh=thresh, maxdim=maxdim, coeff=coeff,
metric='precomputed', collapse_edges=False)['dgms']
for i in range(maxdim + 1):
pd_collapse[i] = np.sort(pd_collapse[i], axis=0)
pd_no_collapse[i] = np.sort(pd_no_collapse[i], axis=0)
assert_almost_equal(pd_collapse[i], pd_no_collapse[i])


def test_collapser_with_negative_weights():
"""Test that collapser works as expected when some of the vertex and edge
weights are negative."""
n_points = 20
dm = np.random.random((n_points, n_points))
np.fill_diagonal(dm, -np.random.random(n_points))
dm -= 0.2
dm_sparse = coo_matrix(dm)

maxdim = 2
pd_collapse_dense = ripser(dm, metric='precomputed', maxdim=maxdim,
collapse_edges=True)['dgms']
pd_collapse_sparse = ripser(dm_sparse, metric='precomputed',
maxdim=maxdim, collapse_edges=True)['dgms']
pd_no_collapse = ripser(dm, metric='precomputed', maxdim=maxdim,
collapse_edges=False)['dgms']

for i in range(maxdim + 1):
pd_collapse_dense[i] = np.sort(pd_collapse_dense[i], axis=0)
pd_collapse_sparse[i] = np.sort(pd_collapse_dense[i], axis=0)
pd_no_collapse[i] = np.sort(pd_no_collapse[i], axis=0)
assert_almost_equal(pd_collapse_dense[i], pd_no_collapse[i])
assert_almost_equal(pd_collapse_sparse[i], pd_no_collapse[i])


def test_coo_results_independent_of_order():
"""Regression test for PR #465"""
data = np.array([6., 8., 2., 4., 5., 9., 10., 3., 1., 1.])
Expand Down
1 change: 0 additions & 1 deletion gtda/homology/tests/test_simplicial.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,6 @@ def test_wrp_same_as_vrp_when_zero_weights(X, metric, weight_params,
(X_dist_sparse, 'precomputed')])
@pytest.mark.parametrize('weight_params', [{'p': 1}, {'p': 2}, {'p': np.inf}])
@pytest.mark.parametrize('collapse_edges', [True, False])
@pytest.mark.filterwarnings('ignore:Edge collapses are not supported')
def test_wrp_transform(X, metric, weight_params, collapse_edges):
wrp = WeightedRipsPersistence(weight_params=weight_params,
metric=metric,
Expand Down

0 comments on commit 1f4b48e

Please sign in to comment.