Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable edge collapser for non-zero vertex weights #558

Merged
merged 4 commits into from
Jan 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ Major Features and Improvements
- Wheels for Python 3.9 have been added (`#528 <https://github.com/giotto-ai/giotto-tda/pull/528>`_).
- Weighted Rips filtrations, and in particular distance-to-measure (DTM) based filtrations, are now supported in ``ripser`` and by the new ``WeightedRipsPersistence`` transformer (`#541 <https://github.com/giotto-ai/giotto-tda/pull/541>`_).
- See "Backwards-Incompatible Changes" for major improvements to ``ParallelClustering`` and therefore ``make_mapper_pipeline`` which are also major breaking changes.
- GUDHI's edge collapser can now be used with arbitrary vertex and edge weights (`#558 <https://github.com/giotto-ai/giotto-tda/pull/558>`_).
- ``GraphGeodesicDistance`` can now take rectangular input (the number of vertices is inferred to be ``max(x.shape)``), and ``KNeighborsGraph`` can now take sparse input (`#537 <https://github.com/giotto-ai/giotto-tda/pull/537>`_).
- ``VietorisRipsPersistence`` now takes a ``metric_params`` parameter (`#541 <https://github.com/giotto-ai/giotto-tda/pull/541>`_).

Expand Down
30 changes: 16 additions & 14 deletions gtda/externals/python/ripser_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,19 @@ def _resolve_symmetry_conflicts(coo):
return _row, _col, _data


def _collapse_coo(row, col, data, thresh):
"""Run edge collapser on off-diagonal data and then reinsert diagonal
data."""
MonkeyBreaker marked this conversation as resolved.
Show resolved Hide resolved
diag = row == col
row_diag, col_diag, data_diag = row[diag], col[diag], data[diag]
row, col, data = gtda_collapser. \
flag_complex_collapse_edges_coo(row, col, data, thresh)

return (np.hstack([row_diag, row]),
np.hstack([col_diag, col]),
np.hstack([data_diag, data]))


def _compute_dtm_weights(dm, n_neighbors, weights_r):
with catch_warnings():
simplefilter("ignore", category=EfficiencyWarning)
Expand Down Expand Up @@ -401,7 +414,6 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean",
use_sparse_computer = True
if issparse(dm):
row, col, data = _resolve_symmetry_conflicts(dm.tocoo()) # Upper diag
has_nonzeros_in_diag = (dm.diagonal() != 0).any()

if weights is not None:
if (dm < 0).nnz:
Expand Down Expand Up @@ -437,16 +449,9 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean",
data = _weight_filtration_sparse(row, col, data, weights,
weights_p)

has_nonzeros_in_diag = np.any(weights)

if collapse_edges:
if has_nonzeros_in_diag:
warn("Edge collapses are not supported when any of the "
"diagonal entries are non-zero. Computing persistent "
"homology without using edge collapse.")
else:
row, col, data = gtda_collapser.\
flag_complex_collapse_edges_coo(row, col, data, thresh)
row, col, data = _collapse_coo(row, col, data, thresh)

else:
if weights is not None:
if (dm < 0).any():
Expand Down Expand Up @@ -477,10 +482,7 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean",
(row, col) = np.triu_indices_from(dm)
data = dm[(row, col)]
if collapse_edges:
warn("Edge collapses are not supported when any of the "
"diagonal entries are non-zero. Computing persistent "
"homology without using edge collapse.")

row, col, data = _collapse_coo(row, col, data, thresh)
elif collapse_edges:
row, col, data = gtda_collapser.\
flag_complex_collapse_edges_dense(dm, thresh)
Expand Down
116 changes: 66 additions & 50 deletions gtda/externals/python/tests/test_ripser.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,71 +14,68 @@ def get_dense_distance_matrices(draw):
"""Generate 2d dense square arrays of floats, with zero along the
diagonal."""
shapes = draw(integers(min_value=2, max_value=30))
distance_matrix = draw(arrays(dtype=np.float,
elements=floats(allow_nan=False,
allow_infinity=True,
min_value=0),
shape=(shapes, shapes), unique=False))
np.fill_diagonal(distance_matrix, 0)
return distance_matrix
dm = draw(arrays(dtype=np.float,
elements=floats(allow_nan=False,
allow_infinity=True,
min_value=0),
shape=(shapes, shapes), unique=False))
np.fill_diagonal(dm, 0)
return dm
Comment on lines -17 to +23
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Appreciate to uniform naming of variables ! 💯



@composite
def get_sparse_distance_matrices(draw):
"""Generate 2d upper triangular sparse matrices of floats, with zero along
the diagonal."""
shapes = draw(integers(min_value=2, max_value=40))
distance_matrix = draw(arrays(dtype=np.float,
elements=floats(allow_nan=False,
allow_infinity=True,
min_value=0),
shape=(shapes, shapes), unique=False))
distance_matrix = np.triu(distance_matrix, k=1)
distance_matrix = coo_matrix(distance_matrix)
row, col, data = \
distance_matrix.row, distance_matrix.col, distance_matrix.data
dm = draw(arrays(dtype=np.float,
elements=floats(allow_nan=False,
allow_infinity=True,
min_value=0),
shape=(shapes, shapes), unique=False))
dm = np.triu(dm, k=1)
dm = coo_matrix(dm)
row, col, data = dm.row, dm.col, dm.data
not_inf_idx = data != np.inf
row = row[not_inf_idx]
col = col[not_inf_idx]
data = data[not_inf_idx]
shape_kwargs = {} if data.size else {"shape": (0, 0)}
distance_matrix = coo_matrix((data, (row, col)), **shape_kwargs)
return distance_matrix
dm = coo_matrix((data, (row, col)), **shape_kwargs)
return dm


@settings(deadline=500)
@given(distance_matrix=get_sparse_distance_matrices())
def test_coo_below_diagonal_and_mixed_same_as_above(distance_matrix):
@given(dm=get_sparse_distance_matrices())
def test_coo_below_diagonal_and_mixed_same_as_above(dm):
"""Test that if we feed sparse matrices representing the same undirected
weighted graph we obtain the same results regardless of whether all entries
are above the diagonal, all are below the diagonal, or neither.
Furthermore, test that conflicts between stored data in the upper and lower
triangle are resolved in favour of the upper triangle."""
ripser_kwargs = {"maxdim": 2, "metric": "precomputed"}

pd_above = ripser(distance_matrix, **ripser_kwargs)['dgms']
pd_above = ripser(dm, **ripser_kwargs)['dgms']

pd_below = ripser(distance_matrix.T, **ripser_kwargs)['dgms']
pd_below = ripser(dm.T, **ripser_kwargs)['dgms']

_row, _col, _data = (distance_matrix.row, distance_matrix.col,
distance_matrix.data)
_row, _col, _data = dm.row, dm.col, dm.data
coo_shape_kwargs = {} if _data.size else {"shape": (0, 0)}
to_transpose_mask = np.full(len(_row), False)
to_transpose_mask[np.random.choice(np.arange(len(_row)),
size=len(_row) // 2,
replace=False)] = True
row = np.concatenate([_col[to_transpose_mask], _row[~to_transpose_mask]])
col = np.concatenate([_row[to_transpose_mask], _col[~to_transpose_mask]])
distance_matrix_mixed = coo_matrix((_data, (row, col)), **coo_shape_kwargs)
pd_mixed = ripser(distance_matrix_mixed, **ripser_kwargs)['dgms']
dm_mixed = coo_matrix((_data, (row, col)), **coo_shape_kwargs)
pd_mixed = ripser(dm_mixed, **ripser_kwargs)['dgms']

row = np.concatenate([row, _row[to_transpose_mask]])
col = np.concatenate([col, _col[to_transpose_mask]])
data = np.random.random(len(row))
data[:len(_row)] = _data
distance_matrix_conflicts = coo_matrix((data, (row, col)),
**coo_shape_kwargs)
pd_conflicts = ripser(distance_matrix_conflicts, **ripser_kwargs)['dgms']
dm_conflicts = coo_matrix((data, (row, col)), **coo_shape_kwargs)
pd_conflicts = ripser(dm_conflicts, **ripser_kwargs)['dgms']

for i in range(ripser_kwargs["maxdim"] + 1):
pd_above[i] = np.sort(pd_above[i], axis=0)
Expand All @@ -91,17 +88,14 @@ def test_coo_below_diagonal_and_mixed_same_as_above(distance_matrix):
@pytest.mark.parametrize('thresh', [False, True])
@pytest.mark.parametrize('coeff', [2, 7])
@settings(deadline=500)
@given(distance_matrix=get_dense_distance_matrices())
def test_collapse_consistent_with_no_collapse_dense(thresh,
coeff, distance_matrix):
thresh = np.max(distance_matrix) / 2 if thresh else np.inf
@given(dm=get_dense_distance_matrices())
def test_collapse_consistent_with_no_collapse_dense(thresh, coeff, dm):
thresh = np.max(dm) / 2 if thresh else np.inf
maxdim = 3
pd_collapse = ripser(distance_matrix, thresh=thresh, maxdim=maxdim,
coeff=coeff, metric='precomputed',
collapse_edges=True)['dgms']
pd_no_collapse = ripser(distance_matrix, thresh=thresh, maxdim=maxdim,
coeff=coeff, metric='precomputed',
collapse_edges=False)['dgms']
pd_collapse = ripser(dm, thresh=thresh, maxdim=maxdim, coeff=coeff,
metric='precomputed', collapse_edges=True)['dgms']
pd_no_collapse = ripser(dm, thresh=thresh, maxdim=maxdim, coeff=coeff,
metric='precomputed', collapse_edges=False)['dgms']
for i in range(maxdim + 1):
pd_collapse[i] = np.sort(pd_collapse[i], axis=0)
pd_no_collapse[i] = np.sort(pd_no_collapse[i], axis=0)
Expand All @@ -111,26 +105,48 @@ def test_collapse_consistent_with_no_collapse_dense(thresh,
@pytest.mark.parametrize('thresh', [False, True])
@pytest.mark.parametrize('coeff', [2, 7])
@settings(deadline=500)
@given(distance_matrix=get_sparse_distance_matrices())
def test_collapse_consistent_with_no_collapse_coo(thresh,
coeff, distance_matrix):
if thresh and distance_matrix.nnz:
thresh = np.max(distance_matrix) / 2
@given(dm=get_sparse_distance_matrices())
def test_collapse_consistent_with_no_collapse_coo(thresh, coeff, dm):
if thresh and dm.nnz:
thresh = np.max(dm) / 2
else:
thresh = np.inf
maxdim = 3
pd_collapse = ripser(distance_matrix, thresh=thresh, maxdim=maxdim,
coeff=coeff, metric='precomputed',
collapse_edges=True)['dgms']
pd_no_collapse = ripser(distance_matrix, thresh=thresh, maxdim=maxdim,
coeff=coeff, metric='precomputed',
collapse_edges=False)['dgms']
pd_collapse = ripser(dm, thresh=thresh, maxdim=maxdim, coeff=coeff,
metric='precomputed', collapse_edges=True)['dgms']
pd_no_collapse = ripser(dm, thresh=thresh, maxdim=maxdim, coeff=coeff,
metric='precomputed', collapse_edges=False)['dgms']
for i in range(maxdim + 1):
pd_collapse[i] = np.sort(pd_collapse[i], axis=0)
pd_no_collapse[i] = np.sort(pd_no_collapse[i], axis=0)
assert_almost_equal(pd_collapse[i], pd_no_collapse[i])


def test_collapser_with_negative_weights():
"""Test that collapser works as expected when some of the vertex and edge
weights are negative."""
n_points = 20
dm = np.random.random((n_points, n_points))
np.fill_diagonal(dm, -np.random.random(n_points))
dm -= 0.2
dm_sparse = coo_matrix(dm)

maxdim = 2
pd_collapse_dense = ripser(dm, metric='precomputed', maxdim=maxdim,
collapse_edges=True)['dgms']
pd_collapse_sparse = ripser(dm_sparse, metric='precomputed',
maxdim=maxdim, collapse_edges=True)['dgms']
pd_no_collapse = ripser(dm, metric='precomputed', maxdim=maxdim,
collapse_edges=False)['dgms']

for i in range(maxdim + 1):
pd_collapse_dense[i] = np.sort(pd_collapse_dense[i], axis=0)
pd_collapse_sparse[i] = np.sort(pd_collapse_dense[i], axis=0)
pd_no_collapse[i] = np.sort(pd_no_collapse[i], axis=0)
assert_almost_equal(pd_collapse_dense[i], pd_no_collapse[i])
assert_almost_equal(pd_collapse_sparse[i], pd_no_collapse[i])


def test_coo_results_independent_of_order():
"""Regression test for PR #465"""
data = np.array([6., 8., 2., 4., 5., 9., 10., 3., 1., 1.])
Expand Down
1 change: 0 additions & 1 deletion gtda/homology/tests/test_simplicial.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,6 @@ def test_wrp_same_as_vrp_when_zero_weights(X, metric, weight_params,
(X_dist_sparse, 'precomputed')])
@pytest.mark.parametrize('weight_params', [{'p': 1}, {'p': 2}, {'p': np.inf}])
@pytest.mark.parametrize('collapse_edges', [True, False])
@pytest.mark.filterwarnings('ignore:Edge collapses are not supported')
def test_wrp_transform(X, metric, weight_params, collapse_edges):
wrp = WeightedRipsPersistence(weight_params=weight_params,
metric=metric,
Expand Down