Enable edge collapser for non-zero vertex weights (#558)

* Enable edge collapser for non-zero vertex weights And hence also for negative vertex and edge weights * Remove unnecessary warning filters in test * Add relevant entry in release notes * Add test of collapser with negative weights
giotto-ai · Jan 13, 2021 · 1f4b48e · 1f4b48e
1 parent 8dc0464
commit 1f4b48e
Show file tree

Hide file tree

Showing 4 changed files with 83 additions and 65 deletions.
diff --git a/doc/release.rst b/doc/release.rst
@@ -14,6 +14,7 @@ Major Features and Improvements
 - Wheels for Python 3.9 have been added (`#528 <https://github.com/giotto-ai/giotto-tda/pull/528>`_).
 - Weighted Rips filtrations, and in particular distance-to-measure (DTM) based filtrations, are now supported in ``ripser`` and by the new ``WeightedRipsPersistence`` transformer (`#541 <https://github.com/giotto-ai/giotto-tda/pull/541>`_).
 - See "Backwards-Incompatible Changes" for major improvements to ``ParallelClustering`` and therefore ``make_mapper_pipeline`` which are also major breaking changes.
+- GUDHI's edge collapser can now be used with arbitrary vertex and edge weights (`#558 <https://github.com/giotto-ai/giotto-tda/pull/558>`_).
 - ``GraphGeodesicDistance`` can now take rectangular input (the number of vertices is inferred to be ``max(x.shape)``), and ``KNeighborsGraph`` can now take sparse input (`#537 <https://github.com/giotto-ai/giotto-tda/pull/537>`_).
 - ``VietorisRipsPersistence`` now takes a ``metric_params`` parameter (`#541 <https://github.com/giotto-ai/giotto-tda/pull/541>`_).
 

diff --git a/gtda/externals/python/ripser_interface.py b/gtda/externals/python/ripser_interface.py
@@ -162,6 +162,19 @@ def _resolve_symmetry_conflicts(coo):
         return _row, _col, _data
 
 
+def _collapse_coo(row, col, data, thresh):
+    """Run edge collapser on off-diagonal data and then reinsert diagonal
+    data."""
+    diag = row == col
+    row_diag, col_diag, data_diag = row[diag], col[diag], data[diag]
+    row, col, data = gtda_collapser. \
+        flag_complex_collapse_edges_coo(row, col, data, thresh)
+
+    return (np.hstack([row_diag, row]),
+            np.hstack([col_diag, col]),
+            np.hstack([data_diag, data]))
+
+
 def _compute_dtm_weights(dm, n_neighbors, weights_r):
     with catch_warnings():
         simplefilter("ignore", category=EfficiencyWarning)
@@ -401,7 +414,6 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean",
     use_sparse_computer = True
     if issparse(dm):
         row, col, data = _resolve_symmetry_conflicts(dm.tocoo())  # Upper diag
-        has_nonzeros_in_diag = (dm.diagonal() != 0).any()
 
         if weights is not None:
             if (dm < 0).nnz:
@@ -437,16 +449,9 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean",
             data = _weight_filtration_sparse(row, col, data, weights,
                                              weights_p)
 
-            has_nonzeros_in_diag = np.any(weights)
-
         if collapse_edges:
-            if has_nonzeros_in_diag:
-                warn("Edge collapses are not supported when any of the "
-                     "diagonal entries are non-zero. Computing persistent "
-                     "homology without using edge collapse.")
-            else:
-                row, col, data = gtda_collapser.\
-                    flag_complex_collapse_edges_coo(row, col, data, thresh)
+            row, col, data = _collapse_coo(row, col, data, thresh)
+
     else:
         if weights is not None:
             if (dm < 0).any():
@@ -477,10 +482,7 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean",
             (row, col) = np.triu_indices_from(dm)
             data = dm[(row, col)]
             if collapse_edges:
-                warn("Edge collapses are not supported when any of the "
-                     "diagonal entries are non-zero. Computing persistent "
-                     "homology without using edge collapse.")
-
+                row, col, data = _collapse_coo(row, col, data, thresh)
         elif collapse_edges:
             row, col, data = gtda_collapser.\
                 flag_complex_collapse_edges_dense(dm, thresh)

diff --git a/gtda/externals/python/tests/test_ripser.py b/gtda/externals/python/tests/test_ripser.py
@@ -14,71 +14,68 @@ def get_dense_distance_matrices(draw):
     """Generate 2d dense square arrays of floats, with zero along the
     diagonal."""
     shapes = draw(integers(min_value=2, max_value=30))
-    distance_matrix = draw(arrays(dtype=np.float,
-                                  elements=floats(allow_nan=False,
-                                                  allow_infinity=True,
-                                                  min_value=0),
-                                  shape=(shapes, shapes), unique=False))
-    np.fill_diagonal(distance_matrix, 0)
-    return distance_matrix
+    dm = draw(arrays(dtype=np.float,
+                     elements=floats(allow_nan=False,
+                                     allow_infinity=True,
+                                     min_value=0),
+                     shape=(shapes, shapes), unique=False))
+    np.fill_diagonal(dm, 0)
+    return dm
 
 
 @composite
 def get_sparse_distance_matrices(draw):
     """Generate 2d upper triangular sparse matrices of floats, with zero along
     the diagonal."""
     shapes = draw(integers(min_value=2, max_value=40))
-    distance_matrix = draw(arrays(dtype=np.float,
-                                  elements=floats(allow_nan=False,
-                                                  allow_infinity=True,
-                                                  min_value=0),
-                                  shape=(shapes, shapes), unique=False))
-    distance_matrix = np.triu(distance_matrix, k=1)
-    distance_matrix = coo_matrix(distance_matrix)
-    row, col, data = \
-        distance_matrix.row, distance_matrix.col, distance_matrix.data
+    dm = draw(arrays(dtype=np.float,
+                     elements=floats(allow_nan=False,
+                                     allow_infinity=True,
+                                     min_value=0),
+                     shape=(shapes, shapes), unique=False))
+    dm = np.triu(dm, k=1)
+    dm = coo_matrix(dm)
+    row, col, data = dm.row, dm.col, dm.data
     not_inf_idx = data != np.inf
     row = row[not_inf_idx]
     col = col[not_inf_idx]
     data = data[not_inf_idx]
     shape_kwargs = {} if data.size else {"shape": (0, 0)}
-    distance_matrix = coo_matrix((data, (row, col)), **shape_kwargs)
-    return distance_matrix
+    dm = coo_matrix((data, (row, col)), **shape_kwargs)
+    return dm
 
 
 @settings(deadline=500)
-@given(distance_matrix=get_sparse_distance_matrices())
-def test_coo_below_diagonal_and_mixed_same_as_above(distance_matrix):
+@given(dm=get_sparse_distance_matrices())
+def test_coo_below_diagonal_and_mixed_same_as_above(dm):
     """Test that if we feed sparse matrices representing the same undirected
     weighted graph we obtain the same results regardless of whether all entries
     are above the diagonal, all are below the diagonal, or neither.
     Furthermore, test that conflicts between stored data in the upper and lower
     triangle are resolved in favour of the upper triangle."""
     ripser_kwargs = {"maxdim": 2, "metric": "precomputed"}
 
-    pd_above = ripser(distance_matrix, **ripser_kwargs)['dgms']
+    pd_above = ripser(dm, **ripser_kwargs)['dgms']
 
-    pd_below = ripser(distance_matrix.T, **ripser_kwargs)['dgms']
+    pd_below = ripser(dm.T, **ripser_kwargs)['dgms']
 
-    _row, _col, _data = (distance_matrix.row, distance_matrix.col,
-                         distance_matrix.data)
+    _row, _col, _data = dm.row, dm.col, dm.data
     coo_shape_kwargs = {} if _data.size else {"shape": (0, 0)}
     to_transpose_mask = np.full(len(_row), False)
     to_transpose_mask[np.random.choice(np.arange(len(_row)),
                                        size=len(_row) // 2,
                                        replace=False)] = True
     row = np.concatenate([_col[to_transpose_mask], _row[~to_transpose_mask]])
     col = np.concatenate([_row[to_transpose_mask], _col[~to_transpose_mask]])
-    distance_matrix_mixed = coo_matrix((_data, (row, col)), **coo_shape_kwargs)
-    pd_mixed = ripser(distance_matrix_mixed, **ripser_kwargs)['dgms']
+    dm_mixed = coo_matrix((_data, (row, col)), **coo_shape_kwargs)
+    pd_mixed = ripser(dm_mixed, **ripser_kwargs)['dgms']
 
     row = np.concatenate([row, _row[to_transpose_mask]])
     col = np.concatenate([col, _col[to_transpose_mask]])
     data = np.random.random(len(row))
     data[:len(_row)] = _data
-    distance_matrix_conflicts = coo_matrix((data, (row, col)),
-                                           **coo_shape_kwargs)
-    pd_conflicts = ripser(distance_matrix_conflicts, **ripser_kwargs)['dgms']
+    dm_conflicts = coo_matrix((data, (row, col)), **coo_shape_kwargs)
+    pd_conflicts = ripser(dm_conflicts, **ripser_kwargs)['dgms']
 
     for i in range(ripser_kwargs["maxdim"] + 1):
         pd_above[i] = np.sort(pd_above[i], axis=0)
@@ -91,17 +88,14 @@ def test_coo_below_diagonal_and_mixed_same_as_above(distance_matrix):
 @pytest.mark.parametrize('thresh', [False, True])
 @pytest.mark.parametrize('coeff', [2, 7])
 @settings(deadline=500)
-@given(distance_matrix=get_dense_distance_matrices())
-def test_collapse_consistent_with_no_collapse_dense(thresh,
-                                                    coeff, distance_matrix):
-    thresh = np.max(distance_matrix) / 2 if thresh else np.inf
+@given(dm=get_dense_distance_matrices())
+def test_collapse_consistent_with_no_collapse_dense(thresh, coeff, dm):
+    thresh = np.max(dm) / 2 if thresh else np.inf
     maxdim = 3
-    pd_collapse = ripser(distance_matrix, thresh=thresh, maxdim=maxdim,
-                         coeff=coeff, metric='precomputed',
-                         collapse_edges=True)['dgms']
-    pd_no_collapse = ripser(distance_matrix, thresh=thresh, maxdim=maxdim,
-                            coeff=coeff, metric='precomputed',
-                            collapse_edges=False)['dgms']
+    pd_collapse = ripser(dm, thresh=thresh, maxdim=maxdim, coeff=coeff,
+                         metric='precomputed', collapse_edges=True)['dgms']
+    pd_no_collapse = ripser(dm, thresh=thresh, maxdim=maxdim, coeff=coeff,
+                            metric='precomputed', collapse_edges=False)['dgms']
     for i in range(maxdim + 1):
         pd_collapse[i] = np.sort(pd_collapse[i], axis=0)
         pd_no_collapse[i] = np.sort(pd_no_collapse[i], axis=0)
@@ -111,26 +105,48 @@ def test_collapse_consistent_with_no_collapse_dense(thresh,
 @pytest.mark.parametrize('thresh', [False, True])
 @pytest.mark.parametrize('coeff', [2, 7])
 @settings(deadline=500)
-@given(distance_matrix=get_sparse_distance_matrices())
-def test_collapse_consistent_with_no_collapse_coo(thresh,
-                                                  coeff, distance_matrix):
-    if thresh and distance_matrix.nnz:
-        thresh = np.max(distance_matrix) / 2
+@given(dm=get_sparse_distance_matrices())
+def test_collapse_consistent_with_no_collapse_coo(thresh, coeff, dm):
+    if thresh and dm.nnz:
+        thresh = np.max(dm) / 2
     else:
         thresh = np.inf
     maxdim = 3
-    pd_collapse = ripser(distance_matrix, thresh=thresh, maxdim=maxdim,
-                         coeff=coeff, metric='precomputed',
-                         collapse_edges=True)['dgms']
-    pd_no_collapse = ripser(distance_matrix, thresh=thresh, maxdim=maxdim,
-                            coeff=coeff, metric='precomputed',
-                            collapse_edges=False)['dgms']
+    pd_collapse = ripser(dm, thresh=thresh, maxdim=maxdim, coeff=coeff,
+                         metric='precomputed', collapse_edges=True)['dgms']
+    pd_no_collapse = ripser(dm, thresh=thresh, maxdim=maxdim, coeff=coeff,
+                            metric='precomputed', collapse_edges=False)['dgms']
     for i in range(maxdim + 1):
         pd_collapse[i] = np.sort(pd_collapse[i], axis=0)
         pd_no_collapse[i] = np.sort(pd_no_collapse[i], axis=0)
         assert_almost_equal(pd_collapse[i], pd_no_collapse[i])
 
 
+def test_collapser_with_negative_weights():
+    """Test that collapser works as expected when some of the vertex and edge
+    weights are negative."""
+    n_points = 20
+    dm = np.random.random((n_points, n_points))
+    np.fill_diagonal(dm, -np.random.random(n_points))
+    dm -= 0.2
+    dm_sparse = coo_matrix(dm)
+
+    maxdim = 2
+    pd_collapse_dense = ripser(dm, metric='precomputed', maxdim=maxdim,
+                               collapse_edges=True)['dgms']
+    pd_collapse_sparse = ripser(dm_sparse, metric='precomputed',
+                                maxdim=maxdim, collapse_edges=True)['dgms']
+    pd_no_collapse = ripser(dm, metric='precomputed', maxdim=maxdim,
+                            collapse_edges=False)['dgms']
+
+    for i in range(maxdim + 1):
+        pd_collapse_dense[i] = np.sort(pd_collapse_dense[i], axis=0)
+        pd_collapse_sparse[i] = np.sort(pd_collapse_dense[i], axis=0)
+        pd_no_collapse[i] = np.sort(pd_no_collapse[i], axis=0)
+        assert_almost_equal(pd_collapse_dense[i], pd_no_collapse[i])
+        assert_almost_equal(pd_collapse_sparse[i], pd_no_collapse[i])
+
+
 def test_coo_results_independent_of_order():
     """Regression test for PR #465"""
     data = np.array([6., 8., 2., 4., 5., 9., 10., 3., 1., 1.])

diff --git a/gtda/homology/tests/test_simplicial.py b/gtda/homology/tests/test_simplicial.py
@@ -195,7 +195,6 @@ def test_wrp_same_as_vrp_when_zero_weights(X, metric, weight_params,
                                        (X_dist_sparse, 'precomputed')])
 @pytest.mark.parametrize('weight_params', [{'p': 1}, {'p': 2}, {'p': np.inf}])
 @pytest.mark.parametrize('collapse_edges', [True, False])
-@pytest.mark.filterwarnings('ignore:Edge collapses are not supported')
 def test_wrp_transform(X, metric, weight_params, collapse_edges):
     wrp = WeightedRipsPersistence(weight_params=weight_params,
                                   metric=metric,