From f5495b483ae9d2fd4153968d3ce2607aa913706c Mon Sep 17 00:00:00 2001 From: Umberto Lupo Date: Tue, 28 Dec 2021 14:43:53 +0100 Subject: [PATCH 1/6] Replace ripser.py and robinhood submodules with giotto-ph dependency --- .gitmodules | 6 - CMakeLists.txt | 50 -- examples/persistent_homology_graphs.ipynb | 10 +- gtda/externals/__init__.py | 3 +- gtda/externals/bindings/ripser_bindings.cpp | 57 --- gtda/externals/python/__init__.py | 1 - gtda/externals/python/ripser_interface.py | 520 -------------------- gtda/externals/python/tests/test_ripser.py | 166 ------- gtda/externals/ripser | 1 - gtda/externals/robinhood | 1 - gtda/homology/simplicial.py | 42 +- requirements.txt | 1 + setup.cfg | 1 - 13 files changed, 26 insertions(+), 833 deletions(-) delete mode 100644 gtda/externals/bindings/ripser_bindings.cpp delete mode 100644 gtda/externals/python/ripser_interface.py delete mode 100644 gtda/externals/python/tests/test_ripser.py delete mode 160000 gtda/externals/ripser delete mode 160000 gtda/externals/robinhood diff --git a/.gitmodules b/.gitmodules index 3960071bb..9544dff81 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,3 @@ -[submodule "gtda/externals/ripser"] - path = gtda/externals/ripser - url = https://github.com/scikit-tda/ripser.py.git [submodule "gtda/externals/gudhi-devel"] path = gtda/externals/gudhi-devel url = https://github.com/giotto-ai/gudhi-devel @@ -14,6 +11,3 @@ [submodule "gtda/externals/pybind11"] path = gtda/externals/pybind11 url = https://github.com/pybind/pybind11 -[submodule "gtda/externals/robinhood"] - path = gtda/externals/robinhood - url = https://github.com/martinus/robin-hood-hashing diff --git a/CMakeLists.txt b/CMakeLists.txt index 0ea01857c..93762eeb3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,59 +9,9 @@ include_directories(${Boost_INCLUDE_DIRS}) find_package(OpenMP) -set(RIPSER_SRC_DIR "gtda/externals/ripser") set(GUDHI_SRC_DIR "gtda/externals/gudhi-devel/src") set(HERA_DIR "gtda/externals/hera") set(EIGEN_DIR "gtda/externals/eigen") -set(ROBINHOOD_DIR "gtda/externals/robinhood") - -####################################################################### -# Ripser # -####################################################################### - -pybind11_add_module(gtda_ripser MODULE "${BINDINGS_DIR}/ripser_bindings.cpp") -set_property(TARGET gtda_ripser PROPERTY CXX_STANDARD 14) - -if(OpenMP_FOUND) - target_link_libraries(gtda_ripser PRIVATE OpenMP::OpenMP_CXX) -endif() - -target_compile_definitions(gtda_ripser PRIVATE ASSEMBLE_REDUCTION_MATRIX=1) -target_include_directories(gtda_ripser PRIVATE "${RIPSER_SRC_DIR}/ripser") -target_compile_definitions(gtda_ripser PRIVATE USE_ROBINHOOD_HASHMAP=1) -target_include_directories(gtda_ripser PRIVATE "${ROBINHOOD_DIR}/src/include") - -if(MSVC) - target_compile_options(gtda_ripser PUBLIC $<$: /Wall /O2>) - target_compile_options(gtda_ripser PUBLIC $<$:/O1 /DEBUG:FULL /Zi /Zo>) -else() - target_compile_options(gtda_ripser PUBLIC $<$: -O3>) - target_compile_options(gtda_ripser PUBLIC $<$: -O2 -ggdb -D_GLIBCXX_DEBUG>) -endif() - -####################################################################### -# Ripser - Coefficient enable # -####################################################################### - -pybind11_add_module(gtda_ripser_coeff MODULE "${BINDINGS_DIR}/ripser_bindings.cpp") -set_property(TARGET gtda_ripser_coeff PROPERTY CXX_STANDARD 14) - -if(OpenMP_FOUND) - target_link_libraries(gtda_ripser_coeff PRIVATE OpenMP::OpenMP_CXX) -endif() - -target_compile_definitions(gtda_ripser_coeff PRIVATE USE_COEFFICIENTS=1 ASSEMBLE_REDUCTION_MATRIX=1) -target_include_directories(gtda_ripser_coeff PRIVATE "${RIPSER_SRC_DIR}/ripser") -target_compile_definitions(gtda_ripser_coeff PRIVATE USE_ROBINHOOD_HASHMAP=1) -target_include_directories(gtda_ripser_coeff PRIVATE "${ROBINHOOD_DIR}/src/include") - -if(MSVC) - target_compile_options(gtda_ripser_coeff PUBLIC $<$: /Wall /O2>) - target_compile_options(gtda_ripser_coeff PUBLIC $<$:/O1 /DEBUG:FULL /Zi /Zo>) -else() - target_compile_options(gtda_ripser_coeff PUBLIC $<$: -O3>) - target_compile_options(gtda_ripser_coeff PUBLIC $<$: -O2 -ggdb -D_GLIBCXX_DEBUG>) -endif() ####################################################################### # Wasserstein # diff --git a/examples/persistent_homology_graphs.ipynb b/examples/persistent_homology_graphs.ipynb index 4d6e83b53..542b5b518 100644 --- a/examples/persistent_homology_graphs.ipynb +++ b/examples/persistent_homology_graphs.ipynb @@ -507,9 +507,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": false - }, + "metadata": {}, "outputs": [], "source": [ "X_ggd = GraphGeodesicDistance(directed=True, unweighted=True).fit_transform([directed_circle])\n", @@ -651,13 +649,13 @@ "- Persistence diagrams are great for data exploration, but to feed their content to machine learning algorithms one must make sure the algorithm used is **independent of the relative ordering** of the birth-death pairs in each homology dimension. [gtda.diagrams](https://giotto-ai.github.io/gtda-docs/latest/modules/diagrams.html) contains a suite of vector representations, feature extraction methods and kernel methods that convert persistence diagrams into data structures ready for machine learning algorithms. Simple examples of their use are contained in [Topological feature extraction using VietorisRipsPersistence and PersistenceEntropy](https://giotto-ai.github.io/gtda-docs/latest/notebooks/vietoris_rips_quickstart.html), [Classifying 3D shapes](https://giotto-ai.github.io/gtda-docs/latest/notebooks/classifying_shapes.html) and [Lorenz attractor](https://giotto-ai.github.io/gtda-docs/latest/notebooks/lorenz_attractor.html).\n", "- In addition to ``GraphGeodesicDistance``, [gtda.graphs](https://giotto-ai.github.io/gtda-docs/latest/modules/graphs.html) also contains transformers for the creation of graphs from point cloud or time series data.\n", "- Despite the name, [gtda.point_clouds](https://giotto-ai.github.io/gtda-docs/latest/modules/point_clouds.html) contains transformers for the alteration of distance matrices (which are just adjacency matrices of weighted graphs) as a preprocessing step for persistent homology.\n", - "- ``VietorisRipsPersistence`` builds on the [ripser.py](https://ripser.scikit-tda.org/index.html) project. Its website contains two tutorials on additional ways in which graphs can be constructed from [time series data](https://ripser.scikit-tda.org/notebooks/Lower%20Star%20Time%20Series.html) or [image data](https://ripser.scikit-tda.org/notebooks/Lower%20Star%20Image%20Filtrations.html), and fed to the clique complex filtration construction. With a few simple modifications, the code can be adapted to the API of ``VietorisRipsPersistence``." + "- ``VietorisRipsPersistence`` builds on the [giotto-ph](https://github.com/giotto-ai/giotto-ph) and [ripser.py](https://ripser.scikit-tda.org/index.html) projects. The latter's website contains two tutorials on additional ways in which graphs can be constructed from [time series data](https://ripser.scikit-tda.org/notebooks/Lower%20Star%20Time%20Series.html) or [image data](https://ripser.scikit-tda.org/notebooks/Lower%20Star%20Image%20Filtrations.html), and fed to the clique complex filtration construction. With a few simple modifications, the code can be adapted to the API of ``VietorisRipsPersistence``." ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -671,7 +669,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.9.2" } }, "nbformat": 4, diff --git a/gtda/externals/__init__.py b/gtda/externals/__init__.py index b39577ddf..589361415 100644 --- a/gtda/externals/__init__.py +++ b/gtda/externals/__init__.py @@ -5,13 +5,12 @@ from .modules.gtda_wasserstein import wasserstein_distance from .modules.gtda_collapser import flag_complex_collapse_edges_dense, \ flag_complex_collapse_edges_sparse, flag_complex_collapse_edges_coo -from .python import ripser, SparseRipsComplex, CechComplex, CubicalComplex, \ +from .python import SparseRipsComplex, CechComplex, CubicalComplex, \ PeriodicCubicalComplex, SimplexTree, WitnessComplex, StrongWitnessComplex __all__ = [ 'bottleneck_distance', 'wasserstein_distance', - 'ripser', 'SparseRipsComplex', 'CechComplex', 'CubicalComplex', diff --git a/gtda/externals/bindings/ripser_bindings.cpp b/gtda/externals/bindings/ripser_bindings.cpp deleted file mode 100644 index e28b5fe05..000000000 --- a/gtda/externals/bindings/ripser_bindings.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/****************************************************************************** - * Author: Julián Burella Pérez - * Description: ripser's rips persistence interfacing with pybind11 - * License: TBD - *****************************************************************************/ - -#include - -// PYBIND11 -#include -#include -#include -#include - -namespace py = pybind11; - -#if defined USE_COEFFICIENTS -PYBIND11_MODULE(gtda_ripser_coeff, m) { -#else -PYBIND11_MODULE(gtda_ripser, m) { -#endif - - using namespace pybind11::literals; - m.doc() = "Ripser python interface"; - - // Because `ripser` could have two different modules after compilation - // It's necessary to add `py::module_local()` to prevent following issue: - // ImportError: generic_type: type "ripserResults" is already registered! - // When same python module imports gtda_ripser and gtda_ripser_coeff - py::class_(m, "ripserResults", py::module_local()) - .def_readwrite("births_and_deaths_by_dim", - &ripserResults::births_and_deaths_by_dim) - .def_readwrite("cocycles_by_dim", &ripserResults::cocycles_by_dim) - .def_readwrite("num_edges", &ripserResults::num_edges); - - m.def("rips_dm", - [](py::array_t& D, int N, int modulus, int dim_max, - float threshold, int do_cocycles) { - ripserResults ret = rips_dm((float*)D.request().ptr, N, modulus, - dim_max, threshold, do_cocycles); - return ret; - }, - "D"_a, "N"_a, "modulus"_a, "dim_max"_a, "threshold"_a, "do_cocycles"_a, - "ripser distance matrix"); - m.def("rips_dm_sparse", - [](py::array_t& I, py::array_t& J, py::array_t& V, - int NEdges, int N, int modulus, int dim_max, float threshold, - int do_cocycles) { - ripserResults ret = - rips_dm_sparse((int*)I.request().ptr, (int*)J.request().ptr, - (float*)V.request().ptr, NEdges, N, modulus, - dim_max, threshold, do_cocycles); - return ret; - }, - "I"_a, "J"_a, "V"_a, "NEdges"_a, "N"_a, "modulus"_a, "dim_max"_a, - "threshold"_a, "do_cocycles"_a, "ripser sparse distance matrix"); -} diff --git a/gtda/externals/python/__init__.py b/gtda/externals/python/__init__.py index 341ef75fb..d44f3846d 100644 --- a/gtda/externals/python/__init__.py +++ b/gtda/externals/python/__init__.py @@ -1,4 +1,3 @@ -from .ripser_interface import ripser from .cubical_complex_interface import CubicalComplex from .simplex_tree_interface import SimplexTree from .periodic_cubical_complex_interface import PeriodicCubicalComplex diff --git a/gtda/externals/python/ripser_interface.py b/gtda/externals/python/ripser_interface.py deleted file mode 100644 index 45eab891a..000000000 --- a/gtda/externals/python/ripser_interface.py +++ /dev/null @@ -1,520 +0,0 @@ -""" -MIT License -Copyright (c) 2018 Christopher Tralie and Nathaniel Saul -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -""" - -import gc -from warnings import catch_warnings, simplefilter - -import numpy as np -from scipy.sparse import issparse, csr_matrix -from scipy.spatial.distance import squareform -from sklearn.exceptions import EfficiencyWarning -from sklearn.metrics.pairwise import pairwise_distances -from sklearn.neighbors import kneighbors_graph -from sklearn.utils.validation import column_or_1d - -from ..modules import gtda_ripser, gtda_ripser_coeff, gtda_collapser - - -def DRFDM(DParam, maxHomDim, thresh=-1, coeff=2, do_cocycles=0): - if coeff == 2: - ret = gtda_ripser.rips_dm(DParam, DParam.shape[0], coeff, maxHomDim, - thresh, do_cocycles) - else: - ret = gtda_ripser_coeff.rips_dm(DParam, DParam.shape[0], coeff, - maxHomDim, thresh, do_cocycles) - return ret - - -def DRFDMSparse(I, J, V, N, maxHomDim, thresh=-1, coeff=2, do_cocycles=0): - if coeff == 2: - ret = gtda_ripser.rips_dm_sparse(I, J, V, I.size, N, coeff, maxHomDim, - thresh, do_cocycles) - else: - ret = gtda_ripser_coeff.rips_dm_sparse(I, J, V, I.size, N, coeff, - maxHomDim, thresh, do_cocycles) - return ret - - -def dpoint2pointcloud(X, i, metric): - """Return the distance from the ith point in a Euclidean point cloud - to the rest of the points. - - Parameters - ---------- - X: ndarray (n_samples, n_features) - A numpy array of data - - i: int - The index of the point from which to return all distances - - metric: string or callable - The metric to use when calculating distance between instances in a - feature array - - """ - ds = pairwise_distances(X, X[i, :][None, :], metric=metric).flatten() - ds[i] = 0 - return ds - - -def get_greedy_perm(X, n_perm=None, metric="euclidean"): - """Compute a furthest point sampling permutation of a set of points - - Parameters - ---------- - X: ndarray (n_samples, n_features) - A numpy array of either data or distance matrix - - n_perm: int - Number of points to take in the permutation - - metric: string or callable - The metric to use when calculating distance between instances in a - feature array - - Returns - ------- - idx_perm: ndarray(n_perm) - Indices of points in the greedy permutation - - lambdas: ndarray(n_perm) - Covering radii at different points - - dperm2all: ndarray(n_perm, n_samples) - Distances from points in the greedy permutation to points - in the original point set - - """ - if not n_perm: - n_perm = X.shape[0] - # By default, takes the first point in the list to be the - # first point in the permutation, but could be random - idx_perm = np.zeros(n_perm, dtype=np.int64) - lambdas = np.zeros(n_perm) - if metric == 'precomputed': - dpoint2all = lambda i: X[i, :] - else: - dpoint2all = lambda i: dpoint2pointcloud(X, i, metric) - ds = dpoint2all(0) - dperm2all = [ds] - for i in range(1, n_perm): - idx = np.argmax(ds) - idx_perm[i] = idx - lambdas[i - 1] = ds[idx] - dperm2all.append(dpoint2all(idx)) - ds = np.minimum(ds, dperm2all[-1]) - lambdas[-1] = np.max(ds) - dperm2all = np.array(dperm2all) - return idx_perm, lambdas, dperm2all - - -def _resolve_symmetry_conflicts(coo): - """Given a sparse matrix in COO format, filter out any entry at location - (i, j) strictly below the diagonal if the entry at (j, i) is also - stored. Return row, column and data information for an upper diagonal - COO matrix.""" - _row, _col, _data = coo.row, coo.col, coo.data - - below_diag = _col < _row - # Check if there is anything below the main diagonal - if below_diag.any(): - # Initialize filtered COO data with information in the upper triangle - in_upper_triangle = np.logical_not(below_diag) - row = _row[in_upper_triangle] - col = _col[in_upper_triangle] - data = _data[in_upper_triangle] - - # Filter out entries below the diagonal for which entries at - # transposed positions are already available - upper_triangle_indices = set(zip(row, col)) - additions = tuple( - zip(*((j, i, x) for (i, j, x) in zip(_row[below_diag], - _col[below_diag], - _data[below_diag]) - if (j, i) not in upper_triangle_indices)) - ) - # Add surviving entries below the diagonal to final COO data - if additions: - row_add, col_add, data_add = additions - row = np.concatenate([row, row_add]) - col = np.concatenate([col, col_add]) - data = np.concatenate([data, data_add]) - - return row, col, data - else: - return _row, _col, _data - - -def _collapse_coo(row, col, data, thresh): - """Run edge collapser on off-diagonal data and then reinsert diagonal - data.""" - diag = row == col - row_diag, col_diag, data_diag = row[diag], col[diag], data[diag] - row, col, data = gtda_collapser. \ - flag_complex_collapse_edges_coo(row, col, data, thresh) - - return (np.hstack([row_diag, row]), - np.hstack([col_diag, col]), - np.hstack([data_diag, data])) - - -def _compute_dtm_weights(dm, n_neighbors, weights_r): - with catch_warnings(): - simplefilter("ignore", category=EfficiencyWarning) - knn = kneighbors_graph(dm, n_neighbors=n_neighbors, - metric="precomputed", mode="distance", - include_self=False) - - weights = np.squeeze(np.asarray(knn.power(weights_r).sum(axis=1))) - weights /= n_neighbors + 1 - weights **= (1 / weights_r) - weights *= 2 - - return weights - - -def _weight_filtration(dist, weights_x, weights_y, p): - """Create a weighted distance matrix. For dense data, `weights_x` is a - column vector, `weights_y` is a 1D array, `dist` is the original distance - matrix, and the computations exploit array broadcasting. For sparse data, - all three are 1D arrays. `p` can only be ``numpy.inf``, ``1``, or ``2``.""" - if p == np.inf: - return np.maximum(dist, np.maximum(weights_x, weights_y)) - elif p == 1: - return np.where(dist <= np.abs(weights_x - weights_y) / 2, - np.maximum(weights_x, weights_y), - dist + (weights_x + weights_y) / 2) - elif p == 2: - with np.errstate(divide='ignore', invalid='ignore'): - return np.where( - dist <= np.abs(weights_x**2 - weights_y**2)**.5 / 2, - np.maximum(weights_x, weights_y), - np.sqrt((dist**2 + ((weights_x + weights_y) / 2)**2) * - (dist**2 + ((weights_x - weights_y) / 2)**2)) / dist - ) - else: - raise NotImplementedError(f"Weighting not supported for p = {p}") - - -def _weight_filtration_sparse(row, col, data, weights, p): - weights_x = weights[row] - weights_y = weights[col] - - return _weight_filtration(data, weights_x, weights_y, p) - - -def _weight_filtration_dense(dm, weights, p): - weights_2d = weights[:, None] - - return _weight_filtration(dm, weights_2d, weights, p) - - -def _check_weights(weights, n_points): - weights = column_or_1d(weights) - if len(weights) != n_points: - raise ValueError( - f"Input distance/adjacency matrix implies {n_points} " - f"vertices but {len(weights)} weights were passed." - ) - if np.any(weights < 0): - raise ValueError("All weights must be non-negative." - "Negative weights passed.") - - return weights - - -def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean", - metric_params={}, weights=None, weight_params=None, - collapse_edges=False, n_perm=None): - """Compute persistence diagrams for X data array using Ripser [1]_. - - If X is not a distance matrix, it will be converted to a distance matrix - using the chosen metric. - - Parameters - ---------- - X : ndarray of shape (n_samples, n_features) - A numpy array of either data or distance matrix. Can also be a sparse - distance matrix of type scipy.sparse - - maxdim : int, optional, default: ``1`` - Maximum homology dimension computed. Will compute all dimensions lower - than and equal to this value. For 1, H_0 and H_1 will be computed. - - thresh : float, optional, default: ``numpy.inf`` - Maximum distances considered when constructing filtration. If - ``numpy.inf``, compute the entire filtration. - - coeff : int prime, optional, default: ``2`` - Compute homology with coefficients in the prime field Z/pZ for p=coeff. - - metric : string or callable, optional, default: ``'euclidean'`` - The metric to use when calculating distance between instances in a - feature array. If set to ``'precomputed'``, input data is interpreted - as a distance matrix or of adjacency matrices of a weighted undirected - graph. If a string, it must be one of the options allowed by - :func:`scipy.spatial.distance.pdist` for its metric parameter, or a - or a metric listed in - :obj:`sklearn.pairwise.PAIRWISE_DISTANCE_FUNCTIONS`, including - ``'euclidean'``, ``'manhattan'`` or ``'cosine'``. If a callable, it - should take pairs of vectors (1D arrays) as input and, for each two - vectors in a pair, it should return a scalar indicating the - distance/dissimilarity between them. - - metric_params : dict, optional, default: ``{}`` - Additional parameters to be passed to the distance function. - - weights : ``"DTM"``, ndarray or None, optional, default: ``None`` - If not ``None``, the persistence of a weighted Vietoris-Rips filtration - is computed as described in [3]_, and this parameter determines the - vertex weights in the modified adjacency matrix. ``"DTM"`` denotes the - empirical distance-to-measure function defined, following [3]_, by - - .. math:: w(x) = 2\\left\\(\\frac{1}{n+1} \\sum_{k=1}^n - \\mathrm{dist}(x, x_k)^r \\right)^{1/r}. - - Here, :math:`\\mathrm{dist}` is the distance metric used, :math:`x_k` - is the :math:`k`-th :math:`\\mathrm{dist}`-nearest neighbour of - :math:`x` (:math:`x` is not considered a neighbour of itself), - :math:`n` is the number of nearest neighbors to include, and :math:`r` - is a parameter (see `weight_params`). If an ndarray is passed, it is - interpreted as a user-defined list of vertex weights for the modified - adjacency matrix. In either case, the edge weights - :math:`\\{w_{ij}\\}_{i, j}` for the modified adjacency matrix are - computed from the original distances and the new vertex weights - :math:`\\{w_i\\}_i` as follows: - - .. math:: w_{ij} = \\begin{cases} \\max\\{ w_i, w_j \\} - &\\text{if } 2\\mathrm{dist}_{ij} \\leq - |w_i^p - w_j^p|^{\\frac{1}{p}} \\ - t &\\text{otherwise} \\end{cases} - - where :math:`t` is the only positive root of - - .. math:: 2 \\mathrm{dist}_{ij} = (t^p - w_i^p)^\\frac{1}{p} + - (t^p - w_j^p)^\\frac{1}{p} - - and :math:`p` is a parameter specified in `metric_params`. - - weight_params : dict or None, optional, default: ``None`` - Parameters to be used in the case of weighted filtrations, see - `weights`. In this case, the key ``"p"`` determines the power to be - used in computing edge weights from vertex weights. It can be one of - ``1``, ``2`` or ``np.inf`` and defaults to ``1``. If `weights` is - ``"DTM"``, the additional keys ``"r"`` (default: ``2``) and - ``"n_neighbors"`` (default: ``3``) are available (see `weights`, - where the latter corresponds to :math:`n`). - - - collapse_edges : bool, optional, default: ``False`` - Whether to use the edge collapse algorithm as described in [2]_ prior - to calling ``ripser``. - - n_perm : int or None, optional, default: ``None`` - The number of points to subsample in a "greedy permutation", or a - furthest point sampling of the points. These points will be used in - lieu of the full point cloud for a faster computation, at the expense - of some accuracy, which can be bounded as a maximum bottleneck distance - to all diagrams on the original point set. - - Returns - ------- - A dictionary holding all of the results of the computation - { - 'dgms': list (size maxdim) of ndarray (n_pairs, 2) - A list of persistence diagrams, one for each dimension less - than maxdim. Each diagram is an ndarray of size (n_pairs, 2) - with the first column representing the birth time and the - second column representing the death time of each pair. - 'num_edges': int - The number of edges added during the computation - 'dperm2all': None or ndarray (n_perm, n_samples) - ``None`` if n_perm is ``None``. Otherwise, the distance from all - points in the permutation to all points in the dataset. - 'idx_perm': ndarray(n_perm) if n_perm > 0 - Index into the original point cloud of the points used - as a subsample in the greedy permutation - 'r_cover': float - Covering radius of the subsampled points. - If n_perm <= 0, then the full point cloud was used and this is 0 - } - - Notes - ----- - `Ripser `_ is used as a C++ backend - for computing Vietoris–Rips persistent homology. Python bindings were - modified for performance from the `ripser.py - `_ package. - - `GUDHI `_ is used as a C++ backend - for the edge collapse algorithm described in [2]_. - - References - ---------- - .. [1] U. Bauer, "Ripser: efficient computation of Vietoris–Rips - persistence barcodes", 2019; `arXiv:1908.02518 - `_. - - .. [2] J.-D. Boissonnat and S. Pritam, "Edge Collapse and Persistence of - Flag Complexes"; in *36th International Symposium on Computational - Geometry (SoCG 2020)*, pp. 19:1–19:15, Schloss - Dagstuhl-Leibniz–Zentrum für Informatik, 2020; - `DOI: 10.4230/LIPIcs.SoCG.2020.19 - `_. - - .. [3] H. Anai et al, "DTM-Based Filtrations"; in *Topological Data - Analysis* (Abel Symposia, vol 15), Springer, 2020; - `DOI: 10.1007/978-3-030-43408-3_2 - `_. - - """ - if n_perm and issparse(X): - raise Exception("Greedy permutation is not supported for sparse " - "distance matrices") - if n_perm and n_perm > X.shape[0]: - raise Exception("Number of points in greedy permutation is greater " - "than number of points in the point cloud") - if n_perm and n_perm < 0: - raise Exception("There should be a strictly positive number of points " - "in the greedy permutation") - - idx_perm = np.arange(X.shape[0]) - r_cover = 0.0 - if n_perm: - idx_perm, lambdas, dperm2all = \ - get_greedy_perm(X, n_perm=n_perm, metric=metric) - r_cover = lambdas[-1] - dm = dperm2all[:, idx_perm] - else: - if metric == 'precomputed': - dm = X - else: - dm = pairwise_distances(X, metric=metric, **metric_params) - dperm2all = None - - n_points = max(dm.shape) - - use_sparse_computer = True - if issparse(dm): - row, col, data = _resolve_symmetry_conflicts(dm.tocoo()) # Upper diag - - if weights is not None: - if (dm < 0).nnz: - raise ValueError("Distance matrix has negative entries. " - "Weighted Rips filtration unavailable.") - - weight_params = {} if weight_params is None else weight_params - weights_p = weight_params.get("p", 1) - - # Restrict to off-diagonal entries for weights computation since - # diagonal ones are given by `weights`. Explicitly set the diagonal - # to 0 -- this is also important for DTM since otherwise - # kneighbors_graph with include_self=False skips the first true - # neighbor. - off_diag = row != col - row, col, data = (np.hstack([row[off_diag], np.arange(n_points)]), - np.hstack([col[off_diag], np.arange(n_points)]), - np.hstack([data[off_diag], np.zeros(n_points)])) - - if isinstance(weights, str) and (weights == "DTM"): - n_neighbors = weight_params.get("n_neighbors", 3) - weights_r = weight_params.get("r", 2) - - # CSR matrix must be symmetric for kneighbors_graph to give - # correct results - dm = csr_matrix((np.hstack([data, data[:-n_points]]), - (np.hstack([row, col[:-n_points]]), - np.hstack([col, row[:-n_points]])))) - weights = _compute_dtm_weights(dm, n_neighbors, weights_r) - else: - weights = _check_weights(weights, n_points) - - data = _weight_filtration_sparse(row, col, data, weights, - weights_p) - - if collapse_edges: - row, col, data = _collapse_coo(row, col, data, thresh) - - else: - if weights is not None: - if (dm < 0).any(): - raise ValueError("Distance matrix has negative entries. " - "Weighted Rips filtration unavailable.") - - weight_params = {} if weight_params is None else weight_params - weights_p = weight_params.get("p", 1) - - if isinstance(weights, str) and (weights == "DTM"): - n_neighbors = weight_params.get("n_neighbors", 3) - weights_r = weight_params.get("r", 2) - - if not np.array_equal(dm, dm.T): - dm = np.triu(dm, k=1) - dm += dm.T - - weights = _compute_dtm_weights(dm, n_neighbors, weights_r) - else: - weights = _check_weights(weights, n_points) - - dm = _weight_filtration_dense(dm, weights, weights_p) - np.fill_diagonal(dm, weights) - - if (dm.diagonal() != 0).any(): - # Convert to sparse format, because currently that's the only - # one handling nonzero births - (row, col) = np.triu_indices_from(dm) - data = dm[(row, col)] - if collapse_edges: - row, col, data = _collapse_coo(row, col, data, thresh) - elif collapse_edges: - row, col, data = gtda_collapser.\ - flag_complex_collapse_edges_dense(dm, thresh) - else: - use_sparse_computer = False - - if use_sparse_computer: - res = DRFDMSparse(np.asarray(row, dtype=np.int32, order="C"), - np.asarray(col, dtype=np.int32, order="C"), - np.asarray(data, dtype=np.float32, order="C"), - n_points, - maxdim, - thresh, - coeff) - else: - # Only consider strict upper diagonal - DParam = squareform(dm, checks=False).astype(np.float32) - # Run garbage collector to free up memory taken by `dm` - del dm - gc.collect() - res = DRFDM(DParam, maxdim, thresh, coeff) - - # Unwrap persistence diagrams - dgms = res.births_and_deaths_by_dim - for dim in range(len(dgms)): - N = int(len(dgms[dim]) / 2) - dgms[dim] = np.reshape(np.array(dgms[dim]), [N, 2]) - - ret = {"dgms": dgms, - "num_edges": res.num_edges, - "dperm2all": dperm2all, - "idx_perm": idx_perm, - "r_cover": r_cover} - - return ret diff --git a/gtda/externals/python/tests/test_ripser.py b/gtda/externals/python/tests/test_ripser.py deleted file mode 100644 index 2794c2f84..000000000 --- a/gtda/externals/python/tests/test_ripser.py +++ /dev/null @@ -1,166 +0,0 @@ -import numpy as np -import pytest -from hypothesis import given, settings -from hypothesis.extra.numpy import arrays -from hypothesis.strategies import floats, integers, composite -from numpy.testing import assert_almost_equal -from scipy.sparse import coo_matrix - -from gtda.externals import ripser - - -@composite -def get_dense_distance_matrices(draw): - """Generate 2d dense square arrays of floats, with zero along the - diagonal.""" - shapes = draw(integers(min_value=2, max_value=30)) - dm = draw(arrays(dtype=float, - elements=floats(allow_nan=False, - allow_infinity=True, - min_value=0), - shape=(shapes, shapes), unique=False)) - np.fill_diagonal(dm, 0) - return dm - - -@composite -def get_sparse_distance_matrices(draw): - """Generate 2d upper triangular sparse matrices of floats, with zero along - the diagonal.""" - shapes = draw(integers(min_value=2, max_value=40)) - dm = draw(arrays(dtype=float, - elements=floats(allow_nan=False, - allow_infinity=True, - min_value=0), - shape=(shapes, shapes), unique=False)) - dm = np.triu(dm, k=1) - dm = coo_matrix(dm) - row, col, data = dm.row, dm.col, dm.data - not_inf_idx = data != np.inf - row = row[not_inf_idx] - col = col[not_inf_idx] - data = data[not_inf_idx] - shape_kwargs = {} if data.size else {"shape": (0, 0)} - dm = coo_matrix((data, (row, col)), **shape_kwargs) - return dm - - -@settings(deadline=500) -@given(dm=get_sparse_distance_matrices()) -def test_coo_below_diagonal_and_mixed_same_as_above(dm): - """Test that if we feed sparse matrices representing the same undirected - weighted graph we obtain the same results regardless of whether all entries - are above the diagonal, all are below the diagonal, or neither. - Furthermore, test that conflicts between stored data in the upper and lower - triangle are resolved in favour of the upper triangle.""" - ripser_kwargs = {"maxdim": 2, "metric": "precomputed"} - - pd_above = ripser(dm, **ripser_kwargs)['dgms'] - - pd_below = ripser(dm.T, **ripser_kwargs)['dgms'] - - _row, _col, _data = dm.row, dm.col, dm.data - coo_shape_kwargs = {} if _data.size else {"shape": (0, 0)} - to_transpose_mask = np.full(len(_row), False) - to_transpose_mask[np.random.choice(np.arange(len(_row)), - size=len(_row) // 2, - replace=False)] = True - row = np.concatenate([_col[to_transpose_mask], _row[~to_transpose_mask]]) - col = np.concatenate([_row[to_transpose_mask], _col[~to_transpose_mask]]) - dm_mixed = coo_matrix((_data, (row, col)), **coo_shape_kwargs) - pd_mixed = ripser(dm_mixed, **ripser_kwargs)['dgms'] - - row = np.concatenate([row, _row[to_transpose_mask]]) - col = np.concatenate([col, _col[to_transpose_mask]]) - data = np.random.random(len(row)) - data[:len(_row)] = _data - dm_conflicts = coo_matrix((data, (row, col)), **coo_shape_kwargs) - pd_conflicts = ripser(dm_conflicts, **ripser_kwargs)['dgms'] - - for i in range(ripser_kwargs["maxdim"] + 1): - pd_above[i] = np.sort(pd_above[i], axis=0) - pd_below[i] = np.sort(pd_below[i], axis=0) - pd_mixed[i] = np.sort(pd_mixed[i], axis=0) - pd_conflicts[i] = np.sort(pd_conflicts[i], axis=0) - assert_almost_equal(pd_above[i], pd_below[i]) - - -@pytest.mark.parametrize('thresh', [False, True]) -@pytest.mark.parametrize('coeff', [2, 7]) -@settings(deadline=500) -@given(dm=get_dense_distance_matrices()) -def test_collapse_consistent_with_no_collapse_dense(thresh, coeff, dm): - thresh = np.max(dm) / 2 if thresh else np.inf - maxdim = 3 - pd_collapse = ripser(dm, thresh=thresh, maxdim=maxdim, coeff=coeff, - metric='precomputed', collapse_edges=True)['dgms'] - pd_no_collapse = ripser(dm, thresh=thresh, maxdim=maxdim, coeff=coeff, - metric='precomputed', collapse_edges=False)['dgms'] - for i in range(maxdim + 1): - pd_collapse[i] = np.sort(pd_collapse[i], axis=0) - pd_no_collapse[i] = np.sort(pd_no_collapse[i], axis=0) - assert_almost_equal(pd_collapse[i], pd_no_collapse[i]) - - -@pytest.mark.parametrize('thresh', [False, True]) -@pytest.mark.parametrize('coeff', [2, 7]) -@settings(deadline=500) -@given(dm=get_sparse_distance_matrices()) -def test_collapse_consistent_with_no_collapse_coo(thresh, coeff, dm): - if thresh and dm.nnz: - thresh = np.max(dm) / 2 - else: - thresh = np.inf - maxdim = 3 - pd_collapse = ripser(dm, thresh=thresh, maxdim=maxdim, coeff=coeff, - metric='precomputed', collapse_edges=True)['dgms'] - pd_no_collapse = ripser(dm, thresh=thresh, maxdim=maxdim, coeff=coeff, - metric='precomputed', collapse_edges=False)['dgms'] - for i in range(maxdim + 1): - pd_collapse[i] = np.sort(pd_collapse[i], axis=0) - pd_no_collapse[i] = np.sort(pd_no_collapse[i], axis=0) - assert_almost_equal(pd_collapse[i], pd_no_collapse[i]) - - -def test_collapser_with_negative_weights(): - """Test that collapser works as expected when some of the vertex and edge - weights are negative.""" - n_points = 20 - dm = np.random.random((n_points, n_points)) - np.fill_diagonal(dm, -np.random.random(n_points)) - dm -= 0.2 - dm_sparse = coo_matrix(dm) - - maxdim = 2 - pd_collapse_dense = ripser(dm, metric='precomputed', maxdim=maxdim, - collapse_edges=True)['dgms'] - pd_collapse_sparse = ripser(dm_sparse, metric='precomputed', - maxdim=maxdim, collapse_edges=True)['dgms'] - pd_no_collapse = ripser(dm, metric='precomputed', maxdim=maxdim, - collapse_edges=False)['dgms'] - - for i in range(maxdim + 1): - pd_collapse_dense[i] = np.sort(pd_collapse_dense[i], axis=0) - pd_collapse_sparse[i] = np.sort(pd_collapse_dense[i], axis=0) - pd_no_collapse[i] = np.sort(pd_no_collapse[i], axis=0) - assert_almost_equal(pd_collapse_dense[i], pd_no_collapse[i]) - assert_almost_equal(pd_collapse_sparse[i], pd_no_collapse[i]) - - -def test_coo_results_independent_of_order(): - """Regression test for PR #465""" - data = np.array([6., 8., 2., 4., 5., 9., 10., 3., 1., 1.]) - row = np.array([0, 0, 0, 0, 1, 1, 1, 2, 2, 3]) - col = np.array([4, 1, 3, 2, 4, 3, 2, 3, 4, 4]) - dm = coo_matrix((data, (row, col))) - diagrams = ripser(dm, metric="precomputed")['dgms'] - diagrams_csr = ripser(dm.tocsr(), metric="precomputed")['dgms'] - expected = [np.array([[0., 1.], - [0., 1.], - [0., 2.], - [0., 5.], - [0., np.inf]]), - np.array([], dtype=float).reshape(0, 2)] - for i in range(2): - assert np.array_equal(diagrams[i], expected[i]) - assert np.array_equal(diagrams_csr[i], expected[i]) diff --git a/gtda/externals/ripser b/gtda/externals/ripser deleted file mode 160000 index f784e1f38..000000000 --- a/gtda/externals/ripser +++ /dev/null @@ -1 +0,0 @@ -Subproject commit f784e1f381094219316855c4dc6c2abd494a8a07 diff --git a/gtda/externals/robinhood b/gtda/externals/robinhood deleted file mode 160000 index c0801327e..000000000 --- a/gtda/externals/robinhood +++ /dev/null @@ -1 +0,0 @@ -Subproject commit c0801327ea589d8f0f941aa5d3ca4f9f770f4ea9 diff --git a/gtda/homology/simplicial.py b/gtda/homology/simplicial.py index 81cd2199f..40a8bfeac 100644 --- a/gtda/homology/simplicial.py +++ b/gtda/homology/simplicial.py @@ -5,6 +5,7 @@ from types import FunctionType import numpy as np +from gph import ripser_parallel as ripser from joblib import Parallel, delayed from pyflagser import flagser_weighted from scipy.sparse import coo_matrix @@ -15,7 +16,7 @@ from ._utils import _postprocess_diagrams from ..base import PlotterMixin -from ..externals.python import ripser, SparseRipsComplex, CechComplex +from ..externals.python import SparseRipsComplex, CechComplex from ..plotting import plot_diagram from ..utils._docs import adapt_fit_transform_docs from ..utils.intervals import Interval @@ -125,19 +126,18 @@ class VietorisRipsPersistence(BaseEstimator, TransformerMixin, PlotterMixin): Notes ----- - `Ripser `_ [1]_ is used as a C++ backend - for computing Vietoris–Rips persistent homology. Python bindings were - modified for performance from the `ripser.py - `_ package. + `giotto-ph `_ [1]_ is used as a C++ + backend for computing Vietoris–Rips persistent homology. `GUDHI `_ is used as a C++ backend for the edge collapse algorithm described in [2]_. References ---------- - .. [1] U. Bauer, "Ripser: efficient computation of Vietoris–Rips - persistence barcodes", 2019; `arXiv:1908.02518 - `_. + .. [1] J. Burella Pérez et al, "giotto-ph: A Python Library for + High-Performance Computation of Persistent Homology of Vietoris–Rips + Filtrations", 2021; `arXiv:2107.05412 + `_. .. [2] J.-D. Boissonnat and S. Pritam, "Edge Collapse and Persistence of Flag Complexes"; in *36th International Symposium on Computational @@ -489,19 +489,18 @@ class WeightedRipsPersistence(BaseEstimator, TransformerMixin, PlotterMixin): Notes ----- - `Ripser `_ [1]_ is used as a C++ backend - for computing Vietoris–Rips persistent homology. Python bindings were - modified for performance from the `ripser.py - `_ package. + `giotto-ph `_ [1]_ is used as a C++ + backend for computing Vietoris–Rips persistent homology. `GUDHI `_ is used as a C++ backend for the edge collapse algorithm described in [2]_. References ---------- - .. [1] U. Bauer, "Ripser: efficient computation of Vietoris–Rips - persistence barcodes", 2019; `arXiv:1908.02518 - `_. + .. [1] J. Burella Pérez et al, "giotto-ph: A Python Library for + High-Performance Computation of Persistent Homology of Vietoris–Rips + Filtrations", 2021; `arXiv:2107.05412 + `_. .. [2] J.-D. Boissonnat and S. Pritam, "Edge Collapse and Persistence of Flag Complexes"; in *36th International Symposium on Computational @@ -1079,16 +1078,15 @@ class WeakAlphaPersistence(BaseEstimator, TransformerMixin, PlotterMixin): Notes ----- Delaunay triangulation are computed by :class:`scipy.spatial.Delaunay`. - `Ripser `_ [1]_ is used as a C++ backend - for computing Vietoris–Rips persistent homology. Python bindings were - modified for performance from the `ripser.py - `_ package. + `giotto-ph `_ [1]_ is used as a C++ + backend for computing Vietoris–Rips persistent homology. References ---------- - .. [1] U. Bauer, "Ripser: efficient computation of Vietoris–Rips - persistence barcodes", 2019; `arXiv:1908.02518 - `_. + .. [1] J. Burella Pérez et al, "giotto-ph: A Python Library for + High-Performance Computation of Persistent Homology of Vietoris–Rips + Filtrations", 2021; `arXiv:2107.05412 + `_. """ diff --git a/requirements.txt b/requirements.txt index b5c0517ff..866b12934 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,7 @@ numpy >= 1.19.1 scipy >= 1.5.0 joblib >= 0.16.0 scikit-learn >= 0.23.1 +giotto-ph >= 0.2.1 pyflagser >= 0.4.3 python-igraph >= 0.8.2 plotly >= 4.8.2 diff --git a/setup.cfg b/setup.cfg index 640b86b31..c3fe714f0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -12,7 +12,6 @@ addopts = --ignore gtda/externals/gudhi-devel/ --ignore gtda/externals/hera/ --ignore gtda/externals/pybind11/ - --ignore gtda/externals/ripser/ -ra From db91b973e4d7975e89ded61cc822c8a2644a2ae5 Mon Sep 17 00:00:00 2001 From: Umberto Lupo Date: Tue, 28 Dec 2021 21:06:42 +0100 Subject: [PATCH 2/6] Remove reference to GUDHI for edge collapser --- gtda/homology/simplicial.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/gtda/homology/simplicial.py b/gtda/homology/simplicial.py index 40a8bfeac..5f70c472a 100644 --- a/gtda/homology/simplicial.py +++ b/gtda/homology/simplicial.py @@ -127,10 +127,7 @@ class VietorisRipsPersistence(BaseEstimator, TransformerMixin, PlotterMixin): Notes ----- `giotto-ph `_ [1]_ is used as a C++ - backend for computing Vietoris–Rips persistent homology. - - `GUDHI `_ is used as a C++ backend - for the edge collapse algorithm described in [2]_. + backend for computing Vietoris–Rips persistent homology and edge collapses. References ---------- @@ -490,10 +487,7 @@ class WeightedRipsPersistence(BaseEstimator, TransformerMixin, PlotterMixin): Notes ----- `giotto-ph `_ [1]_ is used as a C++ - backend for computing Vietoris–Rips persistent homology. - - `GUDHI `_ is used as a C++ backend - for the edge collapse algorithm described in [2]_. + backend for computing Vietoris–Rips persistent homology and edge collapses. References ---------- From 95a3d0a96c34738de57ca1700d6e947d7ea1c5ff Mon Sep 17 00:00:00 2001 From: Umberto Lupo Date: Tue, 28 Dec 2021 21:12:10 +0100 Subject: [PATCH 3/6] Remove GUDHI edge collapser --- CMakeLists.txt | 26 ---- azure-pipelines.yml | 4 +- gtda/externals/__init__.py | 7 +- .../externals/bindings/collapser_bindings.cpp | 142 ------------------ gtda/externals/python/tests/test_collapser.py | 87 ----------- 5 files changed, 3 insertions(+), 263 deletions(-) delete mode 100644 gtda/externals/bindings/collapser_bindings.cpp delete mode 100644 gtda/externals/python/tests/test_collapser.py diff --git a/CMakeLists.txt b/CMakeLists.txt index 93762eeb3..aa4b4479c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -288,29 +288,3 @@ else() target_compile_options(gtda_cech_complex PUBLIC $<$: -Ofast -shared -pthread -fPIC -fwrapv -Wall -fno-strict-aliasing -frounding-math>) target_compile_options(gtda_cech_complex PUBLIC $<$:-O2 -ggdb -D_GLIBCXX_DEBUG>) endif() - -####################################################################### -# Collapser # -####################################################################### - -pybind11_add_module(gtda_collapser MODULE "${BINDINGS_DIR}/collapser_bindings.cpp") -set_property(TARGET gtda_collapser PROPERTY CXX_STANDARD 14) - -if(OpenMP_FOUND) - target_link_libraries(gtda_collapser PRIVATE OpenMP::OpenMP_CXX) -endif() - -target_link_libraries(gtda_collapser LINK_PUBLIC ${Boost_LIBRARIES}) -target_compile_definitions(gtda_collapser PRIVATE BOOST_RESULT_OF_USE_DECLTYPE=1 BOOST_ALL_NO_LIB=1 BOOST_SYSTEM_NO_DEPRECATED=1) - -target_include_directories(gtda_collapser PRIVATE "${GUDHI_SRC_DIR}/common/include") -target_include_directories(gtda_collapser PRIVATE "${GUDHI_SRC_DIR}/Collapse/include") -target_include_directories(gtda_collapser PRIVATE "${EIGEN_DIR}") - -if(MSVC) - target_compile_options(gtda_collapser PUBLIC $<$: /O2 /Wall /fp:strict>) - target_compile_options(gtda_collapser PUBLIC $<$:/O1 /DEBUG:FULL /Zi /Zo>) -else() - target_compile_options(gtda_collapser PUBLIC $<$: -Ofast -shared -pthread -fPIC -fwrapv -Wall -fno-strict-aliasing -frounding-math>) - target_compile_options(gtda_collapser PUBLIC $<$:-O2 -ggdb -D_GLIBCXX_DEBUG>) -endif() diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 5d11711b7..b406ed8ed 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -46,7 +46,7 @@ jobs: - task: Cache@2 inputs: - key: '"ccache-wheels-v2021.07.27" | $(Agent.OS) | "$(python.version)"' + key: '"ccache-wheels-v2021.07.28" | $(Agent.OS) | "$(python.version)"' path: $(CCACHE_DIR) displayName: ccache @@ -146,7 +146,7 @@ jobs: - task: Cache@2 inputs: - key: '"ccache-v2021.07.27" | $(Agent.OS) | "$(python.version)"' + key: '"ccache-v2021.07.28" | $(Agent.OS) | "$(python.version)"' path: $(CCACHE_DIR) displayName: ccache diff --git a/gtda/externals/__init__.py b/gtda/externals/__init__.py index 589361415..d975d9632 100644 --- a/gtda/externals/__init__.py +++ b/gtda/externals/__init__.py @@ -3,8 +3,6 @@ from .modules.gtda_bottleneck import bottleneck_distance from .modules.gtda_wasserstein import wasserstein_distance -from .modules.gtda_collapser import flag_complex_collapse_edges_dense, \ - flag_complex_collapse_edges_sparse, flag_complex_collapse_edges_coo from .python import SparseRipsComplex, CechComplex, CubicalComplex, \ PeriodicCubicalComplex, SimplexTree, WitnessComplex, StrongWitnessComplex @@ -17,8 +15,5 @@ 'PeriodicCubicalComplex', 'SimplexTree', 'WitnessComplex', - 'StrongWitnessComplex', - 'flag_complex_collapse_edges_dense', - 'flag_complex_collapse_edges_sparse', - 'flag_complex_collapse_edges_coo' + 'StrongWitnessComplex' ] diff --git a/gtda/externals/bindings/collapser_bindings.cpp b/gtda/externals/bindings/collapser_bindings.cpp deleted file mode 100644 index f3b964e93..000000000 --- a/gtda/externals/bindings/collapser_bindings.cpp +++ /dev/null @@ -1,142 +0,0 @@ -/****************************************************************************** - * Description: gudhi's collapser interfacing with pybind11 - * License: AGPL3 - *****************************************************************************/ - -#include - -#include -#include -#include -#include - -namespace py = pybind11; - -/* GUDHI Collapser required types */ -using Filtration_value = float; -using Vertex_handle = int32_t; -using Filtered_edge = - std::tuple; -using Filtered_edge_list = std::vector; - -/* Sparse matrix input types */ -using Sparse_matrix = Eigen::SparseMatrix; -using triplet_vec = Eigen::Triplet; - -/* COO data input types */ -using Row_idx = std::vector; -using Col_idx = std::vector; -using Filtration_values = std::vector; -using COO_data = std::tuple; - -/* Dense matrix input types */ -using Distance_matrix_np = py::array_t; - -/* constants */ -const Filtration_value filtration_max = - std::numeric_limits::infinity(); - -/* Generates COO sparse matrix data from a filtered edge list - * This function is called after computing edge collapse - */ -static COO_data gen_coo_matrix(Filtered_edge_list& collapsed_edges) { - Row_idx row; - Col_idx col; - Filtration_values data; - - /* allocate memory beforehand */ - row.reserve(collapsed_edges.size()); - col.reserve(collapsed_edges.size()); - data.reserve(collapsed_edges.size()); - - for (auto& t : collapsed_edges) { - row.push_back(std::get<0>(t)); - col.push_back(std::get<1>(t)); - data.push_back(std::get<2>(t)); - } - - return COO_data(row, col, data); -} - -PYBIND11_MODULE(gtda_collapser, m) { - using namespace pybind11::literals; - - m.doc() = "Collapser bindings for GUDHI implementation"; - m.def( - "flag_complex_collapse_edges_sparse", - [](Sparse_matrix& sm, Filtration_value thresh = filtration_max) { - Filtered_edge_list graph; - - /* Convert from sparse format to Filtered_edge_list */ - /* Applying threshold to the input data */ - int size = sm.outerSize(); - for (size_t k = 0; k < size; ++k) - for (Eigen::SparseMatrix::InnerIterator it(sm, k); - it; ++it) { - /* Apply threshold to the input data, ignoring lower diagonal - * entries */ - if (it.col() > it.row() && it.value() <= thresh) - graph.push_back(Filtered_edge(it.row(), it.col(), it.value())); - } - - /* Start collapser */ - auto vec_triples = Gudhi::collapse::flag_complex_collapse_edges(graph); - - return gen_coo_matrix(vec_triples); - }, - "sm"_a, "thresh"_a = filtration_max, - "Implicitly constructs a flag complex from edges, " - "collapses edges while preserving the persistent homology"); - - m.def( - "flag_complex_collapse_edges_coo", - [](py::array_t& row_, py::array_t& col_, - py::array_t& data_, - Filtration_value thresh = filtration_max) { - Filtered_edge_list graph; - - Vertex_handle* row = (Vertex_handle*)row_.request().ptr; - Vertex_handle* col = (Vertex_handle*)col_.request().ptr; - Filtration_value* data = (Filtration_value*)data_.request().ptr; - - /* Convert from COO input format to Filtered_edge_list */ - /* Applying threshold to the input data */ - int size = data_.size(); - for (size_t k = 0; k < size; ++k) - /* Apply threshold to the input data, ignoring lower diagonal entries - */ - if (col[k] > row[k] && data[k] <= thresh) - graph.push_back(Filtered_edge(row[k], col[k], data[k])); - - /* Start collapser */ - auto vec_triples = Gudhi::collapse::flag_complex_collapse_edges(graph); - - return gen_coo_matrix(vec_triples); - }, - "row"_a, "column"_a, "data"_a, "thresh"_a = filtration_max, - "Implicitly constructs a flag complex from edges, " - "collapses edges while preserving the persistent homology"); - - m.def( - "flag_complex_collapse_edges_dense", - [](Distance_matrix_np& dm, Filtration_value thresh = filtration_max) { - Filtered_edge_list graph; - - /* Convert from dense format to Filtered edge list */ - /* Applying threshold to the input data */ - for (size_t i = 0; i < dm.shape(0); i++) - for (size_t j = 0; j < dm.shape(1); j++) - /* Apply threshold to the input data, ignoring lower diagonal - * entries */ - if (j > i && (*(dm.data(i, j)) <= thresh)) - graph.push_back(Filtered_edge(i, j, *(dm.data(i, j)))); - - /* Start collapser */ - auto vec_triples = Gudhi::collapse::flag_complex_collapse_edges(graph); - - return gen_coo_matrix(vec_triples); - }, - "dm"_a, "thresh"_a = filtration_max, - "Implicitly constructs a flag complex from edges, " - "collapses edges while preserving the persistent homology"); -} diff --git a/gtda/externals/python/tests/test_collapser.py b/gtda/externals/python/tests/test_collapser.py deleted file mode 100644 index e43bbf9e5..000000000 --- a/gtda/externals/python/tests/test_collapser.py +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env python - -""" Test comes from -https://github.com/GUDHI/gudhi-devel/blob/master/src/Collapse/example/edge_collapse_basic_example.cpp -""" - -import numpy as np -from gtda.externals.modules.gtda_collapser import \ - flag_complex_collapse_edges_dense, \ - flag_complex_collapse_edges_sparse, \ - flag_complex_collapse_edges_coo -from scipy.sparse import coo_matrix, csr_matrix - -X = np.array([[0, 1, 1.], - [1, 2, 1.], - [2, 3, 1.], - [0, 3, np.inf], - [3, 0, 1.], - [0, 2, 2.], - [1, 3, 2.]]) -tX = np.transpose(X) -tX = np.array([tX[0].astype(np.int32), tX[1].astype(np.int32), tX[2]]) -X_expected_row = [0, 1, 2] -X_expected_col = [1, 2, 3] -X_expected_data = [1.0, 1.0, 1.0] - - -def check_collapse(collapsed, removed): - coo = collapsed.tocoo() - cooT = np.array([coo.row, coo.col, coo.data]).transpose() - for elem in removed: - if (cooT == elem).all(axis=1).any(): - return False - return True - - -def test_simple_csr_example(): - X_ = csr_matrix((tX[2], (tX[0].astype(np.int32), - tX[1].astype(np.int32)))).toarray() - coo_ = flag_complex_collapse_edges_sparse(X_) - coo = coo_matrix((coo_[2], (coo_[0], coo_[1]))) - assert check_collapse(coo, [[1, 3, 2]]) - - -def test_simple_coo_example(): - coo_ = flag_complex_collapse_edges_coo( - tX[0], tX[1], tX[2]) - coo = coo_matrix((coo_[2], (coo_[0], coo_[1]))) - assert check_collapse(coo, [[1, 3, 2]]) - - -def test_simple_dense_example(): - data = csr_matrix((tX[2], (tX[0].astype(np.int32), - tX[1].astype(np.int32)))).toarray() - coo_ = flag_complex_collapse_edges_dense(data) - coo = coo_matrix((coo_[2], (coo_[0], coo_[1]))) - assert check_collapse(coo, [[1, 3, 2]]) - - -def test_csr_expected_output(): - X_ = csr_matrix((tX[2], (tX[0].astype(np.int32), - tX[1].astype(np.int32)))).toarray() - coo_ = flag_complex_collapse_edges_sparse(X_) - coo = coo_matrix((coo_[2], (coo_[0], coo_[1]))) - assert np.equal(coo.row, X_expected_row).all() - assert np.equal(coo.col, X_expected_col).all() - assert np.equal(coo.data, X_expected_data).all() - - -def test_coo_expected_output(): - coo_ = flag_complex_collapse_edges_coo( - tX[0], tX[1], tX[2]) - coo = coo_matrix((coo_[2], (coo_[0], coo_[1]))) - print(coo) - assert np.equal(coo.row, X_expected_row).all() - assert np.equal(coo.col, X_expected_col).all() - assert np.equal(coo.data, X_expected_data).all() - - -def test_dense_expected_output(): - data = csr_matrix((tX[2], (tX[0].astype(np.int32), - tX[1].astype(np.int32)))).toarray() - coo_ = flag_complex_collapse_edges_dense(data) - coo = coo_matrix((coo_[2], (coo_[0], coo_[1]))) - assert np.equal(coo.row, X_expected_row).all() - assert np.equal(coo.col, X_expected_col).all() - assert np.equal(coo.data, X_expected_data).all() From 6bd5a7ced669d197a8d00235c21429e411f90bfa Mon Sep 17 00:00:00 2001 From: Umberto Lupo Date: Wed, 29 Dec 2021 10:31:26 +0100 Subject: [PATCH 4/6] Remove eigen submodule and various target_include_directories from CMakeLists --- .gitmodules | 3 --- CMakeLists.txt | 11 ----------- gtda/externals/eigen | 1 - 3 files changed, 15 deletions(-) delete mode 160000 gtda/externals/eigen diff --git a/.gitmodules b/.gitmodules index 9544dff81..7079c110b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -5,9 +5,6 @@ [submodule "gtda/externals/hera"] path = gtda/externals/hera url = https://github.com/grey-narn/hera -[submodule "gtda/externals/eigen"] - path = gtda/externals/eigen - url = https://gitlab.com/libeigen/eigen [submodule "gtda/externals/pybind11"] path = gtda/externals/pybind11 url = https://github.com/pybind/pybind11 diff --git a/CMakeLists.txt b/CMakeLists.txt index aa4b4479c..f475a3fe7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,7 +11,6 @@ find_package(OpenMP) set(GUDHI_SRC_DIR "gtda/externals/gudhi-devel/src") set(HERA_DIR "gtda/externals/hera") -set(EIGEN_DIR "gtda/externals/eigen") ####################################################################### # Wasserstein # @@ -127,8 +126,6 @@ target_include_directories(gtda_simplex_tree PRIVATE "${GUDHI_SRC_DIR}/Cech_comp target_include_directories(gtda_simplex_tree PRIVATE "${GUDHI_SRC_DIR}/Persistent_cohomology/include") target_include_directories(gtda_simplex_tree PRIVATE "${GUDHI_SRC_DIR}/Subsampling/include") target_include_directories(gtda_simplex_tree PRIVATE "${GUDHI_SRC_DIR}/python/include") -target_include_directories(gtda_simplex_tree PRIVATE "${GUDHI_SRC_DIR}/Collapse/include") -target_include_directories(gtda_simplex_tree PRIVATE "${EIGEN_DIR}") if(MSVC) target_compile_options(gtda_simplex_tree PUBLIC $<$: /O2 /Wall /fp:strict>) @@ -185,8 +182,6 @@ target_include_directories(gtda_witness_complex PRIVATE "${GUDHI_SRC_DIR}/Cech_c target_include_directories(gtda_witness_complex PRIVATE "${GUDHI_SRC_DIR}/Persistent_cohomology/include") target_include_directories(gtda_witness_complex PRIVATE "${GUDHI_SRC_DIR}/python/include") target_include_directories(gtda_witness_complex PRIVATE "${GUDHI_SRC_DIR}/common/include") -target_include_directories(gtda_witness_complex PRIVATE "${GUDHI_SRC_DIR}/Collapse/include") -target_include_directories(gtda_witness_complex PRIVATE "${EIGEN_DIR}") if(MSVC) target_compile_options(gtda_witness_complex PUBLIC $<$: /O2 /Wall /fp:strict>) @@ -216,8 +211,6 @@ target_include_directories(gtda_strong_witness_complex PRIVATE "${GUDHI_SRC_DIR} target_include_directories(gtda_strong_witness_complex PRIVATE "${GUDHI_SRC_DIR}/Persistent_cohomology/include") target_include_directories(gtda_strong_witness_complex PRIVATE "${GUDHI_SRC_DIR}/python/include") target_include_directories(gtda_strong_witness_complex PRIVATE "${GUDHI_SRC_DIR}/common/include") -target_include_directories(gtda_strong_witness_complex PRIVATE "${GUDHI_SRC_DIR}/Collapse/include") -target_include_directories(gtda_strong_witness_complex PRIVATE "${EIGEN_DIR}") if(MSVC) target_compile_options(gtda_strong_witness_complex PUBLIC $<$: /O2 /Wall /fp:strict>) @@ -248,8 +241,6 @@ target_include_directories(gtda_sparse_rips_complex PRIVATE "${GUDHI_SRC_DIR}/Pe target_include_directories(gtda_sparse_rips_complex PRIVATE "${GUDHI_SRC_DIR}/Rips_complex/include") target_include_directories(gtda_sparse_rips_complex PRIVATE "${GUDHI_SRC_DIR}/Subsampling/include") target_include_directories(gtda_sparse_rips_complex PRIVATE "${GUDHI_SRC_DIR}/python/include") -target_include_directories(gtda_sparse_rips_complex PRIVATE "${GUDHI_SRC_DIR}/Collapse/include") -target_include_directories(gtda_sparse_rips_complex PRIVATE "${EIGEN_DIR}") if(MSVC) target_compile_options(gtda_sparse_rips_complex PUBLIC $<$: /O2 /Wall /fp:strict>) @@ -278,8 +269,6 @@ target_include_directories(gtda_cech_complex PRIVATE "${GUDHI_SRC_DIR}/Cech_comp target_include_directories(gtda_cech_complex PRIVATE "${GUDHI_SRC_DIR}/Persistent_cohomology/include") target_include_directories(gtda_cech_complex PRIVATE "${GUDHI_SRC_DIR}/python/include") target_include_directories(gtda_cech_complex PRIVATE "${GUDHI_SRC_DIR}/common/include") -target_include_directories(gtda_cech_complex PRIVATE "${GUDHI_SRC_DIR}/Collapse/include") -target_include_directories(gtda_cech_complex PRIVATE "${EIGEN_DIR}") if(MSVC) target_compile_options(gtda_cech_complex PUBLIC $<$: /O2 /Wall /fp:strict>) diff --git a/gtda/externals/eigen b/gtda/externals/eigen deleted file mode 160000 index 25424d91f..000000000 --- a/gtda/externals/eigen +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 25424d91f60a9f858e7dc1c7936021cc1dd72019 From 7bdfe2956077e19c0f8bb06bdab41a6b14782d91 Mon Sep 17 00:00:00 2001 From: Umberto Lupo Date: Wed, 29 Dec 2021 10:33:36 +0100 Subject: [PATCH 5/6] Update ccache --- azure-pipelines.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index b406ed8ed..f0a4c2db6 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -46,7 +46,7 @@ jobs: - task: Cache@2 inputs: - key: '"ccache-wheels-v2021.07.28" | $(Agent.OS) | "$(python.version)"' + key: '"ccache-wheels-v2021.12.29" | $(Agent.OS) | "$(python.version)"' path: $(CCACHE_DIR) displayName: ccache @@ -146,7 +146,7 @@ jobs: - task: Cache@2 inputs: - key: '"ccache-v2021.07.28" | $(Agent.OS) | "$(python.version)"' + key: '"ccache-v2021.12.29" | $(Agent.OS) | "$(python.version)"' path: $(CCACHE_DIR) displayName: ccache From 9dac7d4203f4cb9bd2458ff1d8773a3bd3bbd624 Mon Sep 17 00:00:00 2001 From: Umberto Lupo Date: Wed, 29 Dec 2021 10:31:26 +0100 Subject: [PATCH 6/6] Revert "Remove eigen submodule and various target_include_directories from CMakeLists" This reverts commit 6bd5a7ced669d197a8d00235c21429e411f90bfa. --- .gitmodules | 3 +++ CMakeLists.txt | 11 +++++++++++ gtda/externals/eigen | 1 + 3 files changed, 15 insertions(+) create mode 160000 gtda/externals/eigen diff --git a/.gitmodules b/.gitmodules index 7079c110b..9544dff81 100644 --- a/.gitmodules +++ b/.gitmodules @@ -5,6 +5,9 @@ [submodule "gtda/externals/hera"] path = gtda/externals/hera url = https://github.com/grey-narn/hera +[submodule "gtda/externals/eigen"] + path = gtda/externals/eigen + url = https://gitlab.com/libeigen/eigen [submodule "gtda/externals/pybind11"] path = gtda/externals/pybind11 url = https://github.com/pybind/pybind11 diff --git a/CMakeLists.txt b/CMakeLists.txt index f475a3fe7..aa4b4479c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,6 +11,7 @@ find_package(OpenMP) set(GUDHI_SRC_DIR "gtda/externals/gudhi-devel/src") set(HERA_DIR "gtda/externals/hera") +set(EIGEN_DIR "gtda/externals/eigen") ####################################################################### # Wasserstein # @@ -126,6 +127,8 @@ target_include_directories(gtda_simplex_tree PRIVATE "${GUDHI_SRC_DIR}/Cech_comp target_include_directories(gtda_simplex_tree PRIVATE "${GUDHI_SRC_DIR}/Persistent_cohomology/include") target_include_directories(gtda_simplex_tree PRIVATE "${GUDHI_SRC_DIR}/Subsampling/include") target_include_directories(gtda_simplex_tree PRIVATE "${GUDHI_SRC_DIR}/python/include") +target_include_directories(gtda_simplex_tree PRIVATE "${GUDHI_SRC_DIR}/Collapse/include") +target_include_directories(gtda_simplex_tree PRIVATE "${EIGEN_DIR}") if(MSVC) target_compile_options(gtda_simplex_tree PUBLIC $<$: /O2 /Wall /fp:strict>) @@ -182,6 +185,8 @@ target_include_directories(gtda_witness_complex PRIVATE "${GUDHI_SRC_DIR}/Cech_c target_include_directories(gtda_witness_complex PRIVATE "${GUDHI_SRC_DIR}/Persistent_cohomology/include") target_include_directories(gtda_witness_complex PRIVATE "${GUDHI_SRC_DIR}/python/include") target_include_directories(gtda_witness_complex PRIVATE "${GUDHI_SRC_DIR}/common/include") +target_include_directories(gtda_witness_complex PRIVATE "${GUDHI_SRC_DIR}/Collapse/include") +target_include_directories(gtda_witness_complex PRIVATE "${EIGEN_DIR}") if(MSVC) target_compile_options(gtda_witness_complex PUBLIC $<$: /O2 /Wall /fp:strict>) @@ -211,6 +216,8 @@ target_include_directories(gtda_strong_witness_complex PRIVATE "${GUDHI_SRC_DIR} target_include_directories(gtda_strong_witness_complex PRIVATE "${GUDHI_SRC_DIR}/Persistent_cohomology/include") target_include_directories(gtda_strong_witness_complex PRIVATE "${GUDHI_SRC_DIR}/python/include") target_include_directories(gtda_strong_witness_complex PRIVATE "${GUDHI_SRC_DIR}/common/include") +target_include_directories(gtda_strong_witness_complex PRIVATE "${GUDHI_SRC_DIR}/Collapse/include") +target_include_directories(gtda_strong_witness_complex PRIVATE "${EIGEN_DIR}") if(MSVC) target_compile_options(gtda_strong_witness_complex PUBLIC $<$: /O2 /Wall /fp:strict>) @@ -241,6 +248,8 @@ target_include_directories(gtda_sparse_rips_complex PRIVATE "${GUDHI_SRC_DIR}/Pe target_include_directories(gtda_sparse_rips_complex PRIVATE "${GUDHI_SRC_DIR}/Rips_complex/include") target_include_directories(gtda_sparse_rips_complex PRIVATE "${GUDHI_SRC_DIR}/Subsampling/include") target_include_directories(gtda_sparse_rips_complex PRIVATE "${GUDHI_SRC_DIR}/python/include") +target_include_directories(gtda_sparse_rips_complex PRIVATE "${GUDHI_SRC_DIR}/Collapse/include") +target_include_directories(gtda_sparse_rips_complex PRIVATE "${EIGEN_DIR}") if(MSVC) target_compile_options(gtda_sparse_rips_complex PUBLIC $<$: /O2 /Wall /fp:strict>) @@ -269,6 +278,8 @@ target_include_directories(gtda_cech_complex PRIVATE "${GUDHI_SRC_DIR}/Cech_comp target_include_directories(gtda_cech_complex PRIVATE "${GUDHI_SRC_DIR}/Persistent_cohomology/include") target_include_directories(gtda_cech_complex PRIVATE "${GUDHI_SRC_DIR}/python/include") target_include_directories(gtda_cech_complex PRIVATE "${GUDHI_SRC_DIR}/common/include") +target_include_directories(gtda_cech_complex PRIVATE "${GUDHI_SRC_DIR}/Collapse/include") +target_include_directories(gtda_cech_complex PRIVATE "${EIGEN_DIR}") if(MSVC) target_compile_options(gtda_cech_complex PUBLIC $<$: /O2 /Wall /fp:strict>) diff --git a/gtda/externals/eigen b/gtda/externals/eigen new file mode 160000 index 000000000..25424d91f --- /dev/null +++ b/gtda/externals/eigen @@ -0,0 +1 @@ +Subproject commit 25424d91f60a9f858e7dc1c7936021cc1dd72019