Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve ripser bindings performance #501

Merged
merged 3 commits into from
Sep 25, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 15 additions & 9 deletions gtda/externals/bindings/collapser_bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,15 @@
#include <gudhi/Flag_complex_edge_collapser.h>

#include <pybind11/eigen.h>
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>

namespace py = pybind11;

/* GUDHI Collapser required types */
using Filtration_value = float;
using Vertex_handle = uint32_t;
using Vertex_handle = int32_t;
using Filtered_edge =
std::tuple<Vertex_handle, Vertex_handle, Filtration_value>;
using Filtered_edge_list = std::vector<Filtered_edge>;
Expand All @@ -29,7 +30,7 @@ using Filtration_values = std::vector<Filtration_value>;
using COO_data = std::tuple<Row_idx, Col_idx, Filtration_values>;

/* Dense matrix input types */
using Distance_matrix = std::vector<std::vector<Filtration_value>>;
using Distance_matrix_np = py::array_t<Filtration_value>;

/* constants */
const Filtration_value filtration_max =
Expand Down Expand Up @@ -86,13 +87,18 @@ PYBIND11_MODULE(gtda_collapser, m) {
"collapses edges while preserving the persistent homology");

m.def("flag_complex_collapse_edges_coo",
[](Row_idx& row, Col_idx& col, Filtration_values& data,
[](py::array_t<Vertex_handle>& row_, py::array_t<Vertex_handle>& col_,
py::array_t<Filtration_value>& data_,
Filtration_value thresh = filtration_max) {
Filtered_edge_list graph;

Vertex_handle* row = (Vertex_handle*)row_.request().ptr;
Vertex_handle* col = (Vertex_handle*)col_.request().ptr;
Filtration_value* data = (Filtration_value*)data_.request().ptr;

/* Convert from COO input format to Filtered_edge_list */
/* Applying threshold to the input data */
int size = data.size();
int size = data_.size();
for (size_t k = 0; k < size; ++k)
if (data[k] <= thresh)
graph.push_back(Filtered_edge(row[k], col[k], data[k]));
Expand All @@ -108,15 +114,15 @@ PYBIND11_MODULE(gtda_collapser, m) {
"collapses edges while preserving the persistent homology");

m.def("flag_complex_collapse_edges_dense",
[](Distance_matrix& dm, Filtration_value thresh = filtration_max) {
[](Distance_matrix_np& dm, Filtration_value thresh = filtration_max) {
Filtered_edge_list graph;

/* Convert from dense format to Filtered edge list */
/* Applying threshold to the input data */
for (size_t i = 0; i < dm.size(); i++)
for (size_t j = 0; j < dm[i].size(); j++)
if (j > i && (dm[i][j] <= thresh))
graph.push_back(Filtered_edge(i, j, dm[i][j]));
for (size_t i = 0; i < dm.shape(0); i++)
for (size_t j = 0; j < dm.shape(1); j++)
if (j > i && (*(dm.data(i, j)) <= thresh))
graph.push_back(Filtered_edge(i, j, *(dm.data(i, j))));

/* Start collapser */
auto vec_triples =
Expand Down
15 changes: 9 additions & 6 deletions gtda/externals/bindings/ripser_bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,10 @@
#include <ripser.cpp>

// PYBIND11
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <pybind11/stl_bind.h>

namespace py = pybind11;

Expand All @@ -32,21 +34,22 @@ PYBIND11_MODULE(gtda_ripser, m) {
.def_readwrite("num_edges", &ripserResults::num_edges);

m.def("rips_dm",
[](std::vector<float> D, int N, int modulus, int dim_max,
[](py::array_t<float>& D, int N, int modulus, int dim_max,
float threshold, int do_cocycles) {
ripserResults ret =
rips_dm(&D[0], N, modulus, dim_max, threshold, do_cocycles);
ripserResults ret = rips_dm((float*)D.request().ptr, N, modulus,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm sure it's right but it's too much for my brain, what does this line do?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's related on pybind11 underlying layer to manage objects. numpy in pybind11 is a special type, using pybind11 classes to represent it. With request().ptr I ask to have access to the underlying data inside the object.

It's not easy to see this in the pybind11 documentation, the API is currently not well documented in my opinion.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. And is (float*) about passing a pointer instead of an object?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well that's something we unfortunately inherit from ripser.py, but yeah you're right

Copy link
Collaborator

@ulupo ulupo Sep 25, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought that this was the core of your intervention in this PR! Could you explain where exactly you change the behaviour so that pointers are passed and memory is saved? Sorry, I know these are basic questions...

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also what's the difference between the previous & syntax and the new (float*) one?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The & syntax is passing by reference, it's a c++ syntax, one of it's useful uses is to prevent the copy of an object.
The float* syntax is to have a pointer to the memory of the data, it's more a C syntax and in C++ we try to move away from it.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the first question:

I prevent copying the memory from Python to C++ with the & syntax (It's a bit more complicated than that, but to have the big picture).
I then need to have an access to the raw pointer of the data float* because it's a requirement from the C++ ripser in ripser.py

Copy link
Collaborator

@ulupo ulupo Sep 25, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So I guess my next question is why the & was not sufficient, if it already prevented copies which was the purpose of this PR?

dim_max, threshold, do_cocycles);
return ret;
},
"D"_a, "N"_a, "modulus"_a, "dim_max"_a, "threshold"_a, "do_cocycles"_a,
"ripser distance matrix");
m.def("rips_dm_sparse",
[](std::vector<int> I, std::vector<int> J, std::vector<float> V,
[](py::array_t<int>& I, py::array_t<int>& J, py::array_t<float>& V,
int NEdges, int N, int modulus, int dim_max, float threshold,
int do_cocycles) {
ripserResults ret =
rips_dm_sparse(&I[0], &J[0], &V[0], NEdges, N, modulus, dim_max,
threshold, do_cocycles);
rips_dm_sparse((int*)I.request().ptr, (int*)J.request().ptr,
ulupo marked this conversation as resolved.
Show resolved Hide resolved
(float*)V.request().ptr, NEdges, N, modulus,
dim_max, threshold, do_cocycles);
return ret;
},
"I"_a, "J"_a, "V"_a, "NEdges"_a, "N"_a, "modulus"_a, "dim_max"_a,
Expand Down
18 changes: 6 additions & 12 deletions gtda/externals/python/ripser_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,7 @@ def DRFDM(DParam, maxHomDim, thresh=-1, coeff=2, do_cocycles=0):
else:
ret = gtda_ripser_coeff.rips_dm(DParam, DParam.shape[0], coeff,
maxHomDim, thresh, do_cocycles)
ret_rips = {}
ret_rips.update({"births_and_deaths_by_dim": ret.births_and_deaths_by_dim})
ret_rips.update({"num_edges": ret.num_edges})
return ret_rips
ulupo marked this conversation as resolved.
Show resolved Hide resolved
return ret


def DRFDMSparse(I, J, V, N, maxHomDim, thresh=-1, coeff=2, do_cocycles=0):
Expand All @@ -34,10 +31,7 @@ def DRFDMSparse(I, J, V, N, maxHomDim, thresh=-1, coeff=2, do_cocycles=0):
else:
ret = gtda_ripser_coeff.rips_dm_sparse(I, J, V, I.size, N, coeff,
maxHomDim, thresh, do_cocycles)
ret_rips = {}
ret_rips.update({"births_and_deaths_by_dim": ret.births_and_deaths_by_dim})
ret_rips.update({"num_edges": ret.num_edges})
return ret_rips
return ret


def dpoint2pointcloud(X, i, metric):
Expand Down Expand Up @@ -295,19 +289,19 @@ def ripser(X, maxdim=1, thresh=np.inf, coeff=2, metric="euclidean",
)
else:
# Only consider strict upper diagonal
idx = np.triu_indices(n_points, 1)
DParam = dm[idx].astype(np.float32)
DParam = dm[np.invert(np.tri(n_points, k=0, dtype=np.bool))].astype(
np.float32).flatten()
res = DRFDM(DParam, maxdim, thresh, coeff)

# Unwrap persistence diagrams
dgms = res["births_and_deaths_by_dim"]
dgms = res.births_and_deaths_by_dim
for dim in range(len(dgms)):
N = int(len(dgms[dim]) / 2)
dgms[dim] = np.reshape(np.array(dgms[dim]), [N, 2])

ret = {
"dgms": dgms,
"num_edges": res["num_edges"],
"num_edges": res.num_edges,
"dperm2all": dperm2all,
"idx_perm": idx_perm,
"r_cover": r_cover,
Expand Down