From c26cdac9104d0258df89f557f531da3b111ad677 Mon Sep 17 00:00:00 2001
From: Umberto Lupo <umberto.lupo@gmail.com>
Date: Thu, 5 Nov 2020 09:44:26 +0100
Subject: [PATCH 01/17] Refactor ParallelClustering to return an array with
 one entry per data point

---
 gtda/mapper/cluster.py | 81 ++++++++++++++++++++++--------------------
 1 file changed, 43 insertions(+), 38 deletions(-)

diff --git a/gtda/mapper/cluster.py b/gtda/mapper/cluster.py
index 2e24ccb7b..431d68619 100644
--- a/gtda/mapper/cluster.py
+++ b/gtda/mapper/cluster.py
@@ -51,8 +51,8 @@ class ParallelClustering(BaseEstimator):
 
     clusters_ : list of list of tuple
        Labels and indices of each cluster found in :meth:`fit`. The i-th
-       entry corresponds to the i-th portion of the data; it is a list
-       of triples of the form ``(i, label, indices)``, where ``label`` is a
+       entry corresponds to the i-th portion of the data; it is a list of
+       triples of the form ``(i, label, indices)``, where ``label`` is a
        cluster label and ``indices`` is the array of indices of points
        belonging to cluster ``(i, label)``.
 
@@ -138,39 +138,53 @@ def fit(self, X, y=None, sample_weight=None):
             sample_weights = [None] * masks.shape[1]
 
         if self._precomputed:
-            single_fitter = self._fit_single_abs_labels_precomputed
+            single_labels_idx = self._single_labels_idx_precomputed
         else:
-            single_fitter = self._fit_single_abs_labels
+            single_labels_idx = self._single_labels_idx
 
-        self.clusterers_ = Parallel(
+        labels_idx = Parallel(
             n_jobs=self.n_jobs, prefer=self.parallel_backend_prefer
-            )(delayed(single_fitter)(X_tot,
-                                     np.flatnonzero(mask),
-                                     mask_num,
-                                     sample_weight=sample_weights[mask_num])
+            )(delayed(single_labels_idx)(
+                X_tot,
+                np.flatnonzero(mask),
+                mask_num,
+                sample_weight=sample_weights[mask_num]
+                )
               for mask_num, mask in enumerate(masks.T))
-        self.clusters_ = [clusterer.abs_labels_ for clusterer in
-                          self.clusterers_]
-        return self
 
-    def _fit_single_abs_labels(self, X, relative_indices, mask_num,
-                               sample_weight=None):
-        cloned_clusterer, unique_labels, unique_labels_inverse = \
-            self._fit_single(X, relative_indices, sample_weight)
-        self._create_abs_labels(cloned_clusterer, relative_indices, mask_num,
-                                unique_labels, unique_labels_inverse)
-        return cloned_clusterer
+        self.labels_ = np.empty(len(X_tot), dtype=object)
+        self.labels_[:] = [[]] * len(X_tot)
+        for relative_indices, mask_num_rel_labels in labels_idx:
+            self.labels_[relative_indices] += mask_num_rel_labels
+
+        return self
 
-    def _fit_single_abs_labels_precomputed(self, X, relative_indices, mask_num,
-                                           sample_weight=None):
+    def _single_labels_idx_precomputed(self, X, relative_indices, mask_num,
+                                       sample_weight=None):
         relative_2d_indices = np.ix_(relative_indices, relative_indices)
-        cloned_clusterer, unique_labels, unique_labels_inverse = \
-            self._fit_single(X, relative_2d_indices, sample_weight)
-        self._create_abs_labels(cloned_clusterer, relative_indices, mask_num,
-                                unique_labels, unique_labels_inverse)
-        return cloned_clusterer
 
-    def _fit_single(self, X, relative_indices, sample_weight):
+        mask_num_rel_labels = np.empty(len(relative_indices), dtype=object)
+        mask_num_rel_labels[:] = [
+            [(mask_num, label)]
+            for label in self._single_labels(X, relative_2d_indices,
+                                             sample_weight)
+            ]
+
+        return relative_indices, mask_num_rel_labels
+
+    def _single_labels_idx(self, X, relative_indices, mask_num,
+                           sample_weight=None):
+
+        mask_num_rel_labels = np.empty(len(relative_indices), dtype=object)
+        mask_num_rel_labels[:] = [
+            [(mask_num, label)]
+            for label in self._single_labels(X, relative_indices,
+                                             sample_weight)
+            ]
+
+        return relative_indices, mask_num_rel_labels
+
+    def _single_labels(self, X, relative_indices, sample_weight):
         cloned_clusterer = clone(self.clusterer)
         X_sub = X[relative_indices]
 
@@ -180,16 +194,7 @@ def _fit_single(self, X, relative_indices, sample_weight):
         else:
             cloned_clusterer.fit(X_sub)
 
-        unique_labels, unique_labels_inverse = np.unique(
-            cloned_clusterer.labels_, return_inverse=True)
-        return cloned_clusterer, unique_labels, unique_labels_inverse
-
-    @staticmethod
-    def _create_abs_labels(cloned_clusterer, relative_indices, mask_num,
-                           unique_labels, inv):
-        cloned_clusterer.abs_labels_ = [
-            (mask_num, label, relative_indices[inv == i])
-            for i, label in enumerate(unique_labels)]
+        return cloned_clusterer.labels_
 
     def fit_predict(self, X, y=None, sample_weight=None):
         """Fit to the data, and return the found clusters.
@@ -220,7 +225,7 @@ def fit_predict(self, X, y=None, sample_weight=None):
 
         """
         self.fit(X, sample_weight=sample_weight)
-        return self.clusters_
+        return self.labels_
 
     def transform(self, X, y=None):
         """Not implemented.

From 715f9cdf6545b1375428ca969182569aa6e7faba Mon Sep 17 00:00:00 2001
From: Umberto Lupo <umberto.lupo@gmail.com>
Date: Sun, 15 Nov 2020 16:14:31 +0100
Subject: [PATCH 02/17] Avoid transpositions in ParallelClustering

---
 gtda/mapper/cluster.py  | 16 ++++++----------
 gtda/mapper/pipeline.py |  5 +++--
 2 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/gtda/mapper/cluster.py b/gtda/mapper/cluster.py
index 431d68619..abece7568 100644
--- a/gtda/mapper/cluster.py
+++ b/gtda/mapper/cluster.py
@@ -144,16 +144,12 @@ def fit(self, X, y=None, sample_weight=None):
 
         labels_idx = Parallel(
             n_jobs=self.n_jobs, prefer=self.parallel_backend_prefer
-            )(delayed(single_labels_idx)(
-                X_tot,
-                np.flatnonzero(mask),
-                mask_num,
-                sample_weight=sample_weights[mask_num]
-                )
-              for mask_num, mask in enumerate(masks.T))
+            )(delayed(single_labels_idx)(X_tot, np.flatnonzero(masks[:, i]), i,
+                                         sample_weight=sample_weights[i])
+              for i in range(masks.shape[1]))
 
         self.labels_ = np.empty(len(X_tot), dtype=object)
-        self.labels_[:] = [[]] * len(X_tot)
+        self.labels_[:] = [tuple([])] * len(X_tot)
         for relative_indices, mask_num_rel_labels in labels_idx:
             self.labels_[relative_indices] += mask_num_rel_labels
 
@@ -165,7 +161,7 @@ def _single_labels_idx_precomputed(self, X, relative_indices, mask_num,
 
         mask_num_rel_labels = np.empty(len(relative_indices), dtype=object)
         mask_num_rel_labels[:] = [
-            [(mask_num, label)]
+            ((mask_num, label),)
             for label in self._single_labels(X, relative_2d_indices,
                                              sample_weight)
             ]
@@ -177,7 +173,7 @@ def _single_labels_idx(self, X, relative_indices, mask_num,
 
         mask_num_rel_labels = np.empty(len(relative_indices), dtype=object)
         mask_num_rel_labels[:] = [
-            [(mask_num, label)]
+            ((mask_num, label),)
             for label in self._single_labels(X, relative_indices,
                                              sample_weight)
             ]
diff --git a/gtda/mapper/pipeline.py b/gtda/mapper/pipeline.py
index 8dc48f89a..421e2f556 100644
--- a/gtda/mapper/pipeline.py
+++ b/gtda/mapper/pipeline.py
@@ -372,8 +372,8 @@ def make_mapper_pipeline(scaler=None,
     else:
         _scaler = scaler
 
-    # If filter_func is not a scikit-learn transformer, hope it as a
-    # callable to be applied on each row separately. Then attempt to create a
+    # If filter_func is not a scikit-learn transformer, hope it is a callable
+    # to be applied on each row separately. Then attempt to create a
     # FunctionTransformer object to implement this behaviour.
     if filter_func is None:
         from sklearn.decomposition import PCA
@@ -425,4 +425,5 @@ def make_mapper_pipeline(scaler=None,
 
     mapper_pipeline = MapperPipeline(
         steps=all_steps, memory=memory, verbose=verbose)
+
     return mapper_pipeline

From 407947d47dad9f95be1c97cc31b06db06993f205 Mon Sep 17 00:00:00 2001
From: Umberto Lupo <umberto.lupo@gmail.com>
Date: Mon, 16 Nov 2020 10:30:18 +0100
Subject: [PATCH 03/17] Refactor Nerve following change in ParallelClustering
 output

---
 gtda/mapper/nerve.py | 32 +++++++++++++++++++-------------
 1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/gtda/mapper/nerve.py b/gtda/mapper/nerve.py
index 6c0289070..87cf5a471 100644
--- a/gtda/mapper/nerve.py
+++ b/gtda/mapper/nerve.py
@@ -130,22 +130,28 @@ def fit_transform(self, X, y=None):
         """
         # TODO: Include a validation step for X
         # Graph construction -- vertices with their metadata
-        nodes = reduce(iconcat, X, [])
-        graph = ig.Graph(len(nodes))
-
-        # Since `nodes` is a list, say of length N, of triples of the form
-        # (pullback_set_label, partial_cluster_label, node_elements),
-        # zip(*nodes) generates three tuples of length N, each corresponding to
-        # a type of node attribute.
-        node_attributes = zip(*nodes)
-        attribute_names = ["pullback_set_label", "partial_cluster_label",
-                           "node_elements"]
-        for i, node_attribute in enumerate(node_attributes):
-            graph.vs[attribute_names[i]] = node_attribute
+        nodes_dict = {}
+        for i, sample in enumerate(X):
+            for node_id_pair in sample:
+                nodes_dict.setdefault(node_id_pair, []).append(i)
+        nodes_dict = {key: np.array(value, dtype=np.int32)
+                      for key, value in nodes_dict.items()}
+
+        graph = ig.Graph(len(nodes_dict))
+
+        # `nodes_dict` is a dictionary of, say, N key-value pairs of the form
+        # (pullback_set_label, partial_cluster_label): node_elements. Hence,
+        # zip(*nodes_dict) generates two tuples of length N, each corresponding
+        # to a type of node attribute in the final graph.
+        node_attributes = zip(*nodes_dict)
+        graph.vs["pullback_set_label"] = next(node_attributes)
+        graph.vs["partial_cluster_label"] = next(node_attributes)
+        graph.vs["node_elements"] = [*nodes_dict.values()]
 
         # Graph construction -- edges with weights given by intersection sizes.
         # In general, we need all information in `nodes` to narrow down the set
-        # of combinations to check when `contract_nodes` is True
+        # of combinations to check, especially when `contract_nodes` is True.
+        nodes = zip(*zip(*nodes_dict), nodes_dict.values())
         node_index_pairs, weights, intersections, mapping = \
             self._generate_edge_data(nodes)
         graph.es["weight"] = 1

From a378999c36c6ed55f4d97a1038449eec29634efa Mon Sep 17 00:00:00 2001
From: Umberto Lupo <umberto.lupo@gmail.com>
Date: Sat, 21 Nov 2020 22:01:51 +0100
Subject: [PATCH 04/17] Simplify structure

---
 gtda/mapper/cluster.py | 76 ++++++++++++++++++------------------------
 1 file changed, 33 insertions(+), 43 deletions(-)

diff --git a/gtda/mapper/cluster.py b/gtda/mapper/cluster.py
index abece7568..a9d3a4551 100644
--- a/gtda/mapper/cluster.py
+++ b/gtda/mapper/cluster.py
@@ -131,66 +131,56 @@ def fit(self, X, y=None, sample_weight=None):
                              "of rows.")
         self._validate_clusterer()
 
-        if sample_weight is not None:
-            sample_weights = [sample_weight[masks[:, i]]
-                              for i in range(masks.shape[1])]
+        fit_params = signature(self.clusterer.fit).parameters
+        if sample_weight is not None and "sample_weight" in fit_params:
+            sample_weight_computer = self._sample_weight_computer
         else:
-            sample_weights = [None] * masks.shape[1]
+            sample_weight_computer = lambda *args: {}
 
         if self._precomputed:
-            single_labels_idx = self._single_labels_idx_precomputed
+            idx_computer = self._idx_computer_precomputed
         else:
-            single_labels_idx = self._single_labels_idx
-
-        labels_idx = Parallel(
-            n_jobs=self.n_jobs, prefer=self.parallel_backend_prefer
-            )(delayed(single_labels_idx)(X_tot, np.flatnonzero(masks[:, i]), i,
-                                         sample_weight=sample_weights[i])
-              for i in range(masks.shape[1]))
+            idx_computer = self._idx_computer
+
+        labels_idx = Parallel(n_jobs=self.n_jobs,
+                              prefer=self.parallel_backend_prefer)(
+            delayed(self._single_labels_idx)(
+                X_tot[idx_computer(i, masks)],
+                i,
+                sample_weight_computer(i, masks, sample_weight)
+                )
+            for i in range(masks.shape[1])
+            )
 
         self.labels_ = np.empty(len(X_tot), dtype=object)
         self.labels_[:] = [tuple([])] * len(X_tot)
-        for relative_indices, mask_num_rel_labels in labels_idx:
-            self.labels_[relative_indices] += mask_num_rel_labels
+        for i, mask_num_rel_labels in enumerate(labels_idx):
+            self.labels_[idx_computer(i, masks)] += mask_num_rel_labels
 
         return self
 
-    def _single_labels_idx_precomputed(self, X, relative_indices, mask_num,
-                                       sample_weight=None):
-        relative_2d_indices = np.ix_(relative_indices, relative_indices)
-
-        mask_num_rel_labels = np.empty(len(relative_indices), dtype=object)
-        mask_num_rel_labels[:] = [
-            ((mask_num, label),)
-            for label in self._single_labels(X, relative_2d_indices,
-                                             sample_weight)
-            ]
+    @staticmethod
+    def _idx_computer(i, masks):
+        return np.flatnonzero(masks[:, i])
 
-        return relative_indices, mask_num_rel_labels
+    @staticmethod
+    def _idx_computer_precomputed(i, masks):
+        idx_one = np.flatnonzero(masks[:, i])
+        return np.ix_(idx_one, idx_one)
 
-    def _single_labels_idx(self, X, relative_indices, mask_num,
-                           sample_weight=None):
+    @staticmethod
+    def _sample_weight_computer(i, masks, sample_weight):
+        return {"sample_weight": sample_weight[masks[:, i]]}
 
-        mask_num_rel_labels = np.empty(len(relative_indices), dtype=object)
+    def _single_labels_idx(self, X, mask_num, kwargs):
+        cloned_clusterer = clone(self.clusterer)
+        mask_num_rel_labels = np.empty(len(X), dtype=object)
         mask_num_rel_labels[:] = [
             ((mask_num, label),)
-            for label in self._single_labels(X, relative_indices,
-                                             sample_weight)
+            for label in cloned_clusterer.fit(X, **kwargs).labels_
             ]
 
-        return relative_indices, mask_num_rel_labels
-
-    def _single_labels(self, X, relative_indices, sample_weight):
-        cloned_clusterer = clone(self.clusterer)
-        X_sub = X[relative_indices]
-
-        fit_params = signature(cloned_clusterer.fit).parameters
-        if 'sample_weight' in fit_params:
-            cloned_clusterer.fit(X_sub, sample_weight=sample_weight)
-        else:
-            cloned_clusterer.fit(X_sub)
-
-        return cloned_clusterer.labels_
+        return mask_num_rel_labels
 
     def fit_predict(self, X, y=None, sample_weight=None):
         """Fit to the data, and return the found clusters.

From 03a9f5fe3a21e1c45b7cf6b18b6fc54c53f4a70c Mon Sep 17 00:00:00 2001
From: Umberto Lupo <umberto.lupo@gmail.com>
Date: Sun, 22 Nov 2020 12:57:00 +0100
Subject: [PATCH 05/17] Simplify further

---
 gtda/mapper/cluster.py | 55 ++++++++++++++++--------------------------
 1 file changed, 21 insertions(+), 34 deletions(-)

diff --git a/gtda/mapper/cluster.py b/gtda/mapper/cluster.py
index a9d3a4551..ad6aa1530 100644
--- a/gtda/mapper/cluster.py
+++ b/gtda/mapper/cluster.py
@@ -133,54 +133,41 @@ def fit(self, X, y=None, sample_weight=None):
 
         fit_params = signature(self.clusterer.fit).parameters
         if sample_weight is not None and "sample_weight" in fit_params:
-            sample_weight_computer = self._sample_weight_computer
+            self._sample_weight_computer = lambda rel_indices, sample_weight: \
+                {"sample_weight": sample_weight[rel_indices]}
         else:
-            sample_weight_computer = lambda *args: {}
+            self._sample_weight_computer = lambda *args: {}
 
         if self._precomputed:
-            idx_computer = self._idx_computer_precomputed
+            self._indices_computer = lambda rel_indices: \
+                np.ix_(rel_indices, rel_indices)
         else:
-            idx_computer = self._idx_computer
-
-        labels_idx = Parallel(n_jobs=self.n_jobs,
-                              prefer=self.parallel_backend_prefer)(
-            delayed(self._single_labels_idx)(
-                X_tot[idx_computer(i, masks)],
-                i,
-                sample_weight_computer(i, masks, sample_weight)
-                )
+            self._indices_computer = lambda rel_indices: rel_indices
+
+        labels_single = Parallel(n_jobs=self.n_jobs,
+                                 prefer=self.parallel_backend_prefer)(
+            delayed(self._labels_single)(X_tot, np.flatnonzero(masks[:, i]),
+                                         sample_weight)
             for i in range(masks.shape[1])
             )
 
         self.labels_ = np.empty(len(X_tot), dtype=object)
         self.labels_[:] = [tuple([])] * len(X_tot)
-        for i, mask_num_rel_labels in enumerate(labels_idx):
-            self.labels_[idx_computer(i, masks)] += mask_num_rel_labels
+        for i, (rel_indices, rel_labels) in enumerate(labels_single):
+            n_labels = len(rel_labels)
+            labels_i = np.empty(n_labels, dtype=object)
+            labels_i[:] = [((i, rel_label),) for rel_label in rel_labels]
+            self.labels_[rel_indices] += labels_i
 
         return self
 
-    @staticmethod
-    def _idx_computer(i, masks):
-        return np.flatnonzero(masks[:, i])
-
-    @staticmethod
-    def _idx_computer_precomputed(i, masks):
-        idx_one = np.flatnonzero(masks[:, i])
-        return np.ix_(idx_one, idx_one)
-
-    @staticmethod
-    def _sample_weight_computer(i, masks, sample_weight):
-        return {"sample_weight": sample_weight[masks[:, i]]}
-
-    def _single_labels_idx(self, X, mask_num, kwargs):
+    def _labels_single(self, X, rel_indices, sample_weight):
         cloned_clusterer = clone(self.clusterer)
-        mask_num_rel_labels = np.empty(len(X), dtype=object)
-        mask_num_rel_labels[:] = [
-            ((mask_num, label),)
-            for label in cloned_clusterer.fit(X, **kwargs).labels_
-            ]
+        X_sub = X[self._indices_computer(rel_indices)]
+        kwargs = self._sample_weight_computer(rel_indices, sample_weight)
 
-        return mask_num_rel_labels
+        return (rel_indices,
+                cloned_clusterer.fit(X_sub, **kwargs).labels_)
 
     def fit_predict(self, X, y=None, sample_weight=None):
         """Fit to the data, and return the found clusters.

From 8111ce04bb8bffffa852f1e22de5647d6627ee41 Mon Sep 17 00:00:00 2001
From: Umberto Lupo <umberto.lupo@gmail.com>
Date: Mon, 23 Nov 2020 10:24:47 +0100
Subject: [PATCH 06/17] Solve performance problem with joblib and refitting

---
 gtda/mapper/cluster.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/gtda/mapper/cluster.py b/gtda/mapper/cluster.py
index ad6aa1530..cbb108fff 100644
--- a/gtda/mapper/cluster.py
+++ b/gtda/mapper/cluster.py
@@ -144,11 +144,16 @@ def fit(self, X, y=None, sample_weight=None):
         else:
             self._indices_computer = lambda rel_indices: rel_indices
 
+        # This seems necessary to avoid large overheads when running fit a
+        # second time. Probably due to refcounts. NOTE: Only works if done
+        # before assigning labels_single. TODO: Investigate
+        self.labels_ = None
+
         labels_single = Parallel(n_jobs=self.n_jobs,
                                  prefer=self.parallel_backend_prefer)(
-            delayed(self._labels_single)(X_tot, np.flatnonzero(masks[:, i]),
+            delayed(self._labels_single)(X_tot[rel_indices], rel_indices,
                                          sample_weight)
-            for i in range(masks.shape[1])
+            for rel_indices in map(np.flatnonzero, masks.T)
             )
 
         self.labels_ = np.empty(len(X_tot), dtype=object)
@@ -163,11 +168,9 @@ def fit(self, X, y=None, sample_weight=None):
 
     def _labels_single(self, X, rel_indices, sample_weight):
         cloned_clusterer = clone(self.clusterer)
-        X_sub = X[self._indices_computer(rel_indices)]
         kwargs = self._sample_weight_computer(rel_indices, sample_weight)
 
-        return (rel_indices,
-                cloned_clusterer.fit(X_sub, **kwargs).labels_)
+        return rel_indices, cloned_clusterer.fit(X, **kwargs).labels_
 
     def fit_predict(self, X, y=None, sample_weight=None):
         """Fit to the data, and return the found clusters.

From fb6918d7dfc17a3e855983e83fb0f330415bc74e Mon Sep 17 00:00:00 2001
From: Umberto Lupo <umberto.lupo@gmail.com>
Date: Mon, 23 Nov 2020 12:20:25 +0100
Subject: [PATCH 07/17] Improve variable names following @lewtun's review

---
 gtda/mapper/nerve.py    | 12 ++++++------
 gtda/mapper/pipeline.py |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/gtda/mapper/nerve.py b/gtda/mapper/nerve.py
index 87cf5a471..d6686781a 100644
--- a/gtda/mapper/nerve.py
+++ b/gtda/mapper/nerve.py
@@ -134,24 +134,24 @@ def fit_transform(self, X, y=None):
         for i, sample in enumerate(X):
             for node_id_pair in sample:
                 nodes_dict.setdefault(node_id_pair, []).append(i)
-        nodes_dict = {key: np.array(value, dtype=np.int32)
-                      for key, value in nodes_dict.items()}
+        labels_to_indices = {key: np.array(value, dtype=np.int32)
+                             for key, value in nodes_dict.items()}
 
-        graph = ig.Graph(len(nodes_dict))
+        graph = ig.Graph(len(labels_to_indices))
 
         # `nodes_dict` is a dictionary of, say, N key-value pairs of the form
         # (pullback_set_label, partial_cluster_label): node_elements. Hence,
         # zip(*nodes_dict) generates two tuples of length N, each corresponding
         # to a type of node attribute in the final graph.
-        node_attributes = zip(*nodes_dict)
+        node_attributes = zip(*labels_to_indices)
         graph.vs["pullback_set_label"] = next(node_attributes)
         graph.vs["partial_cluster_label"] = next(node_attributes)
-        graph.vs["node_elements"] = [*nodes_dict.values()]
+        graph.vs["node_elements"] = [*labels_to_indices.values()]
 
         # Graph construction -- edges with weights given by intersection sizes.
         # In general, we need all information in `nodes` to narrow down the set
         # of combinations to check, especially when `contract_nodes` is True.
-        nodes = zip(*zip(*nodes_dict), nodes_dict.values())
+        nodes = zip(*zip(*labels_to_indices), labels_to_indices.values())
         node_index_pairs, weights, intersections, mapping = \
             self._generate_edge_data(nodes)
         graph.es["weight"] = 1
diff --git a/gtda/mapper/pipeline.py b/gtda/mapper/pipeline.py
index 421e2f556..ed98d2aca 100644
--- a/gtda/mapper/pipeline.py
+++ b/gtda/mapper/pipeline.py
@@ -372,7 +372,7 @@ def make_mapper_pipeline(scaler=None,
     else:
         _scaler = scaler
 
-    # If filter_func is not a scikit-learn transformer, hope it is a callable
+    # If filter_func is not a scikit-learn transformer, assume it is a callable
     # to be applied on each row separately. Then attempt to create a
     # FunctionTransformer object to implement this behaviour.
     if filter_func is None:

From 5f63025124f3000718a4cdc51884e9f47311aafb Mon Sep 17 00:00:00 2001
From: Umberto Lupo <umberto.lupo@gmail.com>
Date: Mon, 23 Nov 2020 16:32:30 +0100
Subject: [PATCH 08/17] Fix linting

---
 gtda/mapper/nerve.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/gtda/mapper/nerve.py b/gtda/mapper/nerve.py
index d6686781a..91b9a8f0d 100644
--- a/gtda/mapper/nerve.py
+++ b/gtda/mapper/nerve.py
@@ -1,9 +1,7 @@
 """Construct the nerve of a refined Mapper cover."""
 # License: GNU AGPLv3
 
-from functools import reduce
 from itertools import combinations, filterfalse
-from operator import iconcat
 
 import igraph as ig
 import numpy as np

From a163bf3265f26112485e0b91cfca85232205d216 Mon Sep 17 00:00:00 2001
From: Umberto Lupo <umberto.lupo@gmail.com>
Date: Mon, 23 Nov 2020 18:01:14 +0100
Subject: [PATCH 09/17] Add explicit n_nodes arg to be able to pass nodes as a
 zip object

---
 gtda/mapper/nerve.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/gtda/mapper/nerve.py b/gtda/mapper/nerve.py
index 91b9a8f0d..a1759cc51 100644
--- a/gtda/mapper/nerve.py
+++ b/gtda/mapper/nerve.py
@@ -134,8 +134,8 @@ def fit_transform(self, X, y=None):
                 nodes_dict.setdefault(node_id_pair, []).append(i)
         labels_to_indices = {key: np.array(value, dtype=np.int32)
                              for key, value in nodes_dict.items()}
-
-        graph = ig.Graph(len(labels_to_indices))
+        n_nodes = len(labels_to_indices)
+        graph = ig.Graph(n_nodes)
 
         # `nodes_dict` is a dictionary of, say, N key-value pairs of the form
         # (pullback_set_label, partial_cluster_label): node_elements. Hence,
@@ -151,7 +151,7 @@ def fit_transform(self, X, y=None):
         # of combinations to check, especially when `contract_nodes` is True.
         nodes = zip(*zip(*labels_to_indices), labels_to_indices.values())
         node_index_pairs, weights, intersections, mapping = \
-            self._generate_edge_data(nodes)
+            self._generate_edge_data(nodes, n_nodes)
         graph.es["weight"] = 1
         graph.add_edges(node_index_pairs)
         graph.es["weight"] = weights
@@ -174,7 +174,7 @@ def fit_transform(self, X, y=None):
 
         return graph
 
-    def _generate_edge_data(self, nodes):
+    def _generate_edge_data(self, nodes, n_nodes):
         def _in_same_pullback_set(_node_tuple):
             return _node_tuple[0][1][0] == _node_tuple[1][1][0]
 
@@ -226,7 +226,7 @@ def _subset_check_metadata_append(
             intersection_behavior = _do_nothing
 
         if self.contract_nodes:
-            mapping = np.arange(len(nodes))
+            mapping = np.arange(n_nodes)
             behavior = _subset_check_metadata_append
         else:
             mapping = None

From 6c5d8283dfbb7ae624ca94a6d25f5f964bef3550 Mon Sep 17 00:00:00 2001
From: Umberto Lupo <umberto.lupo@gmail.com>
Date: Mon, 23 Nov 2020 18:01:44 +0100
Subject: [PATCH 10/17] Fix precomputed behaviour and fix/enhance
 ParallelClustering tests

---
 gtda/mapper/cluster.py                        |  7 +++--
 gtda/mapper/tests/test_cluster.py             | 28 +++++++++----------
 .../tests/test_collection_transformer.py      |  2 +-
 3 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/gtda/mapper/cluster.py b/gtda/mapper/cluster.py
index cbb108fff..7410c948a 100644
--- a/gtda/mapper/cluster.py
+++ b/gtda/mapper/cluster.py
@@ -151,8 +151,11 @@ def fit(self, X, y=None, sample_weight=None):
 
         labels_single = Parallel(n_jobs=self.n_jobs,
                                  prefer=self.parallel_backend_prefer)(
-            delayed(self._labels_single)(X_tot[rel_indices], rel_indices,
-                                         sample_weight)
+            delayed(self._labels_single)(
+                X_tot[self._indices_computer(rel_indices)],
+                rel_indices,
+                sample_weight
+                )
             for rel_indices in map(np.flatnonzero, masks.T)
             )
 
diff --git a/gtda/mapper/tests/test_cluster.py b/gtda/mapper/tests/test_cluster.py
index b9c1297db..2cf5f16f2 100644
--- a/gtda/mapper/tests/test_cluster.py
+++ b/gtda/mapper/tests/test_cluster.py
@@ -43,22 +43,24 @@ def test_parallel_clustering_transform_not_implemented():
         pc.transform(X)
 
 
+@pytest.mark.parametrize("n_jobs", [1, 2, -1])
 @pytest.mark.parametrize("sample_weight", [None, np.random.random(5)])
-def test_parallel_clustering_kmeans(sample_weight):
+def test_parallel_clustering_kmeans(n_jobs, sample_weight):
     kmeans = sk.cluster.KMeans(n_clusters=2, random_state=0)
     pc = ParallelClustering(kmeans)
     X = [np.random.random((5, 4)), np.ones((5, 4), dtype=bool)]
     single_labels = kmeans.fit_predict(X[0], sample_weight=sample_weight)
-    unique_labels, inverse = np.unique(single_labels, return_inverse=True)
+    _, inverse = np.unique(single_labels, return_inverse=True)
 
     res = pc.fit_predict(X, sample_weight=sample_weight)
-    res = [[(i, label, list(indices)) for [i, label, indices] in sublist]
-           for sublist in res]
-    exp = [[(i, label, list(np.flatnonzero(inverse == label)))
-            for label in unique_labels]
-           for i in range(X[1].shape[1])]
+    exp = np.empty(5, dtype=object)
+    exp[:] = [tuple([])] * 5
+    for i in range(4):
+        labels_i = np.empty(len(single_labels), dtype=object)
+        labels_i[:] = [((i, rel_label),) for rel_label in inverse]
+        exp[:] += labels_i
 
-    assert res == exp
+    assert np.array_equal(res, exp)
 
 
 def test_parallel_clustering_metric_affinity_precomputed_not_implemented():
@@ -75,7 +77,8 @@ def __init__(self, metric="precomputed", affinity="precomputed"):
         pc.fit(X)
 
 
-def test_parallel_clustering_precomputed():
+@pytest.mark.parametrize("n_jobs", [1, 2, -1])
+def test_parallel_clustering_precomputed(n_jobs):
     pc = ParallelClustering(sk.cluster.DBSCAN())
     masks = np.random.choice([True, False], size=20).reshape((10, 2))
     X = [np.random.random((10, 4)), masks]
@@ -84,13 +87,8 @@ def test_parallel_clustering_precomputed():
 
     res = pc.fit_predict(X)
     res_precomp = pc_precomp.fit_predict(X_precomp)
-    res = [[(i, label, list(indices)) for [i, label, indices] in sublist]
-           for sublist in res]
-    res_precomp = [[(i, label, list(indices))
-                    for [i, label, indices] in sublist]
-                   for sublist in res_precomp]
 
-    assert res == res_precomp
+    assert np.array_equal(res, res_precomp)
 
 
 @composite
diff --git a/gtda/metaestimators/tests/test_collection_transformer.py b/gtda/metaestimators/tests/test_collection_transformer.py
index a6de70728..0aad972f0 100644
--- a/gtda/metaestimators/tests/test_collection_transformer.py
+++ b/gtda/metaestimators/tests/test_collection_transformer.py
@@ -51,7 +51,7 @@ def fit_transform(self):
 
 
 @pytest.mark.parametrize("X", [X_arr, X_list])
-@pytest.mark.parametrize("n_jobs", [None, 2, -1])
+@pytest.mark.parametrize("n_jobs", [1, 2, -1])
 def test_collection_transformer_fit_transform(X, n_jobs):
     n_components = 3
     pca = PCA(n_components=n_components)

From 439cbc64132a76b5f548977ffd522371a1234576 Mon Sep 17 00:00:00 2001
From: Umberto Lupo <umberto.lupo@gmail.com>
Date: Tue, 24 Nov 2020 10:32:16 +0100
Subject: [PATCH 11/17] Update ParallelClustering and Nerve docs to new API

---
 gtda/mapper/cluster.py | 39 ++++++++++++++++++---------------------
 gtda/mapper/nerve.py   | 26 ++++++++++----------------
 2 files changed, 28 insertions(+), 37 deletions(-)

diff --git a/gtda/mapper/cluster.py b/gtda/mapper/cluster.py
index 7410c948a..3c4cac35f 100644
--- a/gtda/mapper/cluster.py
+++ b/gtda/mapper/cluster.py
@@ -22,10 +22,10 @@ class ParallelClustering(BaseEstimator):
     An arbitrary clustering class which stores a ``labels_`` attribute in
     ``fit`` can be passed to the constructor. Examples are most classes in
     ``sklearn.cluster``. The input of :meth:`fit` is of the form ``[X_tot,
-    masks]`` where ``X_tot`` is the full dataset, and ``masks`` is a
-    two-dimensional boolean array, each column of which indicates the
-    location of a portion of ``X_tot`` to cluster separately. Parallelism is
-    achieved over the columns of ``masks``.
+    masks]`` where ``X_tot`` is the full dataset, and ``masks`` is a 2D boolean
+    array, each column of which indicates the location of a portion of
+    ``X_tot`` to cluster separately. Parallelism is achieved over the columns
+    of ``masks``.
 
     Parameters
     ----------
@@ -45,16 +45,12 @@ class ParallelClustering(BaseEstimator):
 
     Attributes
     ----------
-    clusterers_ : tuple of object
-        Clones of `clusterer` fitted to the portions of the full data array
-        specified in :meth:`fit`.
-
-    clusters_ : list of list of tuple
-       Labels and indices of each cluster found in :meth:`fit`. The i-th
-       entry corresponds to the i-th portion of the data; it is a list of
-       triples of the form ``(i, label, indices)``, where ``label`` is a
-       cluster label and ``indices`` is the array of indices of points
-       belonging to cluster ``(i, label)``.
+    labels_ : ndarray of shape (n_samples,)
+       For each point in the dataset passed to :meth:`fit`, a tuple of pairs
+       of the form ``(i, partial_label)`` where ``i`` is the index of a boolean
+       mask which selects that point and ``partial_label`` is the cluster label
+       assigned to the point when clustering the subset of the data selected by
+       mask ``i``.
 
     References
     ----------
@@ -161,10 +157,11 @@ def fit(self, X, y=None, sample_weight=None):
 
         self.labels_ = np.empty(len(X_tot), dtype=object)
         self.labels_[:] = [tuple([])] * len(X_tot)
-        for i, (rel_indices, rel_labels) in enumerate(labels_single):
-            n_labels = len(rel_labels)
+        for i, (rel_indices, partial_labels) in enumerate(labels_single):
+            n_labels = len(partial_labels)
             labels_i = np.empty(n_labels, dtype=object)
-            labels_i[:] = [((i, rel_label),) for rel_label in rel_labels]
+            labels_i[:] = [((i, partial_label),)
+                           for partial_label in partial_labels]
             self.labels_[rel_indices] += labels_i
 
         return self
@@ -199,8 +196,8 @@ def fit_predict(self, X, y=None, sample_weight=None):
 
         Returns
         -------
-        clusters : list of list of tuple
-            See :attr:`clusters_`.
+        labels : ndarray of shape (n_samples,)
+            See :attr:`labels_`.
 
         """
         self.fit(X, sample_weight=sample_weight)
@@ -249,8 +246,8 @@ def fit_transform(self, X, y=None, **fit_params):
 
         Returns
         -------
-        Xt : list of list of tuple
-            See :attr:`clusters_`.
+        Xt : ndarray of shape (n_samples,)
+            See :attr:`labels_`.
 
         """
         Xt = self.fit_predict(X, y, **fit_params)
diff --git a/gtda/mapper/nerve.py b/gtda/mapper/nerve.py
index a1759cc51..c9c40a334 100644
--- a/gtda/mapper/nerve.py
+++ b/gtda/mapper/nerve.py
@@ -34,8 +34,6 @@ class Nerve(BaseEstimator, TransformerMixin):
     by :func:`gtda.mapper.make_mapper_pipeline`. It corresponds the last two
     arrows in `this diagram <../../../../_images/mapper_pipeline.svg>`_.
 
-    This transformer is not intended for direct use.
-
     Parameters
     ----------
     min_intersection : int, optional, default: ``1``
@@ -94,20 +92,16 @@ def fit_transform(self, X, y=None):
 
         Parameters
         ----------
-        X : list of list of tuple
-            Data structure describing a cover of a dataset (e.g. as depicted in
-            `this diagram <../../../../_images/mapper_pipeline.svg>`_) produced
-            by the clustering step of a :class:`gtda.mapper.MapperPipeline`.
-            Each sublist corresponds to a (non-empty) pullback cover set --
-            equivalently, to a cover set in the filter range which has
-            non-empty preimage. It contains triples of the form
-            ``(pullback_set_label, partial_cluster_label, node_elements)``
-            where ``partial_cluster_label`` is a cluster label within the
-            pullback cover set identified by ``pullback_set_label``, and
-            ``node_elements`` is an array of integer indices. To each pair
-            ``(pullback_set_label, partial_cluster_label)`` there corresponds
-            a unique node in the output Mapper graph. This node represents
-            the data subset defined by the indices in ``node_elements``.
+        X : ndarray of shape (n_samples,)
+            Cluster labels describing a refined cover of a dataset produced by
+            the clustering step of a :class:`gtda.mapper.MapperPipeline`,
+            as depicted in
+            `this diagram <../../../../_images/mapper_pipeline.svg>`_. Each
+            entry is a tuple of pairs of the form
+            ``(pullback_cluster_label, partial_label)`` where
+            ``partial_cluster_label`` is a cluster label within the pullback
+            cover set identified by ``pullback_set_label``. Unique such pairs
+            correspond to nodes in the output graph.
 
         y : None
             There is no need for a target in a transformer, yet the pipeline

From 9749dab0fc41644e823be8deed6322676fa8bf7d Mon Sep 17 00:00:00 2001
From: Umberto Lupo <umberto.lupo@gmail.com>
Date: Fri, 4 Dec 2020 21:09:46 +0100
Subject: [PATCH 12/17] Linting in __init__

---
 gtda/diagrams/__init__.py     | 4 ++--
 gtda/homology/__init__.py     | 5 ++---
 gtda/images/__init__.py       | 5 ++---
 gtda/plotting/__init__.py     | 4 ++--
 gtda/point_clouds/__init__.py | 5 ++---
 5 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/gtda/diagrams/__init__.py b/gtda/diagrams/__init__.py
index c62e6fa6e..43a0b17e7 100644
--- a/gtda/diagrams/__init__.py
+++ b/gtda/diagrams/__init__.py
@@ -1,6 +1,6 @@
 """The module :mod:`gtda.diagrams` implements transformers to preprocess
-persistence diagrams, extract features from them, or compute pairwise
-distances between diagrams."""
+persistence diagrams, extract features from them, or compute pairwise distances
+between diagrams."""
 
 from .preprocessing import ForgetDimension, Scaler, Filtering
 from .distance import PairwiseDistance
diff --git a/gtda/homology/__init__.py b/gtda/homology/__init__.py
index 1918833e1..7fcc6a45b 100644
--- a/gtda/homology/__init__.py
+++ b/gtda/homology/__init__.py
@@ -1,6 +1,5 @@
-"""The module :mod:`gtda.homology` implements transformers
-to generate persistence diagrams.
-"""
+"""The module :mod:`gtda.homology` implements transformers to generate
+persistence diagrams."""
 # License: GNU AGPLv3
 
 from .simplicial import VietorisRipsPersistence, SparseRipsPersistence, \
diff --git a/gtda/images/__init__.py b/gtda/images/__init__.py
index f4b700e91..f7733feba 100644
--- a/gtda/images/__init__.py
+++ b/gtda/images/__init__.py
@@ -1,6 +1,5 @@
-"""The module :mod:`gtda.images` implements techniques
-that can be used to apply Topological Data Analysis to images.
-"""
+"""The module :mod:`gtda.images` implements techniques that can be used to
+apply Topological Data Analysis to images."""
 # License: GNU AGPLv3
 
 from .preprocessing import Binarizer, Inverter, Padder, ImageToPointCloud
diff --git a/gtda/plotting/__init__.py b/gtda/plotting/__init__.py
index 421f2e05e..c98e17011 100644
--- a/gtda/plotting/__init__.py
+++ b/gtda/plotting/__init__.py
@@ -1,5 +1,5 @@
-"""The module :mod:`gtda.plotting` implements function to plot the
-outputs of giotto-tda transformers."""
+"""The module :mod:`gtda.plotting` implements functions to plot the outputs of
+giotto-tda transformers."""
 
 from .point_clouds import plot_point_cloud
 from .persistence_diagrams import plot_diagram
diff --git a/gtda/point_clouds/__init__.py b/gtda/point_clouds/__init__.py
index 7fe81f718..1850056f1 100644
--- a/gtda/point_clouds/__init__.py
+++ b/gtda/point_clouds/__init__.py
@@ -1,6 +1,5 @@
-"""The module :mod:`gtda.homology` implements transformers
-to process point clouds and modify metric spaces.
-"""
+"""The module :mod:`gtda.point_clouds` implements transformers to process point
+clouds and modify metric spaces."""
 # License: GNU AGPLv3
 
 from .rescaling import ConsistentRescaling, ConsecutiveRescaling

From 72ed8dff73227a9228cca798b8fd7494e5393e23 Mon Sep 17 00:00:00 2001
From: Umberto Lupo <umberto.lupo@gmail.com>
Date: Fri, 4 Dec 2020 21:27:15 +0100
Subject: [PATCH 13/17] Fix docstring typos in Nerve

---
 gtda/mapper/nerve.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gtda/mapper/nerve.py b/gtda/mapper/nerve.py
index c9c40a334..e6ac9c007 100644
--- a/gtda/mapper/nerve.py
+++ b/gtda/mapper/nerve.py
@@ -97,8 +97,8 @@ def fit_transform(self, X, y=None):
             the clustering step of a :class:`gtda.mapper.MapperPipeline`,
             as depicted in
             `this diagram <../../../../_images/mapper_pipeline.svg>`_. Each
-            entry is a tuple of pairs of the form
-            ``(pullback_cluster_label, partial_label)`` where
+            entry in `X` is a tuple of pairs of the form
+            ``(pullback_set_label, partial_cluster_label)`` where
             ``partial_cluster_label`` is a cluster label within the pullback
             cover set identified by ``pullback_set_label``. Unique such pairs
             correspond to nodes in the output graph.

From d878722d0b8806fe23d1bef05991710315cc9f47 Mon Sep 17 00:00:00 2001
From: Umberto Lupo <umberto.lupo@gmail.com>
Date: Fri, 4 Dec 2020 21:40:49 +0100
Subject: [PATCH 14/17] Improve variable name following @lewtun's comment

---
 gtda/mapper/nerve.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/gtda/mapper/nerve.py b/gtda/mapper/nerve.py
index e6ac9c007..2032f04d6 100644
--- a/gtda/mapper/nerve.py
+++ b/gtda/mapper/nerve.py
@@ -122,19 +122,19 @@ def fit_transform(self, X, y=None):
         """
         # TODO: Include a validation step for X
         # Graph construction -- vertices with their metadata
-        nodes_dict = {}
+        labels_to_indices = {}
         for i, sample in enumerate(X):
             for node_id_pair in sample:
-                nodes_dict.setdefault(node_id_pair, []).append(i)
-        labels_to_indices = {key: np.array(value, dtype=np.int32)
-                             for key, value in nodes_dict.items()}
+                labels_to_indices.setdefault(node_id_pair, []).append(i)
+        labels_to_indices = {key: np.array(value)
+                             for key, value in labels_to_indices.items()}
         n_nodes = len(labels_to_indices)
         graph = ig.Graph(n_nodes)
 
-        # `nodes_dict` is a dictionary of, say, N key-value pairs of the form
-        # (pullback_set_label, partial_cluster_label): node_elements. Hence,
-        # zip(*nodes_dict) generates two tuples of length N, each corresponding
-        # to a type of node attribute in the final graph.
+        # labels_to_indices is a dictionary of, say, N key-value pairs of the
+        # form (pullback_set_label, partial_cluster_label): node_elements.
+        # Hence, zip(*labels_to_indices) generates two tuples of length N, each
+        # corresponding to a type of node attribute in the final graph.
         node_attributes = zip(*labels_to_indices)
         graph.vs["pullback_set_label"] = next(node_attributes)
         graph.vs["partial_cluster_label"] = next(node_attributes)

From b84cb81e2297e71fec631f5483e6c1b2ec918eda Mon Sep 17 00:00:00 2001
From: Umberto Lupo <umberto.lupo@gmail.com>
Date: Thu, 10 Dec 2020 15:31:53 +0100
Subject: [PATCH 15/17] Fix linting

---
 gtda/mapper/cover.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gtda/mapper/cover.py b/gtda/mapper/cover.py
index 9db055cfb..478471853 100644
--- a/gtda/mapper/cover.py
+++ b/gtda/mapper/cover.py
@@ -213,7 +213,8 @@ def _fit_transform_balanced(self, X):
             X_rank, self.n_intervals, self.overlap_frac, is_uniform=False)
         X_rank = np.broadcast_to(X_rank[:, None],
                                  (X.shape[0], self.n_intervals))
-        Xt = np.logical_and(X_rank > self._left_limits, X_rank < self._right_limits)
+        Xt = np.logical_and(X_rank > self._left_limits,
+                            X_rank < self._right_limits)
         return Xt
 
     def _fit_transform(self, X):

From 24e8ead496719a110cab6d13a4387ccef29d0dbd Mon Sep 17 00:00:00 2001
From: Umberto Lupo <46537483+ulupo@users.noreply.github.com>
Date: Fri, 11 Dec 2020 12:01:57 +0100
Subject: [PATCH 16/17] Fix wording following @wreise's review

---
 gtda/mapper/nerve.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gtda/mapper/nerve.py b/gtda/mapper/nerve.py
index 2032f04d6..b22a5ef13 100644
--- a/gtda/mapper/nerve.py
+++ b/gtda/mapper/nerve.py
@@ -100,7 +100,7 @@ def fit_transform(self, X, y=None):
             entry in `X` is a tuple of pairs of the form
             ``(pullback_set_label, partial_cluster_label)`` where
             ``partial_cluster_label`` is a cluster label within the pullback
-            cover set identified by ``pullback_set_label``. Unique such pairs
+            cover set identified by ``pullback_set_label``. Then unique pairs
             correspond to nodes in the output graph.
 
         y : None

From 43eb7d75b856b55ba750f9d88ab335042ae05b53 Mon Sep 17 00:00:00 2001
From: Umberto Lupo <46537483+ulupo@users.noreply.github.com>
Date: Fri, 11 Dec 2020 12:03:31 +0100
Subject: [PATCH 17/17] Fix "then" -> "the"

---
 gtda/mapper/nerve.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gtda/mapper/nerve.py b/gtda/mapper/nerve.py
index b22a5ef13..5eae5144f 100644
--- a/gtda/mapper/nerve.py
+++ b/gtda/mapper/nerve.py
@@ -100,7 +100,7 @@ def fit_transform(self, X, y=None):
             entry in `X` is a tuple of pairs of the form
             ``(pullback_set_label, partial_cluster_label)`` where
             ``partial_cluster_label`` is a cluster label within the pullback
-            cover set identified by ``pullback_set_label``. Then unique pairs
+            cover set identified by ``pullback_set_label``. The unique pairs
             correspond to nodes in the output graph.
 
         y : None