From 0aa72e09d8db65a5bb19d329e91474c869034253 Mon Sep 17 00:00:00 2001 From: Umberto Lupo Date: Sun, 2 Aug 2020 10:11:45 +0200 Subject: [PATCH 1/2] Clarify expected diagram properties in docs to fix #233 --- gtda/diagrams/distance.py | 9 +++++++ gtda/diagrams/features.py | 19 ++++++++++++++ gtda/diagrams/preprocessing.py | 15 ++++++++--- gtda/diagrams/representations.py | 45 ++++++++++++++++++++++++++++++++ 4 files changed, 85 insertions(+), 3 deletions(-) diff --git a/gtda/diagrams/distance.py b/gtda/diagrams/distance.py index e92e52dee..7052a1d8a 100644 --- a/gtda/diagrams/distance.py +++ b/gtda/diagrams/distance.py @@ -24,6 +24,9 @@ class PairwiseDistance(BaseEstimator, TransformerMixin): matrices or a single distance matrix between pairs of diagrams is calculated according to the following steps: + Input collections of persistence diagrams for this transformer must + satisfy certain requirements, see e.g. :meth:`fit`. + 1. All diagrams are partitioned into subdiagrams corresponding to distinct homology dimensions. 2. Pairwise distances between subdiagrams of equal homology @@ -146,6 +149,9 @@ def fit(self, X, y=None): Input data. Array of persistence diagrams, each a collection of triples [b, d, q] representing persistent topological features through their birth (b), death (d) and homology dimension (q). + It is important that, for each possible homology dimension, the + number of triples for which q equals that homology dimension is + constant across the entries of `X`. y : None There is no need for a target in a transformer, yet the pipeline @@ -190,6 +196,9 @@ def transform(self, X, y=None): Input data. Array of persistence diagrams, each a collection of triples [b, d, q] representing persistent topological features through their birth (b), death (d) and homology dimension (q). + It is important that, for each possible homology dimension, the + number of triples for which q equals that homology dimension is + constant across the entries of `X`. 
y : None There is no need for a target in a transformer, yet the pipeline diff --git a/gtda/diagrams/features.py b/gtda/diagrams/features.py index 5797672a0..e38df2363 100644 --- a/gtda/diagrams/features.py +++ b/gtda/diagrams/features.py @@ -27,6 +27,9 @@ class PersistenceEntropy(BaseEstimator, TransformerMixin): calculated as the (base e) entropies of the collections of differences d - b, normalized by the sum of all such differences. + Input collections of persistence diagrams for this transformer must + satisfy certain requirements, see e.g. :meth:`fit`. + Parameters ---------- n_jobs : int or None, optional, default: ``None`` @@ -50,6 +53,7 @@ class PersistenceEntropy(BaseEstimator, TransformerMixin): def __init__(self, n_jobs=None): self.n_jobs = n_jobs + @staticmethod def _persistence_entropy(self, X): X_lifespan = X[:, :, 1] - X[:, :, 0] X_normalized = X_lifespan / np.sum(X_lifespan, axis=1).reshape(-1, 1) @@ -69,6 +73,9 @@ def fit(self, X, y=None): Input data. Array of persistence diagrams, each a collection of triples [b, d, q] representing persistent topological features through their birth (b), death (d) and homology dimension (q). + It is important that, for each possible homology dimension, the + number of triples for which q equals that homology dimension is + constant across the entries of `X`. y : None There is no need for a target in a transformer, yet the pipeline @@ -95,6 +102,9 @@ def transform(self, X, y=None): Input data. Array of persistence diagrams, each a collection of triples [b, d, q] representing persistent topological features through their birth (b), death (d) and homology dimension (q). + It is important that, for each possible homology dimension, the + number of triples for which q equals that homology dimension is + constant across the entries of `X`. y : None There is no need for a target in a transformer, yet the pipeline @@ -137,6 +147,9 @@ class Amplitude(BaseEstimator, TransformerMixin): 3. 
The final result is either :math:`\\mathbf{a}` itself or a norm of :math:`\\mathbf{a}`, specified by the parameter `order`. + Input collections of persistence diagrams for this transformer must + satisfy certain requirements, see e.g. :meth:`fit`. + Parameters ---------- metric : ``'bottleneck'`` | ``'wasserstein'`` | ``'landscape'`` | \ @@ -242,6 +255,9 @@ def fit(self, X, y=None): Input data. Array of persistence diagrams, each a collection of triples [b, d, q] representing persistent topological features through their birth (b), death (d) and homology dimension (q). + It is important that, for each possible homology dimension, the + number of triples for which q equals that homology dimension is + constant across the entries of X. y : None There is no need for a target in a transformer, yet the pipeline @@ -284,6 +300,9 @@ def transform(self, X, y=None): Input data. Array of persistence diagrams, each a collection of triples [b, d, q] representing persistent topological features through their birth (b), death (d) and homology dimension (q). + It is important that, for each possible homology dimension, the + number of triples for which q equals that homology dimension is + constant across the entries of X. y : None There is no need for a target in a transformer, yet the pipeline diff --git a/gtda/diagrams/preprocessing.py b/gtda/diagrams/preprocessing.py index 368dedbc0..1f1efe1cc 100644 --- a/gtda/diagrams/preprocessing.py +++ b/gtda/diagrams/preprocessing.py @@ -125,6 +125,9 @@ class Scaler(BaseEstimator, TransformerMixin, PlotterMixin): two-dimensional array of amplitudes (one per diagram and homology dimension) to obtain :attr:`scale_`. + Input collections of persistence diagrams for this transformer must + satisfy certain requirements, see e.g. :meth:`fit`. + Parameters ---------- metric : ``'bottleneck'`` | ``'wasserstein'`` | ``'betti'`` | \ @@ -196,6 +199,9 @@ def fit(self, X, y=None): Input data. 
Array of persistence diagrams, each a collection of triples [b, d, q] representing persistent topological features through their birth (b), death (d) and homology dimension (q). + It is important that, for each possible homology dimension, the + number of triples for which q equals that homology dimension is + constant across the entries of X. y : None There is no need for a target in a transformer, yet the pipeline @@ -244,6 +250,9 @@ def transform(self, X, y=None): Input data. Array of persistence diagrams, each a collection of triples [b, d, q] representing persistent topological features through their birth (b), death (d) and homology dimension (q). + It is important that, for each possible homology dimension, the + number of triples for which q equals that homology dimension is + constant across the entries of X. y : None There is no need for a target in a transformer, yet the pipeline @@ -262,13 +271,13 @@ def transform(self, X, y=None): return Xs def inverse_transform(self, X): - """Scale back the data to the original representation. Multiplies - by the scale found in :meth:`fit`. + """Scale back the data to the original representation. Multiplies by + the scale found in :meth:`fit`. Parameters ---------- X : ndarray of shape (n_samples, n_features, 3) - Data to apply the inverse transform to. + Data to apply the inverse transform to, cf. :meth:`transform`. Returns ------- diff --git a/gtda/diagrams/representations.py b/gtda/diagrams/representations.py index ead22a33b..c7b659f1f 100644 --- a/gtda/diagrams/representations.py +++ b/gtda/diagrams/representations.py @@ -35,6 +35,9 @@ class BettiCurve(BaseEstimator, TransformerMixin, PlotterMixin): considered separately, and their respective Betti curves are obtained by evenly sampling the :ref:`filtration parameter `. + Input collections of persistence diagrams for this transformer must + satisfy certain requirements, see e.g. :meth:`fit`. 
+ Parameters ---------- n_bins : int, optional, default: ``100`` @@ -93,6 +96,9 @@ def fit(self, X, y=None): Input data. Array of persistence diagrams, each a collection of triples [b, d, q] representing persistent topological features through their birth (b), death (d) and homology dimension (q). + It is important that, for each possible homology dimension, the + number of triples for which q equals that homology dimension is + constant across the entries of X. y : None There is no need for a target in a transformer, yet the pipeline @@ -124,6 +130,9 @@ def transform(self, X, y=None): Input data. Array of persistence diagrams, each a collection of triples [b, d, q] representing persistent topological features through their birth (b), death (d) and homology dimension (q). + It is important that, for each possible homology dimension, the + number of triples for which q equals that homology dimension is + constant across the entries of X. y : None There is no need for a target in a transformer, yet the pipeline @@ -238,6 +247,9 @@ class PersistenceLandscape(BaseEstimator, TransformerMixin, PlotterMixin): landscapes are obtained by evenly sampling the :ref:`filtration parameter `. + Input collections of persistence diagrams for this transformer must + satisfy certain requirements, see e.g. :meth:`fit`. + Parameters ---------- n_layers : int, optional, default: ``1`` @@ -302,6 +314,9 @@ def fit(self, X, y=None): Input data. Array of persistence diagrams, each a collection of triples [b, d, q] representing persistent topological features through their birth (b), death (d) and homology dimension (q). + It is important that, for each possible homology dimension, the + number of triples for which q equals that homology dimension is + constant across the entries of X. y : None There is no need for a target in a transformer, yet the pipeline @@ -333,6 +348,9 @@ def transform(self, X, y=None): Input data. 
Array of persistence diagrams, each a collection of triples [b, d, q] representing persistent topological features through their birth (b), death (d) and homology dimension (q). + It is important that, for each possible homology dimension, the + number of triples for which q equals that homology dimension is + constant across the entries of X. y : None There is no need for a target in a transformer, yet the pipeline @@ -462,6 +480,9 @@ class HeatKernel(BaseEstimator, TransformerMixin, PlotterMixin): diagonal, and the difference between the results of the two convolutions is computed. The result can be thought of as a (multi-channel) raster image. + Input collections of persistence diagrams for this transformer must + satisfy certain requirements, see e.g. :meth:`fit`. + Parameters ---------- sigma : float, optional default ``1.`` @@ -534,6 +555,9 @@ def fit(self, X, y=None): Input data. Array of persistence diagrams, each a collection of triples [b, d, q] representing persistent topological features through their birth (b), death (d) and homology dimension (q). + It is important that, for each possible homology dimension, the + number of triples for which q equals that homology dimension is + constant across the entries of X. y : None There is no need for a target in a transformer, yet the pipeline @@ -567,6 +591,9 @@ def transform(self, X, y=None): Input data. Array of persistence diagrams, each a collection of triples [b, d, q] representing persistent topological features through their birth (b), death (d) and homology dimension (q). + It is important that, for each possible homology dimension, the + number of triples for which q equals that homology dimension is + constant across the entries of X. y : None There is no need for a target in a transformer, yet the pipeline @@ -642,6 +669,9 @@ class PersistenceImage(BaseEstimator, TransformerMixin, PlotterMixin): `. The result can be thought of as a (multi-channel) raster image. 
+ Input collections of persistence diagrams for this transformer must + satisfy certain requirements, see e.g. :meth:`fit`. + Parameters ---------- sigma : float, optional default ``1.`` @@ -731,6 +761,9 @@ def fit(self, X, y=None): Input data. Array of persistence diagrams, each a collection of triples [b, d, q] representing persistent topological features through their birth (b), death (d) and homology dimension (q). + It is important that, for each possible homology dimension, the + number of triples for which q equals that homology dimension is + constant across the entries of X. y : None There is no need for a target in a transformer, yet the pipeline @@ -771,6 +804,9 @@ def transform(self, X, y=None): Input data. Array of persistence diagrams, each a collection of triples [b, d, q] representing persistent topological features through their birth (b), death (d) and homology dimension (q). + It is important that, for each possible homology dimension, the + number of triples for which q equals that homology dimension is + constant across the entries of X. y : None There is no need for a target in a transformer, yet the pipeline @@ -850,6 +886,9 @@ class Silhouette(BaseEstimator, TransformerMixin, PlotterMixin): spaced locations from appropriate ranges of the :ref:`filtration parameter `. + Input collections of persistence diagrams for this transformer must + satisfy certain requirements, see e.g. :meth:`fit`. + Parameters ---------- power: float, optional, default: ``1.`` @@ -923,6 +962,9 @@ def fit(self, X, y=None): Input data. Array of persistence diagrams, each a collection of triples [b, d, q] representing persistent topological features through their birth (b), death (d) and homology dimension (q). + It is important that, for each possible homology dimension, the + number of triples for which q equals that homology dimension is + constant across the entries of X. 
y : None There is no need for a target in a transformer, yet the pipeline @@ -954,6 +996,9 @@ def transform(self, X, y=None): Input data. Array of persistence diagrams, each a collection of triples [b, d, q] representing persistent topological features through their birth (b), death (d) and homology dimension (q). + It is important that, for each possible homology dimension, the + number of triples for which q equals that homology dimension is + constant across the entries of X. y : None There is no need for a target in a transformer, yet the pipeline From 411402ae88b28f37afabd2b1f3783dae20c6652a Mon Sep 17 00:00:00 2001 From: Umberto Lupo Date: Sun, 2 Aug 2020 10:25:07 +0200 Subject: [PATCH 2/2] Fix incorrect application of @staticmethod --- gtda/diagrams/features.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gtda/diagrams/features.py b/gtda/diagrams/features.py index e38df2363..7b697b26a 100644 --- a/gtda/diagrams/features.py +++ b/gtda/diagrams/features.py @@ -54,7 +54,7 @@ def __init__(self, n_jobs=None): self.n_jobs = n_jobs @staticmethod - def _persistence_entropy(self, X): + def _persistence_entropy(X): X_lifespan = X[:, :, 1] - X[:, :, 0] X_normalized = X_lifespan / np.sum(X_lifespan, axis=1).reshape(-1, 1) return - np.sum(np.nan_to_num(