From dfdad0faea8287d273aafcdf559e504d3e0029c5 Mon Sep 17 00:00:00 2001 From: Guillaume Tauzin Date: Mon, 23 Mar 2020 12:06:28 +0100 Subject: [PATCH 01/29] Add below option for Filter Signed-off-by: Guillaume Tauzin --- gtda/diagrams/_utils.py | 7 +++++-- gtda/diagrams/preprocessing.py | 19 ++++++++++++------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/gtda/diagrams/_utils.py b/gtda/diagrams/_utils.py index 1f417a867..229f17c27 100644 --- a/gtda/diagrams/_utils.py +++ b/gtda/diagrams/_utils.py @@ -34,7 +34,7 @@ def _sample_image(image, sampled_diag): image[unique] = counts -def _filter(Xs, filtered_homology_dimensions, cutoff): +def _filter(Xs, filtered_homology_dimensions, cutoff, below): homology_dimensions = sorted(list(set(Xs[0, :, 2]))) unfiltered_homology_dimensions = sorted(list( set(homology_dimensions) - set(filtered_homology_dimensions))) @@ -47,7 +47,10 @@ def _filter(Xs, filtered_homology_dimensions, cutoff): for dim in filtered_homology_dimensions: Xdim = _subdiagrams(Xs, [dim]) min_value = np.min(Xdim[:, :, 0]) - mask = (Xdim[:, :, 1] - Xdim[:, :, 0]) <= cutoff + if below: + mask = (Xdim[:, :, 1] - Xdim[:, :, 0]) <= cutoff + else: + mask = (Xdim[:, :, 1] - Xdim[:, :, 0]) >= cutoff Xdim[mask, :] = [min_value, min_value, dim] max_points = np.max(np.sum(Xs[:, :, 1] != 0, axis=1)) Xdim = Xdim[:, :max_points, :] diff --git a/gtda/diagrams/preprocessing.py b/gtda/diagrams/preprocessing.py index 56276ac89..2d403ce9d 100644 --- a/gtda/diagrams/preprocessing.py +++ b/gtda/diagrams/preprocessing.py @@ -314,10 +314,10 @@ class Filtering(BaseEstimator, TransformerMixin, PlotterMixin): """Filtering of persistence diagrams. Filtering a diagram means discarding all points [b, d, q] representing - topological features whose lifetime d - b is less than or equal to a - cutoff value. Technically, discarded points are replaced by points on the - diagonal (i.e. whose birth and death values coincide), which carry no - information. + topological features based on whether their lifetimes d - b is below or a + above a cutoff value. Technically, discarded points are replaced by points + on the diagonal (i.e. whose birth and death values coincide), which carry + no information. Parameters ---------- @@ -330,6 +330,9 @@ class Filtering(BaseEstimator, TransformerMixin, PlotterMixin): epsilon : float, optional, default: ``0.01`` The cutoff value controlling the amount of filtering. + below : bool, optional, default: ``True`` + If ``True``, filter subdiagram points below the cutoff. + Attributes ---------- homology_dimensions_ : list @@ -348,12 +351,14 @@ class Filtering(BaseEstimator, TransformerMixin, PlotterMixin): 'homology_dimensions': { 'type': (list, tuple, type(None)), 'of': {'type': int, 'in': Interval(0, np.inf, closed='left')}}, - 'epsilon': {'type': Real, 'in': Interval(0, np.inf, closed='left')} + 'epsilon': {'type': Real, 'in': Interval(0, np.inf, closed='left')}, + 'below': {'type': bool} } - def __init__(self, homology_dimensions=None, epsilon=0.01): + def __init__(self, homology_dimensions=None, epsilon=0.01, below=True): self.homology_dimensions = homology_dimensions self.epsilon = epsilon + self.below = below def fit(self, X, y=None): """Store relevant homology dimensions in @@ -416,7 +421,7 @@ def transform(self, X, y=None): X = check_diagrams(X) X = _sort(X) - Xt = _filter(X, self.homology_dimensions_, self.epsilon) + Xt = _filter(X, self.homology_dimensions_, self.epsilon, self.below) return Xt def plot(self, Xt, sample=0, homology_dimensions=None): From 7e7e8abb857e6210564dd780279ee5b283e7be7c Mon Sep 17 00:00:00 2001 From: Guillaume Tauzin Date: Mon, 23 Mar 2020 12:26:35 +0100 Subject: [PATCH 02/29] Revert "Add below option for Filter" This reverts commit dfdad0faea8287d273aafcdf559e504d3e0029c5. --- gtda/diagrams/_utils.py | 7 ++----- gtda/diagrams/preprocessing.py | 19 +++++++------------ 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/gtda/diagrams/_utils.py b/gtda/diagrams/_utils.py index 229f17c27..1f417a867 100644 --- a/gtda/diagrams/_utils.py +++ b/gtda/diagrams/_utils.py @@ -34,7 +34,7 @@ def _sample_image(image, sampled_diag): image[unique] = counts -def _filter(Xs, filtered_homology_dimensions, cutoff, below): +def _filter(Xs, filtered_homology_dimensions, cutoff): homology_dimensions = sorted(list(set(Xs[0, :, 2]))) unfiltered_homology_dimensions = sorted(list( set(homology_dimensions) - set(filtered_homology_dimensions))) @@ -47,10 +47,7 @@ def _filter(Xs, filtered_homology_dimensions, cutoff, below): for dim in filtered_homology_dimensions: Xdim = _subdiagrams(Xs, [dim]) min_value = np.min(Xdim[:, :, 0]) - if below: - mask = (Xdim[:, :, 1] - Xdim[:, :, 0]) <= cutoff - else: - mask = (Xdim[:, :, 1] - Xdim[:, :, 0]) >= cutoff + mask = (Xdim[:, :, 1] - Xdim[:, :, 0]) <= cutoff Xdim[mask, :] = [min_value, min_value, dim] max_points = np.max(np.sum(Xs[:, :, 1] != 0, axis=1)) Xdim = Xdim[:, :max_points, :] diff --git a/gtda/diagrams/preprocessing.py b/gtda/diagrams/preprocessing.py index 2d403ce9d..56276ac89 100644 --- a/gtda/diagrams/preprocessing.py +++ b/gtda/diagrams/preprocessing.py @@ -314,10 +314,10 @@ class Filtering(BaseEstimator, TransformerMixin, PlotterMixin): """Filtering of persistence diagrams. Filtering a diagram means discarding all points [b, d, q] representing - topological features based on whether their lifetimes d - b is below or a - above a cutoff value. Technically, discarded points are replaced by points - on the diagonal (i.e. whose birth and death values coincide), which carry - no information. + topological features whose lifetime d - b is less than or equal to a + cutoff value. Technically, discarded points are replaced by points on the + diagonal (i.e. whose birth and death values coincide), which carry no + information. Parameters ---------- @@ -330,9 +330,6 @@ class Filtering(BaseEstimator, TransformerMixin, PlotterMixin): epsilon : float, optional, default: ``0.01`` The cutoff value controlling the amount of filtering. - below : bool, optional, default: ``True`` - If ``True``, filter subdiagram points below the cutoff. - Attributes ---------- homology_dimensions_ : list @@ -351,14 +348,12 @@ class Filtering(BaseEstimator, TransformerMixin, PlotterMixin): 'homology_dimensions': { 'type': (list, tuple, type(None)), 'of': {'type': int, 'in': Interval(0, np.inf, closed='left')}}, - 'epsilon': {'type': Real, 'in': Interval(0, np.inf, closed='left')}, - 'below': {'type': bool} + 'epsilon': {'type': Real, 'in': Interval(0, np.inf, closed='left')} } - def __init__(self, homology_dimensions=None, epsilon=0.01, below=True): + def __init__(self, homology_dimensions=None, epsilon=0.01): self.homology_dimensions = homology_dimensions self.epsilon = epsilon - self.below = below def fit(self, X, y=None): """Store relevant homology dimensions in @@ -421,7 +416,7 @@ def transform(self, X, y=None): X = check_diagrams(X) X = _sort(X) - Xt = _filter(X, self.homology_dimensions_, self.epsilon, self.below) + Xt = _filter(X, self.homology_dimensions_, self.epsilon) return Xt def plot(self, Xt, sample=0, homology_dimensions=None): From 48fe0ea0007f4ee267998956c8f561edc8fd90c2 Mon Sep 17 00:00:00 2001 From: Guillaume Tauzin Date: Mon, 23 Mar 2020 12:43:52 +0100 Subject: [PATCH 03/29] Add DensityFiltration Signed-off-by: Guillaume Tauzin --- gtda/images/__init__.py | 4 +- gtda/images/filtrations.py | 209 +++++++++++++++++++++++++++++++++++++ 2 files changed, 212 insertions(+), 1 deletion(-) diff --git a/gtda/images/__init__.py b/gtda/images/__init__.py index 0dd1def91..0694abf23 100644 --- a/gtda/images/__init__.py +++ b/gtda/images/__init__.py @@ -5,7 +5,8 @@ from .preprocessing import Binarizer, Inverter, Padder, ImageToPointCloud from .filtrations import HeightFiltration, RadialFiltration, \ - DilationFiltration, ErosionFiltration, SignedDistanceFiltration + DilationFiltration, ErosionFiltration, SignedDistanceFiltration, \ + DensityFiltration __all__ = [ 'Binarizer', @@ -17,4 +18,5 @@ 'DilationFiltration', 'ErosionFiltration', 'SignedDistanceFiltration', + 'DensityFiltration', ] diff --git a/gtda/images/filtrations.py b/gtda/images/filtrations.py index 624da3f4e..1e5e0763f 100644 --- a/gtda/images/filtrations.py +++ b/gtda/images/filtrations.py @@ -4,6 +4,7 @@ from numbers import Real from types import FunctionType from warnings import warn +import itertools import numpy as np from joblib import Parallel, delayed, effective_n_jobs @@ -13,6 +14,7 @@ from sklearn.utils.validation import check_array, check_is_fitted from ._utils import _dilate, _erode +from .preprocessing import Padder from ..base import PlotterMixin from ..plotting import plot_heatmap from ..utils._docs import adapt_fit_transform_docs @@ -956,3 +958,210 @@ def plot(Xt, sample=0, colorscale='greys', origin='upper'): """ return plot_heatmap(Xt[sample], colorscale=colorscale, origin=origin) + + +@adapt_fit_transform_docs +class DensityFiltration(BaseEstimator, TransformerMixin, PlotterMixin): + """Filtrations of 2D/3D binary images based on the number of neighboring + activated pixels. + + The density filtration assigns to each pixel of a binary image a greyscale + value equal to the number of activated pixels with a ball centered around + it. + + Parameters + ---------- + radius : float, default: ``1`` + The radius of the ball within which the number of activated pixels is + considered. + + metric : string, or callable, optional, default: ``'euclidean'`` + Determines a rule with which to calculate distances between + pairs of pixels. + If ``metric`` is a string, it must be one of the options allowed by + ``scipy.spatial.distance.pdist`` for its metric parameter, or a metric + listed in ``sklearn.pairwise.PAIRWISE_DISTANCE_FUNCTIONS``, including + "euclidean", "manhattan", or "cosine". + If ``metric`` is a callable function, it is called on each pair of + instances and the resulting value recorded. The callable should take + two arrays from the entry in `X` as input, and return a value + indicating the distance between them. + + metric_params : dict, optional, default: ``{}`` + Additional keyword arguments for the metric function. + + n_jobs : int or None, optional, default: ``None`` + The number of jobs to use for the computation. ``None`` means 1 unless + in a :obj:`joblib.parallel_backend` context. ``-1`` means using all + processors. + + Attributes + ---------- + mask_ : ndarray of shape + The mask applied around each pixel to calculate the number of its + activated neighbors. It is obtained from the choice of the ``radius`` + and ``metric``. Set in :meth:`fit`. + + See also + -------- + gtda.homology.CubicalPersistence, Binarizer + + References + ---------- + [1] A. Garin and G. Tauzin, "A topological reading lesson: Classification + of MNIST using TDA"; 19th International IEEE Conference on Machine + Learning and Applications (ICMLA 2020), 2019; arXiv: `1910.08345 \ + `_. + + """ + + _hyperparameters = { + 'radius': {'type': Real, 'in': Interval(0, np.inf, closed='right')}, + 'metric': {'type': (str, FunctionType)}, + 'metric_params': {'type': (dict, type(None))}, + } + + def __init__(self, radius=3, metric='euclidean', metric_params={}, + n_jobs=None): + self.radius = radius + self.metric = metric + self.metric_params = metric_params + self.n_jobs = n_jobs + + def _calculate_density(self, X): + Xd = np.zeros(X.shape) + + for i, j, k in self._iterator: + Xd += np.roll(np.roll( + np.roll(X, k, axis=3), j, axis=2), i, axis=1) \ + * self.mask_[self._range + i, self._range + j, + self._range + k] + return Xd + + def fit(self, X, y=None): + """Calculate :attr:`mask_` from a collection of binary images. Then, + return the estimator. + + This method is here to implement the usual scikit-learn API and hence + work in pipelines. + + Parameters + ---------- + X : ndarray of shape (n_samples, n_pixels_x, n_pixels_y [, n_pixels_z]) + Input data. Each entry along axis 0 is interpreted as a 2D or 3D + binary image. + + y : None + There is no need of a target in a transformer, yet the pipeline API + requires this parameter. + + Returns + ------- + self : object + + """ + X = check_array(X, allow_nd=True) + self._n_dimensions = X.ndim - 1 + if (self._n_dimensions < 2) or (self._n_dimensions > 3): + warn(f"Input of `fit` contains arrays of dimension " + f"{self._n_dimensions}.") + validate_params( + self.get_params(), self._hyperparameters, exclude=['n_jobs']) + + self._range = int(np.ceil(self.radius)) + + iterator_range_list = [range(-self._range, self._range + 1) + for _ in range(self._n_dimensions)] \ + + [[0] for _ in range(3 - self._n_dimensions)] + self._iterator = tuple(itertools.product(*iterator_range_list)) + + # The mask is always 3D but not the iterator. + self.mask_ = np.ones(tuple(2 * self._range + 1 for _ in range(3)), + dtype=np.bool) + mesh_range_list = [np.arange(0, 2 * self._range + 1) for _ in range(3)] + self.mesh_ = np.stack( + np.meshgrid(*mesh_range_list), axis=3).reshape((-1, 3)) + + center = self._range * np.ones((1, 3)) + self.mask_ = pairwise_distances( + center, self.mesh_, metric=self.metric, + n_jobs=1, **self.metric_params).reshape(self.mask_.shape) + + self.mask_ = self.mask_ <= self.radius + + padding = np.asarray( + [*[self._range for _ in range(self._n_dimensions)], + *[0 for _ in range(3 - self._n_dimensions)]]) + self._padder = Padder(paddings=padding) + self._padder.fit(X.reshape((*X.shape[:3], -1))) + + return self + + def transform(self, X, y=None): + """For each binary image in the collection `X`, calculate a + corresponding greyscale image based on the density of its pixels. + Return the collection of greyscale images. + + Parameters + ---------- + X : ndarray of shape (n_samples, n_pixels_x, n_pixels_y [, n_pixels_z]) + Input data. Each entry along axis 0 is interpreted as a 2D or 3D + binary image. + + y : None + There is no need of a target in a transformer, yet the pipeline API + requires this parameter. + + Returns + ------- + Xt : ndarray of shape (n_samples, n_pixels_x, + n_pixels_y [, n_pixels_z]) + Transformed collection of images. Each entry along axis 0 is a + 2D or 3D greyscale image. + + """ + check_is_fitted(self) + Xt = check_array(X, allow_nd=True, copy=True) + + Xt = Xt.reshape((*X.shape[:3], -1)) + Xt = self._padder.transform(Xt) + + Xt = Parallel(n_jobs=self.n_jobs)( + delayed(self._calculate_density)(Xt[s]) + for s in gen_even_slices(Xt.shape[0], + effective_n_jobs(self.n_jobs))) + Xt = np.concatenate(Xt) + + Xt = Xt[:, self._range: -self._range, self._range: -self._range] + + if self._n_dimensions == 3: + Xt = Xt[:, :, :, self._range: -self._range] + + Xt = Xt.reshape(X.shape) + + return Xt + + @staticmethod + def plot(Xt, sample=0, colorscale='greys', origin='upper'): + """Plot a sample from a collection of 2D greyscale images. + + Parameters + ---------- + Xt : ndarray of shape (n_samples, n_pixels_x, n_pixels_y) + Collection of 2D greyscale images, such as returned by + :meth:`transform`. + + sample : int, optional, default: ``0`` + Index of the sample in `Xt` to be plotted. + + colorscale : str, optional, default: ``'greys'`` + Color scale to be used in the heat map. Can be anything allowed by + :class:`plotly.graph_objects.Heatmap`. + + origin : ``'upper'`` | ``'lower'``, optional, default: ``'upper'`` + Position of the [0, 0] pixel of `data`, in the upper left or lower + left corner. The convention ``'upper'`` is typically used for + matrices and images. + + """ + return plot_heatmap(Xt[sample], colorscale=colorscale, origin=origin) From ee46506ca7cba50e77b21bff9b8552770526dec9 Mon Sep 17 00:00:00 2001 From: Guillaume Tauzin Date: Mon, 23 Mar 2020 12:44:07 +0100 Subject: [PATCH 04/29] Add tests for DensityFiltration Signed-off-by: Guillaume Tauzin --- gtda/images/tests/test_filtrations.py | 45 ++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/gtda/images/tests/test_filtrations.py b/gtda/images/tests/test_filtrations.py index ce58a2421..a9ccef370 100644 --- a/gtda/images/tests/test_filtrations.py +++ b/gtda/images/tests/test_filtrations.py @@ -7,7 +7,8 @@ from sklearn.exceptions import NotFittedError from gtda.images import HeightFiltration, RadialFiltration, \ - DilationFiltration, ErosionFiltration, SignedDistanceFiltration + DilationFiltration, ErosionFiltration, SignedDistanceFiltration, \ + DensityFiltration images_2D = np.stack([np.ones((3, 4)), np.concatenate([np.ones((3, 2)), np.zeros((3, 2))], @@ -253,3 +254,45 @@ def test_signed_transform(n_iterations, images, expected): assert_almost_equal(signed.fit_transform(images), expected) + + +def test_density_not_fitted(): + density = DensityFiltration() + with pytest.raises(NotFittedError): + density.transform(images_2D) + + +def test_density_errors(): + radius = 'a' + density = DensityFiltration(radius=radius) + with pytest.raises(TypeError): + density.fit(images_2D) + + +images_2D_density = np.array( + [[[6., 8., 8., 6.], [7., 10., 10., 7.], [6., 8., 8., 6.]], + [[5., 5., 3., 1.], [6., 6., 4., 1.], [5., 5., 3., 1.]], + [[0., 0., 0., 0.], [0., 0., 0., 0.], [0., 0., 0., 0.]]]) + + +images_3D_density = np.array( + [[[[10., 10.], [14., 14.], [14., 14.], [10., 10.]], + [[13., 13.], [19., 19.], [19., 19.], [13., 13.]], + [[10., 10.], [14., 14.], [14., 14.], [10., 10.]]], + [[[9., 9.], [9., 9.], [5., 5.], [1., 1.]], + [[12., 12.], [12., 12.], [7., 7.], [1., 1.]], + [[9., 9.], [9., 9.], [5., 5.], [1., 1.]]], + [[[0., 0.], [0., 0.], [0., 0.], [0., 0.]], + [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], + [[0., 0.], [0., 0.], [0., 0.], [0., 0.]]]]) + + +@pytest.mark.parametrize("radius, images, expected", + [(2., images_2D, images_2D_density), + (2.2, images_2D, images_2D_density), + (2., images_3D, images_3D_density)]) +def test_density_transform(radius, images, expected): + density = DensityFiltration(radius=radius) + + assert_almost_equal(density.fit_transform(images), + expected) From e2057cfb1683f76639a43f96919d94c1f34e7281 Mon Sep 17 00:00:00 2001 From: Guillaume Tauzin Date: Sun, 30 Aug 2020 18:37:58 +0200 Subject: [PATCH 05/29] Add DensityFiltration to the doc Signed-off-by: Guillaume Tauzin --- doc/modules/images.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/modules/images.rst b/doc/modules/images.rst index 7238b1ab2..c6dd3edbe 100644 --- a/doc/modules/images.rst +++ b/doc/modules/images.rst @@ -31,3 +31,4 @@ Filtrations images.DilationFiltration images.ErosionFiltration images.SignedDistanceFiltration + images.DensityFiltration From 0df98e5446a3c88180207009178ebe8f450099b0 Mon Sep 17 00:00:00 2001 From: Guillaume Tauzin Date: Sun, 13 Sep 2020 16:47:24 +0200 Subject: [PATCH 06/29] Fix docstrings Signed-off-by: Guillaume Tauzin --- gtda/images/filtrations.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/gtda/images/filtrations.py b/gtda/images/filtrations.py index 4a7f8cb40..582327806 100644 --- a/gtda/images/filtrations.py +++ b/gtda/images/filtrations.py @@ -1046,16 +1046,17 @@ class DensityFiltration(BaseEstimator, TransformerMixin, PlotterMixin): activated pixels. The density filtration assigns to each pixel of a binary image a greyscale - value equal to the number of activated pixels with a ball centered around - it. + value equal to the weighted number of activated pixels within a ball + centered around it. The weights are calculated based on the distance of the + activated pixels to the center of the ball. Parameters ---------- - radius : float, default: ``1`` + radius : float, optional, default: ``1.`` The radius of the ball within which the number of activated pixels is considered. - metric : string, or callable, optional, default: ``'euclidean'`` + metric : string or callable, optional, default: ``'euclidean'`` Determines a rule with which to calculate distances between pairs of pixels. If ``metric`` is a string, it must be one of the options allowed by @@ -1077,10 +1078,9 @@ class DensityFiltration(BaseEstimator, TransformerMixin, PlotterMixin): Attributes ---------- - mask_ : ndarray of shape - The mask applied around each pixel to calculate the number of its - activated neighbors. It is obtained from the choice of the ``radius`` - and ``metric``. Set in :meth:`fit`. + mask_ : ndarray of shape (radius, radius, [, radius]) + The mask applied around each pixel to calculate the weighted number of + its activated neighbors. Set in :meth:`fit`. See also -------- From 80f1ffa00e7495d80ec0beed0a28c5c009440c08 Mon Sep 17 00:00:00 2001 From: Guillaume Tauzin Date: Sun, 13 Sep 2020 16:49:44 +0200 Subject: [PATCH 07/29] Remove effective_metric_params in radial Signed-off-by: Guillaume Tauzin --- gtda/images/filtrations.py | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/gtda/images/filtrations.py b/gtda/images/filtrations.py index 582327806..a42bdc6b2 100644 --- a/gtda/images/filtrations.py +++ b/gtda/images/filtrations.py @@ -263,7 +263,7 @@ class RadialFiltration(BaseEstimator, TransformerMixin, PlotterMixin): two arrays from the entry in `X` as input, and return a value indicating the distance between them. - metric_params : dict or None, optional, default: ``None`` + metric_params : dict or None, optional, default: ``{}`` Additional keyword arguments for the metric function. n_jobs : int or None, optional, default: ``None`` @@ -279,11 +279,6 @@ class RadialFiltration(BaseEstimator, TransformerMixin, PlotterMixin): center_ : ndarray of shape (:attr:`n_dimensions_`,) Effective center of the radial filtration. Set in :meth:`fit`. - effective_metric_params_ : dict - Dictionary containing all information present in - `metric_params`. If `metric_params` is ``None``, it is set to - the empty dictionary. - mesh_ : ndarray of shape ( n_pixels_x, n_pixels_y [, n_pixels_z]) greyscale image corresponding to the radial filtration of a binary image where each pixel is activated. Set in :meth:`fit`. @@ -309,11 +304,11 @@ class RadialFiltration(BaseEstimator, TransformerMixin, PlotterMixin): 'center': {'type': (np.ndarray, type(None)), 'of': {'type': int}}, 'radius': {'type': Real, 'in': Interval(0, np.inf, closed='right')}, 'metric': {'type': (str, FunctionType)}, - 'metric_params': {'type': (dict, type(None))} + 'metric_params': {'type': dict} } def __init__(self, center=None, radius=np.inf, metric='euclidean', - metric_params=None, n_jobs=None): + metric_params={}, n_jobs=None): self.center = center self.radius = radius self.metric = metric @@ -330,9 +325,9 @@ def _calculate_radial(self, X): return Xr def fit(self, X, y=None): - """Calculate :attr:`center_`, :attr:`effective_metric_params_`, - :attr:`n_dimensions_`, :attr:`mesh_` and :attr:`max_value_` from a - collection of binary images. Then, return the estimator. + """Calculate :attr:`center_`, :attr:`n_dimensions_`, :attr:`mesh_` and + :attr:`max_value_` from a collection of binary images. Then, return the + estimator. This method is here to implement the usual scikit-learn API and hence work in pipelines. @@ -366,11 +361,6 @@ def fit(self, X, y=None): self.center_ = np.copy(self.center) self.center_ = self.center_.reshape((1, -1)) - if self.metric_params is None: - self.effective_metric_params_ = {} - else: - self.effective_metric_params_ = self.metric_params.copy() - axis_order = [2, 1, 3] mesh_range_list = [np.arange(0, X.shape[i]) for i in axis_order[:self.n_dimensions_]] @@ -380,7 +370,7 @@ def fit(self, X, y=None): axis=self.n_dimensions_).reshape((-1, self.n_dimensions_)) self.mesh_ = pairwise_distances( self.center_, self.mesh_, metric=self.metric, - n_jobs=1, **self.effective_metric_params_).reshape(X.shape[1:]) + n_jobs=1, **self.metric_params).reshape(X.shape[1:]) self.mesh_[self.mesh_ > self.radius] = np.inf self.max_value_ = 0. @@ -1098,7 +1088,7 @@ class DensityFiltration(BaseEstimator, TransformerMixin, PlotterMixin): _hyperparameters = { 'radius': {'type': Real, 'in': Interval(0, np.inf, closed='right')}, 'metric': {'type': (str, FunctionType)}, - 'metric_params': {'type': (dict, type(None))}, + 'metric_params': {'type': dict}, } def __init__(self, radius=3, metric='euclidean', metric_params={}, From aa499a04da8a22f61383ce55bb8a881502c8f943 Mon Sep 17 00:00:00 2001 From: Guillaume Tauzin Date: Mon, 14 Sep 2020 15:02:04 +0200 Subject: [PATCH 08/29] Apply @ulupo's suggestions on list ranges Co-authored-by: Umberto Lupo <46537483+ulupo@users.noreply.github.com> --- gtda/images/filtrations.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/gtda/images/filtrations.py b/gtda/images/filtrations.py index a42bdc6b2..35419760f 100644 --- a/gtda/images/filtrations.py +++ b/gtda/images/filtrations.py @@ -1140,15 +1140,15 @@ def fit(self, X, y=None): self._range = int(np.ceil(self.radius)) - iterator_range_list = [range(-self._range, self._range + 1) + iterator_range_list = [range(-self._range, self._range + 1)] * self._n_dimensions \ for _ in range(self._n_dimensions)] \ - + [[0] for _ in range(3 - self._n_dimensions)] + + [[0] * (3 - self._n_dimensions)] self._iterator = tuple(itertools.product(*iterator_range_list)) # The mask is always 3D but not the iterator. - self.mask_ = np.ones(tuple(2 * self._range + 1 for _ in range(3)), + self.mask_ = np.ones(tuple([2 * self._range + 1] * 3, dtype=np.bool) - mesh_range_list = [np.arange(0, 2 * self._range + 1) for _ in range(3)] + mesh_range_list = [np.arange(0, 2 * self._range + 1)] * 3 self.mesh_ = np.stack( np.meshgrid(*mesh_range_list), axis=3).reshape((-1, 3)) @@ -1159,7 +1159,7 @@ def fit(self, X, y=None): self.mask_ = self.mask_ <= self.radius - padding = np.asarray( + padding = np.asarray([self._range] * self._n_dimensions + [0] * (3 - self._n_dimensions)) [*[self._range for _ in range(self._n_dimensions)], *[0 for _ in range(3 - self._n_dimensions)]]) self._padder = Padder(paddings=padding) From 9dd4fa56202dbdbfbdff23bc20a8f24226592416 Mon Sep 17 00:00:00 2001 From: Guillaume Tauzin Date: Tue, 15 Sep 2020 18:08:43 +0200 Subject: [PATCH 09/29] Remove leftover code Signed-off-by: Guillaume Tauzin --- gtda/images/filtrations.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/gtda/images/filtrations.py b/gtda/images/filtrations.py index 35419760f..7ba826c96 100644 --- a/gtda/images/filtrations.py +++ b/gtda/images/filtrations.py @@ -1147,7 +1147,7 @@ def fit(self, X, y=None): # The mask is always 3D but not the iterator. self.mask_ = np.ones(tuple([2 * self._range + 1] * 3, - dtype=np.bool) + dtype=np.bool)) mesh_range_list = [np.arange(0, 2 * self._range + 1)] * 3 self.mesh_ = np.stack( np.meshgrid(*mesh_range_list), axis=3).reshape((-1, 3)) @@ -1159,9 +1159,8 @@ def fit(self, X, y=None): self.mask_ = self.mask_ <= self.radius - padding = np.asarray([self._range] * self._n_dimensions + [0] * (3 - self._n_dimensions)) - [*[self._range for _ in range(self._n_dimensions)], - *[0 for _ in range(3 - self._n_dimensions)]]) + padding = np.asarray([self._range] * self._n_dimensions + \ + [0] * (3 - self._n_dimensions)) self._padder = Padder(paddings=padding) self._padder.fit(X.reshape((*X.shape[:3], -1))) From d1faa01c81233c22cc804693954445da9a174450 Mon Sep 17 00:00:00 2001 From: Guillaume Tauzin Date: Sun, 20 Sep 2020 12:44:52 +0200 Subject: [PATCH 10/29] Turn warning into ValueError Signed-off-by: Guillaume Tauzin --- gtda/images/filtrations.py | 330 ++++++----------------------------- gtda/images/preprocessing.py | 158 +++++++---------- 2 files changed, 117 insertions(+), 371 deletions(-) diff --git a/gtda/images/filtrations.py b/gtda/images/filtrations.py index df3a0df2e..d545192ae 100644 --- a/gtda/images/filtrations.py +++ b/gtda/images/filtrations.py @@ -3,8 +3,6 @@ from numbers import Real from types import FunctionType -from warnings import warn -import itertools import numpy as np from joblib import Parallel, delayed, effective_n_jobs @@ -14,7 +12,6 @@ from sklearn.utils.validation import check_array, check_is_fitted from ._utils import _dilate, _erode -from .preprocessing import Padder from ..base import PlotterMixin from ..plotting import plot_heatmap from ..utils._docs import adapt_fit_transform_docs @@ -69,10 +66,10 @@ class HeightFiltration(BaseEstimator, TransformerMixin, PlotterMixin): References ---------- - .. [1] A. Garin and G. Tauzin, "A topological reading lesson: - Classification of MNIST using TDA"; 19th International IEEE - Conference on Machine Learning and Applications (ICMLA 2020), 2019; - `arXiv:1910.08345 `_. + [1] A. Garin and G. Tauzin, "A topological reading lesson: Classification + of MNIST using TDA"; 19th International IEEE Conference on Machine + Learning and Applications (ICMLA 2020), 2019; arXiv: `1910.08345 \ + `_. """ @@ -117,15 +114,15 @@ def fit(self, X, y=None): """ X = check_array(X, allow_nd=True) - self.n_dimensions_ = X.ndim - 1 - if (self.n_dimensions_ < 2) or (self.n_dimensions_ > 3): - warn(f"Input of `fit` contains arrays of dimension " - f"{self.n_dimensions_}.") + n_dimensions = X.ndim - 1 + if (n_dimensions < 2) or (n_dimensions > 3): + raise ValueError(f"Input of `fit` contains arrays of dimension " + f"{n_dimensions_}.") validate_params( self.get_params(), self._hyperparameters, exclude=['n_jobs']) if self.direction is None: - self.direction_ = np.ones(self.n_dimensions_,) + self.direction_ = np.ones(n_dimensions_,) else: self.direction_ = np.copy(self.direction) self.direction_ = self.direction_ / np.linalg.norm(self.direction_) @@ -134,10 +131,10 @@ def fit(self, X, y=None): mesh_range_list = \ [np.arange(X.shape[order]) if self.direction_[i] >= 0 else -np.flip(np.arange(X.shape[order])) for i, order - in enumerate(axis_order[: self.n_dimensions_])] + in enumerate(axis_order[: n_dimensions_])] self.mesh_ = np.stack(np.meshgrid(*mesh_range_list, indexing='xy'), - axis=self.n_dimensions_) + axis=n_dimensions_) self.max_value_ = 0. self.max_value_ = np.max(self._calculate_height( @@ -263,7 +260,7 @@ class RadialFiltration(BaseEstimator, TransformerMixin, PlotterMixin): two arrays from the entry in `X` as input, and return a value indicating the distance between them. - metric_params : dict or None, optional, default: ``{}`` + metric_params : dict or None, optional, default: ``None`` Additional keyword arguments for the metric function. n_jobs : int or None, optional, default: ``None`` @@ -279,6 +276,11 @@ class RadialFiltration(BaseEstimator, TransformerMixin, PlotterMixin): center_ : ndarray of shape (:attr:`n_dimensions_`,) Effective center of the radial filtration. Set in :meth:`fit`. + effective_metric_params_ : dict + Dictionary containing all information present in + `metric_params`. If `metric_params` is ``None``, it is set to + the empty dictionary. + mesh_ : ndarray of shape ( n_pixels_x, n_pixels_y [, n_pixels_z]) greyscale image corresponding to the radial filtration of a binary image where each pixel is activated. Set in :meth:`fit`. @@ -293,10 +295,10 @@ class RadialFiltration(BaseEstimator, TransformerMixin, PlotterMixin): References ---------- - .. [1] A. Garin and G. Tauzin, "A topological reading lesson: - Classification of MNIST using TDA"; 19th International IEEE - Conference on Machine Learning and Applications (ICMLA 2020), 2019; - `arXiv:1910.08345 `_. + [1] A. Garin and G. Tauzin, "A topological reading lesson: Classification + of MNIST using TDA"; 19th International IEEE Conference on Machine + Learning and Applications (ICMLA 2020), 2019; arXiv: `1910.08345 \ + `_. """ @@ -304,11 +306,11 @@ class RadialFiltration(BaseEstimator, TransformerMixin, PlotterMixin): 'center': {'type': (np.ndarray, type(None)), 'of': {'type': int}}, 'radius': {'type': Real, 'in': Interval(0, np.inf, closed='right')}, 'metric': {'type': (str, FunctionType)}, - 'metric_params': {'type': dict} + 'metric_params': {'type': (dict, type(None))} } def __init__(self, center=None, radius=np.inf, metric='euclidean', - metric_params={}, n_jobs=None): + metric_params=None, n_jobs=None): self.center = center self.radius = radius self.metric = metric @@ -325,9 +327,9 @@ def _calculate_radial(self, X): return Xr def fit(self, X, y=None): - """Calculate :attr:`center_`, :attr:`n_dimensions_`, :attr:`mesh_` and - :attr:`max_value_` from a collection of binary images. Then, return the - estimator. + """Calculate :attr:`center_`, :attr:`effective_metric_params_`, + :attr:`n_dimensions_`, :attr:`mesh_` and :attr:`max_value_` from a + collection of binary images. Then, return the estimator. This method is here to implement the usual scikit-learn API and hence work in pipelines. @@ -348,29 +350,34 @@ def fit(self, X, y=None): """ X = check_array(X, allow_nd=True) - self.n_dimensions_ = X.ndim - 1 - if (self.n_dimensions_ < 2) or (self.n_dimensions_ > 3): - warn(f"Input of `fit` contains arrays of dimension " - f"{self.n_dimensions_}.") + n_dimensions = X.ndim - 1 + if (n_dimensions < 2) or (n_dimensions > 3): + raise ValueError(f"Input of `fit` contains arrays of dimension " + f"{n_dimensions_}.") validate_params( self.get_params(), self._hyperparameters, exclude=['n_jobs']) if self.center is None: - self.center_ = np.zeros(self.n_dimensions_) + self.center_ = np.zeros(n_dimensions_) else: self.center_ = np.copy(self.center) self.center_ = self.center_.reshape((1, -1)) + if self.metric_params is None: + self.effective_metric_params_ = {} + else: + self.effective_metric_params_ = self.metric_params.copy() + axis_order = [2, 1, 3] mesh_range_list = [np.arange(0, X.shape[i]) - for i in axis_order[:self.n_dimensions_]] + for i in axis_order[:n_dimensions_]] self.mesh_ = np.stack( np.meshgrid(*mesh_range_list), - axis=self.n_dimensions_).reshape((-1, self.n_dimensions_)) + axis=n_dimensions_).reshape((-1, n_dimensions_)) self.mesh_ = pairwise_distances( self.center_, self.mesh_, metric=self.metric, - n_jobs=1, **self.metric_params).reshape(X.shape[1:]) + n_jobs=1, **self.effective_metric_params_).reshape(X.shape[1:]) self.mesh_[self.mesh_ > self.radius] = np.inf self.max_value_ = 0. @@ -503,10 +510,10 @@ class DilationFiltration(BaseEstimator, TransformerMixin, PlotterMixin): References ---------- - .. [1] A. Garin and G. Tauzin, "A topological reading lesson: - Classification of MNIST using TDA"; 19th International IEEE - Conference on Machine Learning and Applications (ICMLA 2020), 2019; - `arXiv:1910.08345 `_. + [1] A. Garin and G. Tauzin, "A topological reading lesson: Classification + of MNIST using TDA"; 19th International IEEE Conference on Machine + Learning and Applications (ICMLA 2020), 2019; arXiv: `1910.08345 \ + `_. """ @@ -553,8 +560,8 @@ def fit(self, X, y=None): n_dimensions = X.ndim - 1 if (n_dimensions < 2) or (n_dimensions > 3): - warn(f"Input of `fit` contains arrays of dimension " - f"{self.n_dimensions_}.") + raise ValueError(f"Input of `fit` contains arrays of dimension " + f"{n_dimensions_}.") validate_params( self.get_params(), self._hyperparameters, exclude=['n_jobs']) @@ -692,10 +699,10 @@ class ErosionFiltration(BaseEstimator, TransformerMixin, PlotterMixin): References ---------- - .. [1] A. Garin and G. Tauzin, "A topological reading lesson: - Classification of MNIST using TDA"; 19th International IEEE - Conference on Machine Learning and Applications (ICMLA 2020), 2019; - `arXiv:1910.08345 `_. + [1] A. Garin and G. Tauzin, "A topological reading lesson: Classification + of MNIST using TDA"; 19th International IEEE Conference on Machine + Learning and Applications (ICMLA 2020), 2019; arXiv: `1910.08345 \ + `_. """ @@ -741,8 +748,8 @@ def fit(self, X, y=None): X = check_array(X, allow_nd=True) n_dimensions = X.ndim - 1 if (n_dimensions < 2) or (n_dimensions > 3): - warn(f"Input of `fit` contains arrays of dimension " - f"{self.n_dimensions_}.") + raise ValueError(f"Input of `fit` contains arrays of dimension " + f"{n_dimensions_}.") validate_params( self.get_params(), self._hyperparameters, exclude=['n_jobs']) @@ -883,10 +890,10 @@ class SignedDistanceFiltration(BaseEstimator, TransformerMixin, PlotterMixin): References ---------- - .. [1] A. Garin and G. Tauzin, "A topological reading lesson: - Classification of MNIST using TDA"; 19th International IEEE - Conference on Machine Learning and Applications (ICMLA 2020), 2019; - `arXiv:1910.08345 `_. + [1] A. Garin and G. Tauzin, "A topological reading lesson: Classification + of MNIST using TDA"; 19th International IEEE Conference on Machine + Learning and Applications (ICMLA 2020), 2019; arXiv: `1910.08345 \ + `_. """ @@ -939,8 +946,8 @@ def fit(self, X, y=None): X = check_array(X, allow_nd=True) n_dimensions = X.ndim - 1 if (n_dimensions < 2) or (n_dimensions > 3): - warn(f"Input of `fit` contains arrays of dimension " - f"{self.n_dimensions_}.") + raise ValueError(f"Input of `fit` contains arrays of dimension " + f"{n_dimensions_}.") validate_params( self.get_params(), self._hyperparameters, exclude=['n_jobs']) @@ -1028,226 +1035,3 @@ def plot(Xt, sample=0, colorscale='greys', origin='upper', title=f"Signed-distance filtration of image {sample}", plotly_params=plotly_params ) - - -@adapt_fit_transform_docs -class DensityFiltration(BaseEstimator, TransformerMixin, PlotterMixin): - """Filtrations of 2D/3D binary images based on the number of neighboring - activated pixels. - - The density filtration assigns to each pixel of a binary image a greyscale - value equal to the weighted number of activated pixels within a ball - centered around it. The weights are calculated based on the distance of the - activated pixels to the center of the ball. - - Parameters - ---------- - radius : float, optional, default: ``1.`` - The radius of the ball within which the number of activated pixels is - considered. - - metric : string or callable, optional, default: ``'euclidean'`` - Determines a rule with which to calculate distances between - pairs of pixels. - If ``metric`` is a string, it must be one of the options allowed by - ``scipy.spatial.distance.pdist`` for its metric parameter, or a metric - listed in ``sklearn.pairwise.PAIRWISE_DISTANCE_FUNCTIONS``, including - "euclidean", "manhattan", or "cosine". - If ``metric`` is a callable function, it is called on each pair of - instances and the resulting value recorded. The callable should take - two arrays from the entry in `X` as input, and return a value - indicating the distance between them. - - metric_params : dict, optional, default: ``{}`` - Additional keyword arguments for the metric function. - - n_jobs : int or None, optional, default: ``None`` - The number of jobs to use for the computation. ``None`` means 1 unless - in a :obj:`joblib.parallel_backend` context. ``-1`` means using all - processors. - - Attributes - ---------- - mask_ : ndarray of shape (radius, radius, [, radius]) - The mask applied around each pixel to calculate the weighted number of - its activated neighbors. Set in :meth:`fit`. - - See also - -------- - gtda.homology.CubicalPersistence, Binarizer - - References - ---------- - [1] A. Garin and G. Tauzin, "A topological reading lesson: Classification - of MNIST using TDA"; 19th International IEEE Conference on Machine - Learning and Applications (ICMLA 2020), 2019; arXiv: `1910.08345 \ - `_. - - """ - - _hyperparameters = { - 'radius': {'type': Real, 'in': Interval(0, np.inf, closed='right')}, - 'metric': {'type': (str, FunctionType)}, - 'metric_params': {'type': dict}, - } - - def __init__(self, radius=3, metric='euclidean', metric_params={}, - n_jobs=None): - self.radius = radius - self.metric = metric - self.metric_params = metric_params - self.n_jobs = n_jobs - - def _calculate_density(self, X): - Xd = np.zeros(X.shape) - - for i, j, k in self._iterator: - Xd += np.roll(np.roll( - np.roll(X, k, axis=3), j, axis=2), i, axis=1) \ - * self.mask_[self._range + i, self._range + j, - self._range + k] - return Xd - - def fit(self, X, y=None): - """Calculate :attr:`mask_` from a collection of binary images. Then, - return the estimator. - - This method is here to implement the usual scikit-learn API and hence - work in pipelines. - - Parameters - ---------- - X : ndarray of shape (n_samples, n_pixels_x, n_pixels_y [, n_pixels_z]) - Input data. Each entry along axis 0 is interpreted as a 2D or 3D - binary image. - - y : None - There is no need of a target in a transformer, yet the pipeline API - requires this parameter. - - Returns - ------- - self : object - - """ - X = check_array(X, allow_nd=True) - self._n_dimensions = X.ndim - 1 - if (self._n_dimensions < 2) or (self._n_dimensions > 3): - warn(f"Input of `fit` contains arrays of dimension " - f"{self._n_dimensions}.") - validate_params( - self.get_params(), self._hyperparameters, exclude=['n_jobs']) - - self._range = int(np.ceil(self.radius)) - - iterator_range_list = [range(-self._range, self._range + 1)] * self._n_dimensions \ - for _ in range(self._n_dimensions)] \ - + [[0] * (3 - self._n_dimensions)] - self._iterator = tuple(itertools.product(*iterator_range_list)) - - # The mask is always 3D but not the iterator. - self.mask_ = np.ones(tuple([2 * self._range + 1] * 3, - dtype=np.bool)) - mesh_range_list = [np.arange(0, 2 * self._range + 1)] * 3 - self.mesh_ = np.stack( - np.meshgrid(*mesh_range_list), axis=3).reshape((-1, 3)) - - center = self._range * np.ones((1, 3)) - self.mask_ = pairwise_distances( - center, self.mesh_, metric=self.metric, - n_jobs=1, **self.metric_params).reshape(self.mask_.shape) - - self.mask_ = self.mask_ <= self.radius - - padding = np.asarray([self._range] * self._n_dimensions + \ - [0] * (3 - self._n_dimensions)) - self._padder = Padder(paddings=padding) - self._padder.fit(X.reshape((*X.shape[:3], -1))) - - return self - - def transform(self, X, y=None): - """For each binary image in the collection `X`, calculate a - corresponding greyscale image based on the density of its pixels. - Return the collection of greyscale images. - - Parameters - ---------- - X : ndarray of shape (n_samples, n_pixels_x, n_pixels_y [, n_pixels_z]) - Input data. Each entry along axis 0 is interpreted as a 2D or 3D - binary image. - - y : None - There is no need of a target in a transformer, yet the pipeline API - requires this parameter. - - Returns - ------- - Xt : ndarray of shape (n_samples, n_pixels_x, - n_pixels_y [, n_pixels_z]) - Transformed collection of images. Each entry along axis 0 is a - 2D or 3D greyscale image. - - """ - check_is_fitted(self) - Xt = check_array(X, allow_nd=True, copy=True) - - Xt = Xt.reshape((*X.shape[:3], -1)) - Xt = self._padder.transform(Xt) - - Xt = Parallel(n_jobs=self.n_jobs)( - delayed(self._calculate_density)(Xt[s]) - for s in gen_even_slices(Xt.shape[0], - effective_n_jobs(self.n_jobs))) - Xt = np.concatenate(Xt) - - Xt = Xt[:, self._range: -self._range, self._range: -self._range] - - if self._n_dimensions == 3: - Xt = Xt[:, :, :, self._range: -self._range] - - Xt = Xt.reshape(X.shape) - - return Xt - - @staticmethod - def plot(Xt, sample=0, colorscale='greys', origin='upper', - plotly_params=None): - """Plot a sample from a collection of 2D greyscale images. - - Parameters - ---------- - Xt : ndarray of shape (n_samples, n_pixels_x, n_pixels_y) - Collection of 2D greyscale images, such as returned by - :meth:`transform`. - - sample : int, optional, default: ``0`` - Index of the sample in `Xt` to be plotted. - - colorscale : str, optional, default: ``'greys'`` - Color scale to be used in the heat map. Can be anything allowed by - :class:`plotly.graph_objects.Heatmap`. - - origin : ``'upper'`` | ``'lower'``, optional, default: ``'upper'`` - Position of the [0, 0] pixel of `data`, in the upper left or lower - left corner. The convention ``'upper'`` is typically used for - matrices and images. - - plotly_params : dict or None, optional, default: ``None`` - Custom parameters to configure the plotly figure. Allowed keys are - ``"trace"`` and ``"layout"``, and the corresponding values should - be dictionaries containing keyword arguments as would be fed to the - :meth:`update_traces` and :meth:`update_layout` methods of - :class:`plotly.graph_objects.Figure`. - - Returns - ------- - fig : :class:`plotly.graph_objects.Figure` object - Plotly figure. - - """ - return plot_heatmap( - Xt[sample], colorscale=colorscale, origin=origin, - title=f"Signed-distance filtration of image {sample}", - plotly_params=plotly_params - ) diff --git a/gtda/images/preprocessing.py b/gtda/images/preprocessing.py index d1542f451..f98b2da44 100644 --- a/gtda/images/preprocessing.py +++ b/gtda/images/preprocessing.py @@ -4,7 +4,6 @@ from functools import reduce from operator import iconcat from numbers import Real -from warnings import warn import numpy as np from joblib import Parallel, delayed, effective_n_jobs @@ -49,10 +48,10 @@ class Binarizer(BaseEstimator, TransformerMixin, PlotterMixin): References ---------- - .. [1] A. Garin and G. Tauzin, "A topological reading lesson: - Classification of MNIST using TDA"; 19th International IEEE - Conference on Machine Learning and Applications (ICMLA 2020), 2019; - `arXiv:1910.08345 `_. + [1] A. Garin and G. Tauzin, "A topological reading lesson: Classification + of MNIST using TDA"; 19th International IEEE Conference on Machine + Learning and Applications (ICMLA 2020), 2019; arXiv: `1910.08345 \ + `_. """ @@ -95,8 +94,8 @@ def fit(self, X, y=None): X = check_array(X, allow_nd=True) self.n_dimensions_ = X.ndim - 1 if (self.n_dimensions_ < 2) or (self.n_dimensions_ > 3): - warn(f"Input of `fit` contains arrays of dimension " - f"{self.n_dimensions_}.") + raise ValueError(f"Input of `fit` contains arrays of dimension " + f"{self.n_dimensions_}.") validate_params( self.get_params(), self._hyperparameters, exclude=['n_jobs']) @@ -185,55 +184,27 @@ def plot(Xt, sample=0, colorscale='greys', origin='upper', @adapt_fit_transform_docs class Inverter(BaseEstimator, TransformerMixin, PlotterMixin): - """Invert all 2D/3D images in a collection. - - Applies an inversion function to the value of all pixels of all images in - the input collection. If the images are binary, the inversion function is - defined as the logical NOT function. Otherwise, it is the function - :math:`f(x) = M - x`, where `x` is a pixel value and `M` is - :attr:`max_value_`. + """Invert all 2D/3D binary images in a collection. Parameters ---------- - max_value : bool, int, float or None, optional, default: ``None`` - Maximum possible pixel value in the images. It should be a boolean if - input images are binary and an int or a float if they are greyscale. - If ``None``, it is calculated from the collection of images passed in - :meth:`fit`. - n_jobs : int or None, optional, default: ``None`` The number of jobs to use for the computation. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. - Attributes - ---------- - max_value_ : int ndarray of shape (padding_x, padding_y [, padding_z]) - Effective maximum value of the images' pixels. Set in :meth:`fit`. - References ---------- - .. [1] A. Garin and G. Tauzin, "A topological reading lesson: - Classification of MNIST using TDA"; 19th International IEEE - Conference on Machine Learning and Applications (ICMLA 2020), 2019; - `arXiv:1910.08345 `_. + [1] A. Garin and G. Tauzin, "A topological reading lesson: \ + Classification of MNIST using TDA"; 19th International \ + IEEE Conference on Machine Learning and Applications (ICMLA 2020), \ + 2019; arXiv: `1910.08345 `_. """ - _hyperparameters = { - 'max_value': {'type': (bool, Real, type(None))} - } - - def __init__(self, max_value=None, n_jobs=None): - self.max_value = max_value + def __init__(self, n_jobs=None): self.n_jobs = n_jobs - def _invert(self, X): - if self.max_value_ is True: - return np.logical_not(X) - else: - return self.max_value_ - X - def fit(self, X, y=None): """Do nothing and return the estimator unchanged. @@ -244,7 +215,7 @@ def fit(self, X, y=None): ---------- X : ndarray of shape (n_samples, n_pixels_x, n_pixels_y [, n_pixels_z]) Input data. Each entry along axis 0 is interpreted as a 2D or 3D - image. + binary image. y : None There is no need of a target in a transformer, yet the pipeline API @@ -256,18 +227,12 @@ def fit(self, X, y=None): """ check_array(X, allow_nd=True) + n_dimensions = X.ndim - 1 + if (n_dimensions < 2) or (n_dimensions > 3): + raise ValueError(f"Input of `fit` contains arrays of dimension " + f"{n_dimensions_}.") - validate_params(self.get_params(), self._hyperparameters, - exclude=['n_jobs']) - - if self.max_value is None: - if X.dtype == np.bool: - self.max_value_ = True - else: - self.max_value_ = np.max(X) - else: - self.max_value_ = self.max_value - + self._is_fitted = True return self def transform(self, X, y=None): @@ -292,11 +257,11 @@ def transform(self, X, y=None): 2D or 3D binary image. """ - check_is_fitted(self) + check_is_fitted(self, ['_is_fitted']) Xt = check_array(X, allow_nd=True) Xt = Parallel(n_jobs=self.n_jobs)(delayed( - self._invert)(Xt[s]) + np.logical_not)(Xt[s]) for s in gen_even_slices(len(Xt), effective_n_jobs(self.n_jobs))) Xt = np.concatenate(Xt) @@ -347,19 +312,19 @@ def plot(Xt, sample=0, colorscale='greys', origin='upper', @adapt_fit_transform_docs class Padder(BaseEstimator, TransformerMixin, PlotterMixin): - """Pad all 2D/3D images in a collection. + """Pad all 2D/3D binary images in a collection. Parameters ---------- - padding : int ndarray of shape (padding_x, padding_y [, padding_z]) or \ + paddings : int ndarray of shape (padding_x, padding_y [, padding_z]) or \ None, optional, default: ``None`` Number of pixels to pad the images along each axis and on both side of the images. By default, a frame of a single pixel width is added around the image (``1 = padding_x = padding_y [= padding_z]``). - value : bool, int, or float, optional, default: ``0`` - Value given to the padded pixels. It should be a boolean if the input - images are binary and an int or float if they are greyscale. + activated : bool, optional, default: ``False`` + If ``True``, the padded pixels are activated. If ``False``, they are + deactivated. n_jobs : int or None, optional, default: ``None`` The number of jobs to use for the computation. ``None`` means 1 unless @@ -368,33 +333,31 @@ class Padder(BaseEstimator, TransformerMixin, PlotterMixin): Attributes ---------- - padding_ : int ndarray of shape (padding_x, padding_y [, padding_z]) + paddings_ : int ndarray of shape (padding_x, padding_y [, padding_z]) Effective padding along each of the axis. Set in :meth:`fit`. References ---------- - .. [1] A. Garin and G. Tauzin, "A topological reading lesson: - Classification of MNIST using TDA"; 19th International IEEE - Conference on Machine Learning and Applications (ICMLA 2020), 2019; - `arXiv:1910.08345 `_. + [1] A. Garin and G. Tauzin, "A topological reading lesson: Classification + of MNIST using TDA"; 19th International IEEE Conference on Machine + Learning and Applications (ICMLA 2020), 2019; arXiv: `1910.08345 \ + `_. """ _hyperparameters = { - 'padding': { - 'type': (np.ndarray, type(None)), - 'of': {'type': int}}, - 'value': {'type': (bool, Real)} - } - - def __init__(self, padding=None, value=False, n_jobs=None): - self.padding = padding - self.value = value + 'paddings': {'type': (np.ndarray, type(None)), 'of': {'type': int}}, + 'activated': {'type': bool} + } + + def __init__(self, paddings=None, activated=False, n_jobs=None): + self.paddings = paddings + self.activated = activated self.n_jobs = n_jobs def fit(self, X, y=None): - """Calculate :attr:`padding_` from a collection of images. Then, - return the estimator. + """Calculate :attr:`paddings_` from a collection of binary images. + Then, return the estimator. This method is here to implement the usual scikit-learn API and hence work in pipelines. @@ -403,7 +366,7 @@ def fit(self, X, y=None): ---------- X : ndarray of shape (n_samples, n_pixels_x, n_pixels_y [, n_pixels_z]) Input data. Each entry along axis 0 is interpreted as a 2D or 3D - image. + binary image. y : None There is no need of a target in a transformer, yet the pipeline API @@ -416,24 +379,23 @@ def fit(self, X, y=None): """ X = check_array(X, allow_nd=True) n_dimensions = X.ndim - 1 - if n_dimensions < 2 or n_dimensions > 3: - warn(f"Input of `fit` contains arrays of dimension " - f"{self.n_dimensions_}.") - - validate_params(self.get_params(), self._hyperparameters, - exclude=['value', 'n_jobs']) + if (n_dimensions < 2) or (n_dimensions > 3): + raise ValueError(f"Input of `fit` contains arrays of dimension " + f"{n_dimensions_}.") + validate_params( + self.get_params(), self._hyperparameters, exclude=['n_jobs']) - if self.padding is None: - self.padding_ = np.ones((n_dimensions,), dtype=np.int) - elif len(self.padding) != n_dimensions: + if self.paddings is None: + self.paddings_ = np.ones((n_dimensions,), dtype=np.int) + elif len(self.paddings) != n_dimensions: raise ValueError( - f"`padding` has length {self.padding} while the input " + f"`paddings` has length {self.paddings} while the input " f"data requires it to have length equal to {n_dimensions}.") else: - self.padding_ = self.padding + self.paddings_ = self.paddings self._pad_width = ((0, 0), - *[(self.padding_[axis], self.padding_[axis]) + *[(self.paddings_[axis], self.paddings_[axis]) for axis in range(n_dimensions)]) return self @@ -446,7 +408,7 @@ def transform(self, X, y=None): ---------- X : ndarray of shape (n_samples, n_pixels_x, n_pixels_y [, n_pixels_z]) Input data. Each entry along axis 0 is interpreted as a 2D or 3D - image. + binary image. y : None There is no need of a target in a transformer, yet the pipeline API @@ -465,7 +427,7 @@ def transform(self, X, y=None): Xt = Parallel(n_jobs=self.n_jobs)(delayed( np.pad)(Xt[s], pad_width=self._pad_width, - constant_values=self.value) + constant_values=self.activated) for s in gen_even_slices(len(Xt), effective_n_jobs(self.n_jobs))) Xt = np.concatenate(Xt) @@ -540,10 +502,10 @@ class ImageToPointCloud(BaseEstimator, TransformerMixin, PlotterMixin): References ---------- - .. [1] A. Garin and G. Tauzin, "A topological reading lesson: - Classification of MNIST using TDA"; 19th International IEEE - Conference on Machine Learning and Applications (ICMLA 2020), 2019; - `arXiv:1910.08345 `_. + [1] A. Garin and G. Tauzin, "A topological reading lesson: Classification + of MNIST using TDA"; 19th International IEEE Conference on Machine + Learning and Applications (ICMLA 2020), 2019; arXiv: `1910.08345 \ + `_. """ @@ -577,9 +539,9 @@ def fit(self, X, y=None): check_array(X, allow_nd=True) n_dimensions = X.ndim - 1 - if n_dimensions < 2 or n_dimensions > 3: - warn(f"Input of `fit` contains arrays of dimension " - f"{self.n_dimensions_}.") + if (n_dimensions < 2) or (n_dimensions > 3): + raise ValueError(f"Input of `fit` contains arrays of dimension " + f"{n_dimensions_}.") self._is_fitted = True return self From 5937e64638f09c377c33d8253614fb6d20461dda Mon Sep 17 00:00:00 2001 From: Guillaume Tauzin Date: Sun, 20 Sep 2020 12:48:21 +0200 Subject: [PATCH 11/29] Replace warnings by ValueErrors Signed-off-by: Guillaume Tauzin --- gtda/images/filtrations.py | 311 +++++++++++++++++++++++++++++------ gtda/images/preprocessing.py | 149 ++++++++++------- 2 files changed, 356 insertions(+), 104 deletions(-) diff --git a/gtda/images/filtrations.py b/gtda/images/filtrations.py index d545192ae..521d73f33 100644 --- a/gtda/images/filtrations.py +++ b/gtda/images/filtrations.py @@ -3,6 +3,8 @@ from numbers import Real from types import FunctionType +from warnings import warn +import itertools import numpy as np from joblib import Parallel, delayed, effective_n_jobs @@ -12,6 +14,7 @@ from sklearn.utils.validation import check_array, check_is_fitted from ._utils import _dilate, _erode +from .preprocessing import Padder from ..base import PlotterMixin from ..plotting import plot_heatmap from ..utils._docs import adapt_fit_transform_docs @@ -66,10 +69,10 @@ class HeightFiltration(BaseEstimator, TransformerMixin, PlotterMixin): References ---------- - [1] A. Garin and G. Tauzin, "A topological reading lesson: Classification - of MNIST using TDA"; 19th International IEEE Conference on Machine - Learning and Applications (ICMLA 2020), 2019; arXiv: `1910.08345 \ - `_. + .. [1] A. Garin and G. Tauzin, "A topological reading lesson: + Classification of MNIST using TDA"; 19th International IEEE + Conference on Machine Learning and Applications (ICMLA 2020), 2019; + `arXiv:1910.08345 `_. """ @@ -114,8 +117,8 @@ def fit(self, X, y=None): """ X = check_array(X, allow_nd=True) - n_dimensions = X.ndim - 1 - if (n_dimensions < 2) or (n_dimensions > 3): + n_dimensions_ = X.ndim - 1 + if (n_dimensions_ < 2) or (n_dimensions_ > 3): raise ValueError(f"Input of `fit` contains arrays of dimension " f"{n_dimensions_}.") validate_params( @@ -260,7 +263,7 @@ class RadialFiltration(BaseEstimator, TransformerMixin, PlotterMixin): two arrays from the entry in `X` as input, and return a value indicating the distance between them. - metric_params : dict or None, optional, default: ``None`` + metric_params : dict or None, optional, default: ``{}`` Additional keyword arguments for the metric function. n_jobs : int or None, optional, default: ``None`` @@ -276,11 +279,6 @@ class RadialFiltration(BaseEstimator, TransformerMixin, PlotterMixin): center_ : ndarray of shape (:attr:`n_dimensions_`,) Effective center of the radial filtration. Set in :meth:`fit`. - effective_metric_params_ : dict - Dictionary containing all information present in - `metric_params`. If `metric_params` is ``None``, it is set to - the empty dictionary. - mesh_ : ndarray of shape ( n_pixels_x, n_pixels_y [, n_pixels_z]) greyscale image corresponding to the radial filtration of a binary image where each pixel is activated. Set in :meth:`fit`. @@ -295,10 +293,10 @@ class RadialFiltration(BaseEstimator, TransformerMixin, PlotterMixin): References ---------- - [1] A. Garin and G. Tauzin, "A topological reading lesson: Classification - of MNIST using TDA"; 19th International IEEE Conference on Machine - Learning and Applications (ICMLA 2020), 2019; arXiv: `1910.08345 \ - `_. + .. [1] A. Garin and G. Tauzin, "A topological reading lesson: + Classification of MNIST using TDA"; 19th International IEEE + Conference on Machine Learning and Applications (ICMLA 2020), 2019; + `arXiv:1910.08345 `_. """ @@ -306,11 +304,11 @@ class RadialFiltration(BaseEstimator, TransformerMixin, PlotterMixin): 'center': {'type': (np.ndarray, type(None)), 'of': {'type': int}}, 'radius': {'type': Real, 'in': Interval(0, np.inf, closed='right')}, 'metric': {'type': (str, FunctionType)}, - 'metric_params': {'type': (dict, type(None))} + 'metric_params': {'type': dict} } def __init__(self, center=None, radius=np.inf, metric='euclidean', - metric_params=None, n_jobs=None): + metric_params={}, n_jobs=None): self.center = center self.radius = radius self.metric = metric @@ -327,9 +325,9 @@ def _calculate_radial(self, X): return Xr def fit(self, X, y=None): - """Calculate :attr:`center_`, :attr:`effective_metric_params_`, - :attr:`n_dimensions_`, :attr:`mesh_` and :attr:`max_value_` from a - collection of binary images. Then, return the estimator. + """Calculate :attr:`center_`, :attr:`n_dimensions_`, :attr:`mesh_` and + :attr:`max_value_` from a collection of binary images. Then, return the + estimator. This method is here to implement the usual scikit-learn API and hence work in pipelines. @@ -350,8 +348,8 @@ def fit(self, X, y=None): """ X = check_array(X, allow_nd=True) - n_dimensions = X.ndim - 1 - if (n_dimensions < 2) or (n_dimensions > 3): + n_dimensions_ = X.ndim - 1 + if (n_dimensions_ < 2) or (n_dimensions_ > 3): raise ValueError(f"Input of `fit` contains arrays of dimension " f"{n_dimensions_}.") validate_params( @@ -363,11 +361,6 @@ def fit(self, X, y=None): self.center_ = np.copy(self.center) self.center_ = self.center_.reshape((1, -1)) - if self.metric_params is None: - self.effective_metric_params_ = {} - else: - self.effective_metric_params_ = self.metric_params.copy() - axis_order = [2, 1, 3] mesh_range_list = [np.arange(0, X.shape[i]) for i in axis_order[:n_dimensions_]] @@ -377,7 +370,7 @@ def fit(self, X, y=None): axis=n_dimensions_).reshape((-1, n_dimensions_)) self.mesh_ = pairwise_distances( self.center_, self.mesh_, metric=self.metric, - n_jobs=1, **self.effective_metric_params_).reshape(X.shape[1:]) + n_jobs=1, **self.metric_params).reshape(X.shape[1:]) self.mesh_[self.mesh_ > self.radius] = np.inf self.max_value_ = 0. @@ -510,10 +503,10 @@ class DilationFiltration(BaseEstimator, TransformerMixin, PlotterMixin): References ---------- - [1] A. Garin and G. Tauzin, "A topological reading lesson: Classification - of MNIST using TDA"; 19th International IEEE Conference on Machine - Learning and Applications (ICMLA 2020), 2019; arXiv: `1910.08345 \ - `_. + .. [1] A. Garin and G. Tauzin, "A topological reading lesson: + Classification of MNIST using TDA"; 19th International IEEE + Conference on Machine Learning and Applications (ICMLA 2020), 2019; + `arXiv:1910.08345 `_. """ @@ -557,9 +550,8 @@ def fit(self, X, y=None): """ X = check_array(X, allow_nd=True) - - n_dimensions = X.ndim - 1 - if (n_dimensions < 2) or (n_dimensions > 3): + n_dimensions_ = X.ndim - 1 + if (n_dimensions_ < 2) or (n_dimensions_ > 3): raise ValueError(f"Input of `fit` contains arrays of dimension " f"{n_dimensions_}.") validate_params( @@ -699,10 +691,10 @@ class ErosionFiltration(BaseEstimator, TransformerMixin, PlotterMixin): References ---------- - [1] A. Garin and G. Tauzin, "A topological reading lesson: Classification - of MNIST using TDA"; 19th International IEEE Conference on Machine - Learning and Applications (ICMLA 2020), 2019; arXiv: `1910.08345 \ - `_. + .. [1] A. Garin and G. Tauzin, "A topological reading lesson: + Classification of MNIST using TDA"; 19th International IEEE + Conference on Machine Learning and Applications (ICMLA 2020), 2019; + `arXiv:1910.08345 `_. """ @@ -746,8 +738,8 @@ def fit(self, X, y=None): """ X = check_array(X, allow_nd=True) - n_dimensions = X.ndim - 1 - if (n_dimensions < 2) or (n_dimensions > 3): + n_dimensions_ = X.ndim - 1 + if (n_dimensions_ < 2) or (n_dimensions_ > 3): raise ValueError(f"Input of `fit` contains arrays of dimension " f"{n_dimensions_}.") validate_params( @@ -890,10 +882,10 @@ class SignedDistanceFiltration(BaseEstimator, TransformerMixin, PlotterMixin): References ---------- - [1] A. Garin and G. Tauzin, "A topological reading lesson: Classification - of MNIST using TDA"; 19th International IEEE Conference on Machine - Learning and Applications (ICMLA 2020), 2019; arXiv: `1910.08345 \ - `_. + .. [1] A. Garin and G. Tauzin, "A topological reading lesson: + Classification of MNIST using TDA"; 19th International IEEE + Conference on Machine Learning and Applications (ICMLA 2020), 2019; + `arXiv:1910.08345 `_. """ @@ -944,8 +936,8 @@ def fit(self, X, y=None): """ X = check_array(X, allow_nd=True) - n_dimensions = X.ndim - 1 - if (n_dimensions < 2) or (n_dimensions > 3): + n_dimensions_ = X.ndim - 1 + if (n_dimensions_ < 2) or (n_dimensions_ > 3): raise ValueError(f"Input of `fit` contains arrays of dimension " f"{n_dimensions_}.") validate_params( @@ -1035,3 +1027,226 @@ def plot(Xt, sample=0, colorscale='greys', origin='upper', title=f"Signed-distance filtration of image {sample}", plotly_params=plotly_params ) + + +@adapt_fit_transform_docs +class DensityFiltration(BaseEstimator, TransformerMixin, PlotterMixin): + """Filtrations of 2D/3D binary images based on the number of neighboring + activated pixels. + + The density filtration assigns to each pixel of a binary image a greyscale + value equal to the weighted number of activated pixels within a ball + centered around it. The weights are calculated based on the distance of the + activated pixels to the center of the ball. + + Parameters + ---------- + radius : float, optional, default: ``1.`` + The radius of the ball within which the number of activated pixels is + considered. + + metric : string or callable, optional, default: ``'euclidean'`` + Determines a rule with which to calculate distances between + pairs of pixels. + If ``metric`` is a string, it must be one of the options allowed by + ``scipy.spatial.distance.pdist`` for its metric parameter, or a metric + listed in ``sklearn.pairwise.PAIRWISE_DISTANCE_FUNCTIONS``, including + "euclidean", "manhattan", or "cosine". + If ``metric`` is a callable function, it is called on each pair of + instances and the resulting value recorded. The callable should take + two arrays from the entry in `X` as input, and return a value + indicating the distance between them. + + metric_params : dict, optional, default: ``{}`` + Additional keyword arguments for the metric function. + + n_jobs : int or None, optional, default: ``None`` + The number of jobs to use for the computation. ``None`` means 1 unless + in a :obj:`joblib.parallel_backend` context. ``-1`` means using all + processors. + + Attributes + ---------- + mask_ : ndarray of shape (radius, radius, [, radius]) + The mask applied around each pixel to calculate the weighted number of + its activated neighbors. Set in :meth:`fit`. + + See also + -------- + gtda.homology.CubicalPersistence, Binarizer + + References + ---------- + [1] A. Garin and G. Tauzin, "A topological reading lesson: Classification + of MNIST using TDA"; 19th International IEEE Conference on Machine + Learning and Applications (ICMLA 2020), 2019; arXiv: `1910.08345 \ + `_. + + """ + + _hyperparameters = { + 'radius': {'type': Real, 'in': Interval(0, np.inf, closed='right')}, + 'metric': {'type': (str, FunctionType)}, + 'metric_params': {'type': dict}, + } + + def __init__(self, radius=3, metric='euclidean', metric_params={}, + n_jobs=None): + self.radius = radius + self.metric = metric + self.metric_params = metric_params + self.n_jobs = n_jobs + + def _calculate_density(self, X): + Xd = np.zeros(X.shape) + + for i, j, k in self._iterator: + Xd += np.roll(np.roll( + np.roll(X, k, axis=3), j, axis=2), i, axis=1) \ + * self.mask_[self._range + i, self._range + j, + self._range + k] + return Xd + + def fit(self, X, y=None): + """Calculate :attr:`mask_` from a collection of binary images. Then, + return the estimator. + + This method is here to implement the usual scikit-learn API and hence + work in pipelines. + + Parameters + ---------- + X : ndarray of shape (n_samples, n_pixels_x, n_pixels_y [, n_pixels_z]) + Input data. Each entry along axis 0 is interpreted as a 2D or 3D + binary image. + + y : None + There is no need of a target in a transformer, yet the pipeline API + requires this parameter. + + Returns + ------- + self : object + + """ + X = check_array(X, allow_nd=True) + n_dimensions_ = X.ndim - 1 + if (n_dimensions_ < 2) or (n_dimensions_ > 3): + raise ValueError(f"Input of `fit` contains arrays of dimension " + f"{n_dimensions_}.") + validate_params( + self.get_params(), self._hyperparameters, exclude=['n_jobs']) + + self._range = int(np.ceil(self.radius)) + + iterator_range_list = [range(-self._range, self._range + 1)] * self._n_dimensions \ + for _ in range(self._n_dimensions)] \ + + [[0] * (3 - self._n_dimensions)] + self._iterator = tuple(itertools.product(*iterator_range_list)) + + # The mask is always 3D but not the iterator. + self.mask_ = np.ones(tuple([2 * self._range + 1] * 3, + dtype=np.bool)) + mesh_range_list = [np.arange(0, 2 * self._range + 1)] * 3 + self.mesh_ = np.stack( + np.meshgrid(*mesh_range_list), axis=3).reshape((-1, 3)) + + center = self._range * np.ones((1, 3)) + self.mask_ = pairwise_distances( + center, self.mesh_, metric=self.metric, + n_jobs=1, **self.metric_params).reshape(self.mask_.shape) + + self.mask_ = self.mask_ <= self.radius + + padding = np.asarray([self._range] * self._n_dimensions + \ + [0] * (3 - self._n_dimensions)) + self._padder = Padder(paddings=padding) + self._padder.fit(X.reshape((*X.shape[:3], -1))) + + return self + + def transform(self, X, y=None): + """For each binary image in the collection `X`, calculate a + corresponding greyscale image based on the density of its pixels. + Return the collection of greyscale images. + + Parameters + ---------- + X : ndarray of shape (n_samples, n_pixels_x, n_pixels_y [, n_pixels_z]) + Input data. Each entry along axis 0 is interpreted as a 2D or 3D + binary image. + + y : None + There is no need of a target in a transformer, yet the pipeline API + requires this parameter. + + Returns + ------- + Xt : ndarray of shape (n_samples, n_pixels_x, + n_pixels_y [, n_pixels_z]) + Transformed collection of images. Each entry along axis 0 is a + 2D or 3D greyscale image. + + """ + check_is_fitted(self) + Xt = check_array(X, allow_nd=True, copy=True) + + Xt = Xt.reshape((*X.shape[:3], -1)) + Xt = self._padder.transform(Xt) + + Xt = Parallel(n_jobs=self.n_jobs)( + delayed(self._calculate_density)(Xt[s]) + for s in gen_even_slices(Xt.shape[0], + effective_n_jobs(self.n_jobs))) + Xt = np.concatenate(Xt) + + Xt = Xt[:, self._range: -self._range, self._range: -self._range] + + if self._n_dimensions == 3: + Xt = Xt[:, :, :, self._range: -self._range] + + Xt = Xt.reshape(X.shape) + + return Xt + + @staticmethod + def plot(Xt, sample=0, colorscale='greys', origin='upper', + plotly_params=None): + """Plot a sample from a collection of 2D greyscale images. + + Parameters + ---------- + Xt : ndarray of shape (n_samples, n_pixels_x, n_pixels_y) + Collection of 2D greyscale images, such as returned by + :meth:`transform`. + + sample : int, optional, default: ``0`` + Index of the sample in `Xt` to be plotted. + + colorscale : str, optional, default: ``'greys'`` + Color scale to be used in the heat map. Can be anything allowed by + :class:`plotly.graph_objects.Heatmap`. + + origin : ``'upper'`` | ``'lower'``, optional, default: ``'upper'`` + Position of the [0, 0] pixel of `data`, in the upper left or lower + left corner. The convention ``'upper'`` is typically used for + matrices and images. + + plotly_params : dict or None, optional, default: ``None`` + Custom parameters to configure the plotly figure. Allowed keys are + ``"trace"`` and ``"layout"``, and the corresponding values should + be dictionaries containing keyword arguments as would be fed to the + :meth:`update_traces` and :meth:`update_layout` methods of + :class:`plotly.graph_objects.Figure`. + + Returns + ------- + fig : :class:`plotly.graph_objects.Figure` object + Plotly figure. + + """ + return plot_heatmap( + Xt[sample], colorscale=colorscale, origin=origin, + title=f"Signed-distance filtration of image {sample}", + plotly_params=plotly_params + ) diff --git a/gtda/images/preprocessing.py b/gtda/images/preprocessing.py index f98b2da44..78ecec051 100644 --- a/gtda/images/preprocessing.py +++ b/gtda/images/preprocessing.py @@ -48,10 +48,10 @@ class Binarizer(BaseEstimator, TransformerMixin, PlotterMixin): References ---------- - [1] A. Garin and G. Tauzin, "A topological reading lesson: Classification - of MNIST using TDA"; 19th International IEEE Conference on Machine - Learning and Applications (ICMLA 2020), 2019; arXiv: `1910.08345 \ - `_. + .. [1] A. Garin and G. Tauzin, "A topological reading lesson: + Classification of MNIST using TDA"; 19th International IEEE + Conference on Machine Learning and Applications (ICMLA 2020), 2019; + `arXiv:1910.08345 `_. """ @@ -92,7 +92,6 @@ def fit(self, X, y=None): """ X = check_array(X, allow_nd=True) - self.n_dimensions_ = X.ndim - 1 if (self.n_dimensions_ < 2) or (self.n_dimensions_ > 3): raise ValueError(f"Input of `fit` contains arrays of dimension " f"{self.n_dimensions_}.") @@ -184,27 +183,55 @@ def plot(Xt, sample=0, colorscale='greys', origin='upper', @adapt_fit_transform_docs class Inverter(BaseEstimator, TransformerMixin, PlotterMixin): - """Invert all 2D/3D binary images in a collection. + """Invert all 2D/3D images in a collection. + + Applies an inversion function to the value of all pixels of all images in + the input collection. If the images are binary, the inversion function is + defined as the logical NOT function. Otherwise, it is the function + :math:`f(x) = M - x`, where `x` is a pixel value and `M` is + :attr:`max_value_`. Parameters ---------- + max_value : bool, int, float or None, optional, default: ``None`` + Maximum possible pixel value in the images. It should be a boolean if + input images are binary and an int or a float if they are greyscale. + If ``None``, it is calculated from the collection of images passed in + :meth:`fit`. + n_jobs : int or None, optional, default: ``None`` The number of jobs to use for the computation. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. + Attributes + ---------- + max_value_ : int ndarray of shape (padding_x, padding_y [, padding_z]) + Effective maximum value of the images' pixels. Set in :meth:`fit`. + References ---------- - [1] A. Garin and G. Tauzin, "A topological reading lesson: \ - Classification of MNIST using TDA"; 19th International \ - IEEE Conference on Machine Learning and Applications (ICMLA 2020), \ - 2019; arXiv: `1910.08345 `_. + .. [1] A. Garin and G. Tauzin, "A topological reading lesson: + Classification of MNIST using TDA"; 19th International IEEE + Conference on Machine Learning and Applications (ICMLA 2020), 2019; + `arXiv:1910.08345 `_. """ - def __init__(self, n_jobs=None): + _hyperparameters = { + 'max_value': {'type': (bool, Real, type(None))} + } + + def __init__(self, max_value=None, n_jobs=None): + self.max_value = max_value self.n_jobs = n_jobs + def _invert(self, X): + if self.max_value_ is True: + return np.logical_not(X) + else: + return self.max_value_ - X + def fit(self, X, y=None): """Do nothing and return the estimator unchanged. @@ -215,7 +242,7 @@ def fit(self, X, y=None): ---------- X : ndarray of shape (n_samples, n_pixels_x, n_pixels_y [, n_pixels_z]) Input data. Each entry along axis 0 is interpreted as a 2D or 3D - binary image. + image. y : None There is no need of a target in a transformer, yet the pipeline API @@ -227,12 +254,21 @@ def fit(self, X, y=None): """ check_array(X, allow_nd=True) - n_dimensions = X.ndim - 1 - if (n_dimensions < 2) or (n_dimensions > 3): + n_dimensions_ = X.ndim - 1 + if (n_dimensions_ < 2) or (n_dimensions_ > 3): raise ValueError(f"Input of `fit` contains arrays of dimension " f"{n_dimensions_}.") + validate_params(self.get_params(), self._hyperparameters, + exclude=['n_jobs']) + + if self.max_value is None: + if X.dtype == np.bool: + self.max_value_ = True + else: + self.max_value_ = np.max(X) + else: + self.max_value_ = self.max_value - self._is_fitted = True return self def transform(self, X, y=None): @@ -257,11 +293,11 @@ def transform(self, X, y=None): 2D or 3D binary image. """ - check_is_fitted(self, ['_is_fitted']) + check_is_fitted(self) Xt = check_array(X, allow_nd=True) Xt = Parallel(n_jobs=self.n_jobs)(delayed( - np.logical_not)(Xt[s]) + self._invert)(Xt[s]) for s in gen_even_slices(len(Xt), effective_n_jobs(self.n_jobs))) Xt = np.concatenate(Xt) @@ -312,19 +348,19 @@ def plot(Xt, sample=0, colorscale='greys', origin='upper', @adapt_fit_transform_docs class Padder(BaseEstimator, TransformerMixin, PlotterMixin): - """Pad all 2D/3D binary images in a collection. + """Pad all 2D/3D images in a collection. Parameters ---------- - paddings : int ndarray of shape (padding_x, padding_y [, padding_z]) or \ + padding : int ndarray of shape (padding_x, padding_y [, padding_z]) or \ None, optional, default: ``None`` Number of pixels to pad the images along each axis and on both side of the images. By default, a frame of a single pixel width is added around the image (``1 = padding_x = padding_y [= padding_z]``). - activated : bool, optional, default: ``False`` - If ``True``, the padded pixels are activated. If ``False``, they are - deactivated. + value : bool, int, or float, optional, default: ``0`` + Value given to the padded pixels. It should be a boolean if the input + images are binary and an int or float if they are greyscale. n_jobs : int or None, optional, default: ``None`` The number of jobs to use for the computation. ``None`` means 1 unless @@ -333,31 +369,33 @@ class Padder(BaseEstimator, TransformerMixin, PlotterMixin): Attributes ---------- - paddings_ : int ndarray of shape (padding_x, padding_y [, padding_z]) + padding_ : int ndarray of shape (padding_x, padding_y [, padding_z]) Effective padding along each of the axis. Set in :meth:`fit`. References ---------- - [1] A. Garin and G. Tauzin, "A topological reading lesson: Classification - of MNIST using TDA"; 19th International IEEE Conference on Machine - Learning and Applications (ICMLA 2020), 2019; arXiv: `1910.08345 \ - `_. + .. [1] A. Garin and G. Tauzin, "A topological reading lesson: + Classification of MNIST using TDA"; 19th International IEEE + Conference on Machine Learning and Applications (ICMLA 2020), 2019; + `arXiv:1910.08345 `_. """ _hyperparameters = { - 'paddings': {'type': (np.ndarray, type(None)), 'of': {'type': int}}, - 'activated': {'type': bool} - } - - def __init__(self, paddings=None, activated=False, n_jobs=None): - self.paddings = paddings - self.activated = activated + 'padding': { + 'type': (np.ndarray, type(None)), + 'of': {'type': int}}, + 'value': {'type': (bool, Real)} + } + + def __init__(self, padding=None, value=False, n_jobs=None): + self.padding = padding + self.value = value self.n_jobs = n_jobs def fit(self, X, y=None): - """Calculate :attr:`paddings_` from a collection of binary images. - Then, return the estimator. + """Calculate :attr:`padding_` from a collection of images. Then, + return the estimator. This method is here to implement the usual scikit-learn API and hence work in pipelines. @@ -366,7 +404,7 @@ def fit(self, X, y=None): ---------- X : ndarray of shape (n_samples, n_pixels_x, n_pixels_y [, n_pixels_z]) Input data. Each entry along axis 0 is interpreted as a 2D or 3D - binary image. + image. y : None There is no need of a target in a transformer, yet the pipeline API @@ -378,24 +416,24 @@ def fit(self, X, y=None): """ X = check_array(X, allow_nd=True) - n_dimensions = X.ndim - 1 - if (n_dimensions < 2) or (n_dimensions > 3): + n_dimensions_ = X.ndim - 1 + if (n_dimensions_ < 2) or (n_dimensions_ > 3): raise ValueError(f"Input of `fit` contains arrays of dimension " f"{n_dimensions_}.") - validate_params( - self.get_params(), self._hyperparameters, exclude=['n_jobs']) + validate_params(self.get_params(), self._hyperparameters, + exclude=['value', 'n_jobs']) - if self.paddings is None: - self.paddings_ = np.ones((n_dimensions,), dtype=np.int) - elif len(self.paddings) != n_dimensions: + if self.padding is None: + self.padding_ = np.ones((n_dimensions,), dtype=np.int) + elif len(self.padding) != n_dimensions: raise ValueError( - f"`paddings` has length {self.paddings} while the input " + f"`padding` has length {self.padding} while the input " f"data requires it to have length equal to {n_dimensions}.") else: - self.paddings_ = self.paddings + self.padding_ = self.padding self._pad_width = ((0, 0), - *[(self.paddings_[axis], self.paddings_[axis]) + *[(self.padding_[axis], self.padding_[axis]) for axis in range(n_dimensions)]) return self @@ -408,7 +446,7 @@ def transform(self, X, y=None): ---------- X : ndarray of shape (n_samples, n_pixels_x, n_pixels_y [, n_pixels_z]) Input data. Each entry along axis 0 is interpreted as a 2D or 3D - binary image. + image. y : None There is no need of a target in a transformer, yet the pipeline API @@ -427,7 +465,7 @@ def transform(self, X, y=None): Xt = Parallel(n_jobs=self.n_jobs)(delayed( np.pad)(Xt[s], pad_width=self._pad_width, - constant_values=self.activated) + constant_values=self.value) for s in gen_even_slices(len(Xt), effective_n_jobs(self.n_jobs))) Xt = np.concatenate(Xt) @@ -502,10 +540,10 @@ class ImageToPointCloud(BaseEstimator, TransformerMixin, PlotterMixin): References ---------- - [1] A. Garin and G. Tauzin, "A topological reading lesson: Classification - of MNIST using TDA"; 19th International IEEE Conference on Machine - Learning and Applications (ICMLA 2020), 2019; arXiv: `1910.08345 \ - `_. + .. [1] A. Garin and G. Tauzin, "A topological reading lesson: + Classification of MNIST using TDA"; 19th International IEEE + Conference on Machine Learning and Applications (ICMLA 2020), 2019; + `arXiv:1910.08345 `_. """ @@ -537,9 +575,8 @@ def fit(self, X, y=None): """ check_array(X, allow_nd=True) - - n_dimensions = X.ndim - 1 - if (n_dimensions < 2) or (n_dimensions > 3): + n_dimensions_ = X.ndim - 1 + if (n_dimensions_ < 2) or (n_dimensions_ > 3): raise ValueError(f"Input of `fit` contains arrays of dimension " f"{n_dimensions_}.") From 8839b5d1327211a3c2abdd82aecea7cdeeffc946 Mon Sep 17 00:00:00 2001 From: Guillaume Tauzin Date: Sun, 20 Sep 2020 13:10:08 +0200 Subject: [PATCH 12/29] Improve transformer definition Signed-off-by: Guillaume Tauzin --- gtda/images/filtrations.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/gtda/images/filtrations.py b/gtda/images/filtrations.py index 521d73f33..c5fc85fa3 100644 --- a/gtda/images/filtrations.py +++ b/gtda/images/filtrations.py @@ -1035,9 +1035,8 @@ class DensityFiltration(BaseEstimator, TransformerMixin, PlotterMixin): activated pixels. The density filtration assigns to each pixel of a binary image a greyscale - value equal to the weighted number of activated pixels within a ball - centered around it. The weights are calculated based on the distance of the - activated pixels to the center of the ball. + value equal to the sum of the distance between this pixel and all activated + pixels within a ball centered around it. Parameters ---------- From 8422a906884c6d87dca3f524bd7da3b468d6f9b5 Mon Sep 17 00:00:00 2001 From: Guillaume Tauzin Date: Sun, 20 Sep 2020 13:41:03 +0200 Subject: [PATCH 13/29] Fix tests Signed-off-by: Guillaume Tauzin --- gtda/images/filtrations.py | 79 ++++++++++++++++++------------- gtda/images/preprocessing.py | 39 ++++++++++----- gtda/mapper/tests/test_cluster.py | 2 +- 3 files changed, 72 insertions(+), 48 deletions(-) diff --git a/gtda/images/filtrations.py b/gtda/images/filtrations.py index c5fc85fa3..989bc69bf 100644 --- a/gtda/images/filtrations.py +++ b/gtda/images/filtrations.py @@ -3,7 +3,6 @@ from numbers import Real from types import FunctionType -from warnings import warn import itertools import numpy as np @@ -117,15 +116,15 @@ def fit(self, X, y=None): """ X = check_array(X, allow_nd=True) - n_dimensions_ = X.ndim - 1 - if (n_dimensions_ < 2) or (n_dimensions_ > 3): + self.n_dimensions_ = X.ndim - 1 + if (self.n_dimensions_ < 2) or (self.n_dimensions_ > 3): raise ValueError(f"Input of `fit` contains arrays of dimension " - f"{n_dimensions_}.") + f"{self.n_dimensions_}.") validate_params( self.get_params(), self._hyperparameters, exclude=['n_jobs']) if self.direction is None: - self.direction_ = np.ones(n_dimensions_,) + self.direction_ = np.ones(self.n_dimensions_,) else: self.direction_ = np.copy(self.direction) self.direction_ = self.direction_ / np.linalg.norm(self.direction_) @@ -134,10 +133,10 @@ def fit(self, X, y=None): mesh_range_list = \ [np.arange(X.shape[order]) if self.direction_[i] >= 0 else -np.flip(np.arange(X.shape[order])) for i, order - in enumerate(axis_order[: n_dimensions_])] + in enumerate(axis_order[: self.n_dimensions_])] self.mesh_ = np.stack(np.meshgrid(*mesh_range_list, indexing='xy'), - axis=n_dimensions_) + axis=self.n_dimensions_) self.max_value_ = 0. self.max_value_ = np.max(self._calculate_height( @@ -348,26 +347,26 @@ def fit(self, X, y=None): """ X = check_array(X, allow_nd=True) - n_dimensions_ = X.ndim - 1 - if (n_dimensions_ < 2) or (n_dimensions_ > 3): + self.n_dimensions_ = X.ndim - 1 + if (self.n_dimensions_ < 2) or (self.n_dimensions_ > 3): raise ValueError(f"Input of `fit` contains arrays of dimension " - f"{n_dimensions_}.") + f"{self.n_dimensions_}.") validate_params( self.get_params(), self._hyperparameters, exclude=['n_jobs']) if self.center is None: - self.center_ = np.zeros(n_dimensions_) + self.center_ = np.zeros(self.n_dimensions_) else: self.center_ = np.copy(self.center) self.center_ = self.center_.reshape((1, -1)) axis_order = [2, 1, 3] mesh_range_list = [np.arange(0, X.shape[i]) - for i in axis_order[:n_dimensions_]] + for i in axis_order[:self.n_dimensions_]] self.mesh_ = np.stack( np.meshgrid(*mesh_range_list), - axis=n_dimensions_).reshape((-1, n_dimensions_)) + axis=self.n_dimensions_).reshape((-1, self.n_dimensions_)) self.mesh_ = pairwise_distances( self.center_, self.mesh_, metric=self.metric, n_jobs=1, **self.metric_params).reshape(X.shape[1:]) @@ -489,6 +488,9 @@ class DilationFiltration(BaseEstimator, TransformerMixin, PlotterMixin): Attributes ---------- + n_dimensions_ : ``2`` or ``3`` + Dimension of the images. Set in :meth:`fit`. + n_iterations_ : int Effective number of iterations in the dilation process. Set in :meth:`fit`. @@ -550,10 +552,10 @@ def fit(self, X, y=None): """ X = check_array(X, allow_nd=True) - n_dimensions_ = X.ndim - 1 - if (n_dimensions_ < 2) or (n_dimensions_ > 3): + self.n_dimensions_ = X.ndim - 1 + if (self.n_dimensions_ < 2) or (self.n_dimensions_ > 3): raise ValueError(f"Input of `fit` contains arrays of dimension " - f"{n_dimensions_}.") + f"{self.n_dimensions_}.") validate_params( self.get_params(), self._hyperparameters, exclude=['n_jobs']) @@ -677,6 +679,9 @@ class ErosionFiltration(BaseEstimator, TransformerMixin, PlotterMixin): Attributes ---------- + n_dimensions_ : ``2`` or ``3`` + Dimension of the images. Set in :meth:`fit`. + n_iterations_ : int Effective number of iterations in the erosion process. Set in :meth:`fit`. @@ -738,10 +743,10 @@ def fit(self, X, y=None): """ X = check_array(X, allow_nd=True) - n_dimensions_ = X.ndim - 1 - if (n_dimensions_ < 2) or (n_dimensions_ > 3): + self.n_dimensions_ = X.ndim - 1 + if (self.n_dimensions_ < 2) or (self.n_dimensions_ > 3): raise ValueError(f"Input of `fit` contains arrays of dimension " - f"{n_dimensions_}.") + f"{self.n_dimensions_}.") validate_params( self.get_params(), self._hyperparameters, exclude=['n_jobs']) @@ -867,6 +872,9 @@ class SignedDistanceFiltration(BaseEstimator, TransformerMixin, PlotterMixin): Attributes ---------- + n_dimensions_ : ``2`` or ``3`` + Dimension of the images. Set in :meth:`fit`. + n_iterations_ : int Effective number of iterations in the dilation process. Set in :meth:`fit`. @@ -936,10 +944,10 @@ def fit(self, X, y=None): """ X = check_array(X, allow_nd=True) - n_dimensions_ = X.ndim - 1 - if (n_dimensions_ < 2) or (n_dimensions_ > 3): + self.n_dimensions_ = X.ndim - 1 + if (self.n_dimensions_ < 2) or (self.n_dimensions_ > 3): raise ValueError(f"Input of `fit` contains arrays of dimension " - f"{n_dimensions_}.") + f"{self.n_dimensions_}.") validate_params( self.get_params(), self._hyperparameters, exclude=['n_jobs']) @@ -1066,6 +1074,9 @@ class DensityFiltration(BaseEstimator, TransformerMixin, PlotterMixin): Attributes ---------- + n_dimensions_ : ``2`` or ``3`` + Dimension of the images. Set in :meth:`fit`. + mask_ : ndarray of shape (radius, radius, [, radius]) The mask applied around each pixel to calculate the weighted number of its activated neighbors. Set in :meth:`fit`. @@ -1129,23 +1140,23 @@ def fit(self, X, y=None): """ X = check_array(X, allow_nd=True) - n_dimensions_ = X.ndim - 1 - if (n_dimensions_ < 2) or (n_dimensions_ > 3): + self.n_dimensions_ = X.ndim - 1 + if (self.n_dimensions_ < 2) or (self.n_dimensions_ > 3): raise ValueError(f"Input of `fit` contains arrays of dimension " - f"{n_dimensions_}.") + f"{self.n_dimensions_}.") validate_params( self.get_params(), self._hyperparameters, exclude=['n_jobs']) self._range = int(np.ceil(self.radius)) - iterator_range_list = [range(-self._range, self._range + 1)] * self._n_dimensions \ - for _ in range(self._n_dimensions)] \ - + [[0] * (3 - self._n_dimensions)] + iterator_range_list = [range(-self._range, self._range + 1) + for _ in range(self.n_dimensions_)] \ + + [[0] for _ in range(3 - self.n_dimensions_)] self._iterator = tuple(itertools.product(*iterator_range_list)) # The mask is always 3D but not the iterator. - self.mask_ = np.ones(tuple([2 * self._range + 1] * 3, - dtype=np.bool)) + self.mask_ = np.ones(tuple(2 * self._range + 1 for _ in range(3)), + dtype=np.bool) mesh_range_list = [np.arange(0, 2 * self._range + 1)] * 3 self.mesh_ = np.stack( np.meshgrid(*mesh_range_list), axis=3).reshape((-1, 3)) @@ -1157,9 +1168,9 @@ def fit(self, X, y=None): self.mask_ = self.mask_ <= self.radius - padding = np.asarray([self._range] * self._n_dimensions + \ - [0] * (3 - self._n_dimensions)) - self._padder = Padder(paddings=padding) + padding = np.asarray([*[self._range] * self.n_dimensions_, + *[0] * (3 - self.n_dimensions_)]) + self._padder = Padder(padding=padding) self._padder.fit(X.reshape((*X.shape[:3], -1))) return self @@ -1201,7 +1212,7 @@ def transform(self, X, y=None): Xt = Xt[:, self._range: -self._range, self._range: -self._range] - if self._n_dimensions == 3: + if self.n_dimensions_ == 3: Xt = Xt[:, :, :, self._range: -self._range] Xt = Xt.reshape(X.shape) diff --git a/gtda/images/preprocessing.py b/gtda/images/preprocessing.py index 78ecec051..fb632547f 100644 --- a/gtda/images/preprocessing.py +++ b/gtda/images/preprocessing.py @@ -92,6 +92,7 @@ def fit(self, X, y=None): """ X = check_array(X, allow_nd=True) + self.n_dimensions_ = X.ndim - 1 if (self.n_dimensions_ < 2) or (self.n_dimensions_ > 3): raise ValueError(f"Input of `fit` contains arrays of dimension " f"{self.n_dimensions_}.") @@ -206,6 +207,9 @@ class Inverter(BaseEstimator, TransformerMixin, PlotterMixin): Attributes ---------- + n_dimensions_ : ``2`` or ``3`` + Dimension of the images. Set in :meth:`fit`. + max_value_ : int ndarray of shape (padding_x, padding_y [, padding_z]) Effective maximum value of the images' pixels. Set in :meth:`fit`. @@ -254,10 +258,10 @@ def fit(self, X, y=None): """ check_array(X, allow_nd=True) - n_dimensions_ = X.ndim - 1 - if (n_dimensions_ < 2) or (n_dimensions_ > 3): + self.n_dimensions_ = X.ndim - 1 + if (self.n_dimensions_ < 2) or (self.n_dimensions_ > 3): raise ValueError(f"Input of `fit` contains arrays of dimension " - f"{n_dimensions_}.") + f"{self.n_dimensions_}.") validate_params(self.get_params(), self._hyperparameters, exclude=['n_jobs']) @@ -369,6 +373,9 @@ class Padder(BaseEstimator, TransformerMixin, PlotterMixin): Attributes ---------- + self.n_dimensions_ : ``2`` or ``3`` + Dimension of the images. Set in :meth:`fit`. + padding_ : int ndarray of shape (padding_x, padding_y [, padding_z]) Effective padding along each of the axis. Set in :meth:`fit`. @@ -416,25 +423,26 @@ def fit(self, X, y=None): """ X = check_array(X, allow_nd=True) - n_dimensions_ = X.ndim - 1 - if (n_dimensions_ < 2) or (n_dimensions_ > 3): + self.n_dimensions_ = X.ndim - 1 + if (self.n_dimensions_ < 2) or (self.n_dimensions_ > 3): raise ValueError(f"Input of `fit` contains arrays of dimension " - f"{n_dimensions_}.") + f"{self.n_dimensions_}.") validate_params(self.get_params(), self._hyperparameters, exclude=['value', 'n_jobs']) if self.padding is None: - self.padding_ = np.ones((n_dimensions,), dtype=np.int) - elif len(self.padding) != n_dimensions: + self.padding_ = np.ones((self.n_dimensions_,), dtype=np.int) + elif len(self.padding) != self.n_dimensions_: raise ValueError( f"`padding` has length {self.padding} while the input " - f"data requires it to have length equal to {n_dimensions}.") + f"data requires it to have length equal to " + f"{self.n_dimensions_}.") else: self.padding_ = self.padding self._pad_width = ((0, 0), *[(self.padding_[axis], self.padding_[axis]) - for axis in range(n_dimensions)]) + for axis in range(self.n_dimensions_)]) return self @@ -533,6 +541,11 @@ class ImageToPointCloud(BaseEstimator, TransformerMixin, PlotterMixin): in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. + Attributes + ---------- + n_dimensions_ : ``2`` or ``3`` + Dimension of the images. Set in :meth:`fit`. + See also -------- gtda.homology.VietorisRipsPersistence, gtda.homology.SparseRipsPersistence, @@ -575,10 +588,10 @@ def fit(self, X, y=None): """ check_array(X, allow_nd=True) - n_dimensions_ = X.ndim - 1 - if (n_dimensions_ < 2) or (n_dimensions_ > 3): + self.n_dimensions_ = X.ndim - 1 + if (self.n_dimensions_ < 2) or (self.n_dimensions_ > 3): raise ValueError(f"Input of `fit` contains arrays of dimension " - f"{n_dimensions_}.") + f"{self.n_dimensions_}.") self._is_fitted = True return self diff --git a/gtda/mapper/tests/test_cluster.py b/gtda/mapper/tests/test_cluster.py index 9ab85ad3d..397c4a8bf 100644 --- a/gtda/mapper/tests/test_cluster.py +++ b/gtda/mapper/tests/test_cluster.py @@ -147,4 +147,4 @@ def get_partition_from_preds(preds): for c in indices_cluster]) assert get_partition_from_preds(preds) == \ - get_partition_from_preds(preds_mat) + get_partition_from_preds(preds_mat) From a67c47e4ac16ef3ab9d07fda5837b5ac0c187a17 Mon Sep 17 00:00:00 2001 From: Umberto Lupo Date: Tue, 22 Sep 2020 17:26:49 +0200 Subject: [PATCH 14/29] Fix errors --- gtda/homology/cubical.py | 4 ++-- gtda/images/filtrations.py | 35 +++++++++++++++++++---------------- gtda/images/preprocessing.py | 27 +++++++++++++++------------ 3 files changed, 36 insertions(+), 30 deletions(-) diff --git a/gtda/homology/cubical.py b/gtda/homology/cubical.py index 0fd7af393..946d50c90 100644 --- a/gtda/homology/cubical.py +++ b/gtda/homology/cubical.py @@ -44,7 +44,7 @@ class CubicalPersistence(BaseEstimator, TransformerMixin, PlotterMixin): periodic_dimensions : boolean ndarray of shape (n_dimensions,) or None, \ optional, default: ``None`` - Periodicity of the boundaries along each of the axis, where + Periodicity of the boundaries along each of the axes, where ``n_dimensions`` is the dimension of the images of the collection. The boolean in the `d`th position expresses whether the boundaries along the `d`th axis are periodic. The default ``None`` is equivalent to @@ -69,7 +69,7 @@ class CubicalPersistence(BaseEstimator, TransformerMixin, PlotterMixin): Attributes ---------- periodic_dimensions_ : boolean ndarray of shape (n_dimensions,) - Effective periodicity of the boundaries along each of the axis. Set in + Effective periodicity of the boundaries along each of the axes. Set in :meth:`fit`. infinity_values_ : float diff --git a/gtda/images/filtrations.py b/gtda/images/filtrations.py index 094410a50..4d2fc8842 100644 --- a/gtda/images/filtrations.py +++ b/gtda/images/filtrations.py @@ -69,7 +69,7 @@ class HeightFiltration(BaseEstimator, TransformerMixin, PlotterMixin): References ---------- .. [1] A. Garin and G. Tauzin, "A topological reading lesson: - Classification of MNIST using TDA"; 19th International IEEE + Classification of MNIST using TDA"; 19th International IEEE Conference on Machine Learning and Applications (ICMLA 2020), 2019; `arXiv:1910.08345 `_. @@ -93,7 +93,7 @@ def _calculate_height(self, X): return Xh def fit(self, X, y=None): - """Calculate :attr:`direction_`, :attr:`n_dimensions_`, :attr:`mesh_` + """Calculate :attr:`n_dimensions_`, :attr:`direction_`, :attr:`mesh_` and :attr:`max_value_` from a collection of binary images. Then, return the estimator. @@ -163,8 +163,8 @@ def transform(self, X, y=None): Returns ------- - Xt : ndarray of shape (n_samples, n_pixels_x, - n_pixels_y [, n_pixels_z]) + Xt : ndarray of shape (n_samples, n_pixels_x, n_pixels_y \ + [, n_pixels_z]) Transformed collection of images. Each entry along axis 0 is a 2D or 3D greyscale image. @@ -293,7 +293,7 @@ class RadialFiltration(BaseEstimator, TransformerMixin, PlotterMixin): References ---------- .. [1] A. Garin and G. Tauzin, "A topological reading lesson: - Classification of MNIST using TDA"; 19th International IEEE + Classification of MNIST using TDA"; 19th International IEEE Conference on Machine Learning and Applications (ICMLA 2020), 2019; `arXiv:1910.08345 `_. @@ -506,7 +506,7 @@ class DilationFiltration(BaseEstimator, TransformerMixin, PlotterMixin): References ---------- .. [1] A. Garin and G. Tauzin, "A topological reading lesson: - Classification of MNIST using TDA"; 19th International IEEE + Classification of MNIST using TDA"; 19th International IEEE Conference on Machine Learning and Applications (ICMLA 2020), 2019; `arXiv:1910.08345 `_. @@ -530,8 +530,9 @@ def _calculate_dilation(self, X): return Xd def fit(self, X, y=None): - """Calculate :attr:`n_iterations_` and :attr:`max_value_` from a - collection of binary images. Then, return the estimator. + """Calculate :attr:`n_dimensions_`, :attr:`n_iterations_` and + :attr:`max_value_` from a collection of binary images. Then, return the + estimator. This method is here to implement the usual scikit-learn API and hence work in pipelines. @@ -697,7 +698,7 @@ class ErosionFiltration(BaseEstimator, TransformerMixin, PlotterMixin): References ---------- .. [1] A. Garin and G. Tauzin, "A topological reading lesson: - Classification of MNIST using TDA"; 19th International IEEE + Classification of MNIST using TDA"; 19th International IEEE Conference on Machine Learning and Applications (ICMLA 2020), 2019; `arXiv:1910.08345 `_. @@ -721,8 +722,9 @@ def _calculate_erosion(self, X): return Xe def fit(self, X, y=None): - """Calculate :attr:`n_iterations_` and :attr:`max_value_` from a - collection of binary images. Then, return the estimator. + """Calculate :attr:`n_dimensions_`, :attr:`n_iterations_` and + :attr:`max_value_` from a collection of binary images. Then, return the + estimator. This method is here to implement the usual scikit-learn API and hence work in pipelines. @@ -891,7 +893,7 @@ class SignedDistanceFiltration(BaseEstimator, TransformerMixin, PlotterMixin): References ---------- .. [1] A. Garin and G. Tauzin, "A topological reading lesson: - Classification of MNIST using TDA"; 19th International IEEE + Classification of MNIST using TDA"; 19th International IEEE Conference on Machine Learning and Applications (ICMLA 2020), 2019; `arXiv:1910.08345 `_. @@ -922,8 +924,9 @@ def _calculate_signed_distance(self, X): return (Xd + Xe) def fit(self, X, y=None): - """Calculate :attr:`n_iterations_` and :attr:`max_value_` from a - collection of binary images. Then, return the estimator. + """Calculate :attr:`n_dimensions_`, :attr:`n_iterations_` and + :attr:`max_value_` from a collection of binary images. Then, return the + estimator. This method is here to implement the usual scikit-learn API and hence work in pipelines. @@ -1118,8 +1121,8 @@ def _calculate_density(self, X): return Xd def fit(self, X, y=None): - """Calculate :attr:`mask_` from a collection of binary images. Then, - return the estimator. + """Calculate :attr:`n_dimensions_` and :attr:`mask_` from a collection + of binary images. Then, return the estimator. This method is here to implement the usual scikit-learn API and hence work in pipelines. diff --git a/gtda/images/preprocessing.py b/gtda/images/preprocessing.py index c47ee13de..56b7f634c 100644 --- a/gtda/images/preprocessing.py +++ b/gtda/images/preprocessing.py @@ -35,7 +35,7 @@ class Binarizer(BaseEstimator, TransformerMixin, PlotterMixin): Attributes ---------- - n_dimensions_ : int + n_dimensions_ : ``2`` or ``3`` Dimension of the images. Set in meth:`fit`. max_value_ : float @@ -49,7 +49,7 @@ class Binarizer(BaseEstimator, TransformerMixin, PlotterMixin): References ---------- .. [1] A. Garin and G. Tauzin, "A topological reading lesson: - Classification of MNIST using TDA"; 19th International IEEE + Classification of MNIST using TDA"; 19th International IEEE Conference on Machine Learning and Applications (ICMLA 2020), 2019; `arXiv:1910.08345 `_. @@ -210,13 +210,13 @@ class Inverter(BaseEstimator, TransformerMixin, PlotterMixin): n_dimensions_ : ``2`` or ``3`` Dimension of the images. Set in :meth:`fit`. - max_value_ : int ndarray of shape (padding_x, padding_y [, padding_z]) + max_value_ : int, float or bool Effective maximum value of the images' pixels. Set in :meth:`fit`. References ---------- .. [1] A. Garin and G. Tauzin, "A topological reading lesson: - Classification of MNIST using TDA"; 19th International IEEE + Classification of MNIST using TDA"; 19th International IEEE Conference on Machine Learning and Applications (ICMLA 2020), 2019; `arXiv:1910.08345 `_. @@ -237,7 +237,8 @@ def _invert(self, X): return self.max_value_ - X def fit(self, X, y=None): - """Do nothing and return the estimator unchanged. + """Calculate :attr:`n_dimensions_` and :attr:`max_value_` from the + collection of images. Then, return the estimator. This method is here to implement the usual scikit-learn API and hence work in pipelines. @@ -373,16 +374,16 @@ class Padder(BaseEstimator, TransformerMixin, PlotterMixin): Attributes ---------- - self.n_dimensions_ : ``2`` or ``3`` + n_dimensions_ : ``2`` or ``3`` Dimension of the images. Set in :meth:`fit`. padding_ : int ndarray of shape (padding_x, padding_y [, padding_z]) - Effective padding along each of the axis. Set in :meth:`fit`. + Effective padding along each of the axes. Set in :meth:`fit`. References ---------- .. [1] A. Garin and G. Tauzin, "A topological reading lesson: - Classification of MNIST using TDA"; 19th International IEEE + Classification of MNIST using TDA"; 19th International IEEE Conference on Machine Learning and Applications (ICMLA 2020), 2019; `arXiv:1910.08345 `_. @@ -400,8 +401,8 @@ def __init__(self, padding=None, value=False, n_jobs=None): self.n_jobs = n_jobs def fit(self, X, y=None): - """Calculate :attr:`padding_` from a collection of images. Then, - return the estimator. + """Calculate :attr:`n_dimensions_` and :attr:`padding_` from a + collection of images. Then, return the estimator. This method is here to implement the usual scikit-learn API and hence work in pipelines. @@ -553,7 +554,7 @@ class ImageToPointCloud(BaseEstimator, TransformerMixin, PlotterMixin): References ---------- .. [1] A. Garin and G. Tauzin, "A topological reading lesson: - Classification of MNIST using TDA"; 19th International IEEE + Classification of MNIST using TDA"; 19th International IEEE Conference on Machine Learning and Applications (ICMLA 2020), 2019; `arXiv:1910.08345 `_. @@ -567,7 +568,9 @@ def _embed(X): return [np.argwhere(x) for x in X] def fit(self, X, y=None): - """Do nothing and return the estimator unchanged. + """Calculate :attr:`n_dimensions_` from a collection of binary images. + Then, return the estimator. + This method is here to implement the usual scikit-learn API and hence work in pipelines. From 58c966d76ed0cf091fb98da8930604c910a5178f Mon Sep 17 00:00:00 2001 From: Umberto Lupo Date: Tue, 22 Sep 2020 17:45:05 +0200 Subject: [PATCH 15/29] Small wording/linting changes --- gtda/images/filtrations.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/gtda/images/filtrations.py b/gtda/images/filtrations.py index 4d2fc8842..1c299e7e0 100644 --- a/gtda/images/filtrations.py +++ b/gtda/images/filtrations.py @@ -1042,8 +1042,8 @@ def plot(Xt, sample=0, colorscale='greys', origin='upper', @adapt_fit_transform_docs class DensityFiltration(BaseEstimator, TransformerMixin, PlotterMixin): - """Filtrations of 2D/3D binary images based on the number of neighboring - activated pixels. + """Filtrations of 2D/3D binary images based on the number of activated + neighboring pixels. The density filtration assigns to each pixel of a binary image a greyscale value equal to the sum of the distance between this pixel and all activated @@ -1101,7 +1101,7 @@ class DensityFiltration(BaseEstimator, TransformerMixin, PlotterMixin): 'radius': {'type': Real, 'in': Interval(0, np.inf, closed='right')}, 'metric': {'type': (str, FunctionType)}, 'metric_params': {'type': dict}, - } + } def __init__(self, radius=3, metric='euclidean', metric_params={}, n_jobs=None): @@ -1152,9 +1152,9 @@ def fit(self, X, y=None): self._range = int(np.ceil(self.radius)) - iterator_range_list = [range(-self._range, self._range + 1) - for _ in range(self.n_dimensions_)] \ - + [[0] for _ in range(3 - self.n_dimensions_)] + iterator_range_list = ([range(-self._range, self._range + 1) + for _ in range(self.n_dimensions_)] + + [[0] for _ in range(3 - self.n_dimensions_)]) self._iterator = tuple(itertools.product(*iterator_range_list)) # The mask is always 3D but not the iterator. @@ -1195,8 +1195,8 @@ def transform(self, X, y=None): Returns ------- - Xt : ndarray of shape (n_samples, n_pixels_x, - n_pixels_y [, n_pixels_z]) + Xt : ndarray of shape (n_samples, n_pixels_x, n_pixels_y \ + [, n_pixels_z]) Transformed collection of images. Each entry along axis 0 is a 2D or 3D greyscale image. From f5002b0d9567a9476fd224ac097b895cf825157a Mon Sep 17 00:00:00 2001 From: Guillaume Tauzin Date: Sun, 27 Sep 2020 19:08:38 +0200 Subject: [PATCH 16/29] Fix doc and mask size Signed-off-by: Guillaume Tauzin --- gtda/images/filtrations.py | 39 ++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/gtda/images/filtrations.py b/gtda/images/filtrations.py index 1c299e7e0..f3a1bd71f 100644 --- a/gtda/images/filtrations.py +++ b/gtda/images/filtrations.py @@ -1046,8 +1046,8 @@ class DensityFiltration(BaseEstimator, TransformerMixin, PlotterMixin): neighboring pixels. The density filtration assigns to each pixel of a binary image a greyscale - value equal to the sum of the distance between this pixel and all activated - pixels within a ball centered around it. + value equal to the number of activated pixels within a ball centered around + it. Parameters ---------- @@ -1116,8 +1116,8 @@ def _calculate_density(self, X): for i, j, k in self._iterator: Xd += np.roll(np.roll( np.roll(X, k, axis=3), j, axis=2), i, axis=1) \ - * self.mask_[self._range + i, self._range + j, - self._range + k] + * self.mask_[self._size + i, self._size + j, + self._size + k] return Xd def fit(self, X, y=None): @@ -1150,28 +1150,35 @@ def fit(self, X, y=None): validate_params( self.get_params(), self._hyperparameters, exclude=['n_jobs']) - self._range = int(np.ceil(self.radius)) + # Determine the size of the mask based on the radius and metric + self._size = int(np.ceil( + pairwise_distances([[0]], [[self.radius]], metric=self.metric, + **self.metric_params) + )) + # The mask is always 3D but not the iterator. + self.mask_ = np.ones(tuple(2 * self._size + 1 for _ in range(3)), + dtype=np.bool) - iterator_range_list = ([range(-self._range, self._range + 1) + # Create an iterator for applying the mask to every pixel at once + iterator_size_list = ([range(-self._size, self._size + 1) for _ in range(self.n_dimensions_)] + [[0] for _ in range(3 - self.n_dimensions_)]) - self._iterator = tuple(itertools.product(*iterator_range_list)) + self._iterator = tuple(itertools.product(*iterator_size_list)) - # The mask is always 3D but not the iterator. - self.mask_ = np.ones(tuple(2 * self._range + 1 for _ in range(3)), - dtype=np.bool) - mesh_range_list = [np.arange(0, 2 * self._range + 1)] * 3 + mesh_size_list = [np.arange(0, 2 * self._size + 1)] * 3 self.mesh_ = np.stack( - np.meshgrid(*mesh_range_list), axis=3).reshape((-1, 3)) + np.meshgrid(*mesh_size_list), axis=3).reshape((-1, 3)) - center = self._range * np.ones((1, 3)) + # Set the mask values so that it corresponds to a ball + center = self._size * np.ones((1, 3)) self.mask_ = pairwise_distances( center, self.mesh_, metric=self.metric, n_jobs=1, **self.metric_params).reshape(self.mask_.shape) self.mask_ = self.mask_ <= self.radius - padding = np.asarray([*[self._range] * self.n_dimensions_, + # Instanciate a padder to handle image boundaries + padding = np.asarray([*[self._size] * self.n_dimensions_, *[0] * (3 - self.n_dimensions_)]) self._padder = Padder(padding=padding) self._padder.fit(X.reshape((*X.shape[:3], -1))) @@ -1213,10 +1220,10 @@ def transform(self, X, y=None): effective_n_jobs(self.n_jobs))) Xt = np.concatenate(Xt) - Xt = Xt[:, self._range: -self._range, self._range: -self._range] + Xt = Xt[:, self._size: -self._size, self._size: -self._size] if self.n_dimensions_ == 3: - Xt = Xt[:, :, :, self._range: -self._range] + Xt = Xt[:, :, :, self._size: -self._size] Xt = Xt.reshape(X.shape) From 6526bcf1a3955241a56429111bf9046a3edb4cc4 Mon Sep 17 00:00:00 2001 From: Guillaume Tauzin Date: Tue, 29 Sep 2020 21:38:06 +0200 Subject: [PATCH 17/29] Remove Error if dim is 1 Signed-off-by: Guillaume Tauzin --- gtda/externals/pybind11 | 2 +- gtda/images/filtrations.py | 12 ++++++------ gtda/images/preprocessing.py | 8 ++++---- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/gtda/externals/pybind11 b/gtda/externals/pybind11 index 8fa70e748..4f72ef846 160000 --- a/gtda/externals/pybind11 +++ b/gtda/externals/pybind11 @@ -1 +1 @@ -Subproject commit 8fa70e74838e93f0db38417f3590ba792489b958 +Subproject commit 4f72ef846fe8453596230ac285eeaa0ce3278bb4 diff --git a/gtda/images/filtrations.py b/gtda/images/filtrations.py index f3a1bd71f..ee43c8b4b 100644 --- a/gtda/images/filtrations.py +++ b/gtda/images/filtrations.py @@ -117,7 +117,7 @@ def fit(self, X, y=None): """ X = check_array(X, allow_nd=True) self.n_dimensions_ = X.ndim - 1 - if (self.n_dimensions_ < 2) or (self.n_dimensions_ > 3): + if self.n_dimensions_ > 3: raise ValueError(f"Input of `fit` contains arrays of dimension " f"{self.n_dimensions_}.") validate_params( @@ -348,7 +348,7 @@ def fit(self, X, y=None): """ X = check_array(X, allow_nd=True) self.n_dimensions_ = X.ndim - 1 - if (self.n_dimensions_ < 2) or (self.n_dimensions_ > 3): + if self.n_dimensions_ > 3: raise ValueError(f"Input of `fit` contains arrays of dimension " f"{self.n_dimensions_}.") validate_params( @@ -554,7 +554,7 @@ def fit(self, X, y=None): """ X = check_array(X, allow_nd=True) self.n_dimensions_ = X.ndim - 1 - if (self.n_dimensions_ < 2) or (self.n_dimensions_ > 3): + if self.n_dimensions_ > 3: raise ValueError(f"Input of `fit` contains arrays of dimension " f"{self.n_dimensions_}.") validate_params( @@ -746,7 +746,7 @@ def fit(self, X, y=None): """ X = check_array(X, allow_nd=True) self.n_dimensions_ = X.ndim - 1 - if (self.n_dimensions_ < 2) or (self.n_dimensions_ > 3): + if self.n_dimensions_ > 3: raise ValueError(f"Input of `fit` contains arrays of dimension " f"{self.n_dimensions_}.") validate_params( @@ -948,7 +948,7 @@ def fit(self, X, y=None): """ X = check_array(X, allow_nd=True) self.n_dimensions_ = X.ndim - 1 - if (self.n_dimensions_ < 2) or (self.n_dimensions_ > 3): + if self.n_dimensions_ > 3: raise ValueError(f"Input of `fit` contains arrays of dimension " f"{self.n_dimensions_}.") validate_params( @@ -1144,7 +1144,7 @@ def fit(self, X, y=None): """ X = check_array(X, allow_nd=True) self.n_dimensions_ = X.ndim - 1 - if (self.n_dimensions_ < 2) or (self.n_dimensions_ > 3): + if self.n_dimensions_ > 3: raise ValueError(f"Input of `fit` contains arrays of dimension " f"{self.n_dimensions_}.") validate_params( diff --git a/gtda/images/preprocessing.py b/gtda/images/preprocessing.py index 56b7f634c..d09f02870 100644 --- a/gtda/images/preprocessing.py +++ b/gtda/images/preprocessing.py @@ -93,7 +93,7 @@ def fit(self, X, y=None): """ X = check_array(X, allow_nd=True) self.n_dimensions_ = X.ndim - 1 - if (self.n_dimensions_ < 2) or (self.n_dimensions_ > 3): + if self.n_dimensions_ > 3: raise ValueError(f"Input of `fit` contains arrays of dimension " f"{self.n_dimensions_}.") validate_params( @@ -260,7 +260,7 @@ def fit(self, X, y=None): """ check_array(X, allow_nd=True) self.n_dimensions_ = X.ndim - 1 - if (self.n_dimensions_ < 2) or (self.n_dimensions_ > 3): + if self.n_dimensions_ > 3: raise ValueError(f"Input of `fit` contains arrays of dimension " f"{self.n_dimensions_}.") validate_params(self.get_params(), self._hyperparameters, @@ -424,7 +424,7 @@ def fit(self, X, y=None): """ X = check_array(X, allow_nd=True) self.n_dimensions_ = X.ndim - 1 - if (self.n_dimensions_ < 2) or (self.n_dimensions_ > 3): + if self.n_dimensions_ > 3: raise ValueError(f"Input of `fit` contains arrays of dimension " f"{self.n_dimensions_}.") validate_params(self.get_params(), self._hyperparameters, @@ -591,7 +591,7 @@ def fit(self, X, y=None): """ check_array(X, allow_nd=True) self.n_dimensions_ = X.ndim - 1 - if (self.n_dimensions_ < 2) or (self.n_dimensions_ > 3): + if self.n_dimensions_ > 3: raise ValueError(f"Input of `fit` contains arrays of dimension " f"{self.n_dimensions_}.") From 7557715587e5730b09456971ceebde5cd8058059 Mon Sep 17 00:00:00 2001 From: Umberto Lupo Date: Wed, 30 Sep 2020 13:33:46 +0200 Subject: [PATCH 18/29] Fix linting --- gtda/images/filtrations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gtda/images/filtrations.py b/gtda/images/filtrations.py index ee43c8b4b..0d1484018 100644 --- a/gtda/images/filtrations.py +++ b/gtda/images/filtrations.py @@ -1162,7 +1162,7 @@ def fit(self, X, y=None): # Create an iterator for applying the mask to every pixel at once iterator_size_list = ([range(-self._size, self._size + 1) for _ in range(self.n_dimensions_)] + - [[0] for _ in range(3 - self.n_dimensions_)]) + [[0] for _ in range(3 - self.n_dimensions_)]) self._iterator = tuple(itertools.product(*iterator_size_list)) mesh_size_list = [np.arange(0, 2 * self._size + 1)] * 3 From 4c2ff30fe03df51976929e207af4019113ac790c Mon Sep 17 00:00:00 2001 From: Umberto Lupo Date: Wed, 30 Sep 2020 18:58:56 +0200 Subject: [PATCH 19/29] Remove unnecessary _is_fitted from ImageToPointCloud --- gtda/images/preprocessing.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/gtda/images/preprocessing.py b/gtda/images/preprocessing.py index d09f02870..526646cf3 100644 --- a/gtda/images/preprocessing.py +++ b/gtda/images/preprocessing.py @@ -595,7 +595,6 @@ def fit(self, X, y=None): raise ValueError(f"Input of `fit` contains arrays of dimension " f"{self.n_dimensions_}.") - self._is_fitted = True return self def transform(self, X, y=None): @@ -616,13 +615,12 @@ def transform(self, X, y=None): Returns ------- Xt : ndarray of shape (n_samples, n_pixels_x * n_pixels_y [* \ - n_pixels_z], - n_dimensions) + n_pixels_z], n_dimensions) Transformed collection of images. Each entry along axis 0 is a point cloud in ``n_dimensions``-dimensional space. """ - check_is_fitted(self, '_is_fitted') + check_is_fitted(self) Xt = check_array(X, allow_nd=True) Xt = np.swapaxes(np.flip(Xt, axis=1), 1, 2) From d94d057ddb27264e6928e3cedefed3724589b779 Mon Sep 17 00:00:00 2001 From: Umberto Lupo Date: Wed, 30 Sep 2020 19:16:12 +0200 Subject: [PATCH 20/29] Fix typo --- gtda/images/filtrations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gtda/images/filtrations.py b/gtda/images/filtrations.py index 0d1484018..853ba698e 100644 --- a/gtda/images/filtrations.py +++ b/gtda/images/filtrations.py @@ -1080,7 +1080,7 @@ class DensityFiltration(BaseEstimator, TransformerMixin, PlotterMixin): n_dimensions_ : ``2`` or ``3`` Dimension of the images. Set in :meth:`fit`. - mask_ : ndarray of shape (radius, radius, [, radius]) + mask_ : ndarray of shape (radius, radius [, radius]) The mask applied around each pixel to calculate the weighted number of its activated neighbors. Set in :meth:`fit`. From 87c4ef0d7f3df91751dd3f18477b7b5fe9604261 Mon Sep 17 00:00:00 2001 From: Umberto Lupo Date: Wed, 30 Sep 2020 19:23:52 +0200 Subject: [PATCH 21/29] Improve See alsos in images/filtrations.py --- gtda/images/filtrations.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/gtda/images/filtrations.py b/gtda/images/filtrations.py index 853ba698e..93b84ab00 100644 --- a/gtda/images/filtrations.py +++ b/gtda/images/filtrations.py @@ -64,7 +64,9 @@ class HeightFiltration(BaseEstimator, TransformerMixin, PlotterMixin): See also -------- - gtda.homology.CubicalPersistence, Binarizer + RadialFiltration, DilationFiltration, ErosionFiltration, \ + SignedDistanceFiltration, DensityFiltration, \ + gtda.homology.CubicalPersistence References ---------- @@ -288,7 +290,9 @@ class RadialFiltration(BaseEstimator, TransformerMixin, PlotterMixin): See also -------- - gtda.homology.CubicalPersistence, Binarizer + HeightFiltration, DilationFiltration, ErosionFiltration, \ + SignedDistanceFiltration, DensityFiltration, \ + gtda.homology.CubicalPersistence References ---------- @@ -501,7 +505,9 @@ class DilationFiltration(BaseEstimator, TransformerMixin, PlotterMixin): See also -------- - gtda.homology.CubicalPersistence, Binarizer + HeightFiltration, RadialFiltration, ErosionFiltration, \ + SignedDistanceFiltration, DensityFiltration, \ + gtda.homology.CubicalPersistence References ---------- @@ -693,7 +699,9 @@ class ErosionFiltration(BaseEstimator, TransformerMixin, PlotterMixin): See also -------- - gtda.homology.CubicalPersistence, Binarizer + HeightFiltration, RadialFiltration, DilationFiltration, \ + SignedDistanceFiltration, DensityFiltration, \ + gtda.homology.CubicalPersistence References ---------- @@ -887,8 +895,8 @@ class SignedDistanceFiltration(BaseEstimator, TransformerMixin, PlotterMixin): See also -------- - gtda.homology.CubicalPersistence, Binarizer, ErosionFiltration, \ - DilationFiltration + HeightFiltration, RadialFiltration, DilationFiltration, \ + ErosionFiltration, DensityFiltration, gtda.homology.CubicalPersistence References ---------- @@ -1086,7 +1094,9 @@ class DensityFiltration(BaseEstimator, TransformerMixin, PlotterMixin): See also -------- - gtda.homology.CubicalPersistence, Binarizer + HeightFiltration, RadialFiltration, DilationFiltration, \ + ErosionFiltration, SignedDistanceFiltration, \ + gtda.homology.CubicalPersistence References ---------- From bbf3a45f12297ff12afc70138551fb9d6dcb8ca1 Mon Sep 17 00:00:00 2001 From: Umberto Lupo Date: Wed, 30 Sep 2020 19:31:44 +0200 Subject: [PATCH 22/29] Simplify code --- gtda/images/filtrations.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/gtda/images/filtrations.py b/gtda/images/filtrations.py index 93b84ab00..8c84d3349 100644 --- a/gtda/images/filtrations.py +++ b/gtda/images/filtrations.py @@ -1164,15 +1164,14 @@ def fit(self, X, y=None): self._size = int(np.ceil( pairwise_distances([[0]], [[self.radius]], metric=self.metric, **self.metric_params) - )) + )) # The mask is always 3D but not the iterator. - self.mask_ = np.ones(tuple(2 * self._size + 1 for _ in range(3)), - dtype=np.bool) + self.mask_ = np.ones([2 * self._size + 1] * 3, dtype=np.bool) # Create an iterator for applying the mask to every pixel at once - iterator_size_list = ([range(-self._size, self._size + 1) - for _ in range(self.n_dimensions_)] + - [[0] for _ in range(3 - self.n_dimensions_)]) + iterator_size_list = \ + [range(-self._size, self._size + 1)] * self.n_dimensions_ + \ + [0] * (3 - self.n_dimensions_) self._iterator = tuple(itertools.product(*iterator_size_list)) mesh_size_list = [np.arange(0, 2 * self._size + 1)] * 3 @@ -1187,7 +1186,7 @@ def fit(self, X, y=None): self.mask_ = self.mask_ <= self.radius - # Instanciate a padder to handle image boundaries + # Instantiate a padder to handle image boundaries padding = np.asarray([*[self._size] * self.n_dimensions_, *[0] * (3 - self.n_dimensions_)]) self._padder = Padder(padding=padding) From 716f246e56d52d47c2007a4f377513e2785e6d85 Mon Sep 17 00:00:00 2001 From: Umberto Lupo Date: Wed, 30 Sep 2020 19:31:44 +0200 Subject: [PATCH 23/29] Revert "Simplify code" This reverts commit bbf3a45f12297ff12afc70138551fb9d6dcb8ca1. --- gtda/images/filtrations.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/gtda/images/filtrations.py b/gtda/images/filtrations.py index 8c84d3349..93b84ab00 100644 --- a/gtda/images/filtrations.py +++ b/gtda/images/filtrations.py @@ -1164,14 +1164,15 @@ def fit(self, X, y=None): self._size = int(np.ceil( pairwise_distances([[0]], [[self.radius]], metric=self.metric, **self.metric_params) - )) + )) # The mask is always 3D but not the iterator. - self.mask_ = np.ones([2 * self._size + 1] * 3, dtype=np.bool) + self.mask_ = np.ones(tuple(2 * self._size + 1 for _ in range(3)), + dtype=np.bool) # Create an iterator for applying the mask to every pixel at once - iterator_size_list = \ - [range(-self._size, self._size + 1)] * self.n_dimensions_ + \ - [0] * (3 - self.n_dimensions_) + iterator_size_list = ([range(-self._size, self._size + 1) + for _ in range(self.n_dimensions_)] + + [[0] for _ in range(3 - self.n_dimensions_)]) self._iterator = tuple(itertools.product(*iterator_size_list)) mesh_size_list = [np.arange(0, 2 * self._size + 1)] * 3 @@ -1186,7 +1187,7 @@ def fit(self, X, y=None): self.mask_ = self.mask_ <= self.radius - # Instantiate a padder to handle image boundaries + # Instanciate a padder to handle image boundaries padding = np.asarray([*[self._size] * self.n_dimensions_, *[0] * (3 - self.n_dimensions_)]) self._padder = Padder(padding=padding) From 2e72d0183c6c48482ba367de2a10a73489196876 Mon Sep 17 00:00:00 2001 From: Umberto Lupo Date: Wed, 30 Sep 2020 20:05:03 +0200 Subject: [PATCH 24/29] Simplify code --- gtda/images/filtrations.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gtda/images/filtrations.py b/gtda/images/filtrations.py index 93b84ab00..bde84bb2a 100644 --- a/gtda/images/filtrations.py +++ b/gtda/images/filtrations.py @@ -1170,9 +1170,9 @@ def fit(self, X, y=None): dtype=np.bool) # Create an iterator for applying the mask to every pixel at once - iterator_size_list = ([range(-self._size, self._size + 1) - for _ in range(self.n_dimensions_)] + - [[0] for _ in range(3 - self.n_dimensions_)]) + iterator_size_list = \ + [range(-self._size, self._size + 1)] * self.n_dimensions_ + \ + [[0] for _ in range(3 - self.n_dimensions_)] self._iterator = tuple(itertools.product(*iterator_size_list)) mesh_size_list = [np.arange(0, 2 * self._size + 1)] * 3 From 359bb616ff651b44ef877340f61711d6b0d4defa Mon Sep 17 00:00:00 2001 From: Umberto Lupo Date: Wed, 30 Sep 2020 20:09:45 +0200 Subject: [PATCH 25/29] Fix typo --- gtda/images/filtrations.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gtda/images/filtrations.py b/gtda/images/filtrations.py index bde84bb2a..de8c2bad9 100644 --- a/gtda/images/filtrations.py +++ b/gtda/images/filtrations.py @@ -1164,7 +1164,7 @@ def fit(self, X, y=None): self._size = int(np.ceil( pairwise_distances([[0]], [[self.radius]], metric=self.metric, **self.metric_params) - )) + )) # The mask is always 3D but not the iterator. self.mask_ = np.ones(tuple(2 * self._size + 1 for _ in range(3)), dtype=np.bool) @@ -1187,7 +1187,7 @@ def fit(self, X, y=None): self.mask_ = self.mask_ <= self.radius - # Instanciate a padder to handle image boundaries + # Instantiate a padder to handle image boundaries padding = np.asarray([*[self._size] * self.n_dimensions_, *[0] * (3 - self.n_dimensions_)]) self._padder = Padder(padding=padding) From 49344d068db5a0478c5352f170cf15ceac63a168 Mon Sep 17 00:00:00 2001 From: Umberto Lupo Date: Wed, 30 Sep 2020 20:24:12 +0200 Subject: [PATCH 26/29] Add tests for bad input shapes in image subpackage --- gtda/images/tests/test_filtrations.py | 14 ++++++++++++++ gtda/images/tests/test_preprocessing.py | 9 +++++++++ 2 files changed, 23 insertions(+) diff --git a/gtda/images/tests/test_filtrations.py b/gtda/images/tests/test_filtrations.py index c3c36b2ae..359fa1e61 100644 --- a/gtda/images/tests/test_filtrations.py +++ b/gtda/images/tests/test_filtrations.py @@ -24,6 +24,16 @@ np.zeros((3, 4, 2))], axis=0) +@pytest.mark.parametrize("transformer", + [HeightFiltration(), RadialFiltration(), + DilationFiltration(), ErosionFiltration(), + SignedDistanceFiltration(), DensityFiltration()]) +def test_invalid_input_shape(transformer): + X = np.ones((1, 1, 1, 1, 1)) + with pytest.raises(ValueError, match="Input of `fit`"): + transformer.fit(X) + + def test_height_not_fitted(): height = HeightFiltration() with pytest.raises(NotFittedError): @@ -319,3 +329,7 @@ def test_density_transform(radius, images, expected): assert_almost_equal(density.fit_transform(images), expected) + + +def test_density_fit_transform_plot(): + DensityFiltration().fit_transform_plot(images_2D, sample=0) diff --git a/gtda/images/tests/test_preprocessing.py b/gtda/images/tests/test_preprocessing.py index 348a840bd..2e8087461 100644 --- a/gtda/images/tests/test_preprocessing.py +++ b/gtda/images/tests/test_preprocessing.py @@ -27,6 +27,15 @@ np.zeros((7, 8, 4))], axis=0) +@pytest.mark.parametrize("transformer", + [Binarizer(), Inverter(), Padder(), + ImageToPointCloud()]) +def test_invalid_input_shape(transformer): + X = np.ones((1, 1, 1, 1, 1)) + with pytest.raises(ValueError, match="Input of `fit`"): + transformer.fit(X) + + def test_binarizer_not_fitted(): binarizer = Binarizer() with pytest.raises(NotFittedError): From 44126c0ea97c4528d237e16b25f4af93ba3a0430 Mon Sep 17 00:00:00 2001 From: Guillaume Tauzin Date: Sat, 3 Oct 2020 21:23:27 +0200 Subject: [PATCH 27/29] Add inline comments Signed-off-by: Guillaume Tauzin --- gtda/images/filtrations.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/gtda/images/filtrations.py b/gtda/images/filtrations.py index de8c2bad9..7e13e48ca 100644 --- a/gtda/images/filtrations.py +++ b/gtda/images/filtrations.py @@ -1123,6 +1123,8 @@ def __init__(self, radius=3, metric='euclidean', metric_params={}, def _calculate_density(self, X): Xd = np.zeros(X.shape) + # The idea behind this is to sum up pixel values of the image + # rolled according to the 3D mask for i, j, k in self._iterator: Xd += np.roll(np.roll( np.roll(X, k, axis=3), j, axis=2), i, axis=1) \ @@ -1175,11 +1177,14 @@ def fit(self, X, y=None): [[0] for _ in range(3 - self.n_dimensions_)] self._iterator = tuple(itertools.product(*iterator_size_list)) + # We create a mesh so that we have an array with coordinates and we can + # calculate the distance of each point to the center mesh_size_list = [np.arange(0, 2 * self._size + 1)] * 3 self.mesh_ = np.stack( np.meshgrid(*mesh_size_list), axis=3).reshape((-1, 3)) - # Set the mask values so that it corresponds to a ball + # Calculate those distances to the center and use them to set the mask + # values so that it corresponds to a ball center = self._size * np.ones((1, 3)) self.mask_ = pairwise_distances( center, self.mesh_, metric=self.metric, @@ -1187,7 +1192,8 @@ def fit(self, X, y=None): self.mask_ = self.mask_ <= self.radius - # Instantiate a padder to handle image boundaries + # Instantiate a padder to pad all images with 0 so that the rolling of + # the mask also works at the boundary of the images padding = np.asarray([*[self._size] * self.n_dimensions_, *[0] * (3 - self.n_dimensions_)]) self._padder = Padder(padding=padding) @@ -1221,6 +1227,8 @@ def transform(self, X, y=None): check_is_fitted(self) Xt = check_array(X, allow_nd=True, copy=True) + # Reshape the images to 3D so that they can be rolled according to their + # 3D mask Xt = Xt.reshape((*X.shape[:3], -1)) Xt = self._padder.transform(Xt) From 2b723318f3d4734e105f9c6359e0bd3b389bec39 Mon Sep 17 00:00:00 2001 From: Guillaume Tauzin Date: Sat, 3 Oct 2020 21:35:13 +0200 Subject: [PATCH 28/29] Revert update of pybind11 --- gtda/externals/pybind11 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gtda/externals/pybind11 b/gtda/externals/pybind11 index 4f72ef846..8fa70e748 160000 --- a/gtda/externals/pybind11 +++ b/gtda/externals/pybind11 @@ -1 +1 @@ -Subproject commit 4f72ef846fe8453596230ac285eeaa0ce3278bb4 +Subproject commit 8fa70e74838e93f0db38417f3590ba792489b958 From f5cd3291993e22bf14a14bd8db599d34cd9ff072 Mon Sep 17 00:00:00 2001 From: Guillaume Tauzin Date: Sat, 3 Oct 2020 22:00:37 +0200 Subject: [PATCH 29/29] Fix typo Signed-off-by: Guillaume Tauzin --- gtda/images/filtrations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gtda/images/filtrations.py b/gtda/images/filtrations.py index 7e13e48ca..68b8e8652 100644 --- a/gtda/images/filtrations.py +++ b/gtda/images/filtrations.py @@ -1227,7 +1227,7 @@ def transform(self, X, y=None): check_is_fitted(self) Xt = check_array(X, allow_nd=True, copy=True) - # Reshape the images to 3D so that they can be rolled according to their + # Reshape the images to 3D so that they can be rolled according to the # 3D mask Xt = Xt.reshape((*X.shape[:3], -1)) Xt = self._padder.transform(Xt)