From 260faf6be64d289af3d4f2d057564689c1480014 Mon Sep 17 00:00:00 2001 From: Wojciech Reise Date: Thu, 20 Feb 2020 15:33:19 +0100 Subject: [PATCH 01/45] Add a check_list_of_arrays fct in utils/validation --- gtda/utils/validation.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/gtda/utils/validation.py b/gtda/utils/validation.py index 4622c0764..115040eae 100644 --- a/gtda/utils/validation.py +++ b/gtda/utils/validation.py @@ -5,6 +5,8 @@ import types import numpy as np +from sklearn.utils.validation import check_array + available_metrics = { 'bottleneck': [('delta', numbers.Number, (0., 1.))], 'wasserstein': [('p', int, (1, np.inf)), @@ -174,3 +176,34 @@ def validate_metric_params(metric, metric_params): " parameter. Available metric_params" " are {}".format(param, available_metric_params[metric])) + + +def check_list_of_arrays(X, **kwargs): + """"Check a list of arrays, by itegrating through the input one by one. + Parameters + ---------- + X : list(np.array), such that `X[i].ndim==2` (n_points, n_dimensions), + or an array `X.dim==3` + Returns + ------- + X : list of input arrays, as modified by check_array + """ + if isinstance(X, np.ndarray): + return check_array(X, **kwargs) + else: + results = [] + messages = [] + for id_x, x in enumerate(X): + try: + X[id_x] = np.squeeze(check_array(np.expand_dims(x, axis=0), + **kwargs), axis=0) + results.append(True) + messages = [''] + except ValueError as e: + results.append(False) + messages.append(str(e)) + if all(results): + return X + else: + raise ValueError("The following errors were raised" + + "by the inputs: \n" + "\n".join(messages)) From 2d840d364b3695c83a9d486858fe2f85e7cf724d Mon Sep 17 00:00:00 2001 From: Wojciech Reise Date: Thu, 20 Feb 2020 15:52:30 +0100 Subject: [PATCH 02/45] Change the checks in the homology classes --- gtda/homology/consistent.py | 8 ++++---- gtda/homology/grids.py | 6 +++--- gtda/homology/point_clouds.py | 16 ++++++++-------- 3 files changed, 
15 insertions(+), 15 deletions(-) diff --git a/gtda/homology/consistent.py b/gtda/homology/consistent.py index 1e1499b95..f48fb82e9 100644 --- a/gtda/homology/consistent.py +++ b/gtda/homology/consistent.py @@ -7,10 +7,10 @@ from joblib import Parallel, delayed from sklearn.base import BaseEstimator, TransformerMixin from sklearn.metrics import pairwise_distances -from sklearn.utils.validation import check_array, check_is_fitted +from sklearn.utils.validation import check_is_fitted from ..utils._docs import adapt_fit_transform_docs -from ..utils.validation import validate_params +from ..utils.validation import validate_params, check_list_of_arrays @adapt_fit_transform_docs @@ -132,7 +132,7 @@ def fit(self, X, y=None): """ validate_params(self.get_params(), self._hyperparameters) - check_array(X, allow_nd=True) + check_list_of_arrays(X, allow_nd=True) self._is_fitted = True return self @@ -164,7 +164,7 @@ def transform(self, X, y=None): """ check_is_fitted(self, '_is_fitted') - X = check_array(X, allow_nd=True) + X = check_list_of_arrays(X, allow_nd=True) Xt = Parallel(n_jobs=self.n_jobs)( delayed(self._consistent_homology_distance)(X[i]) diff --git a/gtda/homology/grids.py b/gtda/homology/grids.py index d222da0de..fc67429ec 100644 --- a/gtda/homology/grids.py +++ b/gtda/homology/grids.py @@ -5,9 +5,9 @@ import numbers from sklearn.base import BaseEstimator, TransformerMixin from joblib import Parallel, delayed -from sklearn.utils.validation import check_array, check_is_fitted +from sklearn.utils.validation import check_is_fitted from ._utils import _pad_diagram -from ..utils.validation import validate_params +from ..utils.validation import validate_params, check_list_of_arrays from ..externals.python import CubicalComplex, PeriodicCubicalComplex @@ -157,7 +157,7 @@ def fit(self, X, y=None): 'infinity_values_': self.infinity_values_, '_homology_dimensions': self._homology_dimensions}, self._hyperparameters) - check_array(X, allow_nd=True) + check_list_of_arrays(X, 
allow_nd=True) self._max_homology_dimension = self._homology_dimensions[-1] return self diff --git a/gtda/homology/point_clouds.py b/gtda/homology/point_clouds.py index a9d228473..2f7c616ee 100644 --- a/gtda/homology/point_clouds.py +++ b/gtda/homology/point_clouds.py @@ -6,13 +6,13 @@ import numpy as np from joblib import Parallel, delayed from sklearn.base import BaseEstimator, TransformerMixin -from sklearn.utils.validation import check_array, check_is_fitted +from sklearn.utils.validation import check_is_fitted from sklearn.metrics.pairwise import pairwise_distances from ._utils import _postprocess_diagrams from ..externals.python import ripser, SparseRipsComplex, CechComplex from ..utils._docs import adapt_fit_transform_docs -from ..utils.validation import validate_params +from ..utils.validation import validate_params, check_list_of_arrays @adapt_fit_transform_docs @@ -165,7 +165,7 @@ def fit(self, X, y=None): 'infinity_values_': self.infinity_values_, '_homology_dimensions': self._homology_dimensions}, self._hyperparameters) - check_array(X, allow_nd=True, force_all_finite=False) + check_list_of_arrays(X, allow_nd=True, force_all_finite=False) self._max_homology_dimension = self._homology_dimensions[-1] return self @@ -207,7 +207,7 @@ def transform(self, X, y=None): """ check_is_fitted(self) - X = check_array(X, allow_nd=True, force_all_finite=False) + X = check_list_of_arrays(X, allow_nd=True, force_all_finite=False) Xt = Parallel(n_jobs=self.n_jobs)(delayed(self._ripser_diagram)(X[i]) for i in range(len(X))) @@ -381,7 +381,7 @@ def fit(self, X, y=None): 'infinity_values_': self.infinity_values_, '_homology_dimensions': self._homology_dimensions}, self._hyperparameters) - check_array(X, allow_nd=True, force_all_finite=False) + check_list_of_arrays(X, allow_nd=True, force_all_finite=False) self._max_homology_dimension = self._homology_dimensions[-1] return self @@ -423,7 +423,7 @@ def transform(self, X, y=None): """ check_is_fitted(self) - X = check_array(X, 
allow_nd=True, force_all_finite=False) + X = check_list_of_arrays(X, allow_nd=True, force_all_finite=False) Xt = Parallel(n_jobs=self.n_jobs)( delayed(self._gudhi_diagram)(X[i, :, :]) for i in range( @@ -565,7 +565,7 @@ def fit(self, X, y=None): 'infinity_values_': self.infinity_values_, '_homology_dimensions': self._homology_dimensions}, self._hyperparameters) - check_array(X, allow_nd=True) + check_list_of_arrays(X, allow_nd=True) self._max_homology_dimension = self._homology_dimensions[-1] return self @@ -602,7 +602,7 @@ def transform(self, X, y=None): """ check_is_fitted(self) - X = check_array(X, allow_nd=True) + X = check_list_of_arrays(X, allow_nd=True) Xt = Parallel(n_jobs=self.n_jobs)( delayed(self._gudhi_diagram)(X[i, :, :]) for i in range( From 5c3f24af4be68410a8af636133135b410f4a237b Mon Sep 17 00:00:00 2001 From: Wojciech Reise Date: Thu, 20 Feb 2020 16:47:00 +0100 Subject: [PATCH 03/45] Discard check on arrays and treat everything as lists in utils/validations:check_list_of_arrays, modify the point-cloud class accordingly --- gtda/homology/point_clouds.py | 12 +++++------ gtda/utils/validation.py | 39 ++++++++++++++++------------------- 2 files changed, 24 insertions(+), 27 deletions(-) diff --git a/gtda/homology/point_clouds.py b/gtda/homology/point_clouds.py index 2f7c616ee..5a40374cc 100644 --- a/gtda/homology/point_clouds.py +++ b/gtda/homology/point_clouds.py @@ -165,7 +165,7 @@ def fit(self, X, y=None): 'infinity_values_': self.infinity_values_, '_homology_dimensions': self._homology_dimensions}, self._hyperparameters) - check_list_of_arrays(X, allow_nd=True, force_all_finite=False) + check_list_of_arrays(X, ensure_2d=True, force_all_finite=False) self._max_homology_dimension = self._homology_dimensions[-1] return self @@ -207,7 +207,7 @@ def transform(self, X, y=None): """ check_is_fitted(self) - X = check_list_of_arrays(X, allow_nd=True, force_all_finite=False) + X = check_list_of_arrays(X, ensure_2d=True, force_all_finite=False) Xt = 
Parallel(n_jobs=self.n_jobs)(delayed(self._ripser_diagram)(X[i]) for i in range(len(X))) @@ -381,7 +381,7 @@ def fit(self, X, y=None): 'infinity_values_': self.infinity_values_, '_homology_dimensions': self._homology_dimensions}, self._hyperparameters) - check_list_of_arrays(X, allow_nd=True, force_all_finite=False) + check_list_of_arrays(X, ensure_2d=True, force_all_finite=False) self._max_homology_dimension = self._homology_dimensions[-1] return self @@ -423,7 +423,7 @@ def transform(self, X, y=None): """ check_is_fitted(self) - X = check_list_of_arrays(X, allow_nd=True, force_all_finite=False) + X = check_list_of_arrays(X, ensure_2d=True, force_all_finite=False) Xt = Parallel(n_jobs=self.n_jobs)( delayed(self._gudhi_diagram)(X[i, :, :]) for i in range( @@ -565,7 +565,7 @@ def fit(self, X, y=None): 'infinity_values_': self.infinity_values_, '_homology_dimensions': self._homology_dimensions}, self._hyperparameters) - check_list_of_arrays(X, allow_nd=True) + check_list_of_arrays(X, ensure_2d=True) self._max_homology_dimension = self._homology_dimensions[-1] return self @@ -602,7 +602,7 @@ def transform(self, X, y=None): """ check_is_fitted(self) - X = check_list_of_arrays(X, allow_nd=True) + X = check_list_of_arrays(X, ensure_2d=True) Xt = Parallel(n_jobs=self.n_jobs)( delayed(self._gudhi_diagram)(X[i, :, :]) for i in range( diff --git a/gtda/utils/validation.py b/gtda/utils/validation.py index 115040eae..2de9b0eb1 100644 --- a/gtda/utils/validation.py +++ b/gtda/utils/validation.py @@ -179,31 +179,28 @@ def validate_metric_params(metric, metric_params): def check_list_of_arrays(X, **kwargs): - """"Check a list of arrays, by itegrating through the input one by one. + """"Check a list of arrays, by integrating through the input one by one. 
Parameters ---------- - X : list(np.array), such that `X[i].ndim==2` (n_points, n_dimensions), + X : list(ndarray), such that `X[i].ndim==2` (n_points, n_dimensions), or an array `X.dim==3` Returns ------- X : list of input arrays, as modified by check_array """ - if isinstance(X, np.ndarray): - return check_array(X, **kwargs) - else: - results = [] - messages = [] - for id_x, x in enumerate(X): - try: - X[id_x] = np.squeeze(check_array(np.expand_dims(x, axis=0), - **kwargs), axis=0) - results.append(True) - messages = [''] - except ValueError as e: - results.append(False) - messages.append(str(e)) - if all(results): - return X - else: - raise ValueError("The following errors were raised" + - "by the inputs: \n" + "\n".join(messages)) + + results = [] + messages = [] + for id_x, x in enumerate(X): + try: + X[id_x] = check_array(x, **kwargs) + results.append(True) + messages = [''] + except ValueError as e: + results.append(False) + messages.append(str(e)) + if all(results): + return X + else: + raise ValueError("The following errors were raised" + + "by the inputs: \n" + "\n".join(messages)) From 57f98a228b375d5a2727e6d67eed54467f738196 Mon Sep 17 00:00:00 2001 From: Wojciech Reise Date: Thu, 20 Feb 2020 16:48:23 +0100 Subject: [PATCH 04/45] Adapt the tests for pcs --- gtda/homology/tests/test_point_clouds.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/gtda/homology/tests/test_point_clouds.py b/gtda/homology/tests/test_point_clouds.py index 42631d1e7..f18c91d44 100644 --- a/gtda/homology/tests/test_point_clouds.py +++ b/gtda/homology/tests/test_point_clouds.py @@ -97,3 +97,17 @@ def test_cp_transform(): cp = EuclideanCechPersistence() assert_almost_equal(cp.fit_transform(pc), pc_cp_res) + + +def test_vrp_list_of_arrays(): + pc_2 = np.array([[0, 1, 2], [1, 2, 4]]) + pc_list = [pc[0].copy(), pc_2] + vrp = VietorisRipsPersistence() + vrp.fit(pc_list) + + +def test_vrp_list_invalid_arrays(): + pc_invalid = np.array([0, 1]) + vrp = 
VietorisRipsPersistence() + with pytest.raises(ValueError): + vrp.fit([pc_invalid]) From d4113057f88cc5865ee818e3a13ef2d510c3d25c Mon Sep 17 00:00:00 2001 From: Wojciech Reise Date: Thu, 20 Feb 2020 17:03:25 +0100 Subject: [PATCH 05/45] Correct the indentation on the for loop --- gtda/utils/validation.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/gtda/utils/validation.py b/gtda/utils/validation.py index 2de9b0eb1..323ecdd5b 100644 --- a/gtda/utils/validation.py +++ b/gtda/utils/validation.py @@ -199,8 +199,8 @@ def check_list_of_arrays(X, **kwargs): except ValueError as e: results.append(False) messages.append(str(e)) - if all(results): - return X - else: - raise ValueError("The following errors were raised" + - "by the inputs: \n" + "\n".join(messages)) + if all(results): + return X + else: + raise ValueError("The following errors were raised" + + "by the inputs: \n" + "\n".join(messages)) From cf80832b652227b57ca62e25f4f58a7616944eda Mon Sep 17 00:00:00 2001 From: Wojciech Reise Date: Thu, 20 Feb 2020 17:04:31 +0100 Subject: [PATCH 06/45] Ensure that all points clouds are 2d tensors --- gtda/homology/consistent.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gtda/homology/consistent.py b/gtda/homology/consistent.py index f48fb82e9..c725e66ce 100644 --- a/gtda/homology/consistent.py +++ b/gtda/homology/consistent.py @@ -132,7 +132,7 @@ def fit(self, X, y=None): """ validate_params(self.get_params(), self._hyperparameters) - check_list_of_arrays(X, allow_nd=True) + check_list_of_arrays(X, ensure_2d=True) self._is_fitted = True return self @@ -164,7 +164,7 @@ def transform(self, X, y=None): """ check_is_fitted(self, '_is_fitted') - X = check_list_of_arrays(X, allow_nd=True) + X = check_list_of_arrays(X, ensure_2d=True) Xt = Parallel(n_jobs=self.n_jobs)( delayed(self._consistent_homology_distance)(X[i]) From c3310543e96bbb73e3ac40ea74d155650f52da07 Mon Sep 17 00:00:00 2001 From: Wojciech Reise Date: Thu, 20 
Feb 2020 18:06:42 +0100 Subject: [PATCH 07/45] Introduce the check_list_of_images function --- gtda/images/filtrations.py | 20 ++++++++-------- gtda/utils/validation.py | 49 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 10 deletions(-) diff --git a/gtda/images/filtrations.py b/gtda/images/filtrations.py index 250530251..56586f0c3 100644 --- a/gtda/images/filtrations.py +++ b/gtda/images/filtrations.py @@ -7,9 +7,9 @@ from joblib import Parallel, delayed, effective_n_jobs from sklearn.metrics import pairwise_distances from sklearn.utils import gen_even_slices -from sklearn.utils.validation import check_is_fitted, check_array +from sklearn.utils.validation import check_is_fitted from ..utils._docs import adapt_fit_transform_docs -from ..utils.validation import validate_params +from ..utils.validation import validate_params, check_list_of_images from ._utils import _dilate @@ -96,7 +96,7 @@ def fit(self, X, y=None): self : object """ - X = check_array(X, ensure_2d=False, allow_nd=True) + X = check_list_of_images(X) self.n_dimensions_ = len(X.shape) - 1 @@ -152,7 +152,7 @@ def transform(self, X, y=None): """ check_is_fitted(self) - Xt = check_array(X, ensure_2d=False, allow_nd=True, copy=True) + Xt = check_list_of_images(X, copy=True) Xt = Parallel(n_jobs=self.n_jobs)( delayed(self._calculate_height)(X[s]) @@ -279,7 +279,7 @@ def fit(self, X, y=None): self : object """ - X = check_array(X, ensure_2d=False, allow_nd=True) + X = check_list_of_images(X) self.n_dimensions_ = len(X.shape) - 1 @@ -341,7 +341,7 @@ def transform(self, X, y=None): """ check_is_fitted(self) - Xt = check_array(X, ensure_2d=False, allow_nd=True, copy=True) + Xt = check_list_of_images(X, copy=True) Xt = Parallel(n_jobs=self.n_jobs)( delayed(self._calculate_radial)(X[s]) @@ -435,7 +435,7 @@ def fit(self, X, y=None): self : object """ - X = check_array(X, ensure_2d=False, allow_nd=True) + X = check_list_of_images(X) self.max_value_ = np.sum(X.shape[1:]) @@ -475,7 +475,7 @@ 
def transform(self, X, y=None): """ check_is_fitted(self) - Xt = check_array(X, ensure_2d=False, allow_nd=True, copy=True) + Xt = check_list_of_images(X, copy=True) Xt = Parallel(n_jobs=self.n_jobs)( delayed(self._calculate_dilation)(X[s]) @@ -569,7 +569,7 @@ def fit(self, X, y=None): self : object """ - X = check_array(X, ensure_2d=False, allow_nd=True) + X = check_list_of_images(X) self.max_value_ = np.sum(X.shape[1:]) @@ -609,7 +609,7 @@ def transform(self, X, y=None): """ check_is_fitted(self) - Xt = check_array(X, ensure_2d=False, allow_nd=True, copy=True) + Xt = check_list_of_images(X, copy=True) Xt = Parallel(n_jobs=self.n_jobs)( delayed(self._calculate_erosion)(X[s]) diff --git a/gtda/utils/validation.py b/gtda/utils/validation.py index 323ecdd5b..3c6df7a14 100644 --- a/gtda/utils/validation.py +++ b/gtda/utils/validation.py @@ -178,6 +178,40 @@ def validate_metric_params(metric, metric_params): available_metric_params[metric])) +def check_list_of_images(X, **kwargs): + """"Check a list of arrays representing images, by integrating + through the input one by one. + Parameters + ---------- + X : list(ndarray), ??? + Returns + ------- + X : list of input arrays, as modified by check_array + """ + kwargs_default = {'allow_n_axis_incons': False, + 'allow_dims_incons': False, + 'force_all_finite': True, + 'ensure_2d': False} + kwargs_default.update(kwargs) + return check_list_of_arrays(X, **kwargs_default) + + +def check_list_of_point_clouds(X, **kwargs): + """"Check a list of arrays representing point clouds, by integrating + through the input one by one. 
+ Parameters + ---------- + X : list(ndarray), such that `X[i].ndim==2` (n_points, n_dimensions), + or an array `X.dim==3` + Returns + ------- + X : list of input arrays, as modified by check_array + """ + kwargs_default = {'ensure_2d': True, 'force_all_finite': False} + kwargs_default.update(kwargs) + return check_list_of_arrays(X, **kwargs_default) + + def check_list_of_arrays(X, **kwargs): """"Check a list of arrays, by integrating through the input one by one. Parameters @@ -188,6 +222,21 @@ def check_list_of_arrays(X, **kwargs): ------- X : list of input arrays, as modified by check_array """ + allow_n_axis_incons = kwargs.pop('allow_n_axis_incons', True) + allow_dim_incons = kwargs.pop('allow_dims_incons', True) + + # if restrictions on the dimensions of the input are imposed + if not allow_dim_incons: + shapes = [X.shape for x in X] + if not(all([shapes[0] == s for s in shapes])): + raise ValueError("The arrays in X do not have the same dimensions" + "({}), while they should.".format(shapes)) + # if the number of dimensions can very + if not allow_n_axis_incons: + n_axis = [len(X.shape) for x in X] + if not(all([n_axis[0] == n for n in n_axis])): + raise ValueError("The arrays in X do not have the same number" + "of axes ({}), while they should.".format(n_axis)) results = [] messages = [] From 9cfd604821b3f6b57db8fb9051656a0359bc3f30 Mon Sep 17 00:00:00 2001 From: Wojciech Reise Date: Fri, 21 Feb 2020 09:18:25 +0100 Subject: [PATCH 08/45] Use check_point_clouds in the point-cloud filtrations. 
Add the allow_nd param to check_array for images --- gtda/homology/point_clouds.py | 14 +++++++------- gtda/utils/validation.py | 28 ++++++++++++++-------------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/gtda/homology/point_clouds.py b/gtda/homology/point_clouds.py index 5a40374cc..aed190cb4 100644 --- a/gtda/homology/point_clouds.py +++ b/gtda/homology/point_clouds.py @@ -12,7 +12,7 @@ from ._utils import _postprocess_diagrams from ..externals.python import ripser, SparseRipsComplex, CechComplex from ..utils._docs import adapt_fit_transform_docs -from ..utils.validation import validate_params, check_list_of_arrays +from ..utils.validation import validate_params, check_list_of_point_clouds @adapt_fit_transform_docs @@ -165,7 +165,7 @@ def fit(self, X, y=None): 'infinity_values_': self.infinity_values_, '_homology_dimensions': self._homology_dimensions}, self._hyperparameters) - check_list_of_arrays(X, ensure_2d=True, force_all_finite=False) + check_list_of_point_clouds(X, ensure_2d=True, force_all_finite=False) self._max_homology_dimension = self._homology_dimensions[-1] return self @@ -207,7 +207,7 @@ def transform(self, X, y=None): """ check_is_fitted(self) - X = check_list_of_arrays(X, ensure_2d=True, force_all_finite=False) + X = check_list_of_point_clouds(X, ensure_2d=True, force_all_finite=False) Xt = Parallel(n_jobs=self.n_jobs)(delayed(self._ripser_diagram)(X[i]) for i in range(len(X))) @@ -381,7 +381,7 @@ def fit(self, X, y=None): 'infinity_values_': self.infinity_values_, '_homology_dimensions': self._homology_dimensions}, self._hyperparameters) - check_list_of_arrays(X, ensure_2d=True, force_all_finite=False) + check_list_of_point_clouds(X, ensure_2d=True, force_all_finite=False) self._max_homology_dimension = self._homology_dimensions[-1] return self @@ -423,7 +423,7 @@ def transform(self, X, y=None): """ check_is_fitted(self) - X = check_list_of_arrays(X, ensure_2d=True, force_all_finite=False) + X = check_list_of_point_clouds(X, 
ensure_2d=True, force_all_finite=False) Xt = Parallel(n_jobs=self.n_jobs)( delayed(self._gudhi_diagram)(X[i, :, :]) for i in range( @@ -565,7 +565,7 @@ def fit(self, X, y=None): 'infinity_values_': self.infinity_values_, '_homology_dimensions': self._homology_dimensions}, self._hyperparameters) - check_list_of_arrays(X, ensure_2d=True) + check_list_of_point_clouds(X, ensure_2d=True) self._max_homology_dimension = self._homology_dimensions[-1] return self @@ -602,7 +602,7 @@ def transform(self, X, y=None): """ check_is_fitted(self) - X = check_list_of_arrays(X, ensure_2d=True) + X = check_list_of_point_clouds(X, ensure_2d=True) Xt = Parallel(n_jobs=self.n_jobs)( delayed(self._gudhi_diagram)(X[i, :, :]) for i in range( diff --git a/gtda/utils/validation.py b/gtda/utils/validation.py index 3c6df7a14..8c8eeef48 100644 --- a/gtda/utils/validation.py +++ b/gtda/utils/validation.py @@ -183,15 +183,14 @@ def check_list_of_images(X, **kwargs): through the input one by one. Parameters ---------- - X : list(ndarray), ??? 
+ X : list of ndarray, Returns ------- - X : list of input arrays, as modified by check_array + X : list of ndarray, as modified by check_array """ - kwargs_default = {'allow_n_axis_incons': False, - 'allow_dims_incons': False, - 'force_all_finite': True, - 'ensure_2d': False} + kwargs_default = {'force_same_n_axis': False, + 'force_same_dim': True, 'force_all_finite': True, + 'ensure_2d': False, 'allow_nd': True} kwargs_default.update(kwargs) return check_list_of_arrays(X, **kwargs_default) @@ -207,7 +206,8 @@ def check_list_of_point_clouds(X, **kwargs): ------- X : list of input arrays, as modified by check_array """ - kwargs_default = {'ensure_2d': True, 'force_all_finite': False} + kwargs_default = {'ensure_2d': True, 'force_all_finite': False, + 'force_same_dim': False, 'force_same_n_axis': True} kwargs_default.update(kwargs) return check_list_of_arrays(X, **kwargs_default) @@ -222,18 +222,18 @@ def check_list_of_arrays(X, **kwargs): ------- X : list of input arrays, as modified by check_array """ - allow_n_axis_incons = kwargs.pop('allow_n_axis_incons', True) - allow_dim_incons = kwargs.pop('allow_dims_incons', True) + force_same_dim = kwargs.pop('force_same_dim', True) + force_same_n_axis = kwargs.pop('force_same_n_axis', True) # if restrictions on the dimensions of the input are imposed - if not allow_dim_incons: - shapes = [X.shape for x in X] + if force_same_dim: + shapes = [x.shape for x in X] if not(all([shapes[0] == s for s in shapes])): raise ValueError("The arrays in X do not have the same dimensions" "({}), while they should.".format(shapes)) - # if the number of dimensions can very - if not allow_n_axis_incons: - n_axis = [len(X.shape) for x in X] + # if the number of dimensions can vary + if force_same_n_axis: + n_axis = [len(x.shape) for x in X] if not(all([n_axis[0] == n for n in n_axis])): raise ValueError("The arrays in X do not have the same number" "of axes ({}), while they should.".format(n_axis)) From 
993cff1782413a0525cba07ea1f97f2049a74fc6 Mon Sep 17 00:00:00 2001 From: Wojciech Reise Date: Fri, 21 Feb 2020 09:25:55 +0100 Subject: [PATCH 09/45] Fix lkinting --- gtda/homology/point_clouds.py | 6 ++++-- gtda/utils/validation.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/gtda/homology/point_clouds.py b/gtda/homology/point_clouds.py index aed190cb4..5e1c0981f 100644 --- a/gtda/homology/point_clouds.py +++ b/gtda/homology/point_clouds.py @@ -207,7 +207,8 @@ def transform(self, X, y=None): """ check_is_fitted(self) - X = check_list_of_point_clouds(X, ensure_2d=True, force_all_finite=False) + X = check_list_of_point_clouds(X, ensure_2d=True, + force_all_finite=False) Xt = Parallel(n_jobs=self.n_jobs)(delayed(self._ripser_diagram)(X[i]) for i in range(len(X))) @@ -423,7 +424,8 @@ def transform(self, X, y=None): """ check_is_fitted(self) - X = check_list_of_point_clouds(X, ensure_2d=True, force_all_finite=False) + X = check_list_of_point_clouds(X, ensure_2d=True, + force_all_finite=False) Xt = Parallel(n_jobs=self.n_jobs)( delayed(self._gudhi_diagram)(X[i, :, :]) for i in range( diff --git a/gtda/utils/validation.py b/gtda/utils/validation.py index 8c8eeef48..24863eff5 100644 --- a/gtda/utils/validation.py +++ b/gtda/utils/validation.py @@ -183,7 +183,7 @@ def check_list_of_images(X, **kwargs): through the input one by one. 
Parameters ---------- - X : list of ndarray, + X : list of ndarray, Returns ------- X : list of ndarray, as modified by check_array From 028bd00a5d11b92ec6302523c7cf1c10a5e08fb0 Mon Sep 17 00:00:00 2001 From: Wojciech Reise Date: Fri, 21 Feb 2020 09:59:13 +0100 Subject: [PATCH 10/45] Change the size of inputs in preprocessing --- gtda/images/preprocessing.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/gtda/images/preprocessing.py b/gtda/images/preprocessing.py index 1b2fb93c0..e189aec8c 100644 --- a/gtda/images/preprocessing.py +++ b/gtda/images/preprocessing.py @@ -6,9 +6,9 @@ from sklearn.base import BaseEstimator, TransformerMixin from joblib import Parallel, delayed, effective_n_jobs from sklearn.utils import gen_even_slices -from sklearn.utils.validation import check_is_fitted, check_array +from sklearn.utils.validation import check_is_fitted from ..utils._docs import adapt_fit_transform_docs -from ..utils.validation import validate_params +from ..utils.validation import validate_params,check_list_of_images @adapt_fit_transform_docs @@ -83,7 +83,7 @@ def fit(self, X, y=None): """ validate_params(self.get_params(), self._hyperparameters) - X = check_array(X, ensure_2d=False, allow_nd=True) + X = check_list_of_images(X) self.n_dimensions_ = len(X.shape) - 1 self.max_value_ = np.max(X) @@ -114,7 +114,7 @@ def transform(self, X, y=None): """ check_is_fitted(self) - Xt = check_array(X, ensure_2d=False, allow_nd=True, copy=True) + Xt = check_list_of_images(X, copy=True) Xt = Parallel(n_jobs=self.n_jobs)(delayed( self._binarize)(X[s]) @@ -166,7 +166,7 @@ def fit(self, X, y=None): self : object """ - X = check_array(X, ensure_2d=False, allow_nd=True) + X = check_list_of_images(X) self._is_fitted = True return self @@ -194,7 +194,7 @@ def transform(self, X, y=None): """ check_is_fitted(self, ['_is_fitted']) - Xt = check_array(X, ensure_2d=False, allow_nd=True, copy=True) + Xt = check_list_of_images(X, copy=True) Xt = 
Parallel(n_jobs=self.n_jobs)(delayed( np.logical_not)(X[s]) @@ -275,7 +275,7 @@ def fit(self, X, y=None): {**self._hyperparameters, 'paddings_dim': [int, [n_dimensions]]}) - check_array(X, ensure_2d=False, allow_nd=True) + check_list_of_images(X) self._pad_width = ((0, 0), *[(self.paddings_[axis], self.paddings_[axis]) @@ -306,7 +306,7 @@ def transform(self, X, y=None): """ check_is_fitted(self) - Xt = check_array(X, ensure_2d=False, allow_nd=True, copy=True) + Xt = check_list_of_images(X, copy=True) Xt = Parallel(n_jobs=self.n_jobs)(delayed( np.pad)(X[s], pad_width=self._pad_width, @@ -378,7 +378,7 @@ def fit(self, X, y=None): self : object """ - X = check_array(X, ensure_2d=False, allow_nd=True) + X = check_list_of_images(X) n_dimensions = len(X.shape) - 1 axis_order = [2, 1, 3] @@ -415,7 +415,7 @@ def transform(self, X, y=None): """ check_is_fitted(self) - Xt = check_array(X, ensure_2d=False, allow_nd=True, copy=True) + Xt = check_list_of_images(X, copy=True) Xt = Parallel(n_jobs=self.n_jobs)(delayed( self._embed)(X[s]) From c1e7b163b3482330703dbcac1bfd391620c9ed49 Mon Sep 17 00:00:00 2001 From: Wojciech Reise Date: Fri, 21 Feb 2020 10:52:48 +0100 Subject: [PATCH 11/45] Make the outpu of the image2pctransformer variable in size, and adjust the tests (modify the expected values) --- gtda/images/preprocessing.py | 12 +++++++--- gtda/images/tests/test_preprocessing.py | 32 +++++++++++++++++++++++-- 2 files changed, 39 insertions(+), 5 deletions(-) diff --git a/gtda/images/preprocessing.py b/gtda/images/preprocessing.py index e189aec8c..ba8707e4d 100644 --- a/gtda/images/preprocessing.py +++ b/gtda/images/preprocessing.py @@ -2,6 +2,7 @@ # License: GNU AGPLv3 import numbers +from functools import reduce import numpy as np from sklearn.base import BaseEstimator, TransformerMixin from joblib import Parallel, delayed, effective_n_jobs @@ -353,11 +354,15 @@ class ImageToPointCloud(BaseEstimator, TransformerMixin): def __init__(self, n_jobs=None): self.n_jobs = n_jobs - 
def _embed(self, X): + def _embed_(self, X): Xpts = np.stack([self.mesh_ for _ in range(X.shape[0])]) * 1.0 Xpts[np.logical_not(X.reshape((X.shape[0], -1))), :] += np.inf return Xpts + def _embed(self, X): + Xpts = [np.stack(np.nonzero(x), axis=1) for x in X] + return Xpts + def fit(self, X, y=None): """Do nothing and return the estimator unchanged. This method is here to implement the usual scikit-learn API and hence @@ -380,7 +385,7 @@ def fit(self, X, y=None): """ X = check_list_of_images(X) - n_dimensions = len(X.shape) - 1 + n_dimensions = len(X[0].shape) axis_order = [2, 1, 3] mesh_range_list = [np.arange(0, X.shape[i]) for i in axis_order[:n_dimensions]] @@ -421,5 +426,6 @@ def transform(self, X, y=None): self._embed)(X[s]) for s in gen_even_slices(X.shape[0], effective_n_jobs(self.n_jobs))) - Xt = np.concatenate(Xt) + #Xt = np.concatenate(Xt) + Xt = reduce(sum, Xt, []) return Xt diff --git a/gtda/images/tests/test_preprocessing.py b/gtda/images/tests/test_preprocessing.py index 77dd4855c..d55180c28 100644 --- a/gtda/images/tests/test_preprocessing.py +++ b/gtda/images/tests/test_preprocessing.py @@ -136,6 +136,24 @@ def test_img2pc_not_fitted(): [np.inf, np.inf, np.inf], [np.inf, np.inf, np.inf], [np.inf, np.inf, np.inf], [np.inf, np.inf, np.inf]]]) +images_2D_img2pc = list( + [np.array([[0., 2.], [1., 2.], [0., 1.], [1., 1.], [0., 0.], [1., 0.]]), + np.array([[0., 2.], [0., 1.], [0., 0.]]), + np.array([[]]) + ]) + +images_3D_img2pc = list( + [np.array([[0., 2., 0.], [0., 2., 1.], + [1., 2., 0.], [1., 2., 1.], + [0., 1., 0.], [0., 1., 1.], + [1., 1., 0.], [1., 1., 1.], + [0., 0., 0.], [0., 0., 1.], + [1., 0., 0.], [1., 0., 1.]]), + np.array([[0., 2., 0.], [0., 2., 1.], + [0., 1., 0.], [0., 1., 1.], + [0., 0., 0.], [0., 0., 1.]]), + np.array([[]])]) + @pytest.mark.parametrize("images, expected", [(images_2D_small, images_2D_img2pc), @@ -143,5 +161,15 @@ def test_img2pc_not_fitted(): def test_img2pc_transform(images, expected): img2pc = ImageToPointCloud() - 
assert_almost_equal(img2pc.fit_transform(images), - expected) + all(compare_arrays_as_sets(res, expected) + for res, expected in zip(img2pc.fit_transform(images), + expected)) + + +def compare_arrays_as_sets(a1, a2): + """ A helper function to compare two point_clouds. They should have the same points, + but not necessarily in the same order""" + def to_set_of_elements(a): + return set([tuple(p) for p in a]) + as1, as2 = [to_set_of_elements(a) for a in [a1, a2]] + return (as1 <= as2) and (as1 >= as2) From 4572d74cc269a6723d30887b16d0e72568d90ad6 Mon Sep 17 00:00:00 2001 From: Wojciech Reise Date: Fri, 21 Feb 2020 10:55:35 +0100 Subject: [PATCH 12/45] Fix linting in gttda.images and the docstring from imageToPointCloud --- gtda/images/preprocessing.py | 23 ++++------------------- gtda/images/tests/test_preprocessing.py | 5 +++-- 2 files changed, 7 insertions(+), 21 deletions(-) diff --git a/gtda/images/preprocessing.py b/gtda/images/preprocessing.py index ba8707e4d..29c34e3d8 100644 --- a/gtda/images/preprocessing.py +++ b/gtda/images/preprocessing.py @@ -9,7 +9,7 @@ from sklearn.utils import gen_even_slices from sklearn.utils.validation import check_is_fitted from ..utils._docs import adapt_fit_transform_docs -from ..utils.validation import validate_params,check_list_of_images +from ..utils.validation import validate_params, check_list_of_images @adapt_fit_transform_docs @@ -324,8 +324,7 @@ class ImageToPointCloud(BaseEstimator, TransformerMixin): """Represent active pixels in 2D/3D binary images as points in 2D/3D space. The coordinates of each point is calculated as follows. For each activated - pixel, assign coordinates that are the pixel position on this image. All - deactivated pixels are given infinite coordinates in that space. + pixel, assign coordinates that are the pixel position on this image. This transformer is meant to transform a collection of images to a point cloud so that collection of point clouds-based persistent homology module can be applied. 
@@ -354,11 +353,6 @@ class ImageToPointCloud(BaseEstimator, TransformerMixin): def __init__(self, n_jobs=None): self.n_jobs = n_jobs - def _embed_(self, X): - Xpts = np.stack([self.mesh_ for _ in range(X.shape[0])]) * 1.0 - Xpts[np.logical_not(X.reshape((X.shape[0], -1))), :] += np.inf - return Xpts - def _embed(self, X): Xpts = [np.stack(np.nonzero(x), axis=1) for x in X] return Xpts @@ -383,16 +377,7 @@ def fit(self, X, y=None): self : object """ - X = check_list_of_images(X) - - n_dimensions = len(X[0].shape) - axis_order = [2, 1, 3] - mesh_range_list = [np.arange(0, X.shape[i]) - for i in axis_order[:n_dimensions]] - - self.mesh_ = np.flip(np.stack(np.meshgrid(*mesh_range_list), - axis=n_dimensions), - axis=0).reshape((-1, n_dimensions)) + _ = check_list_of_images(X) return self @@ -426,6 +411,6 @@ def transform(self, X, y=None): self._embed)(X[s]) for s in gen_even_slices(X.shape[0], effective_n_jobs(self.n_jobs))) - #Xt = np.concatenate(Xt) + Xt = reduce(sum, Xt, []) return Xt diff --git a/gtda/images/tests/test_preprocessing.py b/gtda/images/tests/test_preprocessing.py index d55180c28..8ee30ad45 100644 --- a/gtda/images/tests/test_preprocessing.py +++ b/gtda/images/tests/test_preprocessing.py @@ -167,8 +167,9 @@ def test_img2pc_transform(images, expected): def compare_arrays_as_sets(a1, a2): - """ A helper function to compare two point_clouds. They should have the same points, - but not necessarily in the same order""" + """ A helper function to compare two point_clouds. + They should have the same points, but not necessarily in the same order. 
+ """ def to_set_of_elements(a): return set([tuple(p) for p in a]) as1, as2 = [to_set_of_elements(a) for a in [a1, a2]] From 5556e4c284cc98efaf82e46b5109886fcc2d38c0 Mon Sep 17 00:00:00 2001 From: Wojciech Reise Date: Fri, 21 Feb 2020 11:30:52 +0100 Subject: [PATCH 13/45] Document the validation functions, lint test preprocessing and make ImageToPointCloud pass the check_is:_fitted test --- gtda/images/preprocessing.py | 3 ++- gtda/images/tests/test_preprocessing.py | 3 ++- gtda/utils/validation.py | 30 +++++++++++++++++++++---- 3 files changed, 30 insertions(+), 6 deletions(-) diff --git a/gtda/images/preprocessing.py b/gtda/images/preprocessing.py index 29c34e3d8..a119452f6 100644 --- a/gtda/images/preprocessing.py +++ b/gtda/images/preprocessing.py @@ -124,7 +124,7 @@ def transform(self, X, y=None): Xt = np.concatenate(Xt) if self.n_dimensions_ == 2: - Xt = Xt.reshape((*X.shape)) + Xt = Xt.reshape(X.shape) return Xt @@ -378,6 +378,7 @@ def fit(self, X, y=None): """ _ = check_list_of_images(X) + self.is_fitted_ = True return self diff --git a/gtda/images/tests/test_preprocessing.py b/gtda/images/tests/test_preprocessing.py index 8ee30ad45..3b877960e 100644 --- a/gtda/images/tests/test_preprocessing.py +++ b/gtda/images/tests/test_preprocessing.py @@ -160,9 +160,10 @@ def test_img2pc_not_fitted(): (images_3D_small, images_3D_img2pc)]) def test_img2pc_transform(images, expected): img2pc = ImageToPointCloud() + results = img2pc.fit_transform(images) all(compare_arrays_as_sets(res, expected) - for res, expected in zip(img2pc.fit_transform(images), + for res, expected in zip(results, expected)) diff --git a/gtda/utils/validation.py b/gtda/utils/validation.py index 24863eff5..637568b61 100644 --- a/gtda/utils/validation.py +++ b/gtda/utils/validation.py @@ -180,13 +180,17 @@ def validate_metric_params(metric, metric_params): def check_list_of_images(X, **kwargs): """"Check a list of arrays representing images, by integrating - through the input one by one. 
+ through the input one by one. To pass a test with + :param:`kwargs`=None + Parameters ---------- X : list of ndarray, + Each entry of X corresponds to an image. Returns ------- X : list of ndarray, as modified by check_array + """ kwargs_default = {'force_same_n_axis': False, 'force_same_dim': True, 'force_all_finite': True, @@ -202,9 +206,11 @@ def check_list_of_point_clouds(X, **kwargs): ---------- X : list(ndarray), such that `X[i].ndim==2` (n_points, n_dimensions), or an array `X.dim==3` + Returns ------- X : list of input arrays, as modified by check_array + """ kwargs_default = {'ensure_2d': True, 'force_all_finite': False, 'force_same_dim': False, 'force_same_n_axis': True} @@ -212,18 +218,34 @@ def check_list_of_point_clouds(X, **kwargs): return check_list_of_arrays(X, **kwargs_default) -def check_list_of_arrays(X, **kwargs): +def check_list_of_arrays(X, force_same_dim=True, force_same_n_axis=True, + **kwargs): """"Check a list of arrays, by integrating through the input one by one. + The constraints are to be specified in :param:`kwargs`. On top of + parameters from :func:`~sklearn.utils.validation.check_array`, + the optional parameters are listed below. + Parameters ---------- X : list(ndarray), such that `X[i].ndim==2` (n_points, n_dimensions), or an array `X.dim==3` + + force_same_dim : bool, optional, default: ``True`` + Indicates whether the shapes of the elements of X should all + be the same. + + force_same_n_axis : bool, optional, default: ``True`` + Indicates whether the number of axes in the elements of X should all + be the same. + + kwargs: dict or None, optional, default: ``None`` + Parameters accepted by :func:`~sklearn.utils.validation.check_array`. 
+ Returns ------- X : list of input arrays, as modified by check_array + """ - force_same_dim = kwargs.pop('force_same_dim', True) - force_same_n_axis = kwargs.pop('force_same_n_axis', True) # if restrictions on the dimensions of the input are imposed if force_same_dim: From 6faf94aabaa6be9f94b5c8b0af964ab17369f809 Mon Sep 17 00:00:00 2001 From: Wojciech Reise Date: Fri, 21 Feb 2020 11:38:31 +0100 Subject: [PATCH 14/45] Work on docstring for check images and point cloud functions --- gtda/utils/validation.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/gtda/utils/validation.py b/gtda/utils/validation.py index 637568b61..8f1441988 100644 --- a/gtda/utils/validation.py +++ b/gtda/utils/validation.py @@ -181,19 +181,26 @@ def validate_metric_params(metric, metric_params): def check_list_of_images(X, **kwargs): """"Check a list of arrays representing images, by integrating through the input one by one. To pass a test with - :param:`kwargs`=None + :param:`kwargs`=``None``, all images x in X must satisfy: + - x.ndim >= 2, + - all(np.isfinite(x)) Parameters ---------- X : list of ndarray, Each entry of X corresponds to an image. + + kwargs: dict or None, optional, default: ``None`` + Parameters accepted by + :func:`~gtda.utils.validation.check_list_of_arrays`. + Returns ------- X : list of ndarray, as modified by check_array """ kwargs_default = {'force_same_n_axis': False, - 'force_same_dim': True, 'force_all_finite': True, + 'force_same_dim': False, 'force_all_finite': True, 'ensure_2d': False, 'allow_nd': True} kwargs_default.update(kwargs) return check_list_of_arrays(X, **kwargs_default) @@ -201,12 +208,20 @@ def check_list_of_images(X, **kwargs): def check_list_of_point_clouds(X, **kwargs): """"Check a list of arrays representing point clouds, by integrating - through the input one by one. + through the input one by one. 
To pass a test with + :param:`kwargs`=``None``, all point clouds x, y in X must satisfy: + - x.ndim == 2, + - len(y.shape) == len(y.shape). + Parameters ---------- X : list(ndarray), such that `X[i].ndim==2` (n_points, n_dimensions), or an array `X.dim==3` + kwargs: dict or None, optional, default: ``None`` + Parameters accepted by + :func:`~gtda.utils.validation.check_list_of_arrays`. + Returns ------- X : list of input arrays, as modified by check_array From 2c445d7eca2e2a21fc41f1a108f7e6e7800da0e4 Mon Sep 17 00:00:00 2001 From: Wojciech Reise Date: Fri, 21 Feb 2020 15:26:14 +0100 Subject: [PATCH 15/45] Fix docstring issues --- gtda/utils/validation.py | 49 +++++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/gtda/utils/validation.py b/gtda/utils/validation.py index 8f1441988..d21278cf1 100644 --- a/gtda/utils/validation.py +++ b/gtda/utils/validation.py @@ -179,24 +179,25 @@ def validate_metric_params(metric, metric_params): def check_list_of_images(X, **kwargs): - """"Check a list of arrays representing images, by integrating - through the input one by one. To pass a test with - :param:`kwargs`=``None``, all images x in X must satisfy: - - x.ndim >= 2, - - all(np.isfinite(x)) + """Check a list of arrays representing images, by integrating + through the input one by one. To pass a test when `kwargs` is ``None``, + all images ``x`` in `X` must satisfy: + - ``x.ndim`` >= 2, + - ``all(np.isfinite(x))`` Parameters ---------- - X : list of ndarray, - Each entry of X corresponds to an image. + X : list of ndarray + Each entry of `X` corresponds to an image. - kwargs: dict or None, optional, default: ``None`` + kwargs : dict or None, optional, default: ``None`` Parameters accepted by :func:`~gtda.utils.validation.check_list_of_arrays`. 
Returns ------- - X : list of ndarray, as modified by check_array + X : list of ndarray + as modified by :func:`~sklearn.utils.validation.check_array` """ kwargs_default = {'force_same_n_axis': False, @@ -207,24 +208,25 @@ def check_list_of_images(X, **kwargs): def check_list_of_point_clouds(X, **kwargs): - """"Check a list of arrays representing point clouds, by integrating - through the input one by one. To pass a test with - :param:`kwargs`=``None``, all point clouds x, y in X must satisfy: - - x.ndim == 2, - - len(y.shape) == len(y.shape). + """Check a list of arrays representing point clouds, by integrating + through the input one by one. To pass a test when `kwargs` is ``None``, + all point clouds ``x``, ``y`` in X must satisfy: + - ``x.ndim == 2``, + - ``len(y.shape) == len(y.shape)``. Parameters ---------- - X : list(ndarray), such that `X[i].ndim==2` (n_points, n_dimensions), + X : list of ndarray, such that `X[i].ndim==2` (n_points, n_dimensions), or an array `X.dim==3` - kwargs: dict or None, optional, default: ``None`` + kwargs : dict or None, optional, default: ``None`` Parameters accepted by :func:`~gtda.utils.validation.check_list_of_arrays`. Returns ------- - X : list of input arrays, as modified by check_array + X : list of input arrays + as modified by :func:`~sklearn.utils.validation.check_array` """ kwargs_default = {'ensure_2d': True, 'force_all_finite': False, @@ -235,7 +237,7 @@ def check_list_of_point_clouds(X, **kwargs): def check_list_of_arrays(X, force_same_dim=True, force_same_n_axis=True, **kwargs): - """"Check a list of arrays, by integrating through the input one by one. + """Check a list of arrays, by integrating through the input one by one. The constraints are to be specified in :param:`kwargs`. On top of parameters from :func:`~sklearn.utils.validation.check_array`, the optional parameters are listed below. 
@@ -258,7 +260,8 @@ def check_list_of_arrays(X, force_same_dim=True, force_same_n_axis=True, Returns ------- - X : list of input arrays, as modified by check_array + X : list of input arrays + as modified by :func:`~sklearn.utils.validation.check_array` """ @@ -266,14 +269,14 @@ def check_list_of_arrays(X, force_same_dim=True, force_same_n_axis=True, if force_same_dim: shapes = [x.shape for x in X] if not(all([shapes[0] == s for s in shapes])): - raise ValueError("The arrays in X do not have the same dimensions" - "({}), while they should.".format(shapes)) + raise ValueError(f"The arrays in X do not have the same dimensions" + "({shapes}), while they should.") # if the number of dimensions can vary if force_same_n_axis: n_axis = [len(x.shape) for x in X] if not(all([n_axis[0] == n for n in n_axis])): - raise ValueError("The arrays in X do not have the same number" - "of axes ({}), while they should.".format(n_axis)) + raise ValueError(f"The arrays in X do not have the same number" + "of axes ({n_axis}), while they should.") results = [] messages = [] From 4219e82d20e97271b63c8904288f33091c1695ab Mon Sep 17 00:00:00 2001 From: Wojciech Reise Date: Fri, 21 Feb 2020 15:28:16 +0100 Subject: [PATCH 16/45] Rename force_same_dim to force_same_shape --- gtda/utils/validation.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/gtda/utils/validation.py b/gtda/utils/validation.py index d21278cf1..89384f8c5 100644 --- a/gtda/utils/validation.py +++ b/gtda/utils/validation.py @@ -201,7 +201,7 @@ def check_list_of_images(X, **kwargs): """ kwargs_default = {'force_same_n_axis': False, - 'force_same_dim': False, 'force_all_finite': True, + 'force_same_shape': False, 'force_all_finite': True, 'ensure_2d': False, 'allow_nd': True} kwargs_default.update(kwargs) return check_list_of_arrays(X, **kwargs_default) @@ -230,12 +230,12 @@ def check_list_of_point_clouds(X, **kwargs): """ kwargs_default = {'ensure_2d': True, 'force_all_finite': False, - 
'force_same_dim': False, 'force_same_n_axis': True} + 'force_same_shape': False, 'force_same_n_axis': True} kwargs_default.update(kwargs) return check_list_of_arrays(X, **kwargs_default) -def check_list_of_arrays(X, force_same_dim=True, force_same_n_axis=True, +def check_list_of_arrays(X, force_same_shape=True, force_same_n_axis=True, **kwargs): """Check a list of arrays, by integrating through the input one by one. The constraints are to be specified in :param:`kwargs`. On top of @@ -247,7 +247,7 @@ def check_list_of_arrays(X, force_same_dim=True, force_same_n_axis=True, X : list(ndarray), such that `X[i].ndim==2` (n_points, n_dimensions), or an array `X.dim==3` - force_same_dim : bool, optional, default: ``True`` + force_same_shape : bool, optional, default: ``True`` Indicates whether the shapes of the elements of X should all be the same. @@ -266,7 +266,7 @@ def check_list_of_arrays(X, force_same_dim=True, force_same_n_axis=True, """ # if restrictions on the dimensions of the input are imposed - if force_same_dim: + if force_same_shape: shapes = [x.shape for x in X] if not(all([shapes[0] == s for s in shapes])): raise ValueError(f"The arrays in X do not have the same dimensions" From cb03ee450c71be94f1a3615700e136b19a79773e Mon Sep 17 00:00:00 2001 From: Wojciech Reise Date: Fri, 21 Feb 2020 15:56:37 +0100 Subject: [PATCH 17/45] Remove list of results, andreplaceitwith aflag parameter, put ToDo --- gtda/utils/validation.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/gtda/utils/validation.py b/gtda/utils/validation.py index 89384f8c5..bece4fb25 100644 --- a/gtda/utils/validation.py +++ b/gtda/utils/validation.py @@ -86,7 +86,7 @@ def validate_params(parameters, references): if len(references[key]) == 1: continue if references[key][0] == list or \ - references[key][0] == np.ndarray: + references[key][0] == np.ndarray: for parameter in parameters[key]: if references[key][1][0] == int: if not 
isinstance(parameter, numbers.Number): @@ -164,7 +164,7 @@ def validate_metric_params(metric, metric_params): param_type)) if param_values is not None: if input_param < param_values[0] or \ - input_param > param_values[1]: + input_param > param_values[1]: raise ValueError("{} in param_metric should be between {} " "and {} but has been set to {}." "".format(param, param_values[0], @@ -200,7 +200,7 @@ def check_list_of_images(X, **kwargs): as modified by :func:`~sklearn.utils.validation.check_array` """ - kwargs_default = {'force_same_n_axis': False, + kwargs_default = {'force_same_n_axis': False, 'force_same_shape': False, 'force_all_finite': True, 'ensure_2d': False, 'allow_nd': True} kwargs_default.update(kwargs) @@ -268,28 +268,28 @@ def check_list_of_arrays(X, force_same_shape=True, force_same_n_axis=True, # if restrictions on the dimensions of the input are imposed if force_same_shape: shapes = [x.shape for x in X] - if not(all([shapes[0] == s for s in shapes])): + if not (all([shapes[0] == s for s in shapes])): raise ValueError(f"The arrays in X do not have the same dimensions" "({shapes}), while they should.") # if the number of dimensions can vary - if force_same_n_axis: - n_axis = [len(x.shape) for x in X] - if not(all([n_axis[0] == n for n in n_axis])): + elif force_same_n_axis: + n_axis = [x.ndim for x in X] + if not (all([n_axis[0] == n for n in n_axis])): raise ValueError(f"The arrays in X do not have the same number" "of axes ({n_axis}), while they should.") - results = [] + is_check_failed = False messages = [] - for id_x, x in enumerate(X): + for i, x in enumerate(X): try: - X[id_x] = check_array(x, **kwargs) - results.append(True) + # TODO: verifythe behavior depending on copy. 
+ X[i] = check_array(x, **kwargs) messages = [''] except ValueError as e: - results.append(False) + is_check_failed = True messages.append(str(e)) - if all(results): - return X - else: + if is_check_failed: raise ValueError("The following errors were raised" + "by the inputs: \n" + "\n".join(messages)) + else: + return X From 2af6d380c25f9ebc77cc99a24d130cfc1ad8ec5a Mon Sep 17 00:00:00 2001 From: Wojciech Reise Date: Fri, 21 Feb 2020 15:59:26 +0100 Subject: [PATCH 18/45] Change force_same_n_axis to force_same_ndim and fix a docstring --- gtda/utils/validation.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/gtda/utils/validation.py b/gtda/utils/validation.py index bece4fb25..50cdc4ba7 100644 --- a/gtda/utils/validation.py +++ b/gtda/utils/validation.py @@ -182,7 +182,7 @@ def check_list_of_images(X, **kwargs): """Check a list of arrays representing images, by integrating through the input one by one. To pass a test when `kwargs` is ``None``, all images ``x`` in `X` must satisfy: - - ``x.ndim`` >= 2, + - ``x.ndim >= 2``, - ``all(np.isfinite(x))`` Parameters @@ -200,7 +200,7 @@ def check_list_of_images(X, **kwargs): as modified by :func:`~sklearn.utils.validation.check_array` """ - kwargs_default = {'force_same_n_axis': False, + kwargs_default = {'force_same_ndim': False, 'force_same_shape': False, 'force_all_finite': True, 'ensure_2d': False, 'allow_nd': True} kwargs_default.update(kwargs) @@ -230,12 +230,12 @@ def check_list_of_point_clouds(X, **kwargs): """ kwargs_default = {'ensure_2d': True, 'force_all_finite': False, - 'force_same_shape': False, 'force_same_n_axis': True} + 'force_same_shape': False, 'force_same_ndim': True} kwargs_default.update(kwargs) return check_list_of_arrays(X, **kwargs_default) -def check_list_of_arrays(X, force_same_shape=True, force_same_n_axis=True, +def check_list_of_arrays(X, force_same_shape=True, force_same_ndim=True, **kwargs): """Check a list of arrays, by integrating through the input one by 
one. The constraints are to be specified in :param:`kwargs`. On top of @@ -251,7 +251,7 @@ def check_list_of_arrays(X, force_same_shape=True, force_same_n_axis=True, Indicates whether the shapes of the elements of X should all be the same. - force_same_n_axis : bool, optional, default: ``True`` + force_same_ndim : bool, optional, default: ``True`` Indicates whether the number of axes in the elements of X should all be the same. @@ -272,7 +272,7 @@ def check_list_of_arrays(X, force_same_shape=True, force_same_n_axis=True, raise ValueError(f"The arrays in X do not have the same dimensions" "({shapes}), while they should.") # if the number of dimensions can vary - elif force_same_n_axis: + elif force_same_ndim: n_axis = [x.ndim for x in X] if not (all([n_axis[0] == n for n in n_axis])): raise ValueError(f"The arrays in X do not have the same number" From 596bab643a8472b327fe5530c10546fef9ded0c4 Mon Sep 17 00:00:00 2001 From: Umberto Lupo <46537483+ulupo@users.noreply.github.com> Date: Fri, 21 Feb 2020 18:56:31 +0100 Subject: [PATCH 19/45] Docstring fixes --- gtda/utils/validation.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/gtda/utils/validation.py b/gtda/utils/validation.py index 50cdc4ba7..78f379cac 100644 --- a/gtda/utils/validation.py +++ b/gtda/utils/validation.py @@ -216,7 +216,7 @@ def check_list_of_point_clouds(X, **kwargs): Parameters ---------- - X : list of ndarray, such that `X[i].ndim==2` (n_points, n_dimensions), + X : list of ndarray, such that ``X[i].ndim==2`` (n_points, n_dimensions), or an array `X.dim==3` kwargs : dict or None, optional, default: ``None`` @@ -237,31 +237,33 @@ def check_list_of_point_clouds(X, **kwargs): def check_list_of_arrays(X, force_same_shape=True, force_same_ndim=True, **kwargs): - """Check a list of arrays, by integrating through the input one by one. - The constraints are to be specified in :param:`kwargs`. 
On top of + """Input validation on a list of lists, arrays, sparse matrices, or similar. + + The constraints are to be specified in `kwargs`. On top of parameters from :func:`~sklearn.utils.validation.check_array`, the optional parameters are listed below. Parameters ---------- - X : list(ndarray), such that `X[i].ndim==2` (n_points, n_dimensions), - or an array `X.dim==3` + X : list + Input list of objects to check / convert. force_same_shape : bool, optional, default: ``True`` - Indicates whether the shapes of the elements of X should all + Indicates whether the shapes of the elements of `X` should all be the same. force_same_ndim : bool, optional, default: ``True`` - Indicates whether the number of axes in the elements of X should all + Indicates whether the number of axes in the elements of `X` should all be the same. - kwargs: dict or None, optional, default: ``None`` + kwargs : dict or None, optional, default: ``None`` Parameters accepted by :func:`~sklearn.utils.validation.check_array`. 
Returns ------- - X : list of input arrays - as modified by :func:`~sklearn.utils.validation.check_array` + X : list + Output list of objects, each checked / converted by + :func:`~sklearn.utils.validation.check_array` """ From cb96cbd18d2e7007eb0ac09c0b4ce8a894d06404 Mon Sep 17 00:00:00 2001 From: Wojciech Reise Date: Mon, 2 Mar 2020 09:19:02 +0100 Subject: [PATCH 20/45] Remove redundant lines for test results definitions for ImageToPointCloud --- gtda/images/tests/test_preprocessing.py | 28 ------------------------- 1 file changed, 28 deletions(-) diff --git a/gtda/images/tests/test_preprocessing.py b/gtda/images/tests/test_preprocessing.py index 3b877960e..1f4ebe8d8 100644 --- a/gtda/images/tests/test_preprocessing.py +++ b/gtda/images/tests/test_preprocessing.py @@ -108,34 +108,6 @@ def test_img2pc_not_fitted(): img2pc.transform(images_2D) -images_2D_img2pc = np.array( - [[[0., 2.], [1., 2.], [0., 1.], - [1., 1.], [0., 0.], [1., 0.]], - [[0., 2.], [np.inf, np.inf], [0., 1.], - [np.inf, np.inf], [0., 0.], [np.inf, np.inf]], - [[np.inf, np.inf], [np.inf, np.inf], [np.inf, np.inf], - [np.inf, np.inf], [np.inf, np.inf], [np.inf, np.inf]]]) - -images_3D_img2pc = np.array( - [[[0., 2., 0.], [0., 2., 1.], - [1., 2., 0.], [1., 2., 1.], - [0., 1., 0.], [0., 1., 1.], - [1., 1., 0.], [1., 1., 1.], - [0., 0., 0.], [0., 0., 1.], - [1., 0., 0.], [1., 0., 1.]], - [[0., 2., 0.], [0., 2., 1.], - [np.inf, np.inf, np.inf], [np.inf, np.inf, np.inf], - [0., 1., 0.], [0., 1., 1.], - [np.inf, np.inf, np.inf], [np.inf, np.inf, np.inf], - [0., 0., 0.], [0., 0., 1.], - [np.inf, np.inf, np.inf], [np.inf, np.inf, np.inf]], - [[np.inf, np.inf, np.inf], [np.inf, np.inf, np.inf], - [np.inf, np.inf, np.inf], [np.inf, np.inf, np.inf], - [np.inf, np.inf, np.inf], [np.inf, np.inf, np.inf], - [np.inf, np.inf, np.inf], [np.inf, np.inf, np.inf], - [np.inf, np.inf, np.inf], [np.inf, np.inf, np.inf], - [np.inf, np.inf, np.inf], [np.inf, np.inf, np.inf]]]) - images_2D_img2pc = list( [np.array([[0., 
2.], [1., 2.], [0., 1.], [1., 1.], [0., 0.], [1., 0.]]), np.array([[0., 2.], [0., 1.], [0., 0.]]), From 11fcfc8b7e8a671f5a610ed891ef4dbf393467e7 Mon Sep 17 00:00:00 2001 From: Wojciech Reise Date: Mon, 2 Mar 2020 10:48:58 +0100 Subject: [PATCH 21/45] Modularizethe checks with lambda functions. Modify the tests in test_simplicial (tests for ok and invalid shapes respectively) --- gtda/homology/tests/test_simplicial.py | 9 +++-- gtda/utils/validation.py | 53 ++++++++++++++------------ 2 files changed, 35 insertions(+), 27 deletions(-) diff --git a/gtda/homology/tests/test_simplicial.py b/gtda/homology/tests/test_simplicial.py index f18c91d44..38292053f 100644 --- a/gtda/homology/tests/test_simplicial.py +++ b/gtda/homology/tests/test_simplicial.py @@ -100,14 +100,17 @@ def test_cp_transform(): def test_vrp_list_of_arrays(): - pc_2 = np.array([[0, 1, 2], [1, 2, 4]]) + """Verify that a list of point clouds""" + pc_2 = np.array([[0, 1], [1, 2]]) pc_list = [pc[0].copy(), pc_2] vrp = VietorisRipsPersistence() vrp.fit(pc_list) def test_vrp_list_invalid_arrays(): - pc_invalid = np.array([0, 1]) + pc_2 = np.array([[0, 1, 2]]) + pc_invalid = [pc[0].copy(), pc_2] + vrp = VietorisRipsPersistence() with pytest.raises(ValueError): - vrp.fit([pc_invalid]) + vrp.fit(pc_invalid) diff --git a/gtda/utils/validation.py b/gtda/utils/validation.py index f18cf7115..37d7442ca 100644 --- a/gtda/utils/validation.py +++ b/gtda/utils/validation.py @@ -242,9 +242,12 @@ def check_list_of_images(X, **kwargs): as modified by :func:`~sklearn.utils.validation.check_array` """ - kwargs_default = {'force_same_ndim': False, - 'force_same_shape': False, 'force_all_finite': True, - 'ensure_2d': False, 'allow_nd': True} + kwargs_default = {'force_all_finite': True, + 'ensure_2d': False, 'allow_nd': True, + 'check_shapes': [('embedding_dimension', + lambda x: x.shape, + 'The images should have exactly' + 'the same shape')]} kwargs_default.update(kwargs) return check_list_of_arrays(X, **kwargs_default) @@ 
-272,13 +275,24 @@ def check_list_of_point_clouds(X, **kwargs): """ kwargs_default = {'ensure_2d': True, 'force_all_finite': False, - 'force_same_shape': False, 'force_same_ndim': True} + 'check_shapes': [('embedding_dimension', + lambda x: x.shape[1:], + 'Not all point clouds have the same' + 'embedding dimension')]} kwargs_default.update(kwargs) return check_list_of_arrays(X, **kwargs_default) -def check_list_of_arrays(X, force_same_shape=True, force_same_ndim=True, - **kwargs): +def check_dimensions(X, get_property): + """ """ + from functools import reduce + from operator import and_ + reference = get_property(X[0]) + return reduce(and_, map(lambda x: get_property(x) == reference, X[1:]), + True) + + +def check_list_of_arrays(X, check_shapes=list(), **kwargs): """Input validation on a list of lists, arrays, sparse matrices, or similar. The constraints are to be specified in `kwargs`. On top of @@ -290,13 +304,10 @@ def check_list_of_arrays(X, force_same_shape=True, force_same_ndim=True, X : list Input list of objects to check / convert. - force_same_shape : bool, optional, default: ``True`` - Indicates whether the shapes of the elements of `X` should all - be the same. - - force_same_ndim : bool, optional, default: ``True`` - Indicates whether the number of axes in the elements of `X` should all - be the same. + check_shapes: list of tuples t, where t = (str, function to pass to + check_dimensions, error message if test fails). + The checks are applied in the order they are provided, only until + the first failure. kwargs : dict or None, optional, default: ``None`` Parameters accepted by :func:`~sklearn.utils.validation.check_array`. 
@@ -310,17 +321,11 @@ def check_list_of_arrays(X, force_same_shape=True, force_same_ndim=True, """ # if restrictions on the dimensions of the input are imposed - if force_same_shape: - shapes = [x.shape for x in X] - if not (all([shapes[0] == s for s in shapes])): - raise ValueError(f"The arrays in X do not have the same dimensions" - "({shapes}), while they should.") - # if the number of dimensions can vary - elif force_same_ndim: - n_axis = [x.ndim for x in X] - if not (all([n_axis[0] == n for n in n_axis])): - raise ValueError(f"The arrays in X do not have the same number" - "of axes ({n_axis}), while they should.") + for (test_name, get_property, err_message) in check_shapes: + if check_dimensions(X, get_property): + continue + else: + raise ValueError(err_message) is_check_failed = False messages = [] From d349c89f47e4efafde044735e9eae29b75cdc024 Mon Sep 17 00:00:00 2001 From: Wojciech Reise Date: Mon, 2 Mar 2020 11:15:17 +0100 Subject: [PATCH 22/45] Add a docstring in check_dimensions --- gtda/utils/validation.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/gtda/utils/validation.py b/gtda/utils/validation.py index 37d7442ca..f1aa2b6c4 100644 --- a/gtda/utils/validation.py +++ b/gtda/utils/validation.py @@ -266,7 +266,7 @@ def check_list_of_point_clouds(X, **kwargs): kwargs : dict or None, optional, default: ``None`` Parameters accepted by - :func:`~gtda.utils.validation.check_list_of_arrays`. + :func:`~`gtda.utils.validation.check_list_of_arrays``. Returns ------- @@ -284,7 +284,19 @@ def check_list_of_point_clouds(X, **kwargs): def check_dimensions(X, get_property): - """ """ + """Check the dimensions of X are consistent, where the check is defined + by get_property 'sample-wise'. + Parameters + ---------- + X: list of ndarray, + Usually represents point clouds or images- see + :func:`~`gtda.utils.validation.check_list_of_arrays``. 
+ + get_property: function: ndarray -> _, + Defines a property to be conserved, across all arrays (samples) + in X. + + """ from functools import reduce from operator import and_ reference = get_property(X[0]) From 54cacbdd979fc831eb73bc966097eace19579905 Mon Sep 17 00:00:00 2001 From: Wojciech Reise Date: Mon, 2 Mar 2020 11:19:38 +0100 Subject: [PATCH 23/45] Adapt the docstrings of check_list_of_point_clouds and check_list_of_images, to match the logic agreed upon --- gtda/utils/validation.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/gtda/utils/validation.py b/gtda/utils/validation.py index f1aa2b6c4..e7c6a4ea2 100644 --- a/gtda/utils/validation.py +++ b/gtda/utils/validation.py @@ -223,9 +223,10 @@ def validate_metric_params(metric, metric_params): def check_list_of_images(X, **kwargs): """Check a list of arrays representing images, by integrating through the input one by one. To pass a test when `kwargs` is ``None``, - all images ``x`` in `X` must satisfy: + all images ``x``, ``y`` in `X` must satisfy: - ``x.ndim >= 2``, - - ``all(np.isfinite(x))`` + - ``all(np.isfinite(x))``, + - ``x.shape == y.shape``. Parameters ---------- @@ -257,7 +258,7 @@ def check_list_of_point_clouds(X, **kwargs): through the input one by one. To pass a test when `kwargs` is ``None``, all point clouds ``x``, ``y`` in X must satisfy: - ``x.ndim == 2``, - - ``len(y.shape) == len(y.shape)``. + - ``len(y.shape[1:]) == len(y.shape[1:])``. Parameters ---------- @@ -295,7 +296,7 @@ def check_dimensions(X, get_property): get_property: function: ndarray -> _, Defines a property to be conserved, across all arrays (samples) in X. 
- + """ from functools import reduce from operator import and_ From 3ed0dad93ce4678b70afe437722e62bee2ad28f5 Mon Sep 17 00:00:00 2001 From: Wojciech Reise Date: Mon, 2 Mar 2020 16:43:35 +0100 Subject: [PATCH 24/45] Add falling back to check_array if the input is really an array --- gtda/homology/cubical.py | 4 ++-- gtda/homology/simplicial.py | 18 +++++++++++------ gtda/utils/validation.py | 39 ++++++++++++++++++++++--------------- 3 files changed, 37 insertions(+), 24 deletions(-) diff --git a/gtda/homology/cubical.py b/gtda/homology/cubical.py index bf6875dad..c1cc83d90 100644 --- a/gtda/homology/cubical.py +++ b/gtda/homology/cubical.py @@ -7,7 +7,7 @@ from joblib import Parallel, delayed from sklearn.utils.validation import check_is_fitted from ._utils import _pad_diagram -from ..utils.validation import validate_params, check_list_of_arrays +from ..utils.validation import validate_params, check_list_of_images from ..externals.python import CubicalComplex, PeriodicCubicalComplex @@ -158,7 +158,7 @@ def fit(self, X, y=None): 'infinity_values_': self.infinity_values_, '_homology_dimensions': self._homology_dimensions}, self._hyperparameters) - check_list_of_arrays(X, allow_nd=True) + check_list_of_images(X, allow_nd=True) self._max_homology_dimension = self._homology_dimensions[-1] return self diff --git a/gtda/homology/simplicial.py b/gtda/homology/simplicial.py index 219e8e55f..5f5a8a507 100644 --- a/gtda/homology/simplicial.py +++ b/gtda/homology/simplicial.py @@ -165,7 +165,8 @@ def fit(self, X, y=None): 'infinity_values_': self.infinity_values_, '_homology_dimensions': self._homology_dimensions}, self._hyperparameters) - check_list_of_point_clouds(X, ensure_2d=True, force_all_finite=False) + check_list_of_point_clouds(X, ensure_2d=False, allow_nd=True, + force_all_finite=False) self._max_homology_dimension = self._homology_dimensions[-1] return self @@ -207,7 +208,8 @@ def transform(self, X, y=None): """ check_is_fitted(self) - X = 
check_list_of_point_clouds(X, ensure_2d=True, + X = check_list_of_point_clouds(X, ensure_2d=False, + allow_nd=True, force_all_finite=False) Xt = Parallel(n_jobs=self.n_jobs)(delayed(self._ripser_diagram)(X[i]) @@ -382,7 +384,9 @@ def fit(self, X, y=None): 'infinity_values_': self.infinity_values_, '_homology_dimensions': self._homology_dimensions}, self._hyperparameters) - check_list_of_point_clouds(X, ensure_2d=True, force_all_finite=False) + check_list_of_point_clouds(X, ensure_2d=False, + allow_nd=True, + force_all_finite=False) self._max_homology_dimension = self._homology_dimensions[-1] return self @@ -424,7 +428,8 @@ def transform(self, X, y=None): """ check_is_fitted(self) - X = check_list_of_point_clouds(X, ensure_2d=True, + X = check_list_of_point_clouds(X, ensure_2d=False, + allow_nd=True, force_all_finite=False) Xt = Parallel(n_jobs=self.n_jobs)( @@ -568,7 +573,7 @@ def fit(self, X, y=None): 'infinity_values_': self.infinity_values_, '_homology_dimensions': self._homology_dimensions}, self._hyperparameters) - check_list_of_point_clouds(X, ensure_2d=True) + check_list_of_point_clouds(X, ensure_2d=False, allow_nd=True) self._max_homology_dimension = self._homology_dimensions[-1] return self @@ -605,7 +610,8 @@ def transform(self, X, y=None): """ check_is_fitted(self) - X = check_list_of_point_clouds(X, ensure_2d=True) + X = check_list_of_point_clouds(X, ensure_2d=False, + allow_nd=True,) Xt = Parallel(n_jobs=self.n_jobs)( delayed(self._gudhi_diagram)(X[i, :, :]) for i in range( diff --git a/gtda/utils/validation.py b/gtda/utils/validation.py index e7c6a4ea2..4d794f115 100644 --- a/gtda/utils/validation.py +++ b/gtda/utils/validation.py @@ -243,14 +243,17 @@ def check_list_of_images(X, **kwargs): as modified by :func:`~sklearn.utils.validation.check_array` """ - kwargs_default = {'force_all_finite': True, - 'ensure_2d': False, 'allow_nd': True, - 'check_shapes': [('embedding_dimension', - lambda x: x.shape, - 'The images should have exactly' - 'the same 
shape')]} - kwargs_default.update(kwargs) - return check_list_of_arrays(X, **kwargs_default) + if hasattr(X, 'shape'): + return check_array(X, **kwargs) + else: + kwargs_default = {'force_all_finite': True, + 'ensure_2d': False, 'allow_nd': True, + 'check_shapes': [('embedding_dimension', + lambda x: x.shape, + 'The images should have exactly' + 'the same shape')]} + kwargs_default.update(kwargs) + return check_list_of_arrays(X, **kwargs_default) def check_list_of_point_clouds(X, **kwargs): @@ -275,13 +278,16 @@ def check_list_of_point_clouds(X, **kwargs): as modified by :func:`~sklearn.utils.validation.check_array` """ - kwargs_default = {'ensure_2d': True, 'force_all_finite': False, - 'check_shapes': [('embedding_dimension', - lambda x: x.shape[1:], - 'Not all point clouds have the same' - 'embedding dimension')]} - kwargs_default.update(kwargs) - return check_list_of_arrays(X, **kwargs_default) + if hasattr(X, 'shape'): + return check_array(X, **kwargs) + else: + kwargs_default = {'ensure_2d': False, 'force_all_finite': False, + 'check_shapes': [('embedding_dimension', + lambda x: x.shape[1:], + 'Not all point clouds have the same' + 'embedding dimension')]} + kwargs_default.update(kwargs) + return check_list_of_arrays(X, **kwargs_default) def check_dimensions(X, get_property): @@ -345,7 +351,8 @@ def check_list_of_arrays(X, check_shapes=list(), **kwargs): for i, x in enumerate(X): try: # TODO: verifythe behavior depending on copy. 
- X[i] = check_array(x, **kwargs) + X[i] = check_array(x.reshape(1, *x.shape), + **kwargs).reshape(*x.shape) messages = [''] except ValueError as e: is_check_failed = True From be1bcf9373da13cac03969007b8d6e3a3f1fe62d Mon Sep 17 00:00:00 2001 From: Wojciech Reise Date: Mon, 2 Mar 2020 23:01:06 +0100 Subject: [PATCH 25/45] Fix linting invalidation --- gtda/utils/validation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gtda/utils/validation.py b/gtda/utils/validation.py index 4d794f115..d91e31d1d 100644 --- a/gtda/utils/validation.py +++ b/gtda/utils/validation.py @@ -284,8 +284,8 @@ def check_list_of_point_clouds(X, **kwargs): kwargs_default = {'ensure_2d': False, 'force_all_finite': False, 'check_shapes': [('embedding_dimension', lambda x: x.shape[1:], - 'Not all point clouds have the same' - 'embedding dimension')]} + 'Not all point clouds have the ' + 'same embedding dimension.')]} kwargs_default.update(kwargs) return check_list_of_arrays(X, **kwargs_default) From 0d2f0b8890b9b9c45f1ae83280f4d04238299881 Mon Sep 17 00:00:00 2001 From: Wojciech Reise Date: Tue, 3 Mar 2020 09:25:15 +0100 Subject: [PATCH 26/45] Add a fail on small dimensional-images --- gtda/utils/validation.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/gtda/utils/validation.py b/gtda/utils/validation.py index d91e31d1d..cfc138714 100644 --- a/gtda/utils/validation.py +++ b/gtda/utils/validation.py @@ -244,6 +244,10 @@ def check_list_of_images(X, **kwargs): """ if hasattr(X, 'shape'): + if X.ndim < 3: + raise ValueError(f"An image in the collection X should be at " + f"least of dimension 2, while it has dimension " + f"{X.ndim - 1}.") return check_array(X, **kwargs) else: kwargs_default = {'force_all_finite': True, From e719a8fb6293ad93374a5f2d1b42b8eb96828da3 Mon Sep 17 00:00:00 2001 From: Wojciech Reise Date: Sun, 8 Mar 2020 21:18:41 +0100 Subject: [PATCH 27/45] Merge master into pcs_different_sizes --- gtda/diagrams/features.py | 6 +++--- 
gtda/homology/simplicial.py | 3 +-- gtda/images/filtrations.py | 2 +- gtda/images/preprocessing.py | 4 ++-- gtda/utils/validation.py | 2 +- 5 files changed, 8 insertions(+), 9 deletions(-) diff --git a/gtda/diagrams/features.py b/gtda/diagrams/features.py index ec3e0b994..31b826e9c 100644 --- a/gtda/diagrams/features.py +++ b/gtda/diagrams/features.py @@ -569,9 +569,9 @@ class PersistenceImage(BaseEstimator, TransformerMixin): dimension, to sample during :meth:`fit`. weight_function : callable or None, default: ``None`` - Function mapping the 1D array of persistence values of the points of an - input diagram to a 1D array of weights. ``None`` is equivalent to passing - the identity function. + Function mapping the 1D array of persistence values of the points of + an input diagram to a 1D array of weights. ``None`` is equivalent to + passing the identity function. n_jobs : int or None, optional, default: ``None`` The number of jobs to use for the computation. ``None`` means 1 unless diff --git a/gtda/homology/simplicial.py b/gtda/homology/simplicial.py index 9a0416973..f28cd7d5b 100644 --- a/gtda/homology/simplicial.py +++ b/gtda/homology/simplicial.py @@ -9,7 +9,7 @@ from sklearn.base import BaseEstimator, TransformerMixin from sklearn.metrics.pairwise import pairwise_distances -from sklearn.utils.validation import check_array, check_is_fitted +from sklearn.utils.validation import check_is_fitted from ._utils import _postprocess_diagrams from ..externals.python import ripser, SparseRipsComplex, CechComplex @@ -19,7 +19,6 @@ from ..utils.validation import validate_params, check_list_of_point_clouds - @adapt_fit_transform_docs class VietorisRipsPersistence(BaseEstimator, TransformerMixin): """`Persistence diagrams `_ resulting from diff --git a/gtda/images/filtrations.py b/gtda/images/filtrations.py index 2ecdae0c6..99465c6a8 100644 --- a/gtda/images/filtrations.py +++ b/gtda/images/filtrations.py @@ -11,7 +11,7 @@ from sklearn.metrics import pairwise_distances from 
sklearn.utils import gen_even_slices -from sklearn.utils.validation import check_is_fitted, check_array +from sklearn.utils.validation import check_is_fitted from ._utils import _dilate, _erode from ..utils._docs import adapt_fit_transform_docs diff --git a/gtda/images/preprocessing.py b/gtda/images/preprocessing.py index de953428b..919c0e88e 100644 --- a/gtda/images/preprocessing.py +++ b/gtda/images/preprocessing.py @@ -9,7 +9,7 @@ from joblib import Parallel, delayed, effective_n_jobs from sklearn.base import BaseEstimator, TransformerMixin from sklearn.utils import gen_even_slices -from sklearn.utils.validation import check_is_fitted, check_array +from sklearn.utils.validation import check_is_fitted from ..utils._docs import adapt_fit_transform_docs from ..utils.intervals import Interval @@ -372,7 +372,7 @@ def __init__(self, n_jobs=None): self.n_jobs = n_jobs def _embed(self, X): - #Xpts = [np.stack(np.nonzero(x), axis=1) for x in X] + # Xpts = [np.stack(np.nonzero(x), axis=1) for x in X] Xpts = np.stack([self.mesh_ for _ in range(X.shape[0])]) * 1. Xpts[np.logical_not(X.reshape((X.shape[0], -1))), :] += np.inf return Xpts diff --git a/gtda/utils/validation.py b/gtda/utils/validation.py index 1c4dd9908..1a468dcd9 100644 --- a/gtda/utils/validation.py +++ b/gtda/utils/validation.py @@ -2,10 +2,10 @@ # License: GNU AGPLv3 import numpy as np -import types from sklearn.utils.validation import check_array + def check_diagram(X, copy=False): """Input validation on a persistence diagram. 
From 315575a6ac6f550f714ed6e908dbbfd5b2c64266 Mon Sep 17 00:00:00 2001 From: Wojciech Reise Date: Mon, 9 Mar 2020 09:00:26 +0100 Subject: [PATCH 28/45] Change docstrings on kwargs in utils.validation --- gtda/utils/validation.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/gtda/utils/validation.py b/gtda/utils/validation.py index 1a468dcd9..339224e48 100644 --- a/gtda/utils/validation.py +++ b/gtda/utils/validation.py @@ -159,8 +159,8 @@ def validate_params(parameters, references, exclude=None): def check_list_of_images(X, **kwargs): - """Check a list of arrays representing images, by integrating - through the input one by one. To pass a test when `kwargs` is ``None``, + """Check a list of arrays representing images, by iterating through + the input one by one. To pass a test when `kwargs` is empty, all images ``x``, ``y`` in `X` must satisfy: - ``x.ndim >= 2``, - ``all(np.isfinite(x))``, @@ -171,7 +171,7 @@ def check_list_of_images(X, **kwargs): X : list of ndarray Each entry of `X` corresponds to an image. - kwargs : dict or None, optional, default: ``None`` + kwargs : dict, optional, default: {} Parameters accepted by :func:`~gtda.utils.validation.check_list_of_arrays`. @@ -200,7 +200,7 @@ def check_list_of_images(X, **kwargs): def check_list_of_point_clouds(X, **kwargs): """Check a list of arrays representing point clouds, by integrating - through the input one by one. To pass a test when `kwargs` is ``None``, + through the input one by one. To pass a test when `kwargs` is empty, all point clouds ``x``, ``y`` in X must satisfy: - ``x.ndim == 2``, - ``len(y.shape[1:]) == len(y.shape[1:])``. @@ -210,7 +210,7 @@ def check_list_of_point_clouds(X, **kwargs): X : list of ndarray, such that ``X[i].ndim==2`` (n_points, n_dimensions), or an array `X.dim==3` - kwargs : dict or None, optional, default: ``None`` + kwargs : dict, optional, default: {} Parameters accepted by :func:`~`gtda.utils.validation.check_list_of_arrays``. 
@@ -270,7 +270,7 @@ def check_list_of_arrays(X, check_shapes=list(), **kwargs): The checks are applied in the order they are provided, only until the first failure. - kwargs : dict or None, optional, default: ``None`` + kwargs : dict, optional, default: {} Parameters accepted by :func:`~sklearn.utils.validation.check_array`. Returns From a648fd15eaf905f577c61d89cd95158fbed47824 Mon Sep 17 00:00:00 2001 From: Wojciech Reise Date: Mon, 9 Mar 2020 09:20:46 +0100 Subject: [PATCH 29/45] Add code formatting in docstrings in utils.validation --- gtda/utils/validation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gtda/utils/validation.py b/gtda/utils/validation.py index 339224e48..3e6716b55 100644 --- a/gtda/utils/validation.py +++ b/gtda/utils/validation.py @@ -171,7 +171,7 @@ def check_list_of_images(X, **kwargs): X : list of ndarray Each entry of `X` corresponds to an image. - kwargs : dict, optional, default: {} + kwargs : dict, optional, default: ``{}`` Parameters accepted by :func:`~gtda.utils.validation.check_list_of_arrays`. @@ -210,7 +210,7 @@ def check_list_of_point_clouds(X, **kwargs): X : list of ndarray, such that ``X[i].ndim==2`` (n_points, n_dimensions), or an array `X.dim==3` - kwargs : dict, optional, default: {} + kwargs : dict, optional, default: ``{}`` Parameters accepted by :func:`~`gtda.utils.validation.check_list_of_arrays``. @@ -270,7 +270,7 @@ def check_list_of_arrays(X, check_shapes=list(), **kwargs): The checks are applied in the order they are provided, only until the first failure. - kwargs : dict, optional, default: {} + kwargs : dict, optional, default: ``{}`` Parameters accepted by :func:`~sklearn.utils.validation.check_array`. 
Returns From fc51502ec815b26b41253ed5866506c04447c044 Mon Sep 17 00:00:00 2001 From: Wojciech Reise Date: Mon, 9 Mar 2020 09:58:54 +0100 Subject: [PATCH 30/45] Add sklearn estimator checks known failures --- gtda/tests/test_common.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/gtda/tests/test_common.py b/gtda/tests/test_common.py index 057e55b88..1e9a515f7 100644 --- a/gtda/tests/test_common.py +++ b/gtda/tests/test_common.py @@ -27,6 +27,19 @@ ], } +LISTFAIL_TESTS = ['check_estimators_dtypes', 'check_fit_score_takes_y', + 'check_estimators_fit_returns_self', + 'check_estimators_fit_returns_self(readonly_memmap=True)', + 'check_complex_data', 'check_dtype_object', + 'check_estimators_empty_data_messages', + 'check_pipeline_consistency', 'check_estimators_nan_inf', + 'check_estimators_overwrite_params', + 'check_estimator_sparse_data', 'check_estimators_pickle', + 'check_fit2d_predict1d', 'check_methods_subset_invariance', + 'check_fit2d_1sample', 'check_fit2d_1feature', + 'check_dict_unchanged', 'check_dont_overwrite_parameters', + 'check_fit_idempotent'] + # adapted from sklearn.utils.estimator_check v0.22 def _get_callable_name(obj): @@ -86,4 +99,8 @@ def test_sklearn_api(check, estimator, request): request.applymarker(pytest.mark.xfail( run=True, reason='known failure')) + if check_name in LISTFAIL_TESTS: + request.applymarker(pytest.mark.xfail( + run=True, reason='Known failure: 2d input.')) + check(estimator) From eb3aca4ee539f30ecc35d0bd38c4d241f46e3693 Mon Sep 17 00:00:00 2001 From: Wojciech Reise Date: Thu, 12 Mar 2020 09:03:37 +0100 Subject: [PATCH 31/45] Change the kwargs docuemntation in validation --- gtda/utils/validation.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/gtda/utils/validation.py b/gtda/utils/validation.py index 3e6716b55..bb511ec47 100644 --- a/gtda/utils/validation.py +++ b/gtda/utils/validation.py @@ -171,9 +171,9 @@ def check_list_of_images(X, **kwargs): X : list of 
ndarray Each entry of `X` corresponds to an image. - kwargs : dict, optional, default: ``{}`` - Parameters accepted by - :func:`~gtda.utils.validation.check_list_of_arrays`. + kwargs : + Keyword arguments. For a list of accepted values, see the documentation + of :func:`~gtda.utils.validation.check_list_of_arrays`. Returns ------- @@ -210,9 +210,9 @@ def check_list_of_point_clouds(X, **kwargs): X : list of ndarray, such that ``X[i].ndim==2`` (n_points, n_dimensions), or an array `X.dim==3` - kwargs : dict, optional, default: ``{}`` - Parameters accepted by - :func:`~`gtda.utils.validation.check_list_of_arrays``. + kwargs : + Keyword arguments. For a list of accepted values, see the documentation + of :func:`~`gtda.utils.validation.check_list_of_arrays``. Returns ------- @@ -270,8 +270,9 @@ def check_list_of_arrays(X, check_shapes=list(), **kwargs): The checks are applied in the order they are provided, only until the first failure. - kwargs : dict, optional, default: ``{}`` - Parameters accepted by :func:`~sklearn.utils.validation.check_array`. + kwargs : + Keyword arguments. For a list of accepted values, see the documentation + of :func:`~sklearn.utils.validation.check_array`. 
Returns ------- From b5f004234275fee536ce6c2770b8d07f752843fb Mon Sep 17 00:00:00 2001 From: Umberto Date: Sat, 21 Mar 2020 22:35:23 +0100 Subject: [PATCH 32/45] Rename functions check_list_of_point_clouds -> check_point_clouds check_list_of_images -> check_images --- gtda/homology/cubical.py | 4 ++-- gtda/homology/simplicial.py | 30 +++++++++++++++--------------- gtda/images/filtrations.py | 22 +++++++++++----------- gtda/images/preprocessing.py | 18 +++++++++--------- gtda/utils/validation.py | 11 +++++------ 5 files changed, 42 insertions(+), 43 deletions(-) diff --git a/gtda/homology/cubical.py b/gtda/homology/cubical.py index ae137c96b..87ec462da 100644 --- a/gtda/homology/cubical.py +++ b/gtda/homology/cubical.py @@ -14,7 +14,7 @@ from ..externals.python import CubicalComplex, PeriodicCubicalComplex from ..plotting import plot_diagram from ..utils.intervals import Interval -from ..utils.validation import validate_params, check_list_of_images +from ..utils.validation import validate_params, check_images class CubicalPersistence(BaseEstimator, TransformerMixin, PlotterMixin): @@ -146,7 +146,7 @@ def fit(self, X, y=None): self : object """ - check_list_of_images(X, allow_nd=True) + check_images(X, allow_nd=True) validate_params( self.get_params(), self._hyperparameters, exclude=['n_jobs']) diff --git a/gtda/homology/simplicial.py b/gtda/homology/simplicial.py index ef0f67006..d8b07d8a4 100644 --- a/gtda/homology/simplicial.py +++ b/gtda/homology/simplicial.py @@ -18,7 +18,7 @@ from ..utils._docs import adapt_fit_transform_docs from ..utils.intervals import Interval -from ..utils.validation import validate_params, check_list_of_point_clouds +from ..utils.validation import validate_params, check_point_clouds @adapt_fit_transform_docs @@ -169,8 +169,8 @@ def fit(self, X, y=None): self : object """ - check_list_of_point_clouds(X, ensure_2d=False, allow_nd=True, - force_all_finite=False) + check_point_clouds(X, ensure_2d=False, allow_nd=True, + 
force_all_finite=False) validate_params( self.get_params(), self._hyperparameters, exclude=['n_jobs']) @@ -220,9 +220,9 @@ def transform(self, X, y=None): """ check_is_fitted(self) - X = check_list_of_point_clouds(X, ensure_2d=False, - allow_nd=True, - force_all_finite=False) + X = check_point_clouds(X, ensure_2d=False, + allow_nd=True, + force_all_finite=False) Xt = Parallel(n_jobs=self.n_jobs)(delayed(self._ripser_diagram)(X[i]) for i in range(len(X))) @@ -417,9 +417,9 @@ def fit(self, X, y=None): self : object """ - check_list_of_point_clouds(X, ensure_2d=False, - allow_nd=True, - force_all_finite=False) + check_point_clouds(X, ensure_2d=False, + allow_nd=True, + force_all_finite=False) validate_params( self.get_params(), self._hyperparameters, exclude=['n_jobs']) @@ -469,9 +469,9 @@ def transform(self, X, y=None): """ check_is_fitted(self) - X = check_list_of_point_clouds(X, ensure_2d=False, - allow_nd=True, - force_all_finite=False) + X = check_point_clouds(X, ensure_2d=False, + allow_nd=True, + force_all_finite=False) Xt = Parallel(n_jobs=self.n_jobs)( delayed(self._gudhi_diagram)(X[i, :, :]) for i in range( @@ -635,7 +635,7 @@ def fit(self, X, y=None): self : object """ - check_list_of_point_clouds(X, ensure_2d=False, allow_nd=True) + check_point_clouds(X, ensure_2d=False, allow_nd=True) validate_params( self.get_params(), self._hyperparameters, exclude=['n_jobs']) @@ -680,8 +680,8 @@ def transform(self, X, y=None): """ check_is_fitted(self) - X = check_list_of_point_clouds(X, ensure_2d=False, - allow_nd=True,) + X = check_point_clouds(X, ensure_2d=False, + allow_nd=True, ) Xt = Parallel(n_jobs=self.n_jobs)( delayed(self._gudhi_diagram)(X[i, :, :]) for i in range( diff --git a/gtda/images/filtrations.py b/gtda/images/filtrations.py index cecf7e992..88b712c09 100644 --- a/gtda/images/filtrations.py +++ b/gtda/images/filtrations.py @@ -18,7 +18,7 @@ from ..plotting import plot_heatmap from ..utils._docs import adapt_fit_transform_docs from ..utils.intervals 
import Interval -from ..utils.validation import validate_params, check_list_of_images +from ..utils.validation import validate_params, check_images @adapt_fit_transform_docs @@ -116,7 +116,7 @@ def fit(self, X, y=None): self : object """ - X = check_list_of_images(X, allow_nd=True) + X = check_images(X, allow_nd=True) self.n_dimensions_ = X.ndim - 1 if (self.n_dimensions_ < 2) or (self.n_dimensions_ > 3): warn(f"Input of `fit` contains arrays of dimension " @@ -171,7 +171,7 @@ def transform(self, X, y=None): """ check_is_fitted(self) - Xt = check_list_of_images(X, allow_nd=True, copy=True) + Xt = check_images(X, allow_nd=True, copy=True) Xt = Parallel(n_jobs=self.n_jobs)( delayed(self._calculate_height)(X[s]) @@ -337,7 +337,7 @@ def fit(self, X, y=None): self : object """ - X = check_list_of_images(X, allow_nd=True) + X = check_images(X, allow_nd=True) self.n_dimensions_ = X.ndim - 1 if (self.n_dimensions_ < 2) or (self.n_dimensions_ > 3): warn(f"Input of `fit` contains arrays of dimension " @@ -398,7 +398,7 @@ def transform(self, X, y=None): """ check_is_fitted(self) - Xt = check_list_of_images(X, allow_nd=True, copy=True) + Xt = check_images(X, allow_nd=True, copy=True) Xt = Parallel(n_jobs=self.n_jobs)( delayed(self._calculate_radial)(X[s]) @@ -529,7 +529,7 @@ def fit(self, X, y=None): self : object """ - X = check_list_of_images(X, allow_nd=True) + X = check_images(X, allow_nd=True) n_dimensions = X.ndim - 1 if (n_dimensions < 2) or (n_dimensions > 3): @@ -572,7 +572,7 @@ def transform(self, X, y=None): """ check_is_fitted(self) - Xt = check_list_of_images(X, allow_nd=True, copy=True) + Xt = check_images(X, allow_nd=True, copy=True) Xt = Parallel(n_jobs=self.n_jobs)( delayed(self._calculate_dilation)(X[s]) @@ -703,7 +703,7 @@ def fit(self, X, y=None): self : object """ - X = check_list_of_images(X, allow_nd=True) + X = check_images(X, allow_nd=True) n_dimensions = X.ndim - 1 if (n_dimensions < 2) or (n_dimensions > 3): warn(f"Input of `fit` contains arrays of 
dimension " @@ -745,7 +745,7 @@ def transform(self, X, y=None): """ check_is_fitted(self) - Xt = check_list_of_images(X, allow_nd=True, copy=True) + Xt = check_images(X, allow_nd=True, copy=True) Xt = Parallel(n_jobs=self.n_jobs)( delayed(self._calculate_erosion)(X[s]) @@ -886,7 +886,7 @@ def fit(self, X, y=None): self : object """ - X = check_list_of_images(X, allow_nd=True) + X = check_images(X, allow_nd=True) n_dimensions = X.ndim - 1 if (n_dimensions < 2) or (n_dimensions > 3): warn(f"Input of `fit` contains arrays of dimension " @@ -928,7 +928,7 @@ def transform(self, X, y=None): """ check_is_fitted(self) - Xt = check_list_of_images(X, allow_nd=True, copy=True) + Xt = check_images(X, allow_nd=True, copy=True) Xt = Parallel(n_jobs=self.n_jobs)( delayed(self._calculate_signed_distance)(X[s]) diff --git a/gtda/images/preprocessing.py b/gtda/images/preprocessing.py index ae03a1023..73e04d19d 100644 --- a/gtda/images/preprocessing.py +++ b/gtda/images/preprocessing.py @@ -15,7 +15,7 @@ from ..plotting import plot_point_cloud, plot_heatmap from ..utils._docs import adapt_fit_transform_docs from ..utils.intervals import Interval -from ..utils.validation import validate_params, check_list_of_images +from ..utils.validation import validate_params, check_images @adapt_fit_transform_docs @@ -91,7 +91,7 @@ def fit(self, X, y=None): self : object """ - X = check_list_of_images(X, allow_nd=True) + X = check_images(X, allow_nd=True) self.n_dimensions_ = X.ndim - 1 if (self.n_dimensions_ < 2) or (self.n_dimensions_ > 3): warn(f"Input of `fit` contains arrays of dimension " @@ -127,7 +127,7 @@ def transform(self, X, y=None): """ check_is_fitted(self) - Xt = check_list_of_images(X, allow_nd=True, copy=True) + Xt = check_images(X, allow_nd=True, copy=True) Xt = Parallel(n_jobs=self.n_jobs)(delayed( self._binarize)(Xt[s]) @@ -211,7 +211,7 @@ def fit(self, X, y=None): self : object """ - X = check_list_of_images(X, allow_nd=True) + X = check_images(X, allow_nd=True) 
self._is_fitted = True return self @@ -239,7 +239,7 @@ def transform(self, X, y=None): """ check_is_fitted(self, ['_is_fitted']) - Xt = check_list_of_images(X, allow_nd=True, copy=True) + Xt = check_images(X, allow_nd=True, copy=True) Xt = Parallel(n_jobs=self.n_jobs)(delayed( np.logical_not)(Xt[s]) @@ -345,7 +345,7 @@ def fit(self, X, y=None): self : object """ - check_list_of_images(X, allow_nd=True) + check_images(X, allow_nd=True) n_dimensions = X.ndim - 1 if n_dimensions < 2 or n_dimensions > 3: warn(f"Input of `fit` contains arrays of dimension " @@ -391,7 +391,7 @@ def transform(self, X, y=None): """ check_is_fitted(self) - Xt = check_list_of_images(X, allow_nd=True, copy=True) + Xt = check_images(X, allow_nd=True, copy=True) Xt = Parallel(n_jobs=self.n_jobs)(delayed( np.pad)(Xt[s], pad_width=self._pad_width, @@ -497,7 +497,7 @@ def fit(self, X, y=None): self : object """ - _ = check_list_of_images(X, allow_nd=True) + _ = check_images(X, allow_nd=True) self.is_fitted_ = True n_dimensions = X.ndim - 1 if n_dimensions < 2 or n_dimensions > 3: @@ -538,7 +538,7 @@ def transform(self, X, y=None): """ check_is_fitted(self) - Xt = check_list_of_images(X, allow_nd=True, copy=True) + Xt = check_images(X, allow_nd=True, copy=True) Xt = Parallel(n_jobs=self.n_jobs)(delayed( self._embed)(Xt[s]) diff --git a/gtda/utils/validation.py b/gtda/utils/validation.py index 042abce23..fcadb35c0 100644 --- a/gtda/utils/validation.py +++ b/gtda/utils/validation.py @@ -183,7 +183,7 @@ def validate_params(parameters, references, exclude=None): return _validate_params(parameters_, references) -def check_list_of_images(X, **kwargs): +def check_images(X, **kwargs): """Check a list of arrays representing images, by iterating through the input one by one. To pass a test when `kwargs` is empty, all images ``x``, ``y`` in `X` must satisfy: @@ -196,7 +196,7 @@ def check_list_of_images(X, **kwargs): X : list of ndarray Each entry of `X` corresponds to an image. 
- kwargs : + kwargs Keyword arguments. For a list of accepted values, see the documentation of :func:`~gtda.utils.validation.check_list_of_arrays`. @@ -223,7 +223,7 @@ def check_list_of_images(X, **kwargs): return check_list_of_arrays(X, **kwargs_default) -def check_list_of_point_clouds(X, **kwargs): +def check_point_clouds(X, **kwargs): """Check a list of arrays representing point clouds, by integrating through the input one by one. To pass a test when `kwargs` is empty, all point clouds ``x``, ``y`` in X must satisfy: @@ -235,7 +235,7 @@ def check_list_of_point_clouds(X, **kwargs): X : list of ndarray, such that ``X[i].ndim==2`` (n_points, n_dimensions), or an array `X.dim==3` - kwargs : + kwargs Keyword arguments. For a list of accepted values, see the documentation of :func:`~`gtda.utils.validation.check_list_of_arrays``. @@ -306,7 +306,6 @@ def check_list_of_arrays(X, check_shapes=list(), **kwargs): :func:`~sklearn.utils.validation.check_array` """ - # if restrictions on the dimensions of the input are imposed for (test_name, get_property, err_message) in check_shapes: if check_dimensions(X, get_property): @@ -318,7 +317,7 @@ def check_list_of_arrays(X, check_shapes=list(), **kwargs): messages = [] for i, x in enumerate(X): try: - # TODO: verifythe behavior depending on copy. + # TODO: verify the behavior depending on copy. 
X[i] = check_array(x.reshape(1, *x.shape), **kwargs).reshape(*x.shape) messages = [''] From 72c811d827fd7f07d8b215d2ac33d66b97a9fc3a Mon Sep 17 00:00:00 2001 From: Umberto Date: Sun, 22 Mar 2020 12:56:32 +0100 Subject: [PATCH 33/45] Fixes in ImageToPointCloud --- gtda/images/preprocessing.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/gtda/images/preprocessing.py b/gtda/images/preprocessing.py index 73e04d19d..c3f405c2b 100644 --- a/gtda/images/preprocessing.py +++ b/gtda/images/preprocessing.py @@ -472,7 +472,6 @@ def __init__(self, n_jobs=None): self.n_jobs = n_jobs def _embed(self, X): - # Xpts = [np.stack(np.nonzero(x), axis=1) for x in X] Xpts = np.stack([self.mesh_ for _ in range(X.shape[0])]) * 1. Xpts[np.logical_not(X.reshape((X.shape[0], -1))), :] += np.inf return Xpts @@ -497,8 +496,8 @@ def fit(self, X, y=None): self : object """ - _ = check_images(X, allow_nd=True) - self.is_fitted_ = True + check_images(X, allow_nd=True) + n_dimensions = X.ndim - 1 if n_dimensions < 2 or n_dimensions > 3: warn(f"Input of `fit` contains arrays of dimension " From a41cb5d62d2889c48cba957aa33d26b8dfcf4837 Mon Sep 17 00:00:00 2001 From: Umberto Date: Sun, 22 Mar 2020 18:38:56 +0100 Subject: [PATCH 34/45] Add mapper and utils to global __init__ --- gtda/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gtda/__init__.py b/gtda/__init__.py index 3080a7797..c738ddf0c 100644 --- a/gtda/__init__.py +++ b/gtda/__init__.py @@ -1,4 +1,4 @@ from ._version import __version__ -__all__ = ['homology', 'time_series', 'graphs', 'diagrams', 'images', - 'point_clouds', 'externals', 'plotting', '__version__'] +__all__ = ['mapper', 'homology', 'time_series', 'graphs', 'diagrams', 'images', + 'utils', 'point_clouds', 'externals', 'plotting', '__version__'] From 4cd196e7ef66f9e446cc222f2c87dd5edd9c903a Mon Sep 17 00:00:00 2001 From: Umberto Date: Sun, 22 Mar 2020 19:25:10 +0100 Subject: [PATCH 35/45] Make check_shapes a list of pairs, 
fix validation docstrings, revise use of check_array throughout --- gtda/diagrams/representations.py | 9 +++----- gtda/graphs/geodesic_distance.py | 2 +- gtda/graphs/kneighbors.py | 17 ++++++++-------- gtda/graphs/transition.py | 5 ++--- gtda/images/_utils.py | 3 +-- gtda/mapper/cover.py | 32 ++++++++++++++++------------- gtda/mapper/filter.py | 34 +++++++++++++++++-------------- gtda/plotting/images.py | 2 -- gtda/point_clouds/rescaling.py | 29 +++++++++++++------------- gtda/time_series/embedding.py | 7 ++++--- gtda/time_series/features.py | 8 ++++---- gtda/time_series/multivariate.py | 2 +- gtda/time_series/preprocessing.py | 4 +++- gtda/utils/validation.py | 32 +++++++++++++++-------------- 14 files changed, 96 insertions(+), 90 deletions(-) diff --git a/gtda/diagrams/representations.py b/gtda/diagrams/representations.py index f59329895..bda0386fa 100644 --- a/gtda/diagrams/representations.py +++ b/gtda/diagrams/representations.py @@ -109,10 +109,10 @@ def fit(self, X, y=None): self.homology_dimensions_ = sorted(list(set(X[0, :, 2]))) self._n_dimensions = len(self.homology_dimensions_) - self._samplings, _ = _bin(X, metric='betti', n_bins=self.n_bins) self.samplings_ = {dim: s.flatten() for dim, s in self._samplings.items()} + return self def transform(self, X, y=None): @@ -319,7 +319,6 @@ def fit(self, X, y=None): self.homology_dimensions_ = sorted(list(set(X[0, :, 2]))) self._n_dimensions = len(self.homology_dimensions_) - self._samplings, _ = _bin(X, metric="landscape", n_bins=self.n_bins) self.samplings_ = {dim: s.flatten() for dim, s in self._samplings.items()} @@ -553,11 +552,11 @@ def fit(self, X, y=None): self.homology_dimensions_ = sorted(list(set(X[0, :, 2]))) self._n_dimensions = len(self.homology_dimensions_) - self._samplings, self._step_size = _bin( X, metric='heat', n_bins=self.n_bins) self.samplings_ = {dim: s.flatten() for dim, s in self._samplings.items()} + return self def transform(self, X, y=None): @@ -747,7 +746,6 @@ def fit(self, X, 
y=None): """ X = check_diagrams(X) - validate_params( self.get_params(), self._hyperparameters, exclude=['n_jobs']) @@ -758,13 +756,13 @@ def fit(self, X, y=None): self.homology_dimensions_ = sorted(list(set(X[0, :, 2]))) self._n_dimensions = len(self.homology_dimensions_) - self._samplings, self._step_size = _bin( X, metric='persistence_image', n_bins=self.n_bins) self.samplings_ = {dim: s.transpose() for dim, s in self._samplings.items()} self.weights_ = _calculate_weights(X, self.effective_weight_function_, self._samplings) + return self def transform(self, X, y=None): @@ -945,7 +943,6 @@ def fit(self, X, y=None): self.homology_dimensions_ = sorted(list(set(X[0, :, 2]))) self._n_dimensions = len(self.homology_dimensions_) - self._samplings, _ = _bin(X, metric='silhouette', n_bins=self.n_bins) self.samplings_ = {dim: s.flatten() for dim, s in self._samplings.items()} diff --git a/gtda/graphs/geodesic_distance.py b/gtda/graphs/geodesic_distance.py index 933445b65..00bc1cb92 100644 --- a/gtda/graphs/geodesic_distance.py +++ b/gtda/graphs/geodesic_distance.py @@ -121,7 +121,7 @@ def transform(self, X, y=None): X = check_graph(X) Xt = Parallel(n_jobs=self.n_jobs)( - delayed(self._geodesic_distance)(X[i]) for i in range(X.shape[0])) + delayed(self._geodesic_distance)(x) for x in X) Xt = np.array(Xt) return Xt diff --git a/gtda/graphs/kneighbors.py b/gtda/graphs/kneighbors.py index 7711611af..4487ec743 100644 --- a/gtda/graphs/kneighbors.py +++ b/gtda/graphs/kneighbors.py @@ -33,7 +33,7 @@ class KNeighborsGraph(BaseEstimator, TransformerMixin): n_neighbors : int, optional, default: ``4`` Number of neighbors to use. - metric : string or callable, default ``'minkowski'`` + metric : string or callable, optional, default: ``'euclidean'`` Metric to use for distance computation. Any metric from scikit-learn or :mod:`scipy.spatial.distance` can be used. 
If metric is a callable function, it is called on each @@ -56,13 +56,14 @@ class KNeighborsGraph(BaseEstimator, TransformerMixin): See the documentation for :mod:`scipy.spatial.distance` for details on these metrics. - metric_params : dict, optional, default: ``{}`` + metric_params : dict or None, optional, default: ``None`` Additional keyword arguments for the metric function. p : int, optional, default: ``2`` Parameter for the Minkowski (i.e. :math:`\\ell^p`) metric from - :func:`sklearn.metrics.pairwise.pairwise_distances`. `p` = 1 is the - Manhattan distance and `p` = 2 is the Euclidean distance. + :func:`sklearn.metrics.pairwise.pairwise_distances`. Only relevant + when `metric` is ``'minkowski'``. `p` = 1 is the Manhattan distance, + and `p` = 2 reduces to the Euclidean distance. metric_params : dict, optional, default: ``{}`` Additional keyword arguments for the metric function. @@ -90,9 +91,8 @@ class KNeighborsGraph(BaseEstimator, TransformerMixin): """ - # TODO: Consider using an immutable default value for metric_params. def __init__(self, n_neighbors=4, metric='euclidean', - p=2, metric_params={}, n_jobs=None): + p=2, metric_params=None, n_jobs=None): self.n_neighbors = n_neighbors self.metric = metric self.p = p @@ -158,10 +158,9 @@ def transform(self, X, y=None): """ check_is_fitted(self, '_nearest_neighbors') - X = check_array(X, allow_nd=True) + Xt = check_array(X, allow_nd=True) Xt = Parallel(n_jobs=self.n_jobs)( - delayed(self._make_adjacency_matrix)(X[i]) for i in - range(X.shape[0])) + delayed(self._make_adjacency_matrix)(x) for x in Xt) Xt = np.array(Xt) return Xt diff --git a/gtda/graphs/transition.py b/gtda/graphs/transition.py index 526b4a78b..3022fa80e 100644 --- a/gtda/graphs/transition.py +++ b/gtda/graphs/transition.py @@ -16,8 +16,7 @@ def identity(x): - """The identity function. 
- """ + """The identity function.""" return x @@ -195,7 +194,7 @@ def transform(self, X, y=None): """ check_is_fitted(self) - Xt = check_array(X, copy=True, allow_nd=True) + Xt = check_array(X, allow_nd=True) Xt = Parallel(n_jobs=self.n_jobs)( delayed(self._make_adjacency_matrix)(Xt[i]) for i in diff --git a/gtda/images/_utils.py b/gtda/images/_utils.py index 825f1cc90..f2df1bda5 100644 --- a/gtda/images/_utils.py +++ b/gtda/images/_utils.py @@ -8,8 +8,7 @@ def _dilate(X, min_iteration, max_iteration, min_value, max_value): X = X * 1. for iteration in range(min_iteration, min(max_iteration, max_value) + 1): - Xtemp = np.asarray([ndi.binary_dilation(X[i]) - for i in range(X.shape[0])]) + Xtemp = np.asarray([ndi.binary_dilation(x) for x in X]) Xnew = (X + Xtemp) == 1 if np.any(Xnew): X[Xnew] = iteration + min_value diff --git a/gtda/mapper/cover.py b/gtda/mapper/cover.py index 58d543c7e..9fd85b977 100644 --- a/gtda/mapper/cover.py +++ b/gtda/mapper/cover.py @@ -182,11 +182,12 @@ def transform(self, X, y=None): """ check_is_fitted(self) - X = check_array(X, ensure_2d=False) - if X.ndim == 2: - _check_has_one_column(X) + Xt = check_array(X, ensure_2d=False) + + if Xt.ndim == 2: + _check_has_one_column(Xt) else: - X = X[:, None] + Xt = Xt[:, None] if self.kind == 'balanced': # Test whether self.left_limits_ and self.right_limits_ have @@ -194,7 +195,7 @@ def transform(self, X, y=None): # fit_transform but not after fit. self._check_limit_attrs() - Xt = self._transform(X) + Xt = self._transform(Xt) Xt = _remove_empty_and_duplicate_intervals(Xt) return Xt @@ -242,14 +243,15 @@ def fit_transform(self, X, y=None, **fit_params): or duplicated cover sets are removed. 
""" + Xt = check_array(X, ensure_2d=False) validate_params(self.get_params(), self._hyperparameters) - X = check_array(X, ensure_2d=False) - if X.ndim == 2: - _check_has_one_column(X) + + if Xt.ndim == 2: + _check_has_one_column(Xt) else: - X = X[:, None] + Xt = Xt[:, None] - Xt = self._fit_transform(X) + Xt = self._fit_transform(Xt) Xt = _remove_empty_and_duplicate_intervals(Xt) return Xt @@ -441,7 +443,7 @@ def fit(self, X, y=None): X = check_array(X, ensure_2d=False) validate_params(self.get_params(), self._hyperparameters) - # reshape filter function values derived from FunctionTransformer + # Reshape filter function values derived from FunctionTransformer if X.ndim == 1: X = X[:, None] @@ -479,8 +481,9 @@ def transform(self, X, y=None): """ check_is_fitted(self, '_coverers') - # Reshape filter function values derived from FunctionTransformer Xt = check_array(X, ensure_2d=False) + + # Reshape filter function values derived from FunctionTransformer if Xt.ndim == 1: Xt = Xt[:, None] @@ -519,9 +522,10 @@ def fit_transform(self, X, y=None, **fit_params): n_features` as empty or duplicated cover sets are removed. """ - validate_params(self.get_params(), self._hyperparameters) - # reshape filter function values derived from FunctionTransformer Xt = check_array(X, ensure_2d=False) + validate_params(self.get_params(), self._hyperparameters) + + # Reshape filter function values derived from FunctionTransformer if Xt.ndim == 1: Xt = Xt[:, None] diff --git a/gtda/mapper/filter.py b/gtda/mapper/filter.py index 83053e25e..0b6cc2cf1 100644 --- a/gtda/mapper/filter.py +++ b/gtda/mapper/filter.py @@ -69,10 +69,12 @@ def fit(self, X, y=None): # may be computed. May be useful for supervised tasks with Mapper? # Evaluate performance impact of doing this. 
check_array(X) + if self.metric_params is None: self.effective_metric_params_ = dict() else: self.effective_metric_params_ = self.metric_params.copy() + return self def transform(self, X, y=None): @@ -95,13 +97,13 @@ def transform(self, X, y=None): """ check_is_fitted(self) - X = check_array(X) - if self.metric == 'precomputed': - Xt = X - else: + Xt = check_array(X) + + if self.metric != 'precomputed': Xt = squareform( - pdist(X, metric=self.metric, **self.effective_metric_params_)) - Xt = np.linalg.norm(Xt, axis=1, ord=self.exponent).reshape(-1, 1) + pdist(Xt, metric=self.metric, **self.effective_metric_params_)) + + Xt = np.linalg.norm(Xt, axis=1, ord=self.exponent, keepdims=True) return Xt @@ -139,6 +141,7 @@ def fit(self, X, y=None): """ check_array(X) + self._is_fitted = True return self @@ -165,15 +168,15 @@ def transform(self, X, y=None): # consists of "probabilities" that sum to one. Consider normalisation # in terms of bin counts? check_is_fitted(self, '_is_fitted') - X = check_array(X) + Xt = check_array(X) - if np.any(X < 0): + if np.any(Xt < 0): warnings.warn("Negative values detected in X! 
Taking absolute " "value to calculate probabilities.") - X = np.abs(X) + Xt = np.abs(Xt) - probs = X / X.sum(axis=1, keepdims=True) - Xt = (entr(probs).sum(axis=1) / np.log(2)).reshape(-1, 1) + Xt = Xt / Xt.sum(axis=1, keepdims=True) + Xt = entr(Xt).sum(axis=1, keepdims=True) / np.log(2) return Xt @@ -215,6 +218,7 @@ def fit(self, X, y=None): """ check_array(X) + self._is_fitted = True return self @@ -240,9 +244,9 @@ def transform(self, X, y=None): # Simple duck typing to handle case of pandas dataframe input if hasattr(X, 'columns'): # NB in this case we do not check the health of other columns - Xt = check_array(X[self.columns], ensure_2d=False) + Xt = check_array(X[self.columns], ensure_2d=False, copy=True) else: - X = check_array(X) - Xt = X[:, self.columns] - Xt = Xt.reshape(len(X), -1) + Xt = check_array(X, copy=True) + Xt = Xt[:, self.columns] + Xt = Xt.reshape(len(Xt), -1) return Xt diff --git a/gtda/plotting/images.py b/gtda/plotting/images.py index 304418281..be8d7e25c 100644 --- a/gtda/plotting/images.py +++ b/gtda/plotting/images.py @@ -2,7 +2,6 @@ # License: GNU AGPLv3 import plotly.graph_objects as gobj -from sklearn.utils.validation import check_array def plot_heatmap(data, x=None, y=None, colorscale='greys', origin='upper', @@ -33,7 +32,6 @@ def plot_heatmap(data, x=None, y=None, colorscale='greys', origin='upper', Title of the resulting figure. 
""" - check_array(data, ensure_2d=True) autorange = True if origin == 'lower' else 'reversed' layout = dict( xaxis=dict(scaleanchor='y', constrain='domain'), diff --git a/gtda/point_clouds/rescaling.py b/gtda/point_clouds/rescaling.py index 460fb08ca..7b6284f70 100644 --- a/gtda/point_clouds/rescaling.py +++ b/gtda/point_clouds/rescaling.py @@ -184,11 +184,10 @@ def transform(self, X, y=None): """ check_is_fitted(self) - Xt = check_array(X, allow_nd=True, copy=True) + Xt = check_array(X, allow_nd=True) Xt = Parallel(n_jobs=self.n_jobs)( - delayed(self._consistent_rescaling)(Xt[i]) - for i in range(Xt.shape[0])) + delayed(self._consistent_rescaling)(x) for x in Xt) Xt = np.array(Xt) return Xt @@ -291,13 +290,6 @@ def __init__(self, metric='euclidean', metric_params=None, factor=0., self.factor = factor self.n_jobs = n_jobs - def _consecutive_rescaling(self, X): - Xm = pairwise_distances(X, metric=self.metric, n_jobs=1, - **self.effective_metric_params_) - - Xm[range(Xm.shape[0]-1), range(1, Xm.shape[0])] *= self.factor - return Xm - def fit(self, X, y=None): """Calculate :attr:`effective_metric_params_`. Then, return the estimator. 
@@ -361,12 +353,21 @@ def transform(self, X, y=None): """ check_is_fitted(self) - Xt = check_array(X, allow_nd=True, copy=True) + is_precomputed = self.metric == 'precomputed' + X = check_array(X, allow_nd=True, copy=is_precomputed) Xt = Parallel(n_jobs=self.n_jobs)( - delayed(self._consecutive_rescaling)(Xt[i]) - for i in range(Xt.shape[0])) - Xt = np.array(Xt) + delayed(pairwise_distances)( + x, metric=self.metric, n_jobs=1, + **self.effective_metric_params_) + for x in X) + + if is_precomputed: + # Parallel loop above serves only as additional input validation + Xt = X + else: + Xt = np.array(Xt) + Xt[:, range(Xt.shape[1] - 1), range(1, Xt.shape[1])] *= self.factor return Xt @staticmethod diff --git a/gtda/time_series/embedding.py b/gtda/time_series/embedding.py index 6c37a7870..9f4ec34d3 100644 --- a/gtda/time_series/embedding.py +++ b/gtda/time_series/embedding.py @@ -137,11 +137,11 @@ def transform(self, X, y=None): """ check_is_fitted(self, '_is_fitted') - X = check_array(X, ensure_2d=False, allow_nd=True) + Xt = check_array(X, ensure_2d=False, allow_nd=True) - window_slices = self._slice_windows(X) + window_slices = self._slice_windows(Xt) - Xt = np.stack([X[begin:end] for begin, end in window_slices]) + Xt = np.stack([Xt[begin:end] for begin, end in window_slices]) return Xt def resample(self, y, X=None): @@ -470,6 +470,7 @@ def transform(self, X, y=None): """ check_is_fitted(self) Xt = check_array(X, ensure_2d=False) + if Xt.ndim == 1: Xt = Xt[:, None] Xt = self._embed(Xt, self.time_delay_, self.dimension_, self.stride) diff --git a/gtda/time_series/features.py b/gtda/time_series/features.py index 28dec9e8b..4274f0346 100644 --- a/gtda/time_series/features.py +++ b/gtda/time_series/features.py @@ -49,7 +49,7 @@ def _entropy(self, X): def _permutation_entropy(self, X): Xo = np.argsort(X, axis=2) - Xo = np.stack([self._entropy(Xo[i]) for i in range(Xo.shape[0])]) + Xo = np.stack([self._entropy(x) for x in Xo]) return Xo.reshape(-1, 1) def fit(self, X, 
y=None): @@ -97,10 +97,10 @@ def transform(self, X, y=None): """ check_is_fitted(self, '_is_fitted') - X = check_array(X, allow_nd=True) + Xt = check_array(X, allow_nd=True) Xt = Parallel(n_jobs=self.n_jobs)(delayed( - self._permutation_entropy)(X[s]) - for s in gen_even_slices(len(X), effective_n_jobs(self.n_jobs))) + self._permutation_entropy)(Xt[s]) + for s in gen_even_slices(len(Xt), effective_n_jobs(self.n_jobs))) Xt = np.concatenate(Xt) return Xt diff --git a/gtda/time_series/multivariate.py b/gtda/time_series/multivariate.py index 02239a44f..932933cc8 100644 --- a/gtda/time_series/multivariate.py +++ b/gtda/time_series/multivariate.py @@ -100,7 +100,7 @@ def transform(self, X, y=None): """ check_is_fitted(self, '_is_fitted') - check_array(X, allow_nd=True) + X = check_array(X, allow_nd=True) Xt = np.empty((X.shape[0], X.shape[2], X.shape[2])) for i, sample in enumerate(X): diff --git a/gtda/time_series/preprocessing.py b/gtda/time_series/preprocessing.py index af1f7b839..c5479a166 100644 --- a/gtda/time_series/preprocessing.py +++ b/gtda/time_series/preprocessing.py @@ -88,7 +88,8 @@ def transform(self, X, y=None): """ check_is_fitted(self, '_is_fitted') - Xt = check_array(X, ensure_2d=False, allow_nd=True) + Xt = check_array(X, ensure_2d=False, allow_nd=True, copy=True) + if Xt.ndim == 1: Xt = Xt[: None] Xt = Xt[::self.period] @@ -209,6 +210,7 @@ def transform(self, X, y=None): """ check_is_fitted(self, '_is_fitted') Xt = check_array(X, ensure_2d=False, allow_nd=True) + if Xt.ndim == 1: Xt = Xt[:, None] diff --git a/gtda/utils/validation.py b/gtda/utils/validation.py index fcadb35c0..b55453eb0 100644 --- a/gtda/utils/validation.py +++ b/gtda/utils/validation.py @@ -237,7 +237,7 @@ def check_point_clouds(X, **kwargs): kwargs Keyword arguments. For a list of accepted values, see the documentation - of :func:`~`gtda.utils.validation.check_list_of_arrays``. + of :func:`~gtda.utils.validation.check_list_of_arrays`. 
Returns ------- @@ -248,11 +248,12 @@ def check_point_clouds(X, **kwargs): if hasattr(X, 'shape'): return check_array(X, **kwargs) else: - kwargs_default = {'ensure_2d': False, 'force_all_finite': False, - 'check_shapes': [('embedding_dimension', - lambda x: x.shape[1:], - 'Not all point clouds have the ' - 'same embedding dimension.')]} + kwargs_default = { + 'ensure_2d': True, + 'force_all_finite': False, + 'check_shapes': [ + (lambda x: x.shape[1:], "Not all point clouds have the same " + "embedding dimension.")]} kwargs_default.update(kwargs) return check_list_of_arrays(X, **kwargs_default) @@ -260,16 +261,20 @@ def check_point_clouds(X, **kwargs): def check_dimensions(X, get_property): """Check the dimensions of X are consistent, where the check is defined by get_property 'sample-wise'. + Parameters ---------- - X: list of ndarray, + X : list of ndarray, Usually represents point clouds or images- see - :func:`~`gtda.utils.validation.check_list_of_arrays``. + :func:`~gtda.utils.validation.check_list_of_arrays`. - get_property: function: ndarray -> _, + get_property : function: ndarray -> _, Defines a property to be conserved, across all arrays (samples) in X. 
+ Returns + ------- + """ from functools import reduce from operator import and_ @@ -307,10 +312,8 @@ def check_list_of_arrays(X, check_shapes=list(), **kwargs): """ # if restrictions on the dimensions of the input are imposed - for (test_name, get_property, err_message) in check_shapes: - if check_dimensions(X, get_property): - continue - else: + for get_property, err_message in check_shapes: + if not check_dimensions(X, get_property): raise ValueError(err_message) is_check_failed = False @@ -327,5 +330,4 @@ def check_list_of_arrays(X, check_shapes=list(), **kwargs): if is_check_failed: raise ValueError("The following errors were raised" + "by the inputs: \n" + "\n".join(messages)) - else: - return X + return X From fca12726b0dab5476656fedb698fdacbfdb8eed2 Mon Sep 17 00:00:00 2001 From: Umberto Date: Sun, 22 Mar 2020 19:59:29 +0100 Subject: [PATCH 36/45] Add check_point_clouds to utils __init__ and docs --- doc/modules/validation.rst | 1 + gtda/utils/__init__.py | 5 ++--- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/modules/validation.rst b/doc/modules/validation.rst index 0dc37db47..1e20375fb 100644 --- a/doc/modules/validation.rst +++ b/doc/modules/validation.rst @@ -12,4 +12,5 @@ :template: function.rst utils.check_diagrams + utils.check_point_clouds utils.validate_params diff --git a/gtda/utils/__init__.py b/gtda/utils/__init__.py index a6e9b2077..919720e5a 100644 --- a/gtda/utils/__init__.py +++ b/gtda/utils/__init__.py @@ -1,12 +1,11 @@ """The module :mod:`gtda.utils` implements hyperparameter and input validation functions.""" -from .validation import check_diagrams, check_graph -from .validation import validate_params +from .validation import check_diagrams, check_point_clouds, validate_params __all__ = [ 'check_diagrams', - 'check_graph', + 'check_point_clouds', 'validate_params' ] From 79b529a2da303e8ab1d572ee8d58a4cc182928f1 Mon Sep 17 00:00:00 2001 From: Umberto Date: Sun, 22 Mar 2020 20:04:04 +0100 Subject: [PATCH 37/45] Remove 
check_images and additions to test_common These are valuable and thank you @wreise, but in the interest of pushing at least check_point_clouds, I am opting for a more ad-hoc implementation of the latter and for the temporary removal of check_images. As we also did not have much time to discuss the additions to test_common, I will also revert that file to the state on master for now. It can be reintroduced in a later PR together with check_images --- gtda/homology/cubical.py | 24 ++++---- gtda/images/filtrations.py | 42 ++++++-------- gtda/images/preprocessing.py | 43 +++++++------- gtda/tests/test_common.py | 17 ------ gtda/utils/validation.py | 107 ++++++++++++----------------------- 5 files changed, 83 insertions(+), 150 deletions(-) diff --git a/gtda/homology/cubical.py b/gtda/homology/cubical.py index 87ec462da..4437947cc 100644 --- a/gtda/homology/cubical.py +++ b/gtda/homology/cubical.py @@ -5,16 +5,15 @@ import numpy as np from joblib import Parallel, delayed - from sklearn.base import BaseEstimator, TransformerMixin -from sklearn.utils.validation import check_is_fitted +from sklearn.utils.validation import check_array, check_is_fitted from ._utils import _pad_diagram from ..base import PlotterMixin from ..externals.python import CubicalComplex, PeriodicCubicalComplex from ..plotting import plot_diagram from ..utils.intervals import Interval -from ..utils.validation import validate_params, check_images +from ..utils.validation import validate_params class CubicalPersistence(BaseEstimator, TransformerMixin, PlotterMixin): @@ -146,7 +145,7 @@ def fit(self, X, y=None): self : object """ - check_images(X, allow_nd=True) + X = check_array(X, allow_nd=True) validate_params( self.get_params(), self._hyperparameters, exclude=['n_jobs']) @@ -202,24 +201,23 @@ def transform(self, X, y=None): `X`. 
""" check_is_fitted(self) + Xt = check_array(X, allow_nd=True) Xt = Parallel(n_jobs=self.n_jobs)( - delayed(self._gudhi_diagram)(X[i, :, :]) for i in range( - X.shape[0])) + delayed(self._gudhi_diagram)(x) for x in Xt) max_n_points = { - dim: max(1, np.max([Xt[i][dim].shape[0] for i in range(len( - Xt))])) for dim in self.homology_dimensions} - min_values = { - dim: min([np.min(Xt[i][dim][:, 0]) if Xt[i][dim].size else - np.inf for i in range(len(Xt))]) for dim in + dim: max(1, np.max([x[dim].shape[0] for x in Xt])) for dim in self.homology_dimensions} + min_values = { + dim: min([np.min(x[dim][:, 0]) if x[dim].size else np.inf for x + in Xt]) for dim in self.homology_dimensions} min_values = { dim: min_values[dim] if min_values[dim] != np.inf else 0 for dim in self.homology_dimensions} Xt = Parallel(n_jobs=self.n_jobs)(delayed(_pad_diagram)( - Xt[i], self._homology_dimensions, max_n_points, min_values) - for i in range(len(Xt))) + x, self._homology_dimensions, max_n_points, min_values) + for x in Xt) Xt = np.stack(Xt) Xt = np.nan_to_num(Xt, posinf=self.infinity_values_) return Xt diff --git a/gtda/images/filtrations.py b/gtda/images/filtrations.py index 88b712c09..624da3f4e 100644 --- a/gtda/images/filtrations.py +++ b/gtda/images/filtrations.py @@ -10,15 +10,14 @@ from sklearn.base import BaseEstimator, TransformerMixin from sklearn.metrics import pairwise_distances from sklearn.utils import gen_even_slices - -from sklearn.utils.validation import check_is_fitted +from sklearn.utils.validation import check_array, check_is_fitted from ._utils import _dilate, _erode from ..base import PlotterMixin from ..plotting import plot_heatmap from ..utils._docs import adapt_fit_transform_docs from ..utils.intervals import Interval -from ..utils.validation import validate_params, check_images +from ..utils.validation import validate_params @adapt_fit_transform_docs @@ -87,7 +86,7 @@ def __init__(self, direction=None, n_jobs=None): def _calculate_height(self, X): Xh = 
np.full(X.shape, self.max_value_) - for i in range(Xh.shape[0]): + for i in range(len(Xh)): Xh[i][np.where(X[i])] = np.dot(self.mesh_[np.where(X[i])], self.direction_).reshape((-1,)) @@ -116,7 +115,7 @@ def fit(self, X, y=None): self : object """ - X = check_images(X, allow_nd=True) + X = check_array(X, allow_nd=True) self.n_dimensions_ = X.ndim - 1 if (self.n_dimensions_ < 2) or (self.n_dimensions_ > 3): warn(f"Input of `fit` contains arrays of dimension " @@ -171,12 +170,11 @@ def transform(self, X, y=None): """ check_is_fitted(self) - Xt = check_images(X, allow_nd=True, copy=True) + Xt = check_array(X, allow_nd=True) Xt = Parallel(n_jobs=self.n_jobs)( delayed(self._calculate_height)(X[s]) - for s in gen_even_slices(Xt.shape[0], - effective_n_jobs(self.n_jobs))) + for s in gen_even_slices(len(Xt), effective_n_jobs(self.n_jobs))) Xt = np.concatenate(Xt) return Xt @@ -337,7 +335,7 @@ def fit(self, X, y=None): self : object """ - X = check_images(X, allow_nd=True) + X = check_array(X, allow_nd=True) self.n_dimensions_ = X.ndim - 1 if (self.n_dimensions_ < 2) or (self.n_dimensions_ > 3): warn(f"Input of `fit` contains arrays of dimension " @@ -398,12 +396,11 @@ def transform(self, X, y=None): """ check_is_fitted(self) - Xt = check_images(X, allow_nd=True, copy=True) + Xt = check_array(X, allow_nd=True) Xt = Parallel(n_jobs=self.n_jobs)( delayed(self._calculate_radial)(X[s]) - for s in gen_even_slices(Xt.shape[0], - effective_n_jobs(self.n_jobs))) + for s in gen_even_slices(len(Xt), effective_n_jobs(self.n_jobs))) Xt = np.concatenate(Xt) return Xt @@ -529,7 +526,7 @@ def fit(self, X, y=None): self : object """ - X = check_images(X, allow_nd=True) + X = check_array(X, allow_nd=True) n_dimensions = X.ndim - 1 if (n_dimensions < 2) or (n_dimensions > 3): @@ -572,12 +569,11 @@ def transform(self, X, y=None): """ check_is_fitted(self) - Xt = check_images(X, allow_nd=True, copy=True) + Xt = check_array(X, allow_nd=True) Xt = Parallel(n_jobs=self.n_jobs)( 
delayed(self._calculate_dilation)(X[s]) - for s in gen_even_slices(Xt.shape[0], - effective_n_jobs(self.n_jobs))) + for s in gen_even_slices(len(Xt), effective_n_jobs(self.n_jobs))) Xt = np.concatenate(Xt) return Xt @@ -703,7 +699,7 @@ def fit(self, X, y=None): self : object """ - X = check_images(X, allow_nd=True) + X = check_array(X, allow_nd=True) n_dimensions = X.ndim - 1 if (n_dimensions < 2) or (n_dimensions > 3): warn(f"Input of `fit` contains arrays of dimension " @@ -745,12 +741,11 @@ def transform(self, X, y=None): """ check_is_fitted(self) - Xt = check_images(X, allow_nd=True, copy=True) + Xt = check_array(X, allow_nd=True) Xt = Parallel(n_jobs=self.n_jobs)( delayed(self._calculate_erosion)(X[s]) - for s in gen_even_slices(Xt.shape[0], - effective_n_jobs(self.n_jobs))) + for s in gen_even_slices(len(Xt), effective_n_jobs(self.n_jobs))) Xt = np.concatenate(Xt) return Xt @@ -886,7 +881,7 @@ def fit(self, X, y=None): self : object """ - X = check_images(X, allow_nd=True) + X = check_array(X, allow_nd=True) n_dimensions = X.ndim - 1 if (n_dimensions < 2) or (n_dimensions > 3): warn(f"Input of `fit` contains arrays of dimension " @@ -928,12 +923,11 @@ def transform(self, X, y=None): """ check_is_fitted(self) - Xt = check_images(X, allow_nd=True, copy=True) + Xt = check_array(X, allow_nd=True) Xt = Parallel(n_jobs=self.n_jobs)( delayed(self._calculate_signed_distance)(X[s]) - for s in gen_even_slices(Xt.shape[0], - effective_n_jobs(self.n_jobs))) + for s in gen_even_slices(len(Xt), effective_n_jobs(self.n_jobs))) Xt = np.concatenate(Xt) return Xt diff --git a/gtda/images/preprocessing.py b/gtda/images/preprocessing.py index c3f405c2b..9fb7fe84a 100644 --- a/gtda/images/preprocessing.py +++ b/gtda/images/preprocessing.py @@ -9,13 +9,13 @@ from joblib import Parallel, delayed, effective_n_jobs from sklearn.base import BaseEstimator, TransformerMixin from sklearn.utils import gen_even_slices -from sklearn.utils.validation import check_is_fitted +from 
sklearn.utils.validation import check_array, check_is_fitted from ..base import PlotterMixin from ..plotting import plot_point_cloud, plot_heatmap from ..utils._docs import adapt_fit_transform_docs from ..utils.intervals import Interval -from ..utils.validation import validate_params, check_images +from ..utils.validation import validate_params @adapt_fit_transform_docs @@ -91,7 +91,7 @@ def fit(self, X, y=None): self : object """ - X = check_images(X, allow_nd=True) + X = check_array(X, allow_nd=True) self.n_dimensions_ = X.ndim - 1 if (self.n_dimensions_ < 2) or (self.n_dimensions_ > 3): warn(f"Input of `fit` contains arrays of dimension " @@ -127,12 +127,11 @@ def transform(self, X, y=None): """ check_is_fitted(self) - Xt = check_images(X, allow_nd=True, copy=True) + Xt = check_array(X, allow_nd=True) Xt = Parallel(n_jobs=self.n_jobs)(delayed( self._binarize)(Xt[s]) - for s in gen_even_slices(X.shape[0], - effective_n_jobs(self.n_jobs))) + for s in gen_even_slices(len(Xt), effective_n_jobs(self.n_jobs))) Xt = np.concatenate(Xt) if self.n_dimensions_ == 2: @@ -211,7 +210,7 @@ def fit(self, X, y=None): self : object """ - X = check_images(X, allow_nd=True) + check_array(X, allow_nd=True) self._is_fitted = True return self @@ -239,12 +238,11 @@ def transform(self, X, y=None): """ check_is_fitted(self, ['_is_fitted']) - Xt = check_images(X, allow_nd=True, copy=True) + Xt = check_array(X, allow_nd=True) Xt = Parallel(n_jobs=self.n_jobs)(delayed( np.logical_not)(Xt[s]) - for s in gen_even_slices(X.shape[0], - effective_n_jobs(self.n_jobs))) + for s in gen_even_slices(len(Xt), effective_n_jobs(self.n_jobs))) Xt = np.concatenate(Xt) return Xt @@ -345,7 +343,7 @@ def fit(self, X, y=None): self : object """ - check_images(X, allow_nd=True) + X = check_array(X, allow_nd=True) n_dimensions = X.ndim - 1 if n_dimensions < 2 or n_dimensions > 3: warn(f"Input of `fit` contains arrays of dimension " @@ -391,13 +389,12 @@ def transform(self, X, y=None): """ check_is_fitted(self) 
- Xt = check_images(X, allow_nd=True, copy=True) + Xt = check_array(X, allow_nd=True) Xt = Parallel(n_jobs=self.n_jobs)(delayed( np.pad)(Xt[s], pad_width=self._pad_width, constant_values=self.activated) - for s in gen_even_slices(X.shape[0], - effective_n_jobs(self.n_jobs))) + for s in gen_even_slices(len(Xt), effective_n_jobs(self.n_jobs))) Xt = np.concatenate(Xt) return Xt @@ -435,9 +432,10 @@ class ImageToPointCloud(BaseEstimator, TransformerMixin, PlotterMixin): The coordinates of each point is calculated as follows. For each activated pixel, assign coordinates that are the pixel position on this image. - This transformer is meant to transform a collection of images to a point - cloud so that collection of point clouds-based persistent homology module - can be applied. + + This transformer is meant to transform a collection of images to a + collection of point clouds so that persistent homology calculations can be + performed. Parameters ---------- @@ -477,9 +475,7 @@ def _embed(self, X): return Xpts def fit(self, X, y=None): - """Do nothing and return the estimator unchanged. - This method is here to implement the usual scikit-learn API and hence - work in pipelines. + """Compute :attr:`mesh_`, and return the estimator. 
Parameters ---------- @@ -496,7 +492,7 @@ def fit(self, X, y=None): self : object """ - check_images(X, allow_nd=True) + check_array(X, allow_nd=True) n_dimensions = X.ndim - 1 if n_dimensions < 2 or n_dimensions > 3: @@ -537,12 +533,11 @@ def transform(self, X, y=None): """ check_is_fitted(self) - Xt = check_images(X, allow_nd=True, copy=True) + Xt = check_array(X, allow_nd=True) Xt = Parallel(n_jobs=self.n_jobs)(delayed( self._embed)(Xt[s]) - for s in gen_even_slices(X.shape[0], - effective_n_jobs(self.n_jobs))) + for s in gen_even_slices(len(Xt), effective_n_jobs(self.n_jobs))) Xt = reduce(sum, Xt, []) return Xt diff --git a/gtda/tests/test_common.py b/gtda/tests/test_common.py index 1e9a515f7..057e55b88 100644 --- a/gtda/tests/test_common.py +++ b/gtda/tests/test_common.py @@ -27,19 +27,6 @@ ], } -LISTFAIL_TESTS = ['check_estimators_dtypes', 'check_fit_score_takes_y', - 'check_estimators_fit_returns_self', - 'check_estimators_fit_returns_self(readonly_memmap=True)', - 'check_complex_data', 'check_dtype_object', - 'check_estimators_empty_data_messages', - 'check_pipeline_consistency', 'check_estimators_nan_inf', - 'check_estimators_overwrite_params', - 'check_estimator_sparse_data', 'check_estimators_pickle', - 'check_fit2d_predict1d', 'check_methods_subset_invariance', - 'check_fit2d_1sample', 'check_fit2d_1feature', - 'check_dict_unchanged', 'check_dont_overwrite_parameters', - 'check_fit_idempotent'] - # adapted from sklearn.utils.estimator_check v0.22 def _get_callable_name(obj): @@ -99,8 +86,4 @@ def test_sklearn_api(check, estimator, request): request.applymarker(pytest.mark.xfail( run=True, reason='known failure')) - if check_name in LISTFAIL_TESTS: - request.applymarker(pytest.mark.xfail( - run=True, reason='Known failure: 2d input.')) - check(estimator) diff --git a/gtda/utils/validation.py b/gtda/utils/validation.py index b55453eb0..84a5f3f69 100644 --- a/gtda/utils/validation.py +++ b/gtda/utils/validation.py @@ -183,46 +183,6 @@ def 
validate_params(parameters, references, exclude=None): return _validate_params(parameters_, references) -def check_images(X, **kwargs): - """Check a list of arrays representing images, by iterating through - the input one by one. To pass a test when `kwargs` is empty, - all images ``x``, ``y`` in `X` must satisfy: - - ``x.ndim >= 2``, - - ``all(np.isfinite(x))``, - - ``x.shape == y.shape``. - - Parameters - ---------- - X : list of ndarray - Each entry of `X` corresponds to an image. - - kwargs - Keyword arguments. For a list of accepted values, see the documentation - of :func:`~gtda.utils.validation.check_list_of_arrays`. - - Returns - ------- - X : list of ndarray - as modified by :func:`~sklearn.utils.validation.check_array` - - """ - if hasattr(X, 'shape'): - if X.ndim < 3: - raise ValueError(f"An image in the collection X should be at " - f"least of dimension 2, while it has dimension " - f"{X.ndim - 1}.") - return check_array(X, **kwargs) - else: - kwargs_default = {'force_all_finite': True, - 'ensure_2d': False, 'allow_nd': True, - 'check_shapes': [('embedding_dimension', - lambda x: x.shape, - 'The images should have exactly' - 'the same shape')]} - kwargs_default.update(kwargs) - return check_list_of_arrays(X, **kwargs_default) - - def check_point_clouds(X, **kwargs): """Check a list of arrays representing point clouds, by integrating through the input one by one. To pass a test when `kwargs` is empty, @@ -258,49 +218,27 @@ def check_point_clouds(X, **kwargs): return check_list_of_arrays(X, **kwargs_default) -def check_dimensions(X, get_property): - """Check the dimensions of X are consistent, where the check is defined - by get_property 'sample-wise'. - - Parameters - ---------- - X : list of ndarray, - Usually represents point clouds or images- see - :func:`~gtda.utils.validation.check_list_of_arrays`. - - get_property : function: ndarray -> _, - Defines a property to be conserved, across all arrays (samples) - in X. 
- - Returns - ------- - - """ - from functools import reduce - from operator import and_ - reference = get_property(X[0]) - return reduce(and_, map(lambda x: get_property(x) == reference, X[1:]), - True) - - def check_list_of_arrays(X, check_shapes=list(), **kwargs): - """Input validation on a list of lists, arrays, sparse matrices, or similar. + """Input validation on a list of arrays, sparse matrices, or similar. - The constraints are to be specified in `kwargs`. On top of - parameters from :func:`~sklearn.utils.validation.check_array`, - the optional parameters are listed below. + Each entry in the list is validated using + :func:`~sklearn.utils.validation.check_array`. Optionally, some shared + shapes of + all + optional + parameters are + are listed below. Parameters ---------- X : list Input list of objects to check / convert. - check_shapes: list of tuples t, where t = (str, function to pass to - check_dimensions, error message if test fails). + check_shapes : list of tuple The checks are applied in the order they are provided, only until the first failure. - kwargs : + kwargs Keyword arguments. For a list of accepted values, see the documentation of :func:`~sklearn.utils.validation.check_array`. @@ -311,6 +249,31 @@ def check_list_of_arrays(X, check_shapes=list(), **kwargs): :func:`~sklearn.utils.validation.check_array` """ + + def check_dimensions(X, get_property): + """Check the dimensions of X are consistent, where the check is defined + by get_property 'sample-wise'. + + Parameters + ---------- + X : list of ndarray, + Usually represents point clouds or images- see + :func:`~gtda.utils.validation.check_list_of_arrays`. + + get_property : function: ndarray -> _, + Defines a property to be conserved, across all arrays (samples) + in X. 
+ + Returns + ------- + + """ + from functools import reduce + from operator import and_ + reference = get_property(X[0]) + return reduce(and_, map(lambda x: get_property(x) == reference, X[1:]), + True) + # if restrictions on the dimensions of the input are imposed for get_property, err_message in check_shapes: if not check_dimensions(X, get_property): From 811fe87edd61cc776699adae2f675154f36d1c6d Mon Sep 17 00:00:00 2001 From: Umberto Date: Sun, 22 Mar 2020 20:17:43 +0100 Subject: [PATCH 38/45] Linting --- gtda/homology/cubical.py | 6 +++--- gtda/images/preprocessing.py | 11 ++++++----- gtda/time_series/embedding.py | 2 +- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/gtda/homology/cubical.py b/gtda/homology/cubical.py index 4437947cc..373fb3b1a 100644 --- a/gtda/homology/cubical.py +++ b/gtda/homology/cubical.py @@ -133,7 +133,7 @@ def fit(self, X, y=None): Parameters ---------- - X : ndarray, shape (n_samples, n_pixels_1, ..., n_pixels_d) + X : ndarray of shape (n_samples, n_pixels_1, ..., n_pixels_d) Input data. Array of d-dimensional images. y : None @@ -184,7 +184,7 @@ def transform(self, X, y=None): Parameters ---------- - X : ndarray, shape (n_samples, n_pixels_1, ..., n_pixels_d) + X : ndarray of shape (n_samples, n_pixels_1, ..., n_pixels_d) Input data. Array of d-dimensional images. y : None @@ -193,7 +193,7 @@ def transform(self, X, y=None): Returns ------- - Xt : ndarray, shape (n_samples, n_features, 3) + Xt : ndarray of shape (n_samples, n_features, 3) Array of persistence diagrams computed from the feature arrays or distance matrices in `X`. 
``n_features`` equals :math:`\\sum_q n_q`, where :math:`n_q` is the maximum number of diff --git a/gtda/images/preprocessing.py b/gtda/images/preprocessing.py index 9fb7fe84a..0d300f4bb 100644 --- a/gtda/images/preprocessing.py +++ b/gtda/images/preprocessing.py @@ -446,7 +446,7 @@ class ImageToPointCloud(BaseEstimator, TransformerMixin, PlotterMixin): Attributes ---------- - mesh_ : ndarray, shape (n_pixels_x * n_pixels_y [* n_pixels_z], \ + mesh_ : ndarray of shape (n_pixels_x * n_pixels_y [* n_pixels_z], \ n_dimensions) Mesh image for which each pixel value is its coordinates in a ``n_dimensions``-dimensional space, where ``n_dimensions`` is the @@ -479,7 +479,7 @@ def fit(self, X, y=None): Parameters ---------- - X : ndarray, shape (n_samples, n_pixels_x, n_pixels_y [, n_pixels_z]) + X : ndarray of shape (n_samples, n_pixels_x, n_pixels_y [, n_pixels_z]) Input data. Each entry along axis 0 is interpreted as a 2D or 3D binary image. @@ -516,7 +516,7 @@ def transform(self, X, y=None): Parameters ---------- - X : ndarray, shape (n_samples, n_pixels_x, n_pixels_y [, n_pixels_z]) + X : ndarray of shape (n_samples, n_pixels_x, n_pixels_y [, n_pixels_z]) Input data. Each entry along axis 0 is interpreted as a 2D or 3D binary image. @@ -526,7 +526,8 @@ def transform(self, X, y=None): Returns ------- - Xt : ndarray, shape (n_samples, n_pixels_x * n_pixels_y [* n_pixels_z], + Xt : ndarray of shape (n_samples, n_pixels_x * n_pixels_y [* \ + n_pixels_z], n_dimensions) Transformed collection of images. Each entry along axis 0 is a point cloud in ``n_dimensions``-dimensional space. @@ -549,7 +550,7 @@ def plot(Xt, sample=0): Parameters ---------- - Xt : ndarray, shape (n_samples, n_points, n_dimensions) + Xt : ndarray of shape (n_samples, n_points, n_dimensions) Collection of point clouds in ``n_dimension``-dimensional space, such as returned by :meth:`transform`. 
diff --git a/gtda/time_series/embedding.py b/gtda/time_series/embedding.py index 9f4ec34d3..a909c7e71 100644 --- a/gtda/time_series/embedding.py +++ b/gtda/time_series/embedding.py @@ -184,7 +184,7 @@ def plot(Xt, sample=0): Parameters ---------- - Xt : ndarray, shape (n_samples, n_points, n_dimensions) + Xt : ndarray of shape (n_samples, n_points, n_dimensions) Collection of sliding windows, each containing ``n_points`` points in ``n_dimensions``-dimensional space, such as returned by :meth:`transform`. From e6c311b02676703c36dc498cfed4f3976b06fa82 Mon Sep 17 00:00:00 2001 From: Umberto Date: Mon, 23 Mar 2020 01:38:48 +0100 Subject: [PATCH 39/45] Refactor ImageToPointCloud to return lists of point clouds --- gtda/images/preprocessing.py | 35 +++++++++++------------------------ 1 file changed, 11 insertions(+), 24 deletions(-) diff --git a/gtda/images/preprocessing.py b/gtda/images/preprocessing.py index 0d300f4bb..dc996f8d5 100644 --- a/gtda/images/preprocessing.py +++ b/gtda/images/preprocessing.py @@ -2,6 +2,7 @@ # License: GNU AGPLv3 from functools import reduce +from operator import iconcat from numbers import Real from warnings import warn @@ -431,7 +432,8 @@ class ImageToPointCloud(BaseEstimator, TransformerMixin, PlotterMixin): """Represent active pixels in 2D/3D binary images as points in 2D/3D space. The coordinates of each point is calculated as follows. For each activated - pixel, assign coordinates that are the pixel position on this image. + pixel, assign coordinates that are the pixel index on this image, after + flipping the rows and then swapping between rows and columns. This transformer is meant to transform a collection of images to a collection of point clouds so that persistent homology calculations can be @@ -444,14 +446,6 @@ class ImageToPointCloud(BaseEstimator, TransformerMixin, PlotterMixin): in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. 
- Attributes - ---------- - mesh_ : ndarray of shape (n_pixels_x * n_pixels_y [* n_pixels_z], \ - n_dimensions) - Mesh image for which each pixel value is its coordinates in a - ``n_dimensions``-dimensional space, where ``n_dimensions`` is the - dimension of the images of the input collection. Set in meth:`fit`. - See also -------- gtda.homology.VietorisRipsPersistence, gtda.homology.SparseRipsPersistence, @@ -470,12 +464,12 @@ def __init__(self, n_jobs=None): self.n_jobs = n_jobs def _embed(self, X): - Xpts = np.stack([self.mesh_ for _ in range(X.shape[0])]) * 1. - Xpts[np.logical_not(X.reshape((X.shape[0], -1))), :] += np.inf - return Xpts + return [np.argwhere(x) for x in X] def fit(self, X, y=None): - """Compute :attr:`mesh_`, and return the estimator. + """Do nothing and return the estimator unchanged. + This method is here to implement the usual scikit-learn API and hence + work in pipelines. Parameters ---------- @@ -499,14 +493,7 @@ def fit(self, X, y=None): warn(f"Input of `fit` contains arrays of dimension " f"{self.n_dimensions_}.") - axis_order = [2, 1, 3] - mesh_range_list = [np.arange(0, X.shape[i]) - for i in axis_order[:n_dimensions]] - - self.mesh_ = np.flip(np.stack(np.meshgrid(*mesh_range_list), - axis=n_dimensions), - axis=0).reshape((-1, n_dimensions)) - + self._is_fitted = True return self def transform(self, X, y=None): @@ -533,14 +520,14 @@ def transform(self, X, y=None): point cloud in ``n_dimensions``-dimensional space. 
""" - check_is_fitted(self) + check_is_fitted(self, '_is_fitted') Xt = check_array(X, allow_nd=True) + Xt = np.swapaxes(np.flip(Xt, axis=1), 1, 2) Xt = Parallel(n_jobs=self.n_jobs)(delayed( self._embed)(Xt[s]) for s in gen_even_slices(len(Xt), effective_n_jobs(self.n_jobs))) - - Xt = reduce(sum, Xt, []) + Xt = reduce(iconcat, Xt, []) return Xt @staticmethod From 5fb744f93988d3f1870e9e181db14959a211aae9 Mon Sep 17 00:00:00 2001 From: Umberto Date: Mon, 23 Mar 2020 02:51:33 +0100 Subject: [PATCH 40/45] Refactor check_point_clouds making it more ad-hoc --- gtda/homology/simplicial.py | 32 ++++----- gtda/utils/validation.py | 136 ++++++++++++------------------------ 2 files changed, 56 insertions(+), 112 deletions(-) diff --git a/gtda/homology/simplicial.py b/gtda/homology/simplicial.py index d8b07d8a4..52d9857b8 100644 --- a/gtda/homology/simplicial.py +++ b/gtda/homology/simplicial.py @@ -169,10 +169,10 @@ def fit(self, X, y=None): self : object """ - check_point_clouds(X, ensure_2d=False, allow_nd=True, - force_all_finite=False) validate_params( self.get_params(), self._hyperparameters, exclude=['n_jobs']) + self._is_precomputed = self.metric == 'precomputed' + check_point_clouds(X, distance_matrix=self._is_precomputed) if self.infinity_values is None: self.infinity_values_ = self.max_edge_length @@ -220,12 +220,10 @@ def transform(self, X, y=None): """ check_is_fitted(self) - X = check_point_clouds(X, ensure_2d=False, - allow_nd=True, - force_all_finite=False) + X = check_point_clouds(X, distance_matrix=self._is_precomputed) - Xt = Parallel(n_jobs=self.n_jobs)(delayed(self._ripser_diagram)(X[i]) - for i in range(len(X))) + Xt = Parallel(n_jobs=self.n_jobs)( + delayed(self._ripser_diagram)(x) for x in X) Xt = _postprocess_diagrams(Xt, self._homology_dimensions, self.infinity_values_, self.n_jobs) @@ -417,11 +415,10 @@ def fit(self, X, y=None): self : object """ - check_point_clouds(X, ensure_2d=False, - allow_nd=True, - force_all_finite=False) validate_params( 
self.get_params(), self._hyperparameters, exclude=['n_jobs']) + self._is_precomputed = self.metric == 'precomputed' + check_point_clouds(X, distance_matrix=self._is_precomputed) if self.infinity_values is None: self.infinity_values_ = self.max_edge_length @@ -469,13 +466,10 @@ def transform(self, X, y=None): """ check_is_fitted(self) - X = check_point_clouds(X, ensure_2d=False, - allow_nd=True, - force_all_finite=False) + X = check_point_clouds(X, distance_matrix=self._is_precomputed) Xt = Parallel(n_jobs=self.n_jobs)( - delayed(self._gudhi_diagram)(X[i, :, :]) for i in range( - X.shape[0])) + delayed(self._gudhi_diagram)(x) for x in X) Xt = _postprocess_diagrams(Xt, self._homology_dimensions, self.infinity_values_, self.n_jobs) @@ -635,7 +629,7 @@ def fit(self, X, y=None): self : object """ - check_point_clouds(X, ensure_2d=False, allow_nd=True) + check_point_clouds(X) validate_params( self.get_params(), self._hyperparameters, exclude=['n_jobs']) @@ -680,12 +674,10 @@ def transform(self, X, y=None): """ check_is_fitted(self) - X = check_point_clouds(X, ensure_2d=False, - allow_nd=True, ) + X = check_point_clouds(X) Xt = Parallel(n_jobs=self.n_jobs)( - delayed(self._gudhi_diagram)(X[i, :, :]) for i in range( - X.shape[0])) + delayed(self._gudhi_diagram)(x) for x in X) Xt = _postprocess_diagrams(Xt, self._homology_dimensions, self.infinity_values_, self.n_jobs) diff --git a/gtda/utils/validation.py b/gtda/utils/validation.py index 84a5f3f69..a96ba460a 100644 --- a/gtda/utils/validation.py +++ b/gtda/utils/validation.py @@ -1,8 +1,11 @@ """Utilities for input validation.""" # License: GNU AGPLv3 -import numpy as np +from functools import reduce +from operator import and_ +from warnings import warn +import numpy as np from sklearn.utils.validation import check_array @@ -183,114 +186,63 @@ def validate_params(parameters, references, exclude=None): return _validate_params(parameters_, references) -def check_point_clouds(X, **kwargs): - """Check a list of arrays 
representing point clouds, by integrating - through the input one by one. To pass a test when `kwargs` is empty, - all point clouds ``x``, ``y`` in X must satisfy: - - ``x.ndim == 2``, - - ``len(y.shape[1:]) == len(y.shape[1:])``. - - Parameters - ---------- - X : list of ndarray, such that ``X[i].ndim==2`` (n_points, n_dimensions), - or an array `X.dim==3` +def check_point_clouds(X, distance_matrix=False, **kwargs): + """Input validation on an array or list representing a collection of point + clouds or distance matrices. - kwargs - Keyword arguments. For a list of accepted values, see the documentation - of :func:`~gtda.utils.validation.check_list_of_arrays`. + The input is checked to be either a single 3D array using a single call + to :func:`~sklearn.utils.validation.check_array`, or a list of 2D arrays by + calling :func:`~sklearn.utils.validation.check_array` on each entry. In + the latter case, warnings are issued when not all point clouds are in + the same Euclidean space. - Returns - ------- - X : list of input arrays - as modified by :func:`~sklearn.utils.validation.check_array` - - """ - if hasattr(X, 'shape'): - return check_array(X, **kwargs) - else: - kwargs_default = { - 'ensure_2d': True, - 'force_all_finite': False, - 'check_shapes': [ - (lambda x: x.shape[1:], "Not all point clouds have the same " - "embedding dimension.")]} - kwargs_default.update(kwargs) - return check_list_of_arrays(X, **kwargs_default) - - -def check_list_of_arrays(X, check_shapes=list(), **kwargs): - """Input validation on a list of arrays, sparse matrices, or similar. - - Each entry in the list is validated using - :func:`~sklearn.utils.validation.check_array`. Optionally, some shared - shapes of - all - optional - parameters are - are listed below. + Conversions and copies may be triggered as per + :func:`~gtda.utils.validation.check_list_of_arrays`. Parameters ---------- - X : list - Input list of objects to check / convert. + X : object + Input object to check / convert. 
- check_shapes : list of tuple - The checks are applied in the order they are provided, only until - the first failure. + distance_matrix : bool, optional, default: ``False`` + Whether the input represents a collection of distance matrices or of + concrete point clouds in Euclidean space. In the first case, entries + are allowed to be infinite unless otherwise specified in `kwargs`. kwargs - Keyword arguments. For a list of accepted values, see the documentation - of :func:`~sklearn.utils.validation.check_array`. + Keyword arguments accepted by + :func:`~gtda.utils.validation.check_list_of_arrays`. Returns ------- - X : list - Output list of objects, each checked / converted by - :func:`~sklearn.utils.validation.check_array` + Xnew : ndarray or list + The converted and validated object. """ + kwargs_ = {'force_all_finite': not distance_matrix} + kwargs_.update(kwargs) + if hasattr(X, 'shape'): + if X.ndim != 3: + raise ValueError("ndarray input must be 3D.") + return check_array(X, allow_nd=True, **kwargs_) + else: + if not distance_matrix: + reference = X[0].shape[1] # Embedding dimension of first sample + if not reduce( + and_, (x.shape[1] == reference for x in X[1:]), True): + warn("Not all point clouds have the same embedding dimension.") - def check_dimensions(X, get_property): - """Check the dimensions of X are consistent, where the check is defined - by get_property 'sample-wise'. - - Parameters - ---------- - X : list of ndarray, - Usually represents point clouds or images- see - :func:`~gtda.utils.validation.check_list_of_arrays`. - - get_property : function: ndarray -> _, - Defines a property to be conserved, across all arrays (samples) - in X. 
- - Returns - ------- - - """ - from functools import reduce - from operator import and_ - reference = get_property(X[0]) - return reduce(and_, map(lambda x: get_property(x) == reference, X[1:]), - True) - - # if restrictions on the dimensions of the input are imposed - for get_property, err_message in check_shapes: - if not check_dimensions(X, get_property): - raise ValueError(err_message) - - is_check_failed = False + has_check_failed = False messages = [] + Xnew = [] for i, x in enumerate(X): try: - # TODO: verify the behavior depending on copy. - X[i] = check_array(x.reshape(1, *x.shape), - **kwargs).reshape(*x.shape) + Xnew.append(check_array(x, **kwargs_)) messages = [''] except ValueError as e: - is_check_failed = True + has_check_failed = True messages.append(str(e)) - if is_check_failed: - raise ValueError("The following errors were raised" + - "by the inputs: \n" + "\n".join(messages)) - return X + if has_check_failed: + raise ValueError("The following errors were raised by the inputs: \n" + "\n".join(messages)) + return Xnew From d4abe519731ff74d5d3f2740e68b351d96558369 Mon Sep 17 00:00:00 2001 From: Umberto Date: Mon, 23 Mar 2020 03:01:25 +0100 Subject: [PATCH 41/45] Fix some incorrect links to GH pages --- CONTRIBUTING.rst | 2 +- PULL_REQUEST_TEMPLATE.md | 4 ++-- README.rst | 4 ++-- examples/plotting_api.ipynb | 8 ++++---- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 0493a881b..5c0654a6d 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -1,7 +1,7 @@ Contributing guidelines ======================= -This document only redirects to more `detailed instructions `_, +This document only redirects to more `detailed instructions `_, which consist of: - a pull request checklist, - a Contributor License Agreement, diff --git a/PULL_REQUEST_TEMPLATE.md b/PULL_REQUEST_TEMPLATE.md index 73db335ec..21041aade 100644 --- a/PULL_REQUEST_TEMPLATE.md +++ b/PULL_REQUEST_TEMPLATE.md @@ -1,6 +1,6 @@ **Reference 
issues/PRs** @@ -37,7 +37,7 @@ Describe your changes in detail. Go over all the following points, and put an `x` in all the boxes that apply. If you're unsure about any of these, don't hesitate to ask. We're here to help! --> -- [ ] I have read the [guidelines for contributing](https://giotto-ai.github.io/gtda-docs/dev/contributing/#guidelines). +- [ ] I have read the [guidelines for contributing](https://giotto-ai.github.io/gtda-docs/latest/contributing/#guidelines). - [ ] My code follows the code style of this project. I used `flake8` to check my Python changes. - [ ] My change requires a change to the documentation. - [ ] I have updated the documentation accordingly. diff --git a/README.rst b/README.rst index 84ce12254..d6938665f 100644 --- a/README.rst +++ b/README.rst @@ -96,7 +96,7 @@ the same environment. Developer installation ---------------------- -Please consult the `relevant page `_ +Please consult the `relevant page `_ for detailed instructions on how to build ``giotto-tda`` from sources across different platforms. .. _contributing-section: @@ -107,7 +107,7 @@ Contributing We welcome new contributors of all experience levels. The Giotto community goals are to be helpful, welcoming, and effective. To learn more about making a contribution to ``giotto-tda``, please consult the `relevant page -`_. +`_. Testing ------- diff --git a/examples/plotting_api.ipynb b/examples/plotting_api.ipynb index 107e73ef2..22dc13186 100644 --- a/examples/plotting_api.ipynb +++ b/examples/plotting_api.ipynb @@ -8,7 +8,7 @@ "\n", "`giotto-tda` includes a set of plotting functions and class methods, powered by `plotly`. The library's plotting API is designed to facilitate the exploration of intermediate results in pipelines by harnessing the highly visual nature of topological signatures.\n", "\n", - "This notebook is a quick tutorial on how to use `giotto-tda`'s plotting functionalities and unified plotting API. 
The plotting functions in `gtda.mapper` are not covered here as they are somewhat tailored to the Mapper algorithm, see the [dedicated tutorial](https://giotto-ai.github.io/gtda-docs/dev/notebooks/mapper_quickstart.html).\n", + "This notebook is a quick tutorial on how to use `giotto-tda`'s plotting functionalities and unified plotting API. The plotting functions in `gtda.mapper` are not covered here as they are somewhat tailored to the Mapper algorithm, see the [dedicated tutorial](https://giotto-ai.github.io/gtda-docs/latest/notebooks/mapper_quickstart.html).\n", "\n", "If you are looking at a static version of this notebook and would like to run its contents, head over to [github](https://github.com/giotto-ai/giotto-tda/blob/master/examples/plotting_api.ipynb).\n", "\n", @@ -43,9 +43,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Plotting functions\n", + "### 1.1 Plotting functions\n", "\n", - "Several `plot` methods in `giotto-tda` actually fall back to specialised functions which can be found in the [plotting subpackage](https://giotto-ai.github.io/gtda-docs/dev/modules/pipeline.html) and which can be used directly instead. However, unless the additional degree of control is necessary, `plot` methods should be preferred as they often exploit class parameters and/or attributes (e.g. those computed during `fit`) to automatically fill some parameters in the corresponding functions." + "Several `plot` methods in `giotto-tda` actually fall back to specialised functions which can be found in the [plotting subpackage](https://giotto-ai.github.io/gtda-docs/latest/modules/plotting.html) and which can be used directly instead. However, unless the additional degree of control is necessary, `plot` methods should be preferred as they often exploit class parameters and/or attributes (e.g. those computed during `fit`) to automatically fill some parameters in the corresponding functions." 
] }, { @@ -54,7 +54,7 @@ "source": [ "### 1.2 Example: Plotting persistence diagrams with `VietorisRipsPersistence`\n", "\n", - "Let's take the example of `VietorisRipsPersistence` – a transformer also covered in [another notebook](https://giotto-ai.github.io/gtda-docs/dev/notebooks/vietoris_rips_quickstart.html). Let's create the input collection `X` for this transformer as a collection of randomly generated point clouds, each containing 100 points positioned along two circles." + "Let's take the example of `VietorisRipsPersistence` – a transformer also covered in [another notebook](https://giotto-ai.github.io/gtda-docs/latest/notebooks/vietoris_rips_quickstart.html). Let's create the input collection `X` for this transformer as a collection of randomly generated point clouds, each containing 100 points positioned along two circles." ] }, { From 34ae7ac859a15f0277a62ec26c101866f07e10e4 Mon Sep 17 00:00:00 2001 From: Umberto Date: Mon, 23 Mar 2020 03:04:35 +0100 Subject: [PATCH 42/45] Delete test expecting ValueError on point clouds in different embedding dims --- gtda/homology/tests/test_simplicial.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/gtda/homology/tests/test_simplicial.py b/gtda/homology/tests/test_simplicial.py index 38292053f..4af1633e1 100644 --- a/gtda/homology/tests/test_simplicial.py +++ b/gtda/homology/tests/test_simplicial.py @@ -100,17 +100,7 @@ def test_cp_transform(): def test_vrp_list_of_arrays(): - """Verify that a list of point clouds""" pc_2 = np.array([[0, 1], [1, 2]]) pc_list = [pc[0].copy(), pc_2] vrp = VietorisRipsPersistence() vrp.fit(pc_list) - - -def test_vrp_list_invalid_arrays(): - pc_2 = np.array([[0, 1, 2]]) - pc_invalid = [pc[0].copy(), pc_2] - - vrp = VietorisRipsPersistence() - with pytest.raises(ValueError): - vrp.fit(pc_invalid) From 3f2f53b479113549b41a5d31a7a1f22886610f6d Mon Sep 17 00:00:00 2001 From: Umberto Date: Mon, 23 Mar 2020 03:09:42 +0100 Subject: [PATCH 43/45] Small code simplification --- 
gtda/graphs/transition.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/gtda/graphs/transition.py b/gtda/graphs/transition.py index 3022fa80e..4e7be4ab1 100644 --- a/gtda/graphs/transition.py +++ b/gtda/graphs/transition.py @@ -197,7 +197,6 @@ def transform(self, X, y=None): Xt = check_array(X, allow_nd=True) Xt = Parallel(n_jobs=self.n_jobs)( - delayed(self._make_adjacency_matrix)(Xt[i]) for i in - range(Xt.shape[0])) + delayed(self._make_adjacency_matrix)(x) for x in Xt) Xt = np.asarray(Xt) return Xt From d577250495c1211762048848e73d4713c1523156 Mon Sep 17 00:00:00 2001 From: Umberto Date: Mon, 23 Mar 2020 09:20:23 +0100 Subject: [PATCH 44/45] Update docs in homology/simplicial.py following refactoring of check_point_clouds --- gtda/homology/simplicial.py | 86 ++++++++++++++++++++++--------------- 1 file changed, 51 insertions(+), 35 deletions(-) diff --git a/gtda/homology/simplicial.py b/gtda/homology/simplicial.py index 52d9857b8..1b6c0bc0c 100644 --- a/gtda/homology/simplicial.py +++ b/gtda/homology/simplicial.py @@ -39,7 +39,7 @@ class VietorisRipsPersistence(BaseEstimator, TransformerMixin, PlotterMixin): Parameters ---------- metric : string or callable, optional, default: ``'euclidean'`` - If set to `'precomputed'`, input data is to be interpreted as a + If set to ``'precomputed'``, input data is to be interpreted as a collection of distance matrices. Otherwise, input data is to be interpreted as a collection of point clouds (i.e. feature arrays), and `metric` determines a rule with which to calculate distances @@ -152,13 +152,15 @@ def fit(self, X, y=None): Parameters ---------- - X : ndarray of shape (n_samples, n_points, n_points) or \ - (n_samples, n_points, n_dimensions) - Input data. If ``metric == 'precomputed'``, the input should be an - ndarray whose each entry along axis 0 is a distance matrix of shape - ``(n_points, n_points)``. 
Otherwise, each such entry will be - interpreted as an ndarray of ``n_points`` row vectors in - ``n_dimensions``-dimensional space. + X : ndarray or list + Input data representing a collection of point clouds or of distance + matrices. Can be either a 3D ndarray whose zeroth dimension has + size ``n_samples``, or a list containing ``n_samples`` 2D ndarrays. + If ``metric == 'precomputed'``, elements of `X` must be square + arrays representing distance matrices; otherwise, their rows are + interpreted as vectors in Euclidean space and, when `X` is a list, + warnings are issued when the number of columns (dimension of the + Euclidean space) differs among samples. y : None There is no need for a target in a transformer, yet the pipeline @@ -197,13 +199,15 @@ def transform(self, X, y=None): Parameters ---------- - X : ndarray of shape (n_samples, n_points, n_points) or \ - (n_samples, n_points, n_dimensions) - Input data. If ``metric == 'precomputed'``, the input should be an - ndarray whose each entry along axis 0 is a distance matrix of shape - ``(n_points, n_points)``. Otherwise, each such entry will be - interpreted as an ndarray of ``n_points`` row vectors in - ``n_dimensions``-dimensional space. + X : ndarray or list + Input data representing a collection of point clouds or of distance + matrices. Can be either a 3D ndarray whose zeroth dimension has + size ``n_samples``, or a list containing ``n_samples`` 2D ndarrays. + If ``metric == 'precomputed'``, elements of `X` must be square + arrays representing distance matrices; otherwise, their rows are + interpreted as vectors in Euclidean space and, when `X` is a list, + warnings are issued when the number of columns (dimension of the + Euclidean space) differs among samples. 
y : None There is no need for a target in a transformer, yet the pipeline @@ -270,7 +274,7 @@ class SparseRipsPersistence(BaseEstimator, TransformerMixin, PlotterMixin): Parameters ---------- metric : string or callable, optional, default: ``'euclidean'`` - If set to `'precomputed'`, input data is to be interpreted as a + If set to ``'precomputed'``, input data is to be interpreted as a collection of distance matrices. Otherwise, input data is to be interpreted as a collection of point clouds (i.e. feature arrays), and `metric` determines a rule with which to calculate distances @@ -398,13 +402,15 @@ def fit(self, X, y=None): Parameters ---------- - X : ndarray of shape (n_samples, n_points, n_points) or \ - (n_samples, n_points, n_dimensions) - Input data. If ``metric == 'precomputed'``, the input should be an - ndarray whose each entry along axis 0 is a distance matrix of shape - ``(n_points, n_points)``. Otherwise, each such entry will be - interpreted as an ndarray of ``n_points`` row vectors in - ``n_dimensions``-dimensional space. + X : ndarray or list + Input data representing a collection of point clouds or of distance + matrices. Can be either a 3D ndarray whose zeroth dimension has + size ``n_samples``, or a list containing ``n_samples`` 2D ndarrays. + If ``metric == 'precomputed'``, elements of `X` must be square + arrays representing distance matrices; otherwise, their rows are + interpreted as vectors in Euclidean space and, when `X` is a list, + warnings are issued when the number of columns (dimension of the + Euclidean space) differs among samples. y : None There is no need for a target in a transformer, yet the pipeline @@ -443,13 +449,15 @@ def transform(self, X, y=None): Parameters ---------- - X : ndarray of shape (n_samples, n_points, n_points) or \ - (n_samples, n_points, n_dimensions) - Input data. 
If ``metric == 'precomputed'``, the input should be an - ndarray whose each entry along axis 0 is a distance matrix of shape - ``(n_points, n_points)``. Otherwise, each such entry will be - interpreted as an ndarray of ``n_points`` row vectors in - ``n_dimensions``-dimensional space. + X : ndarray or list + Input data representing a collection of point clouds or of distance + matrices. Can be either a 3D ndarray whose zeroth dimension has + size ``n_samples``, or a list containing ``n_samples`` 2D ndarrays. + If ``metric == 'precomputed'``, elements of `X` must be square + arrays representing distance matrices; otherwise, their rows are + interpreted as vectors in Euclidean space and, when `X` is a list, + warnings are issued when the number of columns (dimension of the + Euclidean space) differs among samples. y : None There is no need for a target in a transformer, yet the pipeline @@ -616,9 +624,13 @@ def fit(self, X, y=None): Parameters ---------- - X : ndarray of shape (n_samples, n_points, n_dimensions) - Input data. Each entry along axis 0 is a point cloud of - ``n_points`` row vectors in ``n_dimensions``-dimensional space. + X : ndarray or list + Input data representing a collection of point clouds. Can be + either a 3D ndarray whose zeroth dimension has size ``n_samples``, + or a list containing ``n_samples`` 2D ndarrays. The rows of + elements in `X` are interpreted as vectors in Euclidean space + and, when `X` is a list, warnings are issued when the number of + columns (dimension of the Euclidean space) differs among samples. y : None There is no need for a target in a transformer, yet the pipeline @@ -657,8 +669,12 @@ def transform(self, X, y=None): Parameters ---------- X : ndarray of shape (n_samples, n_points, n_dimensions) - Input data. Each entry along axis 0 is a point cloud of - ``n_points`` row vectors in ``n_dimensions``-dimensional space. + Input data representing a collection of point clouds. 
Can be + either a 3D ndarray whose zeroth dimension has size ``n_samples``, + or a list containing ``n_samples`` 2D ndarrays. The rows of + elements in `X` are interpreted as vectors in Euclidean space + and, when `X` is a list, warnings are issued when the number of + columns (dimension of the Euclidean space) differs among samples. y : None There is no need for a target in a transformer, yet the pipeline From 2ed3ad164d13c82495942396e5f23b01af9134da Mon Sep 17 00:00:00 2001 From: Umberto Date: Mon, 23 Mar 2020 09:41:35 +0100 Subject: [PATCH 45/45] Revert to trivial slicing in transform_plot method of PlotterMixin Needed to cover the case of list input --- gtda/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gtda/base.py b/gtda/base.py index e6a10f2e2..25a981e63 100644 --- a/gtda/base.py +++ b/gtda/base.py @@ -139,7 +139,7 @@ def transform_plot(self, X, sample=0, **plot_params): Transformed one-sample slice from the input. """ - Xt = self.transform(X[[sample]]) + Xt = self.transform(X[sample:sample+1]) self.plot(Xt, sample=0, **plot_params) return Xt