giotto-ai · ulupo · Aug 14, 2020 · Aug 10, 2020 · Aug 10, 2020 · Aug 10, 2020
diff --git a/gtda/diagrams/_metrics.py b/gtda/diagrams/_metrics.py
diff --git a/gtda/diagrams/_utils.py b/gtda/diagrams/_utils.py
@@ -21,16 +21,21 @@ def _subdiagrams(X, homology_dimensions, remove_dim=False):
 
 
 def _pad(X, max_diagram_sizes):
-    X_padded = {dim: np.pad(
-        X[dim],
-        ((0, 0), (0, max_diagram_sizes[dim] - X[dim].shape[1]),
-         (0, 0)), 'constant') for dim in X.keys()}
+    X_padded = {  # same keys as `X`; only the end of axis 1 is padded
+        dim: np.pad(
+            Xdim,
+            ((0, 0), (0, max_diagram_sizes[dim] - Xdim.shape[1]), (0, 0)),
+            "constant"  # fill value left at np.pad's default (0)
+            )
+        for dim, Xdim in X.items()
+        }
     return X_padded
 
 
-def _sample_image(image, sampled_diag):
-    # NOTE: Modifies `image` in-place
-    unique, counts = np.unique(sampled_diag, axis=0, return_counts=True)
+def _sample_image(image, diagram_pixel_coords):
+    # WARNING: Modifies `image` in-place (each listed pixel gets its count)
+    unique, counts = \
+        np.unique(diagram_pixel_coords, axis=0, return_counts=True)
-    unique = tuple(tuple(row) for row in unique.astype(np.int).T)
+    unique = tuple(tuple(row) for row in unique.astype(int).T)
     image[unique] = counts
 
@@ -131,18 +136,11 @@ def _bin(X, metric, n_bins=100, **kw_args):
     samplings = {}
     step_sizes = {}
     for dim in homology_dimensions:
-        samplings[dim], step_sizes[dim] = np.linspace(min_vals[dim],
-                                                      max_vals[dim],
-                                                      retstep=True,
-                                                      num=n_bins)
+        samplings[dim], step_sizes[dim] = np.linspace(
+            min_vals[dim], max_vals[dim], retstep=True, num=n_bins
+            )
     if metric in ['landscape', 'betti', 'heat', 'silhouette']:
         for dim in homology_dimensions:
             samplings[dim] = samplings[dim][:, [0], None]
             step_sizes[dim] = step_sizes[dim][0]
     return samplings, step_sizes
-
-
-def _calculate_weights(X, weight_function, samplings, **kw_args):
-    weights = {dim: weight_function(samplings[dim][:, 1])
-               for dim in samplings.keys()}
-    return weights
diff --git a/gtda/diagrams/distance.py b/gtda/diagrams/distance.py
@@ -8,7 +8,7 @@
 from sklearn.utils.validation import check_is_fitted
 
 from ._metrics import _AVAILABLE_METRICS, _parallel_pairwise
-from ._utils import _bin, _calculate_weights
+from ._utils import _bin
 from ..utils._docs import adapt_fit_transform_docs
 from ..utils.intervals import Interval
 from ..utils.validation import check_diagrams, validate_params
@@ -24,9 +24,6 @@ class PairwiseDistance(BaseEstimator, TransformerMixin):
     matrices or a single distance matrix between pairs of diagrams is
     calculated according to the following steps:
 
-    Input collections of persistence diagrams for this transformer must satisfy
-    certain requirements, see e.g. :meth:`fit`.
-
         1. All diagrams are partitioned into subdiagrams corresponding to
            distinct homology dimensions.
         2. Pairwise distances between subdiagrams of equal homology
@@ -37,22 +34,29 @@ class PairwiseDistance(BaseEstimator, TransformerMixin):
            three-dimensional array, or a single distance matrix constructed
            by taking norms of the vectors of distances between diagram pairs.
 
+    **Important notes**:
+
+        - Input collections of persistence diagrams for this transformer must
+          satisfy certain requirements, see e.g. :meth:`fit`.
+        - The shape of outputs of :meth:`transform` depends on the value of the
+          `order` parameter.
+
     Parameters
     ----------
-    metric : ``'bottleneck'`` | ``'wasserstein'`` | ``'landscape'`` | \
-        ``'betti'`` | ``'heat'`` | ``'persistence_image'``, | \
-        ``'silhouette'``, optional, default: ``'landscape'``
+    metric : ``'bottleneck'`` | ``'wasserstein'`` | ``'betti'`` | \
+        ``'landscape'`` | ``'silhouette'`` | ``'heat'`` | \
+        ``'persistence_image'``, optional, default: ``'landscape'``
         Distance or dissimilarity function between subdiagrams:
 
         - ``'bottleneck'`` and ``'wasserstein'`` refer to the identically named
           perfect-matching--based notions of distance.
+        - ``'betti'`` refers to the :math:`L^p` distance between Betti curves.
         - ``'landscape'`` refers to the :math:`L^p` distance between
           persistence landscapes.
-        - ``'betti'`` refers to the :math:`L^p` distance between Betti curves.
-        - ``'heat'`` refers to the :math:`L^p` distance between
-          Gaussian-smoothed diagrams.
         - ``'silhouette'`` refers to the :math:`L^p` distance between
           silhouettes.
+        - ``'heat'`` refers to the :math:`L^p` distance between
+          Gaussian-smoothed diagrams.
         - ``'persistence_image'`` refers to the :math:`L^p` distance between
           Gaussian-smoothed diagrams represented on birth-persistence axes.
 
@@ -61,27 +65,27 @@ class PairwiseDistance(BaseEstimator, TransformerMixin):
         ``None`` is equivalent to passing the defaults described below):
 
         - If ``metric == 'bottleneck'`` the only argument is `delta` (float,
-          default: ``0.01``). When equal to ``0.``, an exact algorithm is
-          used; otherwise, a faster approximate algorithm is used.
+          default: ``0.01``). When equal to ``0.``, an exact algorithm is used;
+          otherwise, a faster approximate algorithm is used.
         - If ``metric == 'wasserstein'`` the available arguments are `p`
           (float, default: ``2.``) and `delta` (float, default: ``0.01``).
-          Unlike the case of ``'bottleneck'``, `delta` cannot be set to
-          ``0.`` and an exact algorithm is not available.
+          Unlike the case of ``'bottleneck'``, `delta` cannot be set to ``0.``
+          and an exact algorithm is not available.
         - If ``metric == 'betti'`` the available arguments are `p` (float,
           default: ``2.``) and `n_bins` (int, default: ``100``).
-        - If ``metric == 'landscape'`` the available arguments are `p`
-          (float, default: ``2.``), `n_bins` (int, default: ``100``) and
-          `n_layers` (int, default: ``1``).
-        - If ``metric == 'heat'`` the available arguments are `p`
-          (float, default: ``2.``), `sigma` (float, default: ``1.``) and
-          `n_bins` (int, default: ``100``).
-        - If ``metric == 'silhouette'`` the available arguments are `p`
-          (float, default: ``2.``), `order` (float, default: ``1.``) and
-          `n_bins` (int, default: ``100``).
+        - If ``metric == 'landscape'`` the available arguments are `p` (float,
+          default: ``2.``), `n_bins` (int, default: ``100``) and `n_layers`
+          (int, default: ``1``).
+        - If ``metric == 'silhouette'`` the available arguments are `p` (float,
+          default: ``2.``), `power` (float, default: ``1.``) and `n_bins` (int,
+          default: ``100``).
+        - If ``metric == 'heat'`` the available arguments are `p` (float,
+          default: ``2.``), `sigma` (float, default: ``0.1``) and `n_bins`
+          (int, default: ``100``).
         - If ``metric == 'persistence_image'`` the available arguments are `p`
-          (float, default: ``2.``), `sigma` (float, default: ``1.``),
-          `n_bins` (int, default: ``100``) and `weight_function`
-          (callable or None, default: ``None``).
+          (float, default: ``2.``), `sigma` (float, default: ``0.1``), `n_bins`
+          (int, default: ``100``) and `weight_function` (callable or None,
+          default: ``None``).
 
     order : float or None, optional, default: ``2.``
         If ``None``, :meth:`transform` returns for each pair of diagrams a
@@ -98,7 +102,7 @@ class PairwiseDistance(BaseEstimator, TransformerMixin):
     ----------
     effective_metric_params_ : dict
         Dictionary containing all information present in `metric_params` as
-        well as on any relevant quantities computed in :meth:`fit`.
+        well as relevant quantities computed in :meth:`fit`.
 
     homology_dimensions_ : list
         Homology dimensions seen in :meth:`fit`, sorted in ascending order.
@@ -178,11 +182,14 @@ def fit(self, X, y=None):
 
         self.effective_metric_params_['samplings'], \
             self.effective_metric_params_['step_sizes'] = \
-            _bin(X, metric=self.metric, **self.effective_metric_params_)
+            _bin(X, self.metric, **self.effective_metric_params_)
 
         if self.metric == 'persistence_image':
-            self.effective_metric_params_['weights'] = \
-                _calculate_weights(X, **self.effective_metric_params_)
+            weight_function = self.effective_metric_params_.get(
+                'weight_function', None
+                )
+            if weight_function is None:
+                self.effective_metric_params_['weight_function'] = np.ones_like
 
         self._X = X
         return self

diff --git a/gtda/diagrams/features.py b/gtda/diagrams/features.py
@@ -11,7 +11,7 @@
 from sklearn.utils.validation import check_is_fitted
 
 from ._metrics import _AVAILABLE_AMPLITUDE_METRICS, _parallel_amplitude
-from ._utils import _subdiagrams, _bin, _calculate_weights
+from ._utils import _subdiagrams, _bin
 from ..utils._docs import adapt_fit_transform_docs
 from ..utils.intervals import Interval
 from ..utils.validation import validate_params, check_diagrams
@@ -30,13 +30,14 @@ class PersistenceEntropy(BaseEstimator, TransformerMixin):
     differences. Optionally, these entropies can be normalized according to a
     simple heuristic, see `normalize`.
 
-    Input collections of persistence diagrams for this transformer must satisfy
-    certain requirements, see e.g. :meth:`fit`.
+    **Important notes**:
 
-    **Important note**: By default, persistence subdiagrams containing only
-    triples with zero lifetime will have corresponding (normalized) entropies
-    computed as ``numpy.nan``. To avoid this, set a value of `nan_fill_value`
-    different from ``None``.
+        - Input collections of persistence diagrams for this transformer must
+          satisfy certain requirements, see e.g. :meth:`fit`.
+        - By default, persistence subdiagrams containing only triples with zero
+          lifetime will have corresponding (normalized) entropies computed as
+          ``numpy.nan``. To avoid this, set a value of `nan_fill_value`
+          different from ``None``.
 
     Parameters
     ----------
@@ -189,26 +190,30 @@ class Amplitude(BaseEstimator, TransformerMixin):
         3. The final result is either :math:`\\mathbf{a}` itself or
            a norm of :math:`\\mathbf{a}`, specified by the parameter `order`.
 
-    Input collections of persistence diagrams for this transformer must satisfy
-    certain requirements, see e.g. :meth:`fit`.
+    **Important notes**:
+
+        - Input collections of persistence diagrams for this transformer must
+          satisfy certain requirements, see e.g. :meth:`fit`.
+        - The shape of outputs of :meth:`transform` depends on the value of the
+          `order` parameter.
 
     Parameters
     ----------
-    metric : ``'bottleneck'`` | ``'wasserstein'`` | ``'landscape'`` | \
-        ``'betti'`` | ``'heat'`` | ``'silhouette'`` | \
+    metric : ``'bottleneck'`` | ``'wasserstein'`` | ``'betti'`` | \
+        ``'landscape'`` | ``'silhouette'`` | ``'heat'`` | \
         ``'persistence_image'``, optional, default: ``'landscape'``
         Distance or dissimilarity function used to define the amplitude of
         a subdiagram as its distance from the (trivial) diagonal diagram:
 
         - ``'bottleneck'`` and ``'wasserstein'`` refer to the identically named
           perfect-matching--based notions of distance.
+        - ``'betti'`` refers to the :math:`L^p` distance between Betti curves.
         - ``'landscape'`` refers to the :math:`L^p` distance between
           persistence landscapes.
-        - ``'betti'`` refers to the :math:`L^p` distance between Betti curves.
-        - ``'heat'`` refers to the :math:`L^p` distance between
-          Gaussian-smoothed diagrams.
         - ``'silhouette'`` refers to the :math:`L^p` distance between
           silhouettes.
+        - ``'heat'`` refers to the :math:`L^p` distance between
+          Gaussian-smoothed diagrams.
         - ``'persistence_image'`` refers to the :math:`L^p` distance between
           Gaussian-smoothed diagrams represented on birth-persistence axes.
 
@@ -219,23 +224,23 @@ class Amplitude(BaseEstimator, TransformerMixin):
         - If ``metric == 'bottleneck'`` there are no available arguments.
         - If ``metric == 'wasserstein'`` the only argument is `p` (float,
           default: ``2.``).
-        - If ``metric == 'landscape'`` the available arguments are `p`
-          (float, default: ``2.``), `n_bins` (int, default: ``100``) and
-          `n_layers` (int, default: ``1``).
         - If ``metric == 'betti'`` the available arguments are `p` (float,
           default: ``2.``) and `n_bins` (int, default: ``100``).
+        - If ``metric == 'landscape'`` the available arguments are `p` (float,
+          default: ``2.``), `n_bins` (int, default: ``100``) and `n_layers`
+          (int, default: ``1``).
+        - If ``metric == 'silhouette'`` the available arguments are `p` (float,
+          default: ``2.``), `power` (float, default: ``1.``) and `n_bins` (int,
+          default: ``100``).
         - If ``metric == 'heat'`` the available arguments are `p` (float,
-          default: ``2.``), `sigma` (float, default: ``1.``) and `n_bins`
+          default: ``2.``), `sigma` (float, default: ``0.1``) and `n_bins`
           (int, default: ``100``).
-        - If ``metric == 'silhouette'`` the available arguments are `p`
-          (float, default: ``2.``), `order` (float, default: ``1.``) and
-          `n_bins` (int, default: ``100``).
         - If ``metric == 'persistence_image'`` the available arguments are `p`
-          (float, default: ``2.``), `sigma` (float, default: ``1.``),
-          `n_bins` (int, default: ``100``) and `weight_function`
-          (callable or None, default: ``None``).
+          (float, default: ``2.``), `sigma` (float, default: ``0.1``), `n_bins`
+          (int, default: ``100``) and `weight_function` (callable or None,
+          default: ``None``).
 
-    order : float or None, optional, default: ``2.``
+    order : float or None, optional, default: ``None``
         If ``None``, :meth:`transform` returns for each diagram a vector of
         amplitudes corresponding to the dimensions in
         :attr:`homology_dimensions_`. Otherwise, the :math:`p`-norm of
@@ -250,7 +255,7 @@ class Amplitude(BaseEstimator, TransformerMixin):
     ----------
     effective_metric_params_ : dict
         Dictionary containing all information present in `metric_params` as
-        well as on any relevant quantities computed in :meth:`fit`.
+        well as relevant quantities computed in :meth:`fit`.
 
     homology_dimensions_ : list
         Homology dimensions seen in :meth:`fit`, sorted in ascending order.
@@ -277,7 +282,7 @@ class Amplitude(BaseEstimator, TransformerMixin):
         'metric_params': {'type': (dict, type(None))}
         }
 
-    def __init__(self, metric='landscape', metric_params=None, order=2.,
+    def __init__(self, metric='landscape', metric_params=None, order=None,
                  n_jobs=None):
         self.metric = metric
         self.metric_params = metric_params
@@ -326,11 +331,14 @@ def fit(self, X, y=None):
 
         self.effective_metric_params_['samplings'], \
             self.effective_metric_params_['step_sizes'] = \
-            _bin(X, metric=self.metric, **self.effective_metric_params_)
+            _bin(X, self.metric, **self.effective_metric_params_)
 
         if self.metric == 'persistence_image':
-            self.effective_metric_params_['weights'] = \
-                _calculate_weights(X, **self.effective_metric_params_)
+            weight_function = self.effective_metric_params_.get(
+                'weight_function', None
+                )
+            if weight_function is None:
+                self.effective_metric_params_['weight_function'] = np.ones_like
 
         return self
 

diff --git a/gtda/diagrams/preprocessing.py b/gtda/diagrams/preprocessing.py
@@ -9,7 +9,7 @@
 from sklearn.utils.validation import check_is_fitted
 
 from ._metrics import _AVAILABLE_AMPLITUDE_METRICS, _parallel_amplitude
-from ._utils import _filter, _bin, _calculate_weights
+from ._utils import _filter, _bin
 from ..base import PlotterMixin
 from ..plotting.persistence_diagrams import plot_diagram
 from ..utils._docs import adapt_fit_transform_docs
@@ -139,8 +139,10 @@ class Scaler(BaseEstimator, TransformerMixin, PlotterMixin):
           two-dimensional array of amplitudes (one per diagram and homology
           dimension) to obtain :attr:`scale_`.
 
-    Input collections of persistence diagrams for this transformer must satisfy
-    certain requirements, see e.g. :meth:`fit`.
+    **Important note**:
+
+        - Input collections of persistence diagrams for this transformer must
+          satisfy certain requirements, see e.g. :meth:`fit`.
 
     Parameters
     ----------
@@ -157,15 +159,15 @@ class Scaler(BaseEstimator, TransformerMixin, PlotterMixin):
         amplitude vectors in :meth:`fit`. Must map 2D arrays to scalars.
 
     n_jobs : int or None, optional, default: ``None``
-        The number of jobs to use for the computation. ``None`` means 1
-        unless in a :obj:`joblib.parallel_backend` context. ``-1`` means
-        using all processors.
+        The number of jobs to use for the computation. ``None`` means 1 unless
+        in a :obj:`joblib.parallel_backend` context. ``-1`` means using all
+        processors.
 
     Attributes
     ----------
     effective_metric_params_ : dict
         Dictionary containing all information present in `metric_params` as
-        well as on any relevant quantities computed in :meth:`fit`.
+        well as relevant quantities computed in :meth:`fit`.
 
     homology_dimensions_ : list
         Homology dimensions seen in :meth:`fit`, sorted in ascending order.
@@ -241,11 +243,14 @@ def fit(self, X, y=None):
 
         self.effective_metric_params_['samplings'], \
             self.effective_metric_params_['step_sizes'] = \
-            _bin(X, metric=self.metric, **self.effective_metric_params_)
+            _bin(X, self.metric, **self.effective_metric_params_)
 
         if self.metric == 'persistence_image':
-            self.effective_metric_params_['weights'] = \
-                _calculate_weights(X, **self.effective_metric_params_)
+            weight_function = self.effective_metric_params_['weight_function']
+            samplings = self.effective_metric_params_['samplings']
+            weights = {dim: weight_function(samplings_dim[:, 1])
+                       for dim, samplings_dim in samplings.items()}
+            self.effective_metric_params_['weights'] = weights
 
         amplitude_array = _parallel_amplitude(X, self.metric,
                                               self.effective_metric_params_,
@@ -356,8 +361,10 @@ class Filtering(BaseEstimator, TransformerMixin, PlotterMixin):
     are equal) may still appear in the output for padding purposes, but carry
     no information.
 
-    Input collections of persistence diagrams for this transformer must satisfy
-    certain requirements, see e.g. :meth:`fit`.
+    **Important note**:
+
+        - Input collections of persistence diagrams for this transformer must
+          satisfy certain requirements, see e.g. :meth:`fit`.
 
     Parameters
     ----------