Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add DensityFiltration #473

Merged
merged 34 commits into from
Oct 4, 2020
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
dfdad0f
Add below option for Filter
gtauzin Mar 23, 2020
7e7e8ab
Revert "Add below option for Filter"
gtauzin Mar 23, 2020
48fe0ea
Add DensityFiltration
gtauzin Mar 23, 2020
ee46506
Add tests for DensityFiltration
gtauzin Mar 23, 2020
a3bfc72
Merge with upstream
gtauzin Aug 30, 2020
e2057cf
Add DensityFiltration to the doc
gtauzin Aug 30, 2020
0df98e5
Fix docstrings
gtauzin Sep 13, 2020
80f1ffa
Remove effective_metric_params in radial
gtauzin Sep 13, 2020
9c72649
Merge branch 'master' of github.com:giotto-ai/giotto-tda into density
gtauzin Sep 14, 2020
aa499a0
Apply @ulupo's suggestions on list ranges
gtauzin Sep 14, 2020
83210d8
Merge branch 'density' of github.com:gtauzin/giotto-tda into density
gtauzin Sep 15, 2020
9dd4fa5
Remove leftover code
gtauzin Sep 15, 2020
5c9ea23
Merge branch 'master' into density
ulupo Sep 19, 2020
d1faa01
Turn warning into ValueError
gtauzin Sep 20, 2020
5937e64
Replace warnings by ValueErrors
gtauzin Sep 20, 2020
8839b5d
Improve transformer definition
gtauzin Sep 20, 2020
8422a90
Fix tests
gtauzin Sep 20, 2020
372b5b3
Merge branch 'master' into density
ulupo Sep 22, 2020
a67c47e
Fix errors
ulupo Sep 22, 2020
58c966d
Small wording/linting changes
ulupo Sep 22, 2020
f5002b0
Fix doc and mask size
gtauzin Sep 27, 2020
6526bcf
Remove Error if dim is 1
gtauzin Sep 29, 2020
7557715
Fix linting
ulupo Sep 30, 2020
4c2ff30
Remove unnecessary _is_fitted from ImageToPointCloud
ulupo Sep 30, 2020
d94d057
Fix typo
ulupo Sep 30, 2020
87c4ef0
Improve See alsos in images/filtrations.py
ulupo Sep 30, 2020
bbf3a45
Simplify code
ulupo Sep 30, 2020
716f246
Revert "Simplify code"
ulupo Sep 30, 2020
2e72d01
Simplify code
ulupo Sep 30, 2020
359bb61
Fix typo
ulupo Sep 30, 2020
49344d0
Add tests for bad input shapes in image subpackage
ulupo Sep 30, 2020
44126c0
Add inline comments
gtauzin Oct 3, 2020
2b72331
Revert update of pybind11
gtauzin Oct 3, 2020
f5cd329
Fix typo
gtauzin Oct 3, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/modules/images.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,4 @@ Filtrations
images.DilationFiltration
images.ErosionFiltration
images.SignedDistanceFiltration
images.DensityFiltration
4 changes: 3 additions & 1 deletion gtda/images/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@

from .preprocessing import Binarizer, Inverter, Padder, ImageToPointCloud
from .filtrations import HeightFiltration, RadialFiltration, \
DilationFiltration, ErosionFiltration, SignedDistanceFiltration
DilationFiltration, ErosionFiltration, SignedDistanceFiltration, \
DensityFiltration

__all__ = [
'Binarizer',
Expand All @@ -17,4 +18,5 @@
'DilationFiltration',
'ErosionFiltration',
'SignedDistanceFiltration',
'DensityFiltration',
]
226 changes: 226 additions & 0 deletions gtda/images/filtrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from numbers import Real
from types import FunctionType
from warnings import warn
import itertools

import numpy as np
from joblib import Parallel, delayed, effective_n_jobs
Expand All @@ -13,6 +14,7 @@
from sklearn.utils.validation import check_array, check_is_fitted

from ._utils import _dilate, _erode
from .preprocessing import Padder
from ..base import PlotterMixin
from ..plotting import plot_heatmap
from ..utils._docs import adapt_fit_transform_docs
Expand Down Expand Up @@ -1036,3 +1038,227 @@ def plot(Xt, sample=0, colorscale='greys', origin='upper',
title=f"Signed-distance filtration of image {sample}",
plotly_params=plotly_params
)


@adapt_fit_transform_docs
class DensityFiltration(BaseEstimator, TransformerMixin, PlotterMixin):
"""Filtrations of 2D/3D binary images based on the number of neighboring
activated pixels.

The density filtration assigns to each pixel of a binary image a greyscale
value equal to the number of activated pixels with a ball centered around
gtauzin marked this conversation as resolved.
Show resolved Hide resolved
it.

Parameters
----------
radius : float, default: ``1``
gtauzin marked this conversation as resolved.
Show resolved Hide resolved
The radius of the ball within which the number of activated pixels is
considered.

metric : string, or callable, optional, default: ``'euclidean'``
gtauzin marked this conversation as resolved.
Show resolved Hide resolved
Determines a rule with which to calculate distances between
pairs of pixels.
If ``metric`` is a string, it must be one of the options allowed by
``scipy.spatial.distance.pdist`` for its metric parameter, or a metric
listed in ``sklearn.pairwise.PAIRWISE_DISTANCE_FUNCTIONS``, including
"euclidean", "manhattan", or "cosine".
If ``metric`` is a callable function, it is called on each pair of
instances and the resulting value recorded. The callable should take
two arrays from the entry in `X` as input, and return a value
indicating the distance between them.

metric_params : dict, optional, default: ``{}``
gtauzin marked this conversation as resolved.
Show resolved Hide resolved
Additional keyword arguments for the metric function.

n_jobs : int or None, optional, default: ``None``
The number of jobs to use for the computation. ``None`` means 1 unless
in a :obj:`joblib.parallel_backend` context. ``-1`` means using all
processors.

Attributes
----------
mask_ : ndarray of shape
gtauzin marked this conversation as resolved.
Show resolved Hide resolved
The mask applied around each pixel to calculate the number of its
activated neighbors. It is obtained from the choice of the ``radius``
and ``metric``. Set in :meth:`fit`.

See also
--------
gtda.homology.CubicalPersistence, Binarizer

References
----------
[1] A. Garin and G. Tauzin, "A topological reading lesson: Classification
of MNIST using TDA"; 19th International IEEE Conference on Machine
Learning and Applications (ICMLA 2020), 2019; arXiv: `1910.08345 \
<https://arxiv.org/abs/1910.08345>`_.

"""

_hyperparameters = {
'radius': {'type': Real, 'in': Interval(0, np.inf, closed='right')},
'metric': {'type': (str, FunctionType)},
'metric_params': {'type': (dict, type(None))},
}

def __init__(self, radius=3, metric='euclidean', metric_params={},
n_jobs=None):
self.radius = radius
self.metric = metric
self.metric_params = metric_params
self.n_jobs = n_jobs

def _calculate_density(self, X):
Xd = np.zeros(X.shape)

for i, j, k in self._iterator:
Xd += np.roll(np.roll(
np.roll(X, k, axis=3), j, axis=2), i, axis=1) \
* self.mask_[self._range + i, self._range + j,
self._range + k]
return Xd

def fit(self, X, y=None):
"""Calculate :attr:`mask_` from a collection of binary images. Then,
return the estimator.

This method is here to implement the usual scikit-learn API and hence
work in pipelines.

Parameters
----------
X : ndarray of shape (n_samples, n_pixels_x, n_pixels_y [, n_pixels_z])
Input data. Each entry along axis 0 is interpreted as a 2D or 3D
binary image.

y : None
There is no need of a target in a transformer, yet the pipeline API
requires this parameter.

Returns
-------
self : object

"""
X = check_array(X, allow_nd=True)
self._n_dimensions = X.ndim - 1
if (self._n_dimensions < 2) or (self._n_dimensions > 3):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm guessing that, unlike some of the other transformers in gtda.images, this one will break on arrays which are not 3D. In your opinion, is it better to extend support to n-dimensional images, or turn this warning into an exception?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that's a tradeoff. I don't think I want to work on extending the transformers to images of arbitrary dimensions simply because I am not sure anyone you would use it.

At the same time, I won't deny them the possibility of trying to make it work. Let's fix this in October?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure! Nonetheless, why not throwing exceptions for now, instead of warnings?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you throw exceptions, noone will be able to try passing 4d images, no?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right, but this transformer will break there (many 3s hardcoded in the code), unless it is suitably generalized.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, you're right! I changed it!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe I misunderstood, but I still see a lot of 3s in the code.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The mask it computes is 3D, that's why images are reshaped in transform. I can then simply use np.roll in _calculate_density.

warn(f"Input of `fit` contains arrays of dimension "
f"{self._n_dimensions}.")
validate_params(
self.get_params(), self._hyperparameters, exclude=['n_jobs'])

self._range = int(np.ceil(self.radius))

iterator_range_list = [range(-self._range, self._range + 1)
gtauzin marked this conversation as resolved.
Show resolved Hide resolved
for _ in range(self._n_dimensions)] \
+ [[0] for _ in range(3 - self._n_dimensions)]
gtauzin marked this conversation as resolved.
Show resolved Hide resolved
self._iterator = tuple(itertools.product(*iterator_range_list))

# The mask is always 3D but not the iterator.
self.mask_ = np.ones(tuple(2 * self._range + 1 for _ in range(3)),
gtauzin marked this conversation as resolved.
Show resolved Hide resolved
dtype=np.bool)
mesh_range_list = [np.arange(0, 2 * self._range + 1) for _ in range(3)]
gtauzin marked this conversation as resolved.
Show resolved Hide resolved
self.mesh_ = np.stack(
np.meshgrid(*mesh_range_list), axis=3).reshape((-1, 3))

center = self._range * np.ones((1, 3))
self.mask_ = pairwise_distances(
center, self.mesh_, metric=self.metric,
n_jobs=1, **self.metric_params).reshape(self.mask_.shape)

self.mask_ = self.mask_ <= self.radius

padding = np.asarray(
gtauzin marked this conversation as resolved.
Show resolved Hide resolved
[*[self._range for _ in range(self._n_dimensions)],
*[0 for _ in range(3 - self._n_dimensions)]])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There seems to be forgotten leftover code here (maybe I was unclear in my review)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not, you were not unclear, but you used a feature to change the code as suggestions and GitHub suggested me to commit them directly. That's what I did, but your suggestion snippets included only addition not change. xD

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aha! Thanks for pointing that out, I'll pay more attention to this (I suspect I failed to select multiple lines).

self._padder = Padder(paddings=padding)
self._padder.fit(X.reshape((*X.shape[:3], -1)))

return self

def transform(self, X, y=None):
"""For each binary image in the collection `X`, calculate a
corresponding greyscale image based on the density of its pixels.
Return the collection of greyscale images.

Parameters
----------
X : ndarray of shape (n_samples, n_pixels_x, n_pixels_y [, n_pixels_z])
Input data. Each entry along axis 0 is interpreted as a 2D or 3D
binary image.

y : None
There is no need of a target in a transformer, yet the pipeline API
requires this parameter.

Returns
-------
Xt : ndarray of shape (n_samples, n_pixels_x,
n_pixels_y [, n_pixels_z])
Transformed collection of images. Each entry along axis 0 is a
2D or 3D greyscale image.

"""
check_is_fitted(self)
Xt = check_array(X, allow_nd=True, copy=True)

Xt = Xt.reshape((*X.shape[:3], -1))
Xt = self._padder.transform(Xt)

Xt = Parallel(n_jobs=self.n_jobs)(
delayed(self._calculate_density)(Xt[s])
for s in gen_even_slices(Xt.shape[0],
effective_n_jobs(self.n_jobs)))
Xt = np.concatenate(Xt)

Xt = Xt[:, self._range: -self._range, self._range: -self._range]

if self._n_dimensions == 3:
Xt = Xt[:, :, :, self._range: -self._range]

Xt = Xt.reshape(X.shape)

return Xt

@staticmethod
def plot(Xt, sample=0, colorscale='greys', origin='upper',
plotly_params=None):
"""Plot a sample from a collection of 2D greyscale images.

Parameters
----------
Xt : ndarray of shape (n_samples, n_pixels_x, n_pixels_y)
Collection of 2D greyscale images, such as returned by
:meth:`transform`.

sample : int, optional, default: ``0``
Index of the sample in `Xt` to be plotted.

colorscale : str, optional, default: ``'greys'``
Color scale to be used in the heat map. Can be anything allowed by
:class:`plotly.graph_objects.Heatmap`.

origin : ``'upper'`` | ``'lower'``, optional, default: ``'upper'``
Position of the [0, 0] pixel of `data`, in the upper left or lower
left corner. The convention ``'upper'`` is typically used for
matrices and images.

plotly_params : dict or None, optional, default: ``None``
Custom parameters to configure the plotly figure. Allowed keys are
``"trace"`` and ``"layout"``, and the corresponding values should
be dictionaries containing keyword arguments as would be fed to the
:meth:`update_traces` and :meth:`update_layout` methods of
:class:`plotly.graph_objects.Figure`.

Returns
-------
fig : :class:`plotly.graph_objects.Figure` object
Plotly figure.

"""
return plot_heatmap(
Xt[sample], colorscale=colorscale, origin=origin,
title=f"Signed-distance filtration of image {sample}",
plotly_params=plotly_params
)
45 changes: 44 additions & 1 deletion gtda/images/tests/test_filtrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
from sklearn.exceptions import NotFittedError

from gtda.images import HeightFiltration, RadialFiltration, \
DilationFiltration, ErosionFiltration, SignedDistanceFiltration
DilationFiltration, ErosionFiltration, SignedDistanceFiltration, \
DensityFiltration

pio.renderers.default = 'plotly_mimetype'

Expand Down Expand Up @@ -276,3 +277,45 @@ def test_signed_transform(n_iterations, images, expected):

def test_signed_fit_transform_plot():
SignedDistanceFiltration().fit_transform_plot(images_2D, sample=0)


def test_density_not_fitted():
density = DensityFiltration()
with pytest.raises(NotFittedError):
density.transform(images_2D)


def test_density_errors():
radius = 'a'
density = DensityFiltration(radius=radius)
with pytest.raises(TypeError):
density.fit(images_2D)


images_2D_density = np.array(
[[[6., 8., 8., 6.], [7., 10., 10., 7.], [6., 8., 8., 6.]],
[[5., 5., 3., 1.], [6., 6., 4., 1.], [5., 5., 3., 1.]],
[[0., 0., 0., 0.], [0., 0., 0., 0.], [0., 0., 0., 0.]]])


images_3D_density = np.array(
[[[[10., 10.], [14., 14.], [14., 14.], [10., 10.]],
[[13., 13.], [19., 19.], [19., 19.], [13., 13.]],
[[10., 10.], [14., 14.], [14., 14.], [10., 10.]]],
[[[9., 9.], [9., 9.], [5., 5.], [1., 1.]],
[[12., 12.], [12., 12.], [7., 7.], [1., 1.]],
[[9., 9.], [9., 9.], [5., 5.], [1., 1.]]],
[[[0., 0.], [0., 0.], [0., 0.], [0., 0.]],
[[0., 0.], [0., 0.], [0., 0.], [0., 0.]],
[[0., 0.], [0., 0.], [0., 0.], [0., 0.]]]])


@pytest.mark.parametrize("radius, images, expected",
[(2., images_2D, images_2D_density),
(2.2, images_2D, images_2D_density),
(2., images_3D, images_3D_density)])
def test_density_transform(radius, images, expected):
density = DensityFiltration(radius=radius)

assert_almost_equal(density.fit_transform(images),
expected)