From 8e2323d3e2f7c3af1f3de3324a75f03177a27787 Mon Sep 17 00:00:00 2001 From: E105D104U125 <72515278+E105D104U125@users.noreply.github.com> Date: Thu, 15 Feb 2024 15:49:44 +0100 Subject: [PATCH 01/12] Converted tests to pytests and added FData tests. Converted tests from old test_covariances file to pytest format and added tests for functional data objects. --- skfda/tests/test_covariances.py | 450 ++++++++++++++++++++++++-------- 1 file changed, 344 insertions(+), 106 deletions(-) diff --git a/skfda/tests/test_covariances.py b/skfda/tests/test_covariances.py index 1005da49d..f9a54c17a 100644 --- a/skfda/tests/test_covariances.py +++ b/skfda/tests/test_covariances.py @@ -1,106 +1,344 @@ -import unittest - -import numpy as np - -import skfda.misc.covariances - - -class TestsSklearn(unittest.TestCase): - - def setUp(self) -> None: - unittest.TestCase.setUp(self) - - self.x = np.linspace(-1, 1, 1000)[:, np.newaxis] - - def _test_compare_sklearn( - self, - cov: skfda.misc.covariances.Covariance, - ) -> None: - cov_sklearn = cov.to_sklearn() - cov_matrix = cov(self.x, self.x) - cov_sklearn_matrix = cov_sklearn(self.x) - - np.testing.assert_array_almost_equal(cov_matrix, cov_sklearn_matrix) - - def test_linear(self) -> None: - - for variance in (1, 2): - for intercept in (0, 1, 2): - with self.subTest(variance=variance, intercept=intercept): - cov = skfda.misc.covariances.Linear( - variance=variance, intercept=intercept) - self._test_compare_sklearn(cov) - - def test_polynomial(self) -> None: - - # Test a couple of non-default parameters only for speed - for variance in (2,): - for intercept in (0, 2): - for slope in (1, 2): - for degree in (1, 2, 3): - with self.subTest( - variance=variance, - intercept=intercept, - slope=slope, - degree=degree, - ): - cov = skfda.misc.covariances.Polynomial( - variance=variance, - intercept=intercept, - slope=slope, - degree=degree, - ) - self._test_compare_sklearn(cov) - - def test_gaussian(self) -> None: - - for variance in (1, 2): - for length_scale in (0.5, 1, 2): - with self.subTest( - variance=variance, - length_scale=length_scale, - ): - cov = skfda.misc.covariances.Gaussian( - variance=variance, - length_scale=length_scale, - ) - self._test_compare_sklearn(cov) - - def test_exponential(self) -> None: - - for variance in (1, 2): - for length_scale in (0.5, 1, 2): - with self.subTest( - variance=variance, - length_scale=length_scale, - ): - cov = skfda.misc.covariances.Exponential( - variance=variance, - length_scale=length_scale, - ) - self._test_compare_sklearn(cov) - - def test_matern(self) -> None: - - # Test a couple of non-default parameters only for speed - for variance in (2,): - for length_scale in (0.5,): - for nu in (0.5, 1, 1.5, 2.5, 3.5, np.inf): - with self.subTest( - variance=variance, - length_scale=length_scale, - nu=nu, - ): - cov = skfda.misc.covariances.Matern( - variance=variance, - length_scale=length_scale, - nu=nu, - ) - self._test_compare_sklearn(cov) - - def test_white_noise(self) -> None: - - for variance in (1, 2): - with self.subTest(variance=variance): - cov = skfda.misc.covariances.WhiteNoise(variance=variance) - self._test_compare_sklearn(cov) +""" +Tests for Covariance module. + +This file includes tests for multivariate data from the previous version +of the file. It additionally incorporates tests cases for functional data +objects. +""" +import pytest +from typing import Tuple + +import numpy as np + +import skfda.misc.covariances as cov +from skfda import FDataGrid, FDataBasis +from skfda.datasets import fetch_phoneme +from skfda.representation.basis import MonomialBasis + + +def _test_compare_sklearn( + multivariate_data, + cov: cov.Covariance, +) -> None: + cov_sklearn = cov.to_sklearn() + cov_matrix = cov(multivariate_data, multivariate_data) + cov_sklearn_matrix = cov_sklearn(multivariate_data) + + np.testing.assert_array_almost_equal(cov_matrix, cov_sklearn_matrix) + +############################################################################## +# Fixtures +############################################################################## + + +@pytest.fixture +def fetch_phoneme_fixture() -> FDataGrid: + """Fixture for loading the phoneme dataset example.""" + fd, _ = fetch_phoneme(return_X_y=True) + return fd[:20] + + +@pytest.fixture( + params=[ + cov.Linear(), + cov.Polynomial(), + cov.Gaussian(), + cov.Exponential(), + cov.Matern(), + ], +) +def covariances_fixture(request) -> cov.Covariance: + """Fixture for getting a covariance kernel function.""" + return request.param + + +@pytest.fixture( + params=[ + cov.Brownian(), + cov.WhiteNoise(), + ], +) +def covariances_raise_fixture(request) -> cov.Covariance: + """Fixture for getting a covariance kernel that raises a ValueError.""" + return request.param + + +@pytest.fixture +def fdatabasis_data() -> Tuple[FDataBasis, FDataBasis]: + """Fixture for loading fdatabasis objects.""" + basis = MonomialBasis( + n_basis=2, + domain_range=(-2, 2), + ) + + fd1 = FDataBasis( + basis=basis, + coefficients=[ + [1, 0], + [1, 2], + ], + ) + + fd2 = FDataBasis( + basis=basis, + coefficients=[ + [0, 1], + ], + ) + + return fd1, fd2 + + +@pytest.fixture +def multivariate_data() -> None: + """Fixture for loading multivariate data.""" + return np.linspace(-1, 1, 1000)[:, np.newaxis] + + +@pytest.fixture( + params=[1, 2], +) +def variance_param(request) -> np.ndarray: + """Fixture for loading variance parameter.""" + return request.param + + +@pytest.fixture( + params=[0, 1, 2], +) +def intercept_param(request) -> np.ndarray: + """Fixture for loading intercept parameter.""" + return request.param + + +@pytest.fixture( + params=[1, 2], +) +def slope_param(request) -> np.ndarray: + """Fixture for loading slope parameter.""" + return request.param + + +@pytest.fixture( + params=[1, 2, 3], +) +def degree_param(request) -> np.ndarray: + """Fixture for loading degree parameter.""" + return request.param + + +@pytest.fixture( + params=[1, 2, 3], +) +def length_scale_param(request) -> np.ndarray: + """Fixture for loading length scale parameter.""" + return request.param + + +@pytest.fixture( + params=[0.5, 1, 1.5, 2.5, 3.5, np.inf], +) +def nu_param(request) -> np.ndarray: + """Fixture for loading nu parameter.""" + return request.param + + +############################################################################## +# Tests +############################################################################## + + +def test_covariances(fetch_phoneme_fixture, covariances_fixture) -> None: + """Check that invalid parameters in fit raise exception.""" + fd = fetch_phoneme_fixture + cov_kernel = covariances_fixture + + # Also test that it does not fail + res1 = cov_kernel(fd, fd) + res2 = cov_kernel(fd) + + np.testing.assert_allclose( + res1, + res2, + atol=1e-7, + ) + + +def test_raises(fetch_phoneme_fixture, covariances_raise_fixture): + """Check that it raises a ValueError exception.""" + fd = fetch_phoneme_fixture + cov_kernel = covariances_raise_fixture + + np.testing.assert_raises( + ValueError, + cov_kernel, + fd, + ) + + +def test_fdatabasis_example_linear(fdatabasis_data): + """Check a precalculated example for Linear covariance kernel.""" + fd1, fd2 = fdatabasis_data + res1 = cov.Linear(variance=1 / 2, intercept=3)(fd1, fd2) + res2 = np.array([[3 / 2], [3 / 2 + 32 / 6]]) + np.testing.assert_allclose( + res1, + res2, + rtol=1e-6, + ) + + +def test_fdatabasis_example_polynomial(fdatabasis_data): + """Check a precalculated example for Polynomial covariance kernel.""" + fd1, fd2 = fdatabasis_data + res1 = cov.Polynomial( + variance=1 / 3, + slope=2, + intercept=1, + degree=2, + )(fd1, fd2) + res2 = np.array([[1 / 3], [67**2 / 3**3]]) + np.testing.assert_allclose( + res1, + res2, + rtol=1e-6, + ) + + +def test_fdatabasis_example_gaussian(fdatabasis_data): + """Check a precalculated example for Gaussian covariance kernel.""" + fd1, fd2 = fdatabasis_data + res1 = cov.Gaussian(variance=3, length_scale=2)(fd1, fd2) + res2 = np.array([ + [3 * np.exp(-7 / 6)], + [3 * np.exp(-7 / 6)], + ]) + np.testing.assert_allclose( + res1, + res2, + rtol=1e-6, + ) + + +def test_fdatabasis_example_exponential(fdatabasis_data): + """Check a precalculated example for Exponential covariance kernel.""" + fd1, fd2 = fdatabasis_data + res1 = cov.Exponential(variance=4, length_scale=5)(fd1, fd2) + res2 = np.array([ + [4 * np.exp(-np.sqrt(28 / 3) / 5)], + [4 * np.exp(-np.sqrt(28 / 3) / 5)], + ]) + np.testing.assert_allclose( + res1, + res2, + rtol=1e-6, + ) + + +def test_fdatabasis_example_matern(fdatabasis_data): + """Check a precalculated example for Matern covariance kernel.""" + fd1, fd2 = fdatabasis_data + res1 = cov.Matern(variance=2, length_scale=3, nu=2)(fd1, fd2) + res2 = np.array([ + [(2 / 3) ** 2 * (28 / 3) * 0.239775899566], + [(2 / 3) ** 2 * (28 / 3) * 0.239775899566], + ]) + np.testing.assert_allclose( + res1, + res2, + rtol=1e-6, + ) + + +def test_multivariate_linear( + multivariate_data, + variance_param, + intercept_param, +) -> None: + """Test linear covariance kernel against scikit learn's kernel.""" + _test_compare_sklearn( + multivariate_data, + cov.Linear( + variance=variance_param, + intercept=intercept_param, + ), + ) + + +def test_multivariate_polynomial( + multivariate_data, + variance_param, + intercept_param, + slope_param, + degree_param, +) -> None: + """Test polynomial covariance kernel against scikit learn's kernel.""" + _test_compare_sklearn( + multivariate_data, + cov.Polynomial( + variance=variance_param, + intercept=intercept_param, + slope=slope_param, + degree=degree_param, + ), + ) + + +def test_multivariate_gaussian( + multivariate_data, + variance_param, + length_scale_param, +) -> None: + """Test gaussian covariance kernel against scikit learn's kernel.""" + _test_compare_sklearn( + multivariate_data, + cov.Gaussian( + variance=variance_param, + length_scale=length_scale_param, + ), + ) + + +def test_multivariate_exponential( + multivariate_data, + variance_param, + length_scale_param, +) -> None: + """Test exponential covariance kernel against scikit learn's kernel.""" + _test_compare_sklearn( + multivariate_data, + cov.Exponential( + variance=variance_param, + length_scale=length_scale_param, + ), + ) + + +def test_multivariate_matern( + multivariate_data, + variance_param, + length_scale_param, + nu_param, +) -> None: + """Test matern covariance kernel against scikit learn's kernel.""" + _test_compare_sklearn( + multivariate_data, + cov.Matern( + variance=variance_param, + length_scale=length_scale_param, + nu=nu_param, + ), + ) + + +def test_multivariate_white_noise( + multivariate_data, + variance_param, +) -> None: + """Test white noise covariance kernel against scikit learn's kernel.""" + _test_compare_sklearn( + multivariate_data, + cov.WhiteNoise( + variance=variance_param, + ), + ) From 16d8eb023a954a61d70c095bca8f1fa0d748cc7e Mon Sep 17 00:00:00 2001 From: E105D104U125 <72515278+E105D104U125@users.noreply.github.com> Date: Thu, 15 Feb 2024 15:52:39 +0100 Subject: [PATCH 02/12] Adapted covariances to accept FData objects. Adapted Linear, Polynomial, Exponential, Gaussian and Matern covariances and added control messages for covariance kernels that do not support functional data objects. --- skfda/misc/covariances.py | 1811 +++++++++++++++++++------------------ 1 file changed, 941 insertions(+), 870 deletions(-) diff --git a/skfda/misc/covariances.py b/skfda/misc/covariances.py index 43635eb02..9bce97d74 100644 --- a/skfda/misc/covariances.py +++ b/skfda/misc/covariances.py @@ -1,870 +1,941 @@ -from __future__ import annotations - -import abc -from typing import Callable, Sequence, Tuple, Union - -import matplotlib.pyplot as plt -import numpy as np -import sklearn.gaussian_process.kernels as sklearn_kern -from matplotlib.figure import Figure -from scipy.special import gamma, kv - -from ..representation import FData, FDataBasis, FDataGrid -from ..representation.basis import TensorBasis -from ..typing._numpy import ArrayLike, NDArrayFloat - - -def _squared_norms(x: NDArrayFloat, y: NDArrayFloat) -> NDArrayFloat: - return ( # type: ignore[no-any-return] - (x[:, np.newaxis, :] - y[np.newaxis, :, :]) ** 2 - ).sum(2) - - -CovarianceLike = Union[ - float, - NDArrayFloat, - Callable[[ArrayLike, ArrayLike], NDArrayFloat], -] - - -def _transform_to_2d(t: ArrayLike) -> NDArrayFloat: - """Transform 1d arrays in column vectors.""" - t = np.asfarray(t) - - dim = len(t.shape) - assert dim <= 2 - - if dim < 2: - t = np.atleast_2d(t).T - - return t - - -def _execute_covariance( - covariance: CovarianceLike, - x: ArrayLike, - y: ArrayLike, -) -> NDArrayFloat: - """Execute a covariance function.""" - x = _transform_to_2d(x) - y = _transform_to_2d(y) - - if isinstance(covariance, (int, float)): - return np.array(covariance) - else: - if callable(covariance): - result = covariance(x, y) - elif isinstance(covariance, np.ndarray): - result = covariance - else: - # GPy kernel - result = covariance.K(x, y) - - assert result.shape[0] == len(x) - assert result.shape[1] == len(y) - return result - - -class Covariance(abc.ABC): - """Abstract class for covariance functions.""" - - _parameters_str: Sequence[Tuple[str, str]] - _latex_formula: str - - @abc.abstractmethod - def __call__(self, x: ArrayLike, y: ArrayLike) -> NDArrayFloat: - pass - - def heatmap(self, limits: Tuple[float, float] = (-1, 1)) -> Figure: - """Return a heatmap plot of the covariance function.""" - from ..exploratory.visualization._utils import _create_figure - - x = np.linspace(*limits, 1000) - - cov_matrix = self(x, x) - - fig = _create_figure() - ax = fig.add_subplot(1, 1, 1) - ax.imshow( - cov_matrix, - extent=[limits[0], limits[1], limits[1], limits[0]], - ) - ax.set_title(f"Covariance function in [{limits[0]}, {limits[1]}]") - - return fig - - def _sample_trajectories_plot(self) -> Figure: - from ..datasets import make_gaussian_process - - fd = make_gaussian_process( - start=-1, - n_samples=10, - cov=self, - random_state=0, - ) - fig = fd.plot() - fig.axes[0].set_title("Sample trajectories") - return fig - - def __repr__(self) -> str: - - params_str = ', '.join( - f'{n}={getattr(self, n)}' for n, _ in self._parameters_str - ) - - return ( - f"{self.__module__}.{type(self).__qualname__}(" - f"{params_str}" - f")" - ) - - def _latex_content(self) -> str: - params_str = ''.join( - fr'{l} &= {getattr(self, n)} \\' for n, l in self._parameters_str - ) - - return ( - fr"{self._latex_formula} \\" - r"\text{where:}" - r"\begin{aligned}" - fr"\qquad{params_str}" - r"\end{aligned}" - ) - - def _repr_latex_(self) -> str: - return fr"\(\displaystyle {self._latex_content()}\)" - - def _repr_html_(self) -> str: - from ..exploratory.visualization._utils import _figure_to_svg - - fig = self.heatmap() - heatmap = _figure_to_svg(fig) - plt.close(fig) - - fig = self._sample_trajectories_plot() - sample_trajectories = _figure_to_svg(fig) - plt.close(fig) - - row_style = '' - - def column_style(percent: float, margin_top: str = "0") -> str: - return ( - f'style="display: inline-block; ' - f'margin:0; ' - f'margin-top: {margin_top}; ' - f'width:{percent}%; ' - f'height:auto;' - f'vertical-align: middle"' - ) - - return fr""" -
-
- \[{self._latex_content()}\] -
-
-
-
- {sample_trajectories} -
-
- {heatmap} -
-
- """ - - def to_sklearn(self) -> sklearn_kern.Kernel: - """Convert it to a sklearn kernel, if there is one.""" - raise NotImplementedError( - f"{type(self).__name__} covariance not " - f"implemented in scikit-learn", - ) - - -class Brownian(Covariance): - r""" - Brownian covariance function. - - The covariance function is - - .. math:: - K(x, x') = \sigma^2 \frac{|x - \mathcal{O}| + |x' - \mathcal{O}| - - |x - x'|}{2} - - where :math:`\sigma^2` is the variance at distance 1 from - :math:`\mathcal{O}` and :math:`\mathcal{O}` is the origin point. - If :math:`\mathcal{O} = 0` (the default) and we only - consider positive values, the formula can be simplified as - - .. math:: - K(x, y) = \sigma^2 \min(x, y). - - Heatmap plot of the covariance function: - - .. jupyter-execute:: - - from skfda.misc.covariances import Brownian - import matplotlib.pyplot as plt - - Brownian().heatmap(limits=(0, 1)) - plt.show() - - Example of Gaussian process trajectories using this covariance: - - .. jupyter-execute:: - - from skfda.misc.covariances import Brownian - from skfda.datasets import make_gaussian_process - import matplotlib.pyplot as plt - - gp = make_gaussian_process( - n_samples=10, cov=Brownian(), random_state=0) - gp.plot() - plt.show() - - Default representation in a Jupyter notebook: - - .. jupyter-execute:: - - from skfda.misc.covariances import Brownian - - Brownian() - - """ - _latex_formula = ( - r"K(x, x') = \sigma^2 \frac{|x - \mathcal{O}| + " - r"|x' - \mathcal{O}| - |x - x'|}{2}" - ) - - _parameters_str = [ - ("variance", r"\sigma^2"), - ("origin", r"\mathcal{O}"), - ] - - def __init__(self, *, variance: float = 1, origin: float = 0) -> None: - self.variance = variance - self.origin = origin - - def __call__(self, x: ArrayLike, y: ArrayLike) -> NDArrayFloat: - x = _transform_to_2d(x) - self.origin - y = _transform_to_2d(y) - self.origin - - sum_norms = np.add.outer( - np.linalg.norm(x, axis=-1), - np.linalg.norm(y, axis=-1), - ) - norm_sub = np.linalg.norm( - x[:, np.newaxis, :] - y[np.newaxis, :, :], - axis=-1, - ) - - return ( # type: ignore[no-any-return] - self.variance * (sum_norms - norm_sub) / 2 - ) - - -class Linear(Covariance): - r""" - Linear covariance function. - - The covariance function is - - .. math:: - K(x, x') = \sigma^2 (x^T x' + c) - - where :math:`\sigma^2` is the scale of the variance and - :math:`c` is the intercept. - - Heatmap plot of the covariance function: - - .. jupyter-execute:: - - from skfda.misc.covariances import Linear - import matplotlib.pyplot as plt - - Linear().heatmap(limits=(0, 1)) - plt.show() - - Example of Gaussian process trajectories using this covariance: - - .. jupyter-execute:: - - from skfda.misc.covariances import Linear - from skfda.datasets import make_gaussian_process - import matplotlib.pyplot as plt - - gp = make_gaussian_process( - n_samples=10, cov=Linear(), random_state=0) - gp.plot() - plt.show() - - Default representation in a Jupyter notebook: - - .. jupyter-execute:: - - from skfda.misc.covariances import Linear - - Linear() - - """ - - _latex_formula = r"K(x, x') = \sigma^2 (x^T x' + c)" - - _parameters_str = [ - ("variance", r"\sigma^2"), - ("intercept", "c"), - ] - - def __init__(self, *, variance: float = 1, intercept: float = 0) -> None: - self.variance = variance - self.intercept = intercept - - def __call__(self, x: ArrayLike, y: ArrayLike) -> NDArrayFloat: - x = _transform_to_2d(x) - y = _transform_to_2d(y) - - return self.variance * (x @ y.T + self.intercept) - - def to_sklearn(self) -> sklearn_kern.Kernel: - return ( - self.variance - * (sklearn_kern.DotProduct(0) + self.intercept) - ) - - -class Polynomial(Covariance): - r""" - Polynomial covariance function. - - The covariance function is - - .. math:: - K(x, x') = \sigma^2 (\alpha x^T x' + c)^d - - where :math:`\sigma^2` is the scale of the variance, - :math:`\alpha` is the slope, :math:`d` the degree of the - polynomial and :math:`c` is the intercept. - - Heatmap plot of the covariance function: - - .. jupyter-execute:: - - from skfda.misc.covariances import Polynomial - import matplotlib.pyplot as plt - - Polynomial().heatmap(limits=(0, 1)) - plt.show() - - Example of Gaussian process trajectories using this covariance: - - .. jupyter-execute:: - - from skfda.misc.covariances import Polynomial - from skfda.datasets import make_gaussian_process - import matplotlib.pyplot as plt - - gp = make_gaussian_process( - n_samples=10, cov=Polynomial(), random_state=0) - gp.plot() - plt.show() - - Default representation in a Jupyter notebook: - - .. jupyter-execute:: - - from skfda.misc.covariances import Polynomial - - Polynomial() - - """ - - _latex_formula = r"K(x, x') = \sigma^2 (\alpha x^T x' + c)^d" - - _parameters_str = [ - ("variance", r"\sigma^2"), - ("intercept", "c"), - ("slope", r"\alpha"), - ("degree", "d"), - ] - - def __init__( - self, - *, - variance: float = 1, - intercept: float = 0, - slope: float = 1, - degree: float = 2, - ) -> None: - self.variance = variance - self.intercept = intercept - self.slope = slope - self.degree = degree - - def __call__(self, x: ArrayLike, y: ArrayLike) -> NDArrayFloat: - x = _transform_to_2d(x) - y = _transform_to_2d(y) - - return ( - self.variance - * (self.slope * x @ y.T + self.intercept) ** self.degree - ) - - def to_sklearn(self) -> sklearn_kern.Kernel: - return ( - self.variance - * (self.slope * sklearn_kern.DotProduct(0) + self.intercept) - ** self.degree - ) - - -class Gaussian(Covariance): - r""" - Gaussian covariance function. - - The covariance function is - - .. math:: - K(x, x') = \sigma^2 \exp\left(-\frac{||x - x'||^2}{2l^2}\right) - - where :math:`\sigma^2` is the variance and :math:`l` is the length scale. - - Heatmap plot of the covariance function: - - .. jupyter-execute:: - - from skfda.misc.covariances import Gaussian - import matplotlib.pyplot as plt - - Gaussian().heatmap(limits=(0, 1)) - plt.show() - - Example of Gaussian process trajectories using this covariance: - - .. jupyter-execute:: - - from skfda.misc.covariances import Gaussian - from skfda.datasets import make_gaussian_process - import matplotlib.pyplot as plt - - gp = make_gaussian_process( - n_samples=10, cov=Gaussian(), random_state=0) - gp.plot() - plt.show() - - Default representation in a Jupyter notebook: - - .. jupyter-execute:: - - from skfda.misc.covariances import Gaussian - - Gaussian() - - """ - - _latex_formula = ( - r"K(x, x') = \sigma^2 \exp\left(-\frac{\|x - x'\|^2}{2l^2}" - r"\right)" - ) - - _parameters_str = [ - ("variance", r"\sigma^2"), - ("length_scale", "l"), - ] - - def __init__(self, *, variance: float = 1, length_scale: float = 1): - self.variance = variance - self.length_scale = length_scale - - def __call__(self, x: ArrayLike, y: ArrayLike) -> NDArrayFloat: - x = _transform_to_2d(x) - y = _transform_to_2d(y) - - x_y = _squared_norms(x, y) - - return self.variance * np.exp(-x_y / (2 * self.length_scale ** 2)) - - def to_sklearn(self) -> sklearn_kern.Kernel: - return ( - self.variance * sklearn_kern.RBF(length_scale=self.length_scale) - ) - - -class Exponential(Covariance): - r""" - Exponential covariance function. - - The covariance function is - - .. math:: - K(x, x') = \sigma^2 \exp\left(-\frac{\|x - x'\|}{l}\right) - - where :math:`\sigma^2` is the variance and :math:`l` is the length scale. - - Heatmap plot of the covariance function: - - .. jupyter-execute:: - - from skfda.misc.covariances import Exponential - import matplotlib.pyplot as plt - - Exponential().heatmap(limits=(0, 1)) - plt.show() - - Example of Gaussian process trajectories using this covariance: - - .. jupyter-execute:: - - from skfda.misc.covariances import Exponential - from skfda.datasets import make_gaussian_process - import matplotlib.pyplot as plt - - gp = make_gaussian_process( - n_samples=10, cov=Exponential(), random_state=0) - gp.plot() - plt.show() - - Default representation in a Jupyter notebook: - - .. jupyter-execute:: - - from skfda.misc.covariances import Exponential - - Exponential() - - """ - - _latex_formula = ( - r"K(x, x') = \sigma^2 \exp\left(-\frac{||x - x'||}{l}" - r"\right)" - ) - - _parameters_str = [ - ("variance", r"\sigma^2"), - ("length_scale", "l"), - ] - - def __init__( - self, - *, - variance: float = 1, - length_scale: float = 1, - ) -> None: - self.variance = variance - self.length_scale = length_scale - - def __call__(self, x: ArrayLike, y: ArrayLike) -> NDArrayFloat: - x = _transform_to_2d(x) - y = _transform_to_2d(y) - - x_y = _squared_norms(x, y) - return self.variance * np.exp(-np.sqrt(x_y) / (self.length_scale)) - - def to_sklearn(self) -> sklearn_kern.Kernel: - return ( - self.variance - * sklearn_kern.Matern(length_scale=self.length_scale, nu=0.5) - ) - - -class WhiteNoise(Covariance): - r""" - Gaussian covariance function. - - The covariance function is - - .. math:: - K(x, x')= \begin{cases} - \sigma^2, \quad x = x' \\ - 0, \quad x \neq x'\\ - \end{cases} - - where :math:`\sigma^2` is the variance. - - Heatmap plot of the covariance function: - - .. jupyter-execute:: - - from skfda.misc.covariances import WhiteNoise - import matplotlib.pyplot as plt - - WhiteNoise().heatmap(limits=(0, 1)) - plt.show() - - Example of Gaussian process trajectories using this covariance: - - .. jupyter-execute:: - - from skfda.misc.covariances import WhiteNoise - from skfda.datasets import make_gaussian_process - import matplotlib.pyplot as plt - - gp = make_gaussian_process( - n_samples=10, cov=WhiteNoise(), random_state=0) - gp.plot() - plt.show() - - Default representation in a Jupyter notebook: - - .. jupyter-execute:: - - from skfda.misc.covariances import WhiteNoise - - WhiteNoise() - - """ - - _latex_formula = ( - r"K(x, x')= \begin{cases} \sigma^2, \quad x = x' \\" - r"0, \quad x \neq x'\\ \end{cases}" - ) - - _parameters_str = [("variance", r"\sigma^2")] - - def __init__(self, *, variance: float = 1): - self.variance = variance - - def __call__(self, x: ArrayLike, y: ArrayLike) -> NDArrayFloat: - x = _transform_to_2d(x) - return self.variance * np.eye(x.shape[0]) - - def to_sklearn(self) -> sklearn_kern.Kernel: - return sklearn_kern.WhiteKernel(noise_level=self.variance) - - -class Matern(Covariance): - r""" - Matérn covariance function. - - The covariance function is - - .. math:: - K(x, x') = \sigma^2 \frac{2^{1-\nu}}{\Gamma(\nu)} - \left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)^{\nu} - K_{\nu}\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right) - - where :math:`\sigma^2` is the variance, :math:`l` is the length scale - and :math:`\nu` controls the smoothness of the related Gaussian process. - The trajectories of a Gaussian process with Matérn covariance is - :math:`\lceil \nu \rceil - 1` times differentiable. - - - Heatmap plot of the covariance function: - - .. jupyter-execute:: - - from skfda.misc.covariances import Matern - import matplotlib.pyplot as plt - - Matern().heatmap(limits=(0, 1)) - plt.show() - - Example of Gaussian process trajectories using this covariance: - - .. jupyter-execute:: - - from skfda.misc.covariances import Matern - from skfda.datasets import make_gaussian_process - import matplotlib.pyplot as plt - - gp = make_gaussian_process( - n_samples=10, cov=Matern(), random_state=0) - gp.plot() - plt.show() - - Default representation in a Jupyter notebook: - - .. jupyter-execute:: - - from skfda.misc.covariances import Matern - - Matern() - - """ - _latex_formula = ( - r"K(x, x') = \sigma^2 \frac{2^{1-\nu}}{\Gamma(\nu)}" - r"\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)^{\nu}" - r"K_{\nu}\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)" - ) - - _parameters_str = [ - ("variance", r"\sigma^2"), - ("length_scale", "l"), - ("nu", r"\nu"), - ] - - def __init__( - self, - *, - variance: float = 1, - length_scale: float = 1, - nu: float = 1.5, - ) -> None: - self.variance = variance - self.length_scale = length_scale - self.nu = nu - - def __call__(self, x: ArrayLike, y: ArrayLike) -> NDArrayFloat: - x = _transform_to_2d(x) - y = _transform_to_2d(y) - - x_y_squared = _squared_norms(x, y) - x_y = np.sqrt(x_y_squared) - - p = self.nu - 0.5 - if p.is_integer(): - # Formula for half-integers - p = int(p) - body = np.sqrt(2 * p + 1) * x_y / self.length_scale - exponential = np.exp(-body) - power_list = np.full(shape=(p,) + body.shape, fill_value=2 * body) - power_list = np.cumprod(power_list, axis=0) - power_list = np.concatenate( - (power_list[::-1], np.asarray([np.ones_like(body)])), - ) - power_list = np.moveaxis(power_list, 0, -1) - numerator = np.cumprod(np.arange(p, 0, -1)) - numerator = np.concatenate(([1], numerator)) - denom1 = np.cumprod(np.arange(2 * p, p, -1)) - denom1 = np.concatenate((denom1[::-1], [1])) - denom2 = np.cumprod(np.arange(1, p + 1)) - denom2 = np.concatenate(([1], denom2)) - - sum_terms = power_list * numerator / (denom1 * denom2) - return ( # type: ignore[no-any-return] - self.variance * exponential * np.sum(sum_terms, axis=-1) - ) - elif self.nu == np.inf: - return ( - self.variance * np.exp( - -x_y_squared / (2 * self.length_scale ** 2), - ) - ) - else: - # General formula - scaling = 2**(1 - self.nu) / gamma(self.nu) - body = np.sqrt(2 * self.nu) * x_y / self.length_scale - power = body**self.nu - bessel = kv(self.nu, body) - - with np.errstate(invalid='ignore'): - eval_cov = self.variance * scaling * power * bessel - - # Values with nan are where the distance is 0 - return np.nan_to_num( # type: ignore[no-any-return] - eval_cov, - nan=self.variance, - ) - - def to_sklearn(self) -> sklearn_kern.Kernel: - return ( - self.variance - * sklearn_kern.Matern(length_scale=self.length_scale, nu=self.nu) - ) - - -class Empirical(Covariance): - r""" - Sample covariance function. - - The sample covariance function is defined as - . math:: - K(t, s) = \frac{1}{N-\text{correction}}\sum_{n=1}^N\left(x_n(t) - - \bar{x}(t)\right) \left(x_n(s) - \bar{x}(s)\right) - - where :math:`x_n(t)` is the n-th sample and :math:`\bar{x}(t)` is the - mean of the samples. :math:`N` is the number of samples, - :math:`\text{correction}` is the degrees of freedom adjustment and is such - that :math:`N-\text{correction}` is the divisor used in the calculation of - the covariance function. - - """ - - _latex_formula = ( - r"K(t, s) = \frac{1}{N-\text{correction}}\sum_{n=1}^N" - r"(x_n(t) - \bar{x}(t))(x_n(s) - \bar{x}(s))" - ) - _parameters_str = [ - ("data", "data"), - ] - - cov_fdata: FData - correction: int - - @abc.abstractmethod - def __init__(self, data: FData) -> None: - if data.dim_domain != 1 or data.dim_codomain != 1: - raise NotImplementedError( - "Covariance only implemented " - "for univariate functions", - ) - - def __call__(self, x: ArrayLike, y: ArrayLike) -> NDArrayFloat: - """Evaluate the covariance function. - - Args: - x: First array of points of evaluation. - y: Second array of points of evaluation. - - Returns: - Covariance function evaluated at the grid formed by x and y. - """ - return self.cov_fdata([x, y], grid=True)[0, ..., 0] - - -class EmpiricalGrid(Empirical): - """Sample covariance function for FDataGrid.""" - - cov_fdata: FDataGrid - correction: int - - def __init__(self, data: FDataGrid, correction: int = 0) -> None: - super().__init__(data=data) - - self.correction = correction - self.cov_fdata = data.copy( - data_matrix=np.cov( - data.data_matrix[..., 0], - rowvar=False, - ddof=correction, - )[np.newaxis, ...], - grid_points=[ - data.grid_points[0], - data.grid_points[0], - ], - domain_range=[ - data.domain_range[0], - data.domain_range[0], - ], - argument_names=data.argument_names * 2, - sample_names=("covariance",), - ) - - -class EmpiricalBasis(Empirical): - """ - Sample covariance function for FDataBasis. - - In this case one may use the basis expression of the samples to - express the sample covariance function in the tensor product basis - of the original basis. - """ - - cov_fdata: FDataBasis - coeff_matrix: NDArrayFloat - correction: int - - def __init__(self, data: FDataBasis, correction: int = 0) -> None: - super().__init__(data=data) - - self.correction = correction - self.coeff_matrix = np.cov( - data.coefficients, - rowvar=False, - ddof=correction, - ) - self.cov_fdata = FDataBasis( - basis=TensorBasis([data.basis, data.basis]), - coefficients=self.coeff_matrix.flatten(), - argument_names=data.argument_names * 2, - sample_names=("covariance",), - ) +from __future__ import annotations + +import abc +from typing import Callable, Sequence, Tuple, Union + +import matplotlib.pyplot as plt +import numpy as np +import sklearn.gaussian_process.kernels as sklearn_kern +from matplotlib.figure import Figure +from scipy.special import gamma, kv + +from ..misc import inner_product_matrix +from ..misc.metrics import PairwiseMetric, LpNorm, l2_distance +from ..representation import FData, FDataBasis, FDataGrid +from ..representation.basis import TensorBasis +from ..typing._numpy import ArrayLike, NDArrayFloat + + +def _squared_norms(x: NDArrayFloat, y: NDArrayFloat) -> NDArrayFloat: + return ( # type: ignore[no-any-return] + (x[:, np.newaxis, :] - y[np.newaxis, :, :]) ** 2 + ).sum(2) + + +CovarianceLike = Union[ + float, + NDArrayFloat, + Callable[[ArrayLike, ArrayLike], NDArrayFloat], +] + +InputAcceptable = Union[ + np.ndarray, + FData, +] + + +def _transform_to_2d(t: ArrayLike) -> NDArrayFloat: + """Transform 1d arrays in column vectors.""" + t = np.asfarray(t) + + dim = len(t.shape) + assert dim <= 2 + + if dim < 2: + t = np.atleast_2d(t).T + + return t + + +def _execute_covariance( + covariance: CovarianceLike, + x: ArrayLike, + y: ArrayLike, +) -> NDArrayFloat: + """Execute a covariance function.""" + x = _transform_to_2d(x) + y = _transform_to_2d(y) + + if isinstance(covariance, (int, float)): + return np.array(covariance) + else: + if callable(covariance): + result = covariance(x, y) + elif isinstance(covariance, np.ndarray): + result = covariance + else: + # GPy kernel + result = covariance.K(x, y) + + assert result.shape[0] == len(x) + assert result.shape[1] == len(y) + return result + + +class Covariance(abc.ABC): + """Abstract class for covariance functions.""" + + _parameters_str: Sequence[Tuple[str, str]] + _latex_formula: str + + @abc.abstractmethod + def __call__( + self, + x: InputAcceptable, + y: InputAcceptable | None = None, + ) -> NDArrayFloat: + pass + + def _param_check_and_transform( + self, + x: InputAcceptable, + y: InputAcceptable | None = None, + ) -> Tuple[np.ndarray | FData, np.ndarray | FData]: + # Param check + if y is None: + y = x + + if type(x) is not type(y): # noqa: WPS516 + raise ValueError( + 'Cannot operate objects x and y from different classes', + f'({type(x)}, {type(y)}).', + ) + + if isinstance(x, np.ndarray) and isinstance(y, np.ndarray): + x, y = np.array(x), np.array(y) + if len(x.shape) < 2: + x = np.atleast_2d(x) + if len(y.shape) < 2: + y = np.atleast_2d(y) + + return x, y + + def heatmap(self, limits: Tuple[float, float] = (-1, 1)) -> Figure: + """Return a heatmap plot of the covariance function.""" + from ..exploratory.visualization._utils import _create_figure + + x = np.linspace(*limits, 1000) + + cov_matrix = self(x, x) + + fig = _create_figure() + ax = fig.add_subplot(1, 1, 1) + ax.imshow( + cov_matrix, + extent=[limits[0], limits[1], limits[1], limits[0]], + ) + ax.set_title(f"Covariance function in [{limits[0]}, {limits[1]}]") + + return fig + + def _sample_trajectories_plot(self) -> Figure: + from ..datasets import make_gaussian_process + + fd = make_gaussian_process( + start=-1, + n_samples=10, + cov=self, + random_state=0, + ) + fig = fd.plot() + fig.axes[0].set_title("Sample trajectories") + return fig + + def __repr__(self) -> str: + + params_str = ', '.join( + f'{n}={getattr(self, n)}' for n, _ in self._parameters_str + ) + + return ( + f"{self.__module__}.{type(self).__qualname__}(" + f"{params_str}" + f")" + ) + + def _latex_content(self) -> str: + params_str = ''.join( + fr'{l} &= {getattr(self, n)} \\' for n, l in self._parameters_str + ) + + return ( + fr"{self._latex_formula} \\" + r"\text{where:}" + r"\begin{aligned}" + fr"\qquad{params_str}" + r"\end{aligned}" + ) + + def _repr_latex_(self) -> str: + return fr"\(\displaystyle {self._latex_content()}\)" + + def _repr_html_(self) -> str: + from ..exploratory.visualization._utils import _figure_to_svg + + fig = self.heatmap() + heatmap = _figure_to_svg(fig) + plt.close(fig) + + fig = self._sample_trajectories_plot() + sample_trajectories = _figure_to_svg(fig) + plt.close(fig) + + row_style = '' + + def column_style(percent: float, margin_top: str = "0") -> str: + return ( + f'style="display: inline-block; ' + f'margin:0; ' + f'margin-top: {margin_top}; ' + f'width:{percent}%; ' + f'height:auto;' + f'vertical-align: middle"' + ) + + return fr""" +
+
+ \[{self._latex_content()}\] +
+
+
+
+ {sample_trajectories} +
+
+ {heatmap} +
+
+ """ + + def to_sklearn(self) -> sklearn_kern.Kernel: + """Convert it to a sklearn kernel, if there is one.""" + raise NotImplementedError( + f"{type(self).__name__} covariance not " + f"implemented in scikit-learn", + ) + + +class Brownian(Covariance): + r""" + Brownian covariance function. + + The covariance function is + + .. math:: + K(x, x') = \sigma^2 \frac{|x - \mathcal{O}| + |x' - \mathcal{O}| + - |x - x'|}{2} + + where :math:`\sigma^2` is the variance at distance 1 from + :math:`\mathcal{O}` and :math:`\mathcal{O}` is the origin point. + If :math:`\mathcal{O} = 0` (the default) and we only + consider positive values, the formula can be simplified as + + .. math:: + K(x, y) = \sigma^2 \min(x, y). + + Heatmap plot of the covariance function: + + .. jupyter-execute:: + + from skfda.misc.covariances import Brownian + import matplotlib.pyplot as plt + + Brownian().heatmap(limits=(0, 1)) + plt.show() + + Example of Gaussian process trajectories using this covariance: + + .. jupyter-execute:: + + from skfda.misc.covariances import Brownian + from skfda.datasets import make_gaussian_process + import matplotlib.pyplot as plt + + gp = make_gaussian_process( + n_samples=10, cov=Brownian(), random_state=0) + gp.plot() + plt.show() + + Default representation in a Jupyter notebook: + + .. jupyter-execute:: + + from skfda.misc.covariances import Brownian + + Brownian() + + """ + _latex_formula = ( + r"K(x, x') = \sigma^2 \frac{|x - \mathcal{O}| + " + r"|x' - \mathcal{O}| - |x - x'|}{2}" + ) + + _parameters_str = [ + ("variance", r"\sigma^2"), + ("origin", r"\mathcal{O}"), + ] + + def __init__(self, *, variance: float = 1, origin: float = 0) -> None: + self.variance = variance + self.origin = origin + + def __call__( + self, + x: InputAcceptable | FData, + y: InputAcceptable | None = None, + ) -> NDArrayFloat: + if isinstance(x, FData) or isinstance(y, FData): + raise ValueError('Not defined for FData objects.') + + x = _transform_to_2d(x) - self.origin + y = _transform_to_2d(y) - self.origin + + sum_norms = np.add.outer( + np.linalg.norm(x, axis=-1), + np.linalg.norm(y, axis=-1), + ) + norm_sub = np.linalg.norm( + x[:, np.newaxis, :] - y[np.newaxis, :, :], + axis=-1, + ) + + return ( # type: ignore[no-any-return] + self.variance * (sum_norms - norm_sub) / 2 + ) + + +class Linear(Covariance): + r""" + Linear covariance function. + + The covariance function is + + .. math:: + K(x, x') = \sigma^2 (x^T x' + c) + + where :math:`\sigma^2` is the scale of the variance and + :math:`c` is the intercept. + + Heatmap plot of the covariance function: + + .. jupyter-execute:: + + from skfda.misc.covariances import Linear + import matplotlib.pyplot as plt + + Linear().heatmap(limits=(0, 1)) + plt.show() + + Example of Gaussian process trajectories using this covariance: + + .. jupyter-execute:: + + from skfda.misc.covariances import Linear + from skfda.datasets import make_gaussian_process + import matplotlib.pyplot as plt + + gp = make_gaussian_process( + n_samples=10, cov=Linear(), random_state=0) + gp.plot() + plt.show() + + Default representation in a Jupyter notebook: + + .. jupyter-execute:: + + from skfda.misc.covariances import Linear + + Linear() + + """ + + _latex_formula = r"K(x, x') = \sigma^2 (x^T x' + c)" + + _parameters_str = [ + ("variance", r"\sigma^2"), + ("intercept", "c"), + ] + + def __init__(self, *, variance: float = 1, intercept: float = 0) -> None: + self.variance = variance + self.intercept = intercept + + def __call__( + self, + x: InputAcceptable, + y: InputAcceptable | None = None, + ) -> NDArrayFloat: + x, y = self._param_check_and_transform(x, y) + + x_y = inner_product_matrix(x, y) + return self.variance * (x_y + self.intercept) + + def to_sklearn(self) -> sklearn_kern.Kernel: + return ( + self.variance + * (sklearn_kern.DotProduct(0) + self.intercept) + ) + + +class Polynomial(Covariance): + r""" + Polynomial covariance function. + + The covariance function is + + .. math:: + K(x, x') = \sigma^2 (\alpha x^T x' + c)^d + + where :math:`\sigma^2` is the scale of the variance, + :math:`\alpha` is the slope, :math:`d` the degree of the + polynomial and :math:`c` is the intercept. + + Heatmap plot of the covariance function: + + .. jupyter-execute:: + + from skfda.misc.covariances import Polynomial + import matplotlib.pyplot as plt + + Polynomial().heatmap(limits=(0, 1)) + plt.show() + + Example of Gaussian process trajectories using this covariance: + + .. jupyter-execute:: + + from skfda.misc.covariances import Polynomial + from skfda.datasets import make_gaussian_process + import matplotlib.pyplot as plt + + gp = make_gaussian_process( + n_samples=10, cov=Polynomial(), random_state=0) + gp.plot() + plt.show() + + Default representation in a Jupyter notebook: + + .. jupyter-execute:: + + from skfda.misc.covariances import Polynomial + + Polynomial() + + """ + + _latex_formula = r"K(x, x') = \sigma^2 (\alpha x^T x' + c)^d" + + _parameters_str = [ + ("variance", r"\sigma^2"), + ("intercept", "c"), + ("slope", r"\alpha"), + ("degree", "d"), + ] + + def __init__( + self, + *, + variance: float = 1, + intercept: float = 0, + slope: float = 1, + degree: float = 2, + ) -> None: + self.variance = variance + self.intercept = intercept + self.slope = slope + self.degree = degree + + def __call__( + self, + x: InputAcceptable, + y: InputAcceptable | None = None, + ) -> NDArrayFloat: + x, y = self._param_check_and_transform(x, y) + + x_y = inner_product_matrix(x, y) + return ( + self.variance + * (self.slope * x_y + self.intercept) ** self.degree + ) + + def to_sklearn(self) -> sklearn_kern.Kernel: + return ( + self.variance + * (self.slope * sklearn_kern.DotProduct(0) + self.intercept) + ** self.degree + ) + + +class Gaussian(Covariance): + r""" + Gaussian covariance function. + + The covariance function is + + .. math:: + K(x, x') = \sigma^2 \exp\left(-\frac{||x - x'||^2}{2l^2}\right) + + where :math:`\sigma^2` is the variance and :math:`l` is the length scale. + + Heatmap plot of the covariance function: + + .. jupyter-execute:: + + from skfda.misc.covariances import Gaussian + import matplotlib.pyplot as plt + + Gaussian().heatmap(limits=(0, 1)) + plt.show() + + Example of Gaussian process trajectories using this covariance: + + .. jupyter-execute:: + + from skfda.misc.covariances import Gaussian + from skfda.datasets import make_gaussian_process + import matplotlib.pyplot as plt + + gp = make_gaussian_process( + n_samples=10, cov=Gaussian(), random_state=0) + gp.plot() + plt.show() + + Default representation in a Jupyter notebook: + + .. jupyter-execute:: + + from skfda.misc.covariances import Gaussian + + Gaussian() + + """ + + _latex_formula = ( + r"K(x, x') = \sigma^2 \exp\left(-\frac{\|x - x'\|^2}{2l^2}" + r"\right)" + ) + + _parameters_str = [ + ("variance", r"\sigma^2"), + ("length_scale", "l"), + ] + + def __init__(self, *, variance: float = 1, length_scale: float = 1): + self.variance = variance + self.length_scale = length_scale + + def __call__( + self, + x: InputAcceptable, + y: InputAcceptable | None = None, + ) -> NDArrayFloat: + x, y = self._param_check_and_transform(x, y) + + distance_x_y = PairwiseMetric(l2_distance)(x, y) + return self.variance * np.exp(-distance_x_y ** 2 / (2 * self.length_scale ** 2)) + + def to_sklearn(self) -> sklearn_kern.Kernel: + return ( + self.variance * sklearn_kern.RBF(length_scale=self.length_scale) + ) + + +class Exponential(Covariance): + r""" + Exponential covariance function. + + The covariance function is + + .. math:: + K(x, x') = \sigma^2 \exp\left(-\frac{\|x - x'\|}{l}\right) + + where :math:`\sigma^2` is the variance and :math:`l` is the length scale. + + Heatmap plot of the covariance function: + + .. jupyter-execute:: + + from skfda.misc.covariances import Exponential + import matplotlib.pyplot as plt + + Exponential().heatmap(limits=(0, 1)) + plt.show() + + Example of Gaussian process trajectories using this covariance: + + .. jupyter-execute:: + + from skfda.misc.covariances import Exponential + from skfda.datasets import make_gaussian_process + import matplotlib.pyplot as plt + + gp = make_gaussian_process( + n_samples=10, cov=Exponential(), random_state=0) + gp.plot() + plt.show() + + Default representation in a Jupyter notebook: + + .. jupyter-execute:: + + from skfda.misc.covariances import Exponential + + Exponential() + + """ + + _latex_formula = ( + r"K(x, x') = \sigma^2 \exp\left(-\frac{||x - x'||}{l}" + r"\right)" + ) + + _parameters_str = [ + ("variance", r"\sigma^2"), + ("length_scale", "l"), + ] + + def __init__( + self, + *, + variance: float = 1, + length_scale: float = 1, + ) -> None: + self.variance = variance + self.length_scale = length_scale + + def __call__( + self, + x: InputAcceptable, + y: InputAcceptable | None = None, + ) -> NDArrayFloat: + x, y = self._param_check_and_transform(x, y) + + distance_x_y = PairwiseMetric(l2_distance)(x, y) + return self.variance * np.exp(-distance_x_y / (self.length_scale)) + + def to_sklearn(self) -> sklearn_kern.Kernel: + return ( + self.variance + * sklearn_kern.Matern(length_scale=self.length_scale, nu=0.5) + ) + + +class WhiteNoise(Covariance): + r""" + Gaussian covariance function. + + The covariance function is + + .. math:: + K(x, x')= \begin{cases} + \sigma^2, \quad x = x' \\ + 0, \quad x \neq x'\\ + \end{cases} + + where :math:`\sigma^2` is the variance. + + Heatmap plot of the covariance function: + + .. jupyter-execute:: + + from skfda.misc.covariances import WhiteNoise + import matplotlib.pyplot as plt + + WhiteNoise().heatmap(limits=(0, 1)) + plt.show() + + Example of Gaussian process trajectories using this covariance: + + .. jupyter-execute:: + + from skfda.misc.covariances import WhiteNoise + from skfda.datasets import make_gaussian_process + import matplotlib.pyplot as plt + + gp = make_gaussian_process( + n_samples=10, cov=WhiteNoise(), random_state=0) + gp.plot() + plt.show() + + Default representation in a Jupyter notebook: + + .. jupyter-execute:: + + from skfda.misc.covariances import WhiteNoise + + WhiteNoise() + + """ + + _latex_formula = ( + r"K(x, x')= \begin{cases} \sigma^2, \quad x = x' \\" + r"0, \quad x \neq x'\\ \end{cases}" + ) + + _parameters_str = [("variance", r"\sigma^2")] + + def __init__(self, *, variance: float = 1): + self.variance = variance + + def __call__( + self, + x: InputAcceptable, + y: InputAcceptable | None = None, + ) -> NDArrayFloat: + if isinstance(x, FData) or isinstance(y, FData): + raise ValueError('Not defined for FData objects.') + + x = _transform_to_2d(x) + return self.variance * np.eye(x.shape[0]) + + def to_sklearn(self) -> sklearn_kern.Kernel: + return sklearn_kern.WhiteKernel(noise_level=self.variance) + + +class Matern(Covariance): + r""" + Matérn covariance function. + + The covariance function is + + .. math:: + K(x, x') = \sigma^2 \frac{2^{1-\nu}}{\Gamma(\nu)} + \left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)^{\nu} + K_{\nu}\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right) + + where :math:`\sigma^2` is the variance, :math:`l` is the length scale + and :math:`\nu` controls the smoothness of the related Gaussian process. + The trajectories of a Gaussian process with Matérn covariance is + :math:`\lceil \nu \rceil - 1` times differentiable. + + + Heatmap plot of the covariance function: + + .. jupyter-execute:: + + from skfda.misc.covariances import Matern + import matplotlib.pyplot as plt + + Matern().heatmap(limits=(0, 1)) + plt.show() + + Example of Gaussian process trajectories using this covariance: + + .. jupyter-execute:: + + from skfda.misc.covariances import Matern + from skfda.datasets import make_gaussian_process + import matplotlib.pyplot as plt + + gp = make_gaussian_process( + n_samples=10, cov=Matern(), random_state=0) + gp.plot() + plt.show() + + Default representation in a Jupyter notebook: + + .. jupyter-execute:: + + from skfda.misc.covariances import Matern + + Matern() + + """ + _latex_formula = ( + r"K(x, x') = \sigma^2 \frac{2^{1-\nu}}{\Gamma(\nu)}" + r"\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)^{\nu}" + r"K_{\nu}\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)" + ) + + _parameters_str = [ + ("variance", r"\sigma^2"), + ("length_scale", "l"), + ("nu", r"\nu"), + ] + + def __init__( + self, + *, + variance: float = 1, + length_scale: float = 1, + nu: float = 1.5, + ) -> None: + self.variance = variance + self.length_scale = length_scale + self.nu = nu + + def __call__( + self, + x: InputAcceptable, + y: InputAcceptable | None = None, + ) -> NDArrayFloat: + x, y = self._param_check_and_transform(x, y) + + distance_x_y = PairwiseMetric(l2_distance)(x, y) + + p = self.nu - 0.5 + if p.is_integer(): + # Formula for half-integers + p = int(p) + body = np.sqrt(2 * p + 1) * distance_x_y / self.length_scale + exponential = np.exp(-body) + power_list = np.full(shape=(p,) + body.shape, fill_value=2 * body) + power_list = np.cumprod(power_list, axis=0) + power_list = np.concatenate( + (power_list[::-1], np.asarray([np.ones_like(body)])), + ) + power_list = np.moveaxis(power_list, 0, -1) + numerator = np.cumprod(np.arange(p, 0, -1)) + numerator = np.concatenate(([1], numerator)) + denom1 = np.cumprod(np.arange(2 * p, p, -1)) + denom1 = np.concatenate((denom1[::-1], [1])) + denom2 = np.cumprod(np.arange(1, p + 1)) + denom2 = np.concatenate(([1], denom2)) + + sum_terms = power_list * numerator / (denom1 * denom2) + return ( # type: ignore[no-any-return] + self.variance * exponential * np.sum(sum_terms, axis=-1) + ) + elif self.nu == np.inf: + return ( # type: ignore[no-any-return] + self.variance * np.exp( + -distance_x_y ** 2 / (2 * self.length_scale ** 2), + ) + ) + else: + # General formula + scaling = 2**(1 - self.nu) / gamma(self.nu) + body = np.sqrt(2 * self.nu) * distance_x_y / self.length_scale + power = body**self.nu + bessel = kv(self.nu, body) + + with np.errstate(invalid='ignore'): + eval_cov = self.variance * scaling * power * bessel + + # Values with nan are where the distance is 0 + return np.nan_to_num( # type: ignore[no-any-return] + eval_cov, + nan=self.variance, + ) + + def to_sklearn(self) -> sklearn_kern.Kernel: + return ( + self.variance + * sklearn_kern.Matern(length_scale=self.length_scale, nu=self.nu) + ) + + +class Empirical(Covariance): + r""" + Sample covariance function. + + The sample covariance function is defined as + . math:: + K(t, s) = \frac{1}{N-\text{correction}}\sum_{n=1}^N\left(x_n(t) - + \bar{x}(t)\right) \left(x_n(s) - \bar{x}(s)\right) + + where :math:`x_n(t)` is the n-th sample and :math:`\bar{x}(t)` is the + mean of the samples. :math:`N` is the number of samples, + :math:`\text{correction}` is the degrees of freedom adjustment and is such + that :math:`N-\text{correction}` is the divisor used in the calculation of + the covariance function. + + """ + + _latex_formula = ( + r"K(t, s) = \frac{1}{N-\text{correction}}\sum_{n=1}^N" + r"(x_n(t) - \bar{x}(t))(x_n(s) - \bar{x}(s))" + ) + _parameters_str = [ + ("data", "data"), + ] + + cov_fdata: FData + correction: int + + @abc.abstractmethod + def __init__(self, data: FData) -> None: + if data.dim_domain != 1 or data.dim_codomain != 1: + raise NotImplementedError( + "Covariance only implemented " + "for univariate functions", + ) + + def __call__( + self, + x: InputAcceptable, + y: InputAcceptable | None = None, + ) -> NDArrayFloat: + if isinstance(x, FData) or isinstance(y, FData): + raise ValueError('Not defined for FData objects.') + + """Evaluate the covariance function. + + Args: + x: First array of points of evaluation. + y: Second array of points of evaluation. + + Returns: + Covariance function evaluated at the grid formed by x and y. + """ + return self.cov_fdata([x, y], grid=True)[0, ..., 0] + + +class EmpiricalGrid(Empirical): + """Sample covariance function for FDataGrid.""" + + cov_fdata: FDataGrid + correction: int + + def __init__(self, data: FDataGrid, correction: int = 0) -> None: + super().__init__(data=data) + + self.correction = correction + self.cov_fdata = data.copy( + data_matrix=np.cov( + data.data_matrix[..., 0], + rowvar=False, + ddof=correction, + )[np.newaxis, ...], + grid_points=[ + data.grid_points[0], + data.grid_points[0], + ], + domain_range=[ + data.domain_range[0], + data.domain_range[0], + ], + argument_names=data.argument_names * 2, + sample_names=("covariance",), + ) + + +class EmpiricalBasis(Empirical): + """ + Sample covariance function for FDataBasis. + + In this case one may use the basis expression of the samples to + express the sample covariance function in the tensor product basis + of the original basis. + """ + + cov_fdata: FDataBasis + coeff_matrix: NDArrayFloat + correction: int + + def __init__(self, data: FDataBasis, correction: int = 0) -> None: + super().__init__(data=data) + + self.correction = correction + self.coeff_matrix = np.cov( + data.coefficients, + rowvar=False, + ddof=correction, + ) + self.cov_fdata = FDataBasis( + basis=TensorBasis([data.basis, data.basis]), + coefficients=self.coeff_matrix.flatten(), + argument_names=data.argument_names * 2, + sample_names=("covariance",), + ) From 4abb6e5e7540048d898469b274626f7fcfe12273 Mon Sep 17 00:00:00 2001 From: E69D68U85 <72515278+E105D104U125@users.noreply.github.com> Date: Sun, 3 Mar 2024 22:27:26 +0100 Subject: [PATCH 03/12] Input parameters now parsed using ParameterGrid. Changed function to test multivariate data for all covariance functions. Now uses ParameterGrid. --- skfda/tests/test_covariances.py | 237 +++++++++++--------------------- 1 file changed, 79 insertions(+), 158 deletions(-) diff --git a/skfda/tests/test_covariances.py b/skfda/tests/test_covariances.py index f9a54c17a..dc5982ca0 100644 --- a/skfda/tests/test_covariances.py +++ b/skfda/tests/test_covariances.py @@ -1,27 +1,22 @@ -""" -Tests for Covariance module. - -This file includes tests for multivariate data from the previous version -of the file. It additionally incorporates tests cases for functional data -objects. -""" -import pytest -from typing import Tuple +"""Tests for Covariance module.""" +from typing import Any, Tuple import numpy as np +import pytest +from sklearn.model_selection import ParameterGrid import skfda.misc.covariances as cov -from skfda import FDataGrid, FDataBasis +from skfda import FDataBasis, FDataGrid from skfda.datasets import fetch_phoneme from skfda.representation.basis import MonomialBasis def _test_compare_sklearn( - multivariate_data, + multivariate_data: Any, cov: cov.Covariance, ) -> None: cov_sklearn = cov.to_sklearn() - cov_matrix = cov(multivariate_data, multivariate_data) + cov_matrix = cov(multivariate_data) cov_sklearn_matrix = cov_sklearn(multivariate_data) np.testing.assert_array_almost_equal(cov_matrix, cov_sklearn_matrix) @@ -47,7 +42,7 @@ def fetch_phoneme_fixture() -> FDataGrid: cov.Matern(), ], ) -def covariances_fixture(request) -> cov.Covariance: +def covariances_fixture(request: Any) -> Any: """Fixture for getting a covariance kernel function.""" return request.param @@ -58,14 +53,14 @@ def covariances_fixture(request) -> cov.Covariance: cov.WhiteNoise(), ], ) -def covariances_raise_fixture(request) -> cov.Covariance: +def covariances_raise_fixture(request: Any) -> Any: """Fixture for getting a covariance kernel that raises a ValueError.""" return request.param @pytest.fixture def fdatabasis_data() -> Tuple[FDataBasis, FDataBasis]: - """Fixture for loading fdatabasis objects.""" + """Fixture for getting fdatabasis objects.""" basis = MonomialBasis( n_basis=2, domain_range=(-2, 2), @@ -90,56 +85,50 @@ def fdatabasis_data() -> Tuple[FDataBasis, FDataBasis]: @pytest.fixture -def multivariate_data() -> None: - """Fixture for loading multivariate data.""" +def multivariate_data() -> np.array: + """Fixture for getting multivariate data.""" return np.linspace(-1, 1, 1000)[:, np.newaxis] @pytest.fixture( - params=[1, 2], -) -def variance_param(request) -> np.ndarray: - """Fixture for loading variance parameter.""" - return request.param - - -@pytest.fixture( - params=[0, 1, 2], -) -def intercept_param(request) -> np.ndarray: - """Fixture for loading intercept parameter.""" - return request.param - - -@pytest.fixture( - params=[1, 2], -) -def slope_param(request) -> np.ndarray: - """Fixture for loading slope parameter.""" - return request.param - - -@pytest.fixture( - params=[1, 2, 3], -) -def degree_param(request) -> np.ndarray: - """Fixture for loading degree parameter.""" - return request.param - - -@pytest.fixture( - params=[1, 2, 3], -) -def length_scale_param(request) -> np.ndarray: - """Fixture for loading length scale parameter.""" - return request.param - - -@pytest.fixture( - params=[0.5, 1, 1.5, 2.5, 3.5, np.inf], + params=[ + (cov.Linear, + { + "variance": [1, 2], + "intercept": [3, 4], + }, + ), + (cov.Polynomial, + { + "variance": [2], + "intercept": [0, 2], + "slope": [1, 2], + "degree": [1, 2, 3], + }, + ), + (cov.Exponential, + { + "variance": [1, 2], + "length_scale": [0.5, 1, 2], + }, + ), + (cov.Gaussian, + { + "variance": [1, 2], + "length_scale": [0.5, 1, 2], + }, + ), + (cov.Matern, + { + "variance": [2], + "length_scale": [0.5], + "nu": [0.5, 1, 1.5, 2.5, 3.5, np.inf], + }, + ), + ], ) -def nu_param(request) -> np.ndarray: - """Fixture for loading nu parameter.""" +def covariance_and_params(request: Any) -> Any: + """Fixture to load the covariance functions.""" return request.param @@ -148,7 +137,10 @@ def nu_param(request) -> np.ndarray: ############################################################################## -def test_covariances(fetch_phoneme_fixture, covariances_fixture) -> None: +def test_covariances( + fetch_phoneme_fixture: Any, + covariances_fixture: Any, +) -> None: """Check that invalid parameters in fit raise exception.""" fd = fetch_phoneme_fixture cov_kernel = covariances_fixture @@ -164,7 +156,10 @@ def test_covariances(fetch_phoneme_fixture, covariances_fixture) -> None: ) -def test_raises(fetch_phoneme_fixture, covariances_raise_fixture): +def test_raises( + fetch_phoneme_fixture: Any, + covariances_raise_fixture: Any, +) -> None: """Check that it raises a ValueError exception.""" fd = fetch_phoneme_fixture cov_kernel = covariances_raise_fixture @@ -176,7 +171,9 @@ def test_raises(fetch_phoneme_fixture, covariances_raise_fixture): ) -def test_fdatabasis_example_linear(fdatabasis_data): +def test_fdatabasis_example_linear( + fdatabasis_data: Any, +) -> None: """Check a precalculated example for Linear covariance kernel.""" fd1, fd2 = fdatabasis_data res1 = cov.Linear(variance=1 / 2, intercept=3)(fd1, fd2) @@ -188,7 +185,9 @@ def test_fdatabasis_example_linear(fdatabasis_data): ) -def test_fdatabasis_example_polynomial(fdatabasis_data): +def test_fdatabasis_example_polynomial( + fdatabasis_data: Any, +) -> None: """Check a precalculated example for Polynomial covariance kernel.""" fd1, fd2 = fdatabasis_data res1 = cov.Polynomial( @@ -205,7 +204,9 @@ def test_fdatabasis_example_polynomial(fdatabasis_data): ) -def test_fdatabasis_example_gaussian(fdatabasis_data): +def test_fdatabasis_example_gaussian( + fdatabasis_data: Any, +) -> None: """Check a precalculated example for Gaussian covariance kernel.""" fd1, fd2 = fdatabasis_data res1 = cov.Gaussian(variance=3, length_scale=2)(fd1, fd2) @@ -220,7 +221,9 @@ def test_fdatabasis_example_gaussian(fdatabasis_data): ) -def test_fdatabasis_example_exponential(fdatabasis_data): +def test_fdatabasis_example_exponential( + fdatabasis_data: Any, +) -> None: """Check a precalculated example for Exponential covariance kernel.""" fd1, fd2 = fdatabasis_data res1 = cov.Exponential(variance=4, length_scale=5)(fd1, fd2) @@ -235,7 +238,9 @@ def test_fdatabasis_example_exponential(fdatabasis_data): ) -def test_fdatabasis_example_matern(fdatabasis_data): +def test_fdatabasis_example_matern( + fdatabasis_data: Any, +) -> None: """Check a precalculated example for Matern covariance kernel.""" fd1, fd2 = fdatabasis_data res1 = cov.Matern(variance=2, length_scale=3, nu=2)(fd1, fd2) @@ -250,95 +255,11 @@ def test_fdatabasis_example_matern(fdatabasis_data): ) -def test_multivariate_linear( - multivariate_data, - variance_param, - intercept_param, +def test_multivariate_covariance_kernel( + multivariate_data: Any, + covariance_and_params: Any, ) -> None: - """Test linear covariance kernel against scikit learn's kernel.""" - _test_compare_sklearn( - multivariate_data, - cov.Linear( - variance=variance_param, - intercept=intercept_param, - ), - ) - - -def test_multivariate_polynomial( - multivariate_data, - variance_param, - intercept_param, - slope_param, - degree_param, -) -> None: - """Test polynomial covariance kernel against scikit learn's kernel.""" - _test_compare_sklearn( - multivariate_data, - cov.Polynomial( - variance=variance_param, - intercept=intercept_param, - slope=slope_param, - degree=degree_param, - ), - ) - - -def test_multivariate_gaussian( - multivariate_data, - variance_param, - length_scale_param, -) -> None: - """Test gaussian covariance kernel against scikit learn's kernel.""" - _test_compare_sklearn( - multivariate_data, - cov.Gaussian( - variance=variance_param, - length_scale=length_scale_param, - ), - ) - - -def test_multivariate_exponential( - multivariate_data, - variance_param, - length_scale_param, -) -> None: - """Test exponential covariance kernel against scikit learn's kernel.""" - _test_compare_sklearn( - multivariate_data, - cov.Exponential( - variance=variance_param, - length_scale=length_scale_param, - ), - ) - - -def test_multivariate_matern( - multivariate_data, - variance_param, - length_scale_param, - nu_param, -) -> None: - """Test matern covariance kernel against scikit learn's kernel.""" - _test_compare_sklearn( - multivariate_data, - cov.Matern( - variance=variance_param, - length_scale=length_scale_param, - nu=nu_param, - ), - ) - - -def test_multivariate_white_noise( - multivariate_data, - variance_param, -) -> None: - """Test white noise covariance kernel against scikit learn's kernel.""" - _test_compare_sklearn( - multivariate_data, - cov.WhiteNoise( - variance=variance_param, - ), - ) + """Test general covariance kernel against scikit-learn's kernel.""" + cov_kernel, param_dict = covariance_and_params + for input_params in list(ParameterGrid(param_dict)): + _test_compare_sklearn(multivariate_data, cov_kernel(**input_params)) From 3a54b9f275775266611979d590aa6c0a99d387ad Mon Sep 17 00:00:00 2001 From: E69D68U85 <72515278+E105D104U125@users.noreply.github.com> Date: Mon, 25 Mar 2024 19:56:52 +0100 Subject: [PATCH 04/12] Changed EOL to LF. --- skfda/misc/covariances.py | 1882 ++++++++++++++++++------------------- 1 file changed, 941 insertions(+), 941 deletions(-) diff --git a/skfda/misc/covariances.py b/skfda/misc/covariances.py index 9bce97d74..b441892ff 100644 --- a/skfda/misc/covariances.py +++ b/skfda/misc/covariances.py @@ -1,941 +1,941 @@ -from __future__ import annotations - -import abc -from typing import Callable, Sequence, Tuple, Union - -import matplotlib.pyplot as plt -import numpy as np -import sklearn.gaussian_process.kernels as sklearn_kern -from matplotlib.figure import Figure -from scipy.special import gamma, kv - -from ..misc import inner_product_matrix -from ..misc.metrics import PairwiseMetric, LpNorm, l2_distance -from ..representation import FData, FDataBasis, FDataGrid -from ..representation.basis import TensorBasis -from ..typing._numpy import ArrayLike, NDArrayFloat - - -def _squared_norms(x: NDArrayFloat, y: NDArrayFloat) -> NDArrayFloat: - return ( # type: ignore[no-any-return] - (x[:, np.newaxis, :] - y[np.newaxis, :, :]) ** 2 - ).sum(2) - - -CovarianceLike = Union[ - float, - NDArrayFloat, - Callable[[ArrayLike, ArrayLike], NDArrayFloat], -] - -InputAcceptable = Union[ - np.ndarray, - FData, -] - - -def _transform_to_2d(t: ArrayLike) -> NDArrayFloat: - """Transform 1d arrays in column vectors.""" - t = np.asfarray(t) - - dim = len(t.shape) - assert dim <= 2 - - if dim < 2: - t = np.atleast_2d(t).T - - return t - - -def _execute_covariance( - covariance: CovarianceLike, - x: ArrayLike, - y: ArrayLike, -) -> NDArrayFloat: - """Execute a covariance function.""" - x = _transform_to_2d(x) - y = _transform_to_2d(y) - - if isinstance(covariance, (int, float)): - return np.array(covariance) - else: - if callable(covariance): - result = covariance(x, y) - elif isinstance(covariance, np.ndarray): - result = covariance - else: - # GPy kernel - result = covariance.K(x, y) - - assert result.shape[0] == len(x) - assert result.shape[1] == len(y) - return result - - -class Covariance(abc.ABC): - """Abstract class for covariance functions.""" - - _parameters_str: Sequence[Tuple[str, str]] - _latex_formula: str - - @abc.abstractmethod - def __call__( - self, - x: InputAcceptable, - y: InputAcceptable | None = None, - ) -> NDArrayFloat: - pass - - def _param_check_and_transform( - self, - x: InputAcceptable, - y: InputAcceptable | None = None, - ) -> Tuple[np.ndarray | FData, np.ndarray | FData]: - # Param check - if y is None: - y = x - - if type(x) is not type(y): # noqa: WPS516 - raise ValueError( - 'Cannot operate objects x and y from different classes', - f'({type(x)}, {type(y)}).', - ) - - if isinstance(x, np.ndarray) and isinstance(y, np.ndarray): - x, y = np.array(x), np.array(y) - if len(x.shape) < 2: - x = np.atleast_2d(x) - if len(y.shape) < 2: - y = np.atleast_2d(y) - - return x, y - - def heatmap(self, limits: Tuple[float, float] = (-1, 1)) -> Figure: - """Return a heatmap plot of the covariance function.""" - from ..exploratory.visualization._utils import _create_figure - - x = np.linspace(*limits, 1000) - - cov_matrix = self(x, x) - - fig = _create_figure() - ax = fig.add_subplot(1, 1, 1) - ax.imshow( - cov_matrix, - extent=[limits[0], limits[1], limits[1], limits[0]], - ) - ax.set_title(f"Covariance function in [{limits[0]}, {limits[1]}]") - - return fig - - def _sample_trajectories_plot(self) -> Figure: - from ..datasets import make_gaussian_process - - fd = make_gaussian_process( - start=-1, - n_samples=10, - cov=self, - random_state=0, - ) - fig = fd.plot() - fig.axes[0].set_title("Sample trajectories") - return fig - - def __repr__(self) -> str: - - params_str = ', '.join( - f'{n}={getattr(self, n)}' for n, _ in self._parameters_str - ) - - return ( - f"{self.__module__}.{type(self).__qualname__}(" - f"{params_str}" - f")" - ) - - def _latex_content(self) -> str: - params_str = ''.join( - fr'{l} &= {getattr(self, n)} \\' for n, l in self._parameters_str - ) - - return ( - fr"{self._latex_formula} \\" - r"\text{where:}" - r"\begin{aligned}" - fr"\qquad{params_str}" - r"\end{aligned}" - ) - - def _repr_latex_(self) -> str: - return fr"\(\displaystyle {self._latex_content()}\)" - - def _repr_html_(self) -> str: - from ..exploratory.visualization._utils import _figure_to_svg - - fig = self.heatmap() - heatmap = _figure_to_svg(fig) - plt.close(fig) - - fig = self._sample_trajectories_plot() - sample_trajectories = _figure_to_svg(fig) - plt.close(fig) - - row_style = '' - - def column_style(percent: float, margin_top: str = "0") -> str: - return ( - f'style="display: inline-block; ' - f'margin:0; ' - f'margin-top: {margin_top}; ' - f'width:{percent}%; ' - f'height:auto;' - f'vertical-align: middle"' - ) - - return fr""" -
-
- \[{self._latex_content()}\] -
-
-
-
- {sample_trajectories} -
-
- {heatmap} -
-
- """ - - def to_sklearn(self) -> sklearn_kern.Kernel: - """Convert it to a sklearn kernel, if there is one.""" - raise NotImplementedError( - f"{type(self).__name__} covariance not " - f"implemented in scikit-learn", - ) - - -class Brownian(Covariance): - r""" - Brownian covariance function. - - The covariance function is - - .. math:: - K(x, x') = \sigma^2 \frac{|x - \mathcal{O}| + |x' - \mathcal{O}| - - |x - x'|}{2} - - where :math:`\sigma^2` is the variance at distance 1 from - :math:`\mathcal{O}` and :math:`\mathcal{O}` is the origin point. - If :math:`\mathcal{O} = 0` (the default) and we only - consider positive values, the formula can be simplified as - - .. math:: - K(x, y) = \sigma^2 \min(x, y). - - Heatmap plot of the covariance function: - - .. jupyter-execute:: - - from skfda.misc.covariances import Brownian - import matplotlib.pyplot as plt - - Brownian().heatmap(limits=(0, 1)) - plt.show() - - Example of Gaussian process trajectories using this covariance: - - .. jupyter-execute:: - - from skfda.misc.covariances import Brownian - from skfda.datasets import make_gaussian_process - import matplotlib.pyplot as plt - - gp = make_gaussian_process( - n_samples=10, cov=Brownian(), random_state=0) - gp.plot() - plt.show() - - Default representation in a Jupyter notebook: - - .. jupyter-execute:: - - from skfda.misc.covariances import Brownian - - Brownian() - - """ - _latex_formula = ( - r"K(x, x') = \sigma^2 \frac{|x - \mathcal{O}| + " - r"|x' - \mathcal{O}| - |x - x'|}{2}" - ) - - _parameters_str = [ - ("variance", r"\sigma^2"), - ("origin", r"\mathcal{O}"), - ] - - def __init__(self, *, variance: float = 1, origin: float = 0) -> None: - self.variance = variance - self.origin = origin - - def __call__( - self, - x: InputAcceptable | FData, - y: InputAcceptable | None = None, - ) -> NDArrayFloat: - if isinstance(x, FData) or isinstance(y, FData): - raise ValueError('Not defined for FData objects.') - - x = _transform_to_2d(x) - self.origin - y = _transform_to_2d(y) - self.origin - - sum_norms = np.add.outer( - np.linalg.norm(x, axis=-1), - np.linalg.norm(y, axis=-1), - ) - norm_sub = np.linalg.norm( - x[:, np.newaxis, :] - y[np.newaxis, :, :], - axis=-1, - ) - - return ( # type: ignore[no-any-return] - self.variance * (sum_norms - norm_sub) / 2 - ) - - -class Linear(Covariance): - r""" - Linear covariance function. - - The covariance function is - - .. math:: - K(x, x') = \sigma^2 (x^T x' + c) - - where :math:`\sigma^2` is the scale of the variance and - :math:`c` is the intercept. - - Heatmap plot of the covariance function: - - .. jupyter-execute:: - - from skfda.misc.covariances import Linear - import matplotlib.pyplot as plt - - Linear().heatmap(limits=(0, 1)) - plt.show() - - Example of Gaussian process trajectories using this covariance: - - .. jupyter-execute:: - - from skfda.misc.covariances import Linear - from skfda.datasets import make_gaussian_process - import matplotlib.pyplot as plt - - gp = make_gaussian_process( - n_samples=10, cov=Linear(), random_state=0) - gp.plot() - plt.show() - - Default representation in a Jupyter notebook: - - .. jupyter-execute:: - - from skfda.misc.covariances import Linear - - Linear() - - """ - - _latex_formula = r"K(x, x') = \sigma^2 (x^T x' + c)" - - _parameters_str = [ - ("variance", r"\sigma^2"), - ("intercept", "c"), - ] - - def __init__(self, *, variance: float = 1, intercept: float = 0) -> None: - self.variance = variance - self.intercept = intercept - - def __call__( - self, - x: InputAcceptable, - y: InputAcceptable | None = None, - ) -> NDArrayFloat: - x, y = self._param_check_and_transform(x, y) - - x_y = inner_product_matrix(x, y) - return self.variance * (x_y + self.intercept) - - def to_sklearn(self) -> sklearn_kern.Kernel: - return ( - self.variance - * (sklearn_kern.DotProduct(0) + self.intercept) - ) - - -class Polynomial(Covariance): - r""" - Polynomial covariance function. - - The covariance function is - - .. math:: - K(x, x') = \sigma^2 (\alpha x^T x' + c)^d - - where :math:`\sigma^2` is the scale of the variance, - :math:`\alpha` is the slope, :math:`d` the degree of the - polynomial and :math:`c` is the intercept. - - Heatmap plot of the covariance function: - - .. jupyter-execute:: - - from skfda.misc.covariances import Polynomial - import matplotlib.pyplot as plt - - Polynomial().heatmap(limits=(0, 1)) - plt.show() - - Example of Gaussian process trajectories using this covariance: - - .. jupyter-execute:: - - from skfda.misc.covariances import Polynomial - from skfda.datasets import make_gaussian_process - import matplotlib.pyplot as plt - - gp = make_gaussian_process( - n_samples=10, cov=Polynomial(), random_state=0) - gp.plot() - plt.show() - - Default representation in a Jupyter notebook: - - .. jupyter-execute:: - - from skfda.misc.covariances import Polynomial - - Polynomial() - - """ - - _latex_formula = r"K(x, x') = \sigma^2 (\alpha x^T x' + c)^d" - - _parameters_str = [ - ("variance", r"\sigma^2"), - ("intercept", "c"), - ("slope", r"\alpha"), - ("degree", "d"), - ] - - def __init__( - self, - *, - variance: float = 1, - intercept: float = 0, - slope: float = 1, - degree: float = 2, - ) -> None: - self.variance = variance - self.intercept = intercept - self.slope = slope - self.degree = degree - - def __call__( - self, - x: InputAcceptable, - y: InputAcceptable | None = None, - ) -> NDArrayFloat: - x, y = self._param_check_and_transform(x, y) - - x_y = inner_product_matrix(x, y) - return ( - self.variance - * (self.slope * x_y + self.intercept) ** self.degree - ) - - def to_sklearn(self) -> sklearn_kern.Kernel: - return ( - self.variance - * (self.slope * sklearn_kern.DotProduct(0) + self.intercept) - ** self.degree - ) - - -class Gaussian(Covariance): - r""" - Gaussian covariance function. - - The covariance function is - - .. math:: - K(x, x') = \sigma^2 \exp\left(-\frac{||x - x'||^2}{2l^2}\right) - - where :math:`\sigma^2` is the variance and :math:`l` is the length scale. - - Heatmap plot of the covariance function: - - .. jupyter-execute:: - - from skfda.misc.covariances import Gaussian - import matplotlib.pyplot as plt - - Gaussian().heatmap(limits=(0, 1)) - plt.show() - - Example of Gaussian process trajectories using this covariance: - - .. jupyter-execute:: - - from skfda.misc.covariances import Gaussian - from skfda.datasets import make_gaussian_process - import matplotlib.pyplot as plt - - gp = make_gaussian_process( - n_samples=10, cov=Gaussian(), random_state=0) - gp.plot() - plt.show() - - Default representation in a Jupyter notebook: - - .. jupyter-execute:: - - from skfda.misc.covariances import Gaussian - - Gaussian() - - """ - - _latex_formula = ( - r"K(x, x') = \sigma^2 \exp\left(-\frac{\|x - x'\|^2}{2l^2}" - r"\right)" - ) - - _parameters_str = [ - ("variance", r"\sigma^2"), - ("length_scale", "l"), - ] - - def __init__(self, *, variance: float = 1, length_scale: float = 1): - self.variance = variance - self.length_scale = length_scale - - def __call__( - self, - x: InputAcceptable, - y: InputAcceptable | None = None, - ) -> NDArrayFloat: - x, y = self._param_check_and_transform(x, y) - - distance_x_y = PairwiseMetric(l2_distance)(x, y) - return self.variance * np.exp(-distance_x_y ** 2 / (2 * self.length_scale ** 2)) - - def to_sklearn(self) -> sklearn_kern.Kernel: - return ( - self.variance * sklearn_kern.RBF(length_scale=self.length_scale) - ) - - -class Exponential(Covariance): - r""" - Exponential covariance function. - - The covariance function is - - .. math:: - K(x, x') = \sigma^2 \exp\left(-\frac{\|x - x'\|}{l}\right) - - where :math:`\sigma^2` is the variance and :math:`l` is the length scale. - - Heatmap plot of the covariance function: - - .. jupyter-execute:: - - from skfda.misc.covariances import Exponential - import matplotlib.pyplot as plt - - Exponential().heatmap(limits=(0, 1)) - plt.show() - - Example of Gaussian process trajectories using this covariance: - - .. jupyter-execute:: - - from skfda.misc.covariances import Exponential - from skfda.datasets import make_gaussian_process - import matplotlib.pyplot as plt - - gp = make_gaussian_process( - n_samples=10, cov=Exponential(), random_state=0) - gp.plot() - plt.show() - - Default representation in a Jupyter notebook: - - .. jupyter-execute:: - - from skfda.misc.covariances import Exponential - - Exponential() - - """ - - _latex_formula = ( - r"K(x, x') = \sigma^2 \exp\left(-\frac{||x - x'||}{l}" - r"\right)" - ) - - _parameters_str = [ - ("variance", r"\sigma^2"), - ("length_scale", "l"), - ] - - def __init__( - self, - *, - variance: float = 1, - length_scale: float = 1, - ) -> None: - self.variance = variance - self.length_scale = length_scale - - def __call__( - self, - x: InputAcceptable, - y: InputAcceptable | None = None, - ) -> NDArrayFloat: - x, y = self._param_check_and_transform(x, y) - - distance_x_y = PairwiseMetric(l2_distance)(x, y) - return self.variance * np.exp(-distance_x_y / (self.length_scale)) - - def to_sklearn(self) -> sklearn_kern.Kernel: - return ( - self.variance - * sklearn_kern.Matern(length_scale=self.length_scale, nu=0.5) - ) - - -class WhiteNoise(Covariance): - r""" - Gaussian covariance function. - - The covariance function is - - .. math:: - K(x, x')= \begin{cases} - \sigma^2, \quad x = x' \\ - 0, \quad x \neq x'\\ - \end{cases} - - where :math:`\sigma^2` is the variance. - - Heatmap plot of the covariance function: - - .. jupyter-execute:: - - from skfda.misc.covariances import WhiteNoise - import matplotlib.pyplot as plt - - WhiteNoise().heatmap(limits=(0, 1)) - plt.show() - - Example of Gaussian process trajectories using this covariance: - - .. jupyter-execute:: - - from skfda.misc.covariances import WhiteNoise - from skfda.datasets import make_gaussian_process - import matplotlib.pyplot as plt - - gp = make_gaussian_process( - n_samples=10, cov=WhiteNoise(), random_state=0) - gp.plot() - plt.show() - - Default representation in a Jupyter notebook: - - .. jupyter-execute:: - - from skfda.misc.covariances import WhiteNoise - - WhiteNoise() - - """ - - _latex_formula = ( - r"K(x, x')= \begin{cases} \sigma^2, \quad x = x' \\" - r"0, \quad x \neq x'\\ \end{cases}" - ) - - _parameters_str = [("variance", r"\sigma^2")] - - def __init__(self, *, variance: float = 1): - self.variance = variance - - def __call__( - self, - x: InputAcceptable, - y: InputAcceptable | None = None, - ) -> NDArrayFloat: - if isinstance(x, FData) or isinstance(y, FData): - raise ValueError('Not defined for FData objects.') - - x = _transform_to_2d(x) - return self.variance * np.eye(x.shape[0]) - - def to_sklearn(self) -> sklearn_kern.Kernel: - return sklearn_kern.WhiteKernel(noise_level=self.variance) - - -class Matern(Covariance): - r""" - Matérn covariance function. - - The covariance function is - - .. math:: - K(x, x') = \sigma^2 \frac{2^{1-\nu}}{\Gamma(\nu)} - \left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)^{\nu} - K_{\nu}\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right) - - where :math:`\sigma^2` is the variance, :math:`l` is the length scale - and :math:`\nu` controls the smoothness of the related Gaussian process. - The trajectories of a Gaussian process with Matérn covariance is - :math:`\lceil \nu \rceil - 1` times differentiable. - - - Heatmap plot of the covariance function: - - .. jupyter-execute:: - - from skfda.misc.covariances import Matern - import matplotlib.pyplot as plt - - Matern().heatmap(limits=(0, 1)) - plt.show() - - Example of Gaussian process trajectories using this covariance: - - .. jupyter-execute:: - - from skfda.misc.covariances import Matern - from skfda.datasets import make_gaussian_process - import matplotlib.pyplot as plt - - gp = make_gaussian_process( - n_samples=10, cov=Matern(), random_state=0) - gp.plot() - plt.show() - - Default representation in a Jupyter notebook: - - .. jupyter-execute:: - - from skfda.misc.covariances import Matern - - Matern() - - """ - _latex_formula = ( - r"K(x, x') = \sigma^2 \frac{2^{1-\nu}}{\Gamma(\nu)}" - r"\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)^{\nu}" - r"K_{\nu}\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)" - ) - - _parameters_str = [ - ("variance", r"\sigma^2"), - ("length_scale", "l"), - ("nu", r"\nu"), - ] - - def __init__( - self, - *, - variance: float = 1, - length_scale: float = 1, - nu: float = 1.5, - ) -> None: - self.variance = variance - self.length_scale = length_scale - self.nu = nu - - def __call__( - self, - x: InputAcceptable, - y: InputAcceptable | None = None, - ) -> NDArrayFloat: - x, y = self._param_check_and_transform(x, y) - - distance_x_y = PairwiseMetric(l2_distance)(x, y) - - p = self.nu - 0.5 - if p.is_integer(): - # Formula for half-integers - p = int(p) - body = np.sqrt(2 * p + 1) * distance_x_y / self.length_scale - exponential = np.exp(-body) - power_list = np.full(shape=(p,) + body.shape, fill_value=2 * body) - power_list = np.cumprod(power_list, axis=0) - power_list = np.concatenate( - (power_list[::-1], np.asarray([np.ones_like(body)])), - ) - power_list = np.moveaxis(power_list, 0, -1) - numerator = np.cumprod(np.arange(p, 0, -1)) - numerator = np.concatenate(([1], numerator)) - denom1 = np.cumprod(np.arange(2 * p, p, -1)) - denom1 = np.concatenate((denom1[::-1], [1])) - denom2 = np.cumprod(np.arange(1, p + 1)) - denom2 = np.concatenate(([1], denom2)) - - sum_terms = power_list * numerator / (denom1 * denom2) - return ( # type: ignore[no-any-return] - self.variance * exponential * np.sum(sum_terms, axis=-1) - ) - elif self.nu == np.inf: - return ( # type: ignore[no-any-return] - self.variance * np.exp( - -distance_x_y ** 2 / (2 * self.length_scale ** 2), - ) - ) - else: - # General formula - scaling = 2**(1 - self.nu) / gamma(self.nu) - body = np.sqrt(2 * self.nu) * distance_x_y / self.length_scale - power = body**self.nu - bessel = kv(self.nu, body) - - with np.errstate(invalid='ignore'): - eval_cov = self.variance * scaling * power * bessel - - # Values with nan are where the distance is 0 - return np.nan_to_num( # type: ignore[no-any-return] - eval_cov, - nan=self.variance, - ) - - def to_sklearn(self) -> sklearn_kern.Kernel: - return ( - self.variance - * sklearn_kern.Matern(length_scale=self.length_scale, nu=self.nu) - ) - - -class Empirical(Covariance): - r""" - Sample covariance function. - - The sample covariance function is defined as - . math:: - K(t, s) = \frac{1}{N-\text{correction}}\sum_{n=1}^N\left(x_n(t) - - \bar{x}(t)\right) \left(x_n(s) - \bar{x}(s)\right) - - where :math:`x_n(t)` is the n-th sample and :math:`\bar{x}(t)` is the - mean of the samples. :math:`N` is the number of samples, - :math:`\text{correction}` is the degrees of freedom adjustment and is such - that :math:`N-\text{correction}` is the divisor used in the calculation of - the covariance function. - - """ - - _latex_formula = ( - r"K(t, s) = \frac{1}{N-\text{correction}}\sum_{n=1}^N" - r"(x_n(t) - \bar{x}(t))(x_n(s) - \bar{x}(s))" - ) - _parameters_str = [ - ("data", "data"), - ] - - cov_fdata: FData - correction: int - - @abc.abstractmethod - def __init__(self, data: FData) -> None: - if data.dim_domain != 1 or data.dim_codomain != 1: - raise NotImplementedError( - "Covariance only implemented " - "for univariate functions", - ) - - def __call__( - self, - x: InputAcceptable, - y: InputAcceptable | None = None, - ) -> NDArrayFloat: - if isinstance(x, FData) or isinstance(y, FData): - raise ValueError('Not defined for FData objects.') - - """Evaluate the covariance function. - - Args: - x: First array of points of evaluation. - y: Second array of points of evaluation. - - Returns: - Covariance function evaluated at the grid formed by x and y. - """ - return self.cov_fdata([x, y], grid=True)[0, ..., 0] - - -class EmpiricalGrid(Empirical): - """Sample covariance function for FDataGrid.""" - - cov_fdata: FDataGrid - correction: int - - def __init__(self, data: FDataGrid, correction: int = 0) -> None: - super().__init__(data=data) - - self.correction = correction - self.cov_fdata = data.copy( - data_matrix=np.cov( - data.data_matrix[..., 0], - rowvar=False, - ddof=correction, - )[np.newaxis, ...], - grid_points=[ - data.grid_points[0], - data.grid_points[0], - ], - domain_range=[ - data.domain_range[0], - data.domain_range[0], - ], - argument_names=data.argument_names * 2, - sample_names=("covariance",), - ) - - -class EmpiricalBasis(Empirical): - """ - Sample covariance function for FDataBasis. - - In this case one may use the basis expression of the samples to - express the sample covariance function in the tensor product basis - of the original basis. - """ - - cov_fdata: FDataBasis - coeff_matrix: NDArrayFloat - correction: int - - def __init__(self, data: FDataBasis, correction: int = 0) -> None: - super().__init__(data=data) - - self.correction = correction - self.coeff_matrix = np.cov( - data.coefficients, - rowvar=False, - ddof=correction, - ) - self.cov_fdata = FDataBasis( - basis=TensorBasis([data.basis, data.basis]), - coefficients=self.coeff_matrix.flatten(), - argument_names=data.argument_names * 2, - sample_names=("covariance",), - ) +from __future__ import annotations + +import abc +from typing import Callable, Sequence, Tuple, Union + +import matplotlib.pyplot as plt +import numpy as np +import sklearn.gaussian_process.kernels as sklearn_kern +from matplotlib.figure import Figure +from scipy.special import gamma, kv + +from ..misc import inner_product_matrix +from ..misc.metrics import PairwiseMetric, LpNorm, l2_distance +from ..representation import FData, FDataBasis, FDataGrid +from ..representation.basis import TensorBasis +from ..typing._numpy import ArrayLike, NDArrayFloat + + +def _squared_norms(x: NDArrayFloat, y: NDArrayFloat) -> NDArrayFloat: + return ( # type: ignore[no-any-return] + (x[:, np.newaxis, :] - y[np.newaxis, :, :]) ** 2 + ).sum(2) + + +CovarianceLike = Union[ + float, + NDArrayFloat, + Callable[[ArrayLike, ArrayLike], NDArrayFloat], +] + +InputAcceptable = Union[ + np.ndarray, + FData, +] + + +def _transform_to_2d(t: ArrayLike) -> NDArrayFloat: + """Transform 1d arrays in column vectors.""" + t = np.asfarray(t) + + dim = len(t.shape) + assert dim <= 2 + + if dim < 2: + t = np.atleast_2d(t).T + + return t + + +def _execute_covariance( + covariance: CovarianceLike, + x: ArrayLike, + y: ArrayLike, +) -> NDArrayFloat: + """Execute a covariance function.""" + x = _transform_to_2d(x) + y = _transform_to_2d(y) + + if isinstance(covariance, (int, float)): + return np.array(covariance) + else: + if callable(covariance): + result = covariance(x, y) + elif isinstance(covariance, np.ndarray): + result = covariance + else: + # GPy kernel + result = covariance.K(x, y) + + assert result.shape[0] == len(x) + assert result.shape[1] == len(y) + return result + + +class Covariance(abc.ABC): + """Abstract class for covariance functions.""" + + _parameters_str: Sequence[Tuple[str, str]] + _latex_formula: str + + @abc.abstractmethod + def __call__( + self, + x: InputAcceptable, + y: InputAcceptable | None = None, + ) -> NDArrayFloat: + pass + + def _param_check_and_transform( + self, + x: InputAcceptable, + y: InputAcceptable | None = None, + ) -> Tuple[np.ndarray | FData, np.ndarray | FData]: + # Param check + if y is None: + y = x + + if type(x) is not type(y): # noqa: WPS516 + raise ValueError( + 'Cannot operate objects x and y from different classes', + f'({type(x)}, {type(y)}).', + ) + + if isinstance(x, np.ndarray) and isinstance(y, np.ndarray): + x, y = np.array(x), np.array(y) + if len(x.shape) < 2: + x = np.atleast_2d(x) + if len(y.shape) < 2: + y = np.atleast_2d(y) + + return x, y + + def heatmap(self, limits: Tuple[float, float] = (-1, 1)) -> Figure: + """Return a heatmap plot of the covariance function.""" + from ..exploratory.visualization._utils import _create_figure + + x = np.linspace(*limits, 1000) + + cov_matrix = self(x, x) + + fig = _create_figure() + ax = fig.add_subplot(1, 1, 1) + ax.imshow( + cov_matrix, + extent=[limits[0], limits[1], limits[1], limits[0]], + ) + ax.set_title(f"Covariance function in [{limits[0]}, {limits[1]}]") + + return fig + + def _sample_trajectories_plot(self) -> Figure: + from ..datasets import make_gaussian_process + + fd = make_gaussian_process( + start=-1, + n_samples=10, + cov=self, + random_state=0, + ) + fig = fd.plot() + fig.axes[0].set_title("Sample trajectories") + return fig + + def __repr__(self) -> str: + + params_str = ', '.join( + f'{n}={getattr(self, n)}' for n, _ in self._parameters_str + ) + + return ( + f"{self.__module__}.{type(self).__qualname__}(" + f"{params_str}" + f")" + ) + + def _latex_content(self) -> str: + params_str = ''.join( + fr'{l} &= {getattr(self, n)} \\' for n, l in self._parameters_str + ) + + return ( + fr"{self._latex_formula} \\" + r"\text{where:}" + r"\begin{aligned}" + fr"\qquad{params_str}" + r"\end{aligned}" + ) + + def _repr_latex_(self) -> str: + return fr"\(\displaystyle {self._latex_content()}\)" + + def _repr_html_(self) -> str: + from ..exploratory.visualization._utils import _figure_to_svg + + fig = self.heatmap() + heatmap = _figure_to_svg(fig) + plt.close(fig) + + fig = self._sample_trajectories_plot() + sample_trajectories = _figure_to_svg(fig) + plt.close(fig) + + row_style = '' + + def column_style(percent: float, margin_top: str = "0") -> str: + return ( + f'style="display: inline-block; ' + f'margin:0; ' + f'margin-top: {margin_top}; ' + f'width:{percent}%; ' + f'height:auto;' + f'vertical-align: middle"' + ) + + return fr""" +
+
+ \[{self._latex_content()}\] +
+
+
+
+ {sample_trajectories} +
+
+ {heatmap} +
+
+ """ + + def to_sklearn(self) -> sklearn_kern.Kernel: + """Convert it to a sklearn kernel, if there is one.""" + raise NotImplementedError( + f"{type(self).__name__} covariance not " + f"implemented in scikit-learn", + ) + + +class Brownian(Covariance): + r""" + Brownian covariance function. + + The covariance function is + + .. math:: + K(x, x') = \sigma^2 \frac{|x - \mathcal{O}| + |x' - \mathcal{O}| + - |x - x'|}{2} + + where :math:`\sigma^2` is the variance at distance 1 from + :math:`\mathcal{O}` and :math:`\mathcal{O}` is the origin point. + If :math:`\mathcal{O} = 0` (the default) and we only + consider positive values, the formula can be simplified as + + .. math:: + K(x, y) = \sigma^2 \min(x, y). + + Heatmap plot of the covariance function: + + .. jupyter-execute:: + + from skfda.misc.covariances import Brownian + import matplotlib.pyplot as plt + + Brownian().heatmap(limits=(0, 1)) + plt.show() + + Example of Gaussian process trajectories using this covariance: + + .. jupyter-execute:: + + from skfda.misc.covariances import Brownian + from skfda.datasets import make_gaussian_process + import matplotlib.pyplot as plt + + gp = make_gaussian_process( + n_samples=10, cov=Brownian(), random_state=0) + gp.plot() + plt.show() + + Default representation in a Jupyter notebook: + + .. jupyter-execute:: + + from skfda.misc.covariances import Brownian + + Brownian() + + """ + _latex_formula = ( + r"K(x, x') = \sigma^2 \frac{|x - \mathcal{O}| + " + r"|x' - \mathcal{O}| - |x - x'|}{2}" + ) + + _parameters_str = [ + ("variance", r"\sigma^2"), + ("origin", r"\mathcal{O}"), + ] + + def __init__(self, *, variance: float = 1, origin: float = 0) -> None: + self.variance = variance + self.origin = origin + + def __call__( + self, + x: InputAcceptable | FData, + y: InputAcceptable | None = None, + ) -> NDArrayFloat: + if isinstance(x, FData) or isinstance(y, FData): + raise ValueError('Not defined for FData objects.') + + x = _transform_to_2d(x) - self.origin + y = _transform_to_2d(y) - self.origin + + sum_norms = np.add.outer( + np.linalg.norm(x, axis=-1), + np.linalg.norm(y, axis=-1), + ) + norm_sub = np.linalg.norm( + x[:, np.newaxis, :] - y[np.newaxis, :, :], + axis=-1, + ) + + return ( # type: ignore[no-any-return] + self.variance * (sum_norms - norm_sub) / 2 + ) + + +class Linear(Covariance): + r""" + Linear covariance function. + + The covariance function is + + .. math:: + K(x, x') = \sigma^2 (x^T x' + c) + + where :math:`\sigma^2` is the scale of the variance and + :math:`c` is the intercept. + + Heatmap plot of the covariance function: + + .. jupyter-execute:: + + from skfda.misc.covariances import Linear + import matplotlib.pyplot as plt + + Linear().heatmap(limits=(0, 1)) + plt.show() + + Example of Gaussian process trajectories using this covariance: + + .. jupyter-execute:: + + from skfda.misc.covariances import Linear + from skfda.datasets import make_gaussian_process + import matplotlib.pyplot as plt + + gp = make_gaussian_process( + n_samples=10, cov=Linear(), random_state=0) + gp.plot() + plt.show() + + Default representation in a Jupyter notebook: + + .. jupyter-execute:: + + from skfda.misc.covariances import Linear + + Linear() + + """ + + _latex_formula = r"K(x, x') = \sigma^2 (x^T x' + c)" + + _parameters_str = [ + ("variance", r"\sigma^2"), + ("intercept", "c"), + ] + + def __init__(self, *, variance: float = 1, intercept: float = 0) -> None: + self.variance = variance + self.intercept = intercept + + def __call__( + self, + x: InputAcceptable, + y: InputAcceptable | None = None, + ) -> NDArrayFloat: + x, y = self._param_check_and_transform(x, y) + + x_y = inner_product_matrix(x, y) + return self.variance * (x_y + self.intercept) + + def to_sklearn(self) -> sklearn_kern.Kernel: + return ( + self.variance + * (sklearn_kern.DotProduct(0) + self.intercept) + ) + + +class Polynomial(Covariance): + r""" + Polynomial covariance function. + + The covariance function is + + .. math:: + K(x, x') = \sigma^2 (\alpha x^T x' + c)^d + + where :math:`\sigma^2` is the scale of the variance, + :math:`\alpha` is the slope, :math:`d` the degree of the + polynomial and :math:`c` is the intercept. + + Heatmap plot of the covariance function: + + .. jupyter-execute:: + + from skfda.misc.covariances import Polynomial + import matplotlib.pyplot as plt + + Polynomial().heatmap(limits=(0, 1)) + plt.show() + + Example of Gaussian process trajectories using this covariance: + + .. jupyter-execute:: + + from skfda.misc.covariances import Polynomial + from skfda.datasets import make_gaussian_process + import matplotlib.pyplot as plt + + gp = make_gaussian_process( + n_samples=10, cov=Polynomial(), random_state=0) + gp.plot() + plt.show() + + Default representation in a Jupyter notebook: + + .. jupyter-execute:: + + from skfda.misc.covariances import Polynomial + + Polynomial() + + """ + + _latex_formula = r"K(x, x') = \sigma^2 (\alpha x^T x' + c)^d" + + _parameters_str = [ + ("variance", r"\sigma^2"), + ("intercept", "c"), + ("slope", r"\alpha"), + ("degree", "d"), + ] + + def __init__( + self, + *, + variance: float = 1, + intercept: float = 0, + slope: float = 1, + degree: float = 2, + ) -> None: + self.variance = variance + self.intercept = intercept + self.slope = slope + self.degree = degree + + def __call__( + self, + x: InputAcceptable, + y: InputAcceptable | None = None, + ) -> NDArrayFloat: + x, y = self._param_check_and_transform(x, y) + + x_y = inner_product_matrix(x, y) + return ( + self.variance + * (self.slope * x_y + self.intercept) ** self.degree + ) + + def to_sklearn(self) -> sklearn_kern.Kernel: + return ( + self.variance + * (self.slope * sklearn_kern.DotProduct(0) + self.intercept) + ** self.degree + ) + + +class Gaussian(Covariance): + r""" + Gaussian covariance function. + + The covariance function is + + .. math:: + K(x, x') = \sigma^2 \exp\left(-\frac{||x - x'||^2}{2l^2}\right) + + where :math:`\sigma^2` is the variance and :math:`l` is the length scale. + + Heatmap plot of the covariance function: + + .. jupyter-execute:: + + from skfda.misc.covariances import Gaussian + import matplotlib.pyplot as plt + + Gaussian().heatmap(limits=(0, 1)) + plt.show() + + Example of Gaussian process trajectories using this covariance: + + .. jupyter-execute:: + + from skfda.misc.covariances import Gaussian + from skfda.datasets import make_gaussian_process + import matplotlib.pyplot as plt + + gp = make_gaussian_process( + n_samples=10, cov=Gaussian(), random_state=0) + gp.plot() + plt.show() + + Default representation in a Jupyter notebook: + + .. jupyter-execute:: + + from skfda.misc.covariances import Gaussian + + Gaussian() + + """ + + _latex_formula = ( + r"K(x, x') = \sigma^2 \exp\left(-\frac{\|x - x'\|^2}{2l^2}" + r"\right)" + ) + + _parameters_str = [ + ("variance", r"\sigma^2"), + ("length_scale", "l"), + ] + + def __init__(self, *, variance: float = 1, length_scale: float = 1): + self.variance = variance + self.length_scale = length_scale + + def __call__( + self, + x: InputAcceptable, + y: InputAcceptable | None = None, + ) -> NDArrayFloat: + x, y = self._param_check_and_transform(x, y) + + distance_x_y = PairwiseMetric(l2_distance)(x, y) + return self.variance * np.exp(-distance_x_y ** 2 / (2 * self.length_scale ** 2)) + + def to_sklearn(self) -> sklearn_kern.Kernel: + return ( + self.variance * sklearn_kern.RBF(length_scale=self.length_scale) + ) + + +class Exponential(Covariance): + r""" + Exponential covariance function. + + The covariance function is + + .. math:: + K(x, x') = \sigma^2 \exp\left(-\frac{\|x - x'\|}{l}\right) + + where :math:`\sigma^2` is the variance and :math:`l` is the length scale. + + Heatmap plot of the covariance function: + + .. jupyter-execute:: + + from skfda.misc.covariances import Exponential + import matplotlib.pyplot as plt + + Exponential().heatmap(limits=(0, 1)) + plt.show() + + Example of Gaussian process trajectories using this covariance: + + .. jupyter-execute:: + + from skfda.misc.covariances import Exponential + from skfda.datasets import make_gaussian_process + import matplotlib.pyplot as plt + + gp = make_gaussian_process( + n_samples=10, cov=Exponential(), random_state=0) + gp.plot() + plt.show() + + Default representation in a Jupyter notebook: + + .. jupyter-execute:: + + from skfda.misc.covariances import Exponential + + Exponential() + + """ + + _latex_formula = ( + r"K(x, x') = \sigma^2 \exp\left(-\frac{||x - x'||}{l}" + r"\right)" + ) + + _parameters_str = [ + ("variance", r"\sigma^2"), + ("length_scale", "l"), + ] + + def __init__( + self, + *, + variance: float = 1, + length_scale: float = 1, + ) -> None: + self.variance = variance + self.length_scale = length_scale + + def __call__( + self, + x: InputAcceptable, + y: InputAcceptable | None = None, + ) -> NDArrayFloat: + x, y = self._param_check_and_transform(x, y) + + distance_x_y = PairwiseMetric(l2_distance)(x, y) + return self.variance * np.exp(-distance_x_y / (self.length_scale)) + + def to_sklearn(self) -> sklearn_kern.Kernel: + return ( + self.variance + * sklearn_kern.Matern(length_scale=self.length_scale, nu=0.5) + ) + + +class WhiteNoise(Covariance): + r""" + Gaussian covariance function. + + The covariance function is + + .. math:: + K(x, x')= \begin{cases} + \sigma^2, \quad x = x' \\ + 0, \quad x \neq x'\\ + \end{cases} + + where :math:`\sigma^2` is the variance. + + Heatmap plot of the covariance function: + + .. jupyter-execute:: + + from skfda.misc.covariances import WhiteNoise + import matplotlib.pyplot as plt + + WhiteNoise().heatmap(limits=(0, 1)) + plt.show() + + Example of Gaussian process trajectories using this covariance: + + .. jupyter-execute:: + + from skfda.misc.covariances import WhiteNoise + from skfda.datasets import make_gaussian_process + import matplotlib.pyplot as plt + + gp = make_gaussian_process( + n_samples=10, cov=WhiteNoise(), random_state=0) + gp.plot() + plt.show() + + Default representation in a Jupyter notebook: + + .. jupyter-execute:: + + from skfda.misc.covariances import WhiteNoise + + WhiteNoise() + + """ + + _latex_formula = ( + r"K(x, x')= \begin{cases} \sigma^2, \quad x = x' \\" + r"0, \quad x \neq x'\\ \end{cases}" + ) + + _parameters_str = [("variance", r"\sigma^2")] + + def __init__(self, *, variance: float = 1): + self.variance = variance + + def __call__( + self, + x: InputAcceptable, + y: InputAcceptable | None = None, + ) -> NDArrayFloat: + if isinstance(x, FData) or isinstance(y, FData): + raise ValueError('Not defined for FData objects.') + + x = _transform_to_2d(x) + return self.variance * np.eye(x.shape[0]) + + def to_sklearn(self) -> sklearn_kern.Kernel: + return sklearn_kern.WhiteKernel(noise_level=self.variance) + + +class Matern(Covariance): + r""" + Matérn covariance function. + + The covariance function is + + .. math:: + K(x, x') = \sigma^2 \frac{2^{1-\nu}}{\Gamma(\nu)} + \left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)^{\nu} + K_{\nu}\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right) + + where :math:`\sigma^2` is the variance, :math:`l` is the length scale + and :math:`\nu` controls the smoothness of the related Gaussian process. + The trajectories of a Gaussian process with Matérn covariance is + :math:`\lceil \nu \rceil - 1` times differentiable. + + + Heatmap plot of the covariance function: + + .. jupyter-execute:: + + from skfda.misc.covariances import Matern + import matplotlib.pyplot as plt + + Matern().heatmap(limits=(0, 1)) + plt.show() + + Example of Gaussian process trajectories using this covariance: + + .. jupyter-execute:: + + from skfda.misc.covariances import Matern + from skfda.datasets import make_gaussian_process + import matplotlib.pyplot as plt + + gp = make_gaussian_process( + n_samples=10, cov=Matern(), random_state=0) + gp.plot() + plt.show() + + Default representation in a Jupyter notebook: + + .. jupyter-execute:: + + from skfda.misc.covariances import Matern + + Matern() + + """ + _latex_formula = ( + r"K(x, x') = \sigma^2 \frac{2^{1-\nu}}{\Gamma(\nu)}" + r"\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)^{\nu}" + r"K_{\nu}\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)" + ) + + _parameters_str = [ + ("variance", r"\sigma^2"), + ("length_scale", "l"), + ("nu", r"\nu"), + ] + + def __init__( + self, + *, + variance: float = 1, + length_scale: float = 1, + nu: float = 1.5, + ) -> None: + self.variance = variance + self.length_scale = length_scale + self.nu = nu + + def __call__( + self, + x: InputAcceptable, + y: InputAcceptable | None = None, + ) -> NDArrayFloat: + x, y = self._param_check_and_transform(x, y) + + distance_x_y = PairwiseMetric(l2_distance)(x, y) + + p = self.nu - 0.5 + if p.is_integer(): + # Formula for half-integers + p = int(p) + body = np.sqrt(2 * p + 1) * distance_x_y / self.length_scale + exponential = np.exp(-body) + power_list = np.full(shape=(p,) + body.shape, fill_value=2 * body) + power_list = np.cumprod(power_list, axis=0) + power_list = np.concatenate( + (power_list[::-1], np.asarray([np.ones_like(body)])), + ) + power_list = np.moveaxis(power_list, 0, -1) + numerator = np.cumprod(np.arange(p, 0, -1)) + numerator = np.concatenate(([1], numerator)) + denom1 = np.cumprod(np.arange(2 * p, p, -1)) + denom1 = np.concatenate((denom1[::-1], [1])) + denom2 = np.cumprod(np.arange(1, p + 1)) + denom2 = np.concatenate(([1], denom2)) + + sum_terms = power_list * numerator / (denom1 * denom2) + return ( # type: ignore[no-any-return] + self.variance * exponential * np.sum(sum_terms, axis=-1) + ) + elif self.nu == np.inf: + return ( # type: ignore[no-any-return] + self.variance * np.exp( + -distance_x_y ** 2 / (2 * self.length_scale ** 2), + ) + ) + else: + # General formula + scaling = 2**(1 - self.nu) / gamma(self.nu) + body = np.sqrt(2 * self.nu) * distance_x_y / self.length_scale + power = body**self.nu + bessel = kv(self.nu, body) + + with np.errstate(invalid='ignore'): + eval_cov = self.variance * scaling * power * bessel + + # Values with nan are where the distance is 0 + return np.nan_to_num( # type: ignore[no-any-return] + eval_cov, + nan=self.variance, + ) + + def to_sklearn(self) -> sklearn_kern.Kernel: + return ( + self.variance + * sklearn_kern.Matern(length_scale=self.length_scale, nu=self.nu) + ) + + +class Empirical(Covariance): + r""" + Sample covariance function. + + The sample covariance function is defined as + . math:: + K(t, s) = \frac{1}{N-\text{correction}}\sum_{n=1}^N\left(x_n(t) - + \bar{x}(t)\right) \left(x_n(s) - \bar{x}(s)\right) + + where :math:`x_n(t)` is the n-th sample and :math:`\bar{x}(t)` is the + mean of the samples. :math:`N` is the number of samples, + :math:`\text{correction}` is the degrees of freedom adjustment and is such + that :math:`N-\text{correction}` is the divisor used in the calculation of + the covariance function. + + """ + + _latex_formula = ( + r"K(t, s) = \frac{1}{N-\text{correction}}\sum_{n=1}^N" + r"(x_n(t) - \bar{x}(t))(x_n(s) - \bar{x}(s))" + ) + _parameters_str = [ + ("data", "data"), + ] + + cov_fdata: FData + correction: int + + @abc.abstractmethod + def __init__(self, data: FData) -> None: + if data.dim_domain != 1 or data.dim_codomain != 1: + raise NotImplementedError( + "Covariance only implemented " + "for univariate functions", + ) + + def __call__( + self, + x: InputAcceptable, + y: InputAcceptable | None = None, + ) -> NDArrayFloat: + if isinstance(x, FData) or isinstance(y, FData): + raise ValueError('Not defined for FData objects.') + + """Evaluate the covariance function. + + Args: + x: First array of points of evaluation. + y: Second array of points of evaluation. + + Returns: + Covariance function evaluated at the grid formed by x and y. + """ + return self.cov_fdata([x, y], grid=True)[0, ..., 0] + + +class EmpiricalGrid(Empirical): + """Sample covariance function for FDataGrid.""" + + cov_fdata: FDataGrid + correction: int + + def __init__(self, data: FDataGrid, correction: int = 0) -> None: + super().__init__(data=data) + + self.correction = correction + self.cov_fdata = data.copy( + data_matrix=np.cov( + data.data_matrix[..., 0], + rowvar=False, + ddof=correction, + )[np.newaxis, ...], + grid_points=[ + data.grid_points[0], + data.grid_points[0], + ], + domain_range=[ + data.domain_range[0], + data.domain_range[0], + ], + argument_names=data.argument_names * 2, + sample_names=("covariance",), + ) + + +class EmpiricalBasis(Empirical): + """ + Sample covariance function for FDataBasis. + + In this case one may use the basis expression of the samples to + express the sample covariance function in the tensor product basis + of the original basis. + """ + + cov_fdata: FDataBasis + coeff_matrix: NDArrayFloat + correction: int + + def __init__(self, data: FDataBasis, correction: int = 0) -> None: + super().__init__(data=data) + + self.correction = correction + self.coeff_matrix = np.cov( + data.coefficients, + rowvar=False, + ddof=correction, + ) + self.cov_fdata = FDataBasis( + basis=TensorBasis([data.basis, data.basis]), + coefficients=self.coeff_matrix.flatten(), + argument_names=data.argument_names * 2, + sample_names=("covariance",), + ) From f31229dd36e2741d168b8510348372c8957b3e8d Mon Sep 17 00:00:00 2001 From: E105D104U125 Date: Mon, 25 Mar 2024 20:00:57 +0100 Subject: [PATCH 05/12] Replaced EOL to LF --- skfda/tests/test_covariances.py | 530 ++++++++++++++++---------------- 1 file changed, 265 insertions(+), 265 deletions(-) diff --git a/skfda/tests/test_covariances.py b/skfda/tests/test_covariances.py index dc5982ca0..598280342 100644 --- a/skfda/tests/test_covariances.py +++ b/skfda/tests/test_covariances.py @@ -1,265 +1,265 @@ -"""Tests for Covariance module.""" -from typing import Any, Tuple - -import numpy as np -import pytest -from sklearn.model_selection import ParameterGrid - -import skfda.misc.covariances as cov -from skfda import FDataBasis, FDataGrid -from skfda.datasets import fetch_phoneme -from skfda.representation.basis import MonomialBasis - - -def _test_compare_sklearn( - multivariate_data: Any, - cov: cov.Covariance, -) -> None: - cov_sklearn = cov.to_sklearn() - cov_matrix = cov(multivariate_data) - cov_sklearn_matrix = cov_sklearn(multivariate_data) - - np.testing.assert_array_almost_equal(cov_matrix, cov_sklearn_matrix) - -############################################################################## -# Fixtures -############################################################################## - - -@pytest.fixture -def fetch_phoneme_fixture() -> FDataGrid: - """Fixture for loading the phoneme dataset example.""" - fd, _ = fetch_phoneme(return_X_y=True) - return fd[:20] - - -@pytest.fixture( - params=[ - cov.Linear(), - cov.Polynomial(), - cov.Gaussian(), - cov.Exponential(), - cov.Matern(), - ], -) -def covariances_fixture(request: Any) -> Any: - """Fixture for getting a covariance kernel function.""" - return request.param - - -@pytest.fixture( - params=[ - cov.Brownian(), - cov.WhiteNoise(), - ], -) -def covariances_raise_fixture(request: Any) -> Any: - """Fixture for getting a covariance kernel that raises a ValueError.""" - return request.param - - -@pytest.fixture -def fdatabasis_data() -> Tuple[FDataBasis, FDataBasis]: - """Fixture for getting fdatabasis objects.""" - basis = MonomialBasis( - n_basis=2, - domain_range=(-2, 2), - ) - - fd1 = FDataBasis( - basis=basis, - coefficients=[ - [1, 0], - [1, 2], - ], - ) - - fd2 = FDataBasis( - basis=basis, - coefficients=[ - [0, 1], - ], - ) - - return fd1, fd2 - - -@pytest.fixture -def multivariate_data() -> np.array: - """Fixture for getting multivariate data.""" - return np.linspace(-1, 1, 1000)[:, np.newaxis] - - -@pytest.fixture( - params=[ - (cov.Linear, - { - "variance": [1, 2], - "intercept": [3, 4], - }, - ), - (cov.Polynomial, - { - "variance": [2], - "intercept": [0, 2], - "slope": [1, 2], - "degree": [1, 2, 3], - }, - ), - (cov.Exponential, - { - "variance": [1, 2], - "length_scale": [0.5, 1, 2], - }, - ), - (cov.Gaussian, - { - "variance": [1, 2], - "length_scale": [0.5, 1, 2], - }, - ), - (cov.Matern, - { - "variance": [2], - "length_scale": [0.5], - "nu": [0.5, 1, 1.5, 2.5, 3.5, np.inf], - }, - ), - ], -) -def covariance_and_params(request: Any) -> Any: - """Fixture to load the covariance functions.""" - return request.param - - -############################################################################## -# Tests -############################################################################## - - -def test_covariances( - fetch_phoneme_fixture: Any, - covariances_fixture: Any, -) -> None: - """Check that invalid parameters in fit raise exception.""" - fd = fetch_phoneme_fixture - cov_kernel = covariances_fixture - - # Also test that it does not fail - res1 = cov_kernel(fd, fd) - res2 = cov_kernel(fd) - - np.testing.assert_allclose( - res1, - res2, - atol=1e-7, - ) - - -def test_raises( - fetch_phoneme_fixture: Any, - covariances_raise_fixture: Any, -) -> None: - """Check that it raises a ValueError exception.""" - fd = fetch_phoneme_fixture - cov_kernel = covariances_raise_fixture - - np.testing.assert_raises( - ValueError, - cov_kernel, - fd, - ) - - -def test_fdatabasis_example_linear( - fdatabasis_data: Any, -) -> None: - """Check a precalculated example for Linear covariance kernel.""" - fd1, fd2 = fdatabasis_data - res1 = cov.Linear(variance=1 / 2, intercept=3)(fd1, fd2) - res2 = np.array([[3 / 2], [3 / 2 + 32 / 6]]) - np.testing.assert_allclose( - res1, - res2, - rtol=1e-6, - ) - - -def test_fdatabasis_example_polynomial( - fdatabasis_data: Any, -) -> None: - """Check a precalculated example for Polynomial covariance kernel.""" - fd1, fd2 = fdatabasis_data - res1 = cov.Polynomial( - variance=1 / 3, - slope=2, - intercept=1, - degree=2, - )(fd1, fd2) - res2 = np.array([[1 / 3], [67**2 / 3**3]]) - np.testing.assert_allclose( - res1, - res2, - rtol=1e-6, - ) - - -def test_fdatabasis_example_gaussian( - fdatabasis_data: Any, -) -> None: - """Check a precalculated example for Gaussian covariance kernel.""" - fd1, fd2 = fdatabasis_data - res1 = cov.Gaussian(variance=3, length_scale=2)(fd1, fd2) - res2 = np.array([ - [3 * np.exp(-7 / 6)], - [3 * np.exp(-7 / 6)], - ]) - np.testing.assert_allclose( - res1, - res2, - rtol=1e-6, - ) - - -def test_fdatabasis_example_exponential( - fdatabasis_data: Any, -) -> None: - """Check a precalculated example for Exponential covariance kernel.""" - fd1, fd2 = fdatabasis_data - res1 = cov.Exponential(variance=4, length_scale=5)(fd1, fd2) - res2 = np.array([ - [4 * np.exp(-np.sqrt(28 / 3) / 5)], - [4 * np.exp(-np.sqrt(28 / 3) / 5)], - ]) - np.testing.assert_allclose( - res1, - res2, - rtol=1e-6, - ) - - -def test_fdatabasis_example_matern( - fdatabasis_data: Any, -) -> None: - """Check a precalculated example for Matern covariance kernel.""" - fd1, fd2 = fdatabasis_data - res1 = cov.Matern(variance=2, length_scale=3, nu=2)(fd1, fd2) - res2 = np.array([ - [(2 / 3) ** 2 * (28 / 3) * 0.239775899566], - [(2 / 3) ** 2 * (28 / 3) * 0.239775899566], - ]) - np.testing.assert_allclose( - res1, - res2, - rtol=1e-6, - ) - - -def test_multivariate_covariance_kernel( - multivariate_data: Any, - covariance_and_params: Any, -) -> None: - """Test general covariance kernel against scikit-learn's kernel.""" - cov_kernel, param_dict = covariance_and_params - for input_params in list(ParameterGrid(param_dict)): - _test_compare_sklearn(multivariate_data, cov_kernel(**input_params)) +"""Tests for Covariance module.""" +from typing import Any, Tuple + +import numpy as np +import pytest +from sklearn.model_selection import ParameterGrid + +import skfda.misc.covariances as cov +from skfda import FDataBasis, FDataGrid +from skfda.datasets import fetch_weather # fetch_phoneme, +from skfda.representation.basis import MonomialBasis + + +def _test_compare_sklearn( + multivariate_data: Any, + cov: cov.Covariance, +) -> None: + cov_sklearn = cov.to_sklearn() + cov_matrix = cov(multivariate_data) + cov_sklearn_matrix = cov_sklearn(multivariate_data) + + np.testing.assert_array_almost_equal(cov_matrix, cov_sklearn_matrix) + +############################################################################## +# Fixtures +############################################################################## + + +@pytest.fixture +def fetch_functional_data() -> FDataGrid: + """Fixture for loading the phoneme dataset example.""" + fd, _ = fetch_weather(return_X_y=True) + return fd[:20] + + +@pytest.fixture( + params=[ + cov.Linear(), + cov.Polynomial(), + cov.Gaussian(), + cov.Exponential(), + cov.Matern(), + ], +) +def covariances_fixture(request: Any) -> Any: + """Fixture for getting a covariance kernel function.""" + return request.param + + +@pytest.fixture( + params=[ + cov.Brownian(), + cov.WhiteNoise(), + ], +) +def covariances_raise_fixture(request: Any) -> Any: + """Fixture for getting a covariance kernel that raises a ValueError.""" + return request.param + + +@pytest.fixture +def fdatabasis_data() -> Tuple[FDataBasis, FDataBasis]: + """Fixture for getting fdatabasis objects.""" + basis = MonomialBasis( + n_basis=2, + domain_range=(-2, 2), + ) + + fd1 = FDataBasis( + basis=basis, + coefficients=[ + [1, 0], + [1, 2], + ], + ) + + fd2 = FDataBasis( + basis=basis, + coefficients=[ + [0, 1], + ], + ) + + return fd1, fd2 + + +@pytest.fixture +def multivariate_data() -> np.array: + """Fixture for getting multivariate data.""" + return np.linspace(-1, 1, 1000)[:, np.newaxis] + + +@pytest.fixture( + params=[ + (cov.Linear, + { + "variance": [1, 2], + "intercept": [3, 4], + }, + ), + (cov.Polynomial, + { + "variance": [2], + "intercept": [0, 2], + "slope": [1, 2], + "degree": [1, 2, 3], + }, + ), + (cov.Exponential, + { + "variance": [1, 2], + "length_scale": [0.5, 1, 2], + }, + ), + (cov.Gaussian, + { + "variance": [1, 2], + "length_scale": [0.5, 1, 2], + }, + ), + (cov.Matern, + { + "variance": [2], + "length_scale": [0.5], + "nu": [0.5, 1, 1.5, 2.5, 3.5, np.inf], + }, + ), + ], +) +def covariance_and_params(request: Any) -> Any: + """Fixture to load the covariance functions.""" + return request.param + + +############################################################################## +# Tests +############################################################################## + + +def test_covariances( + fetch_phoneme_fixture: FDataGrid, + covariances_fixture: cov.Covariance, +) -> None: + """Check that invalid parameters in fit raise exception.""" + fd = fetch_phoneme_fixture + cov_kernel = covariances_fixture + + # Also test that it does not fail + res1 = cov_kernel(fd, fd) + res2 = cov_kernel(fd) + + np.testing.assert_allclose( + res1, + res2, + atol=1e-7, + ) + + +def test_raises( + fetch_phoneme_fixture: FDataGrid, + covariances_raise_fixture: Any, +) -> None: + """Check that it raises a ValueError exception.""" + fd = fetch_phoneme_fixture + cov_kernel = covariances_raise_fixture + + np.testing.assert_raises( + ValueError, + cov_kernel, + fd, + ) + + +def test_fdatabasis_example_linear( + fdatabasis_data: Tuple[FDataBasis, FDataBasis], +) -> None: + """Check a precalculated example for Linear covariance kernel.""" + fd1, fd2 = fdatabasis_data + res1 = cov.Linear(variance=1 / 2, intercept=3)(fd1, fd2) + res2 = np.array([[3 / 2], [3 / 2 + 32 / 6]]) + np.testing.assert_allclose( + res1, + res2, + rtol=1e-6, + ) + + +def test_fdatabasis_example_polynomial( + fdatabasis_data: Tuple[FDataBasis, FDataBasis], +) -> None: + """Check a precalculated example for Polynomial covariance kernel.""" + fd1, fd2 = fdatabasis_data + res1 = cov.Polynomial( + variance=1 / 3, + slope=2, + intercept=1, + degree=2, + )(fd1, fd2) + res2 = np.array([[1 / 3], [67**2 / 3**3]]) + np.testing.assert_allclose( + res1, + res2, + rtol=1e-6, + ) + + +def test_fdatabasis_example_gaussian( + fdatabasis_data: Tuple[FDataBasis, FDataBasis], +) -> None: + """Check a precalculated example for Gaussian covariance kernel.""" + fd1, fd2 = fdatabasis_data + res1 = cov.Gaussian(variance=3, length_scale=2)(fd1, fd2) + res2 = np.array([ + [3 * np.exp(-7 / 6)], + [3 * np.exp(-7 / 6)], + ]) + np.testing.assert_allclose( + res1, + res2, + rtol=1e-6, + ) + + +def test_fdatabasis_example_exponential( + fdatabasis_data: Tuple[FDataBasis, FDataBasis], +) -> None: + """Check a precalculated example for Exponential covariance kernel.""" + fd1, fd2 = fdatabasis_data + res1 = cov.Exponential(variance=4, length_scale=5)(fd1, fd2) + res2 = np.array([ + [4 * np.exp(-np.sqrt(28 / 3) / 5)], + [4 * np.exp(-np.sqrt(28 / 3) / 5)], + ]) + np.testing.assert_allclose( + res1, + res2, + rtol=1e-6, + ) + + +def test_fdatabasis_example_matern( + fdatabasis_data: Tuple[FDataBasis, FDataBasis], +) -> None: + """Check a precalculated example for Matern covariance kernel.""" + fd1, fd2 = fdatabasis_data + res1 = cov.Matern(variance=2, length_scale=3, nu=2)(fd1, fd2) + res2 = np.array([ + [(2 / 3) ** 2 * (28 / 3) * 0.239775899566], + [(2 / 3) ** 2 * (28 / 3) * 0.239775899566], + ]) + np.testing.assert_allclose( + res1, + res2, + rtol=1e-6, + ) + + +def test_multivariate_covariance_kernel( + multivariate_data: np.array, + covariance_and_params: Any, +) -> None: + """Test general covariance kernel against scikit-learn's kernel.""" + cov_kernel, param_dict = covariance_and_params + for input_params in list(ParameterGrid(param_dict)): + _test_compare_sklearn(multivariate_data, cov_kernel(**input_params)) From 2c95ed16d39bd1e516f4796e5fa812e6d305b146 Mon Sep 17 00:00:00 2001 From: E105D104U125 Date: Tue, 26 Mar 2024 11:08:26 +0100 Subject: [PATCH 06/12] Changed import to avoid circular dependencies. --- skfda/misc/covariances.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/skfda/misc/covariances.py b/skfda/misc/covariances.py index bff439734..4b25c439a 100644 --- a/skfda/misc/covariances.py +++ b/skfda/misc/covariances.py @@ -9,8 +9,8 @@ from matplotlib.figure import Figure from scipy.special import gamma, kv -from ..misc import inner_product_matrix -from ..misc.metrics import PairwiseMetric, LpNorm, l2_distance +from ..misc._math import inner_product_matrix +from ..misc.metrics import PairwiseMetric, l2_distance from ..representation import FData, FDataBasis, FDataGrid from ..representation.basis import TensorBasis from ..typing._numpy import ArrayLike, NDArrayFloat @@ -38,7 +38,7 @@ def _transform_to_2d(t: ArrayLike) -> NDArrayFloat: """Transform 1d arrays in column vectors.""" t = np.asfarray(t) - dim = t.ndim + dim = len(t.shape) assert dim <= 2 if dim < 2: From 2f993453b65143443ece22edf92aa0720dfdb2f4 Mon Sep 17 00:00:00 2001 From: E105D104U125 Date: Tue, 26 Mar 2024 11:28:05 +0100 Subject: [PATCH 07/12] Changed data fecthing function name --- skfda/tests/test_covariances.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/skfda/tests/test_covariances.py b/skfda/tests/test_covariances.py index 598280342..c28019afc 100644 --- a/skfda/tests/test_covariances.py +++ b/skfda/tests/test_covariances.py @@ -7,7 +7,7 @@ import skfda.misc.covariances as cov from skfda import FDataBasis, FDataGrid -from skfda.datasets import fetch_weather # fetch_phoneme, +from skfda.datasets import fetch_weather from skfda.representation.basis import MonomialBasis @@ -138,11 +138,11 @@ def covariance_and_params(request: Any) -> Any: def test_covariances( - fetch_phoneme_fixture: FDataGrid, + fetch_functional_data: FDataGrid, covariances_fixture: cov.Covariance, ) -> None: """Check that invalid parameters in fit raise exception.""" - fd = fetch_phoneme_fixture + fd = fetch_functional_data cov_kernel = covariances_fixture # Also test that it does not fail @@ -157,11 +157,11 @@ def test_covariances( def test_raises( - fetch_phoneme_fixture: FDataGrid, + fetch_functional_data: FDataGrid, covariances_raise_fixture: Any, ) -> None: """Check that it raises a ValueError exception.""" - fd = fetch_phoneme_fixture + fd = fetch_functional_data cov_kernel = covariances_raise_fixture np.testing.assert_raises( From 81f6e585b991dbbcc3dab74122cab974efa0d886 Mon Sep 17 00:00:00 2001 From: E105D104U125 Date: Thu, 4 Jul 2024 12:04:46 +0200 Subject: [PATCH 08/12] Covariance_kernels pullrequest changes incorporated. --- skfda/misc/covariances.py | 123 ++++++++++++++++++-------------- skfda/tests/test_covariances.py | 16 +++-- 2 files changed, 81 insertions(+), 58 deletions(-) diff --git a/skfda/misc/covariances.py b/skfda/misc/covariances.py index 4b25c439a..d5d7a1abd 100644 --- a/skfda/misc/covariances.py +++ b/skfda/misc/covariances.py @@ -1,12 +1,14 @@ +"""Covariances module.""" from __future__ import annotations import abc -from typing import Callable, Sequence, Tuple, Union +from typing import Any, Callable, Sequence import matplotlib.pyplot as plt import numpy as np import sklearn.gaussian_process.kernels as sklearn_kern from matplotlib.figure import Figure +from numpy.typing import NDArray from scipy.special import gamma, kv from ..misc._math import inner_product_matrix @@ -22,23 +24,20 @@ def _squared_norms(x: NDArrayFloat, y: NDArrayFloat) -> NDArrayFloat: ).sum(2) -CovarianceLike = Union[ - float, - NDArrayFloat, - Callable[[ArrayLike, ArrayLike], NDArrayFloat], -] +CovarianceLike = ( + float + | NDArrayFloat + | Callable[[ArrayLike, ArrayLike], NDArrayFloat] +) -InputAcceptable = Union[ - np.ndarray, - FData, -] +Input = NDArray[Any] | FData def _transform_to_2d(t: ArrayLike) -> NDArrayFloat: """Transform 1d arrays in column vectors.""" t = np.asfarray(t) - dim = len(t.shape) + dim = t.ndim assert dim <= 2 if dim < 2: @@ -58,39 +57,40 @@ def _execute_covariance( if isinstance(covariance, (int, float)): return np.array(covariance) + + if callable(covariance): + result = covariance(x, y) + elif isinstance(covariance, np.ndarray): + result = covariance else: - if callable(covariance): - result = covariance(x, y) - elif isinstance(covariance, np.ndarray): - result = covariance - else: - # GPy kernel - result = covariance.K(x, y) + # GPy kernel + result = covariance.K(x, y) - assert result.shape[0] == len(x) - assert result.shape[1] == len(y) - return result + assert result.shape[0] == len(x) + assert result.shape[1] == len(y) + return result class Covariance(abc.ABC): """Abstract class for covariance functions.""" - _parameters_str: Sequence[Tuple[str, str]] + _parameters_str: Sequence[tuple[str, str]] _latex_formula: str @abc.abstractmethod def __call__( self, - x: InputAcceptable, - y: InputAcceptable | None = None, + x: Input, + y: Input | None = None, ) -> NDArrayFloat: + """Compute covariance function on input data.""" pass def _param_check_and_transform( self, - x: InputAcceptable, - y: InputAcceptable | None = None, - ) -> Tuple[np.ndarray | FData, np.ndarray | FData]: + x: Input, + y: Input | None = None, + ) -> tuple[Input, Input]: # Param check if y is None: y = x @@ -101,8 +101,7 @@ def _param_check_and_transform( f'({type(x)}, {type(y)}).', ) - if isinstance(x, np.ndarray) and isinstance(y, np.ndarray): - x, y = np.array(x), np.array(y) + if not isinstance(x, FData) and not isinstance(y, FData): if len(x.shape) < 2: x = np.atleast_2d(x) if len(y.shape) < 2: @@ -110,7 +109,7 @@ def _param_check_and_transform( return x, y - def heatmap(self, limits: Tuple[float, float] = (-1, 1)) -> Figure: + def heatmap(self, limits: tuple[float, float] = (-1, 1)) -> Figure: """Return a heatmap plot of the covariance function.""" from ..exploratory.visualization._utils import _create_figure @@ -266,6 +265,7 @@ class Brownian(Covariance): Brownian() """ + _latex_formula = ( r"K(x, x') = \sigma^2 \frac{|x - \mathcal{O}| + " r"|x' - \mathcal{O}| - |x - x'|}{2}" @@ -282,14 +282,21 @@ def __init__(self, *, variance: float = 1, origin: float = 0) -> None: def __call__( self, - x: InputAcceptable | FData, - y: InputAcceptable | None = None, + x: NDArray[Any], + y: NDArray[Any] | None = None, ) -> NDArrayFloat: + """Compute Brownian covariance function on input data.""" if isinstance(x, FData) or isinstance(y, FData): - raise ValueError('Not defined for FData objects.') + raise ValueError( + 'Brownian covariance not defined for FData objects.', + ) + + xx: NDArray[Any] + yy: NDArray[Any] + xx, yy = self._param_check_and_transform(x, y) - x = _transform_to_2d(x) - self.origin - y = _transform_to_2d(y) - self.origin + xx = xx - self.origin + yy = yy - self.origin sum_norms = np.add.outer( np.linalg.norm(x, axis=-1), @@ -363,9 +370,10 @@ def __init__(self, *, variance: float = 1, intercept: float = 0) -> None: def __call__( self, - x: InputAcceptable, - y: InputAcceptable | None = None, + x: Input, + y: Input | None = None, ) -> NDArrayFloat: + """Compute linear covariance function on input data.""" x, y = self._param_check_and_transform(x, y) x_y = inner_product_matrix(x, y) @@ -445,12 +453,13 @@ def __init__( self.intercept = intercept self.slope = slope self.degree = degree - + def __call__( self, - x: InputAcceptable, - y: InputAcceptable | None = None, + x: Input, + y: Input | None = None, ) -> NDArrayFloat: + """Compute polynomial covariance function on input data.""" x, y = self._param_check_and_transform(x, y) x_y = inner_product_matrix(x, y) @@ -527,13 +536,16 @@ def __init__(self, *, variance: float = 1, length_scale: float = 1): def __call__( self, - x: InputAcceptable, - y: InputAcceptable | None = None, + x: Input, + y: Input | None = None, ) -> NDArrayFloat: + """Compute Gaussian covariance function on input data.""" x, y = self._param_check_and_transform(x, y) distance_x_y = PairwiseMetric(l2_distance)(x, y) - return self.variance * np.exp(-distance_x_y ** 2 / (2 * self.length_scale ** 2)) + return self.variance * np.exp( # type: ignore[no-any-return] + -distance_x_y ** 2 / (2 * self.length_scale ** 2), + ) def to_sklearn(self) -> sklearn_kern.Kernel: return ( @@ -606,9 +618,10 @@ def __init__( def __call__( self, - x: InputAcceptable, - y: InputAcceptable | None = None, + x: Input, + y: Input | None = None, ) -> NDArrayFloat: + """Compute exponential covariance function on input data.""" x, y = self._param_check_and_transform(x, y) distance_x_y = PairwiseMetric(l2_distance)(x, y) @@ -680,9 +693,10 @@ def __init__(self, *, variance: float = 1): def __call__( self, - x: InputAcceptable, - y: InputAcceptable | None = None, + x: Input, + y: Input | None = None, ) -> NDArrayFloat: + """Compute white noise covariance function on input data.""" if isinstance(x, FData) or isinstance(y, FData): raise ValueError('Not defined for FData objects.') @@ -767,9 +781,10 @@ def __init__( def __call__( self, - x: InputAcceptable, - y: InputAcceptable | None = None, + x: Input, + y: Input | None = None, ) -> NDArrayFloat: + """Compute Matern covariance function on input data.""" x, y = self._param_check_and_transform(x, y) distance_x_y = PairwiseMetric(l2_distance)(x, y) @@ -864,12 +879,9 @@ def __init__(self, data: FData) -> None: def __call__( self, - x: InputAcceptable, - y: InputAcceptable | None = None, + x: Input, + y: Input | None = None, ) -> NDArrayFloat: - if isinstance(x, FData) or isinstance(y, FData): - raise ValueError('Not defined for FData objects.') - """Evaluate the covariance function. Args: @@ -879,6 +891,9 @@ def __call__( Returns: Covariance function evaluated at the grid formed by x and y. """ + if isinstance(x, FData) or isinstance(y, FData): + raise ValueError('Not defined for FData objects.') + return self.cov_fdata([x, y], grid=True)[0, ..., 0] diff --git a/skfda/tests/test_covariances.py b/skfda/tests/test_covariances.py index c28019afc..ebf78f20d 100644 --- a/skfda/tests/test_covariances.py +++ b/skfda/tests/test_covariances.py @@ -60,7 +60,11 @@ def covariances_raise_fixture(request: Any) -> Any: @pytest.fixture def fdatabasis_data() -> Tuple[FDataBasis, FDataBasis]: - """Fixture for getting fdatabasis objects.""" + """Fixture for getting fdatabasis objects. + + The dataset is used to test manual calculations of the covariance functions + against the implementation. + """ basis = MonomialBasis( n_basis=2, domain_range=(-2, 2), @@ -141,7 +145,7 @@ def test_covariances( fetch_functional_data: FDataGrid, covariances_fixture: cov.Covariance, ) -> None: - """Check that invalid parameters in fit raise exception.""" + """Check that parameter conversion is done correctly.""" fd = fetch_functional_data cov_kernel = covariances_fixture @@ -160,11 +164,15 @@ def test_raises( fetch_functional_data: FDataGrid, covariances_raise_fixture: Any, ) -> None: - """Check that it raises a ValueError exception.""" + """Check raises ValueError. + + Check that non-functional kernels raise a ValueError exception + with functional data. + """ fd = fetch_functional_data cov_kernel = covariances_raise_fixture - np.testing.assert_raises( + pytest.raises( ValueError, cov_kernel, fd, From b18326967aa50036cf3082dc76e937cb84ac1c70 Mon Sep 17 00:00:00 2001 From: E105D104U125 Date: Thu, 4 Jul 2024 12:32:12 +0200 Subject: [PATCH 09/12] Covariance_kernels pullrequest changes incorporated. --- skfda/misc/covariances.py | 58 +++++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 23 deletions(-) diff --git a/skfda/misc/covariances.py b/skfda/misc/covariances.py index d5d7a1abd..a36142afb 100644 --- a/skfda/misc/covariances.py +++ b/skfda/misc/covariances.py @@ -181,7 +181,10 @@ def _repr_html_(self) -> str: row_style = '' - def column_style(percent: float, margin_top: str = "0") -> str: + def column_style( # noqa: WPS430 + percent: float, + margin_top: str = "0", + ) -> str: return ( f'style="display: inline-block; ' f'margin:0; ' @@ -205,7 +208,7 @@ def column_style(percent: float, margin_top: str = "0") -> str: {heatmap} - """ + """ # noqa: WPS432 def to_sklearn(self) -> sklearn_kern.Kernel: """Convert it to a sklearn kernel, if there is one.""" @@ -291,12 +294,11 @@ def __call__( 'Brownian covariance not defined for FData objects.', ) - xx: NDArray[Any] - yy: NDArray[Any] - xx, yy = self._param_check_and_transform(x, y) + x = _transform_to_2d(x) + y = _transform_to_2d(y) - xx = xx - self.origin - yy = yy - self.origin + x = x - self.origin + y = y - self.origin sum_norms = np.add.outer( np.linalg.norm(x, axis=-1), @@ -380,6 +382,7 @@ def __call__( return self.variance * (x_y + self.intercept) def to_sklearn(self) -> sklearn_kern.Kernel: + """Obtain corresponding scikit-learn kernel type.""" return ( self.variance * (sklearn_kern.DotProduct(0) + self.intercept) @@ -469,6 +472,7 @@ def __call__( ) def to_sklearn(self) -> sklearn_kern.Kernel: + """Obtain corresponding scikit-learn kernel type.""" return ( self.variance * (self.slope * sklearn_kern.DotProduct(0) + self.intercept) @@ -548,6 +552,7 @@ def __call__( ) def to_sklearn(self) -> sklearn_kern.Kernel: + """Obtain corresponding scikit-learn kernel type.""" return ( self.variance * sklearn_kern.RBF(length_scale=self.length_scale) ) @@ -625,9 +630,13 @@ def __call__( x, y = self._param_check_and_transform(x, y) distance_x_y = PairwiseMetric(l2_distance)(x, y) - return self.variance * np.exp(-distance_x_y / (self.length_scale)) + return self.variance * np.exp( # type: ignore[no-any-return] + -distance_x_y + / (self.length_scale), + ) def to_sklearn(self) -> sklearn_kern.Kernel: + """Obtain corresponding scikit-learn kernel type.""" return ( self.variance * sklearn_kern.Matern(length_scale=self.length_scale, nu=0.5) @@ -704,6 +713,7 @@ def __call__( return self.variance * np.eye(x.shape[0]) def to_sklearn(self) -> sklearn_kern.Kernel: + """Obtain corresponding scikit-learn kernel type.""" return sklearn_kern.WhiteKernel(noise_level=self.variance) @@ -756,6 +766,7 @@ class Matern(Covariance): Matern() """ + _latex_formula = ( r"K(x, x') = \sigma^2 \frac{2^{1-\nu}}{\Gamma(\nu)}" r"\left( \frac{\sqrt{2\nu}|x - x'|}{l} \right)^{\nu}" @@ -818,23 +829,24 @@ def __call__( -distance_x_y ** 2 / (2 * self.length_scale ** 2), ) ) - else: - # General formula - scaling = 2**(1 - self.nu) / gamma(self.nu) - body = np.sqrt(2 * self.nu) * distance_x_y / self.length_scale - power = body**self.nu - bessel = kv(self.nu, body) - - with np.errstate(invalid='ignore'): - eval_cov = self.variance * scaling * power * bessel - - # Values with nan are where the distance is 0 - return np.nan_to_num( # type: ignore[no-any-return] - eval_cov, - nan=self.variance, - ) + + # General formula + scaling = 2**(1 - self.nu) / gamma(self.nu) + body = np.sqrt(2 * self.nu) * distance_x_y / self.length_scale + power = body**self.nu + bessel = kv(self.nu, body) + + with np.errstate(invalid='ignore'): + eval_cov = self.variance * scaling * power * bessel + + # Values with nan are where the distance is 0 + return np.nan_to_num( # type: ignore[no-any-return] + eval_cov, + nan=self.variance, + ) def to_sklearn(self) -> sklearn_kern.Kernel: + """Obtain corresponding scikit-learn kernel type.""" return ( self.variance * sklearn_kern.Matern(length_scale=self.length_scale, nu=self.nu) From fb79ed1bc4fbfc7c3b9fd2b75f41dc49e62dccd1 Mon Sep 17 00:00:00 2001 From: E105D104U125 Date: Mon, 8 Jul 2024 11:33:44 +0200 Subject: [PATCH 10/12] Merge develop into covariance_kernels --- skfda/misc/covariances.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/misc/covariances.py b/skfda/misc/covariances.py index a36142afb..c130fc454 100644 --- a/skfda/misc/covariances.py +++ b/skfda/misc/covariances.py @@ -730,7 +730,7 @@ class Matern(Covariance): where :math:`\sigma^2` is the variance, :math:`l` is the length scale and :math:`\nu` controls the smoothness of the related Gaussian process. - The trajectories of a Gaussian process with Matérn covariance is + The trajectories of a Gaussian process with Matérn covariance is :math:`\lceil \nu \rceil - 1` times differentiable. From 4e9db76673313716bfb8f9178c2d3daebee6d902 Mon Sep 17 00:00:00 2001 From: E105D104U125 Date: Mon, 8 Jul 2024 12:22:42 +0200 Subject: [PATCH 11/12] Refactored covariances test module and corrected style problems. --- skfda/tests/test_covariances.py | 252 ++++++++++++++------------------ 1 file changed, 110 insertions(+), 142 deletions(-) diff --git a/skfda/tests/test_covariances.py b/skfda/tests/test_covariances.py index ebf78f20d..1e8550564 100644 --- a/skfda/tests/test_covariances.py +++ b/skfda/tests/test_covariances.py @@ -1,5 +1,5 @@ """Tests for Covariance module.""" -from typing import Any, Tuple +from typing import Any import numpy as np import pytest @@ -21,14 +21,27 @@ def _test_compare_sklearn( np.testing.assert_array_almost_equal(cov_matrix, cov_sklearn_matrix) +############################################################################### +# Example datasets for which to calculate the evaluation of the kernel by hand +# to compare it against the results yielded by the implementation. +############################################################################### + + +basis = MonomialBasis(n_basis=2, domain_range=(-2, 2)) + +fd = [ + FDataBasis(basis=basis, coefficients=[[1, 0], [1, 2]]), + FDataBasis(basis=basis, coefficients=[[0, 1]]), +] + ############################################################################## # Fixtures ############################################################################## @pytest.fixture -def fetch_functional_data() -> FDataGrid: - """Fixture for loading the phoneme dataset example.""" +def fetch_weather_subset() -> FDataGrid: + """Fixture for loading the canadian weather dataset example.""" fd, _ = fetch_weather(return_X_y=True) return fd[:20] @@ -58,34 +71,46 @@ def covariances_raise_fixture(request: Any) -> Any: return request.param -@pytest.fixture -def fdatabasis_data() -> Tuple[FDataBasis, FDataBasis]: +@pytest.fixture( + params=[ + [ + cov.Linear(variance=1 / 2, intercept=3), + np.array([[3 / 2], [3 / 2 + 32 / 6]]), + ], + [ + cov.Polynomial(variance=1 / 3, slope=2, intercept=1, degree=2), + np.array([[1 / 3], [67**2 / 3**3]]), + ], + [ + cov.Gaussian(variance=3, length_scale=2), + np.array([[3 * np.exp(-7 / 6)], [3 * np.exp(-7 / 6)]]), + ], + [ + cov.Exponential(variance=4, length_scale=5), + np.array([ + [4 * np.exp(-np.sqrt(28 / 3) / 5)], + [4 * np.exp(-np.sqrt(28 / 3) / 5)], + ]), + ], + [ + cov.Matern(variance=2, length_scale=3, nu=2), + np.array([ + [(2 / 3) ** 2 * (28 / 3) * 0.239775899566], + [(2 / 3) ** 2 * (28 / 3) * 0.239775899566], + ]), + ], + ], +) +def precalc_example_data( + request: Any, +) -> list[FDataBasis, FDataBasis, cov.Covariance, np.array]: """Fixture for getting fdatabasis objects. The dataset is used to test manual calculations of the covariance functions against the implementation. """ - basis = MonomialBasis( - n_basis=2, - domain_range=(-2, 2), - ) - - fd1 = FDataBasis( - basis=basis, - coefficients=[ - [1, 0], - [1, 2], - ], - ) - - fd2 = FDataBasis( - basis=basis, - coefficients=[ - [0, 1], - ], - ) - - return fd1, fd2 + # First fd, Second fd, kernel used, result + return *fd, *request.param @pytest.fixture @@ -96,39 +121,44 @@ def multivariate_data() -> np.array: @pytest.fixture( params=[ - (cov.Linear, - { - "variance": [1, 2], - "intercept": [3, 4], - }, - ), - (cov.Polynomial, - { - "variance": [2], - "intercept": [0, 2], - "slope": [1, 2], - "degree": [1, 2, 3], - }, - ), - (cov.Exponential, - { - "variance": [1, 2], - "length_scale": [0.5, 1, 2], - }, - ), - (cov.Gaussian, - { - "variance": [1, 2], - "length_scale": [0.5, 1, 2], - }, - ), - (cov.Matern, - { - "variance": [2], - "length_scale": [0.5], - "nu": [0.5, 1, 1.5, 2.5, 3.5, np.inf], - }, - ), + [ + cov.Linear, + { + "variance": [1, 2], + "intercept": [3, 4], + }, + ], + [ + cov.Polynomial, + { + "variance": [2], + "intercept": [0, 2], + "slope": [1, 2], + "degree": [1, 2, 3], + }, + ], + [ + cov.Exponential, + { + "variance": [1, 2], + "length_scale": [0.5, 1, 2], + }, + ], + [ + cov.Gaussian, + { + "variance": [1, 2], + "length_scale": [0.5, 1, 2], + }, + ], + [ + cov.Matern, + { + "variance": [2], + "length_scale": [0.5], + "nu": [0.5, 1, 1.5, 2.5, 3.5, np.inf], + }, + ], ], ) def covariance_and_params(request: Any) -> Any: @@ -142,11 +172,11 @@ def covariance_and_params(request: Any) -> Any: def test_covariances( - fetch_functional_data: FDataGrid, + fetch_weather_subset: FDataGrid, covariances_fixture: cov.Covariance, ) -> None: """Check that parameter conversion is done correctly.""" - fd = fetch_functional_data + fd = fetch_weather_subset cov_kernel = covariances_fixture # Also test that it does not fail @@ -161,7 +191,7 @@ def test_covariances( def test_raises( - fetch_functional_data: FDataGrid, + fetch_weather_subset: FDataGrid, covariances_raise_fixture: Any, ) -> None: """Check raises ValueError. @@ -169,7 +199,7 @@ def test_raises( Check that non-functional kernels raise a ValueError exception with functional data. """ - fd = fetch_functional_data + fd = fetch_weather_subset cov_kernel = covariances_raise_fixture pytest.raises( @@ -179,86 +209,24 @@ def test_raises( ) -def test_fdatabasis_example_linear( - fdatabasis_data: Tuple[FDataBasis, FDataBasis], -) -> None: - """Check a precalculated example for Linear covariance kernel.""" - fd1, fd2 = fdatabasis_data - res1 = cov.Linear(variance=1 / 2, intercept=3)(fd1, fd2) - res2 = np.array([[3 / 2], [3 / 2 + 32 / 6]]) - np.testing.assert_allclose( - res1, - res2, - rtol=1e-6, - ) - - -def test_fdatabasis_example_polynomial( - fdatabasis_data: Tuple[FDataBasis, FDataBasis], -) -> None: - """Check a precalculated example for Polynomial covariance kernel.""" - fd1, fd2 = fdatabasis_data - res1 = cov.Polynomial( - variance=1 / 3, - slope=2, - intercept=1, - degree=2, - )(fd1, fd2) - res2 = np.array([[1 / 3], [67**2 / 3**3]]) - np.testing.assert_allclose( - res1, - res2, - rtol=1e-6, - ) - - -def test_fdatabasis_example_gaussian( - fdatabasis_data: Tuple[FDataBasis, FDataBasis], -) -> None: - """Check a precalculated example for Gaussian covariance kernel.""" - fd1, fd2 = fdatabasis_data - res1 = cov.Gaussian(variance=3, length_scale=2)(fd1, fd2) - res2 = np.array([ - [3 * np.exp(-7 / 6)], - [3 * np.exp(-7 / 6)], - ]) - np.testing.assert_allclose( - res1, - res2, - rtol=1e-6, - ) - - -def test_fdatabasis_example_exponential( - fdatabasis_data: Tuple[FDataBasis, FDataBasis], -) -> None: - """Check a precalculated example for Exponential covariance kernel.""" - fd1, fd2 = fdatabasis_data - res1 = cov.Exponential(variance=4, length_scale=5)(fd1, fd2) - res2 = np.array([ - [4 * np.exp(-np.sqrt(28 / 3) / 5)], - [4 * np.exp(-np.sqrt(28 / 3) / 5)], - ]) - np.testing.assert_allclose( - res1, - res2, - rtol=1e-6, - ) - - -def test_fdatabasis_example_matern( - fdatabasis_data: Tuple[FDataBasis, FDataBasis], -) -> None: - """Check a precalculated example for Matern covariance kernel.""" - fd1, fd2 = fdatabasis_data - res1 = cov.Matern(variance=2, length_scale=3, nu=2)(fd1, fd2) - res2 = np.array([ - [(2 / 3) ** 2 * (28 / 3) * 0.239775899566], - [(2 / 3) ** 2 * (28 / 3) * 0.239775899566], - ]) +def test_precalc_example( + precalc_example_data: list[ # noqa: WPS320 + FDataBasis, FDataBasis, cov.Covariance, np.array, + ], +): + """Check the precalculated example for Linear covariance kernel. + + Compare the theoretical precalculated results against the covariance kernel + implementation, for different kernels. + The structure of the input is a list containing: + [First functional dataset, Second functional dataset, + Covariance kernel used, Result] + """ + fd1, fd2, kernel, precalc_result = precalc_example_data + computed_result = kernel(fd1, fd2) np.testing.assert_allclose( - res1, - res2, + computed_result, + precalc_result, rtol=1e-6, ) From 26c0e73b82fae6e76e645c25488e70db2a523e31 Mon Sep 17 00:00:00 2001 From: E105D104U125 Date: Mon, 8 Jul 2024 12:28:17 +0200 Subject: [PATCH 12/12] Changes in style --- skfda/misc/covariances.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/misc/covariances.py b/skfda/misc/covariances.py index c130fc454..00baa7e66 100644 --- a/skfda/misc/covariances.py +++ b/skfda/misc/covariances.py @@ -208,7 +208,7 @@ def column_style( # noqa: WPS430 {heatmap} - """ # noqa: WPS432 + """ # noqa: WPS432, WPS318 def to_sklearn(self) -> sklearn_kern.Kernel: """Convert it to a sklearn kernel, if there is one."""