Skip to content

Commit

Permalink
Merge pull request #610 from GAA-UAM/feature/scoring-fdatairregular
Browse files Browse the repository at this point in the history
Implement scores for `FDataIrregular` objects as described in #609
  • Loading branch information
vnmabus authored Jul 5, 2024
2 parents 59b6f60 + ca0a11c commit d19e1bd
Show file tree
Hide file tree
Showing 4 changed files with 199 additions and 38 deletions.
1 change: 1 addition & 0 deletions skfda/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
FData as FData,
FDataBasis as FDataBasis,
FDataGrid as FDataGrid,
FDataIrregular as FDataIrregular,
concatenate as concatenate,
)

Expand Down
191 changes: 155 additions & 36 deletions skfda/misc/scoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@
import math
import warnings
from functools import singledispatch
from typing import Callable, Optional, TypeVar, Union, overload
from typing import Callable, TypeVar, overload

import numpy as np
import sklearn.metrics
from typing_extensions import Literal, Protocol

from .._utils import nquad_vec
from ..representation import FData, FDataBasis, FDataGrid
from ..representation import FData, FDataBasis, FDataGrid, FDataIrregular
from ..representation._functional_data import EvalPointsType
from ..typing._numpy import NDArrayFloat

Expand Down Expand Up @@ -125,6 +125,42 @@ def _multioutput_score_grid(
return float(np.mean(score.integrate()[0]) / _domain_measure(score))


def _integral_average_fdatairregular(
    score: FDataIrregular,
    squared: bool = True,
    weights: NDArrayFloat | None = None,
) -> float:
    """Calculate the weighted average of the normalized integrals of the score.

    The integral of the score is normalized because each integral is divided
    by the length of the curve's domain (taken from ``score.sample_range``).

    If the score is vector-valued, then the mean of each codimension integral
    is calculated for every functional observation.

    Args:
        score: Score of the functions.
        squared: If False, the square root of the score is taken before it
            is integrated.
        weights: Weights for the mean.

    Returns:
        Weighted average of the normalized integrals, as a built-in float.

    Raises:
        ValueError: If the domain of the score is not one-dimensional.

    """
    if score.dim_domain != 1:
        raise ValueError(
            "Only univariate FDataIrregular objects are supported",
        )

    if not squared:
        # The root is applied to the score itself, i.e. before integrating.
        score = np.sqrt(score)

    integrals = np.mean(score.integrate(), axis=1)
    lebesgue_measures = np.diff(score.sample_range, axis=-1).reshape(-1)

    # Entries whose measure is zero are left at 0 instead of dividing by zero.
    normalized_integrals = np.divide(
        integrals,
        lebesgue_measures,
        out=np.zeros_like(integrals),
        where=lebesgue_measures != 0,
    )

    # ``np.average`` yields a NumPy scalar; convert it so the declared
    # ``float`` return type holds, as ``_multioutput_score_grid`` does.
    return float(np.average(normalized_integrals, weights=weights))


@overload
def explained_variance_score(
y_true: DataType,
Expand Down Expand Up @@ -238,9 +274,9 @@ def _explained_variance_score_fdatagrid(
y_true: FDataGrid,
y_pred: FDataGrid,
*,
sample_weight: Optional[NDArrayFloat] = None,
sample_weight: NDArrayFloat | None = None,
multioutput: MultiOutputType = 'uniform_average',
) -> Union[float, FDataGrid]:
) -> float | FDataGrid:

num = _var(y_true - y_pred, weights=sample_weight)
den = _var(y_true, weights=sample_weight)
Expand All @@ -260,7 +296,7 @@ def _explained_variance_score_fdatabasis(
y_true: FDataBasis,
y_pred: FDataBasis,
*,
sample_weight: Optional[NDArrayFloat] = None,
sample_weight: NDArrayFloat | None = None,
multioutput: MultiOutputType = 'uniform_average',
) -> float:

Expand Down Expand Up @@ -361,8 +397,9 @@ def mean_absolute_error(
where :math:`D` is the function domain and :math:`V` the volume of that
domain.
For :class:`~skfda.representation.FDataBasis` only
'uniform_average' is available.
For :class:`~skfda.representation.FDataBasis` and
:class:`~skfda.representation.FDataIrregular` only 'uniform_average' is
available.
If :math:`y\_true` and :math:`y\_pred` are numpy arrays, sklearn function
is called.
Expand All @@ -378,8 +415,10 @@ def mean_absolute_error(
Mean absolute error.
If multioutput = 'uniform_average' or
:math:`y\_pred` and :math:`y\_true` are
:class:`~skfda.representation.FDataBasis` objects, float is returned.
:math:`y\_pred` and :math:`y\_true` are both
:class:`~skfda.representation.FDataBasis` or both
:class:`~skfda.representation.FDataIrregular` objects, float is
returned.
If both :math:`y\_pred` and :math:`y\_true` are
:class:`~skfda.representation.FDataGrid`
Expand All @@ -403,21 +442,35 @@ def _mean_absolute_error_fdatagrid(
y_true: FDataGrid,
y_pred: FDataGrid,
*,
sample_weight: Optional[NDArrayFloat] = None,
sample_weight: NDArrayFloat | None = None,
multioutput: MultiOutputType = 'uniform_average',
) -> Union[float, FDataGrid]:
) -> float | FDataGrid:
from ..exploratory.stats import mean

error = mean(np.abs(y_true - y_pred), weights=sample_weight)
return _multioutput_score_grid(error, multioutput)


@mean_absolute_error.register  # type: ignore[attr-defined, misc]
def _mean_absolute_error_fdatairregular(
    y_true: FDataIrregular,
    y_pred: FDataIrregular,
    *,
    sample_weight: NDArrayFloat | None = None,
    multioutput: MultiOutputType = 'uniform_average',
) -> float:
    """Mean absolute error for a pair of ``FDataIrregular`` objects.

    The absolute difference between both objects is handed to
    ``_integral_average_fdatairregular`` with ``sample_weight`` as weights.

    Args:
        y_true: Correct target values.
        y_pred: Estimated values.
        sample_weight: Weights forwarded to the integral average.
        multioutput: Accepted for signature compatibility; it is not read
            by this implementation.

    Returns:
        Value returned by ``_integral_average_fdatairregular``.

    """
    absolute_error = np.abs(y_true - y_pred)
    return _integral_average_fdatairregular(
        absolute_error,
        weights=sample_weight,
    )


@mean_absolute_error.register # type: ignore[attr-defined, misc]
def _mean_absolute_error_fdatabasis(
y_true: FDataBasis,
y_pred: FDataBasis,
*,
sample_weight: Optional[NDArrayFloat] = None,
sample_weight: NDArrayFloat | None = None,
multioutput: MultiOutputType = 'uniform_average',
) -> float:

Expand Down Expand Up @@ -491,8 +544,9 @@ def mean_absolute_percentage_error(
where :math:`D` is the function domain and :math:`V` the volume of that
domain.
For :class:`~skfda.representation.FDataBasis` only
'uniform_average' is available.
For :class:`~skfda.representation.FDataBasis` and
:class:`~skfda.representation.FDataIrregular` only 'uniform_average' is
available.
If :math:`y\_true` and :math:`y\_pred` are numpy arrays, sklearn function
is called.
Expand All @@ -511,8 +565,10 @@ def mean_absolute_percentage_error(
Mean absolute percentage error.
If multioutput = 'uniform_average' or
:math:`y\_pred` and :math:`y\_true` are
:class:`~skfda.representation.FDataBasis` objects, float is returned.
:math:`y\_pred` and :math:`y\_true` are both
:class:`~skfda.representation.FDataBasis` or both
:class:`~skfda.representation.FDataIrregular` objects, float is
returned.
If both :math:`y\_pred` and :math:`y\_true` are
:class:`~skfda.representation.FDataGrid`
Expand All @@ -538,9 +594,9 @@ def _mean_absolute_percentage_error_fdatagrid(
y_true: FDataGrid,
y_pred: FDataGrid,
*,
sample_weight: Optional[NDArrayFloat] = None,
sample_weight: NDArrayFloat | None = None,
multioutput: MultiOutputType = 'uniform_average',
) -> Union[float, FDataGrid]:
) -> float | FDataGrid:
from ..exploratory.stats import mean

epsilon = np.finfo(np.float64).eps
Expand All @@ -554,12 +610,29 @@ def _mean_absolute_percentage_error_fdatagrid(
return _multioutput_score_grid(error, multioutput)


@mean_absolute_percentage_error.register  # type: ignore[attr-defined, misc]
def _mean_absolute_percentage_error_fdatairregular(
    y_true: FDataIrregular,
    y_pred: FDataIrregular,
    *,
    sample_weight: NDArrayFloat | None = None,
    multioutput: MultiOutputType = 'uniform_average',
) -> float:
    """Mean absolute percentage error for ``FDataIrregular`` objects.

    The absolute error is divided by the absolute value of ``y_true``,
    clipped from below at the float64 machine epsilon, and the result is
    handed to ``_integral_average_fdatairregular``.

    Args:
        y_true: Correct target values.
        y_pred: Estimated values.
        sample_weight: Weights forwarded to the integral average.
        multioutput: Accepted for signature compatibility; it is not read
            by this implementation.

    Returns:
        Value returned by ``_integral_average_fdatairregular``.

    Warns:
        RuntimeWarning: If any value of ``y_true`` is smaller in absolute
            value than the float64 machine epsilon.

    """
    smallest = np.finfo(np.float64).eps

    near_zero = np.abs(y_true.values) < smallest
    if np.any(near_zero):
        warnings.warn('Zero denominator', RuntimeWarning)

    absolute_error = np.abs(y_pred - y_true)
    # Clip the denominator so that the division is always defined.
    denominator = np.maximum(np.abs(y_true), smallest)

    return _integral_average_fdatairregular(
        absolute_error / denominator,
        weights=sample_weight,
    )


@mean_absolute_percentage_error.register # type: ignore[attr-defined, misc]
def _mean_absolute_percentage_error_fdatabasis(
y_true: FDataBasis,
y_pred: FDataBasis,
*,
sample_weight: Optional[NDArrayFloat] = None,
sample_weight: NDArrayFloat | None = None,
multioutput: MultiOutputType = 'uniform_average',
) -> float:

Expand Down Expand Up @@ -644,8 +717,9 @@ def mean_squared_error(
where :math:`D` is the function domain and :math:`V` the volume of that
domain.
For :class:`~skfda.representation.FDataBasis` only
'uniform_average' is available.
For :class:`~skfda.representation.FDataBasis` and
:class:`~skfda.representation.FDataIrregular` only 'uniform_average' is
available.
If :math:`y\_true` and :math:`y\_pred` are numpy arrays, sklearn function
is called.
Expand All @@ -662,8 +736,10 @@ def mean_squared_error(
Mean squared error.
If multioutput = 'uniform_average' or
:math:`y\_pred` and :math:`y\_true` are
:class:`~skfda.representation.FDataBasis` objects, float is returned.
:math:`y\_pred` and :math:`y\_true` are both
:class:`~skfda.representation.FDataBasis` or both
:class:`~skfda.representation.FDataIrregular` objects, float is
returned.
If both :math:`y\_pred` and :math:`y\_true` are
:class:`~skfda.representation.FDataGrid`
Expand All @@ -688,10 +764,10 @@ def _mean_squared_error_fdatagrid(
y_true: FDataGrid,
y_pred: FDataGrid,
*,
sample_weight: Optional[NDArrayFloat] = None,
sample_weight: NDArrayFloat | None = None,
multioutput: MultiOutputType = 'uniform_average',
squared: bool = True,
) -> Union[float, FDataGrid]:
) -> float | FDataGrid:
from ..exploratory.stats import mean

error: FDataGrid = mean(
Expand All @@ -702,12 +778,28 @@ def _mean_squared_error_fdatagrid(
return _multioutput_score_grid(error, multioutput, squared=squared)


@mean_squared_error.register  # type: ignore[attr-defined, misc]
def _mean_squared_error_fdatairregular(
    y_true: FDataIrregular,
    y_pred: FDataIrregular,
    *,
    sample_weight: NDArrayFloat | None = None,
    multioutput: MultiOutputType = 'uniform_average',
    squared: bool = True,
) -> float:
    """Mean squared error for a pair of ``FDataIrregular`` objects.

    The squared difference between both objects is handed to
    ``_integral_average_fdatairregular`` together with ``sample_weight``
    and ``squared``.

    Args:
        y_true: Correct target values.
        y_pred: Estimated values.
        sample_weight: Weights forwarded to the integral average.
        multioutput: Accepted for signature compatibility; it is not read
            by this implementation.
        squared: Forwarded unchanged to the integral average.

    Returns:
        Value returned by ``_integral_average_fdatairregular``.

    """
    squared_error = np.power(y_true - y_pred, 2)
    return _integral_average_fdatairregular(
        squared_error,
        squared=squared,
        weights=sample_weight,
    )


@mean_squared_error.register # type: ignore[attr-defined, misc]
def _mean_squared_error_fdatabasis(
y_true: FDataBasis,
y_pred: FDataBasis,
*,
sample_weight: Optional[NDArrayFloat] = None,
sample_weight: NDArrayFloat | None = None,
squared: bool = True,
multioutput: MultiOutputType = 'uniform_average',
) -> float:
Expand Down Expand Up @@ -791,8 +883,9 @@ def mean_squared_log_error(
where :math:`D` is the function domain and :math:`V` the volume of that
domain.
For :class:`~skfda.representation.FDataBasis` only
'uniform_average' is available.
For :class:`~skfda.representation.FDataBasis` and
:class:`~skfda.representation.FDataIrregular` only 'uniform_average' is
available.
If :math:`y\_true` and :math:`y\_pred` are numpy arrays, sklearn function
is called.
Expand All @@ -812,8 +905,10 @@ def mean_squared_log_error(
Mean squared log error.
If multioutput = 'uniform_average' or
:math:`y\_pred` and :math:`y\_true` are
:class:`~skfda.representation.FDataBasis` objects, float is returned.
:math:`y\_pred` and :math:`y\_true` are both
:class:`~skfda.representation.FDataBasis` or both
:class:`~skfda.representation.FDataIrregular` objects, float is
returned.
If both :math:`y\_pred` and :math:`y\_true` are
:class:`~skfda.representation.FDataGrid`
Expand All @@ -840,10 +935,10 @@ def _mean_squared_log_error_fdatagrid(
y_true: FDataGrid,
y_pred: FDataGrid,
*,
sample_weight: Optional[NDArrayFloat] = None,
sample_weight: NDArrayFloat | None = None,
multioutput: MultiOutputType = 'uniform_average',
squared: bool = True,
) -> Union[float, FDataGrid]:
) -> float | FDataGrid:

if np.any(y_true.data_matrix < 0) or np.any(y_pred.data_matrix < 0):
raise ValueError(
Expand All @@ -860,12 +955,36 @@ def _mean_squared_log_error_fdatagrid(
)


@mean_squared_log_error.register  # type: ignore[attr-defined, misc]
def _mean_squared_log_error_fdatairregular(
    y_true: FDataIrregular,
    y_pred: FDataIrregular,
    *,
    sample_weight: NDArrayFloat | None = None,
    multioutput: MultiOutputType = 'uniform_average',
    squared: bool = True,
) -> float:
    """Mean squared logarithmic error for ``FDataIrregular`` objects.

    Both arguments are transformed with ``np.log1p`` and the result is
    delegated to ``mean_squared_error``.

    Args:
        y_true: Correct target values.
        y_pred: Estimated values.
        sample_weight: Forwarded to ``mean_squared_error``.
        multioutput: Forwarded to ``mean_squared_error``.
        squared: Forwarded to ``mean_squared_error``.

    Returns:
        Value returned by ``mean_squared_error`` on the transformed data.

    Raises:
        ValueError: If ``y_true`` or ``y_pred`` contains negative values.

    """
    # ``y_true`` is inspected first; ``y_pred`` only if ``y_true`` passes.
    for functions in (y_true, y_pred):
        if np.any(functions.values < 0):
            raise ValueError(
                "Mean Squared Logarithmic Error cannot be used when "
                "targets functions have negative values.",
            )

    log_true = np.log1p(y_true)
    log_pred = np.log1p(y_pred)

    return mean_squared_error(
        log_true,
        log_pred,
        sample_weight=sample_weight,
        multioutput=multioutput,
        squared=squared,
    )


@mean_squared_log_error.register # type: ignore[attr-defined, misc]
def _mean_squared_log_error_fdatabasis(
y_true: FDataBasis,
y_pred: FDataBasis,
*,
sample_weight: Optional[NDArrayFloat] = None,
sample_weight: NDArrayFloat | None = None,
squared: bool = True,
multioutput: MultiOutputType = 'uniform_average',
) -> float:
Expand Down Expand Up @@ -998,9 +1117,9 @@ def _r2_score_fdatagrid(
y_true: FDataGrid,
y_pred: FDataGrid,
*,
sample_weight: Optional[NDArrayFloat] = None,
sample_weight: NDArrayFloat | None = None,
multioutput: MultiOutputType = 'uniform_average',
) -> Union[float, FDataGrid]:
) -> float | FDataGrid:
from ..exploratory.stats import mean

if y_pred.n_samples < 2:
Expand Down Expand Up @@ -1030,7 +1149,7 @@ def _r2_score_fdatabasis(
y_true: FDataBasis,
y_pred: FDataBasis,
*,
sample_weight: Optional[NDArrayFloat] = None,
sample_weight: NDArrayFloat | None = None,
multioutput: MultiOutputType = 'uniform_average',
) -> float:

Expand Down
2 changes: 1 addition & 1 deletion skfda/representation/irregular.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def _reduceat(
dtype=None,
out=None,
*,
value_empty
value_empty,
):
"""
Wrapped `np.ufunc.reduceat` to manage some edge cases.
Expand Down
Loading

0 comments on commit d19e1bd

Please sign in to comment.