-
Notifications
You must be signed in to change notification settings - Fork 58
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implement scores for FDataIrregular
objects as described in #609
#610
Changes from all commits
9730d51
951dea3
30c807f
a5b7617
e18bc49
5d3addc
e91419d
4bd8713
ca0a11c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,14 +4,14 @@ | |
import math | ||
import warnings | ||
from functools import singledispatch | ||
from typing import Callable, Optional, TypeVar, Union, overload | ||
from typing import Callable, TypeVar, overload | ||
|
||
import numpy as np | ||
import sklearn.metrics | ||
from typing_extensions import Literal, Protocol | ||
|
||
from .._utils import nquad_vec | ||
from ..representation import FData, FDataBasis, FDataGrid | ||
from ..representation import FData, FDataBasis, FDataGrid, FDataIrregular | ||
from ..representation._functional_data import EvalPointsType | ||
from ..typing._numpy import NDArrayFloat | ||
|
||
|
@@ -125,6 +125,42 @@ def _multioutput_score_grid( | |
return float(np.mean(score.integrate()[0]) / _domain_measure(score)) | ||
|
||
|
||
def _integral_average_fdatairregular(
    score: FDataIrregular,
    squared: bool = True,
    weights: NDArrayFloat | None = None,
) -> float:
    """Calculate the weighted average of the normalized integrals of the score.

    The integral of the score is normalized because each integral is divided by
    the length of the curve's domain.

    If the score is vector-valued, then the mean of each codimension integral
    is calculated for every functional observation.

    Args:
        score: Score of the functions.
        squared: If False, the square root of the score is taken before it
            is integrated.
        weights: Weights for the mean.

    Returns:
        Weighted average, over the functional observations, of the
        normalized integrals of the score.

    Raises:
        ValueError: If the domain of the score is not one-dimensional.
    """
    if score.dim_domain != 1:
        raise ValueError(
            "Only univariate FDataIrregular objects are supported",
        )
    if not squared:
        # NOTE(review): the root is applied pointwise, before integrating.
        # For a squared-error score this integrates the absolute error
        # rather than taking the root of the averaged value -- confirm that
        # this is the intended definition.
        score = np.sqrt(score)

    # Review discussion: taking the mean of the codimension integrals for
    # vector-valued functions matches what is done for the other types.
    integrals = np.mean(score.integrate(), axis=1)

    # Each curve is normalized by the length of its own sample range.
    # NOTE(review): an alternative normalization length was raised in the
    # review and left for discussion -- confirm the choice.
    lebesgue_measures = np.diff(score.sample_range, axis=-1).reshape(-1)

    # Curves whose sample range has zero length keep the prefilled value 0
    # instead of triggering a division by zero.
    normalized_integrals = np.divide(
        integrals,
        lebesgue_measures,
        out=np.zeros_like(integrals),
        where=lebesgue_measures != 0,
    )

    # Return a builtin float, as the annotated return type states and as
    # the grid-based helper in this module does.
    return float(np.average(normalized_integrals, weights=weights))
|
||
|
||
@overload | ||
def explained_variance_score( | ||
y_true: DataType, | ||
|
@@ -238,9 +274,9 @@ def _explained_variance_score_fdatagrid( | |
y_true: FDataGrid, | ||
y_pred: FDataGrid, | ||
*, | ||
sample_weight: Optional[NDArrayFloat] = None, | ||
sample_weight: NDArrayFloat | None = None, | ||
multioutput: MultiOutputType = 'uniform_average', | ||
) -> Union[float, FDataGrid]: | ||
) -> float | FDataGrid: | ||
|
||
num = _var(y_true - y_pred, weights=sample_weight) | ||
den = _var(y_true, weights=sample_weight) | ||
|
@@ -260,7 +296,7 @@ def _explained_variance_score_fdatabasis( | |
y_true: FDataBasis, | ||
y_pred: FDataBasis, | ||
*, | ||
sample_weight: Optional[NDArrayFloat] = None, | ||
sample_weight: NDArrayFloat | None = None, | ||
multioutput: MultiOutputType = 'uniform_average', | ||
) -> float: | ||
|
||
|
@@ -361,8 +397,9 @@ def mean_absolute_error( | |
where :math:`D` is the function domain and :math:`V` the volume of that | ||
domain. | ||
|
||
For :class:`~skfda.representation.FDataBasis` only | ||
'uniform_average' is available. | ||
For :class:`~skfda.representation.FDataBasis` and | ||
:class:`~skfda.representation.FDataIrregular` only 'uniform_average' is | ||
available. | ||
|
||
If :math:`y\_true` and :math:`y\_pred` are numpy arrays, sklearn function | ||
is called. | ||
|
@@ -378,8 +415,10 @@ def mean_absolute_error( | |
Mean absolute error. | ||
|
||
If multioutput = 'uniform_average' or | ||
:math:`y\_pred` and :math:`y\_true` are | ||
:class:`~skfda.representation.FDataBasis` objects, float is returned. | ||
:math:`y\_pred` and :math:`y\_true` are both | ||
:class:`~skfda.representation.FDataBasis` or both | ||
:class:`~skfda.representation.FDataIrregular` objects, float is | ||
returned. | ||
|
||
If both :math:`y\_pred` and :math:`y\_true` are | ||
:class:`~skfda.representation.FDataGrid` | ||
|
@@ -403,21 +442,35 @@ def _mean_absolute_error_fdatagrid( | |
y_true: FDataGrid, | ||
y_pred: FDataGrid, | ||
*, | ||
sample_weight: Optional[NDArrayFloat] = None, | ||
sample_weight: NDArrayFloat | None = None, | ||
multioutput: MultiOutputType = 'uniform_average', | ||
) -> Union[float, FDataGrid]: | ||
) -> float | FDataGrid: | ||
from ..exploratory.stats import mean | ||
|
||
error = mean(np.abs(y_true - y_pred), weights=sample_weight) | ||
return _multioutput_score_grid(error, multioutput) | ||
|
||
|
||
@mean_absolute_error.register  # type: ignore[attr-defined, misc]
def _mean_absolute_error_fdatairregular(
    y_true: FDataIrregular,
    y_pred: FDataIrregular,
    *,
    sample_weight: NDArrayFloat | None = None,
    multioutput: MultiOutputType = 'uniform_average',
) -> float:
    """Mean absolute error between two irregular functional datasets.

    Args:
        y_true: Correct target values.
        y_pred: Estimated values.
        sample_weight: Weights used when averaging over the samples.
        multioutput: Accepted to keep the signature uniform across the
            registered implementations; it is not read here.

    Returns:
        Weighted average of the normalized integrals of the absolute
        difference between ``y_true`` and ``y_pred``.
    """
    absolute_error = np.abs(y_true - y_pred)
    return _integral_average_fdatairregular(
        absolute_error,
        weights=sample_weight,
    )
|
||
|
||
@mean_absolute_error.register # type: ignore[attr-defined, misc] | ||
def _mean_absolute_error_fdatabasis( | ||
y_true: FDataBasis, | ||
y_pred: FDataBasis, | ||
*, | ||
sample_weight: Optional[NDArrayFloat] = None, | ||
sample_weight: NDArrayFloat | None = None, | ||
multioutput: MultiOutputType = 'uniform_average', | ||
) -> float: | ||
|
||
|
@@ -491,8 +544,9 @@ def mean_absolute_percentage_error( | |
where :math:`D` is the function domain and :math:`V` the volume of that | ||
domain. | ||
|
||
For :class:`~skfda.representation.FDataBasis` only | ||
'uniform_average' is available. | ||
For :class:`~skfda.representation.FDataBasis` and | ||
:class:`~skfda.representation.FDataIrregular` only 'uniform_average' is | ||
available. | ||
|
||
If :math:`y\_true` and :math:`y\_pred` are numpy arrays, sklearn function | ||
is called. | ||
|
@@ -511,8 +565,10 @@ def mean_absolute_percentage_error( | |
Mean absolute percentage error. | ||
|
||
If multioutput = 'uniform_average' or | ||
:math:`y\_pred` and :math:`y\_true` are | ||
:class:`~skfda.representation.FDataBasis` objects, float is returned. | ||
:math:`y\_pred` and :math:`y\_true` are both | ||
:class:`~skfda.representation.FDataBasis` or both | ||
:class:`~skfda.representation.FDataIrregular` objects, float is | ||
returned. | ||
|
||
If both :math:`y\_pred` and :math:`y\_true` are | ||
:class:`~skfda.representation.FDataGrid` | ||
|
@@ -538,9 +594,9 @@ def _mean_absolute_percentage_error_fdatagrid( | |
y_true: FDataGrid, | ||
y_pred: FDataGrid, | ||
*, | ||
sample_weight: Optional[NDArrayFloat] = None, | ||
sample_weight: NDArrayFloat | None = None, | ||
multioutput: MultiOutputType = 'uniform_average', | ||
) -> Union[float, FDataGrid]: | ||
) -> float | FDataGrid: | ||
from ..exploratory.stats import mean | ||
|
||
epsilon = np.finfo(np.float64).eps | ||
|
@@ -554,12 +610,29 @@ def _mean_absolute_percentage_error_fdatagrid( | |
return _multioutput_score_grid(error, multioutput) | ||
|
||
|
||
@mean_absolute_percentage_error.register  # type: ignore[attr-defined, misc]
def _mean_absolute_percentage_error_fdatairregular(
    y_true: FDataIrregular,
    y_pred: FDataIrregular,
    *,
    sample_weight: NDArrayFloat | None = None,
    multioutput: MultiOutputType = 'uniform_average',
) -> float:
    """Mean absolute percentage error between irregular functional datasets.

    Args:
        y_true: Correct target values.
        y_pred: Estimated values.
        sample_weight: Weights used when averaging over the samples.
        multioutput: Accepted to keep the signature uniform across the
            registered implementations; it is not read here.

    Returns:
        Weighted average of the normalized integrals of the absolute error
        relative to the absolute value of ``y_true``.
    """
    eps = np.finfo(np.float64).eps

    # Warn, but do not fail, when some true value is numerically zero.
    if np.any(np.abs(y_true.values) < eps):
        warnings.warn('Zero denominator', RuntimeWarning)

    abs_error = np.abs(y_pred - y_true)
    # Clamp the denominator at machine epsilon to avoid dividing by zero.
    safe_denominator = np.maximum(np.abs(y_true), eps)
    return _integral_average_fdatairregular(
        abs_error / safe_denominator,
        weights=sample_weight,
    )
|
||
|
||
@mean_absolute_percentage_error.register # type: ignore[attr-defined, misc] | ||
def _mean_absolute_percentage_error_fdatabasis( | ||
y_true: FDataBasis, | ||
y_pred: FDataBasis, | ||
*, | ||
sample_weight: Optional[NDArrayFloat] = None, | ||
sample_weight: NDArrayFloat | None = None, | ||
multioutput: MultiOutputType = 'uniform_average', | ||
) -> float: | ||
|
||
|
@@ -644,8 +717,9 @@ def mean_squared_error( | |
where :math:`D` is the function domain and :math:`V` the volume of that | ||
domain. | ||
|
||
For :class:`~skfda.representation.FDataBasis` only | ||
'uniform_average' is available. | ||
For :class:`~skfda.representation.FDataBasis` and | ||
:class:`~skfda.representation.FDataIrregular` only 'uniform_average' is | ||
available. | ||
|
||
If :math:`y\_true` and :math:`y\_pred` are numpy arrays, sklearn function | ||
is called. | ||
|
@@ -662,8 +736,10 @@ def mean_squared_error( | |
Mean squared error. | ||
|
||
If multioutput = 'uniform_average' or | ||
:math:`y\_pred` and :math:`y\_true` are | ||
:class:`~skfda.representation.FDataBasis` objects, float is returned. | ||
:math:`y\_pred` and :math:`y\_true` are both | ||
:class:`~skfda.representation.FDataBasis` or both | ||
:class:`~skfda.representation.FDataIrregular` objects, float is | ||
returned. | ||
|
||
If both :math:`y\_pred` and :math:`y\_true` are | ||
:class:`~skfda.representation.FDataGrid` | ||
|
@@ -688,10 +764,10 @@ def _mean_squared_error_fdatagrid( | |
y_true: FDataGrid, | ||
y_pred: FDataGrid, | ||
*, | ||
sample_weight: Optional[NDArrayFloat] = None, | ||
sample_weight: NDArrayFloat | None = None, | ||
multioutput: MultiOutputType = 'uniform_average', | ||
squared: bool = True, | ||
) -> Union[float, FDataGrid]: | ||
) -> float | FDataGrid: | ||
from ..exploratory.stats import mean | ||
|
||
error: FDataGrid = mean( | ||
|
@@ -702,12 +778,28 @@ def _mean_squared_error_fdatagrid( | |
return _multioutput_score_grid(error, multioutput, squared=squared) | ||
|
||
|
||
@mean_squared_error.register  # type: ignore[attr-defined, misc]
def _mean_squared_error_fdatairregular(
    y_true: FDataIrregular,
    y_pred: FDataIrregular,
    *,
    sample_weight: NDArrayFloat | None = None,
    multioutput: MultiOutputType = 'uniform_average',
    squared: bool = True,
) -> float:
    """Mean squared error between two irregular functional datasets.

    Args:
        y_true: Correct target values.
        y_pred: Estimated values.
        sample_weight: Weights used when averaging over the samples.
        multioutput: Accepted to keep the signature uniform across the
            registered implementations; it is not read here.
        squared: Forwarded unchanged to the integral-averaging helper.

    Returns:
        Value computed by the integral-averaging helper on the squared
        difference between ``y_true`` and ``y_pred``.
    """
    squared_error = np.power(y_true - y_pred, 2)
    return _integral_average_fdatairregular(
        squared_error,
        squared=squared,
        weights=sample_weight,
    )
|
||
|
||
@mean_squared_error.register # type: ignore[attr-defined, misc] | ||
def _mean_squared_error_fdatabasis( | ||
y_true: FDataBasis, | ||
y_pred: FDataBasis, | ||
*, | ||
sample_weight: Optional[NDArrayFloat] = None, | ||
sample_weight: NDArrayFloat | None = None, | ||
squared: bool = True, | ||
multioutput: MultiOutputType = 'uniform_average', | ||
) -> float: | ||
|
@@ -791,8 +883,9 @@ def mean_squared_log_error( | |
where :math:`D` is the function domain and :math:`V` the volume of that | ||
domain. | ||
|
||
For :class:`~skfda.representation.FDataBasis` only | ||
'uniform_average' is available. | ||
For :class:`~skfda.representation.FDataBasis` and | ||
:class:`~skfda.representation.FDataIrregular` only 'uniform_average' is | ||
available. | ||
|
||
If :math:`y\_true` and :math:`y\_pred` are numpy arrays, sklearn function | ||
is called. | ||
|
@@ -812,8 +905,10 @@ def mean_squared_log_error( | |
Mean squared log error. | ||
|
||
If multioutput = 'uniform_average' or | ||
:math:`y\_pred` and :math:`y\_true` are | ||
:class:`~skfda.representation.FDataBasis` objects, float is returned. | ||
:math:`y\_pred` and :math:`y\_true` are both | ||
:class:`~skfda.representation.FDataBasis` or both | ||
:class:`~skfda.representation.FDataIrregular` objects, float is | ||
returned. | ||
|
||
If both :math:`y\_pred` and :math:`y\_true` are | ||
:class:`~skfda.representation.FDataGrid` | ||
|
@@ -840,10 +935,10 @@ def _mean_squared_log_error_fdatagrid( | |
y_true: FDataGrid, | ||
y_pred: FDataGrid, | ||
*, | ||
sample_weight: Optional[NDArrayFloat] = None, | ||
sample_weight: NDArrayFloat | None = None, | ||
multioutput: MultiOutputType = 'uniform_average', | ||
squared: bool = True, | ||
) -> Union[float, FDataGrid]: | ||
) -> float | FDataGrid: | ||
|
||
if np.any(y_true.data_matrix < 0) or np.any(y_pred.data_matrix < 0): | ||
raise ValueError( | ||
|
@@ -860,12 +955,36 @@ def _mean_squared_log_error_fdatagrid( | |
) | ||
|
||
|
||
@mean_squared_log_error.register  # type: ignore[attr-defined, misc]
def _mean_squared_log_error_fdatairregular(
    y_true: FDataIrregular,
    y_pred: FDataIrregular,
    *,
    sample_weight: NDArrayFloat | None = None,
    multioutput: MultiOutputType = 'uniform_average',
    squared: bool = True,
) -> float:
    """Mean squared logarithmic error between irregular functional datasets.

    Args:
        y_true: Correct target values.
        y_pred: Estimated values.
        sample_weight: Forwarded to ``mean_squared_error``.
        multioutput: Forwarded to ``mean_squared_error``.
        squared: Forwarded to ``mean_squared_error``.

    Returns:
        Result of ``mean_squared_error`` applied to ``log1p`` of both inputs.

    Raises:
        ValueError: If ``y_true`` or ``y_pred`` has negative values.
    """
    # Reject negative values before the logarithm is applied.
    has_negative = any(
        np.any(fdata.values < 0) for fdata in (y_true, y_pred)
    )
    if has_negative:
        raise ValueError(
            "Mean Squared Logarithmic Error cannot be used when "
            "targets functions have negative values.",
        )

    log_true = np.log1p(y_true)
    log_pred = np.log1p(y_pred)
    # Presumably dispatches to the FDataIrregular implementation, assuming
    # np.log1p preserves the input type -- verify.
    return mean_squared_error(
        log_true,
        log_pred,
        sample_weight=sample_weight,
        multioutput=multioutput,
        squared=squared,
    )
|
||
|
||
@mean_squared_log_error.register # type: ignore[attr-defined, misc] | ||
def _mean_squared_log_error_fdatabasis( | ||
y_true: FDataBasis, | ||
y_pred: FDataBasis, | ||
*, | ||
sample_weight: Optional[NDArrayFloat] = None, | ||
sample_weight: NDArrayFloat | None = None, | ||
squared: bool = True, | ||
multioutput: MultiOutputType = 'uniform_average', | ||
) -> float: | ||
|
@@ -998,9 +1117,9 @@ def _r2_score_fdatagrid( | |
y_true: FDataGrid, | ||
y_pred: FDataGrid, | ||
*, | ||
sample_weight: Optional[NDArrayFloat] = None, | ||
sample_weight: NDArrayFloat | None = None, | ||
multioutput: MultiOutputType = 'uniform_average', | ||
) -> Union[float, FDataGrid]: | ||
) -> float | FDataGrid: | ||
from ..exploratory.stats import mean | ||
|
||
if y_pred.n_samples < 2: | ||
|
@@ -1030,7 +1149,7 @@ def _r2_score_fdatabasis( | |
y_true: FDataBasis, | ||
y_pred: FDataBasis, | ||
*, | ||
sample_weight: Optional[NDArrayFloat] = None, | ||
sample_weight: NDArrayFloat | None = None, | ||
multioutput: MultiOutputType = 'uniform_average', | ||
) -> float: | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[pep8] reported by reviewdog 🐶
DAR201 Missing "Returns" in Docstring: - return