Skip to content

Commit

Permalink
🗑️ Deprecate MlflowMetricsDataSet (#73)
Browse files Browse the repository at this point in the history
  • Loading branch information
Galileo-Galilei committed Aug 29, 2021
1 parent dbeb3f4 commit 463f9db
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 2 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
### Changed

- :recycle: Move ``flatten_dict`` function to ``hooks.utils`` folder and rename it ``_flatten_dict`` to make it more explicit that it is not a user-facing function: it should not be used directly and comes with no guarantee. This is not considered a breaking change since it is an undocumented function.

- :wastebasket: Deprecate ``MlflowMetricsDataSet`` in favor of the 2 newly added datasets ``MlflowMetricDataSet`` and ``MlflowMetricHistoryDataSet``. It will be removed in ``kedro-mlflow==0.8.0``.

## [0.7.3] - 2021-08-16

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ my_model_metric:
mode: append # OPTIONAL: likely better than the default "overwrite". Will be ignored if "step" is provided.
```
# Saving the evolution of a metric during training with ``MlflowMetricHistoryDataSet``
### Saving the evolution of a metric during training with ``MlflowMetricHistoryDataSet``
The ``MlflowMetricHistoryDataSet`` is an ``AbstractDataSet`` which enables you to save or load the evolution of a metric in various formats. You must specify the ``key`` (i.e. the name to display in mlflow) when creating the dataset. Some examples follow:
Expand Down Expand Up @@ -133,6 +133,12 @@ my_model_metric:
```
### Saving several metrics with their entire history with ``MlflowMetricsDataSet``
```eval_rst
.. warning:: This class is deprecated and will be removed in ``kedro-mlflow==0.8.0``. Use ``MlflowMetricDataSet`` or ``MlflowMetricHistoryDataSet`` instead.
```
Since it is an ``AbstractDataSet``, it can be used with the YAML API. You can define it in your ``catalog.yml`` as:
```yaml
Expand Down
9 changes: 9 additions & 0 deletions kedro_mlflow/io/metrics/mlflow_metrics_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@
from typing import Any, Dict, Generator, List, Optional, Tuple, Union

import mlflow
from deprecation import deprecated
from kedro.io import AbstractDataSet, DataSetError
from mlflow.tracking import MlflowClient

from kedro_mlflow import __version__

MetricItem = Union[Dict[str, float], List[Dict[str, float]]]
MetricTuple = Tuple[str, float, int]
MetricsDict = Dict[str, MetricItem]
Expand All @@ -14,6 +17,12 @@
class MlflowMetricsDataSet(AbstractDataSet):
"""This class represent MLflow metrics dataset."""

@deprecated(
deprecated_in="0.7.3",
removed_in="0.8.0",
current_version=__version__,
details="Deprecated in favor of 'MlflowMetricDataSet' (for a single metric) or 'MlflowMetricHistoryDataSet '(for the metric evolution over time)",
)
def __init__(
self,
run_id: str = None,
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
kedro>=0.17.1, <0.18.0
mlflow>=1.0.0, <2.0.0
deprecation==2.1.0
9 changes: 9 additions & 0 deletions tests/io/metrics/test_mlflow_metrics_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@

import mlflow
import pytest
from deprecation import fail_if_not_removed
from kedro.io import DataSetError
from mlflow.tracking import MlflowClient
from pytest_lazyfixture import lazy_fixture

from kedro_mlflow.io.metrics import MlflowMetricsDataSet


@fail_if_not_removed
def assert_are_metrics_logged(
data: Dict[str, Union[float, List[float]]],
client: MlflowClient,
Expand Down Expand Up @@ -69,6 +71,7 @@ def metrics3():
return {"metric1": {"step": 0, "value": 1.1}}


@fail_if_not_removed
@pytest.mark.parametrize(
"data, prefix",
[
Expand Down Expand Up @@ -108,6 +111,7 @@ def test_mlflow_metrics_dataset_saved_and_logged(tmp_path, tracking_uri, data, p
assert data[data_key] == catalog_metrics[k]


@fail_if_not_removed
def test_mlflow_metrics_dataset_saved_without_run_id(tmp_path, tracking_uri, metrics3):
"""Check if MlflowMetricsDataSet can be saved in catalog when filepath is given,
and if logged in mlflow.
Expand All @@ -125,6 +129,7 @@ def test_mlflow_metrics_dataset_saved_without_run_id(tmp_path, tracking_uri, met
assert_are_metrics_logged(metrics3, mlflow_client, run_id, prefix)


@fail_if_not_removed
def test_mlflow_metrics_dataset_exists(tmp_path, tracking_uri, metrics3):
"""Check if MlflowMetricsDataSet is well identified as
existing if it has already been saved.
Expand All @@ -139,6 +144,7 @@ def test_mlflow_metrics_dataset_exists(tmp_path, tracking_uri, metrics3):
assert mlflow_metrics_dataset.exists()


@fail_if_not_removed
def test_mlflow_metrics_dataset_does_not_exist(tmp_path, tracking_uri, metrics3):
"""Check if MlflowMetricsDataSet is well identified as
not existingif it has never been saved.
Expand All @@ -153,6 +159,7 @@ def test_mlflow_metrics_dataset_does_not_exist(tmp_path, tracking_uri, metrics3)
assert not mlflow_metrics_dataset.exists()


@fail_if_not_removed
def test_mlflow_metrics_dataset_fails_with_invalid_metric(
tmp_path, tracking_uri, metrics3
):
Expand All @@ -171,6 +178,7 @@ def test_mlflow_metrics_dataset_fails_with_invalid_metric(
) # key: value is not valid, you must specify {key: {value, step}}


@fail_if_not_removed
def test_mlflow_metrics_logging_deactivation(tracking_uri, metrics):
mlflow_metrics_dataset = MlflowMetricsDataSet(prefix="hello")

Expand Down Expand Up @@ -200,6 +208,7 @@ def test_mlflow_metrics_logging_deactivation(tracking_uri, metrics):
assert all_runs_id_beginning == all_runs_id_end


@fail_if_not_removed
def test_mlflow_metrics_logging_deactivation_is_bool():
mlflow_metrics_dataset = MlflowMetricsDataSet(prefix="hello")

Expand Down

0 comments on commit 463f9db

Please sign in to comment.