🗑️ Deprecate MlflowMetricsDataSet (#73)

Galileo-Galilei · Aug 29, 2021 · 463f9db · 463f9db
1 parent dbeb3f4
commit 463f9db
Show file tree

Hide file tree

Showing 5 changed files with 27 additions and 2 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -15,7 +15,7 @@
 ### Changed
 
 - :recycle: Move the ``flatten_dict`` function to the ``hooks.utils`` folder and rename it ``_flatten_dict`` to make it more explicit that it is not a user-facing function: it should not be used directly and comes with no guarantee. This is not considered a breaking change since it is an undocumented function.
-
+- :wastebasket: Deprecate ``MlflowMetricsDataSet`` in favor of the two newly added datasets ``MlflowMetricDataSet`` and ``MlflowMetricHistoryDataSet``. It will be removed in ``kedro-mlflow==0.8.0``.
 
 ## [0.7.3] - 2021-08-16
 

diff --git a/docs/source/04_experimentation_tracking/05_version_metrics.md b/docs/source/04_experimentation_tracking/05_version_metrics.md
@@ -65,7 +65,7 @@ my_model_metric:
         mode: append #  OPTIONAL: likely better than the default "overwrite". Will be ignored if "step" is provided.
 ```
 
-# Saving the evolution of a metric during training with ``MlflowMetricHistoryDataSet``
+### Saving the evolution of a metric during training with ``MlflowMetricHistoryDataSet``
 
 The ``MlflowMetricHistoryDataSet`` is an ``AbstractDataSet`` which enables you to save or load the evolution of a metric in various formats. You must specify the ``key`` (i.e. the name to display in mlflow) when creating the dataset. Some examples follow:
 
@@ -133,6 +133,12 @@ my_model_metric:
 ```
 
 ### Saving several metrics with their entire history with ``MlflowMetricsDataSet``
+
+```eval_rst
+.. warning:: This class is deprecated and will be removed in ``kedro-mlflow==0.8.0``. Use ``MlflowMetricDataSet`` (for a single metric) or ``MlflowMetricHistoryDataSet`` (for the evolution of a metric over time) instead.
+```
+
+
 Since it is an ``AbstractDataSet``, it can be used with the YAML API. You can define it in your ``catalog.yml`` as:
 
 ```yaml

diff --git a/kedro_mlflow/io/metrics/mlflow_metrics_dataset.py b/kedro_mlflow/io/metrics/mlflow_metrics_dataset.py
@@ -3,9 +3,12 @@
 from typing import Any, Dict, Generator, List, Optional, Tuple, Union
 
 import mlflow
+from deprecation import deprecated
 from kedro.io import AbstractDataSet, DataSetError
 from mlflow.tracking import MlflowClient
 
+from kedro_mlflow import __version__
+
 MetricItem = Union[Dict[str, float], List[Dict[str, float]]]
 MetricTuple = Tuple[str, float, int]
 MetricsDict = Dict[str, MetricItem]
@@ -14,6 +17,12 @@
 class MlflowMetricsDataSet(AbstractDataSet):
     """This class represent MLflow metrics dataset."""
 
+    @deprecated(
+        deprecated_in="0.7.3",
+        removed_in="0.8.0",
+        current_version=__version__,
+        details="Deprecated in favor of 'MlflowMetricDataSet' (for a single metric) or 'MlflowMetricHistoryDataSet '(for the metric evolution over time)",
+    )
     def __init__(
         self,
         run_id: str = None,

diff --git a/requirements.txt b/requirements.txt
@@ -1,2 +1,3 @@
 kedro>=0.17.1, <0.18.0
 mlflow>=1.0.0, <2.0.0
+deprecation==2.1.0
diff --git a/tests/io/metrics/test_mlflow_metrics_dataset.py b/tests/io/metrics/test_mlflow_metrics_dataset.py
@@ -2,13 +2,15 @@
 
 import mlflow
 import pytest
+from deprecation import fail_if_not_removed
 from kedro.io import DataSetError
 from mlflow.tracking import MlflowClient
 from pytest_lazyfixture import lazy_fixture
 
 from kedro_mlflow.io.metrics import MlflowMetricsDataSet
 
 
+@fail_if_not_removed
 def assert_are_metrics_logged(
     data: Dict[str, Union[float, List[float]]],
     client: MlflowClient,
@@ -69,6 +71,7 @@ def metrics3():
     return {"metric1": {"step": 0, "value": 1.1}}
 
 
+@fail_if_not_removed
 @pytest.mark.parametrize(
     "data, prefix",
     [
@@ -108,6 +111,7 @@ def test_mlflow_metrics_dataset_saved_and_logged(tmp_path, tracking_uri, data, p
         assert data[data_key] == catalog_metrics[k]
 
 
+@fail_if_not_removed
 def test_mlflow_metrics_dataset_saved_without_run_id(tmp_path, tracking_uri, metrics3):
     """Check if MlflowMetricsDataSet can be saved in catalog when filepath is given,
     and if logged in mlflow.
@@ -125,6 +129,7 @@ def test_mlflow_metrics_dataset_saved_without_run_id(tmp_path, tracking_uri, met
     assert_are_metrics_logged(metrics3, mlflow_client, run_id, prefix)
 
 
+@fail_if_not_removed
 def test_mlflow_metrics_dataset_exists(tmp_path, tracking_uri, metrics3):
     """Check if MlflowMetricsDataSet is well identified as
     existing if it has already been saved.
@@ -139,6 +144,7 @@ def test_mlflow_metrics_dataset_exists(tmp_path, tracking_uri, metrics3):
     assert mlflow_metrics_dataset.exists()
 
 
+@fail_if_not_removed
 def test_mlflow_metrics_dataset_does_not_exist(tmp_path, tracking_uri, metrics3):
     """Check if MlflowMetricsDataSet is well identified as
     not existingif it has never been saved.
@@ -153,6 +159,7 @@ def test_mlflow_metrics_dataset_does_not_exist(tmp_path, tracking_uri, metrics3)
     assert not mlflow_metrics_dataset.exists()
 
 
+@fail_if_not_removed
 def test_mlflow_metrics_dataset_fails_with_invalid_metric(
     tmp_path, tracking_uri, metrics3
 ):
@@ -171,6 +178,7 @@ def test_mlflow_metrics_dataset_fails_with_invalid_metric(
         )  # key: value is not valid, you must specify {key: {value, step}}
 
 
+@fail_if_not_removed
 def test_mlflow_metrics_logging_deactivation(tracking_uri, metrics):
     mlflow_metrics_dataset = MlflowMetricsDataSet(prefix="hello")
 
@@ -200,6 +208,7 @@ def test_mlflow_metrics_logging_deactivation(tracking_uri, metrics):
     assert all_runs_id_beginning == all_runs_id_end
 
 
+@fail_if_not_removed
 def test_mlflow_metrics_logging_deactivation_is_bool():
     mlflow_metrics_dataset = MlflowMetricsDataSet(prefix="hello")