🐛 Correctly loads the artifact when both artifact_path and run_id are specified in MlflowArtifactDataSet (#362)
Galileo-Galilei · Oct 3, 2022 · f2206b3 · f2206b3
1 parent d0fd8fc
commit f2206b3
Show file tree

Hide file tree

Showing 3 changed files with 32 additions and 2 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,10 @@
 
 ## [Unreleased]
 
+### Fixed
+
+-   :bug: ``MlflowArtifactDataSet.load()`` now correctly loads the artifact when both ``artifact_path`` and ``run_id`` arguments are specified instead of raising an error ([#362](https://github.com/Galileo-Galilei/kedro-mlflow/issues/362))
+
 ## [0.11.3] - 2022-09-06
 
 ### Changed
@@ -10,7 +14,7 @@
 
 ### Fixed
 
--   :bug: `kedro-mlflow` now use the `package_name` as experiment name by default if it is not specified. This is done to ensure consistency with the behaviour with no `mlflow.yml` file ([#328](https://github.com/Galileo-Galilei/kedro-mlflow/issues/328))
+-   :bug: `kedro-mlflow` now uses the `package_name` as experiment name by default if it is not specified. This is done to ensure consistency with the behaviour with no `mlflow.yml` file ([#328](https://github.com/Galileo-Galilei/kedro-mlflow/issues/328))
 -   :memo: Update broken links to the most recent kedro and mlflow documentation
 
 ## [0.11.2] - 2022-08-28

diff --git a/kedro_mlflow/io/artifacts/mlflow_artifact_dataset.py b/kedro_mlflow/io/artifacts/mlflow_artifact_dataset.py
@@ -96,7 +96,7 @@ def _load(self) -> Any:  # pragma: no cover
                         local_path = Path(self._path)
 
                     artifact_path = (
-                        (self.artifact_path / local_path.name).as_posix()
+                        (self.artifact_path / Path(local_path.name)).as_posix()
                         if self.artifact_path
                         else local_path.name
                     )

diff --git a/tests/io/artifacts/test_mlflow_artifact_dataset.py b/tests/io/artifacts/test_mlflow_artifact_dataset.py
@@ -228,6 +228,32 @@ def test_artifact_dataset_load_with_run_id(tmp_path, tracking_uri, df1, df2):
     assert df1.equals(mlflow_csv_dataset.load())
 
 
+@pytest.mark.parametrize("artifact_path", (None, "folder", "folder/subfolder"))
+def test_artifact_dataset_load_with_run_id_and_artifact_path(
+    tmp_path, tracking_uri, df1, artifact_path
+):
+    print("artifact_path", artifact_path)
+    mlflow.set_tracking_uri(tracking_uri.as_uri())
+
+    # save df1 inside a new mlflow run and record that run's id
+    mlflow_csv_dataset1 = MlflowArtifactDataSet(
+        data_set=dict(type=CSVDataSet, filepath=(tmp_path / "df1.csv").as_posix()),
+        artifact_path=artifact_path,
+    )
+    with mlflow.start_run():
+        mlflow_csv_dataset1.save(df1)
+        first_run_id = mlflow.active_run().info.run_id
+
+    # same dataset configuration, but bound through run_id to the first run (closed by now)
+    mlflow_csv_dataset2 = MlflowArtifactDataSet(
+        data_set=dict(type=CSVDataSet, filepath=(tmp_path / "df1.csv").as_posix()),
+        artifact_path=artifact_path,
+        run_id=first_run_id,
+    )
+
+    assert df1.equals(mlflow_csv_dataset2.load())
+
+
 @pytest.mark.parametrize("artifact_path", [None, "partitioned_data"])
 def test_partitioned_dataset_save_and_reload(
     tmp_path, tracking_uri, artifact_path, df1, df2