Skip to content

Commit

Permalink
FIX #116 - Make MlflowModelSaverDataSet compatible with MlflowArtifac…
Browse files Browse the repository at this point in the history
…tDataSet by turning filepath argument to a pathlib object instead of string
  • Loading branch information
kaemo authored and Galileo-Galilei committed Nov 18, 2020
1 parent f4f4128 commit 63dcd50
Show file tree
Hide file tree
Showing 6 changed files with 40 additions and 22 deletions.
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
exclude: ^kedro_mlflow/template/project/run.py$
repos:
- repo: https://github.com/psf/black
rev: 19.10b0
rev: 20.8b1
hooks:
- id: black
language_version: python3.7
Expand All @@ -10,11 +10,11 @@ repos:
hooks:
- id: isort
- repo: https://gitlab.com/pycqa/flake8
rev: 3.7.9
rev: 3.8.4
hooks:
- id: flake8
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.5.0
rev: v3.3.0
hooks:
- id: check-merge-conflict
- id: debug-statements
Expand Down
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

## [Unreleased]

### Fixed

- Fix `TypeError: unsupported operand type(s) for /: 'str' and 'str'` when using `MlflowArtifactDataSet` with `MlflowModelSaverDataSet` ([#116](https://github.com/Galileo-Galilei/kedro-mlflow/issues/116))

## [0.4.0] - 2020-11-03

### Added
Expand Down
15 changes: 15 additions & 0 deletions docs/source/03_tutorial/06_version_models.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,18 @@ my_custom_model:
flavor: my_package.custom_mlflow_flavor
pyfunc_workflow: python_model # or loader_module
```

### How can I save a model locally and log it in MLflow in one step?

If you want to save your model both locally and remotely within the same run, you can leverage `MlflowArtifactDataSet`:

```yaml
sklearn_model:
type: kedro_mlflow.io.artifacts.MlflowArtifactDataSet
data_set:
type: kedro_mlflow.io.models.MlflowModelSaverDataSet
flavor: mlflow.sklearn
filepath: data/06_models/sklearn_model
```

This might be useful if you want to always read the latest model saved locally and log it to MLflow each time a new model is trained, for tracking purposes.
13 changes: 7 additions & 6 deletions kedro_mlflow/io/models/mlflow_abstract_model_dataset.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import importlib
from pathlib import Path
from typing import Any, Dict, Optional

from kedro.io import AbstractVersionedDataSet, Version
Expand All @@ -12,7 +13,7 @@ class MlflowAbstractModelDataSet(AbstractVersionedDataSet):

def __init__(
self,
filepath,
filepath: str,
flavor: str,
pyfunc_workflow: Optional[str] = None,
load_args: Dict[str, Any] = None,
Expand All @@ -27,23 +28,23 @@ def __init__(
During load, the model is pulled from MLflow run with `run_id`.
Args:
filepath (str): Path to store the dataset locally.
flavor (str): Built-in or custom MLflow model flavor module.
Must be Python-importable.
filepath (str): Path to store the dataset locally.
run_id (Optional[str], optional): MLflow run ID to use to load
the model from or save the model to. If provided,
takes precedence over filepath. Defaults to None.
pyfunc_workflow (str, optional): Either `python_model` or `loader_module`.
See https://www.mlflow.org/docs/latest/python_api/mlflow.pyfunc.html#workflows.
load_args (Dict[str, Any], optional): Arguments to `load_model`
function from specified `flavor`. Defaults to {}.
save_args (Dict[str, Any], optional): Arguments to `log_model`
function from specified `flavor`. Defaults to {}.
version (Version, optional): Specific version to load.
Raises:
DataSetError: When passed `flavor` does not exist.
"""
super().__init__(filepath, version)

super().__init__(Path(filepath), version)

self._flavor = flavor
self._pyfunc_workflow = pyfunc_workflow

Expand Down
12 changes: 4 additions & 8 deletions kedro_mlflow/io/models/mlflow_model_saver_dataset.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import shutil
from pathlib import Path
from os.path import exists
from typing import Any, Dict, Optional

from kedro.io import Version
Expand Down Expand Up @@ -27,7 +27,7 @@ def __init__(
Parameters are passed from the Data Catalog.
During save, the model is saved locally at `filepat`
During save, the model is saved locally at `filepath`
During load, the model is loaded from the local `filepath`.
Args:
Expand All @@ -41,7 +41,6 @@ def __init__(
save_args (Dict[str, Any], optional): Arguments to `save_model`
function from specified `flavor`. Defaults to None.
version (Version, optional): Kedro version to use. Defaults to None.
Raises:
DataSetError: When passed `flavor` does not exist.
"""
Expand All @@ -60,11 +59,8 @@ def _load(self) -> Any:
Returns:
Any: Deserialized model.
"""

model_uri = self._get_load_path().as_uri()

return self._mlflow_model_module.load_model(
model_uri=model_uri, **self._load_args
model_uri=self._get_load_path().as_uri(), **self._load_args
)

def _save(self, model: Any) -> None:
Expand All @@ -76,7 +72,7 @@ def _save(self, model: Any) -> None:
save_path = self._get_save_path()
# In case of an unversioned model we need to remove the save path
# because MLflow cannot overwrite the target directory.
if Path(save_path).exists():
if exists(save_path):
shutil.rmtree(save_path)

if self._flavor == "mlflow.pyfunc":
Expand Down
12 changes: 7 additions & 5 deletions tests/io/models/test_mlflow_model_saver_dataset.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# import mlflow
import mlflow
import pandas as pd
import pytest
Expand Down Expand Up @@ -91,14 +90,15 @@ def kedro_pipeline_model(tmp_path, pipeline_ml_obj, dummy_catalog):


def test_save_unversioned_under_same_path(
linreg_path, linreg_model,
linreg_path,
linreg_model,
):
model_config = {
"name": "linreg",
"config": {
"type": "kedro_mlflow.io.models.MlflowModelSaverDataSet",
"flavor": "mlflow.sklearn",
"filepath": linreg_path,
"filepath": linreg_path.as_posix(),
},
}
mlflow_model_ds = MlflowModelSaverDataSet.from_config(**model_config)
Expand All @@ -114,7 +114,7 @@ def test_save_load_local(linreg_path, linreg_model, versioned):
"name": "linreg",
"config": {
"type": "kedro_mlflow.io.models.MlflowModelSaverDataSet",
"filepath": linreg_path,
"filepath": linreg_path.as_posix(),
"flavor": "mlflow.sklearn",
"versioned": versioned,
},
Expand Down Expand Up @@ -143,7 +143,9 @@ def test_pyfunc_flavor_python_model_save_and_load(
"name": "kedro_pipeline_model",
"config": {
"type": "kedro_mlflow.io.models.MlflowModelSaverDataSet",
"filepath": tmp_path / "data" / "06_models" / "my_custom_model",
"filepath": (
tmp_path / "data" / "06_models" / "my_custom_model"
).as_posix(),
"flavor": "mlflow.pyfunc",
"pyfunc_workflow": "python_model",
"save_args": {"artifacts": artifacts, "conda_env": {"python": "3.7.0"}},
Expand Down

0 comments on commit 63dcd50

Please sign in to comment.