From 2b26d29bac01c5347c430a424bfaf3fb6588c7ab Mon Sep 17 00:00:00 2001
From: Galileo Galilei
Date: Mon, 19 Oct 2020 21:14:58 +0200
Subject: [PATCH 1/3] PARTIAL #6 - Fix documentation typos that reference the
 wrong version

---
 .bumpversion.cfg                              |  6 ++++++
 CHANGELOG.md                                  | 11 +++++-----
 docs/source/01_introduction/02_motivation.md  | 20 +++++++++----------
 .../source/01_introduction/03_installation.md |  4 ++--
 .../01_example_project.md                     |  2 +-
 .../02_hello_world_example/02_first_steps.md  |  2 +-
 .../03_tutorial/04_version_parameters.md      |  2 +-
 .../source/03_tutorial/05_version_datasets.md |  2 +-
 8 files changed, 28 insertions(+), 21 deletions(-)

diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 53075a2a..8e85ed4a 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -4,3 +4,9 @@ current_version = 0.3.0
 [bumpversion:file:setup.py]

 [bumpversion:file:kedro_mlflow/__init__.py]
+
+[bumpversion:file:kedro-mlflow/docs/source/01_introduction/02_motivation.md]
+[bumpversion:file:kedro-mlflow/docs/source/01_introduction/03_installation.md]
+[bumpversion:file:kedro-mlflow/docs/source/02_hello_world_example/01_example_project.md]
+[bumpversion:file:kedro-mlflow/docs/source/02_hello_world_example/02_first_steps.md]
+[bumpversion:file:kedro-mlflow/docs/source/03_tutorial/04_version_parameters.md]
+[bumpversion:file:kedro-mlflow/docs/source/03_tutorial/05_version_datasets.md]

diff --git a/CHANGELOG.md b/CHANGELOG.md
index dfd5d1ad..1118b446 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,19 +9,20 @@

 ### Fixed

-- `get_mlflow_config` now uses the kedro context config_loader to get configs (#66). This indirectly solves the following issues:
-    - `get_mlflow_config` now works in interactive mode if `load_context` is called with a path different from the working directory (#30)
+- `get_mlflow_config` now uses the Kedro `ProjectContext` `ConfigLoader` to get configs (#66). This indirectly solves the following issues:
+  - `get_mlflow_config` now works in interactive mode if `load_context` is called with a path different from the working directory (#30)
   - kedro_mlflow now works fine with kedro jupyter notebook independently of the working directory (#64)
-    - You can use global variables in `mlflow.yml` which is now properly parsed if you use a `TemplatedConfigLoader` (#72)
+  - You can use global variables in `mlflow.yml`, which is now properly parsed if you use a `TemplatedConfigLoader` (#72)
 - `mlflow init` is now getting conf path from context.CONF_ROOT instead of hardcoded conf folder. This makes the package robust to Kedro changes.

 ### Changed

-- `MlflowNodeHook` now has a before_pipeline_run hook which stores the ProjectContext and enable to retrieve configuration.
+- `MlflowNodeHook` now has a `before_pipeline_run` hook which stores the `ProjectContext` and enables retrieving the configuration (#66).
+- Documentation references to the plugin are now dynamic when necessary (#6).

 ### Removed

-`kedro mlflow init` command is no longer declaring hooks in `run.py`. You must now [register your hooks manually](docs/source/03_tutorial/02_setup.md#declaring-kedro-mlflow-hooks) in the ``run.py`` (kedro > 0.16.0), ``.kedro.yml`` (kedro >= 0.16.5) or ``pyproject.toml`` (kedro >= 0.16.5)
+- The `kedro mlflow init` command no longer declares hooks in `run.py`. You must now [register your hooks manually](docs/source/03_tutorial/02_setup.md#declaring-kedro-mlflow-hooks) in ``run.py`` (kedro > 0.16.0), ``.kedro.yml`` (kedro >= 0.16.5) or ``pyproject.toml`` (kedro >= 0.16.5).
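For reference, the manual declaration in ``run.py`` looks roughly like the sketch below. This is a hedged example assuming the standard Kedro 0.16 project template; the project name is hypothetical and the import path should be checked against your installed kedro-mlflow version:

```python
# run.py -- a minimal sketch, assuming the standard Kedro 0.16 template
from kedro.framework.context import KedroContext

# import path is an assumption; verify it against your installed version
from kedro_mlflow.framework.hooks import MlflowNodeHook, MlflowPipelineHook


class ProjectContext(KedroContext):
    """Context of a hypothetical Kedro project using kedro-mlflow."""

    project_name = "my_project"  # hypothetical name
    project_version = "0.16.0"
    # declare the kedro-mlflow hooks manually
    hooks = (MlflowPipelineHook(), MlflowNodeHook())
```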
## [0.3.0] - 2020-10-11

diff --git a/docs/source/01_introduction/02_motivation.md b/docs/source/01_introduction/02_motivation.md
index 1169e6a0..1d3f97be 100644
--- a/docs/source/01_introduction/02_motivation.md
+++ b/docs/source/01_introduction/02_motivation.md
@@ -4,7 +4,7 @@

 Basically, you should use ``kedro-mlflow`` in **any ``Kedro`` project which involves machine learning** / deep learning. As stated in the [introduction](./01_introduction.md), ``Kedro``'s current versioning (as of version ``0.16.1``) is not sufficient for machine learning projects: it lacks a UI and a ``run`` management system. Besides, the ``KedroPipelineModel`` ability to serve a kedro pipeline as an API or a batch in one line of code is a great addition for collaboration and transition to production.

-If you do not use ``Kedro`` or if you do pure data manipulation which do not involve machine learning, this plugin is not what you are seeking for ;)
+If you do not use ``Kedro`` or if you do pure data manipulation which does not involve *machine learning*, this plugin is not what you are looking for ;)

 ## Why should I use kedro-mlflow ?
@@ -32,15 +32,15 @@ Above implementations have the advantage of being very straightforward and *mlflow*

 ``kedro-mlflow`` enforces these best practices while implementing a clear interface for each mlflow action in the Kedro template. The chart below maps each mlflow action to the Python API provided by kedro-mlflow and to the location in the Kedro template where the action should be performed.

-|Mlflow action                |Template file            |Python API              |
-|:----------------------------|:-----------------------|:-----------------------|
-|Set up configuration         |``mlflow.yml``           |``MlflowPipelineHook``  |
-|Logging parameters           |``run.py``               |``MlflowNodeHook``      |
-|Logging artifacts            |``catalog.yml``          |``MlflowArtifactDataSet`` |
-|Logging models               |NA                       |NA                      |
-|Logging metrics              |``catalog.yml``          |``MlflowMetricsDataSet``|
+|Mlflow action                |Template file            |Python API                                         |
+|:----------------------------|:-----------------------|:-------------------------------------------------|
+|Set up configuration         |``mlflow.yml``           |``MlflowPipelineHook``                             |
+|Logging parameters           |``run.py``               |``MlflowNodeHook``                                 |
+|Logging artifacts            |``catalog.yml``          |``MlflowArtifactDataSet``                          |
+|Logging models               |NA                       |NA                                                 |
+|Logging metrics              |``catalog.yml``          |``MlflowMetricsDataSet``                           |
+|Logging Pipeline as model    |``pipeline.py``          |``KedroPipelineModel`` and ``pipeline_ml_factory``|

-In the current version (``kedro_mlflow=0.2.0``), kedro-mlflow does not provide interface to log metrics, set tags or log models outside a Kedro ``Pipeline``. These decisions are subject to debate and design decisions (for instance, metrics are often updated in a loop during each epoch / training iteration and it does not always make sense to register the metric between computation steps, e.g. as a an I/O operation after a node run).
+In the current version (``kedro_mlflow=0.3.0``), `kedro-mlflow` does not provide an interface to set tags or log models outside a Kedro ``Pipeline``. These decisions are subject to debate and design considerations (for instance, metrics are often updated in a loop during each epoch / training iteration, and it does not always make sense to register the metric between computation steps, e.g. as an I/O operation after a node run).
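To make the table concrete, here is a hedged sketch of the ``MlflowArtifactDataSet`` entry used through the Python API rather than ``catalog.yml``. The dataset type and filepath are illustrative, and the import path should be checked against your installed version:

```python
from kedro.extras.datasets.pandas import CSVDataSet

# import path is an assumption; verify it against your installed version
from kedro_mlflow.io import MlflowArtifactDataSet

# wrap an existing dataset so that saving it during a pipeline run
# also logs the file as an mlflow artifact of the current run
my_dataset = MlflowArtifactDataSet(
    data_set=dict(type=CSVDataSet, filepath="data/08_reporting/predictions.csv")
)
```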
-_**Note:** the version ``0.2.0`` does not need any ``MLProject`` file to use mlflow inside your Kedro project. As seen in the [introduction](./01_introduction.md), this file overlaps with Kedro configuration files._
+_**Note:** the version ``0.3.0`` does not need any ``MLProject`` file to use mlflow inside your Kedro project. As seen in the [introduction](./01_introduction.md), this file overlaps with Kedro configuration files._

diff --git a/docs/source/01_introduction/03_installation.md b/docs/source/01_introduction/03_installation.md
index 01196470..d298c6da 100644
--- a/docs/source/01_introduction/03_installation.md
+++ b/docs/source/01_introduction/03_installation.md
@@ -36,9 +36,9 @@ projects. It is developed as part of the Kedro initiative at QuantumBlack.

 Installed plugins:

-kedro_mlflow: 0.2.0 (hooks:global,project)
+kedro_mlflow: 0.3.0 (hooks:global,project)
 ```

-The version ``0.2.0`` of the plugin is installed and has both global and project commands.
+The version ``0.3.0`` of the plugin is installed and has both global and project commands.

 That's it! You are now ready to go!

diff --git a/docs/source/02_hello_world_example/01_example_project.md b/docs/source/02_hello_world_example/01_example_project.md
index dea8a182..04699e7c 100644
--- a/docs/source/02_hello_world_example/01_example_project.md
+++ b/docs/source/02_hello_world_example/01_example_project.md
@@ -91,6 +91,6 @@ Install the project dependencies:

 ```console
 pip install -r src/requirements.txt
-pip install --upgrade kedro-mlflow==0.2.0
+pip install --upgrade kedro-mlflow==0.3.0
 ```

 **Warning: do not use ``kedro install``; it does not seem to install the packages in your activated environment.**

diff --git a/docs/source/02_hello_world_example/02_first_steps.md b/docs/source/02_hello_world_example/02_first_steps.md
index ec281da2..9cef6eb2 100644
--- a/docs/source/02_hello_world_example/02_first_steps.md
+++ b/docs/source/02_hello_world_example/02_first_steps.md
@@ -121,4 +121,4 @@ Reopen the ui, select the last run and see that the file was uploaded:

 This works for any type of file (including images with ``MatplotlibWriter``) and the UI even offers a preview for ``png`` and ``csv``, which is really convenient to compare runs.

-*Note: Mlflow offers specific logging for machine learning models that may be better suited for your use case, but it is not yet supported in ``kedro-mlflow==0.2.0``*
+*Note: Mlflow offers specific logging for machine learning models that may be better suited for your use case, but it is not yet supported in ``kedro-mlflow==0.3.0``*

diff --git a/docs/source/03_tutorial/04_version_parameters.md b/docs/source/03_tutorial/04_version_parameters.md
index a307b566..8950caee 100644
--- a/docs/source/03_tutorial/04_version_parameters.md
+++ b/docs/source/03_tutorial/04_version_parameters.md
@@ -1,7 +1,7 @@
 # Parameters versioning

 ## Automatic parameters versioning

-Parameters versioning is automatic when the ``MlflowNodeHook`` is added to [the hook list of the ``ProjectContext``](./02_setup.md). In ``kedro-mlflow==0.2.0``, this hook has a parameter called ``flatten_dict_params`` which enables to [log as distinct parameters the (key, value) pairs of a ```Dict`` parameter](../05_python_objects/02_Hooks.md).
+Parameters versioning is automatic when the ``MlflowNodeHook`` is added to [the hook list of the ``ProjectContext``](./02_setup.md). In ``kedro-mlflow==0.3.0``, the `mlflow.yml` configuration file has a parameter called ``flatten_dict_params`` which enables you to [log the (key, value) pairs of a ``Dict`` parameter as distinct parameters](../05_python_objects/02_Hooks.md), as illustrated in the sketch below.
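As an illustration, a nested parameters dictionary would be logged as flattened (key, value) pairs. The helper below is a hypothetical sketch of the flattening behaviour, not the plugin's internal implementation:

```python
# a sketch of the effect of ``flatten_dict_params``, with a hypothetical helper
params = {"model": {"lr": 0.01, "max_depth": 5}, "seed": 42}


def flatten(d, parent_key="", sep="."):
    # recursively flatten nested dicts into dotted keys
    items = {}
    for key, value in d.items():
        new_key = f"{parent_key}{sep}{key}" if parent_key else key
        if isinstance(value, dict):
            items.update(flatten(value, new_key, sep=sep))
        else:
            items[new_key] = value
    return items


print(flatten(params))
# {'model.lr': 0.01, 'model.max_depth': 5, 'seed': 42}
```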
 You **do not need any additional configuration** to benefit from parameters versioning.

diff --git a/docs/source/03_tutorial/05_version_datasets.md b/docs/source/03_tutorial/05_version_datasets.md
index 958a131e..d0ed0e95 100644
--- a/docs/source/03_tutorial/05_version_datasets.md
+++ b/docs/source/03_tutorial/05_version_datasets.md
@@ -67,7 +67,7 @@ The location where the artifact will be stored does not depend on the logging function

 You can also refer to [this issue](https://github.com/Galileo-Galilei/kedro-mlflow/issues/15) for further details.

-In ``kedro-mlflow==0.2.0`` you must configure these elements by yourself. Further releases will introduce helpers for configuration.
+In ``kedro-mlflow==0.3.0`` you must configure these elements by yourself. Further releases will introduce helpers for configuration.

 ### Can I log an artifact in a specific run?

 The ``MlflowArtifactDataSet`` has an extra argument ``run_id`` which specifies the run in which the artifact will be logged. **Be cautious, because this argument will take precedence over the current run** when you call ``kedro run``, causing the artifact to be logged in a different run than all the other data of the run.

From 73985266aff4c3e56e491646791b68b5d6a75942 Mon Sep 17 00:00:00 2001
From: Galileo Galilei
Date: Sun, 25 Oct 2020 22:21:02 +0100
Subject: [PATCH 2/3] FIX #99 - Ignore tests and setup.py for coverage

---
 CHANGELOG.md | 2 +-
 codecov.yml  | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)
 create mode 100644 codecov.yml

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1118b446..6cc05d94 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -17,8 +17,8 @@

 ### Changed

-- `MlflowNodeHook` now has a `before_pipeline_run` hook which stores the `ProjectContext` and enables retrieving the configuration (#66).
 - Documentation references to the plugin are now dynamic when necessary (#6).
+- The test coverage now excludes `tests` and `setup.py` (#99).

 ### Removed

diff --git a/codecov.yml b/codecov.yml
new file mode 100644
index 00000000..1909721d
--- /dev/null
+++ b/codecov.yml
@@ -0,0 +1,3 @@
+ignore:
+  - "setup.py"
+  - "tests/**/*"

From cd16633df5b156aae869b9818f12d5360b9cdf57 Mon Sep 17 00:00:00 2001
From: Galileo Galilei
Date: Sun, 25 Oct 2020 22:22:05 +0100
Subject: [PATCH 3/3] FIX #93 - Enforce inference pipeline output unpacking

---
 CHANGELOG.md                                  |  2 +
 docs/source/05_python_objects/03_Pipelines.md |  8 +-
 kedro_mlflow/mlflow/kedro_pipeline_model.py   | 12 ++-
 kedro_mlflow/pipeline/__init__.py             |  1 +
 kedro_mlflow/pipeline/pipeline_ml.py          | 98 ++++++++++++-------
 tests/mlflow/test_kedro_pipeline_model.py     |  2 +-
 tests/pipeline/test_pipeline_ml.py            | 49 ++++++++++
 7 files changed, 135 insertions(+), 37 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6cc05d94..e6ce5f78 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -19,6 +19,8 @@

 - Documentation references to the plugin are now dynamic when necessary (#6).
 - The test coverage now excludes `tests` and `setup.py` (#99).
+- The `KedroPipelineModel` now unpacks the result of the `inference` pipeline: it no longer returns a dictionary keyed by the output's name in the `DataCatalog`, but only the predicted value (#93).
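Concretely, where loading the served model used to return a dictionary keyed by the catalog name, it now returns the bare prediction. A hedged sketch (the run id is a placeholder and the output name "predictions" is illustrative):

```python
import mlflow

# load a model previously logged by kedro-mlflow under the "model" artifact path
loaded_model = mlflow.pyfunc.load_model("runs:/<run_id>/model")

# before this change: loaded_model.predict(data) == {"predictions": value}
# after this change:  loaded_model.predict(data) == value
```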
+
 ### Removed

diff --git a/docs/source/05_python_objects/03_Pipelines.md b/docs/source/05_python_objects/03_Pipelines.md
index de60a62c..53bf0d1a 100644
--- a/docs/source/05_python_objects/03_Pipelines.md
+++ b/docs/source/05_python_objects/03_Pipelines.md
@@ -24,7 +24,13 @@ def create_pipelines(**kwargs) -> Dict[str, Pipeline]:
     }
 ```

-Now each time you will run ``kedro run --pipeline=training`` (provided you registered ``MlflowPipelineHook`` in you ``run.py``), the full inference pipeline will be registered as a mlflow model (with all the outputs produced by training as artifacts : the machine learning, but also the *scaler*, *vectorizer*, *imputer*, or whatever object fitted on data you create in ``training`` and that is used in ``inference``).
+Now, each time you run ``kedro run --pipeline=training`` (provided you registered ``MlflowPipelineHook`` in your ``run.py``), the full inference pipeline will be registered as an mlflow model (with all the outputs produced by training as artifacts: the machine learning model, but also the *scaler*, *vectorizer*, *imputer*, or whatever object fitted on data you create in ``training`` and that is used in ``inference``).
+
+Note that:
+- the `inference` pipeline `input_name` can be a `MemoryDataSet` and it belongs to the inference pipeline `inputs`
+- apart from `input_name`, all other `inference` pipeline `inputs` must be persisted locally on disk (i.e. they must not be `MemoryDataSet`s and must have a local `filepath`)
+- the `inference` pipeline `inputs` must belong to the training `outputs` (vectorizer, binarizer, machine learning model...)
+- the `inference` pipeline must have one and only one `output`

 *Note: If you want to log a ``PipelineML`` object in ``mlflow`` programmatically, you can use the following code snippet:*

diff --git a/kedro_mlflow/mlflow/kedro_pipeline_model.py b/kedro_mlflow/mlflow/kedro_pipeline_model.py
index 4a1989cf..b0c9475f 100644
--- a/kedro_mlflow/mlflow/kedro_pipeline_model.py
+++ b/kedro_mlflow/mlflow/kedro_pipeline_model.py
@@ -14,6 +14,8 @@ def __init__(self, pipeline_ml: PipelineML, catalog: DataCatalog):
         self.pipeline_ml = pipeline_ml
         self.initial_catalog = pipeline_ml.extract_pipeline_catalog(catalog)
         self.loaded_catalog = DataCatalog()
+        # we have the guarantee that there is only one output in inference
+        self.output_name = list(pipeline_ml.inference.outputs())[0]

     def load_context(self, context):

@@ -33,7 +35,11 @@ def load_context(self, context):
             kedro_artifacts_keys - mlflow_artifacts_keys
         )
         raise ValueError(
-            f"Provided artifacts do not match catalog entries:\n- 'artifacts - inference.inputs()' = : {in_artifacts_but_not_inference}'\n- 'inference.inputs() - artifacts' = : {in_inference_but_not_artifacts}'"
+            (
+                "Provided artifacts do not match catalog entries:"
+                f"\n - 'artifacts - inference.inputs()' = : {in_artifacts_but_not_inference}"
+                f"\n - 'inference.inputs() - artifacts' = : {in_inference_but_not_artifacts}"
+            )
         )

         self.loaded_catalog = deepcopy(self.initial_catalog)

@@ -53,4 +59,6 @@ def predict(self, context, model_input):
         run_outputs = runner.run(
             pipeline=self.pipeline_ml.inference, catalog=self.loaded_catalog
         )
-        return run_outputs
+        return run_outputs[
+            self.output_name
+        ]  # unpack the result to avoid messing up the JSON output

diff --git a/kedro_mlflow/pipeline/__init__.py b/kedro_mlflow/pipeline/__init__.py
index d0640b0e..cbe59521 100644
--- a/kedro_mlflow/pipeline/__init__.py
+++ b/kedro_mlflow/pipeline/__init__.py
@@ -1,5 +1,6 @@
 from .pipeline_ml import (
     KedroMlflowPipelineMLDatasetsError,
     KedroMlflowPipelineMLInputsError,
+    KedroMlflowPipelineMLOutputsError,
 )
 from .pipeline_ml_factory import pipeline_ml, pipeline_ml_factory

diff --git a/kedro_mlflow/pipeline/pipeline_ml.py b/kedro_mlflow/pipeline/pipeline_ml.py
index bb61e526..466bf57c 100644
--- a/kedro_mlflow/pipeline/pipeline_ml.py
+++ b/kedro_mlflow/pipeline/pipeline_ml.py
@@ -5,7 +5,12 @@
 from kedro.pipeline import Pipeline
 from kedro.pipeline.node import Node

-MSG_NOT_IMPLEMENTED = "This method is not implemented because it does not make sens for 'PipelineML'. Manipulate directly the training pipeline and recreate the 'PipelineML' with 'pipeline_ml_factory' factory"
+MSG_NOT_IMPLEMENTED = (
+    "This method is not implemented because it does "
+    "not make sense for 'PipelineML'. "
+    "Manipulate the training pipeline directly and "
+    "recreate the 'PipelineML' with the 'pipeline_ml_factory' factory."
+)

 class PipelineML(Pipeline):

@@ -78,7 +83,6 @@ def __init__(
         self.inference = inference
         self.conda_env = conda_env
         self.model_name = model_name
-
         self.input_name = input_name

     @property

@@ -90,6 +94,58 @@ def input_name(self, name: str) -> None:
         self._check_input_name(name)
         self._input_name = name

+    @property
+    def inference(self) -> Pipeline:
+        return self._inference
+
+    @inference.setter
+    def inference(self, inference: Pipeline) -> None:
+        self._check_inference(inference)
+        self._inference = inference
+
+    @property
+    def training(self) -> Pipeline:
+        return Pipeline(self.nodes)
+
+    def _check_input_name(self, input_name: str) -> None:
+        allowed_names = self.inference.inputs()
+        pp_allowed_names = "\n - ".join(allowed_names)
+        if input_name not in allowed_names:
+            raise KedroMlflowPipelineMLInputsError(
+                f"input_name='{input_name}' but it must be an input of 'inference', i.e. one of: \n - {pp_allowed_names}"
+            )
+        else:
+            free_inputs_set = (
+                self.inference.inputs() - {input_name} - self.all_outputs()
+            )
+            if len(free_inputs_set) > 0:
+                raise KedroMlflowPipelineMLInputsError(
+                    """
+                    The following inputs are free for the inference pipeline:
+                    - {inputs}.
+                    No free input is allowed.
+                    Please make sure that 'inference.pipeline.inputs()' are all in 'training.pipeline.all_outputs()',
+                    except possibly 'input_name'.""".format(
+                        inputs="\n - ".join(free_inputs_set)
+                    )
+                )
+
+        return None
+
+    def _check_inference(self, inference: Pipeline) -> None:
+        nb_outputs = len(inference.outputs())
+        outputs_txt = "\n - ".join(inference.outputs())
+        if nb_outputs != 1:
+            raise KedroMlflowPipelineMLOutputsError(
+                (
+                    "The inference pipeline must have one"
+                    " and only one output. You are trying"
+                    " to set an inference pipeline with"
+                    f" '{nb_outputs}' output(s): \n - {outputs_txt}"
+                )
+            )
+
     def extract_pipeline_catalog(self, catalog: DataCatalog) -> DataCatalog:
         sub_catalog = DataCatalog()
         for data_set_name in self.inference.inputs():

@@ -136,36 +192,7 @@ def extract_pipeline_artifacts(self, catalog: DataCatalog):
         }
         return artifacts

-    @property
-    def training(self):
-        return Pipeline(self.nodes)
-
-    def _check_input_name(self, input_name: str) -> str:
-        allowed_names = self.inference.inputs()
-        pp_allowed_names = "\n - ".join(allowed_names)
-        if input_name not in allowed_names:
-            raise KedroMlflowPipelineMLInputsError(
-                f"input_name='{input_name}' but it must be an input of 'inference', i.e. 
one of: \n - {pp_allowed_names}" - ) - else: - free_inputs_set = ( - self.inference.inputs() - {input_name} - self.all_outputs() - ) - if len(free_inputs_set) > 0: - raise KedroMlflowPipelineMLInputsError( - """ - The following inputs are free for the inference pipeline: - - {inputs}. - No free input is allowed. - Please make sure that 'inference.pipeline.inputs()' are all in 'training.pipeline.all_outputs()', - except eventually 'input_name'.""".format( - inputs="\n - ".join(free_inputs_set) - ) - ) - - return None - - def _turn_pipeline_to_ml(self, pipeline): + def _turn_pipeline_to_ml(self, pipeline: Pipeline): return PipelineML( nodes=pipeline.nodes, inference=self.inference, input_name=self.input_name ) @@ -230,10 +257,15 @@ def __or__(self, other): # pragma: no cover class KedroMlflowPipelineMLInputsError(Exception): - """Error raised when the inputs of KedroPipelineMoel are invalid + """Error raised when the inputs of KedroPipelineModel are invalid """ class KedroMlflowPipelineMLDatasetsError(Exception): """Error raised when the inputs of KedroPipelineMoel are invalid """ + + +class KedroMlflowPipelineMLOutputsError(Exception): + """Error raised when the outputs of KedroPipelineModel are invalid + """ diff --git a/tests/mlflow/test_kedro_pipeline_model.py b/tests/mlflow/test_kedro_pipeline_model.py index 65be7c69..4e7e3d25 100644 --- a/tests/mlflow/test_kedro_pipeline_model.py +++ b/tests/mlflow/test_kedro_pipeline_model.py @@ -83,7 +83,7 @@ def test_model_packaging(tmp_path, pipeline_ml_obj): loaded_model = mlflow.pyfunc.load_model( model_uri=(Path(r"runs:/") / run_id / "model").as_posix() ) - assert loaded_model.predict(1) == {"predictions": 2} + assert loaded_model.predict(1) == 2 # should very likely add tests to see what happens when the artifacts diff --git a/tests/pipeline/test_pipeline_ml.py b/tests/pipeline/test_pipeline_ml.py index 9016b2d9..4f6479f8 100644 --- a/tests/pipeline/test_pipeline_ml.py +++ b/tests/pipeline/test_pipeline_ml.py @@ -8,6 +8,7 @@ from kedro_mlflow.pipeline import ( KedroMlflowPipelineMLDatasetsError, KedroMlflowPipelineMLInputsError, + KedroMlflowPipelineMLOutputsError, pipeline_ml, pipeline_ml_factory, ) @@ -34,6 +35,14 @@ def predict_fun(model, data): return data * model +def predict_fun_with_metric(model, data): + return data * model, "super_metric" + + +def predict_fun_return_nothing(model, data): + pass + + @pytest.fixture def pipeline_with_tag(): @@ -360,3 +369,43 @@ def test_invalid_input_name(pipeline_ml_with_tag): match="input_name='whoops_bad_name' but it must be an input of 'inference'", ): pipeline_ml_with_tag.input_name = "whoops_bad_name" + + +def test_too_many_inference_outputs(): + with pytest.raises( + KedroMlflowPipelineMLOutputsError, + match="The inference pipeline must have one and only one output", + ): + pipeline_ml_factory( + training=Pipeline([node(func=train_fun, inputs="data", outputs="model",)]), + inference=Pipeline( + [ + node( + func=predict_fun_with_metric, + inputs=["model", "data"], + outputs=["predictions", "metric"], + ) + ] + ), + input_name="data", + ) + + +def test_not_enough_inference_outputs(): + with pytest.raises( + KedroMlflowPipelineMLOutputsError, + match="The inference pipeline must have one and only one output", + ): + pipeline_ml_factory( + training=Pipeline([node(func=train_fun, inputs="data", outputs="model",)]), + inference=Pipeline( + [ + node( + func=predict_fun_return_nothing, + inputs=["model", "data"], + outputs=None, + ) + ] + ), + input_name="data", + )
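For reference, here is a hedged sketch of a valid ``pipeline_ml_factory`` call that satisfies the single-output constraint enforced above. The functions and dataset names mirror the test fixtures:

```python
from kedro.pipeline import Pipeline, node
from kedro_mlflow.pipeline import pipeline_ml_factory


def train_fun(data):
    return 2  # dummy "model", as in the test fixtures


def predict_fun(model, data):
    return data * model


training = Pipeline([node(func=train_fun, inputs="data", outputs="model")])
inference = Pipeline(
    [node(func=predict_fun, inputs=["model", "data"], outputs="predictions")]
)

# valid: 'inference' has exactly one output ("predictions"), its only free
# input is 'input_name', and its other input ("model") is a training output
pipeline_training = pipeline_ml_factory(
    training=training, inference=inference, input_name="data"
)
```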