diff --git a/CHANGELOG.md b/CHANGELOG.md
index 60cc57cf..e22a464e 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@

 ### Fixed

+- :bug: Force the input dataset in ``KedroPipelineModel`` to be a ``MemoryDataSet`` to remove an unnecessary dependency on the underlying Kedro ``AbstractDataSet`` used during training ([#273](https://github.com/Galileo-Galilei/kedro-mlflow/issues/273)).
 - :bug: Make ``MlflowArtifactDataset`` correctly log in mlflow Kedro DataSets without a ``_path`` attribute like ``kedro.io.PartitionedDataSet`` ([#258](https://github.com/Galileo-Galilei/kedro-mlflow/issues/258)).
 - :bug: Automatically persist pipeline parameters when calling the ``kedro mlflow modelify`` command for consistency with how ``PipelineML`` objects are handled and for ease of use ([#282](https://github.com/Galileo-Galilei/kedro-mlflow/issues/282)).

diff --git a/kedro_mlflow/mlflow/kedro_pipeline_model.py b/kedro_mlflow/mlflow/kedro_pipeline_model.py
index 068ba831..9523a825 100644
--- a/kedro_mlflow/mlflow/kedro_pipeline_model.py
+++ b/kedro_mlflow/mlflow/kedro_pipeline_model.py
@@ -109,9 +109,11 @@ def _extract_pipeline_catalog(self, catalog: DataCatalog) -> DataCatalog:
         for data_set_name in self.pipeline.inputs():
             if data_set_name == self.input_name:
                 # there is no obligation that this dataset is persisted
-                # thus it is allowed to be an empty memory dataset
-                data_set = catalog._data_sets.get(data_set_name) or MemoryDataSet()
-                sub_catalog.add(data_set_name=data_set_name, data_set=data_set)
+                # and even if it is, we keep only an empty memory dataset to avoid
+                # extra unnecessary dependencies: this dataset will be replaced at
+                # inference time and we do not need to know the original type, see
+                # https://github.com/Galileo-Galilei/kedro-mlflow/issues/273
+                sub_catalog.add(data_set_name=data_set_name, data_set=MemoryDataSet())
             else:
                 try:
                     data_set = catalog._data_sets[data_set_name]
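
For context, the effect of the second hunk is that the catalog bundled with a saved ``KedroPipelineModel`` never references the concrete ``AbstractDataSet`` subclass used during training, so loading the model does not require that class (or its backing library) to be importable. Below is a minimal sketch of that behaviour, assuming a standalone helper: ``build_inference_catalog`` is illustrative and not part of the library's API, and it reuses the private ``_data_sets`` mapping only because the diff above does.

```python
from kedro.io import DataCatalog, MemoryDataSet


def build_inference_catalog(
    training_catalog: DataCatalog, input_name: str
) -> DataCatalog:
    """Hypothetical helper: rebuild the catalog shipped with the model.

    The model input is always swapped for an empty ``MemoryDataSet``,
    whatever dataset type backed it during training.
    """
    inference_catalog = DataCatalog()
    for name in training_catalog.list():
        if name == input_name:
            # The original type (CSVDataSet, SparkDataSet, ...) is dropped:
            # the placeholder is overwritten with real data at predict time,
            # so the saved model no longer depends on the training dataset class.
            inference_catalog.add(data_set_name=name, data_set=MemoryDataSet())
        else:
            # Other pipeline inputs keep their original datasets.
            inference_catalog.add(
                data_set_name=name, data_set=training_catalog._data_sets[name]
            )
    return inference_catalog
```

At inference time, filling the placeholder is then a plain ``inference_catalog.save(input_name, data)`` call, which is why, as the new comment notes, the original dataset type never needs to be known.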