From 9ba1cf94b58abc76d25a9191bf37e528a076fb8b Mon Sep 17 00:00:00 2001
From: ThibaultFy <50656860+ThibaultFy@users.noreply.github.com>
Date: Thu, 2 Nov 2023 14:36:30 +0100
Subject: [PATCH] feat: resolve relative datasample spec path (#392)

Signed-off-by: ThibaultFy <50656860+ThibaultFy@users.noreply.github.com>
---
 CHANGELOG.md              |  4 ++++
 substra/sdk/models.py     |  9 ++++++---
 substra/sdk/schemas.py    | 33 ++++++++++++++++++++++++++++-----
 tests/sdk/test_schemas.py | 30 ++++++++++++++++++++++++++++++
 4 files changed, 68 insertions(+), 8 deletions(-)
 create mode 100644 tests/sdk/test_schemas.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7f77dd9f..2d89cbdd 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Added
+
+- Paths are now resolved on DatasampleSpec objects. Which means that users can pass relative paths ([#392](https://github.com/Substra/substra/pull/392))
+
 ## [0.49.0](https://github.com/Substra/substra/releases/tag/0.49.0) - 2023-10-18
 
 ### Added
diff --git a/substra/sdk/models.py b/substra/sdk/models.py
index c45bedd0..8e045f6d 100644
--- a/substra/sdk/models.py
+++ b/substra/sdk/models.py
@@ -187,7 +187,8 @@ def allowed_filters() -> List[str]:
         return ["key", "name", "owner", "permissions", "compute_plan_key", "dataset_key", "data_sample_key"]
 
     @pydantic.field_validator("inputs", mode="before")
-    def dict_input_to_list(cls, v):  # noqa: N805
+    @classmethod
+    def dict_input_to_list(cls, v):
         if isinstance(v, dict):
             # Transform the inputs dict to a list
             return [
@@ -203,7 +204,8 @@ def dict_input_to_list(cls, v):  # noqa: N805
             return v
 
     @pydantic.field_validator("outputs", mode="before")
-    def dict_output_to_list(cls, v):  # noqa: N805
+    @classmethod
+    def dict_output_to_list(cls, v):
         if isinstance(v, dict):
             # Transform the outputs dict to a list
             return [
@@ -397,7 +399,8 @@ class OutputAsset(_TaskAsset):
 
     # Deal with remote returning the actual performance object
     @pydantic.field_validator("asset", mode="before")
-    def convert_remote_performance(cls, value, values):  # noqa: N805
+    @classmethod
+    def convert_remote_performance(cls, value, values):
         if values.data.get("kind") == schemas.AssetKind.performance and isinstance(value, dict):
             return value.get("performance_value")
 
diff --git a/substra/sdk/schemas.py b/substra/sdk/schemas.py
index b5a715f9..bb9a0a3d 100644
--- a/substra/sdk/schemas.py
+++ b/substra/sdk/schemas.py
@@ -147,7 +147,8 @@ def is_many(self):
         return self.paths and len(self.paths) > 0
 
     @pydantic.model_validator(mode="before")
-    def exclusive_paths(cls, values):  # noqa: N805
+    @classmethod
+    def exclusive_paths(cls, values: typing.Any) -> typing.Any:
         """Check that one and only one path(s) field is defined."""
         if "paths" in values and "path" in values:
             raise ValueError("'path' and 'paths' fields are exclusive.")
@@ -155,6 +156,24 @@ def exclusive_paths(cls, values):  # noqa: N805
             raise ValueError("'path' or 'paths' field must be set.")
         return values
 
+    @pydantic.model_validator(mode="before")
+    @classmethod
+    def resolve_paths(cls, values: typing.Any) -> typing.Any:
+        """Resolve given path is relative."""
+        if "paths" in values:
+            paths = []
+            for path in values["paths"]:
+                path = pathlib.Path(path)
+                paths.append(path.resolve())
+
+            values["paths"] = paths
+
+        elif "path" in values:
+            path = pathlib.Path(values["path"])
+            values["path"] = path.resolve()
+
+        return values
+
     @contextlib.contextmanager
     def build_request_kwargs(self, local):
         # redefine kwargs builder to handle the local paths
@@ -293,7 +312,8 @@ class FunctionInputSpec(_Spec):
     kind: AssetKind
 
     @pydantic.model_validator(mode="before")
-    def _check_identifiers(cls, values):  # noqa: N805
+    @classmethod
+    def _check_identifiers(cls, values):
         """Checks that the multiplicity and the optionality of a data manager is always set to False"""
         if values["kind"] == AssetKind.data_manager:
             if values["multiple"]:
@@ -327,7 +347,8 @@ class FunctionOutputSpec(_Spec):
     multiple: bool
 
     @pydantic.model_validator(mode="before")
-    def _check_performance(cls, values):  # noqa: N805
+    @classmethod
+    def _check_performance(cls, values):
         """Checks that the performance is always set to False"""
         if values == AssetKind.performance and values["multiple"]:
             raise ValueError("Performance can't be multiple.")
@@ -352,7 +373,8 @@ class FunctionSpec(_Spec):
     type_: typing.ClassVar[Type] = Type.Function
 
     @pydantic.field_validator("inputs")
-    def _check_inputs(cls, v):  # noqa: N805
+    @classmethod
+    def _check_inputs(cls, v):
         inputs = v or []
         identifiers = {value.identifier for value in inputs}
         if len(identifiers) != len(inputs):
@@ -360,7 +382,8 @@ def _check_inputs(cls, v):  # noqa: N805
         return v
 
     @pydantic.field_validator("outputs")
-    def _check_outputs(cls, v):  # noqa: N805
+    @classmethod
+    def _check_outputs(cls, v):
         outputs = v or []
         identifiers = {value.identifier for value in outputs}
         if len(identifiers) != len(outputs):
diff --git a/tests/sdk/test_schemas.py b/tests/sdk/test_schemas.py
new file mode 100644
index 00000000..359a33d5
--- /dev/null
+++ b/tests/sdk/test_schemas.py
@@ -0,0 +1,30 @@
+import pathlib
+import uuid
+
+import pytest
+
+from substra.sdk.schemas import DataSampleSpec
+
+
+@pytest.mark.parametrize("path", [pathlib.Path() / "data", "./data", pathlib.Path().cwd() / "data"])
+def test_datasample_spec_resolve_path(path):
+    datasample_spec = DataSampleSpec(path=path, data_manager_keys=[str(uuid.uuid4())])
+
+    assert datasample_spec.path == pathlib.Path().cwd() / "data"
+
+
+def test_datasample_spec_resolve_paths():
+    paths = [pathlib.Path() / "data", "./data", pathlib.Path().cwd() / "data"]
+    datasample_spec = DataSampleSpec(paths=paths, data_manager_keys=[str(uuid.uuid4())])
+
+    assert all([path == pathlib.Path().cwd() / "data" for path in datasample_spec.paths])
+
+
+def test_datasample_spec_exclusive_path():
+    with pytest.raises(ValueError):
+        DataSampleSpec(paths=["fake_paths"], path="fake_paths", data_manager_keys=[str(uuid.uuid4())])
+
+
+def test_datasample_spec_no_path():
+    with pytest.raises(ValueError):
+        DataSampleSpec(data_manager_keys=[str(uuid.uuid4())])