From 24f90e4c927e5e16b67e20c2a0eaa9120a54069b Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 20 Apr 2022 11:40:34 +0200 Subject: [PATCH 01/78] added r_clone and r_clone_utils --- .../src/settings_library/r_clone.py | 13 ++++++ .../src/settings_library/utils_r_clone.py | 45 +++++++++++++++++++ .../tests/test_utils_r_clone.py | 21 +++++++++ 3 files changed, 79 insertions(+) create mode 100644 packages/settings-library/src/settings_library/r_clone.py create mode 100644 packages/settings-library/src/settings_library/utils_r_clone.py create mode 100644 packages/settings-library/tests/test_utils_r_clone.py diff --git a/packages/settings-library/src/settings_library/r_clone.py b/packages/settings-library/src/settings_library/r_clone.py new file mode 100644 index 00000000000..0678a3ebf5a --- /dev/null +++ b/packages/settings-library/src/settings_library/r_clone.py @@ -0,0 +1,13 @@ +from enum import Enum + +from .s3 import S3Settings + + +class S3Provider(str, Enum): + AWS = "AWS" + CEPH = "CEPH" + MINIO = "MINIO" + + +class RCloneSettings(S3Settings): + R_CLONE_S3_PROVIDER: S3Provider diff --git a/packages/settings-library/src/settings_library/utils_r_clone.py b/packages/settings-library/src/settings_library/utils_r_clone.py new file mode 100644 index 00000000000..3c2ae2f113d --- /dev/null +++ b/packages/settings-library/src/settings_library/utils_r_clone.py @@ -0,0 +1,45 @@ +import configparser +from copy import deepcopy +from io import StringIO +from typing import Dict + +from .r_clone import RCloneSettings, S3Provider + +_COMMON_ENTRIES: Dict[str, str] = { + "type": "s3", + "access_key_id": "{access_key}", + "secret_access_key": "{secret_key}", + "region": "us-east-1", + "acl": "private", +} + +_PROVIDER_ENDTIRES: Dict[S3Provider, Dict[str, str]] = { + S3Provider.AWS: {"provider": "AWS"}, + S3Provider.CEPH: {"provider": "Ceph", "endpoint": "{endpoint}"}, + S3Provider.MINIO: {"provider": "Minio", "endpoint": "{endpoint}"}, +} + + +def _format_config(entries: Dict[str, str]) -> str: + config = configparser.ConfigParser() + config["dst"] = entries + with StringIO() as string_io: + config.write(string_io) + string_io.seek(0) + return string_io.read() + + +def get_r_clone_config(r_clone_settings: RCloneSettings) -> str: + provider = r_clone_settings.R_CLONE_S3_PROVIDER + entries = deepcopy(_COMMON_ENTRIES) + entries.update(_PROVIDER_ENDTIRES[provider]) + + r_clone_config_template = _format_config(entries=entries) + + # replace entries in template + r_clone_config = r_clone_config_template.format( + endpoint=r_clone_settings.S3_ENDPOINT, + access_key=r_clone_settings.S3_ACCESS_KEY, + secret_key=r_clone_settings.S3_SECRET_KEY, + ) + return r_clone_config diff --git a/packages/settings-library/tests/test_utils_r_clone.py b/packages/settings-library/tests/test_utils_r_clone.py new file mode 100644 index 00000000000..6b99db34540 --- /dev/null +++ b/packages/settings-library/tests/test_utils_r_clone.py @@ -0,0 +1,21 @@ +import pytest +from settings_library.r_clone import RCloneSettings, S3Provider +from settings_library.utils_r_clone import _COMMON_ENTRIES, get_r_clone_config + + +@pytest.fixture(params=list(S3Provider)) +def r_clone_settings(request, monkeypatch) -> RCloneSettings: + monkeypatch.setenv("R_CLONE_S3_PROVIDER", request.param) + return RCloneSettings() + + +def test_r_clone_config_template_replacement(r_clone_settings: RCloneSettings) -> None: + r_clone_config = get_r_clone_config(r_clone_settings) + print(r_clone_config) + + assert "{endpoint}" not in r_clone_config + assert 
"{access_key}" not in r_clone_config + assert "{secret_key}" not in r_clone_config + + for key in _COMMON_ENTRIES.keys(): + assert key in r_clone_config From 7dca9b70a10f205a9c95ddaede5a8404a8c2bcf9 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 20 Apr 2022 15:03:03 +0200 Subject: [PATCH 02/78] added node_ports_v2_r_clone --- packages/simcore-sdk/requirements/_base.in | 3 + packages/simcore-sdk/requirements/_base.txt | 38 ++++- packages/simcore-sdk/requirements/_test.txt | 9 +- packages/simcore-sdk/requirements/ci.txt | 1 + packages/simcore-sdk/requirements/dev.txt | 1 + .../src/simcore_sdk/node_ports_v2/r_clone.py | 132 +++++++++++++++ .../integration/test_node_ports_v2_r_clone.py | 153 ++++++++++++++++++ .../tests/unit/test_node_ports_v2_r_clone.py | 54 +++++++ 8 files changed, 388 insertions(+), 3 deletions(-) create mode 100644 packages/simcore-sdk/src/simcore_sdk/node_ports_v2/r_clone.py create mode 100644 packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py create mode 100644 packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py diff --git a/packages/simcore-sdk/requirements/_base.in b/packages/simcore-sdk/requirements/_base.in index 299f174ce29..5bcbcccae29 100644 --- a/packages/simcore-sdk/requirements/_base.in +++ b/packages/simcore-sdk/requirements/_base.in @@ -4,11 +4,14 @@ --constraint ../../../requirements/constraints.txt --requirement ../../../packages/postgres-database/requirements/_base.in --requirement ../../../packages/service-library/requirements/_base.in +--requirement ../../../packages/settings-library/requirements/_base.in --requirement ../../../packages/models-library/requirements/_base.in +aioboto3 aiofiles aiohttp aiopg[sa] +async-cache jsonschema packaging pydantic[email] diff --git a/packages/simcore-sdk/requirements/_base.txt b/packages/simcore-sdk/requirements/_base.txt index 1ac1fa0c61a..576d9754365 100644 --- a/packages/simcore-sdk/requirements/_base.txt +++ b/packages/simcore-sdk/requirements/_base.txt @@ -4,6 +4,10 @@ # # pip-compile --output-file=requirements/_base.txt --strip-extras requirements/_base.in # +aioboto3==9.5.0 + # via -r requirements/_base.in +aiobotocore==2.2.0 + # via aioboto3 aiodebug==2.3.0 # via -r requirements/../../../packages/service-library/requirements/_base.in aiofiles==0.8.0 @@ -17,12 +21,17 @@ aiohttp==3.8.1 # -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../requirements/constraints.txt # -r requirements/_base.in + # aiobotocore +aioitertools==0.10.0 + # via aiobotocore aiopg==1.3.3 # via -r requirements/_base.in aiosignal==1.2.0 # via aiohttp alembic==1.7.7 # via -r requirements/../../../packages/postgres-database/requirements/_base.in +async-cache==1.1.1 + # via -r requirements/_base.in async-timeout==4.0.2 # via # aiohttp @@ -32,6 +41,13 @@ attrs==20.3.0 # -c requirements/../../../packages/service-library/requirements/./constraints.txt # aiohttp # jsonschema +boto3==1.21.21 + # via aiobotocore +botocore==1.24.21 + # via + # aiobotocore + # boto3 + # s3transfer charset-normalizer==2.0.12 # via aiohttp dnspython==2.2.1 @@ -48,6 +64,10 @@ idna==3.3 # via # email-validator # yarl +jmespath==1.0.0 + # via + # boto3 + # botocore jsonschema==3.2.0 # via # -c requirements/../../../packages/service-library/requirements/./constraints.txt @@ -81,6 +101,8 @@ pyparsing==3.0.7 # via packaging pyrsistent==0.18.1 # via jsonschema +python-dateutil==2.8.2 + # via botocore pyyaml==5.4.1 # via # -c 
requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt @@ -89,8 +111,12 @@ pyyaml==5.4.1 # -c requirements/../../../packages/service-library/requirements/./constraints.txt # -c requirements/../../../requirements/constraints.txt # -r requirements/../../../packages/service-library/requirements/_base.in +s3transfer==0.5.2 + # via boto3 six==1.16.0 - # via jsonschema + # via + # jsonschema + # python-dateutil sqlalchemy==1.4.32 # via # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt @@ -109,7 +135,17 @@ tqdm==4.63.1 typing-extensions==4.1.1 # via # aiodebug + # aioitertools # pydantic +urllib3==1.26.9 + # via + # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../requirements/constraints.txt + # botocore +wrapt==1.14.0 + # via aiobotocore yarl==1.7.2 # via # -r requirements/../../../packages/postgres-database/requirements/_base.in diff --git a/packages/simcore-sdk/requirements/_test.txt b/packages/simcore-sdk/requirements/_test.txt index 25cebdaffeb..b3c4c5d63ed 100644 --- a/packages/simcore-sdk/requirements/_test.txt +++ b/packages/simcore-sdk/requirements/_test.txt @@ -161,7 +161,9 @@ pytest-sugar==0.9.4 pytest-xdist==2.5.0 # via -r requirements/_test.in python-dateutil==2.8.2 - # via faker + # via + # -c requirements/_base.txt + # faker python-dotenv==0.20.0 # via -r requirements/_test.in requests==2.27.1 @@ -193,12 +195,15 @@ typing-extensions==4.1.1 urllib3==1.26.9 # via # -c requirements/../../../requirements/constraints.txt + # -c requirements/_base.txt # minio # requests websocket-client==1.3.1 # via docker wrapt==1.14.0 - # via astroid + # via + # -c requirements/_base.txt + # astroid yarl==1.7.2 # via # -c requirements/_base.txt diff --git a/packages/simcore-sdk/requirements/ci.txt b/packages/simcore-sdk/requirements/ci.txt index 12f77debb43..5b661d543e3 100644 --- a/packages/simcore-sdk/requirements/ci.txt +++ b/packages/simcore-sdk/requirements/ci.txt @@ -14,6 +14,7 @@ ../postgres-database ../pytest-simcore/ ../models-library/ +../settings-library/ # FIXME: these dependencies should be removed ../service-library/ diff --git a/packages/simcore-sdk/requirements/dev.txt b/packages/simcore-sdk/requirements/dev.txt index 8d1be3ce9ad..b67f43d8690 100644 --- a/packages/simcore-sdk/requirements/dev.txt +++ b/packages/simcore-sdk/requirements/dev.txt @@ -16,6 +16,7 @@ --editable ../postgres-database --editable ../models-library/ +--editable ../settings-library/ # FIXME: these dependencies should be removed --editable ../service-library/ diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/r_clone.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/r_clone.py new file mode 100644 index 00000000000..94babec69e5 --- /dev/null +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/r_clone.py @@ -0,0 +1,132 @@ +import asyncio +import json +import logging +from contextlib import asynccontextmanager +from pathlib import Path +from typing import AsyncGenerator, Optional + +import aioboto3 +from aiofiles import tempfile +from cache import AsyncLRU +from settings_library.r_clone import RCloneSettings +from settings_library.utils_r_clone import get_r_clone_config + +from ..node_ports_common.filemanager 
import ETag + +logger = logging.getLogger(__name__) + + +class _CommandFailedException(Exception): + pass + + +class RCloneError(Exception): + pass + + +@asynccontextmanager +async def _config_file(config: str) -> AsyncGenerator[str, None]: + async with tempfile.NamedTemporaryFile("w") as f: + await f.write(config) + await f.flush() + yield f.name + + +async def _async_command(command: str, *, cwd: Optional[str] = None) -> str: + proc = await asyncio.create_subprocess_shell( + command, + stdin=asyncio.subprocess.PIPE, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.STDOUT, + cwd=cwd, + ) + + stdout, _ = await proc.communicate() + decoded_stdout = stdout.decode() + if proc.returncode != 0: + raise _CommandFailedException( + f"Command {command} finished with exception:\n{decoded_stdout}" + ) + + logger.debug("'%s' result:\n%s", command, decoded_stdout) + return decoded_stdout + + +@AsyncLRU(maxsize=1) +async def is_r_clone_installed(r_clone_settings: Optional[RCloneSettings]) -> bool: + """returns: True if the `rclone` cli is installed and a configuration is provided""" + try: + await _async_command("rclone --version") + return r_clone_settings is not None + except _CommandFailedException: + return False + + +async def _get_etag_via_s3(r_clone_settings: RCloneSettings, s3_path: str) -> ETag: + session = aioboto3.Session( + aws_access_key_id=r_clone_settings.S3_ACCESS_KEY, + aws_secret_access_key=r_clone_settings.S3_SECRET_KEY, + ) + async with session.resource("s3", endpoint_url=r_clone_settings.S3_ENDPOINT) as s3: + s3_object = await s3.Object( + bucket_name=r_clone_settings.S3_BUCKET_NAME, + key=s3_path.lstrip(r_clone_settings.S3_BUCKET_NAME), + ) + e_tag_result = await s3_object.e_tag + # NOTE: above result is JSON encoded for some reason + return json.loads(e_tag_result) + + +async def sync_to_s3( + r_clone_settings: Optional[RCloneSettings], s3_path: str, local_file_path: Path +) -> ETag: + if r_clone_settings is None: + raise RCloneError( + ( + f"Could not sync {local_file_path=} to {s3_path=}, provided " + f"config is invalid{r_clone_settings=}" + ) + ) + + r_clone_config_file_content = get_r_clone_config(r_clone_settings) + async with _config_file(r_clone_config_file_content) as config_file_name: + source_path = local_file_path + destination_path = Path(s3_path) + assert local_file_path.name == destination_path.name + file_name = local_file_path.name + + # rclone only acts upon directories, so to target a specific file + # we must run the command from the file's directory. 
See below + # example for further details: + # + # local_file_path=`/tmp/pytest-of-silenthk/pytest-80/test_sync_to_s30/filee3e70682-c209-4cac-a29f-6fbed82c07cd.txt` + # s3_path=`simcore/00000000-0000-0000-0000-000000000001/00000000-0000-0000-0000-000000000002/filee3e70682-c209-4cac-a29f-6fbed82c07cd.txt` + # + # rclone + # --config + # /tmp/tmpd_1rtmss + # sync + # '/tmp/pytest-of-silenthk/pytest-80/test_sync_to_s30' + # 'dst:simcore/00000000-0000-0000-0000-000000000001/00000000-0000-0000-0000-000000000002' + # --progress + # --include + # 'filee3e70682-c209-4cac-a29f-6fbed82c07cd.txt' + r_clone_command = [ + "rclone", + "--config", + config_file_name, + "sync", + f"'{source_path.parent}'", + f"'dst:{destination_path.parent}'", + "--progress", + "--include", + f"'{file_name}'", + ] + command_result = await _async_command( + " ".join(r_clone_command), cwd=f"{source_path.parent}" + ) + logger.debug(command_result) + + return await _get_etag_via_s3( + r_clone_settings=r_clone_settings, s3_path=s3_path + ) diff --git a/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py b/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py new file mode 100644 index 00000000000..d25ed9abb45 --- /dev/null +++ b/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py @@ -0,0 +1,153 @@ +from contextlib import asynccontextmanager +import shutil +from pathlib import Path +from typing import Any, AsyncGenerator, AsyncIterable, Dict, Iterator +from uuid import UUID +import aioboto3 + +import pytest +from faker import Faker +from settings_library.r_clone import RCloneSettings, S3Provider +from simcore_sdk.node_ports_v2.r_clone import is_r_clone_installed, sync_to_s3 + +pytest_simcore_core_services_selection = [ + "postgres", +] + +pytest_simcore_ops_services_selection = [ + "minio", +] + +# FIXTURES + + +@pytest.fixture +def file_name(faker: Faker) -> str: + return f"file_{faker.uuid4()}.txt" + + +@pytest.fixture +def upload_file_dir(tmpdir: Path) -> Iterator[Path]: + temp_path = Path(tmpdir) + assert temp_path.is_dir() + yield temp_path + shutil.rmtree(temp_path) + + +@pytest.fixture +def file_to_upload(upload_file_dir: Path, file_name: str, faker: Faker) -> Path: + # generate file with data + file_path = upload_file_dir / file_name + file_path.write_text(faker.paragraph(nb_sentences=5)) + return file_path + + +@pytest.fixture +def local_file_for_download(upload_file_dir: Path, file_name: str) -> Path: + local_file_path = upload_file_dir / f"__local__{file_name}" + return local_file_path + + +@pytest.fixture +async def r_clone_settings(minio_config: Dict[str, Any]) -> RCloneSettings: + client = minio_config["client"] + settings = RCloneSettings( + S3_ENDPOINT=f"http://{client['endpoint']}", # TODO: timeout check of the command + S3_ACCESS_KEY=client["access_key"], + S3_SECRET_KEY=client["secret_key"], + S3_BUCKET_NAME=minio_config["bucket_name"], + S3_SECURE=client["secure"], + R_CLONE_S3_PROVIDER=S3Provider.MINIO, + ) + if not await is_r_clone_installed(settings): + pytest.skip("rclone not installed") + + return settings + + +@pytest.fixture +def project_id() -> UUID: + return UUID(int=1) + + +@pytest.fixture +def node_uuid() -> UUID: + return UUID(int=2) + + +@pytest.fixture +def s3_object( + r_clone_settings: RCloneSettings, project_id: UUID, node_uuid: UUID, file_name: str +) -> str: + + s3_path = ( + Path(r_clone_settings.S3_BUCKET_NAME) + / f"{project_id}" + / f"{node_uuid}" + / file_name + ) + return f"{s3_path}" + + +@pytest.fixture +async def cleanup_s3( + 
r_clone_settings: RCloneSettings, s3_object: str +) -> AsyncIterable[None]: + yield + async with _get_s3_object(r_clone_settings, s3_object) as s3_object: + await s3_object.delete() + + +# UTILS + + +@asynccontextmanager +async def _get_s3_object( + r_clone_settings: RCloneSettings, s3_path: str +) -> AsyncGenerator["aioboto3.resources.factory.s3.Object", None]: + session = aioboto3.Session( + aws_access_key_id=r_clone_settings.S3_ACCESS_KEY, + aws_secret_access_key=r_clone_settings.S3_SECRET_KEY, + ) + async with session.resource("s3", endpoint_url=r_clone_settings.S3_ENDPOINT) as s3: + s3_object = await s3.Object( + bucket_name=r_clone_settings.S3_BUCKET_NAME, + key=s3_path.lstrip(r_clone_settings.S3_BUCKET_NAME), + ) + yield s3_object + + +async def _download_s3_object( + r_clone_settings: RCloneSettings, s3_path: str, local_path: Path +): + async with _get_s3_object(r_clone_settings, s3_path) as s3_object: + await s3_object.download_file(f"{local_path}") + + +# TESTS + + +async def test_sync_to_s3( + r_clone_settings: RCloneSettings, + s3_object: str, + file_to_upload: Path, + local_file_for_download: Path, + cleanup_s3: None, +) -> None: + etag = await sync_to_s3( + r_clone_settings=r_clone_settings, + s3_path=s3_object, + local_file_path=file_to_upload, + ) + + assert isinstance(etag, str) + assert '"' not in etag + + await _download_s3_object( + r_clone_settings=r_clone_settings, + s3_path=s3_object, + local_path=local_file_for_download, + ) + + # check same file contents after upload and download + assert file_to_upload.read_text() == local_file_for_download.read_text() diff --git a/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py b/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py new file mode 100644 index 00000000000..aa8821a9af3 --- /dev/null +++ b/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py @@ -0,0 +1,54 @@ +from pathlib import Path + +import pytest +from _pytest.logging import LogCaptureFixture +from faker import Faker +from pytest import MonkeyPatch +from settings_library.r_clone import S3Provider +from simcore_sdk.node_ports_v2 import r_clone +from simcore_sdk.node_ports_v2.r_clone import RCloneSettings + + +@pytest.fixture +def text_to_write(faker: Faker) -> str: + return faker.text() + + +@pytest.fixture(params=list(S3Provider)) +def s3_provider(request) -> S3Provider: + return request.param + + +@pytest.fixture +def r_clone_settings( + monkeypatch: MonkeyPatch, s3_provider: S3Provider +) -> RCloneSettings: + monkeypatch.setenv("R_CLONE_S3_PROVIDER", s3_provider.value) + return RCloneSettings() + + +async def test_is_r_clone_installed_cached( + caplog: LogCaptureFixture, r_clone_settings: RCloneSettings +) -> None: + for _ in range(3): + result = await r_clone.is_r_clone_installed(r_clone_settings) + assert type(result) is bool + assert "'rclone --version' result:\n" in caplog.text + assert caplog.text.count("'rclone --version' result:\n") == 1 + + assert await r_clone.is_r_clone_installed(None) is False + + +async def test__config_file(text_to_write: str) -> None: + async with r_clone._config_file(text_to_write) as file_name: + assert text_to_write == Path(file_name).read_text() + assert Path(file_name).exists() is False + + +async def test__async_command_ok() -> None: + await r_clone._async_command(" ".join(["ls", "-la"])) + + +async def test__async_command_error() -> None: + with pytest.raises(r_clone._CommandFailedException): + await r_clone._async_command("__i_do_not_exist__") From ede7db645cfa8f5c3b3e1986ff7a348200f46969 
Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 20 Apr 2022 15:03:54 +0200 Subject: [PATCH 03/78] refactor settings --- .../src/simcore_service_director_v2/core/settings.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/services/director-v2/src/simcore_service_director_v2/core/settings.py b/services/director-v2/src/simcore_service_director_v2/core/settings.py index de829e1abd8..bec4eed6d45 100644 --- a/services/director-v2/src/simcore_service_director_v2/core/settings.py +++ b/services/director-v2/src/simcore_service_director_v2/core/settings.py @@ -27,6 +27,7 @@ from settings_library.postgres import PostgresSettings from settings_library.rabbit import RabbitSettings from settings_library.s3 import S3Settings +from settings_library.r_clone import S3Provider from settings_library.tracing import TracingSettings from settings_library.utils_logging import MixinLoggingSettings from simcore_postgres_database.models.clusters import ClusterType @@ -50,12 +51,6 @@ SUPPORTED_TRAEFIK_LOG_LEVELS: Set[str] = {"info", "debug", "warn", "error"} -class S3Provider(str, Enum): - AWS = "AWS" - CEPH = "CEPH" - MINIO = "MINIO" - - class VFSCacheMode(str, Enum): OFF = "off" MINIMAL = "minimal" From f641299c9fcb1e744443e7a0ec9495bede3ae80b Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 20 Apr 2022 15:50:57 +0200 Subject: [PATCH 04/78] refactor settings --- .../src/settings_library/r_clone.py | 15 +++++++++++++-- .../settings-library/src/settings_library/s3.py | 1 + .../src/settings_library/utils_r_clone.py | 8 ++++---- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/packages/settings-library/src/settings_library/r_clone.py b/packages/settings-library/src/settings_library/r_clone.py index 0678a3ebf5a..62de59c2a92 100644 --- a/packages/settings-library/src/settings_library/r_clone.py +++ b/packages/settings-library/src/settings_library/r_clone.py @@ -1,5 +1,7 @@ from enum import Enum +from pydantic import Field +from .base import BaseCustomSettings from .s3 import S3Settings @@ -9,5 +11,14 @@ class S3Provider(str, Enum): MINIO = "MINIO" -class RCloneSettings(S3Settings): - R_CLONE_S3_PROVIDER: S3Provider +class _RequiredS3Settings(S3Settings): + S3_ENDPOINT: str + S3_ACCESS_KEY: str + S3_SECRET_KEY: str + S3_BUCKET_NAME: str + S3_SECURE: bool + + +class RCloneSettings(BaseCustomSettings): + R_CLONE_S3: _RequiredS3Settings = Field(auto_default_from_env=True) + R_CLONE_PROVIDER: S3Provider diff --git a/packages/settings-library/src/settings_library/s3.py b/packages/settings-library/src/settings_library/s3.py index db9965f22ec..48d0174ad9e 100644 --- a/packages/settings-library/src/settings_library/s3.py +++ b/packages/settings-library/src/settings_library/s3.py @@ -2,6 +2,7 @@ class S3Settings(BaseCustomSettings): + # TODO: try to remove defaults if this works also remove _RequiredS3Settings S3_ENDPOINT: str = "minio:9000" S3_ACCESS_KEY: str = "12345678" S3_SECRET_KEY: str = "12345678" diff --git a/packages/settings-library/src/settings_library/utils_r_clone.py b/packages/settings-library/src/settings_library/utils_r_clone.py index 3c2ae2f113d..b1a21e03a68 100644 --- a/packages/settings-library/src/settings_library/utils_r_clone.py +++ b/packages/settings-library/src/settings_library/utils_r_clone.py @@ -30,7 +30,7 @@ def _format_config(entries: Dict[str, str]) -> str: def get_r_clone_config(r_clone_settings: RCloneSettings) -> str: - provider = r_clone_settings.R_CLONE_S3_PROVIDER + provider = r_clone_settings.R_CLONE_PROVIDER entries = 
deepcopy(_COMMON_ENTRIES) entries.update(_PROVIDER_ENDTIRES[provider]) @@ -38,8 +38,8 @@ def get_r_clone_config(r_clone_settings: RCloneSettings) -> str: # replace entries in template r_clone_config = r_clone_config_template.format( - endpoint=r_clone_settings.S3_ENDPOINT, - access_key=r_clone_settings.S3_ACCESS_KEY, - secret_key=r_clone_settings.S3_SECRET_KEY, + endpoint=r_clone_settings.R_CLONE_S3.S3_ENDPOINT, + access_key=r_clone_settings.R_CLONE_S3.S3_ACCESS_KEY, + secret_key=r_clone_settings.R_CLONE_S3.S3_SECRET_KEY, ) return r_clone_config From 1391d219b609b95eb5a10a9b7bf2cfc5df079be9 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 20 Apr 2022 15:51:10 +0200 Subject: [PATCH 05/78] refactor tests --- .../integration/test_node_ports_v2_r_clone.py | 36 ++++++++++--------- .../tests/unit/test_node_ports_v2_r_clone.py | 7 +++- 2 files changed, 26 insertions(+), 17 deletions(-) diff --git a/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py b/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py index d25ed9abb45..1a248b671f2 100644 --- a/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py +++ b/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py @@ -8,7 +8,7 @@ import pytest from faker import Faker from settings_library.r_clone import RCloneSettings, S3Provider -from simcore_sdk.node_ports_v2.r_clone import is_r_clone_installed, sync_to_s3 +from simcore_sdk.node_ports_v2.r_clone import is_r_clone_installed, sync_local_to_s3 pytest_simcore_core_services_selection = [ "postgres", @@ -51,13 +51,17 @@ def local_file_for_download(upload_file_dir: Path, file_name: str) -> Path: @pytest.fixture async def r_clone_settings(minio_config: Dict[str, Any]) -> RCloneSettings: client = minio_config["client"] - settings = RCloneSettings( - S3_ENDPOINT=f"http://{client['endpoint']}", # TODO: timeout check of the command - S3_ACCESS_KEY=client["access_key"], - S3_SECRET_KEY=client["secret_key"], - S3_BUCKET_NAME=minio_config["bucket_name"], - S3_SECURE=client["secure"], - R_CLONE_S3_PROVIDER=S3Provider.MINIO, + settings = RCloneSettings.parse_obj( + dict( + R_CLONE_S3=dict( + S3_ENDPOINT=f"http://{client['endpoint']}", # TODO: timeout check of the command + S3_ACCESS_KEY=client["access_key"], + S3_SECRET_KEY=client["secret_key"], + S3_BUCKET_NAME=minio_config["bucket_name"], + S3_SECURE=client["secure"], + ), + R_CLONE_PROVIDER=S3Provider.MINIO, + ) ) if not await is_r_clone_installed(settings): pytest.skip("rclone not installed") @@ -81,7 +85,7 @@ def s3_object( ) -> str: s3_path = ( - Path(r_clone_settings.S3_BUCKET_NAME) + Path(r_clone_settings.R_CLONE_S3.S3_BUCKET_NAME) / f"{project_id}" / f"{node_uuid}" / file_name @@ -106,13 +110,13 @@ async def _get_s3_object( r_clone_settings: RCloneSettings, s3_path: str ) -> AsyncGenerator["aioboto3.resources.factory.s3.Object", None]: session = aioboto3.Session( - aws_access_key_id=r_clone_settings.S3_ACCESS_KEY, - aws_secret_access_key=r_clone_settings.S3_SECRET_KEY, + aws_access_key_id=r_clone_settings.R_CLONE_S3.S3_ACCESS_KEY, + aws_secret_access_key=r_clone_settings.R_CLONE_S3.S3_SECRET_KEY, ) - async with session.resource("s3", endpoint_url=r_clone_settings.S3_ENDPOINT) as s3: + async with session.resource("s3", endpoint_url=r_clone_settings.R_CLONE_S3.S3_ENDPOINT) as s3: s3_object = await s3.Object( - bucket_name=r_clone_settings.S3_BUCKET_NAME, - key=s3_path.lstrip(r_clone_settings.S3_BUCKET_NAME), + bucket_name=r_clone_settings.R_CLONE_S3.S3_BUCKET_NAME, + 
key=s3_path.lstrip(r_clone_settings.R_CLONE_S3.S3_BUCKET_NAME), ) yield s3_object @@ -127,14 +131,14 @@ async def _download_s3_object( # TESTS -async def test_sync_to_s3( +async def test_sync_local_to_s3( r_clone_settings: RCloneSettings, s3_object: str, file_to_upload: Path, local_file_for_download: Path, cleanup_s3: None, ) -> None: - etag = await sync_to_s3( + etag = await sync_local_to_s3( r_clone_settings=r_clone_settings, s3_path=s3_object, local_file_path=file_to_upload, diff --git a/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py b/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py index aa8821a9af3..b4700067e2d 100644 --- a/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py +++ b/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py @@ -23,7 +23,12 @@ def s3_provider(request) -> S3Provider: def r_clone_settings( monkeypatch: MonkeyPatch, s3_provider: S3Provider ) -> RCloneSettings: - monkeypatch.setenv("R_CLONE_S3_PROVIDER", s3_provider.value) + monkeypatch.setenv("R_CLONE_PROVIDER", s3_provider.value) + monkeypatch.setenv("S3_ENDPOINT", "endpoint") + monkeypatch.setenv("S3_ACCESS_KEY", "access_key") + monkeypatch.setenv("S3_SECRET_KEY", "secret_key") + monkeypatch.setenv("S3_BUCKET_NAME", "bucket_name") + monkeypatch.setenv("S3_SECURE", "false") return RCloneSettings() From 33d8bbf12810b68ce62b8b596eb2073a6953ed52 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 20 Apr 2022 15:53:57 +0200 Subject: [PATCH 06/78] nodeports supports optional r_clone_settings --- .../src/simcore_sdk/node_ports_v2/__init__.py | 4 ++++ .../simcore_sdk/node_ports_v2/nodeports_v2.py | 2 ++ .../src/simcore_sdk/node_ports_v2/port.py | 1 + .../simcore_sdk/node_ports_v2/port_utils.py | 22 ++++++++++++++----- .../src/simcore_sdk/node_ports_v2/r_clone.py | 18 ++++++++------- .../node_ports_v2/serialization_v2.py | 5 ++++- .../integration/test_node_ports_v2_r_clone.py | 8 ++++--- 7 files changed, 42 insertions(+), 18 deletions(-) diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/__init__.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/__init__.py index 7668d391c83..e6e265d49a9 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/__init__.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/__init__.py @@ -1,6 +1,8 @@ import logging from typing import Optional +from settings_library.r_clone import RCloneSettings + from ..node_ports_common import config as node_config from ..node_ports_common import exceptions from ..node_ports_common.dbmanager import DBManager @@ -17,6 +19,7 @@ async def ports( node_uuid: str, *, db_manager: Optional[DBManager] = None, + r_clone_settings: Optional[RCloneSettings] = None ) -> Nodeports: log.debug("creating node_ports_v2 object using provided dbmanager: %s", db_manager) # FIXME: warning every dbmanager create a new db engine! 
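# --- Illustrative sketch (annotation, not content of this patch): how a caller
# could exercise the optional r_clone_settings parameter that this patch adds to
# node_ports_v2.ports(). It assumes RCloneSettings is populated from the
# R_CLONE_PROVIDER and S3_* environment variables introduced earlier in the
# series; user_id, project_id and node_uuid are placeholder values.
#
#     from settings_library.r_clone import RCloneSettings
#     from simcore_sdk import node_ports_v2
#
#     r_clone_settings = RCloneSettings()  # pydantic reads the env; raises if incomplete
#     PORTS = await node_ports_v2.ports(
#         user_id=user_id,
#         project_id=project_id,
#         node_uuid=node_uuid,
#         r_clone_settings=r_clone_settings,  # None falls back to the filemanager upload path
#     )
# ---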
@@ -30,6 +33,7 @@ async def ports( project_id=project_id, node_uuid=node_uuid, auto_update=True, + r_clone_settings=r_clone_settings, ) diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/nodeports_v2.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/nodeports_v2.py index 5abb8ccaec1..b562a5487d2 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/nodeports_v2.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/nodeports_v2.py @@ -5,6 +5,7 @@ from pydantic import BaseModel, Field from servicelib.utils import logged_gather +from settings_library.r_clone import RCloneSettings from ..node_ports_common.dbmanager import DBManager from ..node_ports_common.exceptions import PortNotFound, UnboundPortError @@ -27,6 +28,7 @@ class Nodeports(BaseModel): [DBManager, int, str, str], Coroutine[Any, Any, Type["Nodeports"]] ] auto_update: bool = False + r_clone_settings: Optional[RCloneSettings] = None class Config: arbitrary_types_allowed = True diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/port.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/port.py index 788c9b807e6..c0d9a6f83c2 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/port.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/port.py @@ -226,6 +226,7 @@ async def _set(self, new_value: ItemConcreteValue) -> None: user_id=self._node_ports.user_id, project_id=self._node_ports.project_id, node_id=self._node_ports.node_uuid, + r_clone_settings=self._node_ports.r_clone_settings, ) else: final_value = converted_value diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/port_utils.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/port_utils.py index a5981f3d47a..7138c72d6d5 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/port_utils.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/port_utils.py @@ -5,10 +5,12 @@ from pydantic import AnyUrl from pydantic.tools import parse_obj_as +from settings_library.r_clone import RCloneSettings from yarl import URL from ..node_ports_common import config, data_items_utils, filemanager from .links import DownloadLink, FileLink, ItemConcreteValue, ItemValue, PortLink +from .r_clone import is_r_clone_installed, sync_local_to_s3 log = logging.getLogger(__name__) @@ -145,15 +147,23 @@ async def push_file_to_store( user_id: int, project_id: str, node_id: str, + r_clone_settings: Optional[RCloneSettings] = None, ) -> FileLink: log.debug("file path %s will be uploaded to s3", file) s3_object = data_items_utils.encode_file_id(file, project_id, node_id) - store_id, e_tag = await filemanager.upload_file( - user_id=user_id, - store_name=config.STORE, - s3_object=s3_object, - local_file_path=file, - ) + + if await is_r_clone_installed(r_clone_settings): + e_tag = await sync_local_to_s3( + r_clone_settings=r_clone_settings, s3_path=s3_object, local_file_path=file + ) + store_id = 0 # simcore + else: + store_id, e_tag = await filemanager.upload_file( + user_id=user_id, + store_name=config.STORE, + s3_object=s3_object, + local_file_path=file, + ) log.debug("file path %s uploaded, received ETag %s", file, e_tag) return FileLink(store=store_id, path=s3_object, e_tag=e_tag) diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/r_clone.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/r_clone.py index 94babec69e5..ce47c9c4d05 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/r_clone.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/r_clone.py @@ -64,20 +64,22 @@ async 
def is_r_clone_installed(r_clone_settings: Optional[RCloneSettings]) -> bo async def _get_etag_via_s3(r_clone_settings: RCloneSettings, s3_path: str) -> ETag: session = aioboto3.Session( - aws_access_key_id=r_clone_settings.S3_ACCESS_KEY, - aws_secret_access_key=r_clone_settings.S3_SECRET_KEY, + aws_access_key_id=r_clone_settings.R_CLONE_S3.S3_ACCESS_KEY, + aws_secret_access_key=r_clone_settings.R_CLONE_S3.S3_SECRET_KEY, ) - async with session.resource("s3", endpoint_url=r_clone_settings.S3_ENDPOINT) as s3: + async with session.resource( + "s3", endpoint_url=r_clone_settings.R_CLONE_S3.S3_ENDPOINT + ) as s3: s3_object = await s3.Object( - bucket_name=r_clone_settings.S3_BUCKET_NAME, - key=s3_path.lstrip(r_clone_settings.S3_BUCKET_NAME), + bucket_name=r_clone_settings.R_CLONE_S3.S3_BUCKET_NAME, + key=s3_path.lstrip(r_clone_settings.R_CLONE_S3.S3_BUCKET_NAME), ) e_tag_result = await s3_object.e_tag # NOTE: above result is JSON encoded for some reason return json.loads(e_tag_result) -async def sync_to_s3( +async def sync_local_to_s3( r_clone_settings: Optional[RCloneSettings], s3_path: str, local_file_path: Path ) -> ETag: if r_clone_settings is None: @@ -99,14 +101,14 @@ async def sync_to_s3( # we must run the command from the file's directory. See below # example for further details: # - # local_file_path=`/tmp/pytest-of-silenthk/pytest-80/test_sync_to_s30/filee3e70682-c209-4cac-a29f-6fbed82c07cd.txt` + # local_file_path=`/tmp/pytest-of-silenthk/pytest-80/test_sync_local_to_s30/filee3e70682-c209-4cac-a29f-6fbed82c07cd.txt` # s3_path=`simcore/00000000-0000-0000-0000-000000000001/00000000-0000-0000-0000-000000000002/filee3e70682-c209-4cac-a29f-6fbed82c07cd.txt` # # rclone # --config # /tmp/tmpd_1rtmss # sync - # '/tmp/pytest-of-silenthk/pytest-80/test_sync_to_s30' + # '/tmp/pytest-of-silenthk/pytest-80/test_sync_local_to_s30' # 'dst:simcore/00000000-0000-0000-0000-000000000001/00000000-0000-0000-0000-000000000002' # --progress # --include diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/serialization_v2.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/serialization_v2.py index 9f8ac86a58d..31df3dad445 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/serialization_v2.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/serialization_v2.py @@ -1,12 +1,13 @@ import json import logging from pprint import pformat -from typing import Any, Dict, Set +from typing import Any, Dict, Optional, Set import pydantic from models_library.projects_nodes import NodeID from models_library.utils.nodes import compute_node_hash from packaging import version +from settings_library.r_clone import RCloneSettings from ..node_ports_common.dbmanager import DBManager from ..node_ports_common.exceptions import InvalidProtocolError @@ -32,6 +33,7 @@ async def load( project_id: str, node_uuid: str, auto_update: bool = False, + r_clone_settings: Optional[RCloneSettings] = None, ) -> Nodeports: """creates a nodeport object from a row from comp_tasks""" log.debug( @@ -88,6 +90,7 @@ async def load( save_to_db_cb=dump, node_port_creator_cb=load, auto_update=auto_update, + r_clone_settings=r_clone_settings, ) log.debug( "created node_ports_v2 object %s", diff --git a/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py b/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py index 1a248b671f2..ee732594508 100644 --- a/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py +++ 
b/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py @@ -1,10 +1,10 @@ -from contextlib import asynccontextmanager import shutil +from contextlib import asynccontextmanager from pathlib import Path from typing import Any, AsyncGenerator, AsyncIterable, Dict, Iterator from uuid import UUID -import aioboto3 +import aioboto3 import pytest from faker import Faker from settings_library.r_clone import RCloneSettings, S3Provider @@ -113,7 +113,9 @@ async def _get_s3_object( aws_access_key_id=r_clone_settings.R_CLONE_S3.S3_ACCESS_KEY, aws_secret_access_key=r_clone_settings.R_CLONE_S3.S3_SECRET_KEY, ) - async with session.resource("s3", endpoint_url=r_clone_settings.R_CLONE_S3.S3_ENDPOINT) as s3: + async with session.resource( + "s3", endpoint_url=r_clone_settings.R_CLONE_S3.S3_ENDPOINT + ) as s3: s3_object = await s3.Object( bucket_name=r_clone_settings.R_CLONE_S3.S3_BUCKET_NAME, key=s3_path.lstrip(r_clone_settings.R_CLONE_S3.S3_BUCKET_NAME), From a33cfb0e6eec56e384cfa93db9ff71a023ef3812 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 20 Apr 2022 16:00:58 +0200 Subject: [PATCH 07/78] pylint --- packages/settings-library/tests/test_utils_r_clone.py | 2 ++ .../tests/integration/test_node_ports_v2_r_clone.py | 3 +++ packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py | 2 ++ 3 files changed, 7 insertions(+) diff --git a/packages/settings-library/tests/test_utils_r_clone.py b/packages/settings-library/tests/test_utils_r_clone.py index 6b99db34540..29ac2ed10a5 100644 --- a/packages/settings-library/tests/test_utils_r_clone.py +++ b/packages/settings-library/tests/test_utils_r_clone.py @@ -1,3 +1,5 @@ +# pylint: disable=redefined-outer-name + import pytest from settings_library.r_clone import RCloneSettings, S3Provider from settings_library.utils_r_clone import _COMMON_ENTRIES, get_r_clone_config diff --git a/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py b/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py index ee732594508..856b6a46909 100644 --- a/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py +++ b/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py @@ -1,3 +1,6 @@ +# pylint: disable=redefined-outer-name +# pylint: disable=unused-argument + import shutil from contextlib import asynccontextmanager from pathlib import Path diff --git a/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py b/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py index b4700067e2d..595e7830c4b 100644 --- a/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py +++ b/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py @@ -1,3 +1,5 @@ +# pylint: disable=redefined-outer-name +# pylint: disable=protected-access from pathlib import Path import pytest From 002ad5fd627d89391b09e1ee358e4341803d8242 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 20 Apr 2022 16:01:11 +0200 Subject: [PATCH 08/78] pylint --- packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py b/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py index 595e7830c4b..513a31bf0ef 100644 --- a/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py +++ b/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py @@ -1,5 +1,6 @@ # pylint: disable=redefined-outer-name # pylint: disable=protected-access + from pathlib import Path import pytest From 
66da40d4369dcc7a410c96d9c2115f7465252c01 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 21 Apr 2022 08:38:25 +0200 Subject: [PATCH 09/78] moved endpoint where it makes sense --- packages/settings-library/src/settings_library/s3.py | 8 ++++++++ .../src/settings_library/utils_r_clone.py | 2 +- .../simcore-sdk/src/simcore_sdk/node_ports_v2/r_clone.py | 2 +- .../tests/integration/test_node_ports_v2_r_clone.py | 4 ++-- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/packages/settings-library/src/settings_library/s3.py b/packages/settings-library/src/settings_library/s3.py index 48d0174ad9e..b8e58e6bbfc 100644 --- a/packages/settings-library/src/settings_library/s3.py +++ b/packages/settings-library/src/settings_library/s3.py @@ -1,4 +1,5 @@ from .base import BaseCustomSettings +from functools import cached_property class S3Settings(BaseCustomSettings): @@ -8,3 +9,10 @@ class S3Settings(BaseCustomSettings): S3_SECRET_KEY: str = "12345678" S3_BUCKET_NAME: str = "simcore" S3_SECURE: bool = False + + @cached_property + def endpoint(self) -> str: + if not self.S3_ENDPOINT.startswith("http"): + scheme = "https" if self.S3_SECURE else "http" + return f"{scheme}://{self.S3_ENDPOINT}" + return self.S3_ENDPOINT diff --git a/packages/settings-library/src/settings_library/utils_r_clone.py b/packages/settings-library/src/settings_library/utils_r_clone.py index b1a21e03a68..8fa8e7462c3 100644 --- a/packages/settings-library/src/settings_library/utils_r_clone.py +++ b/packages/settings-library/src/settings_library/utils_r_clone.py @@ -38,7 +38,7 @@ def get_r_clone_config(r_clone_settings: RCloneSettings) -> str: # replace entries in template r_clone_config = r_clone_config_template.format( - endpoint=r_clone_settings.R_CLONE_S3.S3_ENDPOINT, + endpoint=r_clone_settings.R_CLONE_S3.endpoint, access_key=r_clone_settings.R_CLONE_S3.S3_ACCESS_KEY, secret_key=r_clone_settings.R_CLONE_S3.S3_SECRET_KEY, ) diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/r_clone.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/r_clone.py index ce47c9c4d05..012556ec2b0 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/r_clone.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/r_clone.py @@ -68,7 +68,7 @@ async def _get_etag_via_s3(r_clone_settings: RCloneSettings, s3_path: str) -> ET aws_secret_access_key=r_clone_settings.R_CLONE_S3.S3_SECRET_KEY, ) async with session.resource( - "s3", endpoint_url=r_clone_settings.R_CLONE_S3.S3_ENDPOINT + "s3", endpoint_url=r_clone_settings.R_CLONE_S3.endpoint ) as s3: s3_object = await s3.Object( bucket_name=r_clone_settings.R_CLONE_S3.S3_BUCKET_NAME, diff --git a/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py b/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py index 856b6a46909..ce753a28cf9 100644 --- a/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py +++ b/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py @@ -57,7 +57,7 @@ async def r_clone_settings(minio_config: Dict[str, Any]) -> RCloneSettings: settings = RCloneSettings.parse_obj( dict( R_CLONE_S3=dict( - S3_ENDPOINT=f"http://{client['endpoint']}", # TODO: timeout check of the command + S3_ENDPOINT=client["endpoint"], S3_ACCESS_KEY=client["access_key"], S3_SECRET_KEY=client["secret_key"], S3_BUCKET_NAME=minio_config["bucket_name"], @@ -117,7 +117,7 @@ async def _get_s3_object( aws_secret_access_key=r_clone_settings.R_CLONE_S3.S3_SECRET_KEY, ) async with session.resource( - "s3", 
endpoint_url=r_clone_settings.R_CLONE_S3.S3_ENDPOINT + "s3", endpoint_url=r_clone_settings.R_CLONE_S3.endpoint ) as s3: s3_object = await s3.Object( bucket_name=r_clone_settings.R_CLONE_S3.S3_BUCKET_NAME, From e3730f9fb6ae37423531e15215845884b46e99ae Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 21 Apr 2022 08:38:48 +0200 Subject: [PATCH 10/78] moved rclone endpoint and settings --- .../simcore_service_director_v2/core/settings.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/services/director-v2/src/simcore_service_director_v2/core/settings.py b/services/director-v2/src/simcore_service_director_v2/core/settings.py index bec4eed6d45..62bc5c91f3e 100644 --- a/services/director-v2/src/simcore_service_director_v2/core/settings.py +++ b/services/director-v2/src/simcore_service_director_v2/core/settings.py @@ -26,8 +26,7 @@ from settings_library.http_client_request import ClientRequestSettings from settings_library.postgres import PostgresSettings from settings_library.rabbit import RabbitSettings -from settings_library.s3 import S3Settings -from settings_library.r_clone import S3Provider +from settings_library.r_clone import RCloneSettings from settings_library.tracing import TracingSettings from settings_library.utils_logging import MixinLoggingSettings from simcore_postgres_database.models.clusters import ClusterType @@ -58,9 +57,7 @@ class VFSCacheMode(str, Enum): FULL = "full" -class RCloneSettings(S3Settings): - R_CLONE_S3_PROVIDER: S3Provider - +class RCloneSettings(RCloneSettings): R_CLONE_DIR_CACHE_TIME_SECONDS: PositiveInt = Field( 10, description="time to cache directory entries for", @@ -87,13 +84,6 @@ def enforce_r_clone_requirement(cls, v, values) -> PositiveInt: ) return v - @cached_property - def endpoint(self) -> str: - if not self.S3_ENDPOINT.startswith("http"): - scheme = "https" if self.S3_SECURE else "http" - return f"{scheme}://{self.S3_ENDPOINT}" - return self.S3_ENDPOINT - class DirectorV0Settings(BaseCustomSettings): DIRECTOR_V0_ENABLED: bool = True From 94e4c5f87027f8fa52bcd15999bc17bdc44ae9c5 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 21 Apr 2022 08:39:11 +0200 Subject: [PATCH 11/78] new way of importing --- .../modules/dynamic_sidecar/volumes_resolver.py | 15 ++++++++------- .../test_dynamic_sidecar_nodeports_integration.py | 4 +++- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/volumes_resolver.py b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/volumes_resolver.py index b1212066737..825b012cbd8 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/volumes_resolver.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/volumes_resolver.py @@ -4,8 +4,9 @@ from models_library.projects import ProjectID from models_library.projects_nodes_io import NodeID +from settings_library.r_clone import S3Provider -from ...core.settings import RCloneSettings, S3Provider +from ...core.settings import RCloneSettings from .errors import DynamicSidecarError @@ -20,10 +21,10 @@ def _get_s3_volume_driver_config( "Name": "rclone", "Options": { "type": "s3", - "s3-access_key_id": r_clone_settings.S3_ACCESS_KEY, - "s3-secret_access_key": r_clone_settings.S3_SECRET_KEY, + "s3-access_key_id": r_clone_settings.R_CLONE_S3.S3_ACCESS_KEY, + "s3-secret_access_key": r_clone_settings.R_CLONE_S3.S3_SECRET_KEY, "s3-endpoint": r_clone_settings.endpoint, - 
"path": f"{r_clone_settings.S3_BUCKET_NAME}/{project_id}/{node_uuid}/{storage_directory_name}", + "path": f"{r_clone_settings.R_CLONE_S3.S3_BUCKET_NAME}/{project_id}/{node_uuid}/{storage_directory_name}", "allow-other": "true", "vfs-cache-mode": r_clone_settings.R_CLONE_VFS_CACHE_MODE.value, # Directly connected to how much time it takes for @@ -37,19 +38,19 @@ def _get_s3_volume_driver_config( extra_options = None - if r_clone_settings.R_CLONE_S3_PROVIDER == S3Provider.MINIO: + if r_clone_settings.R_CLONE_PROVIDER == S3Provider.MINIO: extra_options = { "s3-provider": "Minio", "s3-region": "us-east-1", "s3-location_constraint": "", "s3-server_side_encryption": "", } - elif r_clone_settings.R_CLONE_S3_PROVIDER == S3Provider.CEPH: + elif r_clone_settings.R_CLONE_PROVIDER == S3Provider.CEPH: extra_options = { "s3-provider": "Ceph", "s3-acl": "private", } - elif r_clone_settings.R_CLONE_S3_PROVIDER == S3Provider.AWS: + elif r_clone_settings.R_CLONE_PROVIDER == S3Provider.AWS: extra_options = { "s3-provider": "AWS", "s3-region": "us-east-1", diff --git a/services/director-v2/tests/integration/02/test_dynamic_sidecar_nodeports_integration.py b/services/director-v2/tests/integration/02/test_dynamic_sidecar_nodeports_integration.py index b36c859482b..4ba30ed77ea 100644 --- a/services/director-v2/tests/integration/02/test_dynamic_sidecar_nodeports_integration.py +++ b/services/director-v2/tests/integration/02/test_dynamic_sidecar_nodeports_integration.py @@ -611,7 +611,9 @@ async def _fetch_data_via_aioboto( aws_access_key_id=r_clone_settings.S3_ACCESS_KEY, aws_secret_access_key=r_clone_settings.S3_SECRET_KEY, ) - async with session.resource("s3", endpoint_url=r_clone_settings.endpoint) as s3: + async with session.resource( + "s3", endpoint_url=r_clone_settings.R_CLONE_S3.endpoint + ) as s3: bucket = await s3.Bucket(r_clone_settings.S3_BUCKET_NAME) async for s3_object in bucket.objects.all(): key_path = f"{project_id}/{node_id}/{DY_SERVICES_R_CLONE_DIR_NAME}/" From a7be73bd38d1908b3d3e2778256cbcdcc43b1c83 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 22 Apr 2022 14:24:25 +0200 Subject: [PATCH 12/78] added tests and fixed existing tests for nodeports --- api/specs/storage/openapi.yaml | 92 +++++++++++- .../src/settings_library/r_clone.py | 6 + .../src/settings_library/utils_r_clone.py | 1 + .../simcore_sdk/node_ports_v2/port_utils.py | 11 +- .../src/simcore_sdk/node_ports_v2/r_clone.py | 132 ++++++++++++++---- .../simcore-sdk/tests/integration/conftest.py | 26 ++++ .../test_node_ports_v2_nodeports2.py | 94 +++++++++++-- .../integration/test_node_ports_v2_r_clone.py | 111 ++++++++------- .../tests/unit/test_node_ports_v2_port.py | 3 +- .../tests/unit/test_node_ports_v2_r_clone.py | 7 +- 10 files changed, 381 insertions(+), 102 deletions(-) diff --git a/api/specs/storage/openapi.yaml b/api/specs/storage/openapi.yaml index f5737d013a8..8ac73b667c3 100644 --- a/api/specs/storage/openapi.yaml +++ b/api/specs/storage/openapi.yaml @@ -214,6 +214,36 @@ paths: default: $ref: "#/components/responses/DefaultErrorResponse" + /locations/{location_id}/files/{fileId}/s3/link: + get: + summary: Returns an s3 file link if the user has permissions + operationId: get_s3_link + parameters: + - name: fileId + in: path + required: true + schema: + type: string + - name: location_id + in: path + required: true + schema: + type: string + - name: user_id + in: query + required: true + schema: + type: string + responses: + "200": + description: "Provides an s3 link for the requested fileId" + content: + 
application/json: + schema: + $ref: "#/components/schemas/S3LinkEnvelope" + default: + $ref: "#/components/responses/DefaultErrorResponse" + /locations/{location_id}/files/{fileId}/metadata: get: summary: Get file metadata @@ -257,11 +287,6 @@ paths: required: true schema: type: string - requestBody: - content: - application/json: - schema: - $ref: "#/components/schemas/FileMetaData" responses: "200": description: "Returns file metadata" @@ -271,6 +296,34 @@ paths: $ref: "#/components/schemas/FileMetaEnvelope" default: $ref: "#/components/responses/DefaultErrorResponse" + delete: + summary: Removes a file's meta data entry + operationId: delete_file_meta_data + parameters: + - name: fileId + in: path + required: true + schema: + type: string + - name: location_id + in: path + required: true + schema: + type: string + - name: user_id + in: query + required: true + schema: + type: string + responses: + "200": + description: "Removes the file meta data entry from the database" + content: + application/json: + schema: + $ref: "#/components/schemas/FileMetaDataDeleteEnvelope" + default: + $ref: "#/components/responses/DefaultErrorResponse" /locations/{location_id}/files/{fileId}: get: @@ -766,6 +819,35 @@ components: items: $ref: "#/components/schemas/DatasetMetaData" + S3LinkEnvelope: + type: object + required: + - data + - error + properties: + data: + nullable: true + type: object + properties: + s3_link: + type: string + error: + nullable: true + default: null + + FileMetaDataDeleteEnvelope: + type: object + required: + - data + - error + properties: + data: + nullable: true + default: null + error: + nullable: true + default: null + FileMetaEnvelope: type: object required: diff --git a/packages/settings-library/src/settings_library/r_clone.py b/packages/settings-library/src/settings_library/r_clone.py index 62de59c2a92..535aff3f0eb 100644 --- a/packages/settings-library/src/settings_library/r_clone.py +++ b/packages/settings-library/src/settings_library/r_clone.py @@ -22,3 +22,9 @@ class _RequiredS3Settings(S3Settings): class RCloneSettings(BaseCustomSettings): R_CLONE_S3: _RequiredS3Settings = Field(auto_default_from_env=True) R_CLONE_PROVIDER: S3Provider + R_CLONE_STORAGE_ENDPOINT: str = Field( + ..., description="endpoint where storage is present" + ) + + R_CLONE_AIOHTTP_CLIENT_TIMEOUT_TOTAL: float = 20 + R_CLONE_AIOHTTP_CLIENT_TIMEOUT_SOCK_CONNECT: float = 5 diff --git a/packages/settings-library/src/settings_library/utils_r_clone.py b/packages/settings-library/src/settings_library/utils_r_clone.py index 8fa8e7462c3..1c10a29cd3e 100644 --- a/packages/settings-library/src/settings_library/utils_r_clone.py +++ b/packages/settings-library/src/settings_library/utils_r_clone.py @@ -14,6 +14,7 @@ } _PROVIDER_ENDTIRES: Dict[S3Provider, Dict[str, str]] = { + # NOTE: # AWS_SESSION_TOKEN should be required for STS S3Provider.AWS: {"provider": "AWS"}, S3Provider.CEPH: {"provider": "Ceph", "endpoint": "{endpoint}"}, S3Provider.MINIO: {"provider": "Minio", "endpoint": "{endpoint}"}, diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/port_utils.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/port_utils.py index 7138c72d6d5..c191cab5b4a 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/port_utils.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/port_utils.py @@ -10,7 +10,7 @@ from ..node_ports_common import config, data_items_utils, filemanager from .links import DownloadLink, FileLink, ItemConcreteValue, ItemValue, PortLink -from .r_clone import 
is_r_clone_installed, sync_local_to_s3 +from .r_clone import is_r_clone_available, sync_local_to_s3 log = logging.getLogger(__name__) @@ -152,11 +152,14 @@ async def push_file_to_store( log.debug("file path %s will be uploaded to s3", file) s3_object = data_items_utils.encode_file_id(file, project_id, node_id) - if await is_r_clone_installed(r_clone_settings): + if await is_r_clone_available(r_clone_settings): e_tag = await sync_local_to_s3( - r_clone_settings=r_clone_settings, s3_path=s3_object, local_file_path=file + r_clone_settings=r_clone_settings, + s3_object=s3_object, + local_file_path=file, + user_id=user_id, ) - store_id = 0 # simcore + store_id = 0 # simcore only feature else: store_id, e_tag = await filemanager.upload_file( user_id=user_id, diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/r_clone.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/r_clone.py index 012556ec2b0..5b835ace69d 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/r_clone.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/r_clone.py @@ -1,12 +1,13 @@ import asyncio -import json import logging +import re +import urllib.parse from contextlib import asynccontextmanager from pathlib import Path from typing import AsyncGenerator, Optional -import aioboto3 from aiofiles import tempfile +from aiohttp import ClientSession, ClientTimeout, web from cache import AsyncLRU from settings_library.r_clone import RCloneSettings from settings_library.utils_r_clone import get_r_clone_config @@ -52,8 +53,8 @@ async def _async_command(command: str, *, cwd: Optional[str] = None) -> str: return decoded_stdout -@AsyncLRU(maxsize=1) -async def is_r_clone_installed(r_clone_settings: Optional[RCloneSettings]) -> bool: +@AsyncLRU(maxsize=2) +async def is_r_clone_available(r_clone_settings: Optional[RCloneSettings]) -> bool: """returns: True if the `rclone` cli is installed and a configuration is provided""" try: await _async_command("rclone --version") @@ -62,34 +63,103 @@ async def is_r_clone_installed(r_clone_settings: Optional[RCloneSettings]) -> bo return False -async def _get_etag_via_s3(r_clone_settings: RCloneSettings, s3_path: str) -> ETag: - session = aioboto3.Session( - aws_access_key_id=r_clone_settings.R_CLONE_S3.S3_ACCESS_KEY, - aws_secret_access_key=r_clone_settings.R_CLONE_S3.S3_SECRET_KEY, - ) - async with session.resource( - "s3", endpoint_url=r_clone_settings.R_CLONE_S3.endpoint - ) as s3: - s3_object = await s3.Object( - bucket_name=r_clone_settings.R_CLONE_S3.S3_BUCKET_NAME, - key=s3_path.lstrip(r_clone_settings.R_CLONE_S3.S3_BUCKET_NAME), +@asynccontextmanager +async def _get_client_session( + r_clone_settings: RCloneSettings, +) -> AsyncGenerator[ClientSession, None]: + client_timeout = ClientTimeout( + total=r_clone_settings.R_CLONE_AIOHTTP_CLIENT_TIMEOUT_TOTAL, + sock_connect=r_clone_settings.R_CLONE_AIOHTTP_CLIENT_TIMEOUT_SOCK_CONNECT, + ) # type: ignore + + async with ClientSession(timeout=client_timeout) as session: + yield session + + +async def _get_s3_link( + r_clone_settings: RCloneSettings, s3_object: str, user_id: int +) -> str: + async with _get_client_session(r_clone_settings) as session: + url = "{endpoint}/v0/locations/0/files/{s3_object}/s3/link".format( + endpoint=r_clone_settings.R_CLONE_STORAGE_ENDPOINT, + s3_object=urllib.parse.quote_plus(s3_object), + ) + logger.debug("%s", f"{url=}") + result = await session.get(url, params=dict(user_id=user_id)) + + if result.status == web.HTTPForbidden.status_code: + raise RCloneError( + ( + f"Insufficient 
permissions to upload {s3_object=} for {user_id=}. " + f"Storage: {await result.text()}" + ) + ) + + if result.status != web.HTTPOk.status_code: + raise RCloneError( + f"Could not fetch s3_link: status={result.status} {await result.text()}" + ) + + response = await result.json() + return response["data"]["s3_link"] + + +async def _update_file_meta_data( + r_clone_settings: RCloneSettings, s3_object: str +) -> ETag: + async with _get_client_session(r_clone_settings) as session: + url = "{endpoint}/v0/locations/0/files/{s3_object}/metadata".format( + endpoint=r_clone_settings.R_CLONE_STORAGE_ENDPOINT, + s3_object=urllib.parse.quote_plus(s3_object), ) - e_tag_result = await s3_object.e_tag - # NOTE: above result is JSON encoded for some reason - return json.loads(e_tag_result) + logger.debug("%s", f"{url=}") + result = await session.patch(url) + if result.status != web.HTTPOk.status_code: + raise RCloneError( + f"Could not fetch metadata: status={result.status} {await result.text()}" + ) + + response = await result.json() + logger.debug("metadata response %s", response) + return response["data"]["entity_tag"] + + +async def _delete_file_meta_data( + r_clone_settings: RCloneSettings, s3_object: str, user_id: int +) -> None: + async with _get_client_session(r_clone_settings) as session: + url = "{endpoint}/v0/locations/0/files/{s3_object}/metadata".format( + endpoint=r_clone_settings.R_CLONE_STORAGE_ENDPOINT, + s3_object=urllib.parse.quote_plus(s3_object), + ) + logger.debug("%s", f"{url=}") + result = await session.delete(url, params=dict(user_id=user_id)) + if result.status != web.HTTPOk.status_code: + raise RCloneError( + f"Could not fetch metadata: status={result.status} {await result.text()}" + ) async def sync_local_to_s3( - r_clone_settings: Optional[RCloneSettings], s3_path: str, local_file_path: Path + r_clone_settings: Optional[RCloneSettings], + s3_object: str, + local_file_path: Path, + user_id: int, ) -> ETag: if r_clone_settings is None: raise RCloneError( ( - f"Could not sync {local_file_path=} to {s3_path=}, provided " + f"Could not sync {local_file_path=} to {s3_object=}, provided " f"config is invalid{r_clone_settings=}" ) ) + s3_link = await _get_s3_link( + r_clone_settings=r_clone_settings, s3_object=s3_object, user_id=user_id + ) + s3_path = re.sub(r"^s3://", "", s3_link) + logger.debug(" %s; %s", f"{s3_link=}", f"{s3_path=}") + r_clone_config_file_content = get_r_clone_config(r_clone_settings) async with _config_file(r_clone_config_file_content) as config_file_name: source_path = local_file_path @@ -111,6 +181,7 @@ async def sync_local_to_s3( # '/tmp/pytest-of-silenthk/pytest-80/test_sync_local_to_s30' # 'dst:simcore/00000000-0000-0000-0000-000000000001/00000000-0000-0000-0000-000000000002' # --progress + # --copy-links # --include # 'filee3e70682-c209-4cac-a29f-6fbed82c07cd.txt' r_clone_command = [ @@ -121,14 +192,21 @@ async def sync_local_to_s3( f"'{source_path.parent}'", f"'dst:{destination_path.parent}'", "--progress", + "--copy-links", "--include", f"'{file_name}'", ] - command_result = await _async_command( - " ".join(r_clone_command), cwd=f"{source_path.parent}" - ) - logger.debug(command_result) - return await _get_etag_via_s3( - r_clone_settings=r_clone_settings, s3_path=s3_path - ) + try: + await _async_command(" ".join(r_clone_command), cwd=f"{source_path.parent}") + return await _update_file_meta_data( + r_clone_settings=r_clone_settings, s3_object=s3_object + ) + except Exception as e: + logger.warning( + "There was an error while uploading %s. 
Removing metadata", s3_object + ) + await _delete_file_meta_data( + r_clone_settings=r_clone_settings, s3_object=s3_object, user_id=user_id + ) + raise e diff --git a/packages/simcore-sdk/tests/integration/conftest.py b/packages/simcore-sdk/tests/integration/conftest.py index 0605d3e3ea6..fb1187e3707 100644 --- a/packages/simcore-sdk/tests/integration/conftest.py +++ b/packages/simcore-sdk/tests/integration/conftest.py @@ -14,11 +14,13 @@ import sqlalchemy as sa from aiohttp import ClientSession from pytest_simcore.helpers.rawdata_fakers import random_project, random_user +from settings_library.r_clone import RCloneSettings, S3Provider from simcore_postgres_database.models.comp_pipeline import comp_pipeline from simcore_postgres_database.models.comp_tasks import comp_tasks from simcore_postgres_database.models.projects import projects from simcore_postgres_database.models.users import users from simcore_sdk.node_ports import node_config +from simcore_sdk.node_ports_v2.r_clone import is_r_clone_available from yarl import URL @@ -323,3 +325,27 @@ def _assign_config( ) if not entry[2] is None: config_dict[port_type].update({entry[0]: entry[2]}) + + +@pytest.fixture +async def r_clone_settings( + minio_config: Dict[str, Any], storage_service: URL +) -> RCloneSettings: + client = minio_config["client"] + settings = RCloneSettings.parse_obj( + dict( + R_CLONE_S3=dict( + S3_ENDPOINT=client["endpoint"], + S3_ACCESS_KEY=client["access_key"], + S3_SECRET_KEY=client["secret_key"], + S3_BUCKET_NAME=minio_config["bucket_name"], + S3_SECURE=client["secure"], + ), + R_CLONE_PROVIDER=S3Provider.MINIO, + R_CLONE_STORAGE_ENDPOINT=f"{storage_service}", + ) + ) + if not await is_r_clone_available(settings): + pytest.skip("rclone not installed") + + return settings diff --git a/packages/simcore-sdk/tests/integration/test_node_ports_v2_nodeports2.py b/packages/simcore-sdk/tests/integration/test_node_ports_v2_nodeports2.py index c2b1f39fa52..6bb904b4ef7 100644 --- a/packages/simcore-sdk/tests/integration/test_node_ports_v2_nodeports2.py +++ b/packages/simcore-sdk/tests/integration/test_node_ports_v2_nodeports2.py @@ -10,12 +10,13 @@ import threading from asyncio import gather from pathlib import Path -from typing import Any, Callable, Dict, Iterable, Type, Union +from typing import Any, Callable, Dict, Iterable, Optional, Type, Union from uuid import uuid4 import np_helpers # pylint: disable=no-name-in-module import pytest import sqlalchemy as sa +from settings_library.r_clone import RCloneSettings from simcore_sdk import node_ports_v2 from simcore_sdk.node_ports_common.exceptions import UnboundPortError from simcore_sdk.node_ports_v2 import exceptions @@ -28,7 +29,10 @@ "storage", ] -pytest_simcore_ops_services_selection = ["minio", "adminer"] +pytest_simcore_ops_services_selection = [ + "minio", + "adminer", +] async def _check_port_valid( @@ -135,27 +139,47 @@ def config_value_symlink_path(symlink_path: Path) -> Dict[str, Any]: return {"store": "0", "path": symlink_path} +@pytest.fixture(params=[True, False]) +def option_r_clone_settings( + request, r_clone_settings: RCloneSettings +) -> Optional[RCloneSettings]: + if request.param: + return r_clone_settings + return None + + async def test_default_configuration( user_id: int, project_id: str, node_uuid: str, default_configuration: Dict[str, Any], + option_r_clone_settings: Optional[RCloneSettings], ): config_dict = default_configuration await check_config_valid( await node_ports_v2.ports( - user_id=user_id, project_id=project_id, node_uuid=node_uuid + 
user_id=user_id, + project_id=project_id, + node_uuid=node_uuid, + r_clone_settings=option_r_clone_settings, ), config_dict, ) async def test_invalid_ports( - user_id: int, project_id: str, node_uuid: str, special_configuration: Callable + user_id: int, + project_id: str, + node_uuid: str, + special_configuration: Callable, + option_r_clone_settings: Optional[RCloneSettings], ): config_dict, _, _ = special_configuration() PORTS = await node_ports_v2.ports( - user_id=user_id, project_id=project_id, node_uuid=node_uuid + user_id=user_id, + project_id=project_id, + node_uuid=node_uuid, + r_clone_settings=option_r_clone_settings, ) await check_config_valid(PORTS, config_dict) @@ -189,6 +213,7 @@ async def test_port_value_accessors( item_type: str, item_value: ItemConcreteValue, item_pytype: Type, + option_r_clone_settings: Optional[RCloneSettings], ): # pylint: disable=W0613, W0621 item_key = "some_key" config_dict, _, _ = special_configuration( @@ -197,7 +222,10 @@ async def test_port_value_accessors( ) PORTS = await node_ports_v2.ports( - user_id=user_id, project_id=project_id, node_uuid=node_uuid + user_id=user_id, + project_id=project_id, + node_uuid=node_uuid, + r_clone_settings=option_r_clone_settings, ) await check_config_valid(PORTS, config_dict) @@ -241,6 +269,7 @@ async def test_port_file_accessors( project_id: str, node_uuid: str, e_tag: str, + option_r_clone_settings: Optional[RCloneSettings], ): # pylint: disable=W0613, W0621 config_value["path"] = f"{project_id}/{node_uuid}/{Path(config_value['path']).name}" @@ -254,7 +283,10 @@ async def test_port_file_accessors( assert _node_uuid == node_uuid PORTS = await node_ports_v2.ports( - user_id=user_id, project_id=project_id, node_uuid=node_uuid + user_id=user_id, + project_id=project_id, + node_uuid=node_uuid, + r_clone_settings=option_r_clone_settings, ) await check_config_valid(PORTS, config_dict) assert await (await PORTS.outputs)["out_34"].get() is None # check emptyness @@ -298,10 +330,14 @@ async def test_adding_new_ports( node_uuid: str, special_configuration: Callable, postgres_db: sa.engine.Engine, + option_r_clone_settings: Optional[RCloneSettings], ): config_dict, project_id, node_uuid = special_configuration() PORTS = await node_ports_v2.ports( - user_id=user_id, project_id=project_id, node_uuid=node_uuid + user_id=user_id, + project_id=project_id, + node_uuid=node_uuid, + r_clone_settings=option_r_clone_settings, ) await check_config_valid(PORTS, config_dict) @@ -345,13 +381,17 @@ async def test_removing_ports( node_uuid: str, special_configuration: Callable, postgres_db: sa.engine.Engine, + option_r_clone_settings: Optional[RCloneSettings], ): config_dict, project_id, node_uuid = special_configuration( inputs=[("in_14", "integer", 15), ("in_17", "boolean", False)], outputs=[("out_123", "string", "blahblah"), ("out_2", "number", -12.3)], ) # pylint: disable=W0612 PORTS = await node_ports_v2.ports( - user_id=user_id, project_id=project_id, node_uuid=node_uuid + user_id=user_id, + project_id=project_id, + node_uuid=node_uuid, + r_clone_settings=option_r_clone_settings, ) await check_config_valid(PORTS, config_dict) # let's remove the first input @@ -394,6 +434,7 @@ async def test_get_value_from_previous_node( item_type: str, item_value: ItemConcreteValue, item_pytype: Type, + option_r_clone_settings: Optional[RCloneSettings], ): config_dict, _, _ = special_2nodes_configuration( prev_node_inputs=None, @@ -406,7 +447,10 @@ async def test_get_value_from_previous_node( ) PORTS = await node_ports_v2.ports( - user_id=user_id, 
project_id=project_id, node_uuid=node_uuid + user_id=user_id, + project_id=project_id, + node_uuid=node_uuid, + r_clone_settings=option_r_clone_settings, ) await check_config_valid(PORTS, config_dict) @@ -434,6 +478,7 @@ async def test_get_file_from_previous_node( item_type: str, item_value: str, item_pytype: Type, + option_r_clone_settings: Optional[RCloneSettings], ): config_dict, _, _ = special_2nodes_configuration( prev_node_inputs=None, @@ -445,7 +490,10 @@ async def test_get_file_from_previous_node( node_id=node_uuid, ) PORTS = await node_ports_v2.ports( - user_id=user_id, project_id=project_id, node_uuid=node_uuid + user_id=user_id, + project_id=project_id, + node_uuid=node_uuid, + r_clone_settings=option_r_clone_settings, ) await check_config_valid(PORTS, config_dict) file_path = await (await PORTS.inputs)["in_15"].get() @@ -484,6 +532,7 @@ async def test_get_file_from_previous_node_with_mapping_of_same_key_name( item_value: str, item_alias: str, item_pytype: Type, + option_r_clone_settings: Optional[RCloneSettings], ): config_dict, _, this_node_uuid = special_2nodes_configuration( prev_node_inputs=None, @@ -495,7 +544,10 @@ async def test_get_file_from_previous_node_with_mapping_of_same_key_name( node_id=node_uuid, ) PORTS = await node_ports_v2.ports( - user_id=user_id, project_id=project_id, node_uuid=node_uuid + user_id=user_id, + project_id=project_id, + node_uuid=node_uuid, + r_clone_settings=option_r_clone_settings, ) await check_config_valid(PORTS, config_dict) # add a filetokeymap @@ -541,6 +593,7 @@ async def test_file_mapping( item_value: str, item_alias: str, item_pytype: Type, + option_r_clone_settings: Optional[RCloneSettings], ): config_dict, project_id, node_uuid = special_configuration( inputs=[("in_1", item_type, await store_link(item_value))], @@ -549,7 +602,10 @@ async def test_file_mapping( node_id=node_uuid, ) PORTS = await node_ports_v2.ports( - user_id=user_id, project_id=project_id, node_uuid=node_uuid + user_id=user_id, + project_id=project_id, + node_uuid=node_uuid, + r_clone_settings=option_r_clone_settings, ) await check_config_valid(PORTS, config_dict) # add a filetokeymap @@ -622,6 +678,7 @@ async def test_regression_concurrent_port_update_fails( int_item_value: int, parallel_int_item_value: int, port_count: int, + option_r_clone_settings: Optional[RCloneSettings], ) -> None: """ when using `await PORTS.outputs` test will fail @@ -632,7 +689,10 @@ async def test_regression_concurrent_port_update_fails( config_dict, _, _ = special_configuration(inputs=[], outputs=outputs) PORTS = await node_ports_v2.ports( - user_id=user_id, project_id=project_id, node_uuid=node_uuid + user_id=user_id, + project_id=project_id, + node_uuid=node_uuid, + r_clone_settings=option_r_clone_settings, ) await check_config_valid(PORTS, config_dict) @@ -667,13 +727,17 @@ async def test_batch_update_inputs_outputs( node_uuid: str, special_configuration: Callable, port_count: int, + option_r_clone_settings: Optional[RCloneSettings], ) -> None: outputs = [(f"value_out_{i}", "integer", None) for i in range(port_count)] inputs = [(f"value_in_{i}", "integer", None) for i in range(port_count)] config_dict, _, _ = special_configuration(inputs=inputs, outputs=outputs) PORTS = await node_ports_v2.ports( - user_id=user_id, project_id=project_id, node_uuid=node_uuid + user_id=user_id, + project_id=project_id, + node_uuid=node_uuid, + r_clone_settings=option_r_clone_settings, ) await check_config_valid(PORTS, config_dict) diff --git 
a/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py b/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py index ce753a28cf9..49826af750b 100644 --- a/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py +++ b/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py @@ -4,23 +4,33 @@ import shutil from contextlib import asynccontextmanager from pathlib import Path -from typing import Any, AsyncGenerator, AsyncIterable, Dict, Iterator -from uuid import UUID +from typing import AsyncGenerator, AsyncIterable, Iterator import aioboto3 import pytest +import sqlalchemy as sa from faker import Faker -from settings_library.r_clone import RCloneSettings, S3Provider -from simcore_sdk.node_ports_v2.r_clone import is_r_clone_installed, sync_local_to_s3 +from pytest_mock.plugin import MockerFixture +from settings_library.r_clone import RCloneSettings +from simcore_postgres_database.models.file_meta_data import file_meta_data +from simcore_sdk.node_ports_v2 import r_clone pytest_simcore_core_services_selection = [ + "migration", "postgres", + "storage", ] pytest_simcore_ops_services_selection = [ "minio", + "adminer", ] + +class _TestException(Exception): + pass + + # FIXTURES @@ -52,47 +62,8 @@ def local_file_for_download(upload_file_dir: Path, file_name: str) -> Path: @pytest.fixture -async def r_clone_settings(minio_config: Dict[str, Any]) -> RCloneSettings: - client = minio_config["client"] - settings = RCloneSettings.parse_obj( - dict( - R_CLONE_S3=dict( - S3_ENDPOINT=client["endpoint"], - S3_ACCESS_KEY=client["access_key"], - S3_SECRET_KEY=client["secret_key"], - S3_BUCKET_NAME=minio_config["bucket_name"], - S3_SECURE=client["secure"], - ), - R_CLONE_PROVIDER=S3Provider.MINIO, - ) - ) - if not await is_r_clone_installed(settings): - pytest.skip("rclone not installed") - - return settings - - -@pytest.fixture -def project_id() -> UUID: - return UUID(int=1) - - -@pytest.fixture -def node_uuid() -> UUID: - return UUID(int=2) - - -@pytest.fixture -def s3_object( - r_clone_settings: RCloneSettings, project_id: UUID, node_uuid: UUID, file_name: str -) -> str: - - s3_path = ( - Path(r_clone_settings.R_CLONE_S3.S3_BUCKET_NAME) - / f"{project_id}" - / f"{node_uuid}" - / file_name - ) +def s3_object(project_id: str, node_uuid: str, file_name: str) -> str: + s3_path = Path(project_id) / node_uuid / file_name return f"{s3_path}" @@ -105,6 +76,17 @@ async def cleanup_s3( await s3_object.delete() +@pytest.fixture +def mock_update_file_meta_data(mocker: MockerFixture) -> None: + async def _raise_error(*args, **kwargs) -> None: + raise _TestException() + + mocker.patch( + "simcore_sdk.node_ports_v2.r_clone._update_file_meta_data", + side_effect=_raise_error, + ) + + # UTILS @@ -133,6 +115,16 @@ async def _download_s3_object( await s3_object.download_file(f"{local_path}") +def _is_file_present(postgres_db: sa.engine.Engine, s3_object: str) -> bool: + with postgres_db.begin() as conn: + result = conn.execute( + file_meta_data.select().where(file_meta_data.c.file_uuid == s3_object) + ) + result_len = len(list(result)) + assert result_len <= 1 + return result_len == 1 + + # TESTS @@ -141,12 +133,15 @@ async def test_sync_local_to_s3( s3_object: str, file_to_upload: Path, local_file_for_download: Path, + user_id: int, + postgres_db: sa.engine.Engine, cleanup_s3: None, ) -> None: - etag = await sync_local_to_s3( + etag = await r_clone.sync_local_to_s3( r_clone_settings=r_clone_settings, - s3_path=s3_object, + s3_object=s3_object, 
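local_file_path=file_to_upload,
+        user_id=user_id,
     )
+    # NOTE: a sketch of what the call above is expected to do, given the
+    # implementation added in this series: fetch an s3 link from the storage
+    # service, run `rclone sync` against it, then PATCH the metadata endpoint,
+    # which returns the e-tag asserted below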
     assert isinstance(etag, str)
@@ -160,3 +155,25 @@ async def test_sync_local_to_s3(
 
     # check same file contents after upload and download
     assert file_to_upload.read_text() == local_file_for_download.read_text()
+
+    assert _is_file_present(postgres_db=postgres_db, s3_object=s3_object) is True
+
+
+async def test_sync_local_to_s3_cleanup_on_error(
+    r_clone_settings: RCloneSettings,
+    s3_object: str,
+    file_to_upload: Path,
+    local_file_for_download: Path,
+    user_id: int,
+    cleanup_s3: None,
+    mock_update_file_meta_data: None,
+    postgres_db: sa.engine.Engine,
+) -> None:
+    with pytest.raises(_TestException):
+        await r_clone.sync_local_to_s3(
+            r_clone_settings=r_clone_settings,
+            s3_object=s3_object,
+            local_file_path=file_to_upload,
+            user_id=user_id,
+        )
+    assert _is_file_present(postgres_db=postgres_db, s3_object=s3_object) is False
diff --git a/packages/simcore-sdk/tests/unit/test_node_ports_v2_port.py b/packages/simcore-sdk/tests/unit/test_node_ports_v2_port.py
index a0ae1bfb6a4..94adf886001 100644
--- a/packages/simcore-sdk/tests/unit/test_node_ports_v2_port.py
+++ b/packages/simcore-sdk/tests/unit/test_node_ports_v2_port.py
@@ -23,7 +23,7 @@
     Type,
     Union,
 )
-from unittest.mock import AsyncMock
+from unittest.mock import AsyncMock, Mock
 
 import pytest
 from aiohttp.client import ClientSession
@@ -607,6 +607,7 @@ class FakeNodePorts:
     user_id: int
     project_id: str
     node_uuid: str
+    r_clone_settings: Optional[Any] = None
 
     @staticmethod
     async def get(key):
diff --git a/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py b/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py
index 513a31bf0ef..383df96898d 100644
--- a/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py
+++ b/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py
@@ -32,19 +32,20 @@ def r_clone_settings(
     monkeypatch.setenv("S3_SECRET_KEY", "secret_key")
     monkeypatch.setenv("S3_BUCKET_NAME", "bucket_name")
     monkeypatch.setenv("S3_SECURE", "false")
+    monkeypatch.setenv("R_CLONE_STORAGE_ENDPOINT", "storage_endpoint")
     return RCloneSettings()
 
 
-async def test_is_r_clone_installed_cached(
+async def test_is_r_clone_available_cached(
     caplog: LogCaptureFixture, r_clone_settings: RCloneSettings
 ) -> None:
     for _ in range(3):
-        result = await r_clone.is_r_clone_installed(r_clone_settings)
+        result = await r_clone.is_r_clone_available(r_clone_settings)
         assert type(result) is bool
         assert "'rclone --version' result:\n" in caplog.text
     assert caplog.text.count("'rclone --version' result:\n") == 1
 
-    assert await r_clone.is_r_clone_installed(None) is False
+    assert await r_clone.is_r_clone_available(None) is False
 
 
 async def test__config_file(text_to_write: str) -> None:
From db6be1f90f264fb0e6e4b2720d0d5dc3ed210f62 Mon Sep 17 00:00:00 2001
From: Andrei Neagu
Date: Fri, 22 Apr 2022 14:24:58 +0200
Subject: [PATCH 13/78] extended storage service

---
 .../api/v0/openapi.yaml | 89 +++++++++++++++++--
 .../src/simcore_service_storage/dsm.py | 56 +++++++++++-
 .../src/simcore_service_storage/handlers.py | 68 +++++++++++---
 .../api/v0/openapi.yaml | 22 ++---
 4 files changed, 205 insertions(+), 30 deletions(-)

diff --git a/services/storage/src/simcore_service_storage/api/v0/openapi.yaml b/services/storage/src/simcore_service_storage/api/v0/openapi.yaml
index be06e693867..4fce932ed58 100644
--- a/services/storage/src/simcore_service_storage/api/v0/openapi.yaml
+++ b/services/storage/src/simcore_service_storage/api/v0/openapi.yaml
@@ -208,6 +208,35 @@ paths:
                $ref: 
'#/components/schemas/FileMetaDataArrayEnveloped' default: $ref: '#/components/responses/DefaultErrorResponse' + '/locations/{location_id}/files/{fileId}/s3/link': + get: + summary: Returns an s3 file link if the user has permissions + operationId: get_s3_link + parameters: + - name: fileId + in: path + required: true + schema: + type: string + - name: location_id + in: path + required: true + schema: + type: string + - name: user_id + in: query + required: true + schema: + type: string + responses: + '200': + description: Provides an s3 link for the requested fileId + content: + application/json: + schema: + $ref: '#/components/schemas/S3LinkEnvelope' + default: + $ref: '#/components/responses/DefaultErrorResponse' '/locations/{location_id}/files/{fileId}/metadata': get: summary: Get file metadata @@ -251,11 +280,6 @@ paths: required: true schema: type: string - requestBody: - content: - application/json: - schema: - $ref: '#/components/schemas/FileMetaData' responses: '200': description: Returns file metadata @@ -265,6 +289,34 @@ paths: $ref: '#/components/schemas/FileMetaEnvelope' default: $ref: '#/components/responses/DefaultErrorResponse' + delete: + summary: Removes a file's meta data entry + operationId: delete_file_meta_data + parameters: + - name: fileId + in: path + required: true + schema: + type: string + - name: location_id + in: path + required: true + schema: + type: string + - name: user_id + in: query + required: true + schema: + type: string + responses: + '200': + description: Removes the file meta data entry from the database + content: + application/json: + schema: + $ref: '#/components/schemas/FileMetaDataDeleteEnvelope' + default: + $ref: '#/components/responses/DefaultErrorResponse' '/locations/{location_id}/files/{fileId}': get: summary: Gets download link for file at location @@ -724,6 +776,33 @@ components: type: array items: $ref: '#/components/schemas/DatasetMetaData' + S3LinkEnvelope: + type: object + required: + - data + - error + properties: + data: + nullable: true + type: object + properties: + s3_link: + type: string + error: + nullable: true + default: null + FileMetaDataDeleteEnvelope: + type: object + required: + - data + - error + properties: + data: + nullable: true + default: null + error: + nullable: true + default: null FileMetaEnvelope: type: object required: diff --git a/services/storage/src/simcore_service_storage/dsm.py b/services/storage/src/simcore_service_storage/dsm.py index e10155d4f77..6f7d58f8886 100644 --- a/services/storage/src/simcore_service_storage/dsm.py +++ b/services/storage/src/simcore_service_storage/dsm.py @@ -28,6 +28,7 @@ from sqlalchemy.dialects.postgresql import insert as pg_insert from sqlalchemy.sql.expression import literal_column from tenacity import retry +from tenacity._asyncio import AsyncRetrying from tenacity.before_sleep import before_sleep_log from tenacity.retry import retry_if_exception_type, retry_if_result from tenacity.stop import stop_after_delay @@ -488,15 +489,44 @@ async def try_update_database_from_storage( ) async def auto_update_database_from_storage_task( self, file_uuid: str, bucket_name: str, object_name: str - ): + ) -> Optional[FileMetaDataEx]: return await self.try_update_database_from_storage( file_uuid, bucket_name, object_name, silence_exception=True ) - async def upload_link(self, user_id: str, file_uuid: str): + async def update_metadata(self, file_uuid: str) -> Optional[FileMetaDataEx]: + bucket_name = self.simcore_bucket_name + object_name = file_uuid + return await 
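self.auto_update_database_from_storage_task(
+            file_uuid=file_uuid,
+            bucket_name=bucket_name,
+            object_name=object_name,
+        )
+
+    # NOTE: a sketch of the intended call order for rclone-based uploads, as
+    # driven by the simcore-sdk client in this series (`dsm` stands for this
+    # DataStorageManager instance; nothing below is enforced here):
+    #
+    #   s3_link = await dsm.get_s3_link(user_id, file_uuid)  # creates DB entry
+    #   # ... the client runs `rclone sync` against s3_link ...
+    #   fmd_ex = await dsm.update_metadata(file_uuid)  # re-reads e-tag from S3
+    #   # on a failed upload: await dsm.delete_metadata(user_id, file_uuid)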
+
+    async def delete_metadata(self, user_id: int, file_uuid: str) -> None:
+        async with self.engine.acquire() as conn:
+            can: Optional[AccessRights] = await get_file_access_rights(
+                conn, int(user_id), file_uuid
+            )
+            if not can.write:
+                message = f"User {user_id} was not allowed to delete file {file_uuid}"
+                logger.debug(message)
+                raise web.HTTPForbidden(reason=message)
+
+            try:
+                await conn.execute(
+                    file_meta_data.delete().where(
+                        file_meta_data.c.file_uuid == file_uuid
+                    )
+                )
+            except Exception:
+                message = f"Could not delete metadata entry for file {file_uuid}"
+                logger.debug(message)
+                raise web.HTTPForbidden(reason=message)
+
+    async def _generate_metadata_for_link(self, user_id: str, file_uuid: str):
         """
-        Creates pre-signed upload link and updates metadata table when
-        link is used and upload is successfuly completed
+        Updates metadata table when link is used and upload is successfully completed
 
         SEE _metadata_file_updater
         """
@@ -533,6 +563,12 @@ async def _init_metadata() -> Tuple[int, str]:
 
         await _init_metadata()
 
+    async def upload_link(self, user_id: str, file_uuid: str):
+        """returns: a presigned upload link
+
+        NOTE: updates metadata once the upload is concluded"""
+        await self._generate_metadata_for_link(user_id=user_id, file_uuid=file_uuid)
+
         bucket_name = self.simcore_bucket_name
         object_name = file_uuid
 
@@ -547,6 +583,18 @@ async def _init_metadata() -> Tuple[int, str]:
         )
         return self.s3_client.create_presigned_put_url(bucket_name, object_name)
 
+    async def get_s3_link(self, user_id: str, file_uuid: str) -> str:
+        """
+        returns: the s3 file link and creates a database entry for the file
+
+        NOTE: the user must call the update metadata endpoint to update
+        the metadata once the upload is finished
+        """
+        await self._generate_metadata_for_link(user_id=user_id, file_uuid=file_uuid)
+        bucket_name = self.simcore_bucket_name
+        object_name = file_uuid
+        return f"s3://{bucket_name}/{object_name.lstrip('/')}"
+
     async def download_link_s3(self, file_uuid: str, user_id: int) -> str:
 
         # access layer
diff --git a/services/storage/src/simcore_service_storage/handlers.py b/services/storage/src/simcore_service_storage/handlers.py
index 4cd53585060..a1d150b6b18 100644
--- a/services/storage/src/simcore_service_storage/handlers.py
+++ b/services/storage/src/simcore_service_storage/handlers.py
@@ -2,7 +2,8 @@
 import json
 import logging
 from contextlib import contextmanager
-from typing import Any, Dict
+from typing import Any, Dict, Optional
+import urllib.parse
 
 import attr
 from aiohttp import web
@@ -16,6 +17,7 @@
 from .db_tokens import get_api_token_and_secret
 from .dsm import DataStorageManager, DatCoreApiToken
 from .settings import Settings
+from .models import FileMetaDataEx
 
 log = logging.getLogger(__name__)
 
@@ -62,7 +64,7 @@ def handle_storage_errors():
 
     except InvalidFileIdentifier as err:
         raise web.HTTPUnprocessableEntity(
-            reason=f"{err.identifier} is an invalid file identifier"
+            reason=f"{err} is an invalid file identifier"
        ) from err
 
 
@@ -267,7 +269,7 @@ async def _go():
 
     return {"error": None, "data": sync_results}
 
-# DISABLED: @routes.patch(f"/{api_vtag}/locations/{{location_id}}/files/{{fileId}}/metadata") # type: ignore
+@routes.patch(f"/{api_vtag}/locations/{{location_id}}/files/{{fileId}}/metadata") # type: ignore
async 
def update_file_meta_data(request: web.Request): assert query, "query %s" % query # nosec assert not body, "body %s" % body # nosec - assert params["location_id"] # nosec - assert params["fileId"] # nosec - assert query["user_id"] # nosec + with handle_storage_errors(): + file_uuid = urllib.parse.unquote_plus(params["fileId"]) + + log.error("file_uuid=%s", file_uuid) + dsm = await _prepare_storage_manager(params, query, request) + + data: Optional[FileMetaDataEx] = await dsm.update_metadata(file_uuid=file_uuid) + if data is None: + raise web.HTTPForbidden(reason=f"Could not update metadata for {file_uuid}") + + return { + "error": None, + "data": {**attr.asdict(data.fmd), "parent_id": data.parent_id}, + } + + +@routes.delete(f"/{api_vtag}/locations/{{location_id}}/files/{{fileId}}/metadata") # type: ignore +async def delete_file_meta_data(request: web.Request): + params, query, body = await extract_and_validate(request) + + assert params, "params %s" % params # nosec + assert query, "query %s" % query # nosec + assert not body, "body %s" % body # nosec with handle_storage_errors(): - location_id = params["location_id"] - _user_id = query["user_id"] - _file_uuid = params["fileId"] + user_id = query["user_id"] + file_uuid = urllib.parse.unquote_plus(params["fileId"]) + log.error("file_uuid=%s", file_uuid) dsm = await _prepare_storage_manager(params, query, request) - _location = dsm.location_from_id(location_id) + + await dsm.delete_metadata(user_id=user_id, file_uuid=file_uuid) + return {"error": None, "data": None} @routes.get(f"/{api_vtag}/locations/{{location_id}}/files/{{fileId}}") # type: ignore @@ -315,6 +339,30 @@ async def download_file(request: web.Request): return {"error": None, "data": {"link": link}} +@routes.get(f"/{api_vtag}/locations/{{location_id}}/files/{{fileId}}/s3/link") # type: ignore +async def get_s3_link(request: web.Request) -> Dict[str, Any]: + params, query, body = await extract_and_validate(request) + + assert params, "params %s" % params # nosec + assert query, "query %s" % query # nosec + assert not body, "body %s" % body # nosec + + with handle_storage_errors(): + location_id = params["location_id"] + user_id = query["user_id"] + file_uuid = urllib.parse.unquote_plus(params["fileId"]) + + if int(location_id) != SIMCORE_S3_ID: + raise web.HTTPForbidden( + reason=f"Only allowed to fetch s3 link for '{SIMCORE_S3_STR}'" + ) + + dsm = await _prepare_storage_manager(params, query, request) + + s3_link: str = await dsm.get_s3_link(user_id=user_id, file_uuid=file_uuid) + return {"error": None, "data": {"s3_link": s3_link}} + + @routes.put(f"/{api_vtag}/locations/{{location_id}}/files/{{fileId}}") # type: ignore async def upload_file(request: web.Request): params, query, body = await extract_and_validate(request) diff --git a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml index fb73ffe925e..4ac2ed7e749 100644 --- a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml +++ b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml @@ -1235,16 +1235,7 @@ paths: type: string responses: '200': - description: Returns presigned link - content: - application/json: - schema: - type: object - properties: - link: - type: string - example: - link: example_link + $ref: '#/paths/~1storage~1locations~1%7Blocation_id%7D~1files~1%7BfileId%7D/put/responses/200' put: summary: Returns upload link or performs copy operation to datcore tags: @@ -1273,7 +1264,16 @@ 
paths:
             type: string
       responses:
         '200':
-          $ref: '#/paths/~1storage~1locations~1%7Blocation_id%7D~1files~1%7BfileId%7D/get/responses/200'
+          description: Returns presigned link
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  link:
+                    type: string
+              example:
+                link: example_link
     delete:
       summary: Deletes File
       tags:
From 1789bc17adfe6bfd6c724ffc2dd4c85238f75d56 Mon Sep 17 00:00:00 2001
From: Andrei Neagu
Date: Fri, 22 Apr 2022 14:31:26 +0200
Subject: [PATCH 14/78] fix tests

---
 packages/settings-library/tests/test_utils_r_clone.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/packages/settings-library/tests/test_utils_r_clone.py b/packages/settings-library/tests/test_utils_r_clone.py
index 29ac2ed10a5..2f16ac6a930 100644
--- a/packages/settings-library/tests/test_utils_r_clone.py
+++ b/packages/settings-library/tests/test_utils_r_clone.py
@@ -7,7 +7,13 @@
 
 @pytest.fixture(params=list(S3Provider))
 def r_clone_settings(request, monkeypatch) -> RCloneSettings:
-    monkeypatch.setenv("R_CLONE_S3_PROVIDER", request.param)
+    monkeypatch.setenv("R_CLONE_PROVIDER", request.param)
+    monkeypatch.setenv("R_CLONE_STORAGE_ENDPOINT", "storage_endpoint")
+    monkeypatch.setenv("S3_ENDPOINT", "endpoint")
+    monkeypatch.setenv("S3_ACCESS_KEY", "access_key")
+    monkeypatch.setenv("S3_SECRET_KEY", "secret_key")
+    monkeypatch.setenv("S3_BUCKET_NAME", "bucket_name")
+    monkeypatch.setenv("S3_SECURE", "false")
     return RCloneSettings()
From efc1d1da64012057e43e667d4ad9ddd573c2e91e Mon Sep 17 00:00:00 2001
From: Andrei Neagu
Date: Fri, 22 Apr 2022 14:38:41 +0200
Subject: [PATCH 15/78] pylint

---
 packages/simcore-sdk/tests/unit/test_node_ports_v2_port.py | 2 +-
 .../src/simcore_service_director_v2/core/settings.py | 4 ++--
 services/director-v2/tests/unit/test_core_settings.py | 6 +++---
 services/storage/src/simcore_service_storage/dsm.py | 5 +++--
 4 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/packages/simcore-sdk/tests/unit/test_node_ports_v2_port.py b/packages/simcore-sdk/tests/unit/test_node_ports_v2_port.py
index 94adf886001..064c89b7b06 100644
--- a/packages/simcore-sdk/tests/unit/test_node_ports_v2_port.py
+++ b/packages/simcore-sdk/tests/unit/test_node_ports_v2_port.py
@@ -23,7 +23,7 @@
     Type,
     Union,
 )
-from unittest.mock import AsyncMock, Mock
+from unittest.mock import AsyncMock
 
 import pytest
 from aiohttp.client import ClientSession
diff --git a/services/director-v2/src/simcore_service_director_v2/core/settings.py b/services/director-v2/src/simcore_service_director_v2/core/settings.py
index 62bc5c91f3e..49e18f100fa 100644
--- a/services/director-v2/src/simcore_service_director_v2/core/settings.py
+++ b/services/director-v2/src/simcore_service_director_v2/core/settings.py
@@ -25,8 +25,8 @@
 from settings_library.docker_registry import RegistrySettings
 from settings_library.http_client_request import ClientRequestSettings
 from settings_library.postgres import PostgresSettings
-from settings_library.rabbit import RabbitSettings
 from settings_library.r_clone import RCloneSettings
+from settings_library.rabbit import RabbitSettings
 from settings_library.tracing import TracingSettings
 from settings_library.utils_logging import MixinLoggingSettings
 from simcore_postgres_database.models.clusters import ClusterType
@@ -57,7 +57,7 @@ class VFSCacheMode(str, Enum):
     FULL = "full"
 
 
-class RCloneSettings(RCloneSettings):
+class RCloneSettings(RCloneSettings):  # pylint: disable=function-redefined
     R_CLONE_DIR_CACHE_TIME_SECONDS: PositiveInt = Field(
         10,
         description="time to 
cache directory entries for",
     )
diff --git a/services/director-v2/tests/unit/test_core_settings.py b/services/director-v2/tests/unit/test_core_settings.py
index 3201badb3e0..4baab8e7684 100644
--- a/services/director-v2/tests/unit/test_core_settings.py
+++ b/services/director-v2/tests/unit/test_core_settings.py
@@ -9,12 +9,12 @@
 from models_library.basic_types import LogLevel
 from pydantic import ValidationError
 from pytest import FixtureRequest
+from settings_library.r_clone import S3Provider
 from simcore_service_director_v2.core.settings import (
     AppSettings,
     BootModeEnum,
     DynamicSidecarSettings,
     RCloneSettings,
-    S3Provider,
 )
 
 
@@ -50,8 +50,8 @@ def test_expected_s3_endpoint(
     r_clone_settings = RCloneSettings()
 
     scheme = "https" if is_secure else "http"
-    assert r_clone_settings.endpoint.startswith(f"{scheme}://")
-    assert r_clone_settings.endpoint.endswith(endpoint)
+    assert r_clone_settings.R_CLONE_S3.endpoint.startswith(f"{scheme}://")
+    assert r_clone_settings.R_CLONE_S3.endpoint.endswith(endpoint)
 
 
 def test_enforce_r_clone_requirement(monkeypatch: MonkeyPatch) -> None:
diff --git a/services/storage/src/simcore_service_storage/dsm.py b/services/storage/src/simcore_service_storage/dsm.py
index 6f7d58f8886..b265f11e241 100644
--- a/services/storage/src/simcore_service_storage/dsm.py
+++ b/services/storage/src/simcore_service_storage/dsm.py
@@ -28,7 +28,6 @@
 from sqlalchemy.dialects.postgresql import insert as pg_insert
 from sqlalchemy.sql.expression import literal_column
 from tenacity import retry
-from tenacity._asyncio import AsyncRetrying
 from tenacity.before_sleep import before_sleep_log
 from tenacity.retry import retry_if_exception_type, retry_if_result
 from tenacity.stop import stop_after_delay
@@ -522,7 +521,9 @@ async def delete_metadata(self, user_id: int, file_uuid: str) -> None:
             except Exception:
                 message = f"Could not delete metadata entry for file {file_uuid}"
                 logger.debug(message)
-                raise web.HTTPForbidden(reason=message)
+                raise web.HTTPForbidden(  # pylint: disable=raise-missing-from
+                    reason=message
+                )
From 77c9b33df31254e9bc66525bb5a2c8a39c5f9045 Mon Sep 17 00:00:00 2001
From: Andrei Neagu
Date: Fri, 22 Apr 2022 15:03:02 +0200
Subject: [PATCH 16/78] dynamic-sidecar now requires nodeports

---
 .../src/simcore_service_dynamic_sidecar/core/settings.py | 2 ++
 .../simcore_service_dynamic_sidecar/modules/nodeports.py | 1 +
 services/dynamic-sidecar/tests/conftest.py | 8 ++++++++
 .../dynamic-sidecar/tests/unit/test_core_docker_logs.py | 8 ++++++++
 services/dynamic-sidecar/tests/unit/test_core_rabbitmq.py | 8 ++++++++
 services/dynamic-sidecar/tests/unit/test_core_settings.py | 8 ++++++++
 6 files changed, 35 insertions(+)

diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/settings.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/settings.py
index f2efbdc27c9..6d306feceee 100644
--- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/settings.py
+++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/settings.py
@@ -10,6 +10,7 @@
 from pydantic import Field, PositiveInt, validator
 from settings_library.base import BaseCustomSettings
 from settings_library.docker_registry import RegistrySettings
+from settings_library.r_clone import RCloneSettings
 from settings_library.rabbit import RabbitSettings
 
 
@@ -93,6 +94,7 @@ def match_logging_level(cls, v: str) -> str:
 
     REGISTRY_SETTINGS: RegistrySettings = Field(auto_default_from_env=True)
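+    # NOTE: the DY_SIDECAR_R_CLONE_SETTINGS field added below is assembled from
+    # the environment via auto_default_from_env, so a deployment only needs to
+    # export the variables exercised in the tests of this patch (all values
+    # here are placeholders):
+    #   S3_ENDPOINT=endpoint S3_ACCESS_KEY=access_key S3_SECRET_KEY=secret_key
+    #   S3_BUCKET_NAME=bucket_name S3_SECURE=false R_CLONE_PROVIDER=MINIO
+    #   R_CLONE_STORAGE_ENDPOINT=storage_endpoint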
RABBIT_SETTINGS: Optional[RabbitSettings] = Field(auto_default_from_env=True) + DY_SIDECAR_R_CLONE_SETTINGS: RCloneSettings = Field(auto_default_from_env=True) @property def is_development_mode(self) -> bool: diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py index 8842b01a4e2..96cdc1422cf 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py @@ -60,6 +60,7 @@ async def upload_outputs(outputs_path: Path, port_keys: List[str]) -> None: user_id=settings.DY_SIDECAR_USER_ID, project_id=str(settings.DY_SIDECAR_PROJECT_ID), node_uuid=str(settings.DY_SIDECAR_NODE_ID), + r_clone_settings=settings.DY_SIDECAR_R_CLONE_SETTINGS, ) # let's gather the tasks diff --git a/services/dynamic-sidecar/tests/conftest.py b/services/dynamic-sidecar/tests/conftest.py index 2464e02019a..234e458d8fe 100644 --- a/services/dynamic-sidecar/tests/conftest.py +++ b/services/dynamic-sidecar/tests/conftest.py @@ -101,6 +101,14 @@ def mock_environment( ) monkeypatch_module.setenv("RABBIT_SETTINGS", "null") + monkeypatch_module.setenv("S3_ENDPOINT", "endpoint") + monkeypatch_module.setenv("S3_ACCESS_KEY", "access_key") + monkeypatch_module.setenv("S3_SECRET_KEY", "secret_key") + monkeypatch_module.setenv("S3_BUCKET_NAME", "bucket_name") + monkeypatch_module.setenv("S3_SECURE", "false") + monkeypatch_module.setenv("R_CLONE_PROVIDER", "MINIO") + monkeypatch_module.setenv("R_CLONE_STORAGE_ENDPOINT", "storage_endpoint") + monkeypatch_module.setattr(mounted_fs, "DY_VOLUMES", mock_dy_volumes) diff --git a/services/dynamic-sidecar/tests/unit/test_core_docker_logs.py b/services/dynamic-sidecar/tests/unit/test_core_docker_logs.py index afb8e8c0329..6c052677d3b 100644 --- a/services/dynamic-sidecar/tests/unit/test_core_docker_logs.py +++ b/services/dynamic-sidecar/tests/unit/test_core_docker_logs.py @@ -57,6 +57,14 @@ def app( monkeypatch_module.setattr(mounted_fs, "DY_VOLUMES", mock_dy_volumes) + monkeypatch_module.setenv("S3_ENDPOINT", "endpoint") + monkeypatch_module.setenv("S3_ACCESS_KEY", "access_key") + monkeypatch_module.setenv("S3_SECRET_KEY", "secret_key") + monkeypatch_module.setenv("S3_BUCKET_NAME", "bucket_name") + monkeypatch_module.setenv("S3_SECURE", "false") + monkeypatch_module.setenv("R_CLONE_PROVIDER", "MINIO") + monkeypatch_module.setenv("R_CLONE_STORAGE_ENDPOINT", "storage_endpoint") + yield assemble_application() diff --git a/services/dynamic-sidecar/tests/unit/test_core_rabbitmq.py b/services/dynamic-sidecar/tests/unit/test_core_rabbitmq.py index 16867aa021f..56a5083a79d 100644 --- a/services/dynamic-sidecar/tests/unit/test_core_rabbitmq.py +++ b/services/dynamic-sidecar/tests/unit/test_core_rabbitmq.py @@ -106,6 +106,14 @@ def mock_environment( monkeypatch_module.setattr(mounted_fs, "DY_VOLUMES", mock_dy_volumes) + monkeypatch_module.setenv("S3_ENDPOINT", "endpoint") + monkeypatch_module.setenv("S3_ACCESS_KEY", "access_key") + monkeypatch_module.setenv("S3_SECRET_KEY", "secret_key") + monkeypatch_module.setenv("S3_BUCKET_NAME", "bucket_name") + monkeypatch_module.setenv("S3_SECURE", "false") + monkeypatch_module.setenv("R_CLONE_PROVIDER", "MINIO") + monkeypatch_module.setenv("R_CLONE_STORAGE_ENDPOINT", "storage_endpoint") + @pytest.fixture def app(mock_environment: None) -> FastAPI: diff --git a/services/dynamic-sidecar/tests/unit/test_core_settings.py 
b/services/dynamic-sidecar/tests/unit/test_core_settings.py index 8ee9737d2b3..ec9d3ab7e51 100644 --- a/services/dynamic-sidecar/tests/unit/test_core_settings.py +++ b/services/dynamic-sidecar/tests/unit/test_core_settings.py @@ -35,6 +35,14 @@ def mocked_non_request_settings(tmp_dir: Path, monkeypatch: MonkeyPatch) -> None monkeypatch.setenv("DY_SIDECAR_PROJECT_ID", f"{uuid.uuid4()}") monkeypatch.setenv("DY_SIDECAR_NODE_ID", f"{uuid.uuid4()}") + monkeypatch.setenv("S3_ENDPOINT", "endpoint") + monkeypatch.setenv("S3_ACCESS_KEY", "access_key") + monkeypatch.setenv("S3_SECRET_KEY", "secret_key") + monkeypatch.setenv("S3_BUCKET_NAME", "bucket_name") + monkeypatch.setenv("S3_SECURE", "false") + monkeypatch.setenv("R_CLONE_PROVIDER", "MINIO") + monkeypatch.setenv("R_CLONE_STORAGE_ENDPOINT", "storage_endpoint") + def test_non_request_dynamic_sidecar_settings( mocked_non_request_settings: None, From 76a052813df0ab6fa1a4c1b064c12597cb7bd921 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 22 Apr 2022 15:28:12 +0200 Subject: [PATCH 17/78] fix simcore-sdk requiremetns --- packages/simcore-sdk/requirements/_base.in | 1 - packages/simcore-sdk/requirements/_base.txt | 53 ++++++-------------- packages/simcore-sdk/requirements/_test.in | 1 + packages/simcore-sdk/requirements/_test.txt | 51 +++++++++++++------ packages/simcore-sdk/requirements/_tools.txt | 13 ++--- 5 files changed, 59 insertions(+), 60 deletions(-) diff --git a/packages/simcore-sdk/requirements/_base.in b/packages/simcore-sdk/requirements/_base.in index 5bcbcccae29..1322c5ecea3 100644 --- a/packages/simcore-sdk/requirements/_base.in +++ b/packages/simcore-sdk/requirements/_base.in @@ -7,7 +7,6 @@ --requirement ../../../packages/settings-library/requirements/_base.in --requirement ../../../packages/models-library/requirements/_base.in -aioboto3 aiofiles aiohttp aiopg[sa] diff --git a/packages/simcore-sdk/requirements/_base.txt b/packages/simcore-sdk/requirements/_base.txt index 576d9754365..d560d2a1540 100644 --- a/packages/simcore-sdk/requirements/_base.txt +++ b/packages/simcore-sdk/requirements/_base.txt @@ -4,10 +4,6 @@ # # pip-compile --output-file=requirements/_base.txt --strip-extras requirements/_base.in # -aioboto3==9.5.0 - # via -r requirements/_base.in -aiobotocore==2.2.0 - # via aioboto3 aiodebug==2.3.0 # via -r requirements/../../../packages/service-library/requirements/_base.in aiofiles==0.8.0 @@ -19,11 +15,9 @@ aiohttp==3.8.1 # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../requirements/constraints.txt # -r requirements/_base.in - # aiobotocore -aioitertools==0.10.0 - # via aiobotocore aiopg==1.3.3 # via -r requirements/_base.in aiosignal==1.2.0 @@ -41,15 +35,10 @@ attrs==20.3.0 # -c requirements/../../../packages/service-library/requirements/./constraints.txt # aiohttp # jsonschema -boto3==1.21.21 - # via aiobotocore -botocore==1.24.21 - # via - # aiobotocore - # boto3 - # s3transfer charset-normalizer==2.0.12 # via aiohttp +click==8.1.2 + # via typer dnspython==2.2.1 # via email-validator email-validator==1.1.3 @@ -64,10 +53,6 @@ idna==3.3 # via # email-validator # yarl -jmespath==1.0.0 - # via - # boto3 - # botocore 
jsonschema==3.2.0 # via # -c requirements/../../../packages/service-library/requirements/./constraints.txt @@ -91,37 +76,35 @@ pydantic==1.9.0 # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../requirements/constraints.txt # -r requirements/../../../packages/models-library/requirements/_base.in # -r requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/settings-library/requirements/_base.in # -r requirements/_base.in pyinstrument==4.1.1 # via -r requirements/../../../packages/service-library/requirements/_base.in -pyparsing==3.0.7 +pyparsing==3.0.8 # via packaging pyrsistent==0.18.1 # via jsonschema -python-dateutil==2.8.2 - # via botocore pyyaml==5.4.1 # via # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/service-library/requirements/./constraints.txt + # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../requirements/constraints.txt # -r requirements/../../../packages/service-library/requirements/_base.in -s3transfer==0.5.2 - # via boto3 six==1.16.0 - # via - # jsonschema - # python-dateutil -sqlalchemy==1.4.32 + # via jsonschema +sqlalchemy==1.4.35 # via # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../requirements/constraints.txt # -r requirements/../../../packages/postgres-database/requirements/_base.in # aiopg @@ -130,22 +113,14 @@ tenacity==8.0.1 # via # -r requirements/../../../packages/service-library/requirements/_base.in # -r requirements/_base.in -tqdm==4.63.1 +tqdm==4.64.0 # via -r requirements/_base.in -typing-extensions==4.1.1 +typer==0.4.1 + # via -r requirements/../../../packages/settings-library/requirements/_base.in +typing-extensions==4.2.0 # via # aiodebug - # aioitertools # pydantic -urllib3==1.26.9 - # via - # -c requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt - # -c requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt - # -c requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt - # -c requirements/../../../requirements/constraints.txt - # botocore -wrapt==1.14.0 - # via aiobotocore yarl==1.7.2 # via # -r requirements/../../../packages/postgres-database/requirements/_base.in diff --git a/packages/simcore-sdk/requirements/_test.in b/packages/simcore-sdk/requirements/_test.in index 5fd198d4ba5..9bea2458f75 100644 --- 
a/packages/simcore-sdk/requirements/_test.in +++ b/packages/simcore-sdk/requirements/_test.in @@ -21,6 +21,7 @@ pytest-xdist pytest-lazy-fixture # mockups/fixtures +aioboto3 aioresponses alembic click diff --git a/packages/simcore-sdk/requirements/_test.txt b/packages/simcore-sdk/requirements/_test.txt index b3c4c5d63ed..c6ba67c5b79 100644 --- a/packages/simcore-sdk/requirements/_test.txt +++ b/packages/simcore-sdk/requirements/_test.txt @@ -4,12 +4,19 @@ # # pip-compile --output-file=requirements/_test.txt --strip-extras requirements/_test.in # +aioboto3==9.5.0 + # via -r requirements/_test.in +aiobotocore==2.2.0 + # via aioboto3 aiohttp==3.8.1 # via # -c requirements/../../../requirements/constraints.txt # -c requirements/_base.txt + # aiobotocore # aioresponses # pytest-aiohttp +aioitertools==0.10.0 + # via aiobotocore aioresponses==0.7.3 # via -r requirements/_test.in aiosignal==1.2.0 @@ -20,7 +27,7 @@ alembic==1.7.7 # via # -c requirements/_base.txt # -r requirements/_test.in -astroid==2.11.2 +astroid==2.11.3 # via pylint async-timeout==4.0.2 # via @@ -31,6 +38,13 @@ attrs==20.3.0 # -c requirements/_base.txt # aiohttp # pytest +boto3==1.21.21 + # via aiobotocore +botocore==1.24.21 + # via + # aiobotocore + # boto3 + # s3transfer certifi==2021.10.8 # via # minio @@ -40,8 +54,10 @@ charset-normalizer==2.0.12 # -c requirements/_base.txt # aiohttp # requests -click==8.0.4 - # via -r requirements/_test.in +click==8.1.2 + # via + # -c requirements/_base.txt + # -r requirements/_test.in coverage==6.3.2 # via # -r requirements/_test.in @@ -57,7 +73,7 @@ docopt==0.6.2 # via coveralls execnet==1.9.0 # via pytest-xdist -faker==13.3.3 +faker==13.3.5 # via -r requirements/_test.in frozenlist==1.3.0 # via @@ -68,7 +84,7 @@ greenlet==1.1.2 # via # -c requirements/_base.txt # sqlalchemy -icdiff==2.0.4 +icdiff==2.0.5 # via pytest-icdiff idna==3.3 # via @@ -79,6 +95,10 @@ iniconfig==1.1.1 # via pytest isort==5.10.1 # via pylint +jmespath==1.0.0 + # via + # boto3 + # botocore lazy-object-proxy==1.7.1 # via astroid mako==1.2.0 @@ -105,7 +125,7 @@ packaging==21.3 # -c requirements/_base.txt # pytest # pytest-sugar -platformdirs==2.5.1 +platformdirs==2.5.2 # via pylint pluggy==1.0.0 # via pytest @@ -119,9 +139,9 @@ py==1.11.0 # via # pytest # pytest-forked -pylint==2.13.2 +pylint==2.13.7 # via -r requirements/_test.in -pyparsing==3.0.7 +pyparsing==3.0.8 # via # -c requirements/_base.txt # packaging @@ -162,7 +182,7 @@ pytest-xdist==2.5.0 # via -r requirements/_test.in python-dateutil==2.8.2 # via - # -c requirements/_base.txt + # botocore # faker python-dotenv==0.20.0 # via -r requirements/_test.in @@ -171,11 +191,13 @@ requests==2.27.1 # -r requirements/_test.in # coveralls # docker +s3transfer==0.5.2 + # via boto3 six==1.16.0 # via # -c requirements/_base.txt # python-dateutil -sqlalchemy==1.4.32 +sqlalchemy==1.4.35 # via # -c requirements/../../../requirements/constraints.txt # -c requirements/_base.txt @@ -187,22 +209,23 @@ tomli==2.0.1 # coverage # pylint # pytest -typing-extensions==4.1.1 +typing-extensions==4.2.0 # via # -c requirements/_base.txt + # aioitertools # astroid # pylint urllib3==1.26.9 # via # -c requirements/../../../requirements/constraints.txt - # -c requirements/_base.txt + # botocore # minio # requests -websocket-client==1.3.1 +websocket-client==1.3.2 # via docker wrapt==1.14.0 # via - # -c requirements/_base.txt + # aiobotocore # astroid yarl==1.7.2 # via diff --git a/packages/simcore-sdk/requirements/_tools.txt b/packages/simcore-sdk/requirements/_tools.txt index 
b52c3ba2620..017b0e4f7e4 100644 --- a/packages/simcore-sdk/requirements/_tools.txt +++ b/packages/simcore-sdk/requirements/_tools.txt @@ -10,8 +10,9 @@ bump2version==1.0.1 # via -r requirements/../../../requirements/devenv.txt cfgv==3.3.1 # via pre-commit -click==8.0.4 +click==8.1.2 # via + # -c requirements/_base.txt # -c requirements/_test.txt # black # pip-tools @@ -33,14 +34,14 @@ pathspec==0.9.0 # via black pep517==0.12.0 # via pip-tools -pip-tools==6.5.1 +pip-tools==6.6.0 # via -r requirements/../../../requirements/devenv.txt -platformdirs==2.5.1 +platformdirs==2.5.2 # via # -c requirements/_test.txt # black # virtualenv -pre-commit==2.17.0 +pre-commit==2.18.1 # via -r requirements/../../../requirements/devenv.txt pyyaml==5.4.1 # via @@ -59,12 +60,12 @@ tomli==2.0.1 # -c requirements/_test.txt # black # pep517 -typing-extensions==4.1.1 +typing-extensions==4.2.0 # via # -c requirements/_base.txt # -c requirements/_test.txt # black -virtualenv==20.14.0 +virtualenv==20.14.1 # via pre-commit wheel==0.37.1 # via pip-tools From 8409f38e847b65594e7a81f9e00bd0af13e2a6dd Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Sun, 24 Apr 2022 10:50:24 +0200 Subject: [PATCH 18/78] properly formatting URL --- packages/settings-library/src/settings_library/r_clone.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/packages/settings-library/src/settings_library/r_clone.py b/packages/settings-library/src/settings_library/r_clone.py index 535aff3f0eb..8ea8f04c86e 100644 --- a/packages/settings-library/src/settings_library/r_clone.py +++ b/packages/settings-library/src/settings_library/r_clone.py @@ -28,3 +28,9 @@ class RCloneSettings(BaseCustomSettings): R_CLONE_AIOHTTP_CLIENT_TIMEOUT_TOTAL: float = 20 R_CLONE_AIOHTTP_CLIENT_TIMEOUT_SOCK_CONNECT: float = 5 + + @property + def storage_endpoint(self) -> str: + if not self.R_CLONE_STORAGE_ENDPOINT.startswith("http"): + return f"http://{self.R_CLONE_STORAGE_ENDPOINT}" + return self.R_CLONE_STORAGE_ENDPOINT From c61de67ed31b331bfff88fbd9b20c0a2c14b2957 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Sun, 24 Apr 2022 10:50:39 +0200 Subject: [PATCH 19/78] using http in urls --- .../simcore-sdk/src/simcore_sdk/node_ports_v2/r_clone.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/r_clone.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/r_clone.py index 5b835ace69d..2d17d7dfbb0 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/r_clone.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/r_clone.py @@ -81,7 +81,7 @@ async def _get_s3_link( ) -> str: async with _get_client_session(r_clone_settings) as session: url = "{endpoint}/v0/locations/0/files/{s3_object}/s3/link".format( - endpoint=r_clone_settings.R_CLONE_STORAGE_ENDPOINT, + endpoint=r_clone_settings.storage_endpoint, s3_object=urllib.parse.quote_plus(s3_object), ) logger.debug("%s", f"{url=}") @@ -109,7 +109,7 @@ async def _update_file_meta_data( ) -> ETag: async with _get_client_session(r_clone_settings) as session: url = "{endpoint}/v0/locations/0/files/{s3_object}/metadata".format( - endpoint=r_clone_settings.R_CLONE_STORAGE_ENDPOINT, + endpoint=r_clone_settings.storage_endpoint, s3_object=urllib.parse.quote_plus(s3_object), ) logger.debug("%s", f"{url=}") @@ -129,7 +129,7 @@ async def _delete_file_meta_data( ) -> None: async with _get_client_session(r_clone_settings) as session: url = "{endpoint}/v0/locations/0/files/{s3_object}/metadata".format( - 
endpoint=r_clone_settings.R_CLONE_STORAGE_ENDPOINT, + endpoint=r_clone_settings.storage_endpoint, s3_object=urllib.parse.quote_plus(s3_object), ) logger.debug("%s", f"{url=}") From 365b34b83c641eb8c07a809ab8157eeef3f85d9a Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Sun, 24 Apr 2022 10:50:51 +0200 Subject: [PATCH 20/78] moved requirements --- services/director-v2/requirements/_base.txt | 15 +++++++++++++-- services/director-v2/requirements/_test.txt | 4 +++- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/services/director-v2/requirements/_base.txt b/services/director-v2/requirements/_base.txt index 327513ea865..07f7b4b56fb 100644 --- a/services/director-v2/requirements/_base.txt +++ b/services/director-v2/requirements/_base.txt @@ -15,7 +15,7 @@ aiodebug==2.3.0 # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in aiodocker==0.19.1 # via -r requirements/_base.in -aiofiles==0.5.0 +aiofiles==0.8.0 # via # -c requirements/../../../packages/service-library/requirements/./_base.in # -r requirements/../../../packages/service-library/requirements/_base.in @@ -33,6 +33,7 @@ aiohttp==3.8.1 # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/simcore-sdk/requirements/../../../requirements/constraints.txt # -c requirements/../../../requirements/constraints.txt # -r requirements/../../../packages/simcore-sdk/requirements/_base.in @@ -58,6 +59,8 @@ anyio==3.2.1 # starlette asgiref==3.4.1 # via uvicorn +async-cache==1.1.1 + # via -r requirements/../../../packages/simcore-sdk/requirements/_base.in async-timeout==4.0.2 # via # aiohttp @@ -169,6 +172,7 @@ jinja2==2.11.3 # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/simcore-sdk/requirements/../../../requirements/constraints.txt # -c requirements/../../../requirements/constraints.txt # -r requirements/../../../services/dask-sidecar/requirements/_dask-distributed.txt @@ -245,6 +249,7 @@ pydantic==1.9.0 # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c 
requirements/../../../packages/simcore-sdk/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/simcore-sdk/requirements/../../../requirements/constraints.txt # -c requirements/../../../requirements/constraints.txt # -r requirements/../../../packages/dask-task-models-library/requirements/../../../packages/models-library/requirements/_base.in @@ -254,6 +259,7 @@ pydantic==1.9.0 # -r requirements/../../../packages/settings-library/requirements/_base.in # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/models-library/requirements/_base.in # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/_base.in + # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/settings-library/requirements/_base.in # -r requirements/../../../packages/simcore-sdk/requirements/_base.in # -r requirements/_base.in # fastapi @@ -290,6 +296,7 @@ pyyaml==5.4.1 # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/./constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/simcore-sdk/requirements/../../../requirements/constraints.txt # -c requirements/../../../requirements/constraints.txt # -r requirements/../../../packages/service-library/requirements/_base.in @@ -332,6 +339,7 @@ sqlalchemy==1.4.31 # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/simcore-sdk/requirements/../../../requirements/constraints.txt # -c requirements/../../../requirements/constraints.txt # -r requirements/../../../packages/postgres-database/requirements/_base.in @@ -370,7 +378,9 @@ tornado==6.1 tqdm==4.62.3 # via -r requirements/../../../packages/simcore-sdk/requirements/_base.in typer==0.4.1 - # via -r requirements/../../../packages/settings-library/requirements/_base.in + # via + # -r requirements/../../../packages/settings-library/requirements/_base.in + # -r requirements/../../../packages/simcore-sdk/requirements/../../../packages/settings-library/requirements/_base.in typing-extensions==4.1.1 # via # aiodebug @@ -391,6 +401,7 @@ urllib3==1.26.7 # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/models-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/postgres-database/requirements/../../../requirements/constraints.txt # -c 
requirements/../../../packages/simcore-sdk/requirements/../../../packages/service-library/requirements/../../../requirements/constraints.txt + # -c requirements/../../../packages/simcore-sdk/requirements/../../../packages/settings-library/requirements/../../../requirements/constraints.txt # -c requirements/../../../packages/simcore-sdk/requirements/../../../requirements/constraints.txt # -c requirements/../../../requirements/constraints.txt # -r requirements/../../../services/dask-sidecar/requirements/_dask-distributed.txt diff --git a/services/director-v2/requirements/_test.txt b/services/director-v2/requirements/_test.txt index b3414eadbf0..08defe86043 100644 --- a/services/director-v2/requirements/_test.txt +++ b/services/director-v2/requirements/_test.txt @@ -210,7 +210,9 @@ pamqp==2.3.0 # -c requirements/_base.txt # aiormq paramiko==2.10.3 - # via docker + # via + # -c requirements/../../../requirements/constraints.txt + # docker pillow==9.0.1 # via bokeh platformdirs==2.5.1 From df29fd56b8dae49300a6f826a7816b9fbe82a8c6 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Sun, 24 Apr 2022 10:51:04 +0200 Subject: [PATCH 21/78] injecting mandatory settings --- .../dynamic_sidecar/docker_service_specs/sidecar.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/sidecar.py b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/sidecar.py index 53c211dcfc3..1832070af0c 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/sidecar.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/sidecar.py @@ -25,6 +25,9 @@ def _get_environment_variables( ) -> Dict[str, str]: registry_settings = app_settings.DIRECTOR_V2_DOCKER_REGISTRY rabbit_settings = app_settings.DIRECTOR_V2_RABBITMQ + r_clone_settings = ( + app_settings.DYNAMIC_SERVICES.DYNAMIC_SIDECAR.DYNAMIC_SIDECAR_R_CLONE_SETTINGS + ) state_exclude = [] if scheduler_data.paths_mapping.state_exclude is not None: @@ -60,6 +63,13 @@ def _get_environment_variables( "RABBIT_USER": f"{rabbit_settings.RABBIT_USER}", "RABBIT_PASSWORD": f"{rabbit_settings.RABBIT_PASSWORD.get_secret_value()}", "RABBIT_CHANNELS": json_dumps(rabbit_settings.RABBIT_CHANNELS), + "S3_ENDPOINT": r_clone_settings.R_CLONE_S3.endpoint, + "S3_ACCESS_KEY": r_clone_settings.R_CLONE_S3.S3_ACCESS_KEY, + "S3_SECRET_KEY": r_clone_settings.R_CLONE_S3.S3_SECRET_KEY, + "S3_BUCKET_NAME": r_clone_settings.R_CLONE_S3.S3_BUCKET_NAME, + "S3_SECURE": f"{r_clone_settings.R_CLONE_S3.S3_SECURE}", + "R_CLONE_PROVIDER": r_clone_settings.R_CLONE_PROVIDER, + "R_CLONE_STORAGE_ENDPOINT": r_clone_settings.R_CLONE_STORAGE_ENDPOINT, } From 49e48f997fb66615b6ea9c454fe1689bdbc361fc Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Sun, 24 Apr 2022 10:51:11 +0200 Subject: [PATCH 22/78] test refactoring --- services/director-v2/tests/conftest.py | 2 +- .../director-v2/tests/integration/01/test_computation_api.py | 2 +- .../tests/integration/02/test_dynamic_services_routes.py | 2 +- .../02/test_dynamic_sidecar_nodeports_integration.py | 2 +- .../02/test_mixed_dynamic_sidecar_and_legacy_project.py | 2 +- services/director-v2/tests/unit/test_core_settings.py | 4 ++-- .../unit/test_modules_dynamic_sidecar_docker_service_specs.py | 2 +- ...st_modules_dynamic_sidecar_docker_service_specs_sidecar.py | 2 +- 
.../with_dbs/test_modules_comp_scheduler_dask_scheduler.py | 2 +- .../director-v2/tests/unit/with_dbs/test_route_clusters.py | 2 +- .../tests/unit/with_dbs/test_route_clusters_details.py | 2 +- services/director-v2/tests/unit/with_dbs/test_utils_dask.py | 2 +- .../with_swarm/test_modules_dynamic_sidecar_docker_api.py | 4 ++-- .../unit/with_swarm/test_modules_dynamic_sidecar_scheduler.py | 2 +- .../tests/unit/with_swarm/test_routes_dynamic_services.py | 2 +- 15 files changed, 17 insertions(+), 17 deletions(-) diff --git a/services/director-v2/tests/conftest.py b/services/director-v2/tests/conftest.py index 7c7313c7246..0e06bfb8bd6 100644 --- a/services/director-v2/tests/conftest.py +++ b/services/director-v2/tests/conftest.py @@ -103,7 +103,7 @@ def mock_env(monkeypatch: MonkeyPatch, dynamic_sidecar_docker_image: str) -> Non monkeypatch.setenv("REGISTRY_PW", "test") monkeypatch.setenv("REGISTRY_SSL", "false") - monkeypatch.setenv("R_CLONE_S3_PROVIDER", "MINIO") + monkeypatch.setenv("R_CLONE_PROVIDER", "MINIO") monkeypatch.setenv("POSTGRES_HOST", "mocked_host") monkeypatch.setenv("POSTGRES_USER", "mocked_user") diff --git a/services/director-v2/tests/integration/01/test_computation_api.py b/services/director-v2/tests/integration/01/test_computation_api.py index da5795b430b..be0930cf3a8 100644 --- a/services/director-v2/tests/integration/01/test_computation_api.py +++ b/services/director-v2/tests/integration/01/test_computation_api.py @@ -70,7 +70,7 @@ def mock_env( monkeypatch.setenv("SIMCORE_SERVICES_NETWORK_NAME", "test_swarm_network_name") monkeypatch.setenv("SWARM_STACK_NAME", "test_mocked_stack_name") monkeypatch.setenv("TRAEFIK_SIMCORE_ZONE", "test_mocked_simcore_zone") - monkeypatch.setenv("R_CLONE_S3_PROVIDER", "MINIO") + monkeypatch.setenv("R_CLONE_PROVIDER", "MINIO") @pytest.fixture() diff --git a/services/director-v2/tests/integration/02/test_dynamic_services_routes.py b/services/director-v2/tests/integration/02/test_dynamic_services_routes.py index 926dca8c886..e6a8207667e 100644 --- a/services/director-v2/tests/integration/02/test_dynamic_services_routes.py +++ b/services/director-v2/tests/integration/02/test_dynamic_services_routes.py @@ -124,7 +124,7 @@ async def test_client( monkeypatch.setenv("POSTGRES_PASSWORD", "mocked_password") monkeypatch.setenv("POSTGRES_DB", "mocked_db") monkeypatch.setenv("DIRECTOR_V2_POSTGRES_ENABLED", "false") - monkeypatch.setenv("R_CLONE_S3_PROVIDER", "MINIO") + monkeypatch.setenv("R_CLONE_PROVIDER", "MINIO") # patch host for dynamic-sidecar, not reachable via localhost # the dynamic-sidecar (running inside a container) will use diff --git a/services/director-v2/tests/integration/02/test_dynamic_sidecar_nodeports_integration.py b/services/director-v2/tests/integration/02/test_dynamic_sidecar_nodeports_integration.py index 4ba30ed77ea..194f89dfb0f 100644 --- a/services/director-v2/tests/integration/02/test_dynamic_sidecar_nodeports_integration.py +++ b/services/director-v2/tests/integration/02/test_dynamic_sidecar_nodeports_integration.py @@ -328,7 +328,7 @@ def mock_env( # this address to reach the rabbit service monkeypatch.setenv("RABBIT_HOST", f"{get_localhost_ip()}") monkeypatch.setenv("POSTGRES_HOST", f"{get_localhost_ip()}") - monkeypatch.setenv("R_CLONE_S3_PROVIDER", "MINIO") + monkeypatch.setenv("R_CLONE_PROVIDER", "MINIO") monkeypatch.setenv("DIRECTOR_V2_DEV_FEATURES_ENABLED", dev_features_enabled) monkeypatch.setenv("DIRECTOR_V2_TRACING", "null") monkeypatch.setenv( diff --git 
a/services/director-v2/tests/integration/02/test_mixed_dynamic_sidecar_and_legacy_project.py b/services/director-v2/tests/integration/02/test_mixed_dynamic_sidecar_and_legacy_project.py index ae7dd2e2fa9..958f6e8e1f8 100644 --- a/services/director-v2/tests/integration/02/test_mixed_dynamic_sidecar_and_legacy_project.py +++ b/services/director-v2/tests/integration/02/test_mixed_dynamic_sidecar_and_legacy_project.py @@ -154,7 +154,7 @@ async def director_v2_client( monkeypatch.setenv("POSTGRES_HOST", f"{get_localhost_ip()}") monkeypatch.setenv("COMPUTATIONAL_BACKEND_DASK_CLIENT_ENABLED", "false") monkeypatch.setenv("COMPUTATIONAL_BACKEND_ENABLED", "false") - monkeypatch.setenv("R_CLONE_S3_PROVIDER", "MINIO") + monkeypatch.setenv("R_CLONE_PROVIDER", "MINIO") # patch host for dynamic-sidecar, not reachable via localhost # the dynamic-sidecar (running inside a container) will use diff --git a/services/director-v2/tests/unit/test_core_settings.py b/services/director-v2/tests/unit/test_core_settings.py index 4baab8e7684..372ead7d299 100644 --- a/services/director-v2/tests/unit/test_core_settings.py +++ b/services/director-v2/tests/unit/test_core_settings.py @@ -43,7 +43,7 @@ def test_supported_backends_did_not_change() -> None: def test_expected_s3_endpoint( endpoint: str, is_secure: bool, monkeypatch: MonkeyPatch ) -> None: - monkeypatch.setenv("R_CLONE_S3_PROVIDER", "MINIO") + monkeypatch.setenv("R_CLONE_PROVIDER", "MINIO") monkeypatch.setenv("S3_ENDPOINT", endpoint) monkeypatch.setenv("S3_SECURE", "true" if is_secure else "false") @@ -55,7 +55,7 @@ def test_expected_s3_endpoint( def test_enforce_r_clone_requirement(monkeypatch: MonkeyPatch) -> None: - monkeypatch.setenv("R_CLONE_S3_PROVIDER", "MINIO") + monkeypatch.setenv("R_CLONE_PROVIDER", "MINIO") monkeypatch.setenv("R_CLONE_POLL_INTERVAL_SECONDS", "11") with pytest.raises(ValueError): RCloneSettings() diff --git a/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_service_specs.py b/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_service_specs.py index 6d375becd14..ddbc2708111 100644 --- a/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_service_specs.py +++ b/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_service_specs.py @@ -37,7 +37,7 @@ def mocked_env(monkeypatch: MonkeyPatch) -> Iterator[Dict[str, str]]: "SIMCORE_SERVICES_NETWORK_NAME": "simcore_services_network_name", "TRAEFIK_SIMCORE_ZONE": "test_traefik_zone", "SWARM_STACK_NAME": "test_swarm_name", - "R_CLONE_S3_PROVIDER": "MINIO", + "R_CLONE_PROVIDER": "MINIO", } with monkeypatch.context() as m: diff --git a/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_service_specs_sidecar.py b/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_service_specs_sidecar.py index 853773d666b..999d1afc0ce 100644 --- a/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_service_specs_sidecar.py +++ b/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_service_specs_sidecar.py @@ -22,7 +22,7 @@ "SIMCORE_SERVICES_NETWORK_NAME": "simcore_services_network_name", "TRAEFIK_SIMCORE_ZONE": "test_traefik_zone", "SWARM_STACK_NAME": "test_swarm_name", - "R_CLONE_S3_PROVIDER": "MINIO", + "R_CLONE_PROVIDER": "MINIO", } EXPECTED_DYNAMIC_SIDECAR_ENV_VAR_NAMES = { diff --git a/services/director-v2/tests/unit/with_dbs/test_modules_comp_scheduler_dask_scheduler.py b/services/director-v2/tests/unit/with_dbs/test_modules_comp_scheduler_dask_scheduler.py index 
161a8b7f4d8..96d662bdbab 100644 --- a/services/director-v2/tests/unit/with_dbs/test_modules_comp_scheduler_dask_scheduler.py +++ b/services/director-v2/tests/unit/with_dbs/test_modules_comp_scheduler_dask_scheduler.py @@ -80,7 +80,7 @@ def minimal_dask_scheduler_config( monkeypatch.setenv("DIRECTOR_V2_POSTGRES_ENABLED", "1") monkeypatch.setenv("COMPUTATIONAL_BACKEND_DASK_CLIENT_ENABLED", "1") monkeypatch.setenv("COMPUTATIONAL_BACKEND_ENABLED", "1") - monkeypatch.setenv("R_CLONE_S3_PROVIDER", "MINIO") + monkeypatch.setenv("R_CLONE_PROVIDER", "MINIO") @pytest.fixture diff --git a/services/director-v2/tests/unit/with_dbs/test_route_clusters.py b/services/director-v2/tests/unit/with_dbs/test_route_clusters.py index 28e1908686c..bbae1c02d64 100644 --- a/services/director-v2/tests/unit/with_dbs/test_route_clusters.py +++ b/services/director-v2/tests/unit/with_dbs/test_route_clusters.py @@ -49,7 +49,7 @@ def clusters_config( ): monkeypatch.setenv("DIRECTOR_V2_POSTGRES_ENABLED", "1") monkeypatch.setenv("COMPUTATIONAL_BACKEND_DASK_CLIENT_ENABLED", "1") - monkeypatch.setenv("R_CLONE_S3_PROVIDER", "MINIO") + monkeypatch.setenv("R_CLONE_PROVIDER", "MINIO") @pytest.fixture diff --git a/services/director-v2/tests/unit/with_dbs/test_route_clusters_details.py b/services/director-v2/tests/unit/with_dbs/test_route_clusters_details.py index 2e355ecb899..6711d5b52f9 100644 --- a/services/director-v2/tests/unit/with_dbs/test_route_clusters_details.py +++ b/services/director-v2/tests/unit/with_dbs/test_route_clusters_details.py @@ -37,7 +37,7 @@ def clusters_config( ): monkeypatch.setenv("DIRECTOR_V2_POSTGRES_ENABLED", "1") monkeypatch.setenv("COMPUTATIONAL_BACKEND_DASK_CLIENT_ENABLED", "1") - monkeypatch.setenv("R_CLONE_S3_PROVIDER", "MINIO") + monkeypatch.setenv("R_CLONE_PROVIDER", "MINIO") @pytest.fixture diff --git a/services/director-v2/tests/unit/with_dbs/test_utils_dask.py b/services/director-v2/tests/unit/with_dbs/test_utils_dask.py index d4e1b1fad0e..427745d6ec5 100644 --- a/services/director-v2/tests/unit/with_dbs/test_utils_dask.py +++ b/services/director-v2/tests/unit/with_dbs/test_utils_dask.py @@ -228,7 +228,7 @@ def app_with_db( postgres_host_config: Dict[str, str], ): monkeypatch.setenv("DIRECTOR_V2_POSTGRES_ENABLED", "1") - monkeypatch.setenv("R_CLONE_S3_PROVIDER", "MINIO") + monkeypatch.setenv("R_CLONE_PROVIDER", "MINIO") async def test_compute_input_data( diff --git a/services/director-v2/tests/unit/with_swarm/test_modules_dynamic_sidecar_docker_api.py b/services/director-v2/tests/unit/with_swarm/test_modules_dynamic_sidecar_docker_api.py index 0ff86e81f49..4e3ddaade6a 100644 --- a/services/director-v2/tests/unit/with_swarm/test_modules_dynamic_sidecar_docker_api.py +++ b/services/director-v2/tests/unit/with_swarm/test_modules_dynamic_sidecar_docker_api.py @@ -59,7 +59,7 @@ def dynamic_sidecar_settings( monkeypatch.setenv("TRAEFIK_SIMCORE_ZONE", "test_traefik_zone") monkeypatch.setenv("SWARM_STACK_NAME", "test_swarm_name") monkeypatch.setenv("SIMCORE_SERVICES_NETWORK_NAME", "test_network_name") - monkeypatch.setenv("R_CLONE_S3_PROVIDER", "MINIO") + monkeypatch.setenv("R_CLONE_PROVIDER", "MINIO") return DynamicSidecarSettings.create_from_envs() @@ -399,7 +399,7 @@ def test_valid_network_names( monkeypatch.setenv("SIMCORE_SERVICES_NETWORK_NAME", simcore_services_network_name) monkeypatch.setenv("TRAEFIK_SIMCORE_ZONE", "test_traefik_zone") monkeypatch.setenv("SWARM_STACK_NAME", "test_swarm_name") - monkeypatch.setenv("R_CLONE_S3_PROVIDER", "MINIO") + monkeypatch.setenv("R_CLONE_PROVIDER", 
"MINIO") dynamic_sidecar_settings = DynamicSidecarSettings.create_from_envs() assert dynamic_sidecar_settings diff --git a/services/director-v2/tests/unit/with_swarm/test_modules_dynamic_sidecar_scheduler.py b/services/director-v2/tests/unit/with_swarm/test_modules_dynamic_sidecar_scheduler.py index 720ed315665..3ae245fa86f 100644 --- a/services/director-v2/tests/unit/with_swarm/test_modules_dynamic_sidecar_scheduler.py +++ b/services/director-v2/tests/unit/with_swarm/test_modules_dynamic_sidecar_scheduler.py @@ -196,7 +196,7 @@ def dynamic_sidecar_settings( "DIRECTOR_V2_DYNAMIC_SCHEDULER_INTERVAL_SECONDS", str(TEST_SCHEDULER_INTERVAL_SECONDS), ) - monkeypatch.setenv("R_CLONE_S3_PROVIDER", "MINIO") + monkeypatch.setenv("R_CLONE_PROVIDER", "MINIO") app_settings = AppSettings.create_from_envs() return app_settings diff --git a/services/director-v2/tests/unit/with_swarm/test_routes_dynamic_services.py b/services/director-v2/tests/unit/with_swarm/test_routes_dynamic_services.py index 2a47a38afeb..03fbc9ee00e 100644 --- a/services/director-v2/tests/unit/with_swarm/test_routes_dynamic_services.py +++ b/services/director-v2/tests/unit/with_swarm/test_routes_dynamic_services.py @@ -97,7 +97,7 @@ def mock_env(monkeypatch: MonkeyPatch, docker_swarm: None) -> None: monkeypatch.setenv("SC_BOOT_MODE", "production") - monkeypatch.setenv("R_CLONE_S3_PROVIDER", "MINIO") + monkeypatch.setenv("R_CLONE_PROVIDER", "MINIO") @pytest.fixture From 92d8b62b83286b9d682bf02dcbe9f0262057299f Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Sun, 24 Apr 2022 10:52:06 +0200 Subject: [PATCH 23/78] fix env vars for rlcone --- services/director-v2/.env-devel | 2 +- .../src/simcore_service_dynamic_sidecar/modules/nodeports.py | 1 + services/storage/src/simcore_service_storage/handlers.py | 1 - 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/services/director-v2/.env-devel b/services/director-v2/.env-devel index b5b68df0ddc..9fd76eb1234 100644 --- a/services/director-v2/.env-devel +++ b/services/director-v2/.env-devel @@ -52,7 +52,7 @@ S3_ACCESS_KEY=12345678 S3_SECRET_KEY=12345678 S3_BUCKET_NAME=simcore S3_SECURE=0 -R_CLONE_S3_PROVIDER=MINIO +R_CLONE_PROVIDER=MINIO TRACING_ENABLED=True TRACING_ZIPKIN_ENDPOINT=http://jaeger:9411 diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py index 96cdc1422cf..4a7686116f3 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/nodeports.py @@ -181,6 +181,7 @@ async def download_target_ports( user_id=settings.DY_SIDECAR_USER_ID, project_id=str(settings.DY_SIDECAR_PROJECT_ID), node_uuid=str(settings.DY_SIDECAR_NODE_ID), + r_clone_settings=settings.DY_SIDECAR_R_CLONE_SETTINGS, ) data = {} diff --git a/services/storage/src/simcore_service_storage/handlers.py b/services/storage/src/simcore_service_storage/handlers.py index a1d150b6b18..9a998ef5218 100644 --- a/services/storage/src/simcore_service_storage/handlers.py +++ b/services/storage/src/simcore_service_storage/handlers.py @@ -274,7 +274,6 @@ async def update_file_meta_data(request: web.Request): params, query, body = await extract_and_validate(request) assert params, "params %s" % params # nosec - assert query, "query %s" % query # nosec assert not body, "body %s" % body # nosec with handle_storage_errors(): From a3b595bda170c670d5896c283e07529bfc76d2a1 Mon Sep 17 00:00:00 2001 From: 
Andrei Neagu Date: Sun, 24 Apr 2022 10:52:27 +0200 Subject: [PATCH 24/78] adding r_clone --- .env-devel | 2 +- services/docker-compose.yml | 3 ++- services/dynamic-sidecar/Dockerfile | 7 +++++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/.env-devel b/.env-devel index 3aa61838001..f7fcf3a6e3e 100644 --- a/.env-devel +++ b/.env-devel @@ -60,7 +60,7 @@ S3_BUCKET_NAME=simcore S3_ENDPOINT=172.17.0.1:9001 S3_SECRET_KEY=12345678 S3_SECURE=0 -R_CLONE_S3_PROVIDER=MINIO +R_CLONE_PROVIDER=MINIO SCICRUNCH_API_BASE_URL=https://scicrunch.org/api/1 SCICRUNCH_API_KEY=REPLACE_ME_with_valid_api_key diff --git a/services/docker-compose.yml b/services/docker-compose.yml index 49466337bdd..a50f78be9cb 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -115,7 +115,8 @@ services: - S3_BUCKET_NAME=${S3_BUCKET_NAME} - S3_ENDPOINT=${S3_ENDPOINT} - S3_SECRET_KEY=${S3_SECRET_KEY} - - R_CLONE_S3_PROVIDER=${R_CLONE_S3_PROVIDER} + - R_CLONE_PROVIDER=${R_CLONE_PROVIDER} + - R_CLONE_STORAGE_ENDPOINT=${STORAGE_ENDPOINT} - MONITORING_ENABLED=${MONITORING_ENABLED:-True} - SIMCORE_SERVICES_NETWORK_NAME=interactive_services_subnet - TRACING_THRIFT_COMPACT_ENDPOINT=${TRACING_THRIFT_COMPACT_ENDPOINT} diff --git a/services/dynamic-sidecar/Dockerfile b/services/dynamic-sidecar/Dockerfile index da7c8eed1d3..cabdca914c6 100644 --- a/services/dynamic-sidecar/Dockerfile +++ b/services/dynamic-sidecar/Dockerfile @@ -15,6 +15,7 @@ RUN set -eux && \ apt-get install -y \ gosu \ libmagic1 \ + curl \ && \ rm -rf /var/lib/apt/lists/* && \ # verify that the binary works @@ -46,6 +47,12 @@ ENV PATH="${VIRTUAL_ENV}/bin:$PATH" # volumes between itself and the spawned containers ENV DY_VOLUMES="/dy-volumes" +# rclone installation +ARG R_CLONE_VERSION="1.58.0" +RUN curl -sLO "https://downloads.rclone.org/v${R_CLONE_VERSION}/rclone-v${R_CLONE_VERSION}-linux-amd64.deb" && \ + dpkg -i "rclone-v${R_CLONE_VERSION}-linux-amd64.deb" && \ + rm "rclone-v${R_CLONE_VERSION}-linux-amd64.deb" && \ + rclone --version # -------------------------- Build stage ------------------- # Installs build/package management tools and third party dependencies From aea65b9487f72c8ddb4b148ea4304dbc24a77b0a Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 25 Apr 2022 07:31:07 +0200 Subject: [PATCH 25/78] fix unit test simcore-sdk --- .../tests/unit/test_node_ports_v2_r_clone.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py b/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py index 383df96898d..115d1d7d111 100644 --- a/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py +++ b/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py @@ -1,6 +1,7 @@ # pylint: disable=redefined-outer-name # pylint: disable=protected-access +import subprocess from pathlib import Path import pytest @@ -36,8 +37,18 @@ def r_clone_settings( return RCloneSettings() +@pytest.fixture +def skip_if_r_clone_is_missing() -> None: + try: + subprocess.check_output(["rclone", "--version"]) + except Exception: + pytest.skip("rclone is not installed") + + async def test_is_r_clone_available_cached( - caplog: LogCaptureFixture, r_clone_settings: RCloneSettings + caplog: LogCaptureFixture, + r_clone_settings: RCloneSettings, + skip_if_r_clone_is_missing: None, ) -> None: for _ in range(3): result = await r_clone.is_r_clone_available(r_clone_settings) From 8113043277640e84f004ea3f0a243f95169ddd3e Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: 
Mon, 25 Apr 2022 07:31:28 +0200 Subject: [PATCH 26/78] fix director-v2 integration tests --- .../pytest-simcore/src/pytest_simcore/minio_service.py | 4 ++-- .../tests/integration/01/test_computation_api.py | 2 ++ .../tests/integration/02/test_dynamic_services_routes.py | 7 +++++++ .../02/test_dynamic_sidecar_nodeports_integration.py | 8 ++++++++ .../02/test_mixed_dynamic_sidecar_and_legacy_project.py | 8 ++++++++ 5 files changed, 27 insertions(+), 2 deletions(-) diff --git a/packages/pytest-simcore/src/pytest_simcore/minio_service.py b/packages/pytest-simcore/src/pytest_simcore/minio_service.py index 57ec53c1f44..b01af90769f 100644 --- a/packages/pytest-simcore/src/pytest_simcore/minio_service.py +++ b/packages/pytest-simcore/src/pytest_simcore/minio_service.py @@ -3,7 +3,7 @@ # pylint: disable=unused-variable import logging -from typing import Dict, Iterator +from typing import Any, Dict, Iterator import pytest import tenacity @@ -42,7 +42,7 @@ def _ensure_remove_bucket(client: Minio, bucket_name: str): @pytest.fixture(scope="module") def minio_config( docker_stack: Dict, testing_environ_vars: Dict, monkeypatch_module: MonkeyPatch -) -> Dict[str, str]: +) -> Dict[str, Any]: assert "pytest-ops_minio" in docker_stack["services"] config = { diff --git a/services/director-v2/tests/integration/01/test_computation_api.py b/services/director-v2/tests/integration/01/test_computation_api.py index be0930cf3a8..11fa4d0212a 100644 --- a/services/director-v2/tests/integration/01/test_computation_api.py +++ b/services/director-v2/tests/integration/01/test_computation_api.py @@ -71,6 +71,8 @@ def mock_env( monkeypatch.setenv("SWARM_STACK_NAME", "test_mocked_stack_name") monkeypatch.setenv("TRAEFIK_SIMCORE_ZONE", "test_mocked_simcore_zone") monkeypatch.setenv("R_CLONE_PROVIDER", "MINIO") + monkeypatch.setenv("R_CLONE_STORAGE_ENDPOINT", "storage:8000") + @pytest.fixture() diff --git a/services/director-v2/tests/integration/02/test_dynamic_services_routes.py b/services/director-v2/tests/integration/02/test_dynamic_services_routes.py index e6a8207667e..39b8df9799f 100644 --- a/services/director-v2/tests/integration/02/test_dynamic_services_routes.py +++ b/services/director-v2/tests/integration/02/test_dynamic_services_routes.py @@ -125,6 +125,13 @@ async def test_client( monkeypatch.setenv("POSTGRES_DB", "mocked_db") monkeypatch.setenv("DIRECTOR_V2_POSTGRES_ENABLED", "false") monkeypatch.setenv("R_CLONE_PROVIDER", "MINIO") + monkeypatch.setenv("R_CLONE_STORAGE_ENDPOINT", "storage:8000") + monkeypatch.setenv("S3_ENDPOINT", "endpoint") + monkeypatch.setenv("S3_ACCESS_KEY", "access_key") + monkeypatch.setenv("S3_SECRET_KEY", "secret_key") + monkeypatch.setenv("S3_BUCKET_NAME", "bucket_name") + monkeypatch.setenv("S3_SECURE", "false") + # patch host for dynamic-sidecar, not reachable via localhost # the dynamic-sidecar (running inside a container) will use diff --git a/services/director-v2/tests/integration/02/test_dynamic_sidecar_nodeports_integration.py b/services/director-v2/tests/integration/02/test_dynamic_sidecar_nodeports_integration.py index 194f89dfb0f..1a18137cc4a 100644 --- a/services/director-v2/tests/integration/02/test_dynamic_sidecar_nodeports_integration.py +++ b/services/director-v2/tests/integration/02/test_dynamic_sidecar_nodeports_integration.py @@ -301,6 +301,8 @@ def mock_env( dev_features_enabled: str, rabbit_service: RabbitSettings, dask_scheduler_service: str, + minio_config: Dict[str, Any], + storage_service: URL, ) -> None: # Works as below line in docker.compose.yml # 
${DOCKER_REGISTRY:-itisfoundation}/dynamic-sidecar:${DOCKER_IMAGE_TAG:-latest} @@ -329,6 +331,12 @@ monkeypatch.setenv("RABBIT_HOST", f"{get_localhost_ip()}") monkeypatch.setenv("POSTGRES_HOST", f"{get_localhost_ip()}") monkeypatch.setenv("R_CLONE_PROVIDER", "MINIO") + monkeypatch.setenv("R_CLONE_STORAGE_ENDPOINT", f"{storage_service}") + monkeypatch.setenv("S3_ENDPOINT", minio_config["client"]["endpoint"]) + monkeypatch.setenv("S3_ACCESS_KEY", minio_config["client"]["access_key"]) + monkeypatch.setenv("S3_SECRET_KEY", minio_config["client"]["secret_key"]) + monkeypatch.setenv("S3_BUCKET_NAME", minio_config["bucket_name"]) + monkeypatch.setenv("S3_SECURE", minio_config["client"]["secure"]) monkeypatch.setenv("DIRECTOR_V2_DEV_FEATURES_ENABLED", dev_features_enabled) monkeypatch.setenv("DIRECTOR_V2_TRACING", "null") monkeypatch.setenv( diff --git a/services/director-v2/tests/integration/02/test_mixed_dynamic_sidecar_and_legacy_project.py b/services/director-v2/tests/integration/02/test_mixed_dynamic_sidecar_and_legacy_project.py index 958f6e8e1f8..a12cc633190 100644 --- a/services/director-v2/tests/integration/02/test_mixed_dynamic_sidecar_and_legacy_project.py +++ b/services/director-v2/tests/integration/02/test_mixed_dynamic_sidecar_and_legacy_project.py @@ -128,6 +128,8 @@ def _assemble_node_data(spec: Dict, label: str) -> Dict[str, str]: @pytest.fixture async def director_v2_client( minimal_configuration: None, + minio_config: Dict[str, Any], + storage_service: URL, network_name: str, monkeypatch, ) -> AsyncIterable[httpx.AsyncClient]: @@ -155,6 +157,12 @@ monkeypatch.setenv("POSTGRES_HOST", f"{get_localhost_ip()}") monkeypatch.setenv("COMPUTATIONAL_BACKEND_DASK_CLIENT_ENABLED", "false") monkeypatch.setenv("COMPUTATIONAL_BACKEND_ENABLED", "false") monkeypatch.setenv("R_CLONE_PROVIDER", "MINIO") + monkeypatch.setenv("R_CLONE_STORAGE_ENDPOINT", f"{storage_service}") + monkeypatch.setenv("S3_ENDPOINT", minio_config["client"]["endpoint"]) + monkeypatch.setenv("S3_ACCESS_KEY", minio_config["client"]["access_key"]) + monkeypatch.setenv("S3_SECRET_KEY", minio_config["client"]["secret_key"]) + monkeypatch.setenv("S3_BUCKET_NAME", minio_config["bucket_name"]) + monkeypatch.setenv("S3_SECURE", minio_config["client"]["secure"]) # patch host for dynamic-sidecar, not reachable via localhost # the dynamic-sidecar (running inside a container) will use From dad78df9f8568663925dafb7da50dabadcc28efa Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 25 Apr 2022 08:10:34 +0200 Subject: [PATCH 27/78] fixed first batch of director-v2 unittest fixes --- .env-devel | 1 + .../tests/integration/test_node_ports_v2_r_clone.py | 5 +++-- services/director-v2/.env-devel | 1 + .../director-v2/tests/unit/test_core_settings.py | 5 +++++ ..._modules_dynamic_sidecar_docker_service_specs.py | 6 ++++++ ..._dynamic_sidecar_docker_service_specs_sidecar.py | 13 +++++++++++++ 6 files changed, 29 insertions(+), 2 deletions(-) diff --git a/.env-devel b/.env-devel index f7fcf3a6e3e..78b65b09958 100644 --- a/.env-devel +++ b/.env-devel @@ -61,6 +61,7 @@ S3_ENDPOINT=172.17.0.1:9001 S3_SECRET_KEY=12345678 S3_SECURE=0 R_CLONE_PROVIDER=MINIO +R_CLONE_STORAGE_ENDPOINT=storage:8080 SCICRUNCH_API_BASE_URL=https://scicrunch.org/api/1 SCICRUNCH_API_KEY=REPLACE_ME_with_valid_api_key diff --git a/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py b/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py index 49826af750b..e2ae51a5abc 100644 --- a/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py +++ 
b/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py @@ -120,8 +120,9 @@ def _is_file_present(postgres_db: sa.engine.Engine, s3_object: str) -> bool: result = conn.execute( file_meta_data.select().where(file_meta_data.c.file_uuid == s3_object) ) - result_len = len(list(result)) - assert result_len <= 1 + result_list = list(result) + result_len = len(result_list) + assert result_len <= 1, result_list return result_len == 1 diff --git a/services/director-v2/.env-devel b/services/director-v2/.env-devel index 9fd76eb1234..30a3cbf5cb2 100644 --- a/services/director-v2/.env-devel +++ b/services/director-v2/.env-devel @@ -53,6 +53,7 @@ S3_SECRET_KEY=12345678 S3_BUCKET_NAME=simcore S3_SECURE=0 R_CLONE_PROVIDER=MINIO +R_CLONE_STORAGE_ENDPOINT=http://storage:8080 TRACING_ENABLED=True TRACING_ZIPKIN_ENDPOINT=http://jaeger:9411 diff --git a/services/director-v2/tests/unit/test_core_settings.py b/services/director-v2/tests/unit/test_core_settings.py index 372ead7d299..062bda963a6 100644 --- a/services/director-v2/tests/unit/test_core_settings.py +++ b/services/director-v2/tests/unit/test_core_settings.py @@ -46,6 +46,11 @@ def test_expected_s3_endpoint( monkeypatch.setenv("R_CLONE_PROVIDER", "MINIO") monkeypatch.setenv("S3_ENDPOINT", endpoint) monkeypatch.setenv("S3_SECURE", "true" if is_secure else "false") + monkeypatch.setenv("S3_ACCESS_KEY", "access_key") + monkeypatch.setenv("S3_SECRET_KEY", "secret_key") + monkeypatch.setenv("S3_BUCKET_NAME", "bucket_name") + monkeypatch.setenv("R_CLONE_STORAGE_ENDPOINT", "storage_endpoint") + r_clone_settings = RCloneSettings() diff --git a/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_service_specs.py b/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_service_specs.py index ddbc2708111..1b4ad93cf29 100644 --- a/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_service_specs.py +++ b/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_service_specs.py @@ -38,6 +38,12 @@ def mocked_env(monkeypatch: MonkeyPatch) -> Iterator[Dict[str, str]]: "TRAEFIK_SIMCORE_ZONE": "test_traefik_zone", "SWARM_STACK_NAME": "test_swarm_name", "R_CLONE_PROVIDER": "MINIO", + "S3_ENDPOINT": "endpoint", + "S3_ACCESS_KEY": "access_key", + "S3_SECRET_KEY": "secret_key", + "S3_BUCKET_NAME": "bucket_name", + "S3_SECURE": "false", + "R_CLONE_STORAGE_ENDPOINT": "storage_endpoint", } with monkeypatch.context() as m: diff --git a/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_service_specs_sidecar.py b/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_service_specs_sidecar.py index 999d1afc0ce..e495e9ef0c2 100644 --- a/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_service_specs_sidecar.py +++ b/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_service_specs_sidecar.py @@ -23,6 +23,12 @@ "TRAEFIK_SIMCORE_ZONE": "test_traefik_zone", "SWARM_STACK_NAME": "test_swarm_name", "R_CLONE_PROVIDER": "MINIO", + "S3_ENDPOINT": "endpoint", + "S3_ACCESS_KEY": "s3_access_key", + "S3_SECRET_KEY": "s3_secret_key", + "S3_BUCKET_NAME": "bucket_name", + "S3_SECURE": "false", + "R_CLONE_STORAGE_ENDPOINT": "storage_endpoint", } EXPECTED_DYNAMIC_SIDECAR_ENV_VAR_NAMES = { @@ -53,6 +59,13 @@ "REGISTRY_USER", "SIMCORE_HOST_NAME", "STORAGE_ENDPOINT", + "R_CLONE_PROVIDER", + "S3_ENDPOINT", + "S3_ACCESS_KEY", + "S3_SECRET_KEY", + "S3_BUCKET_NAME", + "S3_SECURE", + "R_CLONE_STORAGE_ENDPOINT", } From 3032ab368f46222b95a1fd01de413c66067aa4af Mon Sep 17 
00:00:00 2001 From: Andrei Neagu Date: Mon, 25 Apr 2022 09:12:11 +0200 Subject: [PATCH 28/78] pylint --- packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py b/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py index 115d1d7d111..78a943105b2 100644 --- a/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py +++ b/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py @@ -1,5 +1,6 @@ # pylint: disable=redefined-outer-name # pylint: disable=protected-access +# pylint: disable=unused-argument import subprocess from pathlib import Path @@ -41,7 +42,7 @@ def skip_if_r_clone_is_missing() -> None: try: subprocess.check_output(["rclone", "--version"]) - except Exception: + except Exception: # pylint: disable=broad-except pytest.skip("rclone is not installed") From 1fabc74254086e16f74630062c8f5042f4ac40d4 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 25 Apr 2022 09:12:20 +0200 Subject: [PATCH 29/78] second batch of fixes director-v2 --- .../with_dbs/test_modules_comp_scheduler_dask_scheduler.py | 7 +++++++ .../director-v2/tests/unit/with_dbs/test_route_clusters.py | 6 ++++++ .../tests/unit/with_dbs/test_route_clusters_details.py | 6 ++++++ .../tests/unit/with_dbs/test_route_computations.py | 7 +++++++ .../director-v2/tests/unit/with_dbs/test_utils_dask.py | 6 ++++++ 5 files changed, 32 insertions(+) diff --git a/services/director-v2/tests/unit/with_dbs/test_modules_comp_scheduler_dask_scheduler.py b/services/director-v2/tests/unit/with_dbs/test_modules_comp_scheduler_dask_scheduler.py index 96d662bdbab..678bafade69 100644 --- a/services/director-v2/tests/unit/with_dbs/test_modules_comp_scheduler_dask_scheduler.py +++ b/services/director-v2/tests/unit/with_dbs/test_modules_comp_scheduler_dask_scheduler.py @@ -81,6 +81,13 @@ def minimal_dask_scheduler_config( monkeypatch.setenv("COMPUTATIONAL_BACKEND_DASK_CLIENT_ENABLED", "1") monkeypatch.setenv("COMPUTATIONAL_BACKEND_ENABLED", "1") monkeypatch.setenv("R_CLONE_PROVIDER", "MINIO") + monkeypatch.setenv("S3_ENDPOINT", "endpoint") + monkeypatch.setenv("S3_ACCESS_KEY", "access_key") + monkeypatch.setenv("S3_SECRET_KEY", "secret_key") + monkeypatch.setenv("S3_BUCKET_NAME", "bucket_name") + monkeypatch.setenv("S3_SECURE", "false") + monkeypatch.setenv("R_CLONE_STORAGE_ENDPOINT", "storage_endpoint") + @pytest.fixture diff --git a/services/director-v2/tests/unit/with_dbs/test_route_clusters.py b/services/director-v2/tests/unit/with_dbs/test_route_clusters.py index bbae1c02d64..809f5b69746 100644 --- a/services/director-v2/tests/unit/with_dbs/test_route_clusters.py +++ b/services/director-v2/tests/unit/with_dbs/test_route_clusters.py @@ -50,6 +50,12 @@ def clusters_config( monkeypatch.setenv("DIRECTOR_V2_POSTGRES_ENABLED", "1") monkeypatch.setenv("COMPUTATIONAL_BACKEND_DASK_CLIENT_ENABLED", "1") monkeypatch.setenv("R_CLONE_PROVIDER", "MINIO") + monkeypatch.setenv("S3_ENDPOINT", "endpoint") + monkeypatch.setenv("S3_ACCESS_KEY", "access_key") + monkeypatch.setenv("S3_SECRET_KEY", "secret_key") + monkeypatch.setenv("S3_BUCKET_NAME", "bucket_name") + monkeypatch.setenv("S3_SECURE", "false") + monkeypatch.setenv("R_CLONE_STORAGE_ENDPOINT", "storage_endpoint") @pytest.fixture diff --git a/services/director-v2/tests/unit/with_dbs/test_route_clusters_details.py b/services/director-v2/tests/unit/with_dbs/test_route_clusters_details.py index 6711d5b52f9..d113ac56833 100644 --- 
a/services/director-v2/tests/unit/with_dbs/test_route_clusters_details.py +++ b/services/director-v2/tests/unit/with_dbs/test_route_clusters_details.py @@ -38,6 +38,12 @@ def clusters_config( monkeypatch.setenv("DIRECTOR_V2_POSTGRES_ENABLED", "1") monkeypatch.setenv("COMPUTATIONAL_BACKEND_DASK_CLIENT_ENABLED", "1") monkeypatch.setenv("R_CLONE_PROVIDER", "MINIO") + monkeypatch.setenv("S3_ENDPOINT", "endpoint") + monkeypatch.setenv("S3_ACCESS_KEY", "access_key") + monkeypatch.setenv("S3_SECRET_KEY", "secret_key") + monkeypatch.setenv("S3_BUCKET_NAME", "bucket_name") + monkeypatch.setenv("S3_SECURE", "false") + monkeypatch.setenv("R_CLONE_STORAGE_ENDPOINT", "storage_endpoint") @pytest.fixture diff --git a/services/director-v2/tests/unit/with_dbs/test_route_computations.py b/services/director-v2/tests/unit/with_dbs/test_route_computations.py index 533ccdc6bd1..43eefebd0e4 100644 --- a/services/director-v2/tests/unit/with_dbs/test_route_computations.py +++ b/services/director-v2/tests/unit/with_dbs/test_route_computations.py @@ -36,6 +36,13 @@ def minimal_configuration( ): monkeypatch.setenv("DIRECTOR_V2_DYNAMIC_SIDECAR_ENABLED", "false") monkeypatch.setenv("DIRECTOR_V2_POSTGRES_ENABLED", "1") + monkeypatch.setenv("R_CLONE_PROVIDER", "MINIO") + monkeypatch.setenv("S3_ENDPOINT", "endpoint") + monkeypatch.setenv("S3_ACCESS_KEY", "access_key") + monkeypatch.setenv("S3_SECRET_KEY", "secret_key") + monkeypatch.setenv("S3_BUCKET_NAME", "bucket_name") + monkeypatch.setenv("S3_SECURE", "false") + monkeypatch.setenv("R_CLONE_STORAGE_ENDPOINT", "storage_endpoint") async def test_get_computation_from_empty_project( diff --git a/services/director-v2/tests/unit/with_dbs/test_utils_dask.py b/services/director-v2/tests/unit/with_dbs/test_utils_dask.py index 427745d6ec5..2813eb78a8c 100644 --- a/services/director-v2/tests/unit/with_dbs/test_utils_dask.py +++ b/services/director-v2/tests/unit/with_dbs/test_utils_dask.py @@ -229,6 +229,12 @@ def app_with_db( ): monkeypatch.setenv("DIRECTOR_V2_POSTGRES_ENABLED", "1") monkeypatch.setenv("R_CLONE_PROVIDER", "MINIO") + monkeypatch.setenv("S3_ENDPOINT", "endpoint") + monkeypatch.setenv("S3_ACCESS_KEY", "access_key") + monkeypatch.setenv("S3_SECRET_KEY", "secret_key") + monkeypatch.setenv("S3_BUCKET_NAME", "bucket_name") + monkeypatch.setenv("S3_SECURE", "false") + monkeypatch.setenv("R_CLONE_STORAGE_ENDPOINT", "storage_endpoint") async def test_compute_input_data( From 123495d30cdebe2a5de2978de25acef808d9e5db Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Apr 2022 08:42:39 +0200 Subject: [PATCH 30/78] fixed other tests --- .../with_swarm/test_modules_dynamic_sidecar_docker_api.py | 6 ++++++ .../with_swarm/test_modules_dynamic_sidecar_scheduler.py | 6 ++++++ .../tests/unit/with_swarm/test_routes_dynamic_services.py | 6 ++++++ 3 files changed, 18 insertions(+) diff --git a/services/director-v2/tests/unit/with_swarm/test_modules_dynamic_sidecar_docker_api.py b/services/director-v2/tests/unit/with_swarm/test_modules_dynamic_sidecar_docker_api.py index 4e3ddaade6a..7278374dc0c 100644 --- a/services/director-v2/tests/unit/with_swarm/test_modules_dynamic_sidecar_docker_api.py +++ b/services/director-v2/tests/unit/with_swarm/test_modules_dynamic_sidecar_docker_api.py @@ -60,6 +60,12 @@ def dynamic_sidecar_settings( monkeypatch.setenv("SWARM_STACK_NAME", "test_swarm_name") monkeypatch.setenv("SIMCORE_SERVICES_NETWORK_NAME", "test_network_name") monkeypatch.setenv("R_CLONE_PROVIDER", "MINIO") + monkeypatch.setenv("S3_ENDPOINT", "endpoint") + 
monkeypatch.setenv("S3_ACCESS_KEY", "access_key") + monkeypatch.setenv("S3_SECRET_KEY", "secret_key") + monkeypatch.setenv("S3_BUCKET_NAME", "bucket_name") + monkeypatch.setenv("S3_SECURE", "false") + monkeypatch.setenv("R_CLONE_STORAGE_ENDPOINT", "storage_endpoint") return DynamicSidecarSettings.create_from_envs() diff --git a/services/director-v2/tests/unit/with_swarm/test_modules_dynamic_sidecar_scheduler.py b/services/director-v2/tests/unit/with_swarm/test_modules_dynamic_sidecar_scheduler.py index 3ae245fa86f..c83294aa497 100644 --- a/services/director-v2/tests/unit/with_swarm/test_modules_dynamic_sidecar_scheduler.py +++ b/services/director-v2/tests/unit/with_swarm/test_modules_dynamic_sidecar_scheduler.py @@ -197,6 +197,12 @@ def dynamic_sidecar_settings( str(TEST_SCHEDULER_INTERVAL_SECONDS), ) monkeypatch.setenv("R_CLONE_PROVIDER", "MINIO") + monkeypatch.setenv("S3_ENDPOINT", "endpoint") + monkeypatch.setenv("S3_ACCESS_KEY", "access_key") + monkeypatch.setenv("S3_SECRET_KEY", "secret_key") + monkeypatch.setenv("S3_BUCKET_NAME", "bucket_name") + monkeypatch.setenv("S3_SECURE", "false") + monkeypatch.setenv("R_CLONE_STORAGE_ENDPOINT", "storage_endpoint") app_settings = AppSettings.create_from_envs() return app_settings diff --git a/services/director-v2/tests/unit/with_swarm/test_routes_dynamic_services.py b/services/director-v2/tests/unit/with_swarm/test_routes_dynamic_services.py index 03fbc9ee00e..ab6b185265d 100644 --- a/services/director-v2/tests/unit/with_swarm/test_routes_dynamic_services.py +++ b/services/director-v2/tests/unit/with_swarm/test_routes_dynamic_services.py @@ -98,6 +98,12 @@ def mock_env(monkeypatch: MonkeyPatch, docker_swarm: None) -> None: monkeypatch.setenv("SC_BOOT_MODE", "production") monkeypatch.setenv("R_CLONE_PROVIDER", "MINIO") + monkeypatch.setenv("S3_ENDPOINT", "endpoint") + monkeypatch.setenv("S3_ACCESS_KEY", "access_key") + monkeypatch.setenv("S3_SECRET_KEY", "secret_key") + monkeypatch.setenv("S3_BUCKET_NAME", "bucket_name") + monkeypatch.setenv("S3_SECURE", "false") + monkeypatch.setenv("R_CLONE_STORAGE_ENDPOINT", "storage_endpoint") @pytest.fixture From 14be36c1008f42b26e6a854e13a31ca98114de0e Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Apr 2022 09:38:49 +0200 Subject: [PATCH 31/78] moved tests --- .../test_modules_dynamic_sidecar_docker_api.py | 6 ++++++ .../test_modules_dynamic_sidecar_scheduler.py | 0 .../unit/{with_swarm => }/test_routes_dynamic_services.py | 0 3 files changed, 6 insertions(+) rename services/director-v2/tests/unit/{with_swarm => }/test_modules_dynamic_sidecar_docker_api.py (98%) rename services/director-v2/tests/unit/{with_swarm => }/test_modules_dynamic_sidecar_scheduler.py (100%) rename services/director-v2/tests/unit/{with_swarm => }/test_routes_dynamic_services.py (100%) diff --git a/services/director-v2/tests/unit/with_swarm/test_modules_dynamic_sidecar_docker_api.py b/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_api.py similarity index 98% rename from services/director-v2/tests/unit/with_swarm/test_modules_dynamic_sidecar_docker_api.py rename to services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_api.py index 7278374dc0c..36588d31ba9 100644 --- a/services/director-v2/tests/unit/with_swarm/test_modules_dynamic_sidecar_docker_api.py +++ b/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_api.py @@ -406,6 +406,12 @@ def test_valid_network_names( monkeypatch.setenv("TRAEFIK_SIMCORE_ZONE", "test_traefik_zone") monkeypatch.setenv("SWARM_STACK_NAME", 
"test_swarm_name") monkeypatch.setenv("R_CLONE_PROVIDER", "MINIO") + monkeypatch.setenv("S3_ENDPOINT", "endpoint") + monkeypatch.setenv("S3_ACCESS_KEY", "access_key") + monkeypatch.setenv("S3_SECRET_KEY", "secret_key") + monkeypatch.setenv("S3_BUCKET_NAME", "bucket_name") + monkeypatch.setenv("S3_SECURE", "false") + monkeypatch.setenv("R_CLONE_STORAGE_ENDPOINT", "storage_endpoint") dynamic_sidecar_settings = DynamicSidecarSettings.create_from_envs() assert dynamic_sidecar_settings diff --git a/services/director-v2/tests/unit/with_swarm/test_modules_dynamic_sidecar_scheduler.py b/services/director-v2/tests/unit/test_modules_dynamic_sidecar_scheduler.py similarity index 100% rename from services/director-v2/tests/unit/with_swarm/test_modules_dynamic_sidecar_scheduler.py rename to services/director-v2/tests/unit/test_modules_dynamic_sidecar_scheduler.py diff --git a/services/director-v2/tests/unit/with_swarm/test_routes_dynamic_services.py b/services/director-v2/tests/unit/test_routes_dynamic_services.py similarity index 100% rename from services/director-v2/tests/unit/with_swarm/test_routes_dynamic_services.py rename to services/director-v2/tests/unit/test_routes_dynamic_services.py From fcd0ad0dd95317ec53690539585d0855e41935ae Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Apr 2022 10:30:08 +0200 Subject: [PATCH 32/78] refactor to use storage_client module --- .../node_ports_common/constants.py | 1 + .../node_ports_common/filemanager.py | 20 ++++- .../r_clone.py | 86 +++---------------- .../node_ports_common/storage_client.py | 56 +++++++++++- .../simcore_sdk/node_ports_v2/port_utils.py | 23 ++--- .../simcore-sdk/tests/integration/conftest.py | 2 +- .../integration/test_node_ports_v2_r_clone.py | 22 ++++- .../tests/unit/test_node_ports_v2_r_clone.py | 4 +- 8 files changed, 109 insertions(+), 105 deletions(-) create mode 100644 packages/simcore-sdk/src/simcore_sdk/node_ports_common/constants.py rename packages/simcore-sdk/src/simcore_sdk/{node_ports_v2 => node_ports_common}/r_clone.py (59%) diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/constants.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/constants.py new file mode 100644 index 00000000000..c46317b16af --- /dev/null +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/constants.py @@ -0,0 +1 @@ +ETag = str diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/filemanager.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/filemanager.py index bbdd940a47c..14ddfe72d9e 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/filemanager.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/filemanager.py @@ -8,11 +8,14 @@ import aiofiles from aiohttp import ClientPayloadError, ClientSession from pydantic.networks import AnyUrl +from settings_library.r_clone import RCloneSettings from tqdm import tqdm from yarl import URL from ..node_ports_common.client_session_manager import ClientSessionContextManager from . 
import exceptions, storage_client +from .constants import ETag +from .r_clone import is_r_clone_available, sync_local_to_s3 log = logging.getLogger(__name__) @@ -90,9 +93,6 @@ async def _download_link_to_file(session: ClientSession, url: URL, file_path: Pa raise exceptions.TransferError(url) from exc -ETag = str - - async def _upload_file_to_link( session: ClientSession, url: URL, file_path: Path ) -> ETag: @@ -251,6 +251,7 @@ async def upload_file( s3_object: str, local_file_path: Path, client_session: Optional[ClientSession] = None, + r_clone_settings: Optional[RCloneSettings] = None, ) -> Tuple[str, str]: """Uploads a file to S3 @@ -279,7 +280,18 @@ async def upload_file( if not upload_link: raise exceptions.S3InvalidPathError(s3_object) - e_tag = await _upload_file_to_link(session, upload_link, local_file_path) + if await is_r_clone_available(r_clone_settings): + e_tag = await sync_local_to_s3( + session=session, + r_clone_settings=r_clone_settings, + s3_object=s3_object, + local_file_path=local_file_path, + user_id=user_id, + ) + # TODO: maybe a better check here or an error if do not match? + store_id = "0" # simcore only feature + else: + e_tag = await _upload_file_to_link(session, upload_link, local_file_path) return store_id, e_tag diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/r_clone.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py similarity index 59% rename from packages/simcore-sdk/src/simcore_sdk/node_ports_v2/r_clone.py rename to packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py index 2d17d7dfbb0..001cc187bdb 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/r_clone.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py @@ -1,18 +1,18 @@ import asyncio import logging import re -import urllib.parse from contextlib import asynccontextmanager from pathlib import Path from typing import AsyncGenerator, Optional from aiofiles import tempfile -from aiohttp import ClientSession, ClientTimeout, web +from aiohttp import ClientSession, ClientTimeout from cache import AsyncLRU from settings_library.r_clone import RCloneSettings from settings_library.utils_r_clone import get_r_clone_config -from ..node_ports_common.filemanager import ETag +from .constants import ETag +from .storage_client import delete_file_meta_data, get_s3_link, update_file_meta_data logger = logging.getLogger(__name__) @@ -53,7 +53,7 @@ async def _async_command(command: str, *, cwd: Optional[str] = None) -> str: return decoded_stdout -@AsyncLRU(maxsize=2) +# @AsyncLRU(maxsize=2) async def is_r_clone_available(r_clone_settings: Optional[RCloneSettings]) -> bool: """returns: True if the `rclone` cli is installed and a configuration is provided""" try: @@ -76,71 +76,8 @@ async def _get_client_session( yield session -async def _get_s3_link( - r_clone_settings: RCloneSettings, s3_object: str, user_id: int -) -> str: - async with _get_client_session(r_clone_settings) as session: - url = "{endpoint}/v0/locations/0/files/{s3_object}/s3/link".format( - endpoint=r_clone_settings.storage_endpoint, - s3_object=urllib.parse.quote_plus(s3_object), - ) - logger.debug("%s", f"{url=}") - result = await session.get(url, params=dict(user_id=user_id)) - - if result.status == web.HTTPForbidden.status_code: - raise RCloneError( - ( - f"Insufficient permissions to upload {s3_object=} for {user_id=}. 
" - f"Storage: {await result.text()}" - ) - ) - - if result.status != web.HTTPOk.status_code: - raise RCloneError( - f"Could not fetch s3_link: status={result.status} {await result.text()}" - ) - - response = await result.json() - return response["data"]["s3_link"] - - -async def _update_file_meta_data( - r_clone_settings: RCloneSettings, s3_object: str -) -> ETag: - async with _get_client_session(r_clone_settings) as session: - url = "{endpoint}/v0/locations/0/files/{s3_object}/metadata".format( - endpoint=r_clone_settings.storage_endpoint, - s3_object=urllib.parse.quote_plus(s3_object), - ) - logger.debug("%s", f"{url=}") - result = await session.patch(url) - if result.status != web.HTTPOk.status_code: - raise RCloneError( - f"Could not fetch metadata: status={result.status} {await result.text()}" - ) - - response = await result.json() - logger.debug("metadata response %s", response) - return response["data"]["entity_tag"] - - -async def _delete_file_meta_data( - r_clone_settings: RCloneSettings, s3_object: str, user_id: int -) -> None: - async with _get_client_session(r_clone_settings) as session: - url = "{endpoint}/v0/locations/0/files/{s3_object}/metadata".format( - endpoint=r_clone_settings.storage_endpoint, - s3_object=urllib.parse.quote_plus(s3_object), - ) - logger.debug("%s", f"{url=}") - result = await session.delete(url, params=dict(user_id=user_id)) - if result.status != web.HTTPOk.status_code: - raise RCloneError( - f"Could not fetch metadata: status={result.status} {await result.text()}" - ) - - async def sync_local_to_s3( + session: ClientSession, r_clone_settings: Optional[RCloneSettings], s3_object: str, local_file_path: Path, @@ -154,9 +91,7 @@ async def sync_local_to_s3( ) ) - s3_link = await _get_s3_link( - r_clone_settings=r_clone_settings, s3_object=s3_object, user_id=user_id - ) + s3_link = await get_s3_link(session=session, s3_object=s3_object, user_id=user_id) s3_path = re.sub(r"^s3://", "", s3_link) logger.debug(" %s; %s", f"{s3_link=}", f"{s3_path=}") @@ -166,6 +101,7 @@ async def sync_local_to_s3( destination_path = Path(s3_path) assert local_file_path.name == destination_path.name file_name = local_file_path.name + # TODO: capture and send progress somehow? # rclone only acts upon directories, so to target a specific file # we must run the command from the file's directory. See below @@ -199,14 +135,12 @@ async def sync_local_to_s3( try: await _async_command(" ".join(r_clone_command), cwd=f"{source_path.parent}") - return await _update_file_meta_data( - r_clone_settings=r_clone_settings, s3_object=s3_object - ) + return await update_file_meta_data(session=session, s3_object=s3_object) except Exception as e: logger.warning( "There was an error while uploading %s. 
Removing metadata", s3_object ) - await _delete_file_meta_data( - r_clone_settings=r_clone_settings, s3_object=s3_object, user_id=user_id + await delete_file_meta_data( + session=session, s3_object=s3_object, user_id=user_id ) raise e diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/storage_client.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/storage_client.py index f1e38d7f704..5ff426d6c79 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/storage_client.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/storage_client.py @@ -1,9 +1,9 @@ from functools import wraps from json import JSONDecodeError from typing import Any, Callable, Dict -from urllib.parse import quote +from urllib.parse import quote, quote_plus -from aiohttp import ClientSession +from aiohttp import ClientSession, web from aiohttp.client_exceptions import ClientConnectionError, ClientResponseError from models_library.api_schemas_storage import ( FileLocationArray, @@ -15,6 +15,7 @@ from pydantic.networks import AnyUrl from . import config, exceptions +from .constants import ETag def handle_client_exception(handler: Callable): @@ -172,3 +173,54 @@ async def delete_file( params={"user_id": f"{user_id}"}, ) as response: response.raise_for_status() + + +@handle_client_exception +async def get_s3_link(session: ClientSession, s3_object: str, user_id: int) -> str: + url = f"{_base_url()}/locations/0/files/{quote_plus(s3_object)}/s3/link" + result = await session.get(url, params=dict(user_id=user_id)) + + if result.status == web.HTTPForbidden.status_code: + raise exceptions.StorageInvalidCall( + ( + f"Insufficient permissions to upload {s3_object=} for {user_id=}. " + f"Storage: {await result.text()}" + ) + ) + + if result.status != web.HTTPOk.status_code: + raise exceptions.StorageInvalidCall( + f"Could not fetch s3_link: status={result.status} {await result.text()}" + ) + + response = await result.json() + return response["data"]["s3_link"] + + +@handle_client_exception +async def update_file_meta_data(session: ClientSession, s3_object: str) -> ETag: + # API: check user access rights here when updating + # TODO: check permissions + + url = f"{_base_url()}/locations/0/files/{quote_plus(s3_object)}/metadata" + result = await session.patch(url) + if result.status != web.HTTPOk.status_code: + raise exceptions.StorageInvalidCall( + f"Could not fetch metadata: status={result.status} {await result.text()}" + ) + + response = await result.json() + return response["data"]["entity_tag"] + + +@handle_client_exception +async def delete_file_meta_data( + session: ClientSession, s3_object: str, user_id: int +) -> None: + # TODO: check permissions + url = f"{_base_url()}/locations/0/files/{quote_plus(s3_object)}/metadata" + result = await session.delete(url, params=dict(user_id=user_id)) + if result.status != web.HTTPOk.status_code: + raise exceptions.StorageInvalidCall( + f"Could not fetch metadata: status={result.status} {await result.text()}" + ) diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/port_utils.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/port_utils.py index c191cab5b4a..13faec1d6c6 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/port_utils.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_v2/port_utils.py @@ -10,7 +10,6 @@ from ..node_ports_common import config, data_items_utils, filemanager from .links import DownloadLink, FileLink, ItemConcreteValue, ItemValue, PortLink -from .r_clone import is_r_clone_available, 
sync_local_to_s3 log = logging.getLogger(__name__) @@ -152,21 +151,13 @@ async def push_file_to_store( log.debug("file path %s will be uploaded to s3", file) s3_object = data_items_utils.encode_file_id(file, project_id, node_id) - if await is_r_clone_available(r_clone_settings): - e_tag = await sync_local_to_s3( - r_clone_settings=r_clone_settings, - s3_object=s3_object, - local_file_path=file, - user_id=user_id, - ) - store_id = 0 # simcore only feature - else: - store_id, e_tag = await filemanager.upload_file( - user_id=user_id, - store_name=config.STORE, - s3_object=s3_object, - local_file_path=file, - ) + store_id, e_tag = await filemanager.upload_file( + user_id=user_id, + store_name=config.STORE, + s3_object=s3_object, + local_file_path=file, + r_clone_settings=r_clone_settings, + ) log.debug("file path %s uploaded, received ETag %s", file, e_tag) return FileLink(store=store_id, path=s3_object, e_tag=e_tag) diff --git a/packages/simcore-sdk/tests/integration/conftest.py b/packages/simcore-sdk/tests/integration/conftest.py index fb1187e3707..789dd0c8797 100644 --- a/packages/simcore-sdk/tests/integration/conftest.py +++ b/packages/simcore-sdk/tests/integration/conftest.py @@ -20,7 +20,7 @@ from simcore_postgres_database.models.projects import projects from simcore_postgres_database.models.users import users from simcore_sdk.node_ports import node_config -from simcore_sdk.node_ports_v2.r_clone import is_r_clone_available +from simcore_sdk.node_ports_common.r_clone import is_r_clone_available from yarl import URL diff --git a/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py b/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py index e2ae51a5abc..57adfcdeb08 100644 --- a/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py +++ b/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py @@ -3,17 +3,19 @@ import shutil from contextlib import asynccontextmanager +from importlib import reload from pathlib import Path from typing import AsyncGenerator, AsyncIterable, Iterator import aioboto3 import pytest import sqlalchemy as sa +from aiohttp import ClientSession from faker import Faker from pytest_mock.plugin import MockerFixture from settings_library.r_clone import RCloneSettings from simcore_postgres_database.models.file_meta_data import file_meta_data -from simcore_sdk.node_ports_v2 import r_clone +from simcore_sdk.node_ports_common import r_clone, storage_client pytest_simcore_core_services_selection = [ "migration", @@ -82,9 +84,17 @@ async def _raise_error(*args, **kwargs) -> None: raise _TestException() mocker.patch( - "simcore_sdk.node_ports_v2.r_clone._update_file_meta_data", + "simcore_sdk.node_ports_common.storage_client.update_file_meta_data", side_effect=_raise_error, ) + reload(r_clone) + reload(storage_client) + + +@pytest.fixture +async def client_session(filemanager_cfg: None) -> AsyncIterable[ClientSession]: + async with ClientSession() as session: + yield session # UTILS @@ -136,9 +146,12 @@ async def test_sync_local_to_s3( local_file_for_download: Path, user_id: int, postgres_db: sa.engine.Engine, + client_session: ClientSession, cleanup_s3: None, ) -> None: + etag = await r_clone.sync_local_to_s3( + session=client_session, r_clone_settings=r_clone_settings, s3_object=s3_object, local_file_path=file_to_upload, @@ -164,14 +177,15 @@ async def test_sync_local_to_s3_cleanup_on_error( r_clone_settings: RCloneSettings, s3_object: str, file_to_upload: Path, - local_file_for_download: Path, user_id: int, + 
postgres_db: sa.engine.Engine, + client_session: ClientSession, cleanup_s3: None, mock_update_file_meta_data: None, - postgres_db: sa.engine.Engine, ) -> None: with pytest.raises(_TestException): await r_clone.sync_local_to_s3( + session=client_session, r_clone_settings=r_clone_settings, s3_object=s3_object, local_file_path=file_to_upload, diff --git a/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py b/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py index 78a943105b2..d0d3ca24f0a 100644 --- a/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py +++ b/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py @@ -10,8 +10,8 @@ from faker import Faker from pytest import MonkeyPatch from settings_library.r_clone import S3Provider -from simcore_sdk.node_ports_v2 import r_clone -from simcore_sdk.node_ports_v2.r_clone import RCloneSettings +from simcore_sdk.node_ports_common import r_clone +from simcore_sdk.node_ports_common.r_clone import RCloneSettings @pytest.fixture From f9af13d0e78928c66473a3ab8cda2bdfb201e629 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Apr 2022 10:51:56 +0200 Subject: [PATCH 33/78] replacing caching --- packages/simcore-sdk/requirements/_base.in | 2 +- packages/simcore-sdk/requirements/_base.txt | 4 ++-- .../simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/simcore-sdk/requirements/_base.in b/packages/simcore-sdk/requirements/_base.in index 1322c5ecea3..a4da462a4b5 100644 --- a/packages/simcore-sdk/requirements/_base.in +++ b/packages/simcore-sdk/requirements/_base.in @@ -7,10 +7,10 @@ --requirement ../../../packages/settings-library/requirements/_base.in --requirement ../../../packages/models-library/requirements/_base.in +aiocache aiofiles aiohttp aiopg[sa] -async-cache jsonschema packaging pydantic[email] diff --git a/packages/simcore-sdk/requirements/_base.txt b/packages/simcore-sdk/requirements/_base.txt index d560d2a1540..ca7ac4ac9e8 100644 --- a/packages/simcore-sdk/requirements/_base.txt +++ b/packages/simcore-sdk/requirements/_base.txt @@ -4,6 +4,8 @@ # # pip-compile --output-file=requirements/_base.txt --strip-extras requirements/_base.in # +aiocache==0.11.1 + # via -r requirements/_base.in aiodebug==2.3.0 # via -r requirements/../../../packages/service-library/requirements/_base.in aiofiles==0.8.0 @@ -24,8 +26,6 @@ aiosignal==1.2.0 # via aiohttp alembic==1.7.7 # via -r requirements/../../../packages/postgres-database/requirements/_base.in -async-cache==1.1.1 - # via -r requirements/_base.in async-timeout==4.0.2 # via # aiohttp diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py index 001cc187bdb..4aae3985446 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py @@ -5,9 +5,9 @@ from pathlib import Path from typing import AsyncGenerator, Optional +from aiocache import cached from aiofiles import tempfile from aiohttp import ClientSession, ClientTimeout -from cache import AsyncLRU from settings_library.r_clone import RCloneSettings from settings_library.utils_r_clone import get_r_clone_config @@ -53,7 +53,7 @@ async def _async_command(command: str, *, cwd: Optional[str] = None) -> str: return decoded_stdout -# @AsyncLRU(maxsize=2) +@cached() async def is_r_clone_available(r_clone_settings: Optional[RCloneSettings]) -> bool: """returns: 
True if the `rclone` cli is installed and a configuration is provided"""
     try:

From 2b80e1590166b5938034b611f26eb5d6dd076942 Mon Sep 17 00:00:00 2001
From: Andrei Neagu
Date: Tue, 26 Apr 2022 11:01:55 +0200
Subject: [PATCH 34/78] removed r_clone extra settings

---
 .env-devel                                         |  1 -
 .../src/settings_library/r_clone.py                | 12 ------------
 .../settings-library/tests/test_utils_r_clone.py   |  1 -
 .../src/simcore_sdk/node_ports_common/r_clone.py   | 15 +--------------
 .../simcore-sdk/tests/integration/conftest.py      |  1 -
 .../tests/unit/test_node_ports_v2_r_clone.py       |  1 -
 services/director-v2/.env-devel                    |  1 -
 .../docker_service_specs/sidecar.py                |  1 -
 .../tests/integration/01/test_computation_api.py   |  2 --
 .../02/test_dynamic_services_routes.py             |  2 --
 .../test_dynamic_sidecar_nodeports_integration.py  |  1 -
 ...st_mixed_dynamic_sidecar_and_legacy_project.py  |  1 -
 .../director-v2/tests/unit/test_core_settings.py   |  2 --
 .../test_modules_dynamic_sidecar_docker_api.py     |  2 --
 ...odules_dynamic_sidecar_docker_service_specs.py  |  1 -
 ...ynamic_sidecar_docker_service_specs_sidecar.py  |  4 +---
 .../test_modules_dynamic_sidecar_scheduler.py      |  1 -
 .../tests/unit/test_routes_dynamic_services.py     |  1 -
 .../test_modules_comp_scheduler_dask_scheduler.py  |  2 --
 .../tests/unit/with_dbs/test_route_clusters.py     |  1 -
 .../unit/with_dbs/test_route_clusters_details.py   |  1 -
 .../unit/with_dbs/test_route_computations.py       |  1 -
 .../tests/unit/with_dbs/test_utils_dask.py         |  1 -
 services/docker-compose.yml                        |  1 -
 services/dynamic-sidecar/tests/conftest.py         |  1 -
 .../tests/unit/test_core_docker_logs.py            |  1 -
 .../tests/unit/test_core_rabbitmq.py               |  1 -
 .../tests/unit/test_core_settings.py               |  1 -
 28 files changed, 2 insertions(+), 59 deletions(-)

diff --git a/.env-devel b/.env-devel
index 78b65b09958..f7fcf3a6e3e 100644
--- a/.env-devel
+++ b/.env-devel
@@ -61,7 +61,6 @@ S3_ENDPOINT=172.17.0.1:9001
 S3_SECRET_KEY=12345678
 S3_SECURE=0
 R_CLONE_PROVIDER=MINIO
-R_CLONE_STORAGE_ENDPOINT=storage:8080

 SCICRUNCH_API_BASE_URL=https://scicrunch.org/api/1
 SCICRUNCH_API_KEY=REPLACE_ME_with_valid_api_key
diff --git a/packages/settings-library/src/settings_library/r_clone.py b/packages/settings-library/src/settings_library/r_clone.py
index 8ea8f04c86e..62de59c2a92 100644
--- a/packages/settings-library/src/settings_library/r_clone.py
+++ b/packages/settings-library/src/settings_library/r_clone.py
@@ -22,15 +22,3 @@ class _RequiredS3Settings(S3Settings):
 class RCloneSettings(BaseCustomSettings):
     R_CLONE_S3: _RequiredS3Settings = Field(auto_default_from_env=True)
     R_CLONE_PROVIDER: S3Provider
-    R_CLONE_STORAGE_ENDPOINT: str = Field(
-        ..., description="endpoint where storage is present"
-    )
-
-    R_CLONE_AIOHTTP_CLIENT_TIMEOUT_TOTAL: float = 20
-    R_CLONE_AIOHTTP_CLIENT_TIMEOUT_SOCK_CONNECT: float = 5
-
-    @property
-    def storage_endpoint(self) -> str:
-        if not self.R_CLONE_STORAGE_ENDPOINT.startswith("http"):
-            return f"http://{self.R_CLONE_STORAGE_ENDPOINT}"
-        return self.R_CLONE_STORAGE_ENDPOINT
diff --git a/packages/settings-library/tests/test_utils_r_clone.py b/packages/settings-library/tests/test_utils_r_clone.py
index 2f16ac6a930..b367fd57b4d 100644
--- a/packages/settings-library/tests/test_utils_r_clone.py
+++ b/packages/settings-library/tests/test_utils_r_clone.py
@@ -8,7 +8,6 @@
 @pytest.fixture(params=list(S3Provider))
 def r_clone_settings(request, monkeypatch) -> RCloneSettings:
     monkeypatch.setenv("R_CLONE_PROVIDER", request.param)
-    monkeypatch.setenv("R_CLONE_STORAGE_ENDPOINT", "storage_endpoint")
    monkeypatch.setenv("S3_ENDPOINT", "endpoint")
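     # A minimal sketch (not lines from this hunk) of what the slimmed-down
     # fixture now resolves to: with R_CLONE_STORAGE_ENDPOINT gone, the S3_*
     # variables set here feed the nested R_CLONE_S3 group through
     # Field(auto_default_from_env=True), so once all setenv calls have run:
     #
     #     settings = RCloneSettings()
     #     assert settings.R_CLONE_PROVIDER == request.param
     #     assert settings.R_CLONE_S3.S3_ENDPOINT == "endpoint"
     #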
monkeypatch.setenv("S3_ACCESS_KEY", "access_key") monkeypatch.setenv("S3_SECRET_KEY", "secret_key") diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py index 4aae3985446..0028c45fb86 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py @@ -7,7 +7,7 @@ from aiocache import cached from aiofiles import tempfile -from aiohttp import ClientSession, ClientTimeout +from aiohttp import ClientSession from settings_library.r_clone import RCloneSettings from settings_library.utils_r_clone import get_r_clone_config @@ -63,19 +63,6 @@ async def is_r_clone_available(r_clone_settings: Optional[RCloneSettings]) -> bo return False -@asynccontextmanager -async def _get_client_session( - r_clone_settings: RCloneSettings, -) -> AsyncGenerator[ClientSession, None]: - client_timeout = ClientTimeout( - total=r_clone_settings.R_CLONE_AIOHTTP_CLIENT_TIMEOUT_TOTAL, - sock_connect=r_clone_settings.R_CLONE_AIOHTTP_CLIENT_TIMEOUT_SOCK_CONNECT, - ) # type: ignore - - async with ClientSession(timeout=client_timeout) as session: - yield session - - async def sync_local_to_s3( session: ClientSession, r_clone_settings: Optional[RCloneSettings], diff --git a/packages/simcore-sdk/tests/integration/conftest.py b/packages/simcore-sdk/tests/integration/conftest.py index 789dd0c8797..2122d527fca 100644 --- a/packages/simcore-sdk/tests/integration/conftest.py +++ b/packages/simcore-sdk/tests/integration/conftest.py @@ -342,7 +342,6 @@ async def r_clone_settings( S3_SECURE=client["secure"], ), R_CLONE_PROVIDER=S3Provider.MINIO, - R_CLONE_STORAGE_ENDPOINT=f"{storage_service}", ) ) if not await is_r_clone_available(settings): diff --git a/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py b/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py index d0d3ca24f0a..16f77f3bace 100644 --- a/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py +++ b/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py @@ -34,7 +34,6 @@ def r_clone_settings( monkeypatch.setenv("S3_SECRET_KEY", "secret_key") monkeypatch.setenv("S3_BUCKET_NAME", "bucket_name") monkeypatch.setenv("S3_SECURE", "false") - monkeypatch.setenv("R_CLONE_STORAGE_ENDPOINT", "storage_endpoint") return RCloneSettings() diff --git a/services/director-v2/.env-devel b/services/director-v2/.env-devel index 30a3cbf5cb2..9fd76eb1234 100644 --- a/services/director-v2/.env-devel +++ b/services/director-v2/.env-devel @@ -53,7 +53,6 @@ S3_SECRET_KEY=12345678 S3_BUCKET_NAME=simcore S3_SECURE=0 R_CLONE_PROVIDER=MINIO -R_CLONE_STORAGE_ENDPOINT=http://storage:8080 TRACING_ENABLED=True TRACING_ZIPKIN_ENDPOINT=http://jaeger:9411 diff --git a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/sidecar.py b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/sidecar.py index 1832070af0c..df043fe3802 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/sidecar.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/sidecar.py @@ -69,7 +69,6 @@ def _get_environment_variables( "S3_BUCKET_NAME": r_clone_settings.R_CLONE_S3.S3_BUCKET_NAME, "S3_SECURE": f"{r_clone_settings.R_CLONE_S3.S3_SECURE}", "R_CLONE_PROVIDER": r_clone_settings.R_CLONE_PROVIDER, - "R_CLONE_STORAGE_ENDPOINT": 
r_clone_settings.R_CLONE_STORAGE_ENDPOINT, } diff --git a/services/director-v2/tests/integration/01/test_computation_api.py b/services/director-v2/tests/integration/01/test_computation_api.py index 11fa4d0212a..be0930cf3a8 100644 --- a/services/director-v2/tests/integration/01/test_computation_api.py +++ b/services/director-v2/tests/integration/01/test_computation_api.py @@ -71,8 +71,6 @@ def mock_env( monkeypatch.setenv("SWARM_STACK_NAME", "test_mocked_stack_name") monkeypatch.setenv("TRAEFIK_SIMCORE_ZONE", "test_mocked_simcore_zone") monkeypatch.setenv("R_CLONE_PROVIDER", "MINIO") - monkeypatch.setenv("R_CLONE_STORAGE_ENDPOINT", "storage:8000") - @pytest.fixture() diff --git a/services/director-v2/tests/integration/02/test_dynamic_services_routes.py b/services/director-v2/tests/integration/02/test_dynamic_services_routes.py index 39b8df9799f..314673b113b 100644 --- a/services/director-v2/tests/integration/02/test_dynamic_services_routes.py +++ b/services/director-v2/tests/integration/02/test_dynamic_services_routes.py @@ -125,14 +125,12 @@ async def test_client( monkeypatch.setenv("POSTGRES_DB", "mocked_db") monkeypatch.setenv("DIRECTOR_V2_POSTGRES_ENABLED", "false") monkeypatch.setenv("R_CLONE_PROVIDER", "MINIO") - monkeypatch.setenv("R_CLONE_STORAGE_ENDPOINT", "storage:8000") monkeypatch.setenv("S3_ENDPOINT", "endpoint") monkeypatch.setenv("S3_ACCESS_KEY", "access_key") monkeypatch.setenv("S3_SECRET_KEY", "secret_key") monkeypatch.setenv("S3_BUCKET_NAME", "bucket_name") monkeypatch.setenv("S3_SECURE", "false") - # patch host for dynamic-sidecar, not reachable via localhost # the dynamic-sidecar (running inside a container) will use # this address to reach the rabbit service diff --git a/services/director-v2/tests/integration/02/test_dynamic_sidecar_nodeports_integration.py b/services/director-v2/tests/integration/02/test_dynamic_sidecar_nodeports_integration.py index 1a18137cc4a..9636f08e059 100644 --- a/services/director-v2/tests/integration/02/test_dynamic_sidecar_nodeports_integration.py +++ b/services/director-v2/tests/integration/02/test_dynamic_sidecar_nodeports_integration.py @@ -331,7 +331,6 @@ def mock_env( monkeypatch.setenv("RABBIT_HOST", f"{get_localhost_ip()}") monkeypatch.setenv("POSTGRES_HOST", f"{get_localhost_ip()}") monkeypatch.setenv("R_CLONE_PROVIDER", "MINIO") - monkeypatch.setenv("R_CLONE_STORAGE_ENDPOINT", f"{storage_service}") monkeypatch.setenv("S3_ENDPOINT", minio_config["client"]["endpoint"]) monkeypatch.setenv("S3_ACCESS_KEY", minio_config["client"]["access_key"]) monkeypatch.setenv("S3_SECRET_KEY", minio_config["client"]["secret_key"]) diff --git a/services/director-v2/tests/integration/02/test_mixed_dynamic_sidecar_and_legacy_project.py b/services/director-v2/tests/integration/02/test_mixed_dynamic_sidecar_and_legacy_project.py index a12cc633190..7e4a9120b00 100644 --- a/services/director-v2/tests/integration/02/test_mixed_dynamic_sidecar_and_legacy_project.py +++ b/services/director-v2/tests/integration/02/test_mixed_dynamic_sidecar_and_legacy_project.py @@ -157,7 +157,6 @@ async def director_v2_client( monkeypatch.setenv("COMPUTATIONAL_BACKEND_DASK_CLIENT_ENABLED", "false") monkeypatch.setenv("COMPUTATIONAL_BACKEND_ENABLED", "false") monkeypatch.setenv("R_CLONE_PROVIDER", "MINIO") - monkeypatch.setenv("R_CLONE_STORAGE_ENDPOINT", f"{storage_service}") monkeypatch.setenv("S3_ENDPOINT", minio_config["client"]["endpoint"]) monkeypatch.setenv("S3_ACCESS_KEY", minio_config["client"]["access_key"]) monkeypatch.setenv("S3_SECRET_KEY", 
minio_config["client"]["secret_key"]) diff --git a/services/director-v2/tests/unit/test_core_settings.py b/services/director-v2/tests/unit/test_core_settings.py index 062bda963a6..f479d90743c 100644 --- a/services/director-v2/tests/unit/test_core_settings.py +++ b/services/director-v2/tests/unit/test_core_settings.py @@ -49,8 +49,6 @@ def test_expected_s3_endpoint( monkeypatch.setenv("S3_ACCESS_KEY", "access_key") monkeypatch.setenv("S3_SECRET_KEY", "secret_key") monkeypatch.setenv("S3_BUCKET_NAME", "bucket_name") - monkeypatch.setenv("R_CLONE_STORAGE_ENDPOINT", "storage_endpoint") - r_clone_settings = RCloneSettings() diff --git a/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_api.py b/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_api.py index 36588d31ba9..69b3c6684ee 100644 --- a/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_api.py +++ b/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_api.py @@ -65,7 +65,6 @@ def dynamic_sidecar_settings( monkeypatch.setenv("S3_SECRET_KEY", "secret_key") monkeypatch.setenv("S3_BUCKET_NAME", "bucket_name") monkeypatch.setenv("S3_SECURE", "false") - monkeypatch.setenv("R_CLONE_STORAGE_ENDPOINT", "storage_endpoint") return DynamicSidecarSettings.create_from_envs() @@ -411,7 +410,6 @@ def test_valid_network_names( monkeypatch.setenv("S3_SECRET_KEY", "secret_key") monkeypatch.setenv("S3_BUCKET_NAME", "bucket_name") monkeypatch.setenv("S3_SECURE", "false") - monkeypatch.setenv("R_CLONE_STORAGE_ENDPOINT", "storage_endpoint") dynamic_sidecar_settings = DynamicSidecarSettings.create_from_envs() assert dynamic_sidecar_settings diff --git a/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_service_specs.py b/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_service_specs.py index 1b4ad93cf29..2f514b1c3f0 100644 --- a/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_service_specs.py +++ b/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_service_specs.py @@ -43,7 +43,6 @@ def mocked_env(monkeypatch: MonkeyPatch) -> Iterator[Dict[str, str]]: "S3_SECRET_KEY": "secret_key", "S3_BUCKET_NAME": "bucket_name", "S3_SECURE": "false", - "R_CLONE_STORAGE_ENDPOINT": "storage_endpoint", } with monkeypatch.context() as m: diff --git a/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_service_specs_sidecar.py b/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_service_specs_sidecar.py index e495e9ef0c2..aec75c45d27 100644 --- a/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_service_specs_sidecar.py +++ b/services/director-v2/tests/unit/test_modules_dynamic_sidecar_docker_service_specs_sidecar.py @@ -24,11 +24,10 @@ "SWARM_STACK_NAME": "test_swarm_name", "R_CLONE_PROVIDER": "MINIO", "S3_ENDPOINT": "endpoint", - "S3_ACCESS_KEY":"s3_access_key", + "S3_ACCESS_KEY": "s3_access_key", "S3_SECRET_KEY": "s3_secret_key", "S3_BUCKET_NAME": "bucket_name", "S3_SECURE": "false", - "R_CLONE_STORAGE_ENDPOINT": "storage_endpoint", } EXPECTED_DYNAMIC_SIDECAR_ENV_VAR_NAMES = { @@ -65,7 +64,6 @@ "S3_SECRET_KEY", "S3_BUCKET_NAME", "S3_SECURE", - "R_CLONE_STORAGE_ENDPOINT", } diff --git a/services/director-v2/tests/unit/test_modules_dynamic_sidecar_scheduler.py b/services/director-v2/tests/unit/test_modules_dynamic_sidecar_scheduler.py index c83294aa497..0c40b0e530d 100644 --- a/services/director-v2/tests/unit/test_modules_dynamic_sidecar_scheduler.py +++ 
b/services/director-v2/tests/unit/test_modules_dynamic_sidecar_scheduler.py @@ -202,7 +202,6 @@ def dynamic_sidecar_settings( monkeypatch.setenv("S3_SECRET_KEY", "secret_key") monkeypatch.setenv("S3_BUCKET_NAME", "bucket_name") monkeypatch.setenv("S3_SECURE", "false") - monkeypatch.setenv("R_CLONE_STORAGE_ENDPOINT", "storage_endpoint") app_settings = AppSettings.create_from_envs() return app_settings diff --git a/services/director-v2/tests/unit/test_routes_dynamic_services.py b/services/director-v2/tests/unit/test_routes_dynamic_services.py index ab6b185265d..8641b9df887 100644 --- a/services/director-v2/tests/unit/test_routes_dynamic_services.py +++ b/services/director-v2/tests/unit/test_routes_dynamic_services.py @@ -103,7 +103,6 @@ def mock_env(monkeypatch: MonkeyPatch, docker_swarm: None) -> None: monkeypatch.setenv("S3_SECRET_KEY", "secret_key") monkeypatch.setenv("S3_BUCKET_NAME", "bucket_name") monkeypatch.setenv("S3_SECURE", "false") - monkeypatch.setenv("R_CLONE_STORAGE_ENDPOINT", "storage_endpoint") @pytest.fixture diff --git a/services/director-v2/tests/unit/with_dbs/test_modules_comp_scheduler_dask_scheduler.py b/services/director-v2/tests/unit/with_dbs/test_modules_comp_scheduler_dask_scheduler.py index 678bafade69..02b9fe04ef1 100644 --- a/services/director-v2/tests/unit/with_dbs/test_modules_comp_scheduler_dask_scheduler.py +++ b/services/director-v2/tests/unit/with_dbs/test_modules_comp_scheduler_dask_scheduler.py @@ -86,8 +86,6 @@ def minimal_dask_scheduler_config( monkeypatch.setenv("S3_SECRET_KEY", "secret_key") monkeypatch.setenv("S3_BUCKET_NAME", "bucket_name") monkeypatch.setenv("S3_SECURE", "false") - monkeypatch.setenv("R_CLONE_STORAGE_ENDPOINT", "storage_endpoint") - @pytest.fixture diff --git a/services/director-v2/tests/unit/with_dbs/test_route_clusters.py b/services/director-v2/tests/unit/with_dbs/test_route_clusters.py index 809f5b69746..b76f1cdf54e 100644 --- a/services/director-v2/tests/unit/with_dbs/test_route_clusters.py +++ b/services/director-v2/tests/unit/with_dbs/test_route_clusters.py @@ -55,7 +55,6 @@ def clusters_config( monkeypatch.setenv("S3_SECRET_KEY", "secret_key") monkeypatch.setenv("S3_BUCKET_NAME", "bucket_name") monkeypatch.setenv("S3_SECURE", "false") - monkeypatch.setenv("R_CLONE_STORAGE_ENDPOINT", "storage_endpoint") @pytest.fixture diff --git a/services/director-v2/tests/unit/with_dbs/test_route_clusters_details.py b/services/director-v2/tests/unit/with_dbs/test_route_clusters_details.py index d113ac56833..a0581828d8e 100644 --- a/services/director-v2/tests/unit/with_dbs/test_route_clusters_details.py +++ b/services/director-v2/tests/unit/with_dbs/test_route_clusters_details.py @@ -43,7 +43,6 @@ def clusters_config( monkeypatch.setenv("S3_SECRET_KEY", "secret_key") monkeypatch.setenv("S3_BUCKET_NAME", "bucket_name") monkeypatch.setenv("S3_SECURE", "false") - monkeypatch.setenv("R_CLONE_STORAGE_ENDPOINT", "storage_endpoint") @pytest.fixture diff --git a/services/director-v2/tests/unit/with_dbs/test_route_computations.py b/services/director-v2/tests/unit/with_dbs/test_route_computations.py index 43eefebd0e4..3d35401461b 100644 --- a/services/director-v2/tests/unit/with_dbs/test_route_computations.py +++ b/services/director-v2/tests/unit/with_dbs/test_route_computations.py @@ -42,7 +42,6 @@ def minimal_configuration( monkeypatch.setenv("S3_SECRET_KEY", "secret_key") monkeypatch.setenv("S3_BUCKET_NAME", "bucket_name") monkeypatch.setenv("S3_SECURE", "false") - monkeypatch.setenv("R_CLONE_STORAGE_ENDPOINT", "storage_endpoint") async def 
test_get_computation_from_empty_project( diff --git a/services/director-v2/tests/unit/with_dbs/test_utils_dask.py b/services/director-v2/tests/unit/with_dbs/test_utils_dask.py index 2813eb78a8c..9ac61131772 100644 --- a/services/director-v2/tests/unit/with_dbs/test_utils_dask.py +++ b/services/director-v2/tests/unit/with_dbs/test_utils_dask.py @@ -234,7 +234,6 @@ def app_with_db( monkeypatch.setenv("S3_SECRET_KEY", "secret_key") monkeypatch.setenv("S3_BUCKET_NAME", "bucket_name") monkeypatch.setenv("S3_SECURE", "false") - monkeypatch.setenv("R_CLONE_STORAGE_ENDPOINT", "storage_endpoint") async def test_compute_input_data( diff --git a/services/docker-compose.yml b/services/docker-compose.yml index a50f78be9cb..4c5d04765f8 100644 --- a/services/docker-compose.yml +++ b/services/docker-compose.yml @@ -116,7 +116,6 @@ services: - S3_ENDPOINT=${S3_ENDPOINT} - S3_SECRET_KEY=${S3_SECRET_KEY} - R_CLONE_PROVIDER=${R_CLONE_PROVIDER} - - R_CLONE_STORAGE_ENDPOINT=${STORAGE_ENDPOINT} - MONITORING_ENABLED=${MONITORING_ENABLED:-True} - SIMCORE_SERVICES_NETWORK_NAME=interactive_services_subnet - TRACING_THRIFT_COMPACT_ENDPOINT=${TRACING_THRIFT_COMPACT_ENDPOINT} diff --git a/services/dynamic-sidecar/tests/conftest.py b/services/dynamic-sidecar/tests/conftest.py index 234e458d8fe..e35c18e6b22 100644 --- a/services/dynamic-sidecar/tests/conftest.py +++ b/services/dynamic-sidecar/tests/conftest.py @@ -107,7 +107,6 @@ def mock_environment( monkeypatch_module.setenv("S3_BUCKET_NAME", "bucket_name") monkeypatch_module.setenv("S3_SECURE", "false") monkeypatch_module.setenv("R_CLONE_PROVIDER", "MINIO") - monkeypatch_module.setenv("R_CLONE_STORAGE_ENDPOINT", "storage_endpoint") monkeypatch_module.setattr(mounted_fs, "DY_VOLUMES", mock_dy_volumes) diff --git a/services/dynamic-sidecar/tests/unit/test_core_docker_logs.py b/services/dynamic-sidecar/tests/unit/test_core_docker_logs.py index 6c052677d3b..a57b8d5a7a1 100644 --- a/services/dynamic-sidecar/tests/unit/test_core_docker_logs.py +++ b/services/dynamic-sidecar/tests/unit/test_core_docker_logs.py @@ -63,7 +63,6 @@ def app( monkeypatch_module.setenv("S3_BUCKET_NAME", "bucket_name") monkeypatch_module.setenv("S3_SECURE", "false") monkeypatch_module.setenv("R_CLONE_PROVIDER", "MINIO") - monkeypatch_module.setenv("R_CLONE_STORAGE_ENDPOINT", "storage_endpoint") yield assemble_application() diff --git a/services/dynamic-sidecar/tests/unit/test_core_rabbitmq.py b/services/dynamic-sidecar/tests/unit/test_core_rabbitmq.py index 56a5083a79d..693219732ae 100644 --- a/services/dynamic-sidecar/tests/unit/test_core_rabbitmq.py +++ b/services/dynamic-sidecar/tests/unit/test_core_rabbitmq.py @@ -112,7 +112,6 @@ def mock_environment( monkeypatch_module.setenv("S3_BUCKET_NAME", "bucket_name") monkeypatch_module.setenv("S3_SECURE", "false") monkeypatch_module.setenv("R_CLONE_PROVIDER", "MINIO") - monkeypatch_module.setenv("R_CLONE_STORAGE_ENDPOINT", "storage_endpoint") @pytest.fixture diff --git a/services/dynamic-sidecar/tests/unit/test_core_settings.py b/services/dynamic-sidecar/tests/unit/test_core_settings.py index ec9d3ab7e51..9ec3a9988b4 100644 --- a/services/dynamic-sidecar/tests/unit/test_core_settings.py +++ b/services/dynamic-sidecar/tests/unit/test_core_settings.py @@ -41,7 +41,6 @@ def mocked_non_request_settings(tmp_dir: Path, monkeypatch: MonkeyPatch) -> None monkeypatch.setenv("S3_BUCKET_NAME", "bucket_name") monkeypatch.setenv("S3_SECURE", "false") monkeypatch.setenv("R_CLONE_PROVIDER", "MINIO") - monkeypatch.setenv("R_CLONE_STORAGE_ENDPOINT", "storage_endpoint") 
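 # Sketch of the minimal rclone-related environment the fixtures touched by
 # this patch converge on (values are the placeholder strings used in the
 # hunks above; a shared helper like this does not exist in the repo, it only
 # summarizes the repeated pattern):
 #
 #     def set_minimal_r_clone_env(monkeypatch: MonkeyPatch) -> None:
 #         for name, value in {
 #             "S3_ENDPOINT": "endpoint",
 #             "S3_ACCESS_KEY": "access_key",
 #             "S3_SECRET_KEY": "secret_key",
 #             "S3_BUCKET_NAME": "bucket_name",
 #             "S3_SECURE": "false",
 #             "R_CLONE_PROVIDER": "MINIO",
 #         }.items():
 #             monkeypatch.setenv(name, value)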
def test_non_request_dynamic_sidecar_settings( From b557dac1e12bf62b1c1a6922bdcf67ad065fe470 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Apr 2022 11:10:45 +0200 Subject: [PATCH 35/78] moved region to settings --- packages/settings-library/src/settings_library/r_clone.py | 1 + .../settings-library/src/settings_library/utils_r_clone.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/settings-library/src/settings_library/r_clone.py b/packages/settings-library/src/settings_library/r_clone.py index 62de59c2a92..40e12975aac 100644 --- a/packages/settings-library/src/settings_library/r_clone.py +++ b/packages/settings-library/src/settings_library/r_clone.py @@ -22,3 +22,4 @@ class _RequiredS3Settings(S3Settings): class RCloneSettings(BaseCustomSettings): R_CLONE_S3: _RequiredS3Settings = Field(auto_default_from_env=True) R_CLONE_PROVIDER: S3Provider + R_CLONE_REGION: str = Field("us-east-1", description="S3 region to use") diff --git a/packages/settings-library/src/settings_library/utils_r_clone.py b/packages/settings-library/src/settings_library/utils_r_clone.py index 1c10a29cd3e..2447638fb53 100644 --- a/packages/settings-library/src/settings_library/utils_r_clone.py +++ b/packages/settings-library/src/settings_library/utils_r_clone.py @@ -9,7 +9,7 @@ "type": "s3", "access_key_id": "{access_key}", "secret_access_key": "{secret_key}", - "region": "us-east-1", + "region": "{aws_region}", "acl": "private", } @@ -42,5 +42,6 @@ def get_r_clone_config(r_clone_settings: RCloneSettings) -> str: endpoint=r_clone_settings.R_CLONE_S3.endpoint, access_key=r_clone_settings.R_CLONE_S3.S3_ACCESS_KEY, secret_key=r_clone_settings.R_CLONE_S3.S3_SECRET_KEY, + aws_region=r_clone_settings.R_CLONE_REGION, ) return r_clone_config From 24c1236a411c4ce931b576cb418e5e1f2b58ced1 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Apr 2022 11:11:06 +0200 Subject: [PATCH 36/78] rename --- .../settings-library/src/settings_library/utils_r_clone.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/settings-library/src/settings_library/utils_r_clone.py b/packages/settings-library/src/settings_library/utils_r_clone.py index 2447638fb53..3bf6eb00111 100644 --- a/packages/settings-library/src/settings_library/utils_r_clone.py +++ b/packages/settings-library/src/settings_library/utils_r_clone.py @@ -13,7 +13,7 @@ "acl": "private", } -_PROVIDER_ENDTIRES: Dict[S3Provider, Dict[str, str]] = { +_PROVIDER_ENTRIES: Dict[S3Provider, Dict[str, str]] = { # NOTE: # AWS_SESSION_TOKEN should be required for STS S3Provider.AWS: {"provider": "AWS"}, S3Provider.CEPH: {"provider": "Ceph", "endpoint": "{endpoint}"}, @@ -33,7 +33,7 @@ def _format_config(entries: Dict[str, str]) -> str: def get_r_clone_config(r_clone_settings: RCloneSettings) -> str: provider = r_clone_settings.R_CLONE_PROVIDER entries = deepcopy(_COMMON_ENTRIES) - entries.update(_PROVIDER_ENDTIRES[provider]) + entries.update(_PROVIDER_ENTRIES[provider]) r_clone_config_template = _format_config(entries=entries) From 50934014cc4bd3c152240db57d7e729d08abe071 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Apr 2022 12:33:54 +0200 Subject: [PATCH 37/78] more refactoring --- api/specs/storage/openapi.yaml | 12 ++--- .../node_ports_common/storage_client.py | 2 +- .../integration/test_node_ports_v2_r_clone.py | 14 +++--- .../tests/unit/test_node_ports_v2_r_clone.py | 8 +--- .../api/v0/openapi.yaml | 6 +-- .../src/simcore_service_storage/dsm.py | 10 +++-- .../src/simcore_service_storage/handlers.py | 
14 +++--- .../api/v0/openapi.yaml | 44 +++++++++---------- 8 files changed, 53 insertions(+), 57 deletions(-) diff --git a/api/specs/storage/openapi.yaml b/api/specs/storage/openapi.yaml index 8ac73b667c3..035ef75a59b 100644 --- a/api/specs/storage/openapi.yaml +++ b/api/specs/storage/openapi.yaml @@ -316,12 +316,8 @@ paths: schema: type: string responses: - "200": + "204": description: "Removes the file meta data entry from the database" - content: - application/json: - schema: - $ref: "#/components/schemas/FileMetaDataDeleteEnvelope" default: $ref: "#/components/responses/DefaultErrorResponse" @@ -373,6 +369,12 @@ paths: required: true schema: type: string + - name: as_s3 + in: query + required: false + description: return and s3 link instead of a presigned link; not working with datcore + schema: + type: boolean - name: extra_location in: query required: false diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/storage_client.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/storage_client.py index 5ff426d6c79..7a8fbc2283d 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/storage_client.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/storage_client.py @@ -220,7 +220,7 @@ async def delete_file_meta_data( # TODO: check permissions url = f"{_base_url()}/locations/0/files/{quote_plus(s3_object)}/metadata" result = await session.delete(url, params=dict(user_id=user_id)) - if result.status != web.HTTPOk.status_code: + if result.status != web.HTTPNoContent.status_code: raise exceptions.StorageInvalidCall( f"Could not fetch metadata: status={result.status} {await result.text()}" ) diff --git a/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py b/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py index 57adfcdeb08..1b860d84e93 100644 --- a/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py +++ b/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py @@ -42,11 +42,10 @@ def file_name(faker: Faker) -> str: @pytest.fixture -def upload_file_dir(tmpdir: Path) -> Iterator[Path]: - temp_path = Path(tmpdir) - assert temp_path.is_dir() - yield temp_path - shutil.rmtree(temp_path) +def upload_file_dir(tmp_path: Path) -> Iterator[Path]: + assert tmp_path.is_dir() + yield tmp_path + shutil.rmtree(tmp_path) @pytest.fixture @@ -80,12 +79,9 @@ async def cleanup_s3( @pytest.fixture def mock_update_file_meta_data(mocker: MockerFixture) -> None: - async def _raise_error(*args, **kwargs) -> None: - raise _TestException() - mocker.patch( "simcore_sdk.node_ports_common.storage_client.update_file_meta_data", - side_effect=_raise_error, + side_effect=_TestException, ) reload(r_clone) reload(storage_client) diff --git a/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py b/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py index 16f77f3bace..a34f885931c 100644 --- a/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py +++ b/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py @@ -14,11 +14,6 @@ from simcore_sdk.node_ports_common.r_clone import RCloneSettings -@pytest.fixture -def text_to_write(faker: Faker) -> str: - return faker.text() - - @pytest.fixture(params=list(S3Provider)) def s3_provider(request) -> S3Provider: return request.param @@ -59,7 +54,8 @@ async def test_is_r_clone_available_cached( assert await r_clone.is_r_clone_available(None) is False -async def test__config_file(text_to_write: str) -> None: +async def test__config_file(faker: 
Faker) -> None: + text_to_write = faker.text() async with r_clone._config_file(text_to_write) as file_name: assert text_to_write == Path(file_name).read_text() assert Path(file_name).exists() is False diff --git a/services/storage/src/simcore_service_storage/api/v0/openapi.yaml b/services/storage/src/simcore_service_storage/api/v0/openapi.yaml index 4fce932ed58..baa588de7f0 100644 --- a/services/storage/src/simcore_service_storage/api/v0/openapi.yaml +++ b/services/storage/src/simcore_service_storage/api/v0/openapi.yaml @@ -309,12 +309,8 @@ paths: schema: type: string responses: - '200': + '204': description: Removes the file meta data entry from the database - content: - application/json: - schema: - $ref: '#/components/schemas/FileMetaDataDeleteEnvelope' default: $ref: '#/components/responses/DefaultErrorResponse' '/locations/{location_id}/files/{fileId}': diff --git a/services/storage/src/simcore_service_storage/dsm.py b/services/storage/src/simcore_service_storage/dsm.py index b265f11e241..bf662713902 100644 --- a/services/storage/src/simcore_service_storage/dsm.py +++ b/services/storage/src/simcore_service_storage/dsm.py @@ -27,6 +27,8 @@ from servicelib.utils import fire_and_forget_task from sqlalchemy.dialects.postgresql import insert as pg_insert from sqlalchemy.sql.expression import literal_column +from pydantic import AnyUrl +from pydantic.tools import parse_obj_as from tenacity import retry from tenacity.before_sleep import before_sleep_log from tenacity.retry import retry_if_exception_type, retry_if_result @@ -519,9 +521,9 @@ async def delete_metadata(self, user_id: int, file_uuid: str) -> None: ) ) except Exception: - message = f"Could not delete metada entry for file {file_uuid}" - logger.debug(message) - raise web.HTTPForbidden( # pylint: disable=raise-missing-from + message = f"Could not delete metadata entry for file {file_uuid}" + logger.warning(message) + raise web.HTTPNotFound( # pylint: disable=raise-missing-from reason=message ) @@ -594,7 +596,7 @@ async def get_s3_link(self, user_id: str, file_uuid: str) -> str: await self._generate_metadata_for_link(user_id=user_id, file_uuid=file_uuid) bucket_name = self.simcore_bucket_name object_name = file_uuid - return f"s3://{bucket_name}/{object_name.lstrip('/')}" + return parse_obj_as(AnyUrl, f"s3://{bucket_name}/{object_name.lstrip('/')}") async def download_link_s3(self, file_uuid: str, user_id: int) -> str: diff --git a/services/storage/src/simcore_service_storage/handlers.py b/services/storage/src/simcore_service_storage/handlers.py index 9a998ef5218..b2264a8572c 100644 --- a/services/storage/src/simcore_service_storage/handlers.py +++ b/services/storage/src/simcore_service_storage/handlers.py @@ -279,12 +279,11 @@ async def update_file_meta_data(request: web.Request): with handle_storage_errors(): file_uuid = urllib.parse.unquote_plus(params["fileId"]) - log.error("file_uuid=%s", file_uuid) dsm = await _prepare_storage_manager(params, query, request) data: Optional[FileMetaDataEx] = await dsm.update_metadata(file_uuid=file_uuid) if data is None: - raise web.HTTPForbidden(reason=f"Could not update metadata for {file_uuid}") + raise web.HTTPNotFound(reason=f"Could not update metadata for {file_uuid}") return { "error": None, @@ -304,11 +303,11 @@ async def delete_file_meta_data(request: web.Request): user_id = query["user_id"] file_uuid = urllib.parse.unquote_plus(params["fileId"]) - log.error("file_uuid=%s", file_uuid) dsm = await _prepare_storage_manager(params, query, request) await dsm.delete_metadata(user_id=user_id, 
file_uuid=file_uuid) - return {"error": None, "data": None} + + return web.HTTPNoContent(content_type="application/json") @routes.get(f"/{api_vtag}/locations/{{location_id}}/files/{{fileId}}") # type: ignore @@ -328,6 +327,11 @@ async def download_file(request: web.Request): user_id = query["user_id"] file_uuid = params["fileId"] + if int(location_id) != SIMCORE_S3_ID: + raise web.HTTPPreconditionFailed( + reason=f"Only allowed to fetch s3 link for '{SIMCORE_S3_STR}'" + ) + dsm = await _prepare_storage_manager(params, query, request) location = dsm.location_from_id(location_id) if location == SIMCORE_S3_STR: @@ -352,7 +356,7 @@ async def get_s3_link(request: web.Request) -> Dict[str, Any]: file_uuid = urllib.parse.unquote_plus(params["fileId"]) if int(location_id) != SIMCORE_S3_ID: - raise web.HTTPForbidden( + raise web.HTTPPreconditionFailed( reason=f"Only allowed to fetch s3 link for '{SIMCORE_S3_STR}'" ) diff --git a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml index 4384b3752b8..ac392b284eb 100644 --- a/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml +++ b/services/web/server/src/simcore_service_webserver/api/v0/openapi.yaml @@ -1235,7 +1235,16 @@ paths: type: string responses: '200': - $ref: '#/paths/~1storage~1locations~1%7Blocation_id%7D~1files~1%7BfileId%7D/put/responses/200' + description: Returns presigned link + content: + application/json: + schema: + type: object + properties: + link: + type: string + example: + link: example_link put: summary: Returns upload link or performs copy operation to datcore tags: @@ -1264,16 +1273,7 @@ paths: type: string responses: '200': - description: Returns presigned link - content: - application/json: - schema: - type: object - properties: - link: - type: string - example: - link: example_link + $ref: '#/paths/~1storage~1locations~1%7Blocation_id%7D~1files~1%7BfileId%7D/get/responses/200' delete: summary: Deletes File tags: @@ -2730,15 +2730,7 @@ paths: content: application/json: schema: - type: object - required: - - data - properties: - data: - $ref: '#/paths/~1projects~1%7Bproject_id%7D~1nodes~1%7Bnode_id%7D~1resources/put/requestBody/content/application~1json/schema' - error: - nullable: true - default: null + $ref: '#/paths/~1projects~1%7Bproject_id%7D~1nodes~1%7Bnode_id%7D~1resources/put/responses/200/content/application~1json/schema' default: $ref: '#/components/responses/DefaultErrorResponse' put: @@ -2774,7 +2766,15 @@ paths: content: application/json: schema: - $ref: '#/paths/~1projects~1%7Bproject_id%7D~1nodes~1%7Bnode_id%7D~1resources/get/responses/200/content/application~1json/schema' + type: object + required: + - data + properties: + data: + $ref: '#/paths/~1projects~1%7Bproject_id%7D~1nodes~1%7Bnode_id%7D~1resources/put/requestBody/content/application~1json/schema' + error: + nullable: true + default: null default: $ref: '#/components/responses/DefaultErrorResponse' '/nodes/{nodeInstanceUUID}/outputUi/{outputKey}': @@ -4712,7 +4712,7 @@ paths: content: application/json: schema: - $ref: '#/paths/~1projects~1%7Bproject_id%7D~1nodes~1%7Bnode_id%7D~1resources/get/responses/200/content/application~1json/schema' + $ref: '#/paths/~1projects~1%7Bproject_id%7D~1nodes~1%7Bnode_id%7D~1resources/put/responses/200/content/application~1json/schema' default: $ref: '#/components/responses/DefaultErrorResponse' '/clusters:ping': From e847cbdbfd69011b53eae9e874884c9a97ce7f6b Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 
26 Apr 2022 13:46:37 +0200 Subject: [PATCH 38/78] revert --- api/specs/storage/openapi.yaml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/api/specs/storage/openapi.yaml b/api/specs/storage/openapi.yaml index 035ef75a59b..1c3010b4771 100644 --- a/api/specs/storage/openapi.yaml +++ b/api/specs/storage/openapi.yaml @@ -369,12 +369,6 @@ paths: required: true schema: type: string - - name: as_s3 - in: query - required: false - description: return and s3 link instead of a presigned link; not working with datcore - schema: - type: boolean - name: extra_location in: query required: false From 3237d1580c39d13a6f403de20f56f379685dc918 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Apr 2022 14:24:56 +0200 Subject: [PATCH 39/78] file metadata update check permissions --- api/specs/storage/openapi.yaml | 5 +++++ .../simcore_sdk/node_ports_common/r_clone.py | 20 ++++++------------- .../node_ports_common/storage_client.py | 14 ++++++------- .../api/v0/openapi.yaml | 5 +++++ .../src/simcore_service_storage/dsm.py | 13 +++++++++++- .../src/simcore_service_storage/handlers.py | 6 +++++- 6 files changed, 40 insertions(+), 23 deletions(-) diff --git a/api/specs/storage/openapi.yaml b/api/specs/storage/openapi.yaml index 1c3010b4771..a080dde981f 100644 --- a/api/specs/storage/openapi.yaml +++ b/api/specs/storage/openapi.yaml @@ -287,6 +287,11 @@ paths: required: true schema: type: string + - name: user_id + in: query + required: true + schema: + type: string responses: "200": description: "Returns file metadata" diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py index 0028c45fb86..3cdbdc00580 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py @@ -10,6 +10,7 @@ from aiohttp import ClientSession from settings_library.r_clone import RCloneSettings from settings_library.utils_r_clone import get_r_clone_config +from models_library.users import UserID from .constants import ETag from .storage_client import delete_file_meta_data, get_s3_link, update_file_meta_data @@ -21,10 +22,6 @@ class _CommandFailedException(Exception): pass -class RCloneError(Exception): - pass - - @asynccontextmanager async def _config_file(config: str) -> AsyncGenerator[str, None]: async with tempfile.NamedTemporaryFile("w") as f: @@ -65,18 +62,11 @@ async def is_r_clone_available(r_clone_settings: Optional[RCloneSettings]) -> bo async def sync_local_to_s3( session: ClientSession, - r_clone_settings: Optional[RCloneSettings], + r_clone_settings: RCloneSettings, s3_object: str, local_file_path: Path, - user_id: int, + user_id: UserID, ) -> ETag: - if r_clone_settings is None: - raise RCloneError( - ( - f"Could not sync {local_file_path=} to {s3_object=}, provided " - f"config is invalid{r_clone_settings=}" - ) - ) s3_link = await get_s3_link(session=session, s3_object=s3_object, user_id=user_id) s3_path = re.sub(r"^s3://", "", s3_link) @@ -122,7 +112,9 @@ async def sync_local_to_s3( try: await _async_command(" ".join(r_clone_command), cwd=f"{source_path.parent}") - return await update_file_meta_data(session=session, s3_object=s3_object) + return await update_file_meta_data( + session=session, s3_object=s3_object, user_id=user_id + ) except Exception as e: logger.warning( "There was an error while uploading %s. 
Removing metadata", s3_object diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/storage_client.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/storage_client.py index 7a8fbc2283d..f0860408aaa 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/storage_client.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/storage_client.py @@ -176,7 +176,7 @@ async def delete_file( @handle_client_exception -async def get_s3_link(session: ClientSession, s3_object: str, user_id: int) -> str: +async def get_s3_link(session: ClientSession, s3_object: str, user_id: UserID) -> str: url = f"{_base_url()}/locations/0/files/{quote_plus(s3_object)}/s3/link" result = await session.get(url, params=dict(user_id=user_id)) @@ -198,12 +198,11 @@ async def get_s3_link(session: ClientSession, s3_object: str, user_id: int) -> s @handle_client_exception -async def update_file_meta_data(session: ClientSession, s3_object: str) -> ETag: - # API: check user access rights here when updating - # TODO: check permissions - +async def update_file_meta_data( + session: ClientSession, s3_object: str, user_id: UserID +) -> ETag: url = f"{_base_url()}/locations/0/files/{quote_plus(s3_object)}/metadata" - result = await session.patch(url) + result = await session.patch(url, params=dict(user_id=user_id)) if result.status != web.HTTPOk.status_code: raise exceptions.StorageInvalidCall( f"Could not fetch metadata: status={result.status} {await result.text()}" @@ -215,8 +214,9 @@ async def update_file_meta_data(session: ClientSession, s3_object: str) -> ETag: @handle_client_exception async def delete_file_meta_data( - session: ClientSession, s3_object: str, user_id: int + session: ClientSession, s3_object: str, user_id: UserID ) -> None: + # TODO: this needs to be repalced as well! 
# TODO: check permissions url = f"{_base_url()}/locations/0/files/{quote_plus(s3_object)}/metadata" result = await session.delete(url, params=dict(user_id=user_id)) diff --git a/services/storage/src/simcore_service_storage/api/v0/openapi.yaml b/services/storage/src/simcore_service_storage/api/v0/openapi.yaml index baa588de7f0..c4237bc7925 100644 --- a/services/storage/src/simcore_service_storage/api/v0/openapi.yaml +++ b/services/storage/src/simcore_service_storage/api/v0/openapi.yaml @@ -280,6 +280,11 @@ paths: required: true schema: type: string + - name: user_id + in: query + required: true + schema: + type: string responses: '200': description: Returns file metadata diff --git a/services/storage/src/simcore_service_storage/dsm.py b/services/storage/src/simcore_service_storage/dsm.py index bf662713902..cc9ef2d3fea 100644 --- a/services/storage/src/simcore_service_storage/dsm.py +++ b/services/storage/src/simcore_service_storage/dsm.py @@ -495,7 +495,18 @@ async def auto_update_database_from_storage_task( file_uuid, bucket_name, object_name, silence_exception=True ) - async def update_metadata(self, file_uuid: str) -> Optional[FileMetaDataEx]: + async def update_metadata( + self, file_uuid: str, user_id: int + ) -> Optional[FileMetaDataEx]: + async with self.engine.acquire() as conn: + can: Optional[AccessRights] = await get_file_access_rights( + conn, int(user_id), file_uuid + ) + if not can.write: + message = f"User {user_id} was not allowed to upload file {file_uuid}" + logger.debug(message) + raise web.HTTPForbidden(reason=message) + bucket_name = self.simcore_bucket_name object_name = file_uuid return await self.auto_update_database_from_storage_task( diff --git a/services/storage/src/simcore_service_storage/handlers.py b/services/storage/src/simcore_service_storage/handlers.py index b2264a8572c..b8018e9de02 100644 --- a/services/storage/src/simcore_service_storage/handlers.py +++ b/services/storage/src/simcore_service_storage/handlers.py @@ -274,14 +274,18 @@ async def update_file_meta_data(request: web.Request): params, query, body = await extract_and_validate(request) assert params, "params %s" % params # nosec + assert query, "query %s" % query # nosec assert not body, "body %s" % body # nosec with handle_storage_errors(): file_uuid = urllib.parse.unquote_plus(params["fileId"]) + user_id = query["user_id"] dsm = await _prepare_storage_manager(params, query, request) - data: Optional[FileMetaDataEx] = await dsm.update_metadata(file_uuid=file_uuid) + data: Optional[FileMetaDataEx] = await dsm.update_metadata( + file_uuid=file_uuid, user_id=user_id + ) if data is None: raise web.HTTPNotFound(reason=f"Could not update metadata for {file_uuid}") From caf5b17e66fd028f7b1cc65cc2995390b1eb124c Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Apr 2022 14:50:45 +0200 Subject: [PATCH 40/78] refactor command exception --- .../src/simcore_sdk/node_ports_common/r_clone.py | 11 +++++------ .../tests/unit/test_node_ports_v2_r_clone.py | 14 +++++++++++--- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py index 3cdbdc00580..3f60e67acdc 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py @@ -8,9 +8,10 @@ from aiocache import cached from aiofiles import tempfile from aiohttp import ClientSession +from models_library.users import UserID +from 
pydantic.errors import PydanticErrorMixin from settings_library.r_clone import RCloneSettings from settings_library.utils_r_clone import get_r_clone_config -from models_library.users import UserID from .constants import ETag from .storage_client import delete_file_meta_data, get_s3_link, update_file_meta_data @@ -18,8 +19,8 @@ logger = logging.getLogger(__name__) -class _CommandFailedException(Exception): - pass +class _CommandFailedException(PydanticErrorMixin, RuntimeError): + msg_template: str = "Command {command} finished with exception:\n{stdout}" @asynccontextmanager @@ -42,9 +43,7 @@ async def _async_command(command: str, *, cwd: Optional[str] = None) -> str: stdout, _ = await proc.communicate() decoded_stdout = stdout.decode() if proc.returncode != 0: - raise _CommandFailedException( - f"Command {command} finished with exception:\n{decoded_stdout}" - ) + raise _CommandFailedException(command=command, stdout=decoded_stdout) logger.debug("'%s' result:\n%s", command, decoded_stdout) return decoded_stdout diff --git a/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py b/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py index a34f885931c..6b446758eed 100644 --- a/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py +++ b/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py @@ -65,6 +65,14 @@ async def test__async_command_ok() -> None: await r_clone._async_command(" ".join(["ls", "-la"])) -async def test__async_command_error() -> None: - with pytest.raises(r_clone._CommandFailedException): - await r_clone._async_command("__i_do_not_exist__") +@pytest.mark.parametrize( + "cmd", + ["__i_do_not_exist__", "ls_ -lah"], +) +async def test__async_command_error(cmd: str) -> None: + with pytest.raises(r_clone._CommandFailedException) as exe_info: + await r_clone._async_command(cmd) + assert ( + f"{exe_info.value}" + == f"Command {cmd} finished with exception:\n/bin/sh: 1: {cmd.split(' ')[0]}: not found\n" + ) From ffb00bb6adf06eba8a406fe6a0c595ecca636a39 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Apr 2022 15:01:09 +0200 Subject: [PATCH 41/78] command is now a list not a string --- .../src/simcore_sdk/node_ports_common/r_clone.py | 14 +++++++------- .../tests/unit/test_node_ports_v2_r_clone.py | 16 ++++++++++------ 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py index 3f60e67acdc..d746108d9ea 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py @@ -3,7 +3,7 @@ import re from contextlib import asynccontextmanager from pathlib import Path -from typing import AsyncGenerator, Optional +from typing import AsyncGenerator, List, Optional from aiocache import cached from aiofiles import tempfile @@ -31,9 +31,9 @@ async def _config_file(config: str) -> AsyncGenerator[str, None]: yield f.name -async def _async_command(command: str, *, cwd: Optional[str] = None) -> str: +async def _async_command(cmd: List[str], *, cwd: Optional[str] = None) -> str: proc = await asyncio.create_subprocess_shell( - command, + " ".join(cmd), stdin=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.STDOUT, @@ -43,9 +43,9 @@ async def _async_command(command: str, *, cwd: Optional[str] = None) -> str: stdout, _ = await proc.communicate() decoded_stdout = stdout.decode() if proc.returncode != 0: - raise 
_CommandFailedException(command=command, stdout=decoded_stdout) + raise _CommandFailedException(command=cmd, stdout=decoded_stdout) - logger.debug("'%s' result:\n%s", command, decoded_stdout) + logger.debug("'%s' result:\n%s", cmd, decoded_stdout) return decoded_stdout @@ -53,7 +53,7 @@ async def _async_command(command: str, *, cwd: Optional[str] = None) -> str: async def is_r_clone_available(r_clone_settings: Optional[RCloneSettings]) -> bool: """returns: True if the `rclone` cli is installed and a configuration is provided""" try: - await _async_command("rclone --version") + await _async_command(["rclone", "--version"]) return r_clone_settings is not None except _CommandFailedException: return False @@ -110,7 +110,7 @@ async def sync_local_to_s3( ] try: - await _async_command(" ".join(r_clone_command), cwd=f"{source_path.parent}") + await _async_command(r_clone_command, cwd=f"{source_path.parent}") return await update_file_meta_data( session=session, s3_object=s3_object, user_id=user_id ) diff --git a/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py b/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py index 6b446758eed..296f612c77d 100644 --- a/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py +++ b/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py @@ -4,6 +4,7 @@ import subprocess from pathlib import Path +from typing import List import pytest from _pytest.logging import LogCaptureFixture @@ -48,8 +49,8 @@ async def test_is_r_clone_available_cached( for _ in range(3): result = await r_clone.is_r_clone_available(r_clone_settings) assert type(result) is bool - assert "'rclone --version' result:\n" in caplog.text - assert caplog.text.count("'rclone --version' result:\n") == 1 + assert "'['rclone', '--version']' result:\n" in caplog.text + assert caplog.text.count("'['rclone', '--version']' result:\n") == 1 assert await r_clone.is_r_clone_available(None) is False @@ -62,17 +63,20 @@ async def test__config_file(faker: Faker) -> None: async def test__async_command_ok() -> None: - await r_clone._async_command(" ".join(["ls", "-la"])) + await r_clone._async_command(["ls", "-la"]) @pytest.mark.parametrize( "cmd", - ["__i_do_not_exist__", "ls_ -lah"], + [ + ["__i_do_not_exist__"], + ["ls_", "-lah"], + ], ) -async def test__async_command_error(cmd: str) -> None: +async def test__async_command_error(cmd: List[str]) -> None: with pytest.raises(r_clone._CommandFailedException) as exe_info: await r_clone._async_command(cmd) assert ( f"{exe_info.value}" - == f"Command {cmd} finished with exception:\n/bin/sh: 1: {cmd.split(' ')[0]}: not found\n" + == f"Command {cmd} finished with exception:\n/bin/sh: 1: {cmd[0]}: not found\n" ) From 039cd4e65806849d52e04f9f1012f35da124781e Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Apr 2022 15:09:33 +0200 Subject: [PATCH 42/78] moving from list to *args --- .../src/simcore_sdk/node_ports_common/r_clone.py | 12 ++++++------ .../tests/unit/test_node_ports_v2_r_clone.py | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py index d746108d9ea..618e2db7a27 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py @@ -3,7 +3,7 @@ import re from contextlib import asynccontextmanager from pathlib import Path -from typing import AsyncGenerator, List, Optional +from 
typing import AsyncGenerator, Optional from aiocache import cached from aiofiles import tempfile @@ -31,7 +31,7 @@ async def _config_file(config: str) -> AsyncGenerator[str, None]: yield f.name -async def _async_command(cmd: List[str], *, cwd: Optional[str] = None) -> str: +async def _async_command(*cmd: str, cwd: Optional[str] = None) -> str: proc = await asyncio.create_subprocess_shell( " ".join(cmd), stdin=asyncio.subprocess.PIPE, @@ -53,7 +53,7 @@ async def _async_command(cmd: List[str], *, cwd: Optional[str] = None) -> str: async def is_r_clone_available(r_clone_settings: Optional[RCloneSettings]) -> bool: """returns: True if the `rclone` cli is installed and a configuration is provided""" try: - await _async_command(["rclone", "--version"]) + await _async_command("rclone", "--version") return r_clone_settings is not None except _CommandFailedException: return False @@ -96,7 +96,7 @@ async def sync_local_to_s3( # --copy-links # --include # 'filee3e70682-c209-4cac-a29f-6fbed82c07cd.txt' - r_clone_command = [ + r_clone_command = ( "rclone", "--config", config_file_name, @@ -107,10 +107,10 @@ async def sync_local_to_s3( "--copy-links", "--include", f"'{file_name}'", - ] + ) try: - await _async_command(r_clone_command, cwd=f"{source_path.parent}") + await _async_command(*r_clone_command, cwd=f"{source_path.parent}") return await update_file_meta_data( session=session, s3_object=s3_object, user_id=user_id ) diff --git a/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py b/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py index 296f612c77d..5dcaa8f61ff 100644 --- a/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py +++ b/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py @@ -49,8 +49,8 @@ async def test_is_r_clone_available_cached( for _ in range(3): result = await r_clone.is_r_clone_available(r_clone_settings) assert type(result) is bool - assert "'['rclone', '--version']' result:\n" in caplog.text - assert caplog.text.count("'['rclone', '--version']' result:\n") == 1 + assert "'('rclone', '--version')' result:\n" in caplog.text + assert caplog.text.count("'('rclone', '--version')' result:\n") == 1 assert await r_clone.is_r_clone_available(None) is False @@ -63,19 +63,19 @@ async def test__config_file(faker: Faker) -> None: async def test__async_command_ok() -> None: - await r_clone._async_command(["ls", "-la"]) + await r_clone._async_command("ls", "-la") @pytest.mark.parametrize( "cmd", [ - ["__i_do_not_exist__"], - ["ls_", "-lah"], + ("__i_do_not_exist__",), + ("ls_", "-lah"), ], ) async def test__async_command_error(cmd: List[str]) -> None: with pytest.raises(r_clone._CommandFailedException) as exe_info: - await r_clone._async_command(cmd) + await r_clone._async_command(*cmd) assert ( f"{exe_info.value}" == f"Command {cmd} finished with exception:\n/bin/sh: 1: {cmd[0]}: not found\n" From 69db8bd74ab6e23b4ee1a517a56d241676d411a8 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Apr 2022 15:46:00 +0200 Subject: [PATCH 43/78] removed test dir which no longer exists --- ci/github/unit-testing/director-v2.bash | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ci/github/unit-testing/director-v2.bash b/ci/github/unit-testing/director-v2.bash index f57902e0a12..896aac6a0fd 100755 --- a/ci/github/unit-testing/director-v2.bash +++ b/ci/github/unit-testing/director-v2.bash @@ -18,14 +18,13 @@ test() { --color=yes --cov-report=term-missing --cov-report=xml --cov-config=.coveragerc \ -v -m "not travis" 
services/director-v2/tests/unit --ignore=services/director-v2/tests/unit/with_dbs \ --asyncio-mode=auto \ - --ignore=services/director-v2/tests/unit/with_swarm # these tests cannot be run in parallel pytest --log-format="%(asctime)s %(levelname)s %(message)s" \ --log-date-format="%Y-%m-%d %H:%M:%S" \ --cov=simcore_service_director_v2 --durations=10 --cov-append \ --color=yes --cov-report=term-missing --cov-report=xml --cov-config=.coveragerc \ --asyncio-mode=auto \ - -v -m "not travis" services/director-v2/tests/unit/with_swarm services/director-v2/tests/unit/with_dbs + -v -m "not travis" services/director-v2/tests/unit/with_dbs } # Check if the function exists (bash specific) From a13b90e4c6dab29501be6ee3e579c45dd3a3be80 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Apr 2022 16:43:03 +0200 Subject: [PATCH 44/78] fix issues with encoding invalid characters --- .../src/simcore_sdk/node_ports_common/r_clone.py | 1 - .../integration/test_node_ports_v2_r_clone.py | 14 +++++++++++--- .../storage/src/simcore_service_storage/dsm.py | 5 ++++- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py index 618e2db7a27..a897b986fd2 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py @@ -75,7 +75,6 @@ async def sync_local_to_s3( async with _config_file(r_clone_config_file_content) as config_file_name: source_path = local_file_path destination_path = Path(s3_path) - assert local_file_path.name == destination_path.name file_name = local_file_path.name # TODO: capture and send progress somehow? diff --git a/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py b/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py index 1b860d84e93..79c129098c0 100644 --- a/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py +++ b/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py @@ -6,6 +6,7 @@ from importlib import reload from pathlib import Path from typing import AsyncGenerator, AsyncIterable, Iterator +from uuid import uuid4 import aioboto3 import pytest @@ -16,6 +17,7 @@ from settings_library.r_clone import RCloneSettings from simcore_postgres_database.models.file_meta_data import file_meta_data from simcore_sdk.node_ports_common import r_clone, storage_client +from _pytest.fixtures import FixtureRequest pytest_simcore_core_services_selection = [ "migration", @@ -36,9 +38,15 @@ class _TestException(Exception): # FIXTURES -@pytest.fixture -def file_name(faker: Faker) -> str: - return f"file_{faker.uuid4()}.txt" +@pytest.fixture( + params=[ + f"{uuid4()}.bin", + "some funky name.txt", + "öä$äö2-34 no extension", + ] +) +def file_name(request: FixtureRequest) -> str: + return request.param @pytest.fixture diff --git a/services/storage/src/simcore_service_storage/dsm.py b/services/storage/src/simcore_service_storage/dsm.py index cc9ef2d3fea..6f0303b93de 100644 --- a/services/storage/src/simcore_service_storage/dsm.py +++ b/services/storage/src/simcore_service_storage/dsm.py @@ -12,6 +12,7 @@ from concurrent.futures import ThreadPoolExecutor from dataclasses import dataclass, field from pathlib import Path +import urllib.parse from typing import Any, Dict, Final, List, Optional, Tuple, Union import attr @@ -607,7 +608,9 @@ async def get_s3_link(self, user_id: str, file_uuid: str) -> str: await 
self._generate_metadata_for_link(user_id=user_id, file_uuid=file_uuid) bucket_name = self.simcore_bucket_name object_name = file_uuid - return parse_obj_as(AnyUrl, f"s3://{bucket_name}/{object_name.lstrip('/')}") + return parse_obj_as( + AnyUrl, f"s3://{bucket_name}/{urllib.parse.quote(object_name.lstrip('/'))}" + ) async def download_link_s3(self, file_uuid: str, user_id: int) -> str: From c17f082bb6052df87d25e821cb015cb88ef9ea2b Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Apr 2022 16:43:32 +0200 Subject: [PATCH 45/78] removing defaults --- .../settings-library/src/settings_library/r_clone.py | 10 +--------- packages/settings-library/src/settings_library/s3.py | 9 ++++----- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/packages/settings-library/src/settings_library/r_clone.py b/packages/settings-library/src/settings_library/r_clone.py index 40e12975aac..0f6ee753fe0 100644 --- a/packages/settings-library/src/settings_library/r_clone.py +++ b/packages/settings-library/src/settings_library/r_clone.py @@ -11,15 +11,7 @@ class S3Provider(str, Enum): MINIO = "MINIO" -class _RequiredS3Settings(S3Settings): - S3_ENDPOINT: str - S3_ACCESS_KEY: str - S3_SECRET_KEY: str - S3_BUCKET_NAME: str - S3_SECURE: bool - - class RCloneSettings(BaseCustomSettings): - R_CLONE_S3: _RequiredS3Settings = Field(auto_default_from_env=True) + R_CLONE_S3: S3Settings = Field(auto_default_from_env=True) R_CLONE_PROVIDER: S3Provider R_CLONE_REGION: str = Field("us-east-1", description="S3 region to use") diff --git a/packages/settings-library/src/settings_library/s3.py b/packages/settings-library/src/settings_library/s3.py index caa46d140a7..b467830aed0 100644 --- a/packages/settings-library/src/settings_library/s3.py +++ b/packages/settings-library/src/settings_library/s3.py @@ -5,12 +5,11 @@ class S3Settings(BaseCustomSettings): - # TODO: try to remove defaults if this works also remove _RequiredS3Settings - S3_ENDPOINT: str = "minio:9000" - S3_ACCESS_KEY: str = "12345678" - S3_SECRET_KEY: str = "12345678" + S3_ENDPOINT: str + S3_ACCESS_KEY: str + S3_SECRET_KEY: str S3_ACCESS_TOKEN: Optional[str] = None - S3_BUCKET_NAME: str = "simcore" + S3_BUCKET_NAME: str S3_SECURE: bool = False @cached_property From 486d481c70fb41ac88f7b98173ad991617168638 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 27 Apr 2022 08:12:24 +0200 Subject: [PATCH 46/78] refactor to use existing upload_link fetching --- api/specs/storage/openapi.yaml | 30 ----------------- .../node_ports_common/filemanager.py | 2 +- .../simcore_sdk/node_ports_common/r_clone.py | 14 ++++++-- .../node_ports_common/storage_client.py | 33 +++++-------------- .../tests/unit/test_storage_client.py | 4 ++- .../api/v0/openapi.yaml | 29 ---------------- .../src/simcore_service_storage/dsm.py | 5 +-- .../src/simcore_service_storage/handlers.py | 24 -------------- 8 files changed, 26 insertions(+), 115 deletions(-) diff --git a/api/specs/storage/openapi.yaml b/api/specs/storage/openapi.yaml index 5a01bc303aa..aa2940c7ac6 100644 --- a/api/specs/storage/openapi.yaml +++ b/api/specs/storage/openapi.yaml @@ -214,36 +214,6 @@ paths: default: $ref: "#/components/responses/DefaultErrorResponse" - /locations/{location_id}/files/{fileId}/s3/link: - get: - summary: Returns an s3 file link if the user has permissions - operationId: get_s3_link - parameters: - - name: fileId - in: path - required: true - schema: - type: string - - name: location_id - in: path - required: true - schema: - type: string - - name: user_id - in: query - required: true - 
schema: - type: string - responses: - "200": - description: "Provides an s3 link for the requested fileId" - content: - application/json: - schema: - $ref: "#/components/schemas/S3LinkEnvelope" - default: - $ref: "#/components/responses/DefaultErrorResponse" - /locations/{location_id}/files/{fileId}/metadata: get: summary: Get file metadata diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/filemanager.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/filemanager.py index bf849e18304..6d13d2387ae 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/filemanager.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/filemanager.py @@ -57,7 +57,7 @@ async def _get_upload_link( session: ClientSession, ) -> URL: presigned_link: AnyUrl = await storage_client.get_upload_file_presigned_link( - session, file_id, store_id, user_id + session, file_id, store_id, user_id, as_presigned_link=True ) if not presigned_link: raise exceptions.S3InvalidPathError(file_id) diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py index a897b986fd2..c6d9376b61e 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py @@ -14,7 +14,11 @@ from settings_library.utils_r_clone import get_r_clone_config from .constants import ETag -from .storage_client import delete_file_meta_data, get_s3_link, update_file_meta_data +from .storage_client import ( + delete_file_meta_data, + get_upload_file_presigned_link, + update_file_meta_data, +) logger = logging.getLogger(__name__) @@ -67,7 +71,13 @@ async def sync_local_to_s3( user_id: UserID, ) -> ETag: - s3_link = await get_s3_link(session=session, s3_object=s3_object, user_id=user_id) + s3_link = await get_upload_file_presigned_link( + session=session, + file_id=s3_object, + location_id="0", # only works with simcore s3 + user_id=user_id, + as_presigned_link=False, + ) s3_path = re.sub(r"^s3://", "", s3_link) logger.debug(" %s; %s", f"{s3_link=}", f"{s3_path=}") diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/storage_client.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/storage_client.py index f0860408aaa..e63306f4111 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/storage_client.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/storage_client.py @@ -95,7 +95,11 @@ async def get_download_file_presigned_link( @handle_client_exception async def get_upload_file_presigned_link( - session: ClientSession, file_id: str, location_id: str, user_id: UserID + session: ClientSession, + file_id: str, + location_id: str, + user_id: UserID, + as_presigned_link: bool, ) -> AnyUrl: if ( not isinstance(file_id, str) @@ -111,7 +115,10 @@ async def get_upload_file_presigned_link( ) async with session.put( f"{_base_url()}/locations/{location_id}/files/{quote(file_id, safe='')}", - params={"user_id": f"{user_id}"}, + params={ + "user_id": f"{user_id}", + "link_type": "presigned" if as_presigned_link else "s3", + }, ) as response: response.raise_for_status() @@ -175,28 +182,6 @@ async def delete_file( response.raise_for_status() -@handle_client_exception -async def get_s3_link(session: ClientSession, s3_object: str, user_id: UserID) -> str: - url = f"{_base_url()}/locations/0/files/{quote_plus(s3_object)}/s3/link" - result = await session.get(url, params=dict(user_id=user_id)) - - if result.status == 
web.HTTPForbidden.status_code: - raise exceptions.StorageInvalidCall( - ( - f"Insufficient permissions to upload {s3_object=} for {user_id=}. " - f"Storage: {await result.text()}" - ) - ) - - if result.status != web.HTTPOk.status_code: - raise exceptions.StorageInvalidCall( - f"Could not fetch s3_link: status={result.status} {await result.text()}" - ) - - response = await result.json() - return response["data"]["s3_link"] - - @handle_client_exception async def update_file_meta_data( session: ClientSession, s3_object: str, user_id: UserID diff --git a/packages/simcore-sdk/tests/unit/test_storage_client.py b/packages/simcore-sdk/tests/unit/test_storage_client.py index 31d598fdb37..47b1723312f 100644 --- a/packages/simcore-sdk/tests/unit/test_storage_client.py +++ b/packages/simcore-sdk/tests/unit/test_storage_client.py @@ -76,7 +76,7 @@ async def test_get_upload_file_presigned_link( ): async with aiohttp.ClientSession() as session: link = await get_upload_file_presigned_link( - session, file_id, location_id, user_id + session, file_id, location_id, user_id, as_presigned_link=True ) assert isinstance(link, AnyUrl) @@ -123,4 +123,6 @@ async def test_invalid_calls( }, **{invalid_keyword: None}, } + if fct_call == get_upload_file_presigned_link: + kwargs["as_presigned_link"] = True await fct_call(session=session, **kwargs) diff --git a/services/storage/src/simcore_service_storage/api/v0/openapi.yaml b/services/storage/src/simcore_service_storage/api/v0/openapi.yaml index 417760947e6..5e4f4853e06 100644 --- a/services/storage/src/simcore_service_storage/api/v0/openapi.yaml +++ b/services/storage/src/simcore_service_storage/api/v0/openapi.yaml @@ -208,35 +208,6 @@ paths: $ref: '#/components/schemas/FileMetaDataArrayEnveloped' default: $ref: '#/components/responses/DefaultErrorResponse' - '/locations/{location_id}/files/{fileId}/s3/link': - get: - summary: Returns an s3 file link if the user has permissions - operationId: get_s3_link - parameters: - - name: fileId - in: path - required: true - schema: - type: string - - name: location_id - in: path - required: true - schema: - type: string - - name: user_id - in: query - required: true - schema: - type: string - responses: - '200': - description: Provides an s3 link for the requested fileId - content: - application/json: - schema: - $ref: '#/components/schemas/S3LinkEnvelope' - default: - $ref: '#/components/responses/DefaultErrorResponse' '/locations/{location_id}/files/{fileId}/metadata': get: summary: Get file metadata diff --git a/services/storage/src/simcore_service_storage/dsm.py b/services/storage/src/simcore_service_storage/dsm.py index 095b054a9cd..5cc77114382 100644 --- a/services/storage/src/simcore_service_storage/dsm.py +++ b/services/storage/src/simcore_service_storage/dsm.py @@ -13,7 +13,6 @@ from concurrent.futures import ThreadPoolExecutor from dataclasses import dataclass, field from pathlib import Path -import urllib.parse from typing import Any, Dict, Final, List, Optional, Tuple, Union import attr @@ -31,8 +30,6 @@ from servicelib.utils import fire_and_forget_task from sqlalchemy.dialects.postgresql import insert as pg_insert from sqlalchemy.sql.expression import literal_column -from pydantic import AnyUrl -from pydantic.tools import parse_obj_as from tenacity import retry from tenacity.before_sleep import before_sleep_log from tenacity.retry import retry_if_exception_type, retry_if_result @@ -581,7 +578,7 @@ async def _init_metadata() -> Tuple[int, str]: await _init_metadata() - async def upload_link(self, user_id: str, 
file_uuid: str): + async def upload_link(self, user_id: str, file_uuid: str, as_presigned_link: bool): """returns: a presigned upload link NOTE: updates metadata once the upload is concluded""" diff --git a/services/storage/src/simcore_service_storage/handlers.py b/services/storage/src/simcore_service_storage/handlers.py index fb8eb775320..e779690951a 100644 --- a/services/storage/src/simcore_service_storage/handlers.py +++ b/services/storage/src/simcore_service_storage/handlers.py @@ -349,30 +349,6 @@ async def download_file(request: web.Request): return {"error": None, "data": {"link": link}} -@routes.get(f"/{api_vtag}/locations/{{location_id}}/files/{{fileId}}/s3/link") # type: ignore -async def get_s3_link(request: web.Request) -> Dict[str, Any]: - params, query, body = await extract_and_validate(request) - - assert params, "params %s" % params # nosec - assert query, "query %s" % query # nosec - assert not body, "body %s" % body # nosec - - with handle_storage_errors(): - location_id = params["location_id"] - user_id = query["user_id"] - file_uuid = urllib.parse.unquote_plus(params["fileId"]) - - if int(location_id) != SIMCORE_S3_ID: - raise web.HTTPPreconditionFailed( - reason=f"Only allowed to fetch s3 link for '{SIMCORE_S3_STR}'" - ) - - dsm = await _prepare_storage_manager(params, query, request) - - s3_link: str = await dsm.get_s3_link(user_id=user_id, file_uuid=file_uuid) - return {"error": None, "data": {"s3_link": s3_link}} - - @routes.put(f"/{api_vtag}/locations/{{location_id}}/files/{{fileId}}") # type: ignore async def upload_file(request: web.Request): params, query, body = await extract_and_validate(request) From eb6f940b5b8ffe3518aad60084e743b389b8fc08 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 27 Apr 2022 08:14:22 +0200 Subject: [PATCH 47/78] removed unused from specs --- api/specs/storage/openapi.yaml | 29 ------------------- .../api/v0/openapi.yaml | 27 ----------------- 2 files changed, 56 deletions(-) diff --git a/api/specs/storage/openapi.yaml b/api/specs/storage/openapi.yaml index aa2940c7ac6..2cb46b26dc7 100644 --- a/api/specs/storage/openapi.yaml +++ b/api/specs/storage/openapi.yaml @@ -809,35 +809,6 @@ components: items: $ref: "#/components/schemas/DatasetMetaData" - S3LinkEnvelope: - type: object - required: - - data - - error - properties: - data: - nullable: true - type: object - properties: - s3_link: - type: string - error: - nullable: true - default: null - - FileMetaDataDeleteEnvelope: - type: object - required: - - data - - error - properties: - data: - nullable: true - default: null - error: - nullable: true - default: null - FileMetaEnvelope: type: object required: diff --git a/services/storage/src/simcore_service_storage/api/v0/openapi.yaml b/services/storage/src/simcore_service_storage/api/v0/openapi.yaml index 5e4f4853e06..e411ada4160 100644 --- a/services/storage/src/simcore_service_storage/api/v0/openapi.yaml +++ b/services/storage/src/simcore_service_storage/api/v0/openapi.yaml @@ -766,33 +766,6 @@ components: type: array items: $ref: '#/components/schemas/DatasetMetaData' - S3LinkEnvelope: - type: object - required: - - data - - error - properties: - data: - nullable: true - type: object - properties: - s3_link: - type: string - error: - nullable: true - default: null - FileMetaDataDeleteEnvelope: - type: object - required: - - data - - error - properties: - data: - nullable: true - default: null - error: - nullable: true - default: null FileMetaEnvelope: type: object required: From 81ace91ca8a0b2b603c55053fa8e3bc7e6326b73 Mon Sep 
17 00:00:00 2001 From: Andrei Neagu Date: Wed, 27 Apr 2022 08:15:46 +0200 Subject: [PATCH 48/78] pylint --- packages/simcore-sdk/tests/unit/test_storage_client.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/simcore-sdk/tests/unit/test_storage_client.py b/packages/simcore-sdk/tests/unit/test_storage_client.py index 47b1723312f..5b2e4635576 100644 --- a/packages/simcore-sdk/tests/unit/test_storage_client.py +++ b/packages/simcore-sdk/tests/unit/test_storage_client.py @@ -123,6 +123,8 @@ async def test_invalid_calls( }, **{invalid_keyword: None}, } - if fct_call == get_upload_file_presigned_link: + if ( # plylint: disable=comparison-with-callable + fct_call == get_upload_file_presigned_link + ): kwargs["as_presigned_link"] = True await fct_call(session=session, **kwargs) From c75fbc0c4dacfaa38b9bfb62517a9e2f6aab63ee Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 27 Apr 2022 08:21:22 +0200 Subject: [PATCH 49/78] removed old comment --- .../src/simcore_sdk/node_ports_common/storage_client.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/storage_client.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/storage_client.py index e63306f4111..22ced1f4093 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/storage_client.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/storage_client.py @@ -201,8 +201,6 @@ async def update_file_meta_data( async def delete_file_meta_data( session: ClientSession, s3_object: str, user_id: UserID ) -> None: - # TODO: this needs to be repalced as well! - # TODO: check permissions url = f"{_base_url()}/locations/0/files/{quote_plus(s3_object)}/metadata" result = await session.delete(url, params=dict(user_id=user_id)) if result.status != web.HTTPNoContent.status_code: From e43cb285fc4efd1c78fec3fdc49c80be71d916ea Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 27 Apr 2022 08:39:47 +0200 Subject: [PATCH 50/78] pylint --- .../simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py | 2 +- packages/simcore-sdk/tests/unit/test_storage_client.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py b/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py index 79c129098c0..c49e9ddfa67 100644 --- a/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py +++ b/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py @@ -11,13 +11,13 @@ import aioboto3 import pytest import sqlalchemy as sa +from _pytest.fixtures import FixtureRequest from aiohttp import ClientSession from faker import Faker from pytest_mock.plugin import MockerFixture from settings_library.r_clone import RCloneSettings from simcore_postgres_database.models.file_meta_data import file_meta_data from simcore_sdk.node_ports_common import r_clone, storage_client -from _pytest.fixtures import FixtureRequest pytest_simcore_core_services_selection = [ "migration", diff --git a/packages/simcore-sdk/tests/unit/test_storage_client.py b/packages/simcore-sdk/tests/unit/test_storage_client.py index 5b2e4635576..11d52ea4f23 100644 --- a/packages/simcore-sdk/tests/unit/test_storage_client.py +++ b/packages/simcore-sdk/tests/unit/test_storage_client.py @@ -123,7 +123,7 @@ async def test_invalid_calls( }, **{invalid_keyword: None}, } - if ( # plylint: disable=comparison-with-callable + if ( # pylint: disable=comparison-with-callable fct_call == 
get_upload_file_presigned_link ): kwargs["as_presigned_link"] = True From 5e180322cb99cc46889a59a0574fbabe55eceb43 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 27 Apr 2022 09:23:29 +0200 Subject: [PATCH 51/78] dynamic-sidecar work also gets transferred via rclone --- .../simcore-sdk/src/simcore_sdk/node_data/data_manager.py | 8 +++++++- .../src/simcore_sdk/node_ports_common/r_clone.py | 7 ++++--- .../simcore-sdk/tests/unit/test_node_data_data_manager.py | 2 ++ .../simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py | 6 +++--- 4 files changed, 16 insertions(+), 7 deletions(-) diff --git a/packages/simcore-sdk/src/simcore_sdk/node_data/data_manager.py b/packages/simcore-sdk/src/simcore_sdk/node_data/data_manager.py index 725f4c50dca..ce530b7ccb2 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_data/data_manager.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_data/data_manager.py @@ -5,6 +5,7 @@ from typing import Optional, Union from servicelib.archiving_utils import archive_dir, unarchive_dir +from settings_library.r_clone import RCloneSettings from ..node_ports_common import filemanager @@ -24,6 +25,7 @@ async def _push_file( node_uuid: str, file_path: Path, rename_to: Optional[str], + r_clone_settings: Optional[RCloneSettings] = None, ): store_id = "0" # this is for simcore.s3 s3_object = _create_s3_object( @@ -36,6 +38,7 @@ async def _push_file( store_name=None, s3_object=s3_object, local_file_path=file_path, + r_clone_settings=r_clone_settings, ) log.info("%s successfuly uploaded", file_path) @@ -46,6 +49,7 @@ async def push( node_uuid: str, file_or_folder: Path, rename_to: Optional[str] = None, + r_clone_settings: Optional[RCloneSettings] = None, ): if file_or_folder.is_file(): return await _push_file( @@ -64,7 +68,9 @@ async def push( compress=False, # disabling compression for faster speeds store_relative_path=True, ) - return await _push_file(user_id, project_id, node_uuid, archive_file_path, None) + return await _push_file( + user_id, project_id, node_uuid, archive_file_path, None, r_clone_settings + ) async def _pull_file( diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py index c6d9376b61e..56286ea7eab 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py @@ -36,8 +36,9 @@ async def _config_file(config: str) -> AsyncGenerator[str, None]: async def _async_command(*cmd: str, cwd: Optional[str] = None) -> str: + str_cmd = " ".join(cmd) proc = await asyncio.create_subprocess_shell( - " ".join(cmd), + str_cmd, stdin=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.STDOUT, @@ -47,9 +48,9 @@ async def _async_command(*cmd: str, cwd: Optional[str] = None) -> str: stdout, _ = await proc.communicate() decoded_stdout = stdout.decode() if proc.returncode != 0: - raise _CommandFailedException(command=cmd, stdout=decoded_stdout) + raise _CommandFailedException(command=str_cmd, stdout=decoded_stdout) - logger.debug("'%s' result:\n%s", cmd, decoded_stdout) + logger.debug("'%s' result:\n%s", str_cmd, decoded_stdout) return decoded_stdout diff --git a/packages/simcore-sdk/tests/unit/test_node_data_data_manager.py b/packages/simcore-sdk/tests/unit/test_node_data_data_manager.py index 6e48ba2a3a6..fbde402d55c 100644 --- a/packages/simcore-sdk/tests/unit/test_node_data_data_manager.py +++ b/packages/simcore-sdk/tests/unit/test_node_data_data_manager.py @@ 
-74,6 +74,7 @@ async def test_push_folder( mock_temporary_directory.assert_called_once() mock_filemanager.upload_file.assert_called_once_with( local_file_path=(test_compression_folder / "{}.zip".format(test_folder.stem)), + r_clone_settings=None, s3_object=f"{project_id}/{node_uuid}/{test_folder.stem}.zip", store_id="0", store_name=None, @@ -119,6 +120,7 @@ async def test_push_file( await data_manager.push(user_id, project_id, node_uuid, file_path) mock_temporary_directory.assert_not_called() mock_filemanager.upload_file.assert_called_once_with( + r_clone_settings=None, local_file_path=file_path, s3_object=f"{project_id}/{node_uuid}/{file_path.name}", store_id="0", diff --git a/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py b/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py index 5dcaa8f61ff..c9e3c970d18 100644 --- a/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py +++ b/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py @@ -49,8 +49,8 @@ async def test_is_r_clone_available_cached( for _ in range(3): result = await r_clone.is_r_clone_available(r_clone_settings) assert type(result) is bool - assert "'('rclone', '--version')' result:\n" in caplog.text - assert caplog.text.count("'('rclone', '--version')' result:\n") == 1 + assert "'rclone --version' result:\n" in caplog.text + assert caplog.text.count("'rclone --version' result:\n") == 1 assert await r_clone.is_r_clone_available(None) is False @@ -78,5 +78,5 @@ async def test__async_command_error(cmd: List[str]) -> None: await r_clone._async_command(*cmd) assert ( f"{exe_info.value}" - == f"Command {cmd} finished with exception:\n/bin/sh: 1: {cmd[0]}: not found\n" + == f"Command {' '.join(cmd)} finished with exception:\n/bin/sh: 1: {cmd[0]}: not found\n" ) From f5204d4dfba8db691c7c015ca9d690298df60831 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 27 Apr 2022 09:23:39 +0200 Subject: [PATCH 52/78] add missing file --- .../src/simcore_service_dynamic_sidecar/modules/data_manager.py | 1 + 1 file changed, 1 insertion(+) diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/data_manager.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/data_manager.py index 0582acc355b..7a8873b2e0e 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/data_manager.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/data_manager.py @@ -76,5 +76,6 @@ async def upload_path_if_exists(path: Path, state_exclude: List[str]) -> None: project_id=str(settings.DY_SIDECAR_PROJECT_ID), node_uuid=str(settings.DY_SIDECAR_NODE_ID), file_or_folder=path, + r_clone_settings=settings.DY_SIDECAR_R_CLONE_SETTINGS, ) logger.info("Finished upload of %s", path) From 8bf58c5971fbd6f07d2c1d11514cbe71461b64b0 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 27 Apr 2022 10:09:12 +0200 Subject: [PATCH 53/78] directory watcher disabled during docker-compose up --- .../api/containers.py | 14 ++++++++------ .../modules/directory_watcher.py | 14 +++++++++++++- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/api/containers.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/api/containers.py index 031de75520d..58839d4a2f2 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/api/containers.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/api/containers.py @@ -39,6 +39,7 @@ ) from 
..models.domains.shared_store import SharedStore from ..models.schemas.application_health import ApplicationHealth +from ..modules.directory_watcher import directory_watcher_disabler from ..modules.mounted_fs import MountedVolumes logger = logging.getLogger(__name__) @@ -64,12 +65,13 @@ async def _task_docker_compose_up( "docker-compose --project-name {project} --file {file_path} " "up --no-build --detach" ) - finished_without_errors, stdout = await write_file_and_run_command( - settings=settings, - file_content=shared_store.compose_spec, - command=command, - command_timeout=None, - ) + with directory_watcher_disabler(app): + finished_without_errors, stdout = await write_file_and_run_command( + settings=settings, + file_content=shared_store.compose_spec, + command=command, + command_timeout=None, + ) message = f"Finished {command} with output\n{stdout}" if finished_without_errors: diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/directory_watcher.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/directory_watcher.py index 87be7e6cdf2..e87dbe10fdb 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/directory_watcher.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/directory_watcher.py @@ -3,10 +3,11 @@ import time from asyncio import AbstractEventLoop from collections import deque +from contextlib import contextmanager from functools import wraps from os import name from pathlib import Path -from typing import Any, Awaitable, Callable, Deque, Optional +from typing import Any, Awaitable, Callable, Deque, Generator, Optional from fastapi import FastAPI from servicelib.utils import logged_gather @@ -107,6 +108,7 @@ def _invoke_push_directory(self) -> None: async_push_directory(self.loop, self.directory_path) def on_any_event(self, event: FileSystemEvent) -> None: + logger.debug("Detected Event %s", event) super().on_any_event(event) if self._is_enabled: self._invoke_push_directory() @@ -215,8 +217,18 @@ def enable_directory_watcher(app: FastAPI) -> None: app.state.dir_watcher.enable_event_propagation() +@contextmanager +def directory_watcher_disabler(app: FastAPI) -> Generator[None, None, None]: + disable_directory_watcher(app) + try: + yield None + finally: + enable_directory_watcher(app) + + __all__ = [ "disable_directory_watcher", "enable_directory_watcher", + "directory_watcher_disabler", "setup_directory_watcher", ] From 14abcdc34a97abba5b68895059e91cdadc95f20b Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 27 Apr 2022 10:19:11 +0200 Subject: [PATCH 54/78] removed log message --- .../simcore_service_dynamic_sidecar/modules/directory_watcher.py | 1 - 1 file changed, 1 deletion(-) diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/directory_watcher.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/directory_watcher.py index e87dbe10fdb..b86c908d606 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/directory_watcher.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/directory_watcher.py @@ -108,7 +108,6 @@ def _invoke_push_directory(self) -> None: async_push_directory(self.loop, self.directory_path) def on_any_event(self, event: FileSystemEvent) -> None: - logger.debug("Detected Event %s", event) super().on_any_event(event) if self._is_enabled: self._invoke_push_directory() From 8bcce57ed5ff24ad3019e4f305f4797e22b131da Mon Sep 17 00:00:00 2001 From: Andrei 
Neagu Date: Fri, 29 Apr 2022 11:38:36 +0200 Subject: [PATCH 55/78] adding middleware to log HTTP errors serialized to client --- .../servicelib/aiohttp/dev_error_logger.py | 36 +++++++++++++++++++ .../simcore_service_storage/access_layer.py | 4 +-- .../simcore_service_storage/application.py | 4 +++ .../src/simcore_service_storage/handlers.py | 4 +-- .../simcore_service_storage/rest_models.py | 1 + .../storage/tests/helpers/utils_assert.py | 1 - 6 files changed, 45 insertions(+), 5 deletions(-) create mode 100644 packages/service-library/src/servicelib/aiohttp/dev_error_logger.py diff --git a/packages/service-library/src/servicelib/aiohttp/dev_error_logger.py b/packages/service-library/src/servicelib/aiohttp/dev_error_logger.py new file mode 100644 index 00000000000..99d5fa72d59 --- /dev/null +++ b/packages/service-library/src/servicelib/aiohttp/dev_error_logger.py @@ -0,0 +1,36 @@ + +from aiohttp.web import Application, middleware, Request, HTTPError +from servicelib.aiohttp.typing_extension import Handler, Middleware +import logging +import traceback + +logger = logging.getLogger(__name__) + +_SEP = "|||" + + +def _middleware_factory() -> Middleware: + @middleware + async def middleware_handler(request: Request, handler: Handler): + try: + return await handler(request) + except HTTPError as err: + fields = { + "Body": err.body, + "Status": err.status, + "Reason": err.reason, + "Headers": err.headers, + "Traceback": "\n".join(traceback.format_tb(err.__traceback__)), + } + formatted_error = "".join( + [f"\n{_SEP}{k}{_SEP}\n{v}" for k, v in fields.items()] + ) + logger.debug("Error serialized to client:%s", formatted_error) + raise err + + return middleware_handler + + +def setup_dev_error_logger(app: Application) -> None: + logger.info("Setting up dev_error_logger") + app.middlewares.append(_middleware_factory()) diff --git a/services/storage/src/simcore_service_storage/access_layer.py b/services/storage/src/simcore_service_storage/access_layer.py index c453708aa20..bde85f59f4d 100644 --- a/services/storage/src/simcore_service_storage/access_layer.py +++ b/services/storage/src/simcore_service_storage/access_layer.py @@ -70,7 +70,7 @@ def none(cls) -> "AccessRights": class AccessLayerError(Exception): - """ Base class for access-layer related errors """ + """Base class for access-layer related errors""" class InvalidFileIdentifier(AccessLayerError): @@ -282,6 +282,6 @@ async def get_file_access_rights( async def get_readable_project_ids(conn: SAConnection, user_id: int) -> List[ProjectID]: - """ Returns a list of projects where user has granted read-access """ + """Returns a list of projects where user has granted read-access""" projects_access_rights = await list_projects_access_rights(conn, int(user_id)) return [pid for pid, access in projects_access_rights.items() if access.read] diff --git a/services/storage/src/simcore_service_storage/application.py b/services/storage/src/simcore_service_storage/application.py index 51b0668c424..896c78810ea 100644 --- a/services/storage/src/simcore_service_storage/application.py +++ b/services/storage/src/simcore_service_storage/application.py @@ -7,6 +7,7 @@ from aiohttp import web from servicelib.aiohttp.application import APP_CONFIG_KEY, create_safe_application +from servicelib.aiohttp.dev_error_logger import setup_dev_error_logger from servicelib.aiohttp.monitoring import setup_monitoring from servicelib.aiohttp.tracing import setup_tracing @@ -45,6 +46,9 @@ def create(settings: Settings) -> web.Application: setup_dsm(app) # core subsystem. 
Needs s3 and db setups done setup_rest(app) # lastly, we expose API to the world + if settings.LOG_LEVEL == "DEBUG": + setup_dev_error_logger(app) + if settings.STORAGE_MONITORING_ENABLED: setup_monitoring(app, app_name, version=f"{version}") diff --git a/services/storage/src/simcore_service_storage/handlers.py b/services/storage/src/simcore_service_storage/handlers.py index e779690951a..85a8c9a0a11 100644 --- a/services/storage/src/simcore_service_storage/handlers.py +++ b/services/storage/src/simcore_service_storage/handlers.py @@ -1,9 +1,9 @@ import asyncio import json import logging +import urllib.parse from contextlib import contextmanager from typing import Any, Dict, Optional -import urllib.parse import attr from aiohttp import web @@ -16,8 +16,8 @@ from .constants import APP_DSM_KEY, DATCORE_STR, SIMCORE_S3_ID, SIMCORE_S3_STR from .db_tokens import get_api_token_and_secret from .dsm import DataStorageManager, DatCoreApiToken -from .settings import Settings from .models import FileMetaDataEx +from .settings import Settings log = logging.getLogger(__name__) diff --git a/services/storage/src/simcore_service_storage/rest_models.py b/services/storage/src/simcore_service_storage/rest_models.py index 3eadb58d635..a35710ceaa7 100644 --- a/services/storage/src/simcore_service_storage/rest_models.py +++ b/services/storage/src/simcore_service_storage/rest_models.py @@ -2,6 +2,7 @@ """ from datetime import datetime + from pydantic import BaseModel diff --git a/services/storage/tests/helpers/utils_assert.py b/services/storage/tests/helpers/utils_assert.py index f719b22b9c5..9cc72b719a4 100644 --- a/services/storage/tests/helpers/utils_assert.py +++ b/services/storage/tests/helpers/utils_assert.py @@ -1,7 +1,6 @@ from pprint import pformat from aiohttp import web - from servicelib.aiohttp.rest_responses import unwrap_envelope From 6f5cecc9110c53d1b7ca81fd735c8aad0adcc984 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 29 Apr 2022 11:48:51 +0200 Subject: [PATCH 56/78] removing logs since they get automatically captured --- .../src/simcore_service_storage/dsm.py | 60 +++++-------------- .../src/simcore_service_storage/handlers.py | 2 +- 2 files changed, 17 insertions(+), 45 deletions(-) diff --git a/services/storage/src/simcore_service_storage/dsm.py b/services/storage/src/simcore_service_storage/dsm.py index 5cc77114382..e8f8a4e4c17 100644 --- a/services/storage/src/simcore_service_storage/dsm.py +++ b/services/storage/src/simcore_service_storage/dsm.py @@ -504,9 +504,9 @@ async def update_metadata( conn, int(user_id), file_uuid ) if not can.write: - message = f"User {user_id} was not allowed to upload file {file_uuid}" - logger.debug(message) - raise web.HTTPForbidden(reason=message) + raise web.HTTPForbidden( + reason=f"User {user_id} was not allowed to upload file {file_uuid}" + ) bucket_name = self.simcore_bucket_name object_name = file_uuid @@ -522,9 +522,9 @@ async def delete_metadata(self, user_id: int, file_uuid: str) -> None: conn, int(user_id), file_uuid ) if not can.write: - message = f"User {user_id} was not allowed to upload file {file_uuid}" - logger.debug(message) - raise web.HTTPForbidden(reason=message) + raise web.HTTPForbidden( + reason=f"User {user_id} was not allowed to upload file {file_uuid}" + ) try: await conn.execute( @@ -532,12 +532,10 @@ async def delete_metadata(self, user_id: int, file_uuid: str) -> None: file_meta_data.c.file_uuid == file_uuid ) ) - except Exception: - message = f"Could not delete metadata entry for file {file_uuid}" - logger.warning(message) 
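# A minimal sketch of the pattern this hunk converges on (the broad
# `except Exception` here is narrowed to `DatabaseError` in a later patch of
# this series); the helper name is hypothetical, the other names are in scope:
async def _delete_metadata_entry(conn, file_uuid: str) -> None:
    try:
        await conn.execute(
            file_meta_data.delete().where(file_meta_data.c.file_uuid == file_uuid)
        )
    except DatabaseError as err:
        # `raise ... from err` chains the exceptions, so the original traceback
        # survives and the dev-error middleware from PATCH 55 can log it; that
        # is what makes the explicit logger.debug/logger.warning calls redundant.
        raise web.HTTPNotFound(
            reason=f"Could not delete metadata entry for file {file_uuid}"
        ) from err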
- raise web.HTTPNotFound( # pylint: disable=raise-missing-from - reason=message - ) + except Exception as err: + raise web.HTTPNotFound( + reason=f"Could not delete metadata entry for file {file_uuid}" + ) from err async def _generate_metadata_for_link(self, user_id: str, file_uuid: str): """ @@ -551,11 +549,8 @@ async def _generate_metadata_for_link(self, user_id: str, file_uuid: str): conn, int(user_id), file_uuid ) if not can.write: - logger.debug( - "User %s was not allowed to upload file %s", user_id, file_uuid - ) raise web.HTTPForbidden( - reason=f"User does not have enough access rights to upload file {file_uuid}" + reason=f"User {user_id} does not have enough access rights to upload file {file_uuid}" ) @retry(**postgres_service_retry_policy_kwargs) @@ -617,11 +612,8 @@ async def download_link_s3( # If write permission would be required, then shared projects as views cannot # recover data in nodes (e.g. jupyter cannot pull work data) # - logger.debug( - "User %s was not allowed to download file %s", user_id, file_uuid - ) raise web.HTTPForbidden( - reason=f"User does not have enough rights to download {file_uuid}" + reason=f"User {user_id} does not have enough rights to download file {file_uuid}" ) bucket_name = self.simcore_bucket_name @@ -809,23 +801,13 @@ async def deep_copy_project_simcore_s3( conn, int(user_id), project_id=dest_folder ) if not source_access_rights.read: - logger.debug( - "User %s was not allowed to read from project %s", - user_id, - source_folder, - ) raise web.HTTPForbidden( - reason=f"User does not have enough access rights to read from project '{source_folder}'" + reason=f"User {user_id} does not have enough access rights to read from project '{source_folder}'" ) if not dest_access_rights.write: - logger.debug( - "User %s was not allowed to write to project %s", - user_id, - dest_folder, - ) raise web.HTTPForbidden( - reason=f"User does not have enough access rights to write to project '{dest_folder}'" + reason=f"User {user_id} does not have enough access rights to write to project '{dest_folder}'" ) # build up naming map based on labels @@ -984,13 +966,8 @@ async def delete_file(self, user_id: str, location: str, file_uuid: str): conn, int(user_id), file_uuid ) if not can.delete: - logger.debug( - "User %s was not allowed to delete file %s", - user_id, - file_uuid, - ) raise web.HTTPForbidden( - reason=f"User '{user_id}' does not have enough access rights to delete file {file_uuid}" + reason=f"User {user_id} does not have enough access rights to delete file {file_uuid}" ) query = sa.select( @@ -1035,13 +1012,8 @@ async def delete_project_simcore_s3( conn, int(user_id), project_id ) if not can.delete: - logger.debug( - "User %s was not allowed to delete project %s", - user_id, - project_id, - ) raise web.HTTPForbidden( - reason=f"User does not have delete access for {project_id}" + reason=f"User {user_id} does not have delete access for {project_id}" ) delete_me = file_meta_data.delete().where( diff --git a/services/storage/src/simcore_service_storage/handlers.py b/services/storage/src/simcore_service_storage/handlers.py index 85a8c9a0a11..5ff9a8b5473 100644 --- a/services/storage/src/simcore_service_storage/handlers.py +++ b/services/storage/src/simcore_service_storage/handlers.py @@ -311,7 +311,7 @@ async def delete_file_meta_data(request: web.Request): await dsm.delete_metadata(user_id=user_id, file_uuid=file_uuid) - return web.HTTPNoContent(content_type="application/json") + raise web.HTTPNoContent(content_type="application/json") 
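# Raising instead of returning works because aiohttp's HTTP exceptions are
# also Response subclasses: a raised HTTPNoContent is rendered as an ordinary
# 204 reply. A minimal self-contained sketch; the handler name is hypothetical:
from aiohttp import web

async def _delete_metadata_handler(request: web.Request) -> web.Response:
    # ... perform the deletion ...
    raise web.HTTPNoContent(content_type="application/json")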
@routes.get(f"/{api_vtag}/locations/{{location_id}}/files/{{fileId}}") # type: ignore From 1160e5736ef12877de0664dfc55e627d21dac6d2 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 29 Apr 2022 11:55:46 +0200 Subject: [PATCH 57/78] returning AnyUrl --- services/storage/src/simcore_service_storage/dsm.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/services/storage/src/simcore_service_storage/dsm.py b/services/storage/src/simcore_service_storage/dsm.py index e8f8a4e4c17..131374c1ebe 100644 --- a/services/storage/src/simcore_service_storage/dsm.py +++ b/services/storage/src/simcore_service_storage/dsm.py @@ -573,7 +573,9 @@ async def _init_metadata() -> Tuple[int, str]: await _init_metadata() - async def upload_link(self, user_id: str, file_uuid: str, as_presigned_link: bool): + async def upload_link( + self, user_id: str, file_uuid: str, as_presigned_link: bool + ) -> AnyUrl: """returns: a presigned upload link NOTE: updates metadata once the upload is concluded""" @@ -591,12 +593,10 @@ async def upload_link(self, user_id: str, file_uuid: str, as_presigned_link: boo object_name=object_name, ) ) - link = parse_obj_as( - AnyUrl, f"s3://{bucket_name}/{urllib.parse.quote( object_name)}" - ) + link = f"s3://{bucket_name}/{urllib.parse.quote( object_name)}" if as_presigned_link: link = self.s3_client.create_presigned_put_url(bucket_name, object_name) - return f"{link}" + return parse_obj_as(AnyUrl, f"{link}") async def download_link_s3( self, file_uuid: str, user_id: int, as_presigned_link: bool From 8e7fb4151fb4cd7fa2be4e49a607abffdcc5dcf3 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 29 Apr 2022 12:31:41 +0200 Subject: [PATCH 58/78] more changes --- services/storage/src/simcore_service_storage/dsm.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/services/storage/src/simcore_service_storage/dsm.py b/services/storage/src/simcore_service_storage/dsm.py index 131374c1ebe..20193183103 100644 --- a/services/storage/src/simcore_service_storage/dsm.py +++ b/services/storage/src/simcore_service_storage/dsm.py @@ -25,7 +25,11 @@ from aiopg.sa import Engine from aiopg.sa.result import ResultProxy, RowProxy from pydantic import AnyUrl, parse_obj_as -from servicelib.aiohttp.aiopg_utils import DBAPIError, PostgresRetryPolicyUponOperation +from servicelib.aiohttp.aiopg_utils import ( + DatabaseError, + DBAPIError, + PostgresRetryPolicyUponOperation, +) from servicelib.aiohttp.client_session import get_client_session from servicelib.utils import fire_and_forget_task from sqlalchemy.dialects.postgresql import insert as pg_insert @@ -532,7 +536,7 @@ async def delete_metadata(self, user_id: int, file_uuid: str) -> None: file_meta_data.c.file_uuid == file_uuid ) ) - except Exception as err: + except DatabaseError as err: raise web.HTTPNotFound( reason=f"Could not delete metadata entry for file {file_uuid}" ) from err From 9d6acd079f1b489ea87fe1cf55751d8744961e89 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 29 Apr 2022 13:31:04 +0200 Subject: [PATCH 59/78] rename and quote shell --- .../src/simcore_sdk/node_ports_common/filemanager.py | 2 +- .../src/simcore_sdk/node_ports_common/r_clone.py | 7 ++++--- .../src/simcore_sdk/node_ports_common/storage_client.py | 2 +- packages/simcore-sdk/tests/unit/test_storage_client.py | 8 ++++---- 4 files changed, 10 insertions(+), 9 deletions(-) diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/filemanager.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/filemanager.py 
index 6d13d2387ae..5a1845862ec 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/filemanager.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/filemanager.py @@ -56,7 +56,7 @@ async def _get_upload_link( file_id: str, session: ClientSession, ) -> URL: - presigned_link: AnyUrl = await storage_client.get_upload_file_presigned_link( + presigned_link: AnyUrl = await storage_client.get_upload_file_link( session, file_id, store_id, user_id, as_presigned_link=True ) if not presigned_link: diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py index 56286ea7eab..25ad564da7f 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py @@ -5,6 +5,7 @@ from pathlib import Path from typing import AsyncGenerator, Optional +import shlex from aiocache import cached from aiofiles import tempfile from aiohttp import ClientSession @@ -16,7 +17,7 @@ from .constants import ETag from .storage_client import ( delete_file_meta_data, - get_upload_file_presigned_link, + get_upload_file_link, update_file_meta_data, ) @@ -36,7 +37,7 @@ async def _config_file(config: str) -> AsyncGenerator[str, None]: async def _async_command(*cmd: str, cwd: Optional[str] = None) -> str: - str_cmd = " ".join(cmd) + str_cmd = shlex.quote(" ".join(cmd)) proc = await asyncio.create_subprocess_shell( str_cmd, stdin=asyncio.subprocess.PIPE, @@ -72,7 +73,7 @@ async def sync_local_to_s3( user_id: UserID, ) -> ETag: - s3_link = await get_upload_file_presigned_link( + s3_link = await get_upload_file_link( session=session, file_id=s3_object, location_id="0", # only works with simcore s3 diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/storage_client.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/storage_client.py index 22ced1f4093..19fb4e6736e 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/storage_client.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/storage_client.py @@ -94,7 +94,7 @@ async def get_download_file_presigned_link( @handle_client_exception -async def get_upload_file_presigned_link( +async def get_upload_file_link( session: ClientSession, file_id: str, location_id: str, diff --git a/packages/simcore-sdk/tests/unit/test_storage_client.py b/packages/simcore-sdk/tests/unit/test_storage_client.py index 11d52ea4f23..af75ba676cd 100644 --- a/packages/simcore-sdk/tests/unit/test_storage_client.py +++ b/packages/simcore-sdk/tests/unit/test_storage_client.py @@ -17,7 +17,7 @@ get_download_file_presigned_link, get_file_metadata, get_storage_locations, - get_upload_file_presigned_link, + get_upload_file_link, ) @@ -75,7 +75,7 @@ async def test_get_upload_file_presigned_link( location_id: str, ): async with aiohttp.ClientSession() as session: - link = await get_upload_file_presigned_link( + link = await get_upload_file_link( session, file_id, location_id, user_id, as_presigned_link=True ) assert isinstance(link, AnyUrl) @@ -101,7 +101,7 @@ async def test_get_file_metada( [ get_file_metadata, get_download_file_presigned_link, - get_upload_file_presigned_link, + get_upload_file_link, ], ) async def test_invalid_calls( @@ -124,7 +124,7 @@ async def test_invalid_calls( **{invalid_keyword: None}, } if ( # pylint: disable=comparison-with-callable - fct_call == get_upload_file_presigned_link + fct_call == get_upload_file_link ): kwargs["as_presigned_link"] = True await 
fct_call(session=session, **kwargs) From 9af8adb3f1ce411150edbe3415aaa969c0f11db7 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 29 Apr 2022 13:53:00 +0200 Subject: [PATCH 60/78] added quotes and optimized checks --- .../src/simcore_sdk/node_ports_common/r_clone.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py index 25ad564da7f..1051b7f6537 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py @@ -37,7 +37,7 @@ async def _config_file(config: str) -> AsyncGenerator[str, None]: async def _async_command(*cmd: str, cwd: Optional[str] = None) -> str: - str_cmd = shlex.quote(" ".join(cmd)) + str_cmd = " ".join(cmd) proc = await asyncio.create_subprocess_shell( str_cmd, stdin=asyncio.subprocess.PIPE, @@ -58,9 +58,11 @@ async def _async_command(*cmd: str, cwd: Optional[str] = None) -> str: @cached() async def is_r_clone_available(r_clone_settings: Optional[RCloneSettings]) -> bool: """returns: True if the `rclone` cli is installed and a configuration is provided""" + if r_clone_settings is None: + return False try: await _async_command("rclone", "--version") - return r_clone_settings is not None + return True except _CommandFailedException: return False @@ -88,7 +90,7 @@ async def sync_local_to_s3( source_path = local_file_path destination_path = Path(s3_path) file_name = local_file_path.name - # TODO: capture and send progress somehow? + # FIXME: capture progress and connect progressbars or some event to inform the UI # rclone only acts upon directories, so to target a specific file # we must run the command from the file's directory. 
See below @@ -112,12 +114,12 @@ async def sync_local_to_s3( "--config", config_file_name, "sync", - f"'{source_path.parent}'", - f"'dst:{destination_path.parent}'", + shlex.quote(f"{source_path.parent}"), + shlex.quote(f"dst:{destination_path.parent}"), "--progress", "--copy-links", "--include", - f"'{file_name}'", + shlex.quote(f"{file_name}"), ) try: From 656e4993aa44f35ec86cab5d4ef3c04e3a049d98 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 29 Apr 2022 14:22:25 +0200 Subject: [PATCH 61/78] fixing tests --- .github/workflows/ci-testing-deploy.yml | 2 + .../simcore-sdk/tests/integration/conftest.py | 46 +++++++++++-------- ...ger.py => test_node_data_data_manager_.py} | 0 .../test_node_ports_v2_nodeports2.py | 8 ++-- ...lone.py => test_node_ports_v2_r_clone_.py} | 0 5 files changed, 34 insertions(+), 22 deletions(-) rename packages/simcore-sdk/tests/integration/{test_node_data_data_manager.py => test_node_data_data_manager_.py} (100%) rename packages/simcore-sdk/tests/integration/{test_node_ports_v2_r_clone.py => test_node_ports_v2_r_clone_.py} (100%) diff --git a/.github/workflows/ci-testing-deploy.yml b/.github/workflows/ci-testing-deploy.yml index 958c854cc44..d0a756f5f64 100644 --- a/.github/workflows/ci-testing-deploy.yml +++ b/.github/workflows/ci-testing-deploy.yml @@ -1086,6 +1086,8 @@ jobs: driver: docker - name: setup docker-compose run: sudo ./ci/github/helpers/setup_docker_compose.bash ${{ matrix.docker_compose }} ${{ matrix.docker_compose_sha }} + - name: setup rclone docker volume plugin + run: sudo ./ci/github/helpers/install_rclone.bash - name: setup python environment uses: actions/setup-python@v3 with: diff --git a/packages/simcore-sdk/tests/integration/conftest.py b/packages/simcore-sdk/tests/integration/conftest.py index 2122d527fca..f903950b32a 100644 --- a/packages/simcore-sdk/tests/integration/conftest.py +++ b/packages/simcore-sdk/tests/integration/conftest.py @@ -5,7 +5,7 @@ import json from pathlib import Path -from typing import Any, Callable, Dict, Iterable, List, Tuple +from typing import Any, Awaitable, Callable, Coroutine, Dict, Iterable, List, Tuple from urllib.parse import quote_plus from uuid import uuid4 @@ -328,23 +328,33 @@ def _assign_config( @pytest.fixture -async def r_clone_settings( +async def r_clone_settings_factory( minio_config: Dict[str, Any], storage_service: URL -) -> RCloneSettings: - client = minio_config["client"] - settings = RCloneSettings.parse_obj( - dict( - R_CLONE_S3=dict( - S3_ENDPOINT=client["endpoint"], - S3_ACCESS_KEY=client["access_key"], - S3_SECRET_KEY=client["secret_key"], - S3_BUCKET_NAME=minio_config["bucket_name"], - S3_SECURE=client["secure"], - ), - R_CLONE_PROVIDER=S3Provider.MINIO, +) -> Awaitable[RCloneSettings]: + async def _factory() -> RCloneSettings: + client = minio_config["client"] + settings = RCloneSettings.parse_obj( + dict( + R_CLONE_S3=dict( + S3_ENDPOINT=client["endpoint"], + S3_ACCESS_KEY=client["access_key"], + S3_SECRET_KEY=client["secret_key"], + S3_BUCKET_NAME=minio_config["bucket_name"], + S3_SECURE=client["secure"], + ), + R_CLONE_PROVIDER=S3Provider.MINIO, + ) ) - ) - if not await is_r_clone_available(settings): - pytest.skip("rclone not installed") + if not await is_r_clone_available(settings): + pytest.skip("rclone not installed") + + return settings - return settings + return _factory() + + +@pytest.fixture +async def r_clone_settings( + r_clone_settings_factory: Awaitable[RCloneSettings], +) -> RCloneSettings: + return await r_clone_settings_factory diff --git 
diff --git a/packages/simcore-sdk/tests/integration/test_node_data_data_manager.py b/packages/simcore-sdk/tests/integration/test_node_data_data_manager_.py similarity index 100% rename from packages/simcore-sdk/tests/integration/test_node_data_data_manager.py rename to packages/simcore-sdk/tests/integration/test_node_data_data_manager_.py diff --git a/packages/simcore-sdk/tests/integration/test_node_ports_v2_nodeports2.py b/packages/simcore-sdk/tests/integration/test_node_ports_v2_nodeports2.py index 6bb904b4ef7..30878b49d4d 100644 --- a/packages/simcore-sdk/tests/integration/test_node_ports_v2_nodeports2.py +++ b/packages/simcore-sdk/tests/integration/test_node_ports_v2_nodeports2.py @@ -10,7 +10,7 @@ import threading from asyncio import gather from pathlib import Path -from typing import Any, Callable, Dict, Iterable, Optional, Type, Union +from typing import Any, Awaitable, Callable, Dict, Iterable, Optional, Type, Union from uuid import uuid4 import np_helpers # pylint: disable=no-name-in-module @@ -140,11 +140,11 @@ def config_value_symlink_path(symlink_path: Path) -> Dict[str, Any]: @pytest.fixture(params=[True, False]) -def option_r_clone_settings( - request, r_clone_settings: RCloneSettings +async def option_r_clone_settings( + request, r_clone_settings_factory: Awaitable[RCloneSettings] ) -> Optional[RCloneSettings]: if request.param: - return r_clone_settings + return await r_clone_settings_factory return None diff --git a/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py b/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone_.py similarity index 100% rename from packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone.py rename to packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone_.py From de6c1510c88490610da2d2110201a961c080e76d Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 29 Apr 2022 14:26:02 +0200 Subject: [PATCH 62/78] renaming --- .../src/simcore_service_dynamic_sidecar/api/containers.py | 4 ++-- .../modules/directory_watcher.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/api/containers.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/api/containers.py index 58839d4a2f2..ffe50483bd3 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/api/containers.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/api/containers.py @@ -39,7 +39,7 @@ ) from ..models.domains.shared_store import SharedStore from ..models.schemas.application_health import ApplicationHealth -from ..modules.directory_watcher import directory_watcher_disabler +from ..modules.directory_watcher import directory_watcher_disabled from ..modules.mounted_fs import MountedVolumes logger = logging.getLogger(__name__) @@ -65,7 +65,7 @@ async def _task_docker_compose_up( "docker-compose --project-name {project} --file {file_path} " "up --no-build --detach" ) - with directory_watcher_disabler(app): + with directory_watcher_disabled(app): finished_without_errors, stdout = await write_file_and_run_command( settings=settings, file_content=shared_store.compose_spec, diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/directory_watcher.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/directory_watcher.py index b86c908d606..4760cefe648 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/directory_watcher.py +++
b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/modules/directory_watcher.py @@ -217,7 +217,7 @@ def enable_directory_watcher(app: FastAPI) -> None: @contextmanager -def directory_watcher_disabler(app: FastAPI) -> Generator[None, None, None]: +def directory_watcher_disabled(app: FastAPI) -> Generator[None, None, None]: disable_directory_watcher(app) try: yield None @@ -228,6 +228,6 @@ def directory_watcher_disabler(app: FastAPI) -> Generator[None, None, None]: __all__ = [ "disable_directory_watcher", "enable_directory_watcher", - "directory_watcher_disabler", + "directory_watcher_disabled", "setup_directory_watcher", ] From 0af0f33cb48d5e46ffa4d941e30fc719f1855007 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 29 Apr 2022 15:22:48 +0200 Subject: [PATCH 63/78] removed delete file metadata; metadata is updated when uploading file --- api/specs/storage/openapi.yaml | 24 ------------ .../node_ports_common/constants.py | 3 ++ .../node_ports_common/filemanager.py | 16 +++++--- .../simcore_sdk/node_ports_common/r_clone.py | 27 +++++++------ .../node_ports_common/storage_client.py | 12 ------ .../test_node_ports_v2_r_clone_.py | 38 ++++++++++++------- .../api/v0/openapi.yaml | 24 ------------ .../src/simcore_service_storage/dsm.py | 21 ---------- .../src/simcore_service_storage/handlers.py | 19 ---------- 9 files changed, 52 insertions(+), 132 deletions(-) diff --git a/api/specs/storage/openapi.yaml b/api/specs/storage/openapi.yaml index 2cb46b26dc7..e3440b8315d 100644 --- a/api/specs/storage/openapi.yaml +++ b/api/specs/storage/openapi.yaml @@ -271,30 +271,6 @@ paths: $ref: "#/components/schemas/FileMetaEnvelope" default: $ref: "#/components/responses/DefaultErrorResponse" - delete: - summary: Removes a file's meta data entry - operationId: delete_file_meta_data - parameters: - - name: fileId - in: path - required: true - schema: - type: string - - name: location_id - in: path - required: true - schema: - type: string - - name: user_id - in: query - required: true - schema: - type: string - responses: - "204": - description: "Removes the file meta data entry from the database" - default: - $ref: "#/components/responses/DefaultErrorResponse" /locations/{location_id}/files/{fileId}: get: diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/constants.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/constants.py index c46317b16af..342cd410c05 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/constants.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/constants.py @@ -1 +1,4 @@ +from typing import Final + +SIMCORE_LOCATION: Final[str] = "0" ETag = str diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/filemanager.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/filemanager.py index 5a1845862ec..85920f1b438 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/filemanager.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/filemanager.py @@ -13,8 +13,9 @@ from yarl import URL from ..node_ports_common.client_session_manager import ClientSessionContextManager +from ..node_ports_common.storage_client import update_file_meta_data from . 
import exceptions, storage_client -from .constants import ETag +from .constants import SIMCORE_LOCATION, ETag from .r_clone import is_r_clone_available, sync_local_to_s3 log = logging.getLogger(__name__) @@ -281,17 +282,22 @@ async def upload_file( raise exceptions.S3InvalidPathError(s3_object) if await is_r_clone_available(r_clone_settings): - e_tag = await sync_local_to_s3( + if store_id != SIMCORE_LOCATION: + raise exceptions.S3InvalidStore(store_id) + await sync_local_to_s3( session=session, r_clone_settings=r_clone_settings, s3_object=s3_object, local_file_path=local_file_path, user_id=user_id, + store_id=store_id, ) - # TODO: maybe a better check here or an error if do not match? - store_id = "0" # simcore only feature else: - e_tag = await _upload_file_to_link(session, upload_link, local_file_path) + await _upload_file_to_link(session, upload_link, local_file_path) + + e_tag = await update_file_meta_data( + session=session, s3_object=s3_object, user_id=user_id + ) return store_id, e_tag diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py index 1051b7f6537..10ce5891a16 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py @@ -1,11 +1,11 @@ import asyncio import logging import re +import shlex from contextlib import asynccontextmanager from pathlib import Path from typing import AsyncGenerator, Optional -import shlex from aiocache import cached from aiofiles import tempfile from aiohttp import ClientSession @@ -14,12 +14,8 @@ from settings_library.r_clone import RCloneSettings from settings_library.utils_r_clone import get_r_clone_config -from .constants import ETag -from .storage_client import ( - delete_file_meta_data, - get_upload_file_link, - update_file_meta_data, -) +from .constants import SIMCORE_LOCATION +from .storage_client import delete_file, get_upload_file_link logger = logging.getLogger(__name__) @@ -73,12 +69,15 @@ async def sync_local_to_s3( s3_object: str, local_file_path: Path, user_id: UserID, -) -> ETag: + store_id: str, +) -> None: + """NOTE: only works with simcore location""" + assert store_id == SIMCORE_LOCATION s3_link = await get_upload_file_link( session=session, file_id=s3_object, - location_id="0", # only works with simcore s3 + location_id=store_id, user_id=user_id, as_presigned_link=False, ) @@ -124,14 +123,14 @@ async def sync_local_to_s3( try: await _async_command(*r_clone_command, cwd=f"{source_path.parent}") - return await update_file_meta_data( - session=session, s3_object=s3_object, user_id=user_id - ) except Exception as e: logger.warning( "There was an error while uploading %s. 
Removing metadata", s3_object ) - await delete_file_meta_data( - session=session, s3_object=s3_object, user_id=user_id + await delete_file( + session=session, + file_id=s3_object, + location_id=store_id, + user_id=user_id, ) raise e diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/storage_client.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/storage_client.py index 19fb4e6736e..490f0652800 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/storage_client.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/storage_client.py @@ -195,15 +195,3 @@ async def update_file_meta_data( response = await result.json() return response["data"]["entity_tag"] - - -@handle_client_exception -async def delete_file_meta_data( - session: ClientSession, s3_object: str, user_id: UserID -) -> None: - url = f"{_base_url()}/locations/0/files/{quote_plus(s3_object)}/metadata" - result = await session.delete(url, params=dict(user_id=user_id)) - if result.status != web.HTTPNoContent.status_code: - raise exceptions.StorageInvalidCall( - f"Could not fetch metadata: status={result.status} {await result.text()}" - ) diff --git a/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone_.py b/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone_.py index c49e9ddfa67..c07e93ddcec 100644 --- a/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone_.py +++ b/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone_.py @@ -1,11 +1,11 @@ # pylint: disable=redefined-outer-name # pylint: disable=unused-argument +import asyncio import shutil from contextlib import asynccontextmanager -from importlib import reload from pathlib import Path -from typing import AsyncGenerator, AsyncIterable, Iterator +from typing import AsyncGenerator, AsyncIterable, Final, Iterator, Optional from uuid import uuid4 import aioboto3 @@ -17,7 +17,8 @@ from pytest_mock.plugin import MockerFixture from settings_library.r_clone import RCloneSettings from simcore_postgres_database.models.file_meta_data import file_meta_data -from simcore_sdk.node_ports_common import r_clone, storage_client +from simcore_sdk.node_ports_common import r_clone +from simcore_sdk.node_ports_common.constants import SIMCORE_LOCATION pytest_simcore_core_services_selection = [ "migration", @@ -31,6 +32,9 @@ ] +WAIT_FOR_S3_BACKEND_TO_UPDATE: Final[float] = 1.0 + + class _TestException(Exception): pass @@ -86,13 +90,21 @@ async def cleanup_s3( @pytest.fixture -def mock_update_file_meta_data(mocker: MockerFixture) -> None: +def raise_error_after_upload( + mocker: MockerFixture, postgres_db: sa.engine.Engine, s3_object: str +) -> None: + handler = r_clone._async_command # pylint: disable=protected-access + + async def _mock_async_command(*cmd: str, cwd: Optional[str] = None) -> str: + await handler(*cmd, cwd=cwd) + assert _is_file_present(postgres_db=postgres_db, s3_object=s3_object) is True + + raise _TestException() + mocker.patch( - "simcore_sdk.node_ports_common.storage_client.update_file_meta_data", - side_effect=_TestException, + "simcore_sdk.node_ports_common.r_clone._async_command", + side_effect=_mock_async_command, ) - reload(r_clone) - reload(storage_client) @pytest.fixture @@ -125,6 +137,7 @@ async def _get_s3_object( async def _download_s3_object( r_clone_settings: RCloneSettings, s3_path: str, local_path: Path ): + await asyncio.sleep(WAIT_FOR_S3_BACKEND_TO_UPDATE) async with _get_s3_object(r_clone_settings, s3_path) as s3_object: await 
s3_object.download_file(f"{local_path}") @@ -154,17 +167,15 @@ async def test_sync_local_to_s3( cleanup_s3: None, ) -> None: - etag = await r_clone.sync_local_to_s3( + await r_clone.sync_local_to_s3( session=client_session, r_clone_settings=r_clone_settings, s3_object=s3_object, local_file_path=file_to_upload, user_id=user_id, + store_id=SIMCORE_LOCATION, ) - assert isinstance(etag, str) - assert '"' not in etag - await _download_s3_object( r_clone_settings=r_clone_settings, s3_path=s3_object, @@ -185,7 +196,7 @@ async def test_sync_local_to_s3_cleanup_on_error( postgres_db: sa.engine.Engine, client_session: ClientSession, cleanup_s3: None, - mock_update_file_meta_data: None, + raise_error_after_upload: None, ) -> None: with pytest.raises(_TestException): await r_clone.sync_local_to_s3( @@ -194,5 +205,6 @@ async def test_sync_local_to_s3_cleanup_on_error( s3_object=s3_object, local_file_path=file_to_upload, user_id=user_id, + store_id=SIMCORE_LOCATION, ) assert _is_file_present(postgres_db=postgres_db, s3_object=s3_object) is False diff --git a/services/storage/src/simcore_service_storage/api/v0/openapi.yaml b/services/storage/src/simcore_service_storage/api/v0/openapi.yaml index e411ada4160..25fa6d2d2bb 100644 --- a/services/storage/src/simcore_service_storage/api/v0/openapi.yaml +++ b/services/storage/src/simcore_service_storage/api/v0/openapi.yaml @@ -265,30 +265,6 @@ paths: $ref: '#/components/schemas/FileMetaEnvelope' default: $ref: '#/components/responses/DefaultErrorResponse' - delete: - summary: Removes a file's meta data entry - operationId: delete_file_meta_data - parameters: - - name: fileId - in: path - required: true - schema: - type: string - - name: location_id - in: path - required: true - schema: - type: string - - name: user_id - in: query - required: true - schema: - type: string - responses: - '204': - description: Removes the file meta data entry from the database - default: - $ref: '#/components/responses/DefaultErrorResponse' '/locations/{location_id}/files/{fileId}': get: summary: Gets download link for file at location diff --git a/services/storage/src/simcore_service_storage/dsm.py b/services/storage/src/simcore_service_storage/dsm.py index 20193183103..d16e21a57b1 100644 --- a/services/storage/src/simcore_service_storage/dsm.py +++ b/services/storage/src/simcore_service_storage/dsm.py @@ -520,27 +520,6 @@ async def update_metadata( object_name=object_name, ) - async def delete_metadata(self, user_id: int, file_uuid: str) -> None: - async with self.engine.acquire() as conn: - can: Optional[AccessRights] = await get_file_access_rights( - conn, int(user_id), file_uuid - ) - if not can.write: - raise web.HTTPForbidden( - reason=f"User {user_id} was not allowed to upload file {file_uuid}" - ) - - try: - await conn.execute( - file_meta_data.delete().where( - file_meta_data.c.file_uuid == file_uuid - ) - ) - except DatabaseError as err: - raise web.HTTPNotFound( - reason=f"Could not delete metadata entry for file {file_uuid}" - ) from err - async def _generate_metadata_for_link(self, user_id: str, file_uuid: str): """ Updates metadata table when link is used and upload is successfuly completed diff --git a/services/storage/src/simcore_service_storage/handlers.py b/services/storage/src/simcore_service_storage/handlers.py index 5ff9a8b5473..9082adee1ae 100644 --- a/services/storage/src/simcore_service_storage/handlers.py +++ b/services/storage/src/simcore_service_storage/handlers.py @@ -295,25 +295,6 @@ async def update_file_meta_data(request: web.Request): } 
-@routes.delete(f"/{api_vtag}/locations/{{location_id}}/files/{{fileId}}/metadata") # type: ignore -async def delete_file_meta_data(request: web.Request): - params, query, body = await extract_and_validate(request) - - assert params, "params %s" % params # nosec - assert query, "query %s" % query # nosec - assert not body, "body %s" % body # nosec - - with handle_storage_errors(): - user_id = query["user_id"] - file_uuid = urllib.parse.unquote_plus(params["fileId"]) - - dsm = await _prepare_storage_manager(params, query, request) - - await dsm.delete_metadata(user_id=user_id, file_uuid=file_uuid) - - raise web.HTTPNoContent(content_type="application/json") - - @routes.get(f"/{api_vtag}/locations/{{location_id}}/files/{{fileId}}") # type: ignore async def download_file(request: web.Request): params, query, body = await extract_and_validate(request) From 01f56ebd6760bfcc797114714c9d34f25354f4d7 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 29 Apr 2022 15:35:05 +0200 Subject: [PATCH 64/78] removes file and metadata in case of upload error --- .../src/simcore_sdk/node_ports_common/filemanager.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/filemanager.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/filemanager.py index 85920f1b438..da9657bc170 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/filemanager.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/filemanager.py @@ -293,7 +293,16 @@ async def upload_file( store_id=store_id, ) else: - await _upload_file_to_link(session, upload_link, local_file_path) + try: + await _upload_file_to_link(session, upload_link, local_file_path) + except exceptions.S3TransferError as err: + await delete_file( + user_id=user_id, + store_id=store_id, + s3_object=s3_object, + client_session=session, + ) + raise err e_tag = await update_file_meta_data( session=session, s3_object=s3_object, user_id=user_id From b02d8bb3d956157717f97eee0a145dae21b419ed Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 29 Apr 2022 15:53:50 +0200 Subject: [PATCH 65/78] added nosec --- .../simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py index 10ce5891a16..deb9340d615 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py @@ -72,7 +72,7 @@ async def sync_local_to_s3( store_id: str, ) -> None: """NOTE: only works with simcore location""" - assert store_id == SIMCORE_LOCATION + assert store_id == SIMCORE_LOCATION # nosec s3_link = await get_upload_file_link( session=session, From 46d4f7a87a69276ff87a78467b06f1a5f90f52f5 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 29 Apr 2022 16:15:55 +0200 Subject: [PATCH 66/78] fix pylint --- packages/simcore-sdk/tests/integration/conftest.py | 2 +- services/storage/src/simcore_service_storage/dsm.py | 6 +----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/packages/simcore-sdk/tests/integration/conftest.py b/packages/simcore-sdk/tests/integration/conftest.py index f903950b32a..d266284fea3 100644 --- a/packages/simcore-sdk/tests/integration/conftest.py +++ b/packages/simcore-sdk/tests/integration/conftest.py @@ -5,7 +5,7 @@ import json from pathlib import Path -from typing import Any, 
Awaitable, Callable, Coroutine, Dict, Iterable, List, Tuple +from typing import Any, Awaitable, Callable, Dict, Iterable, List, Tuple from urllib.parse import quote_plus from uuid import uuid4 diff --git a/services/storage/src/simcore_service_storage/dsm.py b/services/storage/src/simcore_service_storage/dsm.py index d16e21a57b1..5e821eaee3b 100644 --- a/services/storage/src/simcore_service_storage/dsm.py +++ b/services/storage/src/simcore_service_storage/dsm.py @@ -25,11 +25,7 @@ from aiopg.sa import Engine from aiopg.sa.result import ResultProxy, RowProxy from pydantic import AnyUrl, parse_obj_as -from servicelib.aiohttp.aiopg_utils import ( - DatabaseError, - DBAPIError, - PostgresRetryPolicyUponOperation, -) +from servicelib.aiohttp.aiopg_utils import DBAPIError, PostgresRetryPolicyUponOperation from servicelib.aiohttp.client_session import get_client_session from servicelib.utils import fire_and_forget_task from sqlalchemy.dialects.postgresql import insert as pg_insert From 9f2707e1ac69be30f11cb5b43d3937697fea8d33 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 2 May 2022 13:02:44 +0200 Subject: [PATCH 67/78] new dict format migration --- .../pytest-simcore/src/pytest_simcore/minio_service.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/pytest-simcore/src/pytest_simcore/minio_service.py b/packages/pytest-simcore/src/pytest_simcore/minio_service.py index 3a15df0419f..3d1963b75ad 100644 --- a/packages/pytest-simcore/src/pytest_simcore/minio_service.py +++ b/packages/pytest-simcore/src/pytest_simcore/minio_service.py @@ -3,7 +3,7 @@ # pylint: disable=unused-variable import logging -from typing import Any, Dict, Iterator +from typing import Any, Iterator import pytest from _pytest.monkeypatch import MonkeyPatch @@ -43,8 +43,8 @@ def _ensure_remove_bucket(client: Minio, bucket_name: str): @pytest.fixture(scope="module") def minio_config( - docker_stack: Dict, testing_environ_vars: Dict, monkeypatch_module: MonkeyPatch -) -> Dict[str, Any]: + docker_stack: dict, testing_environ_vars: dict, monkeypatch_module: MonkeyPatch +) -> dict[str, Any]: assert "pytest-ops_minio" in docker_stack["services"] config = { @@ -68,7 +68,7 @@ def minio_config( @pytest.fixture(scope="module") -def minio_service(minio_config: Dict[str, str]) -> Iterator[Minio]: +def minio_service(minio_config: dict[str, str]) -> Iterator[Minio]: client = Minio(**minio_config["client"]) @@ -99,7 +99,7 @@ def minio_service(minio_config: Dict[str, str]) -> Iterator[Minio]: @pytest.fixture(scope="module") -def bucket(minio_config: Dict[str, str], minio_service: Minio) -> Iterator[str]: +def bucket(minio_config: dict[str, str], minio_service: Minio) -> Iterator[str]: bucket_name = minio_config["bucket_name"] _ensure_remove_bucket(minio_service, bucket_name) From 181d3ebe52a857d18834560835012a6b3f86d135 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 2 May 2022 13:19:22 +0200 Subject: [PATCH 68/78] update requirements --- services/director-v2/requirements/_base.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/services/director-v2/requirements/_base.txt b/services/director-v2/requirements/_base.txt index 07f7b4b56fb..13fc08ec30a 100644 --- a/services/director-v2/requirements/_base.txt +++ b/services/director-v2/requirements/_base.txt @@ -7,7 +7,9 @@ aio-pika==6.8.0 # via -r requirements/_base.in aiocache==0.11.1 - # via -r requirements/_base.in + # via + # -r requirements/../../../packages/simcore-sdk/requirements/_base.in + # -r requirements/_base.in 
aiodebug==2.3.0 # via # -c requirements/../../../packages/service-library/requirements/./_base.in @@ -59,8 +61,6 @@ anyio==3.2.1 # via # starlette asgiref==3.4.1 # via uvicorn -async-cache==1.1.1 # via -r requirements/../../../packages/simcore-sdk/requirements/_base.in async-timeout==4.0.2 # via # aiohttp From c34badcbf2d8b4aa22481c4b20fc886f24073561 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 2 May 2022 13:19:56 +0200 Subject: [PATCH 69/78] refactor installation --- .github/workflows/ci-testing-deploy.yml | 8 +++++--- ci/github/helpers/install_rclone.bash | 13 +++++------- .../install_rclone_docker_volume_plugin.bash | 20 +++++++++++++++++++ 3 files changed, 30 insertions(+), 11 deletions(-) create mode 100755 ci/github/helpers/install_rclone_docker_volume_plugin.bash diff --git a/.github/workflows/ci-testing-deploy.yml b/.github/workflows/ci-testing-deploy.yml index d0a756f5f64..5db3402d809 100644 --- a/.github/workflows/ci-testing-deploy.yml +++ b/.github/workflows/ci-testing-deploy.yml @@ -1036,6 +1036,8 @@ jobs: driver: docker - name: setup docker-compose run: sudo ./ci/github/helpers/setup_docker_compose.bash ${{ matrix.docker_compose }} ${{ matrix.docker_compose_sha }} + - name: install rclone in CI + run: sudo ./ci/github/helpers/install_rclone.bash - name: setup python environment uses: actions/setup-python@v3 with: @@ -1086,8 +1088,6 @@ jobs: driver: docker - name: setup docker-compose run: sudo ./ci/github/helpers/setup_docker_compose.bash ${{ matrix.docker_compose }} ${{ matrix.docker_compose_sha }} - - name: setup rclone docker volume plugin - run: sudo ./ci/github/helpers/install_rclone.bash - name: setup python environment uses: actions/setup-python@v3 with: @@ -1345,7 +1345,7 @@ jobs: - name: setup docker-compose run: sudo ./ci/github/helpers/setup_docker_compose.bash ${{ matrix.docker_compose }} ${{ matrix.docker_compose_sha }} - name: setup rclone docker volume plugin - run: sudo ./ci/github/helpers/install_rclone.bash + run: sudo ./ci/github/helpers/install_rclone_docker_volume_plugin.bash - name: setup python environment uses: actions/setup-python@v3 with: @@ -1412,6 +1412,8 @@ jobs: driver: docker - name: setup docker-compose run: sudo ./ci/github/helpers/setup_docker_compose.bash ${{ matrix.docker_compose }} ${{ matrix.docker_compose_sha }} + - name: install rclone in CI + run: sudo ./ci/github/helpers/install_rclone.bash - name: setup python environment uses: actions/setup-python@v3 with: diff --git a/ci/github/helpers/install_rclone.bash b/ci/github/helpers/install_rclone.bash index 3bc38a27db9..5f6f561fa42 100755 --- a/ci/github/helpers/install_rclone.bash +++ b/ci/github/helpers/install_rclone.bash @@ -10,11 +10,8 @@ set -o pipefail # don't hide errors within pipes IFS=$'\n\t' -# Installation instructions from https://rclone.org/docker/ - -apt-get -y install fuse=2.9.9-3 -mkdir -p /var/lib/docker-plugins/rclone/config -mkdir -p /var/lib/docker-plugins/rclone/cache -docker plugin install rclone/docker-volume-rclone:amd64-1.57.0 args="-v" --alias rclone --grant-all-permissions -docker plugin list -docker plugin inspect rclone +R_CLONE_VERSION="1.58.0" +curl -sLO "https://downloads.rclone.org/v${R_CLONE_VERSION}/rclone-v${R_CLONE_VERSION}-linux-amd64.deb" +dpkg -i "rclone-v${R_CLONE_VERSION}-linux-amd64.deb" +rm "rclone-v${R_CLONE_VERSION}-linux-amd64.deb" +rclone --version
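The plain binary installed here is what `is_r_clone_available` probes at runtime via `rclone --version`; conceptually that probe reduces to something like this sketch (simplified, no caching or error mapping, helper name invented):

    import asyncio

    async def rclone_is_installed() -> bool:
        # mirrors the `rclone --version` subprocess check, minus logging and retries
        proc = await asyncio.create_subprocess_shell(
            "rclone --version",
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.STDOUT,
        )
        await proc.communicate()
        return proc.returncode == 0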
diff --git a/ci/github/helpers/install_rclone_docker_volume_plugin.bash b/ci/github/helpers/install_rclone_docker_volume_plugin.bash new file mode 100755 index 00000000000..3bc38a27db9 --- /dev/null +++ b/ci/github/helpers/install_rclone_docker_volume_plugin.bash @@ -0,0 +1,20 @@ +#!/bin/bash +# +# Installs the latest version of rclone plugin +# + +# http://redsymbol.net/articles/unofficial-bash-strict-mode/ +set -o errexit # abort on nonzero exitstatus +set -o nounset # abort on unbound variable +set -o pipefail # don't hide errors within pipes +IFS=$'\n\t' + + +# Installation instructions from https://rclone.org/docker/ + +apt-get -y install fuse=2.9.9-3 +mkdir -p /var/lib/docker-plugins/rclone/config +mkdir -p /var/lib/docker-plugins/rclone/cache +docker plugin install rclone/docker-volume-rclone:amd64-1.57.0 args="-v" --alias rclone --grant-all-permissions +docker plugin list +docker plugin inspect rclone From 5d18d781de443084484f248cd79141c9603301c5 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 2 May 2022 13:43:45 +0200 Subject: [PATCH 70/78] @sanderegg always upload instead of raising error --- .../src/simcore_sdk/node_ports_common/filemanager.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/filemanager.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/filemanager.py index da9657bc170..bea1a2dc400 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/filemanager.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/filemanager.py @@ -281,9 +281,10 @@ async def upload_file( if not upload_link: raise exceptions.S3InvalidPathError(s3_object) - if await is_r_clone_available(r_clone_settings): - if store_id != SIMCORE_LOCATION: - raise exceptions.S3InvalidStore(store_id) + if ( + await is_r_clone_available(r_clone_settings) + and store_id == SIMCORE_LOCATION + ): await sync_local_to_s3( session=session, r_clone_settings=r_clone_settings, From 7c6cd29177ad1fe6ede4abed919adf1efc3be984 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 3 May 2022 10:00:09 +0200 Subject: [PATCH 71/78] fix tests after merge --- .../simcore_sdk/node_ports_common/r_clone.py | 4 +-- .../tests/unit/test_node_ports_v2_r_clone.py | 28 +++++++++++++++---- .../tests/unit/test_storage_client.py | 2 +- 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py index deb9340d615..237339d663b 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/r_clone.py @@ -15,7 +15,7 @@ from settings_library.utils_r_clone import get_r_clone_config from .constants import SIMCORE_LOCATION -from .storage_client import delete_file, get_upload_file_link +from .storage_client import LinkType, delete_file, get_upload_file_link logger = logging.getLogger(__name__) @@ -79,7 +79,7 @@ async def sync_local_to_s3( file_id=s3_object, location_id=store_id, user_id=user_id, - as_presigned_link=False, + link_type=LinkType.S3, ) s3_path = re.sub(r"^s3://", "", s3_link) logger.debug(" %s; %s", f"{s3_link=}", f"{s3_path=}") diff --git a/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py b/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py index c9e3c970d18..74267ab3463 100644 --- a/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py +++ b/packages/simcore-sdk/tests/unit/test_node_ports_v2_r_clone.py @@ -4,10 +4,11 @@ import subprocess from pathlib import Path -from typing import List +from typing import Iterable, List, Optional +from unittest.mock import Mock
import pytest -from _pytest.logging import LogCaptureFixture +from pytest_mock.plugin import MockerFixture from faker import Faker from pytest import MonkeyPatch from settings_library.r_clone import S3Provider @@ -41,16 +42,33 @@ def skip_if_r_clone_is_missing() -> None: pytest.skip("rclone is not installed") +@pytest.fixture +def mock_async_command(mocker: MockerFixture) -> Iterable[Mock]: + mock = Mock() + + original_async_command = r_clone._async_command + + async def _mock_async_command(*cmd: str, cwd: Optional[str] = None) -> str: + mock() + return await original_async_command(*cmd, cwd=cwd) + + mocker.patch( + "simcore_sdk.node_ports_common.r_clone._async_command", + side_effect=_mock_async_command, + ) + + yield mock + + async def test_is_r_clone_available_cached( - caplog: LogCaptureFixture, r_clone_settings: RCloneSettings, + mock_async_command: Mock, skip_if_r_clone_is_missing: None, ) -> None: for _ in range(3): result = await r_clone.is_r_clone_available(r_clone_settings) assert type(result) is bool - assert "'rclone --version' result:\n" in caplog.text - assert caplog.text.count("'rclone --version' result:\n") == 1 + assert mock_async_command.call_count == 1 assert await r_clone.is_r_clone_available(None) is False diff --git a/packages/simcore-sdk/tests/unit/test_storage_client.py b/packages/simcore-sdk/tests/unit/test_storage_client.py index ced8804c339..03429963ae9 100644 --- a/packages/simcore-sdk/tests/unit/test_storage_client.py +++ b/packages/simcore-sdk/tests/unit/test_storage_client.py @@ -143,5 +143,5 @@ async def test_invalid_calls( if ( # pylint: disable=comparison-with-callable fct_call == get_upload_file_link ): - kwargs["as_presigned_link"] = True + kwargs["link_type"] = LinkType.S3 await fct_call(session=session, **kwargs) From 448bce14b081e5ee9f63f82329bce069f0802545 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 4 May 2022 10:30:39 +0200 Subject: [PATCH 72/78] using full parameter names --- ci/github/helpers/install_rclone.bash | 4 ++-- ci/github/helpers/install_rclone_docker_volume_plugin.bash | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ci/github/helpers/install_rclone.bash b/ci/github/helpers/install_rclone.bash index 5f6f561fa42..66f6a3ec83a 100755 --- a/ci/github/helpers/install_rclone.bash +++ b/ci/github/helpers/install_rclone.bash @@ -11,7 +11,7 @@ IFS=$'\n\t' R_CLONE_VERSION="1.58.0" -curl -sLO "https://downloads.rclone.org/v${R_CLONE_VERSION}/rclone-v${R_CLONE_VERSION}-linux-amd64.deb" -dpkg -i "rclone-v${R_CLONE_VERSION}-linux-amd64.deb" +curl --silent --location --remote-name "https://downloads.rclone.org/v${R_CLONE_VERSION}/rclone-v${R_CLONE_VERSION}-linux-amd64.deb" +dpkg --install "rclone-v${R_CLONE_VERSION}-linux-amd64.deb" rm "rclone-v${R_CLONE_VERSION}-linux-amd64.deb" rclone --version diff --git a/ci/github/helpers/install_rclone_docker_volume_plugin.bash b/ci/github/helpers/install_rclone_docker_volume_plugin.bash index 3bc38a27db9..23f1968b027 100755 --- a/ci/github/helpers/install_rclone_docker_volume_plugin.bash +++ b/ci/github/helpers/install_rclone_docker_volume_plugin.bash @@ -13,8 +13,8 @@ IFS=$'\n\t' # Installation instructions from https://rclone.org/docker/ apt-get -y install fuse=2.9.9-3 -mkdir -p /var/lib/docker-plugins/rclone/config -mkdir -p /var/lib/docker-plugins/rclone/cache +mkdir --parents /var/lib/docker-plugins/rclone/config +mkdir --parents /var/lib/docker-plugins/rclone/cache docker plugin install rclone/docker-volume-rclone:amd64-1.57.0 args="-v" --alias rclone 
--grant-all-permissions docker plugin list docker plugin inspect rclone From 7b226fa0a03e1cd8097b7915baf23dc51505d229 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 4 May 2022 10:30:59 +0200 Subject: [PATCH 73/78] replaced helper with validator --- .../settings-library/src/settings_library/s3.py | 15 ++++++++------- .../src/settings_library/utils_r_clone.py | 2 +- .../integration/test_node_ports_v2_r_clone_.py | 2 +- .../docker_service_specs/sidecar.py | 2 +- .../test_dynamic_sidecar_nodeports_integration.py | 2 +- .../director-v2/tests/unit/test_core_settings.py | 6 +++--- 6 files changed, 15 insertions(+), 14 deletions(-) diff --git a/packages/settings-library/src/settings_library/s3.py b/packages/settings-library/src/settings_library/s3.py index b467830aed0..bf6644a0ca7 100644 --- a/packages/settings-library/src/settings_library/s3.py +++ b/packages/settings-library/src/settings_library/s3.py @@ -1,7 +1,7 @@ from typing import Optional from .base import BaseCustomSettings -from functools import cached_property +from pydantic import validator class S3Settings(BaseCustomSettings): @@ -12,9 +12,10 @@ class S3Settings(BaseCustomSettings): S3_BUCKET_NAME: str S3_SECURE: bool = False - @cached_property - def endpoint(self) -> str: - if not self.S3_ENDPOINT.startswith("http"): - scheme = "https" if self.S3_SECURE else "http" - return f"{scheme}://{self.S3_ENDPOINT}" - return self.S3_ENDPOINT + @validator("S3_ENDPOINT", pre=True) + @classmethod + def ensure_scheme(cls, v: str, values) -> str: + if not v.startswith("http"): + scheme = "https" if values.get("S3_SECURE") else "http" + return f"{scheme}://{v}" + return v diff --git a/packages/settings-library/src/settings_library/utils_r_clone.py b/packages/settings-library/src/settings_library/utils_r_clone.py index 3bf6eb00111..38914f064ed 100644 --- a/packages/settings-library/src/settings_library/utils_r_clone.py +++ b/packages/settings-library/src/settings_library/utils_r_clone.py @@ -39,7 +39,7 @@ def get_r_clone_config(r_clone_settings: RCloneSettings) -> str: # replace entries in template r_clone_config = r_clone_config_template.format( - endpoint=r_clone_settings.R_CLONE_S3.endpoint, + endpoint=r_clone_settings.R_CLONE_S3.S3_ENDPOINT, access_key=r_clone_settings.R_CLONE_S3.S3_ACCESS_KEY, secret_key=r_clone_settings.R_CLONE_S3.S3_SECRET_KEY, aws_region=r_clone_settings.R_CLONE_REGION, ) diff --git a/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone_.py b/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone_.py index c07e93ddcec..d7345dada3f 100644 --- a/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone_.py +++ b/packages/simcore-sdk/tests/integration/test_node_ports_v2_r_clone_.py @@ -125,7 +125,7 @@ async def _get_s3_object( aws_secret_access_key=r_clone_settings.R_CLONE_S3.S3_SECRET_KEY, ) async with session.resource( - "s3", endpoint_url=r_clone_settings.R_CLONE_S3.endpoint + "s3", endpoint_url=r_clone_settings.R_CLONE_S3.S3_ENDPOINT ) as s3: s3_object = await s3.Object( bucket_name=r_clone_settings.R_CLONE_S3.S3_BUCKET_NAME, diff --git a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/sidecar.py b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/sidecar.py index a7908dab2d2..49c776c97e8 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/sidecar.py +++
b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_service_specs/sidecar.py @@ -63,7 +63,7 @@ def _get_environment_variables( "RABBIT_USER": f"{rabbit_settings.RABBIT_USER}", "RABBIT_PASSWORD": f"{rabbit_settings.RABBIT_PASSWORD.get_secret_value()}", "RABBIT_CHANNELS": json_dumps(rabbit_settings.RABBIT_CHANNELS), - "S3_ENDPOINT": r_clone_settings.R_CLONE_S3.endpoint, + "S3_ENDPOINT": r_clone_settings.R_CLONE_S3.S3_ENDPOINT, "S3_ACCESS_KEY": r_clone_settings.R_CLONE_S3.S3_ACCESS_KEY, "S3_SECRET_KEY": r_clone_settings.R_CLONE_S3.S3_SECRET_KEY, "S3_BUCKET_NAME": r_clone_settings.R_CLONE_S3.S3_BUCKET_NAME, diff --git a/services/director-v2/tests/integration/02/test_dynamic_sidecar_nodeports_integration.py b/services/director-v2/tests/integration/02/test_dynamic_sidecar_nodeports_integration.py index 9636f08e059..5db46afcec2 100644 --- a/services/director-v2/tests/integration/02/test_dynamic_sidecar_nodeports_integration.py +++ b/services/director-v2/tests/integration/02/test_dynamic_sidecar_nodeports_integration.py @@ -619,7 +619,7 @@ async def _fetch_data_via_aioboto( aws_secret_access_key=r_clone_settings.S3_SECRET_KEY, ) async with session.resource( - "s3", endpoint_url=r_clone_settings.R_CLONE_S3.endpoint + "s3", endpoint_url=r_clone_settings.R_CLONE_S3.S3_ENDPOINT ) as s3: bucket = await s3.Bucket(r_clone_settings.S3_BUCKET_NAME) async for s3_object in bucket.objects.all(): diff --git a/services/director-v2/tests/unit/test_core_settings.py b/services/director-v2/tests/unit/test_core_settings.py index f479d90743c..16061df9ffb 100644 --- a/services/director-v2/tests/unit/test_core_settings.py +++ b/services/director-v2/tests/unit/test_core_settings.py @@ -35,7 +35,7 @@ def test_supported_backends_did_not_change() -> None: "endpoint, is_secure", [ ("localhost", False), - ("s3_aws", True), + ("s3_aws", False), ("https://ceph.home", True), ("http://local.dev", False), ], @@ -53,8 +53,8 @@ def test_expected_s3_endpoint( r_clone_settings = RCloneSettings() scheme = "https" if is_secure else "http" - assert r_clone_settings.R_CLONE_S3.endpoint.startswith(f"{scheme}://") - assert r_clone_settings.R_CLONE_S3.endpoint.endswith(endpoint) + assert r_clone_settings.R_CLONE_S3.S3_ENDPOINT.startswith(f"{scheme}://") + assert r_clone_settings.R_CLONE_S3.S3_ENDPOINT.endswith(endpoint) def test_enforce_r_clone_requirement(monkeypatch: MonkeyPatch) -> None: From b109667ecefbd0fe33ed99f1bdf940fa52cbbeb2 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 4 May 2022 10:31:08 +0200 Subject: [PATCH 74/78] using full parameter names --- services/dynamic-sidecar/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/dynamic-sidecar/Dockerfile b/services/dynamic-sidecar/Dockerfile index cabdca914c6..167d6c51610 100644 --- a/services/dynamic-sidecar/Dockerfile +++ b/services/dynamic-sidecar/Dockerfile @@ -49,8 +49,8 @@ ENV DY_VOLUMES="/dy-volumes" # rclone installation ARG R_CLONE_VERSION="1.58.0" -RUN curl -sLO "https://downloads.rclone.org/v${R_CLONE_VERSION}/rclone-v${R_CLONE_VERSION}-linux-amd64.deb" && \ - dpkg -i "rclone-v${R_CLONE_VERSION}-linux-amd64.deb" && \ +RUN curl --silent --location --remote-name "https://downloads.rclone.org/v${R_CLONE_VERSION}/rclone-v${R_CLONE_VERSION}-linux-amd64.deb" && \ + dpkg --install "rclone-v${R_CLONE_VERSION}-linux-amd64.deb" && \ rm "rclone-v${R_CLONE_VERSION}-linux-amd64.deb" && \ rclone --version
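To see why the next patch is needed: since the PATCH 73 validator, `S3_ENDPOINT` is normalized at parse time, so every consumer receives a URL that carries a scheme. A rough illustration (values invented; `S3_SECURE` left at its default):

    from settings_library.s3 import S3Settings

    settings = S3Settings(
        S3_ENDPOINT="172.17.0.1:9001",  # schemeless, as typically injected via env vars
        S3_ACCESS_KEY="mock-access-key",
        S3_SECRET_KEY="mock-secret-key",
        S3_BUCKET_NAME="mock-bucket",
    )
    assert settings.S3_ENDPOINT == "http://172.17.0.1:9001"

The Minio client, however, expects a bare HOST:PORT plus a separate `secure` flag, which is exactly what the patch below reduces the URL back to.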
From 5a922dc4ba52ac53d70bc6c6ac92bb6f4bdccaba Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 4 May 2022 13:47:10 +0200 Subject: [PATCH 75/78] fix issue with minio S3 endpoint format --- services/storage/src/simcore_service_storage/s3.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/services/storage/src/simcore_service_storage/s3.py b/services/storage/src/simcore_service_storage/s3.py index daef20d7315..60adba38e9f 100644 --- a/services/storage/src/simcore_service_storage/s3.py +++ b/services/storage/src/simcore_service_storage/s3.py @@ -6,6 +6,7 @@ from aiohttp import web from tenacity import before_sleep_log, retry, stop_after_attempt, wait_fixed +from pydantic import AnyUrl, parse_obj_as from .constants import APP_CONFIG_KEY, APP_S3_KEY from .s3wrapper.s3_client import MinioClientWrapper @@ -54,6 +55,14 @@ async def do_create_bucket(): log.debug("tear-down %s.setup.cleanup_ctx", __name__) +def _minio_client_endpoint(s3_endpoint: str) -> str: + # The Minio client adds http or https based on the `secure` parameter + # provided at construction time; an endpoint that already includes the + # scheme causes issues, so reduce the URL to HOST:PORT + url = parse_obj_as(AnyUrl, s3_endpoint) + return f"{url.host}:{url.port}" + + def setup_s3(app: web.Application): """minio/s3 service setup""" @@ -67,7 +76,7 @@ def setup_s3(app: web.Application): cfg = app[APP_CONFIG_KEY] s3_client = MinioClientWrapper( - cfg.STORAGE_S3.S3_ENDPOINT, + _minio_client_endpoint(cfg.STORAGE_S3.S3_ENDPOINT), cfg.STORAGE_S3.S3_ACCESS_KEY, cfg.STORAGE_S3.S3_SECRET_KEY, secure=cfg.STORAGE_S3.S3_SECURE, From ac3e21461039422c0a4b40d2816bc20165c2eab3 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Wed, 4 May 2022 15:01:57 +0200 Subject: [PATCH 76/78] using full parameter names --- ci/github/helpers/install_rclone.bash | 4 ++-- ci/github/helpers/install_rclone_docker_volume_plugin.bash | 4 ++-- services/dynamic-sidecar/Dockerfile | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/ci/github/helpers/install_rclone.bash b/ci/github/helpers/install_rclone.bash index 5f6f561fa42..66f6a3ec83a 100755 --- a/ci/github/helpers/install_rclone.bash +++ b/ci/github/helpers/install_rclone.bash @@ -11,7 +11,7 @@ IFS=$'\n\t' R_CLONE_VERSION="1.58.0" -curl -sLO "https://downloads.rclone.org/v${R_CLONE_VERSION}/rclone-v${R_CLONE_VERSION}-linux-amd64.deb" -dpkg -i "rclone-v${R_CLONE_VERSION}-linux-amd64.deb" +curl --silent --location --remote-name "https://downloads.rclone.org/v${R_CLONE_VERSION}/rclone-v${R_CLONE_VERSION}-linux-amd64.deb" +dpkg --install "rclone-v${R_CLONE_VERSION}-linux-amd64.deb" rm "rclone-v${R_CLONE_VERSION}-linux-amd64.deb" rclone --version diff --git a/ci/github/helpers/install_rclone_docker_volume_plugin.bash b/ci/github/helpers/install_rclone_docker_volume_plugin.bash index 3bc38a27db9..23f1968b027 100755 --- a/ci/github/helpers/install_rclone_docker_volume_plugin.bash +++ b/ci/github/helpers/install_rclone_docker_volume_plugin.bash @@ -13,8 +13,8 @@ IFS=$'\n\t' # Installation instructions from https://rclone.org/docker/ apt-get -y install fuse=2.9.9-3 -mkdir -p /var/lib/docker-plugins/rclone/config -mkdir -p /var/lib/docker-plugins/rclone/cache +mkdir --parents /var/lib/docker-plugins/rclone/config +mkdir --parents /var/lib/docker-plugins/rclone/cache docker plugin install rclone/docker-volume-rclone:amd64-1.57.0 args="-v" --alias rclone --grant-all-permissions docker plugin list docker plugin inspect rclone diff --git a/services/dynamic-sidecar/Dockerfile b/services/dynamic-sidecar/Dockerfile index cabdca914c6..167d6c51610 100644 --- a/services/dynamic-sidecar/Dockerfile +++
b/services/dynamic-sidecar/Dockerfile @@ -49,8 +49,8 @@ ENV DY_VOLUMES="/dy-volumes" # rclone installation ARG R_CLONE_VERSION="1.58.0" -RUN curl -sLO "https://downloads.rclone.org/v${R_CLONE_VERSION}/rclone-v${R_CLONE_VERSION}-linux-amd64.deb" && \ - dpkg -i "rclone-v${R_CLONE_VERSION}-linux-amd64.deb" && \ +RUN curl --silent --location --remote-name "https://downloads.rclone.org/v${R_CLONE_VERSION}/rclone-v${R_CLONE_VERSION}-linux-amd64.deb" && \ + dpkg --install "rclone-v${R_CLONE_VERSION}-linux-amd64.deb" && \ rm "rclone-v${R_CLONE_VERSION}-linux-amd64.deb" && \ rclone --version From 5b9e1673417a083a6cdf15c4dfd1f883e6c671a8 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 5 May 2022 08:39:54 +0200 Subject: [PATCH 77/78] changing endpoint formatting --- .../src/simcore_service_dask_sidecar/file_utils.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/services/dask-sidecar/src/simcore_service_dask_sidecar/file_utils.py b/services/dask-sidecar/src/simcore_service_dask_sidecar/file_utils.py index e71abbe0b5a..374948c7703 100644 --- a/services/dask-sidecar/src/simcore_service_dask_sidecar/file_utils.py +++ b/services/dask-sidecar/src/simcore_service_dask_sidecar/file_utils.py @@ -65,9 +65,7 @@ def _s3fs_settings_from_s3_settings(s3_settings: S3Settings) -> S3FsSettingsDict "secret": s3_settings.S3_SECRET_KEY, "token": s3_settings.S3_ACCESS_TOKEN, "use_ssl": s3_settings.S3_SECURE, - "client_kwargs": { - "endpoint_url": f"http{'s' if s3_settings.S3_SECURE else ''}://{s3_settings.S3_ENDPOINT}" - }, + "client_kwargs": {"endpoint_url": s3_settings.S3_ENDPOINT}, } From 32fda969258d2748c56e1ac68ffd7dd63a28a1b3 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 5 May 2022 09:16:18 +0200 Subject: [PATCH 78/78] moved s3_region --- packages/settings-library/src/settings_library/r_clone.py | 1 - packages/settings-library/src/settings_library/s3.py | 1 + packages/settings-library/src/settings_library/utils_r_clone.py | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/settings-library/src/settings_library/r_clone.py b/packages/settings-library/src/settings_library/r_clone.py index 0f6ee753fe0..53d3757b5cf 100644 --- a/packages/settings-library/src/settings_library/r_clone.py +++ b/packages/settings-library/src/settings_library/r_clone.py @@ -14,4 +14,3 @@ class S3Provider(str, Enum): class RCloneSettings(BaseCustomSettings): R_CLONE_S3: S3Settings = Field(auto_default_from_env=True) R_CLONE_PROVIDER: S3Provider - R_CLONE_REGION: str = Field("us-east-1", description="S3 region to use") diff --git a/packages/settings-library/src/settings_library/s3.py b/packages/settings-library/src/settings_library/s3.py index bf6644a0ca7..b9c70f4f0c5 100644 --- a/packages/settings-library/src/settings_library/s3.py +++ b/packages/settings-library/src/settings_library/s3.py @@ -11,6 +11,7 @@ class S3Settings(BaseCustomSettings): S3_ACCESS_TOKEN: Optional[str] = None S3_BUCKET_NAME: str S3_SECURE: bool = False + S3_REGION: str = "us-east-1" @validator("S3_ENDPOINT", pre=True) @classmethod diff --git a/packages/settings-library/src/settings_library/utils_r_clone.py b/packages/settings-library/src/settings_library/utils_r_clone.py index 38914f064ed..7c488d735a6 100644 --- a/packages/settings-library/src/settings_library/utils_r_clone.py +++ b/packages/settings-library/src/settings_library/utils_r_clone.py @@ -42,6 +42,6 @@ def get_r_clone_config(r_clone_settings: RCloneSettings) -> str: endpoint=r_clone_settings.R_CLONE_S3.S3_ENDPOINT, 
access_key=r_clone_settings.R_CLONE_S3.S3_ACCESS_KEY, secret_key=r_clone_settings.R_CLONE_S3.S3_SECRET_KEY, - aws_region=r_clone_settings.R_CLONE_REGION, + aws_region=r_clone_settings.R_CLONE_S3.S3_REGION, ) return r_clone_config
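Taken together, a rough sketch of how the final pieces fit (values invented; the exact keys of the emitted ini depend on the provider entries):

    from settings_library.r_clone import RCloneSettings, S3Provider
    from settings_library.utils_r_clone import get_r_clone_config

    r_clone_settings = RCloneSettings.parse_obj(
        dict(
            R_CLONE_S3=dict(
                S3_ENDPOINT="http://172.17.0.1:9001",
                S3_ACCESS_KEY="mock-access-key",
                S3_SECRET_KEY="mock-secret-key",
                S3_BUCKET_NAME="mock-bucket",
            ),
            R_CLONE_PROVIDER=S3Provider.MINIO,
        )
    )
    # the rendered config now resolves `aws_region` from the shared S3_REGION
    # field (default "us-east-1") instead of the removed R_CLONE_REGION
    print(get_r_clone_config(r_clone_settings))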