✨♻️ Storage refactoring step3 #3144

Merged

Changes from all commits (48 commits):
4ac165f: corrections from last refactoring (sanderegg, Jun 26, 2022)
b88f99d: replace minio (sanderegg, Jun 26, 2022)
be94c4b: cleanup file_meta_data table + (sanderegg, Jun 26, 2022)
b2f1807: removed file_uuid (sanderegg, Jun 26, 2022)
576ba0c: add models (sanderegg, Jun 26, 2022)
534ebb0: adding tests (sanderegg, Jun 27, 2022)
64c260c: adding aiobot3 client (sanderegg, Jun 27, 2022)
49257c2: merging (sanderegg, Jun 27, 2022)
b54bcd9: refactoring storage massively (sanderegg, Jun 27, 2022)
7512bca: added upload/abort mechanism (sanderegg, Jun 27, 2022)
c2ed268: remove multipart (sanderegg, Jun 27, 2022)
f3a6963: upgrade openapi to have abort call as well (sanderegg, Jun 27, 2022)
c3656b5: adding test for health check (sanderegg, Jun 27, 2022)
3398ead: completed s3 healtch status (sanderegg, Jun 27, 2022)
79a0c96: fixed upload return value (sanderegg, Jun 27, 2022)
401e749: Squashed commit of the following: (sanderegg, Jun 28, 2022)
18890e6: rename (sanderegg, Jun 28, 2022)
39476db: cleanup (sanderegg, Jun 28, 2022)
60fb523: patch metadata is gone (sanderegg, Jun 28, 2022)
f9daa68: correctly handle not found error from storage (sanderegg, Jun 28, 2022)
015b18b: cleanup (sanderegg, Jun 28, 2022)
e641c52: reduce sonarcloud issues where fixed (sanderegg, Jun 28, 2022)
4db777c: added new projectaccessrighterror (sanderegg, Jun 28, 2022)
f840290: codeclimate (sanderegg, Jun 28, 2022)
dc31fa6: test both old and new way (sanderegg, Jun 28, 2022)
3caf32c: back to the future (sanderegg, Jun 28, 2022)
1ecb4c6: properly document that storage cannot return a 404 in get_file_meta_data (sanderegg, Jun 29, 2022)
cbc74ef: added aioresponses (sanderegg, Jun 29, 2022)
464c43f: added mock for datcore adapter (sanderegg, Jun 29, 2022)
e334f3d: @pcrespov review: sorting (sanderegg, Jun 29, 2022)
268b365: it must use the same keywords (sanderegg, Jun 29, 2022)
9223852: added a fixture to let mock datcore adapter (sanderegg, Jun 29, 2022)
a83c6d6: lazy update of file when searching (sanderegg, Jun 29, 2022)
0bfcf2c: also passthrough with local ip (sanderegg, Jun 29, 2022)
52d7b91: user_id is gone (sanderegg, Jun 29, 2022)
1824e4d: @pcrespov review: rename parameter (sanderegg, Jun 29, 2022)
f9d6e5d: codeclimate (sanderegg, Jun 29, 2022)
451d752: pylance (sanderegg, Jun 29, 2022)
84d1d10: do not give up on start (sanderegg, Jun 29, 2022)
93a2de1: fixes detection of connection (sanderegg, Jun 29, 2022)
37198ff: added test for utils_handlers (sanderegg, Jun 29, 2022)
d67d16f: fix client setup (sanderegg, Jun 29, 2022)
e60d917: @pcrespov review: regenerated openapi (sanderegg, Jun 29, 2022)
f351933: linter (sanderegg, Jun 30, 2022)
a3b4fa6: reverted unwanted pollution (sanderegg, Jun 30, 2022)
2cdc4eb: added human readable fct for pytest parametrizations (sanderegg, Jun 30, 2022)
a5636a4: @GitHK: envelope is done automatically (sanderegg, Jun 30, 2022)
84142e9: @GitHK review: unify exceptions (sanderegg, Jun 30, 2022)

22 changes: 13 additions & 9 deletions api/specs/storage/openapi.yaml
@@ -243,9 +243,11 @@ paths:
$ref: "#/components/schemas/FileMetaEnvelope"
default:
$ref: "#/components/responses/DefaultErrorResponse"
patch:
summary: Update file metadata
operationId: update_file_meta_data

/locations/{location_id}/files/{file_id}:abort:
post:
summary: Asks the server to abort the upload and revert to the last valid version if any
operationId: abort_upload_file
parameters:
- name: file_id
in: path
@@ -263,12 +265,8 @@
schema:
type: string
responses:
"200":
description: "Returns file metadata"
content:
application/json:
schema:
$ref: "#/components/schemas/FileMetaEnvelope"
"204":
description: Abort OK
default:
$ref: "#/components/responses/DefaultErrorResponse"

@@ -346,6 +344,12 @@ paths:
application/json:
schema:
$ref: "#/components/schemas/PresignedLinkEnveloped"
links:
AbortUpload:
operationId: abort_upload_file
parameters:
path.location_id: "$request.path.location_id"
path.file_id: "$request.path.file_id"
default:
$ref: "#/components/responses/DefaultErrorResponse"
delete:
@@ -8,11 +8,26 @@

import re
from datetime import datetime
from typing import List, Optional, Pattern, Union
from enum import Enum
from typing import Any, Optional, Pattern, Union
from uuid import UUID

from models_library.projects_nodes_io import LocationID, LocationName, StorageFileID
from pydantic import BaseModel, ByteSize, ConstrainedStr, Extra, Field, validator
from models_library.projects_nodes_io import (
LocationID,
LocationName,
NodeID,
SimcoreS3FileID,
StorageFileID,
)
from pydantic import (
BaseModel,
ByteSize,
ConstrainedStr,
Extra,
Field,
root_validator,
validator,
)
from pydantic.networks import AnyUrl

from .basic_regex import DATCORE_DATASET_NAME_RE, S3_BUCKET_NAME_RE
@@ -127,8 +142,8 @@ class FileMetaDataGet(BaseModel):

@validator("location_id", pre=True)
@classmethod
def convert_from_str(cls, v):
if isinstance(v, str):
def ensure_location_is_integer(cls, v):
if v is not None:
return int(v)
return v
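
A self-contained sketch of what the renamed validator does. The real FileMetaDataGet carries many more fields; this toy model only mirrors the location_id handling:

from typing import Optional

from pydantic import BaseModel, validator


class _LocationExample(BaseModel):
    location_id: Optional[int] = None

    @validator("location_id", pre=True)
    @classmethod
    def ensure_location_is_integer(cls, v):
        # coerce both "0" and 0 to int; leave None untouched
        if v is not None:
            return int(v)
        return v


assert _LocationExample(location_id="0").location_id == 0
assert _LocationExample(location_id=None).location_id is None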

Expand Down Expand Up @@ -198,14 +213,46 @@ class Config:


class FileMetaDataArray(BaseModel):
__root__: List[FileMetaDataGet] = []
__root__: list[FileMetaDataGet] = []


# /locations/{location_id}/files/{file_id}


class LinkType(str, Enum):
PRESIGNED = "PRESIGNED"
S3 = "S3"


class PresignedLink(BaseModel):
link: AnyUrl


# /simcore-s3/


class FoldersBody(BaseModel):
source: dict[str, Any] = Field(default_factory=dict)
destination: dict[str, Any] = Field(default_factory=dict)
nodes_map: dict[NodeID, NodeID] = Field(default_factory=dict)

@root_validator()
@classmethod
def ensure_consistent_entries(cls, values):
source_node_keys = (
NodeID(n) for n in values["source"].get("workbench", {}).keys()
)
if set(source_node_keys) != set(values["nodes_map"].keys()):
raise ValueError("source project nodes do not fit with nodes_map entries")
destination_node_keys = (
NodeID(n) for n in values["destination"].get("workbench", {}).keys()
)
if set(destination_node_keys) != set(values["nodes_map"].values()):
raise ValueError(
"destination project nodes do not fit with nodes_map values"
)
return values


class SoftCopyBody(BaseModel):
link_id: SimcoreS3FileID
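
A hedged usage sketch of the new FoldersBody cross-check: nodes_map keys must match the source project's workbench node IDs, and its values must match the destination's. The import path and the toy workbench payloads are assumptions:

from uuid import uuid4

from models_library.api_schemas_storage import FoldersBody  # assumed import path

src_node, dst_node = uuid4(), uuid4()
body = FoldersBody(
    source={"workbench": {str(src_node): {"key": "...", "version": "1.0.0"}}},
    destination={"workbench": {str(dst_node): {"key": "...", "version": "1.0.0"}}},
    nodes_map={src_node: dst_node},  # source node -> destination node
)
# Swapping or omitting an entry in nodes_map raises a pydantic ValidationError.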
4 changes: 2 additions & 2 deletions packages/models-library/src/models_library/basic_types.py
@@ -2,7 +2,7 @@

from pydantic import conint, constr

from .basic_regex import UUID_RE_BASE, VERSION_RE
from .basic_regex import UUID_RE, VERSION_RE

# port number range
PortInt = conint(gt=0, lt=65535)
@@ -21,7 +21,7 @@
EnvVarKey = constr(regex=r"[a-zA-Z][a-azA-Z0-9_]*")

# e.g. '5c833a78-1af3-43a7-9ed7-6a63b188f4d8'
UUIDStr = constr(regex=UUID_RE_BASE)
UUIDStr = constr(regex=UUID_RE)


class LogLevel(str, Enum):
17 changes: 9 additions & 8 deletions packages/models-library/src/models_library/projects.py
@@ -4,7 +4,7 @@
from copy import deepcopy
from datetime import datetime
from enum import Enum
from typing import Any, Dict, List, Optional
from typing import Any, Optional
from uuid import UUID

from pydantic import BaseModel, EmailStr, Extra, Field, HttpUrl, constr, validator
@@ -22,7 +22,7 @@
ClassifierID = str

# TODO: for some reason class Workbench(BaseModel): __root__= does not work as I thought ... investigate!
Workbench = Dict[NodeIDStr, Node]
Workbench = dict[NodeIDStr, Node]


# NOTE: careful this is in sync with packages/postgres-database/src/simcore_postgres_database/models/projects.py!!!
@@ -97,6 +97,7 @@ def convert_sql_alchemy_enum(cls, v):
class Config:
orm_mode = True
use_enum_values = True
allow_population_by_field_name = True


class Project(BaseProjectModel):
@@ -121,15 +122,15 @@ class Project(BaseProjectModel):
examples=["2018-07-01T11:13:43Z"],
alias="lastChangeDate",
)
access_rights: Dict[GroupIDStr, AccessRights] = Field(
access_rights: dict[GroupIDStr, AccessRights] = Field(
...,
description="object containing the GroupID as key and read/write/execution permissions as value",
alias="accessRights",
)

# Classification
tags: Optional[List[int]] = []
classifiers: Optional[List[ClassifierID]] = Field(
tags: Optional[list[int]] = []
classifiers: Optional[list[ClassifierID]] = Field(
default_factory=list,
description="Contains the reference to the project classifiers",
examples=["some:id:to:a:classifier"],
@@ -142,20 +143,20 @@ class Project(BaseProjectModel):
ui: Optional[StudyUI] = None

# Quality
quality: Dict[str, Any] = Field(
quality: dict[str, Any] = Field(
{}, description="stores the study quality assessment"
)

# Dev only
dev: Optional[Dict] = Field(description="object used for development purposes only")
dev: Optional[dict] = Field(description="object used for development purposes only")

class Config:
description = "Document that stores metadata, pipeline and UI setup of a study"
title = "osparc-simcore project"
extra = Extra.forbid

@staticmethod
def schema_extra(schema: Dict, _model: "Project"):
def schema_extra(schema: dict, _model: "Project"):
# pylint: disable=unsubscriptable-object

# Patch to allow jsonschema nullable
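
The allow_population_by_field_name = True added to the Config in this file lets the model be constructed with either the pythonic field names or their camelCase aliases; a minimal illustration with a simplified stand-in model, not the real Project:

from pydantic import BaseModel, Field


class _ProjectStub(BaseModel):
    last_change_date: str = Field(..., alias="lastChangeDate")

    class Config:
        allow_population_by_field_name = True


# both spellings validate now; without the flag only the alias would
assert _ProjectStub(lastChangeDate="2018-07-01T11:13:43Z") == _ProjectStub(
    last_change_date="2018-07-01T11:13:43Z"
)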
13 changes: 3 additions & 10 deletions packages/models-library/src/models_library/projects_nodes_io.py
@@ -7,9 +7,8 @@
"""

import re
from enum import IntEnum
from pathlib import Path
from typing import Literal, Optional, Pattern, Union
from typing import Optional, Pattern, Union
from uuid import UUID

from pydantic import AnyUrl, BaseModel, ConstrainedStr, Extra, Field, validator
@@ -24,14 +23,8 @@ class NodeIDStr(ConstrainedStr):
regex: Optional[Pattern[str]] = re.compile(UUID_RE)


# NOTE: this trick is used to keep backward compatility simcore.s3 is not a valid python variable name
Location = IntEnum(
value="Location",
names=[("simcore.s3", 0), ("SIMCORE_S3", 0), ("datcore", 1), ("DATCORE", 1)],
)

LocationID = Union[Literal[0], Literal[1]]
LocationName = Union[Literal["simcore.s3"], Literal["datcore"]]
LocationID = int
LocationName = str


class SimcoreS3FileID(ConstrainedStr):
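
With the Location IntEnum and the Literal unions gone, location ids and names are plain int/str. The well-known values from the removed code would now just be constants, e.g. this sketch:

SIMCORE_S3_ID: LocationID = 0    # was Location["simcore.s3"]
DATCORE_ID: LocationID = 1       # was Location["datcore"]
SIMCORE_S3_STR: LocationName = "simcore.s3"
DATCORE_STR: LocationName = "datcore"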
5 changes: 3 additions & 2 deletions packages/models-library/tests/test_basic_regex.py
@@ -12,7 +12,7 @@
from models_library.basic_regex import (
DATE_RE,
PUBLIC_VARIABLE_NAME_RE,
UUID_RE_BASE,
UUID_RE,
VERSION_RE,
)
from packaging.version import Version
@@ -30,6 +30,7 @@ def assert_match_and_get_capture(regex_str, test_str, expected) -> Optional[Sequ
assert match is not None
print(regex_str, "captured:", match.group(), "->", match.groups())
else:
assert match
captured = match.groups()
assert captured == expected
return captured
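
The assert match added in the else-branch (see the "pylance" commit) presumably exists to narrow Optional[re.Match] to re.Match for static type checkers before match.groups() is called; the pattern in isolation:

import re
from typing import Optional

match: Optional[re.Match] = re.match(r"(\d+)", "42")
assert match  # narrows Optional[re.Match] -> re.Match for pylance/mypy
print(match.groups())  # safe after the assert: ('42',)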
@@ -56,7 +57,7 @@
],
)
def test_UUID_RE(uuid_str, expected):
assert_match_and_get_capture(UUID_RE_BASE, uuid_str, expected)
assert_match_and_get_capture(UUID_RE, uuid_str, expected)


class webserver_timedate_utils:
@@ -0,0 +1,87 @@
"""file_meta_data: remove unused columns, add expiration of upload

Revision ID: 2cc556e5c52d
Revises: cf3bac482ce0
Create Date: 2022-06-26 19:12:13.478593+00:00

"""
import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision = "2cc556e5c52d"
down_revision = "cf3bac482ce0"
branch_labels = None
depends_on = None


def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.add_column(
"file_meta_data", sa.Column("upload_expires_at", sa.DateTime(), nullable=True)
)
op.alter_column(
"file_meta_data", "file_id", existing_type=sa.VARCHAR(), nullable=False
)
op.drop_column("file_meta_data", "display_file_path")
op.drop_column("file_meta_data", "node_name")
op.drop_column("file_meta_data", "raw_file_path")
op.drop_column("file_meta_data", "project_name")
op.drop_column("file_meta_data", "file_name")
op.drop_column("file_meta_data", "file_uuid")
op.drop_column("file_meta_data", "user_name")
# ### end Alembic commands ###
op.create_primary_key("pk_file_meta_data", "file_meta_data", ["file_id"])


def downgrade():
op.drop_constraint("pk_file_meta_data", "file_meta_data", "primary")
# ### commands auto generated by Alembic - please adjust! ###
op.add_column(
"file_meta_data",
sa.Column("user_name", sa.VARCHAR(), autoincrement=False, nullable=True),
)
op.add_column(
"file_meta_data",
sa.Column("file_uuid", sa.VARCHAR(), autoincrement=False, nullable=True),
)
op.add_column(
"file_meta_data",
sa.Column("file_name", sa.VARCHAR(), autoincrement=False, nullable=True),
)
op.add_column(
"file_meta_data",
sa.Column("project_name", sa.VARCHAR(), autoincrement=False, nullable=True),
)
op.add_column(
"file_meta_data",
sa.Column("raw_file_path", sa.VARCHAR(), autoincrement=False, nullable=True),
)
op.add_column(
"file_meta_data",
sa.Column("node_name", sa.VARCHAR(), autoincrement=False, nullable=True),
)
op.add_column(
"file_meta_data",
sa.Column(
"display_file_path", sa.VARCHAR(), autoincrement=False, nullable=True
),
)
op.alter_column(
"file_meta_data", "file_id", existing_type=sa.VARCHAR(), nullable=True
)
op.drop_column("file_meta_data", "upload_expires_at")
# ### end Alembic commands ###
conn = op.get_bind()
for row in conn.execute(sa.DDL("SELECT file_id FROM file_meta_data")):
file_id = row["file_id"]
conn.execute(
sa.DDL(
f"""
UPDATE file_meta_data
SET file_uuid = '{file_id}'
WHERE file_id = '{file_id}'
"""
)
)
op.create_primary_key("pk_file_meta_data", "file_meta_data", ["file_uuid"])
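
The row-by-row loop above restores file_uuid by copying file_id; the same backfill could be written as a single set-based statement, e.g. this sketch, which would also avoid interpolating ids into SQL strings:

op.execute(sa.DDL("UPDATE file_meta_data SET file_uuid = file_id"))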
@@ -5,21 +5,14 @@
file_meta_data = sa.Table(
"file_meta_data",
metadata,
sa.Column("file_uuid", sa.String(), primary_key=True),
sa.Column("location_id", sa.String()),
sa.Column("location", sa.String()),
sa.Column("bucket_name", sa.String()),
sa.Column("object_name", sa.String()),
sa.Column("project_id", sa.String()),
sa.Column("project_name", sa.String()),
sa.Column("node_id", sa.String()),
sa.Column("node_name", sa.String()),
sa.Column("file_name", sa.String()),
sa.Column("user_id", sa.String()),
sa.Column("user_name", sa.String()),
sa.Column("file_id", sa.String()),
sa.Column("raw_file_path", sa.String()),
sa.Column("display_file_path", sa.String()),
sa.Column("file_id", sa.String(), primary_key=True),
sa.Column("created_at", sa.String()),
sa.Column("last_modified", sa.String()),
sa.Column("file_size", sa.BigInteger()),
@@ -38,4 +31,7 @@
doc="If true, this file is a soft link."
"i.e. is another entry with the same object_name",
),
sa.Column(
"upload_expires_at", sa.DateTime(), nullable=True, doc="Timestamp of expiration"
),
)
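
With file_id now the primary key and the new upload_expires_at column, pending uploads that outlived their presigned link can be found with a simple query. A hedged sketch; the import path and any cleanup policy are assumptions:

import sqlalchemy as sa

from simcore_postgres_database.models.file_meta_data import file_meta_data  # assumed path

# rows whose upload link has expired and could be garbage-collected / reverted
expired_uploads = sa.select(file_meta_data.c.file_id).where(
    file_meta_data.c.upload_expires_at < sa.func.now()
)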