diff --git a/tarchia/api/v1/__init__.py b/tarchia/api/v1/__init__.py index 7d0d6b2..bd49bc5 100644 --- a/tarchia/api/v1/__init__.py +++ b/tarchia/api/v1/__init__.py @@ -5,6 +5,7 @@ from .data_management import router as data_router from .hook_management import router as hook_router from .owner_management import router as owner_router +from .relation_management import router as relation_router from .search import router as search_router from .table_management import router as table_router @@ -15,4 +16,5 @@ v1_router.include_router(hook_router, tags=["Hook Management"]) v1_router.include_router(owner_router, tags=["Owner Management"]) v1_router.include_router(search_router, tags=["Search"]) +v1_router.include_router(relation_router, tags=["Relation Management"]) v1_router.include_router(table_router, tags=["Table Management"]) diff --git a/tarchia/api/v1/commit_management.py b/tarchia/api/v1/commit_management.py index 2666a99..ea6282c 100644 --- a/tarchia/api/v1/commit_management.py +++ b/tarchia/api/v1/commit_management.py @@ -20,6 +20,7 @@ from tarchia.utils.constants import HISTORY_ROOT from tarchia.utils.constants import IDENTIFIER_REG_EX from tarchia.utils.constants import MAIN_BRANCH +from tarchia.utils.constants import SHA_OR_HEAD_REG_EX router = APIRouter() @@ -29,7 +30,9 @@ async def get_table_commit( request: Request, owner: str = Path(description="The owner of the table.", pattern=IDENTIFIER_REG_EX), table: str = Path(description="The name of the table.", pattern=IDENTIFIER_REG_EX), - commit_sha: Union[str, Literal["head"]] = Path(description="The commit to retrieve."), + commit_sha: Union[str, Literal["head"]] = Path( + description="The commit to retrieve.", pattern=SHA_OR_HEAD_REG_EX + ), filters: Optional[str] = Query(None, description="Filters to push to manifest reader"), ): from tarchia.interfaces.storage import storage_factory diff --git a/tarchia/api/v1/data_management.py b/tarchia/api/v1/data_management.py index 96773d8..93a21ad 100644 --- a/tarchia/api/v1/data_management.py +++ b/tarchia/api/v1/data_management.py @@ -35,6 +35,7 @@ from tarchia.utils.constants import IDENTIFIER_REG_EX from tarchia.utils.constants import MAIN_BRANCH from tarchia.utils.constants import MANIFEST_ROOT +from tarchia.utils.constants import SHA_OR_HEAD_REG_EX router = APIRouter() @@ -139,7 +140,9 @@ def xor_hex_strings(hex_strings: List[str]) -> str: async def start_transaction( owner: str = Path(description="The owner of the table.", pattern=IDENTIFIER_REG_EX), table: str = Path(description="The name of the table.", pattern=IDENTIFIER_REG_EX), - commit_sha: Union[str, Literal["head"]] = Path(description="The commit to retrieve."), + commit_sha: Union[str, Literal["head"]] = Path( + description="The commit to retrieve.", pattern=SHA_OR_HEAD_REG_EX + ), ): from tarchia.interfaces.storage import storage_factory from tarchia.utils import build_root diff --git a/tarchia/api/v1/hook_management.py b/tarchia/api/v1/hook_management.py index fad6ba7..297c481 100644 --- a/tarchia/api/v1/hook_management.py +++ b/tarchia/api/v1/hook_management.py @@ -10,67 +10,120 @@ """ from fastapi import APIRouter +from fastapi import Path from fastapi import Request from fastapi.responses import ORJSONResponse +from tarchia.utils.constants import IDENTIFIER_REG_EX + router = APIRouter() @router.get("/tables/{owner}/{table}/hooks", response_class=ORJSONResponse) -async def get_table_hooks(request: Request, owner: str): +async def get_table_hooks( + request: Request, + owner: str = Path(description="The owner of the table.", pattern=IDENTIFIER_REG_EX), + table: str = Path(description="The name of the table.", pattern=IDENTIFIER_REG_EX), +): raise NotImplementedError("Not Implemented") @router.post("/tables/{owner}/{table}/hooks", response_class=ORJSONResponse) -async def create_table_hooks(request: Request, owner: str): +async def create_table_hooks( + request: Request, + owner: str = Path(description="The owner of the table.", pattern=IDENTIFIER_REG_EX), + table: str = Path(description="The name of the table.", pattern=IDENTIFIER_REG_EX), +): raise NotImplementedError("Not Implemented") @router.get("/tables/{owner}/{table}/hooks/{hook}", response_class=ORJSONResponse) -async def get_table_hooks_by_id(request: Request, owner: str): +async def get_table_hooks_by_id( + request: Request, + owner: str = Path(description="The owner of the table.", pattern=IDENTIFIER_REG_EX), + table: str = Path(description="The name of the table.", pattern=IDENTIFIER_REG_EX), + hook: str = Path(description="Unique identifier for hook"), +): raise NotImplementedError("Not Implemented") @router.patch("/tables/{owner}/{table}/hooks/{hook}", response_class=ORJSONResponse) -async def update_table_hooks_by_id(request: Request, owner: str): +async def update_table_hooks_by_id( + request: Request, + owner: str = Path(description="The owner of the table.", pattern=IDENTIFIER_REG_EX), + table: str = Path(description="The name of the table.", pattern=IDENTIFIER_REG_EX), + hook: str = Path(description="Unique identifier for hook"), +): raise NotImplementedError("Not Implemented") @router.delete("/tables/{owner}/{table}/hooks/{hook}", response_class=ORJSONResponse) -async def delete_table_hooks_by_id(request: Request, owner: str): +async def delete_table_hooks_by_id( + request: Request, + owner: str = Path(description="The owner of the table.", pattern=IDENTIFIER_REG_EX), + table: str = Path(description="The name of the table.", pattern=IDENTIFIER_REG_EX), + hook: str = Path(description="Unique identifier for hook"), +): raise NotImplementedError("Not Implemented") @router.get("/tables/{owner}/{table}/hooks/{hook}/ping", response_class=ORJSONResponse) -async def ping_table_hooks_by_id(request: Request, owner: str): +async def ping_table_hooks_by_id( + request: Request, + owner: str = Path(description="The owner of the table.", pattern=IDENTIFIER_REG_EX), + table: str = Path(description="The name of the table.", pattern=IDENTIFIER_REG_EX), + hook: str = Path(description="Unique identifier for hook"), +): raise NotImplementedError("Not Implemented") @router.get("/owner/{owner}/hooks", response_class=ORJSONResponse) -async def get_owner_hooks(request: Request, owner: str): +async def get_owner_hooks( + request: Request, + owner: str = Path(description="The owner of the relation.", pattern=IDENTIFIER_REG_EX), +): raise NotImplementedError("Not Implemented") @router.post("/owner/{owner}/hooks", response_class=ORJSONResponse) -async def create_owner_hooks(request: Request, owner: str): +async def create_owner_hooks( + request: Request, + owner: str = Path(description="The owner of the relation.", pattern=IDENTIFIER_REG_EX), +): raise NotImplementedError("Not Implemented") @router.get("/owner/{owner}/hooks/{hook}", response_class=ORJSONResponse) -async def get_owner_hooks_by_id(request: Request, owner: str): +async def get_owner_hooks_by_id( + request: Request, + owner: str = Path(description="The owner of the table.", pattern=IDENTIFIER_REG_EX), + hook: str = Path(description="Unique identifier for hook"), +): raise NotImplementedError("Not Implemented") @router.patch("/owner/{owner}/hooks/{hook}", response_class=ORJSONResponse) -async def update_owner_hooks_by_id(request: Request, owner: str): +async def update_owner_hooks_by_id( + request: Request, + owner: str = Path(description="The owner of the table.", pattern=IDENTIFIER_REG_EX), + hook: str = Path(description="Unique identifier for hook"), +): raise NotImplementedError("Not Implemented") @router.delete("/owner/{owner}/hooks/{hook}", response_class=ORJSONResponse) -async def delete_owner_hooks_by_id(request: Request, owner: str): +async def delete_owner_hooks_by_id( + request: Request, + owner: str = Path(description="The owner of the table.", pattern=IDENTIFIER_REG_EX), + hook: str = Path(description="Unique identifier for hook"), +): raise NotImplementedError("Not Implemented") @router.get("/owner/{owner}/hooks/{hook}/ping", response_class=ORJSONResponse) -async def ping_owner_hooks_by_id(request: Request, owner: str): +async def ping_owner_hooks_by_id( + request: Request, + owner: str = Path(description="The owner of the table.", pattern=IDENTIFIER_REG_EX), + hook: str = Path(description="Unique identifier for hook"), +): raise NotImplementedError("Not Implemented") diff --git a/tarchia/api/v1/owner_management.py b/tarchia/api/v1/owner_management.py index a39343c..39137fe 100644 --- a/tarchia/api/v1/owner_management.py +++ b/tarchia/api/v1/owner_management.py @@ -89,7 +89,11 @@ async def read_owner( @router.patch("/owners/{owner}/{attribute}", response_class=ORJSONResponse) -async def update_owner(owner: str, attribute: str, request: UpdateValueRequest): +async def update_owner( + attribute: str, + request: UpdateValueRequest, + owner: str = Path(description="The owner.", pattern=IDENTIFIER_REG_EX), +): """ Update an attribute of an owner. @@ -119,7 +123,7 @@ async def update_owner(owner: str, attribute: str, request: UpdateValueRequest): @router.delete("/owners/{owner}", response_class=ORJSONResponse) -async def delete_owner(owner: str): +async def delete_owner(owner: str = Path(description="The owner.", pattern=IDENTIFIER_REG_EX)): """ Delete an owner. diff --git a/tarchia/api/v1/relation_management.py b/tarchia/api/v1/relation_management.py new file mode 100644 index 0000000..4ea658a --- /dev/null +++ b/tarchia/api/v1/relation_management.py @@ -0,0 +1,29 @@ +""" """ + +from fastapi import APIRouter +from fastapi import Path +from fastapi import Request +from fastapi.responses import ORJSONResponse + +from tarchia.interfaces.catalog import catalog_factory +from tarchia.utils.constants import IDENTIFIER_REG_EX + +router = APIRouter() +catalog_provider = catalog_factory() + + +@router.get("/relations/{owner}", response_class=ORJSONResponse) +async def list_relations( + request: Request, + owner: str = Path(description="The owner of the relation.", pattern=IDENTIFIER_REG_EX), +): + raise NotImplementedError("") + + +@router.get("/relations/{owner}/{relation}", response_class=ORJSONResponse) +async def list_relations( + request: Request, + owner: str = Path(description="The owner of the relation.", pattern=IDENTIFIER_REG_EX), + relation: str = Path(description="The name of the relation.", pattern=IDENTIFIER_REG_EX), +): + raise NotImplementedError("") diff --git a/tarchia/api/v1/table_management.py b/tarchia/api/v1/table_management.py index cb42493..cf460ef 100644 --- a/tarchia/api/v1/table_management.py +++ b/tarchia/api/v1/table_management.py @@ -26,7 +26,10 @@ @router.get("/tables/{owner}", response_class=ORJSONResponse) -async def list_tables(owner: str, request: Request): +async def list_tables( + request: Request, + owner: str = Path(description="The owner of the table.", pattern=IDENTIFIER_REG_EX), +): """ Retrieve a list of tables and their current commits. diff --git a/tarchia/api/v1/view_management.py b/tarchia/api/v1/view_management.py index b677230..763c1eb 100644 --- a/tarchia/api/v1/view_management.py +++ b/tarchia/api/v1/view_management.py @@ -1,25 +1,42 @@ from fastapi import APIRouter +from fastapi import Path from fastapi import Request from fastapi.responses import ORJSONResponse +from tarchia.utils.constants import IDENTIFIER_REG_EX + router = APIRouter() @router.get("/views/{owner}", response_class=ORJSONResponse) -async def list_views(request: Request, owner: str): +async def list_views( + request: Request, + owner: str = Path(description="The owner of the view.", pattern=IDENTIFIER_REG_EX), +): raise NotImplementedError("Not Implemented") @router.post("/views/{owner}", response_class=ORJSONResponse) -async def create_view(request: Request, owner: str): +async def create_view( + request: Request, + owner: str = Path(description="The owner of the view.", pattern=IDENTIFIER_REG_EX), +): raise NotImplementedError("Not Implemented") @router.get("/views/{owner}/{view}", response_class=ORJSONResponse) -async def get_view(request: Request, owner: str, view: str): +async def get_view( + request: Request, + owner: str = Path(description="The owner of the view.", pattern=IDENTIFIER_REG_EX), + view: str = Path(description="The view.", pattern=IDENTIFIER_REG_EX), +): raise NotImplementedError("Not Implemented") @router.delete("/views/{owner}/{view}", response_class=ORJSONResponse) -async def delete_view(request: Request, owner: str, view: str): +async def delete_view( + request: Request, + owner: str = Path(description="The owner of the view.", pattern=IDENTIFIER_REG_EX), + view: str = Path(description="The view.", pattern=IDENTIFIER_REG_EX), +): raise NotImplementedError("Not Implemented") diff --git a/tarchia/interfaces/catalog/__init__.py b/tarchia/interfaces/catalog/__init__.py index 0ed0ab0..8dfed6b 100644 --- a/tarchia/interfaces/catalog/__init__.py +++ b/tarchia/interfaces/catalog/__init__.py @@ -1,8 +1,10 @@ from tarchia.exceptions import InvalidConfigurationError from tarchia.utils import config +from .provider_base import CatalogProvider -def catalog_factory(): # pragma: no cover + +def catalog_factory() -> CatalogProvider: # pragma: no cover if config.CATALOG_PROVIDER is None or config.CATALOG_PROVIDER.upper() == "DEVELOPMENT": from tarchia.interfaces.catalog.dev_catalog import DevelopmentCatalogProvider diff --git a/tarchia/interfaces/catalog/gcs_firestore.py b/tarchia/interfaces/catalog/gcs_firestore.py index 8c2ff63..9ded064 100644 --- a/tarchia/interfaces/catalog/gcs_firestore.py +++ b/tarchia/interfaces/catalog/gcs_firestore.py @@ -67,8 +67,10 @@ def get_table(self, owner: str, table: str) -> dict: from google.cloud.firestore_v1.base_query import FieldFilter documents = self.database.collection(self.collection) - documents = documents.where(filter=FieldFilter("owner", "==", owner)).where( - filter=FieldFilter("name", "==", table) + documents = documents.where( + filter=FieldFilter("relation", "==", "table") + .where(FieldFilter("owner", "==", owner)) + .where(filter=FieldFilter("name", "==", table)) ) documents = documents.stream() documents = list(doc.to_dict() for doc in documents) diff --git a/tarchia/interfaces/storage/__init__.py b/tarchia/interfaces/storage/__init__.py index 5364cb1..f9ddc15 100644 --- a/tarchia/interfaces/storage/__init__.py +++ b/tarchia/interfaces/storage/__init__.py @@ -51,7 +51,6 @@ def storage_factory(provider: Optional[str] = None) -> StorageProvider: # pragm return GoogleCloudStorage() if provider in ("AMAZON", "S3", "MINIO"): - from .s3_storage import S3Storage + raise NotImplementedError(f"{provider} storage provider not implemented") - return S3Storage() raise InvalidConfigurationError(setting="STORAGE_PROVIDER") diff --git a/tarchia/models/metadata_models.py b/tarchia/models/metadata_models.py index 05025de..b27fc74 100644 --- a/tarchia/models/metadata_models.py +++ b/tarchia/models/metadata_models.py @@ -2,6 +2,7 @@ from enum import Enum from typing import Any from typing import List +from typing import Literal from typing import Optional from orso.schema import FlatColumn @@ -138,6 +139,7 @@ class EventTypes(Enum): steward: str owner: str table_id: str + relation: Literal["table"] = "table" location: Optional[str] partitioning: Optional[List[str]] last_updated_ms: int @@ -178,6 +180,8 @@ class EventTypes(Enum): TABLE_CREATED = "TABLE_CREATED" TABLE_DELETED = "TABLE_DELETED" + VIEW_CREATED = "VIEW_CREATED" + VIEW_DELETED = "VIEW_DELETED" name: str owner_id: str @@ -195,6 +199,23 @@ def is_valid(self): ) +class ViewCatalogEntry(TarchiaBaseModel): + """ + The Catalog entry for a view. + """ + + name: str + steward: str + owner: str + view_id: str + statement: str + relation: Literal["view"] = "view" + metadata: dict = Field(default_factory=dict) + created_at: int = int(time.time_ns() / 1e6) + description: Optional[str] = "" + format_version: int = Field(default=1) + + class Transaction(TarchiaBaseModel): transaction_id: str expires_at: int diff --git a/tarchia/utils/constants.py b/tarchia/utils/constants.py index 0673dbc..5cf1eff 100644 --- a/tarchia/utils/constants.py +++ b/tarchia/utils/constants.py @@ -1,6 +1,7 @@ """Define once, use everywhere""" IDENTIFIER_REG_EX = r"^[a-zA-Z_][a-zA-Z0-9_]*$" +SHA_OR_HEAD_REG_EX = r"^(head|[a-f0-9]{64})$" HISTORY_ROOT = "[metadata_root]/[owner]/[table_id]/metadata/history" MANIFEST_ROOT = "[metadata_root]/[owner]/[table_id]/metadata/manifests" diff --git a/tests/common.py b/tests/common.py index 9160493..46f3978 100644 --- a/tests/common.py +++ b/tests/common.py @@ -11,7 +11,7 @@ def ensure_owner(): client = TestClient(application) owner = CreateOwnerRequest( - name=TEST_OWNER, steward="billy", type=OwnerType.INDIVIDUAL, memberships=[] + name=TEST_OWNER, steward="billy", type=OwnerType.INDIVIDUAL, memberships=[], description="test" ) # create the owner diff --git a/tests/endpoints/test_owner_endpoints.py b/tests/endpoints/test_owner_endpoints.py index cb27f27..3be83b6 100644 --- a/tests/endpoints/test_owner_endpoints.py +++ b/tests/endpoints/test_owner_endpoints.py @@ -32,7 +32,7 @@ def test_create_read_update_delete_owner(): client = TestClient(application) owner = CreateOwnerRequest( - name=TEST_OWNER, steward="billy", type=OwnerType.INDIVIDUAL, memberships=[] + name=TEST_OWNER, steward="billy", type=OwnerType.INDIVIDUAL, memberships=[], description="test" ) # create the owner @@ -56,10 +56,17 @@ def test_create_read_update_delete_owner(): response = client.patch(url=f"/v1/owners/{TEST_OWNER}/steward", content='{"value":"bobby"}') assert response.status_code == 200, f"{response.status_code} - {response.content}" + # can we update this owner + response = client.patch(url=f"/v1/owners/{TEST_OWNER}/description", content='{"value":"not test"}') + assert response.status_code == 200, f"{response.status_code} - {response.content}" + # is the owner updated response = client.get(url=f"/v1/owners/{TEST_OWNER}") assert response.status_code == 200, f"{response.status_code} - {response.content}" - assert response.json()["steward"] == "bobby" + entry = response.json() + assert entry["steward"] == "bobby" + assert entry["description"] == "not test" + # delete the owner response = client.delete(url=f"/v1/owners/{TEST_OWNER}") @@ -87,6 +94,7 @@ def test_owner_rules(): table_schema=Schema(columns=[Column(name="column")]), freshness_life_in_days=0, retention_in_days=0, + description="test" ) # add a table @@ -98,7 +106,7 @@ def test_owner_rules(): assert response.status_code == 409, f"{response.status_code} - {response.content}" owner = CreateOwnerRequest( - name=TEST_OWNER, steward="***", type=OwnerType.INDIVIDUAL, memberships=[] + name=TEST_OWNER, steward="***", type=OwnerType.INDIVIDUAL, memberships=[], description="test" ) # can't create an owner with an invalid name @@ -116,7 +124,7 @@ def test_invalid_owner(): client = TestClient(application) owner = CreateOwnerRequest( - name="$owner", steward="billy", type=OwnerType.INDIVIDUAL, memberships=[] + name="$owner", steward="billy", type=OwnerType.INDIVIDUAL, memberships=[], description="test" ) # can't crreate an owner with an invalid name diff --git a/tests/endpoints/test_table_endpoints.py b/tests/endpoints/test_table_endpoints.py index 4fb6543..eb4c348 100644 --- a/tests/endpoints/test_table_endpoints.py +++ b/tests/endpoints/test_table_endpoints.py @@ -45,6 +45,7 @@ def test_create_read_update_delete_table(): table_schema=Schema(columns=[Column(name="column")]), freshness_life_in_days=0, retention_in_days=0, + description="test" ) # can't create the table for non-existent owner @@ -100,7 +101,8 @@ def test_maintain_table_metadata(): steward="bob", table_schema=Schema(columns=[Column(name="column")]), freshness_life_in_days=0, - retention_in_days=0 + retention_in_days=0, + description="test" ) # create the table @@ -119,12 +121,21 @@ def test_maintain_table_metadata(): ) assert response.status_code == 200, f"{response.status_code} - {response.content}" + # update the description + response = client.patch( + url=f"/v1/tables/{TEST_OWNER}/test_dataset_metadata_test/description", + content=orjson.dumps({"value": "not test"}), + ) + assert response.status_code == 200, f"{response.status_code} - {response.content}" + # confirm the metadata has been updated correctly response = client.get(url=f"/v1/tables/{TEST_OWNER}/test_dataset_metadata_test") assert response.status_code == 200, f"{response.status_code} - {response.content}" - metadata = response.json()["metadata"] + entry = response.json() + metadata = entry["metadata"] assert metadata is not None assert metadata["set"], metadata + assert entry["description"] == "not test" # delete the table response = client.delete(url=f"/v1/tables/{TEST_OWNER}/test_dataset_metadata_test")