diff --git a/.github/workflows/pr-tests.yaml b/.github/workflows/pr-tests.yaml index 3606a5e2..4ed7cc14 100644 --- a/.github/workflows/pr-tests.yaml +++ b/.github/workflows/pr-tests.yaml @@ -31,29 +31,23 @@ jobs: # runs-on: ${{ matrix.os }} runs-on: ${{ matrix.runner }} steps: - - name: Permission to home directory - run: | - sudo chown -R $USER:$USER $HOME - - - name: Install Git - if: runner.os == 'Linux' - run: | - sudo apt-get update - sudo apt-get install git -y - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - - name: Install pip packages + - name: Install uv run: | - python -m pip install --upgrade pip - pip install -U uv tox tox-uv pytest + curl -LsSf https://astral.sh/uv/0.4.20/install.sh | sh + source $HOME/.cargo/env uv --version + - name: Install Python and create venv + run: | + uv python install ${{ matrix.python-version }} + uv venv + + - name: Install pip packages + run: | + uv pip install -U tox tox-uv pytest + - name: Get uv cache dir id: pip-cache shell: bash @@ -72,4 +66,4 @@ jobs: env: TOX_PYTHON: python${{ matrix.python-version }} run: | - tox -e syft.test.unit + uv run tox -e syft.test.unit diff --git a/syftbox/client/plugins/sync.py b/syftbox/client/plugins/sync.py index bf14e103..99df6ac2 100644 --- a/syftbox/client/plugins/sync.py +++ b/syftbox/client/plugins/sync.py @@ -3,7 +3,6 @@ from collections import defaultdict from datetime import datetime from threading import Event -from typing import Tuple from watchdog.events import DirModifiedEvent @@ -26,7 +25,7 @@ IGNORE_FOLDERS = [CLIENT_CHANGELOG_FOLDER, STAGING, CLIENT_APPS] -def get_ignore_rules(dir_state: DirState) -> Tuple[str, str, str]: +def get_ignore_rules(dir_state: DirState) -> list[str, str, str]: # get the ignore files syft_ignore_files = [] folder_path = dir_state.sync_folder + "/" + dir_state.sub_path diff --git a/syftbox/lib/lib.py b/syftbox/lib/lib.py index 258e4c47..4466e6c2 100644 --- a/syftbox/lib/lib.py +++ b/syftbox/lib/lib.py @@ -19,7 +19,12 @@ from typing_extensions import Self from syftbox.client.utils import macos -from syftbox.server.models import get_file_hash, get_file_last_modified +from syftbox.server.models import ( + DirState, + FileInfo, + get_file_hash, + get_file_last_modified, +) current_dir = Path(__file__).parent ASSETS_FOLDER = current_dir.parent / "assets" @@ -107,7 +112,7 @@ class SyftPermission(Jsonable): @classmethod def datasite_default(cls, email: str) -> Self: - return SyftPermission( + return cls( admin=[email], read=[email], write=[email], @@ -213,14 +218,6 @@ def strtobin(encoded_data): return zlib.decompress(base64.b85decode(encoded_data.encode("utf-8"))) -@dataclass -class DirState(Jsonable): - tree: dict[str, FileInfo] - timestamp: float - sync_folder: str - sub_path: str - - def get_symlink(file_path) -> str: return os.readlink(file_path) @@ -249,12 +246,6 @@ def ignore_dirs(directory: str, root: str, ignore_folders=None) -> bool: return False -@dataclass -class FileInfo(Jsonable): - file_hash: str - last_modified: float - - def hash_dir( sync_folder: str, sub_path: str, @@ -295,7 +286,10 @@ def ignore_file(directory: str, root: str, filename: str) -> bool: return False -def get_datasites(sync_folder: str) -> list[str]: +def get_datasites(sync_folder: str | Path) -> list[str]: + sync_folder = ( + str(sync_folder.resolve()) if isinstance(sync_folder, Path) else sync_folder + ) datasites = [] folders = os.listdir(sync_folder) for folder in folders: diff --git a/syftbox/server/models.py b/syftbox/server/models.py index 7a458646..d0cc5b2f 100644 --- a/syftbox/server/models.py +++ b/syftbox/server/models.py @@ -11,6 +11,11 @@ def to_dict(self) -> dict: # used until we remote Jsonable from the code base return self.model_dump(mode="json") + def save(self, path: str) -> bool: + with open(path, "w") as f: + f.write(self.model_dump_json()) + return self.model_dump(mode="json") + class FileChangeKind(Enum): CREATE: str = "create" @@ -61,12 +66,17 @@ def newer(self) -> bool: return False - def read(self) -> bytes: + def is_directory(self) -> bool: + return os.path.isdir(self.full_path) + + def read(self) -> bytes | None: # if is_symlink(self.full_path): # # write a text file with a syftlink # data = convert_to_symlink(self.full_path).encode("utf-8") # return data # else: + if self.is_directory(): + return None with open(self.full_path, "rb") as f: return f.read() @@ -141,6 +151,41 @@ class ListDatasitesResponse(BaseModel): status: str +class ReadResponse(BaseModel): + change: FileChange + status: str + is_directory: bool = False + data: Optional[str] = None + + +class ReadRequest(BaseModel): + email: str + change: FileChange + + +class FileInfo(SyftBaseModel): + file_hash: str + last_modified: float + + +class DirState(SyftBaseModel): + tree: dict[str, FileInfo] + timestamp: float + sync_folder: str + sub_path: str + + +class DirStateRequest(SyftBaseModel): + email: str + sub_path: str + + +class DirStateResponse(SyftBaseModel): + sub_path: str + dir_state: DirState + status: str + + def get_file_last_modified(file_path: str) -> float: return os.path.getmtime(file_path) diff --git a/syftbox/server/server.py b/syftbox/server/server.py index f38a0b72..e0f0951b 100644 --- a/syftbox/server/server.py +++ b/syftbox/server/server.py @@ -33,9 +33,11 @@ strtobin, ) from syftbox.server.models import ( - FileChange, - FileChangeKind, + DirStateRequest, + DirStateResponse, ListDatasitesResponse, + ReadRequest, + ReadResponse, WriteRequest, WriteResponse, ) @@ -372,45 +374,31 @@ async def write( ) -@app.post("/read") +@app.post("/read", response_model=ReadResponse) async def read( - request: Request, server_settings: ServerSettings = Depends(get_server_settings) -): - data = await request.json() - email = data["email"] - change_dict = data["change"] - change_dict["kind"] = FileChangeKind(change_dict["kind"]) - change = FileChange(**change_dict) + request: ReadRequest, server_settings: ServerSettings = Depends(get_server_settings) +) -> ReadResponse: + email = request.email + change = request.change change.sync_folder = os.path.abspath(str(server_settings.snapshot_folder)) - - json_dict = {"change": change.model_dump(mode="json")} - - if change.kind_write: - if os.path.isdir(change.full_path): - # Handle directory - json_dict["is_directory"] = True - else: - # Handle file - bin_data = change.read() - json_dict["data"] = bintostr(bin_data) - elif change.kind_delete: - # Handle delete operation if needed - pass - else: - raise Exception(f"Unknown type of change kind. {change.kind}") - print(f"> {email} {change.kind}: {change.internal_path}") - return JSONResponse({"status": "success"} | json_dict, status_code=200) + # TODO: handle permissions, create and delete + return ReadResponse( + status="success", + change=change.model_dump(mode="json"), + data=bintostr(change.read()) if change.kind_write else None, + is_directory=change.is_directory(), + ) -@app.post("/dir_state") +@app.post("/dir_state", response_model=DirStateResponse) async def dir_state( - request: Request, server_settings: ServerSettings = Depends(get_server_settings) -): + request: DirStateRequest, + server_settings: ServerSettings = Depends(get_server_settings), +) -> DirStateResponse: try: - data = await request.json() - email = data["email"] - sub_path = data["sub_path"] + email = request.email + sub_path = request.sub_path snapshot_folder = str(server_settings.snapshot_folder) full_path = os.path.join(snapshot_folder, sub_path) remote_dir_state = hash_dir(snapshot_folder, sub_path) @@ -422,10 +410,13 @@ async def dir_state( read_state = filter_read_state(email, remote_dir_state, perm_tree) remote_dir_state.tree = read_state - response_json = {"sub_path": sub_path, "dir_state": remote_dir_state.to_dict()} if remote_dir_state: - return JSONResponse({"status": "success"} | response_json, status_code=200) - return JSONResponse({"status": "error"}, status_code=400) + return DirStateResponse( + sub_path=sub_path, + dir_state=remote_dir_state, + status="success", + ) + raise HTTPException(status_code=400, detail={"status": "error"}) except Exception as e: # TODO dir_state can fail in hash_dir os.path.join # example: if sub_path is absolute, os.path.join will return sub_path and not snapshot_folder @@ -433,8 +424,10 @@ async def dir_state( print("Failed to run /dir_state", e) -@app.get("/list_datasites") -async def datasites(server_settings: ServerSettings = Depends(get_server_settings)): +@app.get("/list_datasites", response_model=ListDatasitesResponse) +async def datasites( + server_settings: ServerSettings = Depends(get_server_settings), +) -> ListDatasitesResponse: datasites = get_datasites(server_settings.snapshot_folder) if datasites: return ListDatasitesResponse( diff --git a/tests/server/endpoint_test.py b/tests/server/endpoint_test.py index 0e3cf397..dbd68f29 100644 --- a/tests/server/endpoint_test.py +++ b/tests/server/endpoint_test.py @@ -1,3 +1,4 @@ +import json import time import pytest @@ -8,6 +9,13 @@ from syftbox.server.settings import ServerSettings TEST_DATASITE_NAME = "test_datasite@openmined.org" +TEST_FILE = "test_file.txt" +PERMFILE_FILE = "_.syftperm" +PERMFILE_DICT = { + "admin": [TEST_DATASITE_NAME], + "read": ["GLOBAL"], + "write": [TEST_DATASITE_NAME], +} @pytest.fixture(scope="function") @@ -23,6 +31,14 @@ def client(monkeypatch, tmp_path): datasite = settings.snapshot_folder / datasite_name datasite.mkdir(parents=True) + datafile = datasite / TEST_FILE + datafile.touch() + datafile.write_bytes(b"Hello, World!") + + permfile = datasite / PERMFILE_FILE + permfile.touch() + permfile.write_text(json.dumps(PERMFILE_DICT)) + with TestClient(app) as client: yield client @@ -65,3 +81,28 @@ def test_list_datasites(client: TestClient): response = client.get(f"/datasites/{TEST_DATASITE_NAME}/") assert response.status_code == 200 + + +def test_read_file(client: TestClient): + change = { + "kind": "write", + "parent_path": TEST_DATASITE_NAME, + "sub_path": TEST_FILE, + "file_hash": "some_hash", + "last_modified": time.time(), + } + response = client.post( + "/read", json={"email": TEST_DATASITE_NAME, "change": change} + ) + + response.raise_for_status() + + +def test_dir_state(client: TestClient): + response = client.post( + "/dir_state", json={"email": TEST_DATASITE_NAME, "sub_path": "."} + ) + + response.raise_for_status() + tree = response.json()["dir_state"]["tree"] + assert "test_datasite@openmined.org/test_file.txt" in tree