Skip to content

Commit

Permalink
Merge pull request #97 from OpenMined/eelco/basic-sync-tests
Browse files Browse the repository at this point in the history
add tests for public file operations, various fixes
  • Loading branch information
abyesilyurt authored Oct 11, 2024
2 parents c9c4861 + 9c57750 commit b875fcf
Show file tree
Hide file tree
Showing 5 changed files with 154 additions and 45 deletions.
12 changes: 7 additions & 5 deletions syftbox/client/plugins/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

from syftbox.lib import (
DirState,
FileInfo,
PermissionTree,
ResettableTimer,
bintostr,
Expand Down Expand Up @@ -236,7 +235,9 @@ def filter_changes(
return valid_changes, valid_change_files, invalid_changes


def push_changes(client_config: ClientConfig, changes: list[FileChange]):
def push_changes(
client_config: ClientConfig, changes: list[FileChange]
) -> list[FileChange]:
written_changes = []
for change in changes:
try:
Expand Down Expand Up @@ -312,6 +313,7 @@ def pull_changes(client_config, changes: list[FileChange]):
f"> {client_config.email} FAILED /read {change.kind} {change.internal_path}",
)
except Exception as e:
traceback.print_exc()
print("Failed to call /read on the server", str(e))
return remote_changes

Expand Down Expand Up @@ -351,7 +353,7 @@ def get_remote_state(client_config: ClientConfig, sub_path: str):
dir_state = DirState(**state_response["dir_state"])
fix_tree = {}
for key, value in dir_state.tree.items():
fix_tree[key] = FileInfo(**value)
fix_tree[key] = value
dir_state.tree = fix_tree
return dir_state
else:
Expand Down Expand Up @@ -425,7 +427,7 @@ def filter_changes_ignore(
return filtered_changes


def sync_up(client_config):
def sync_up(client_config: ClientConfig):
# create a folder to store the change log
change_log_folder = f"{client_config.sync_folder}/{CLIENT_CHANGELOG_FOLDER}"
os.makedirs(change_log_folder, exist_ok=True)
Expand All @@ -450,7 +452,7 @@ def sync_up(client_config):
old_dir_state = DirState.load(dir_filename)
fix_tree = {}
for key, value in old_dir_state.tree.items():
fix_tree[key] = FileInfo(**value)
fix_tree[key] = value
old_dir_state.tree = fix_tree
except Exception:
pass
Expand Down
5 changes: 4 additions & 1 deletion syftbox/lib/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -541,7 +541,10 @@ def is_registered(self) -> bool:
@property
def server_client(self) -> httpx.Client:
if self._server_client is None:
self._server_client = httpx.Client(base_url=self.server_url)
self._server_client = httpx.Client(
base_url=self.server_url,
follow_redirects=True,
)
return self._server_client

def close(self):
Expand Down
7 changes: 7 additions & 0 deletions syftbox/server/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import Optional

from pydantic import BaseModel
from typing_extensions import Self


class SyftBaseModel(BaseModel):
Expand All @@ -16,6 +17,12 @@ def save(self, path: str) -> bool:
f.write(self.model_dump_json())
return self.model_dump(mode="json")

@classmethod
def load(cls, filepath: str) -> Self:
with open(filepath) as f:
data = f.read()
return cls.model_validate_json(data)


class FileChangeKind(Enum):
CREATE: str = "create"
Expand Down
4 changes: 4 additions & 0 deletions syftbox/server/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import os
import random
import sys
import traceback
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
Expand Down Expand Up @@ -417,6 +418,9 @@ async def dir_state(
)
raise HTTPException(status_code=400, detail={"status": "error"})
except Exception as e:
# TODO dir_state can fail in hash_dir os.path.join
# example: if sub_path is absolute, os.path.join will return sub_path and not snapshot_folder
traceback.print_exc()
print("Failed to run /dir_state", e)


Expand Down
171 changes: 132 additions & 39 deletions tests/sync/sync_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,10 @@
import pytest
from fastapi.testclient import TestClient

from syftbox.client.client import app as client_app
from syftbox.client.client import lifespan as client_lifespan
from syftbox.client.plugins.create_datasite import run as run_create_datasite_plugin
from syftbox.client.plugins.init import run as run_init_plugin
from syftbox.client.plugins.sync import do_sync
from syftbox.lib.lib import ClientConfig, perm_file_path
from syftbox.lib.lib import ClientConfig, SharedState, perm_file_path
from syftbox.server.server import app as server_app
from syftbox.server.server import lifespan as server_lifespan
from syftbox.server.settings import ServerSettings
Expand All @@ -55,14 +53,23 @@


@pytest.fixture(scope="function")
def datasite_1(
tmp_path: Path, server_client: TestClient
) -> Generator[TestClient, None, None]:
def datasite_1(tmp_path: Path, server_client: TestClient) -> ClientConfig:
email = "user_1@openmined.org"
client_path = tmp_path / "client_1"
return setup_datasite(tmp_path, server_client, email)


@pytest.fixture(scope="function")
def datasite_2(tmp_path: Path, server_client: TestClient) -> ClientConfig:
email = "user_2@openmined.org"
return setup_datasite(tmp_path, server_client, email)


def setup_datasite(
tmp_path: Path, server_client: TestClient, email: str
) -> ClientConfig:
client_path = tmp_path / email
client_path.unlink(missing_ok=True)
client_path.mkdir(parents=True)
print("client_path", client_path)

client_config = ClientConfig(
config_path=str(client_path / "client_config.json"),
Expand All @@ -74,20 +81,16 @@ def datasite_1(

client_config._server_client = server_client

lifespan_with_settings = partial(client_lifespan, client_config=client_config)
client_app.router.lifespan_context = lifespan_with_settings
with TestClient(client_app) as client:
yield client


def setup_datasite(datasite: TestClient):
run_init_plugin(datasite.app.shared_state)
run_create_datasite_plugin(datasite.app.shared_state)
wait_for_datasite_setup(datasite)
shared_state = SharedState(client_config=client_config)
run_init_plugin(shared_state)
run_create_datasite_plugin(shared_state)
wait_for_datasite_setup(client_config)
return client_config


@pytest.fixture(scope="function")
def server_client(tmp_path: Path) -> Generator[TestClient, None, None]:
print("Using test dir", tmp_path)
path = tmp_path / "server"
path.mkdir()

Expand All @@ -105,10 +108,9 @@ def http_server_client():
yield client


def wait_for_datasite_setup(datasite: TestClient, timeout=5):
def wait_for_datasite_setup(client_config: ClientConfig, timeout=5):
print("waiting for datasite setup...")

client_config: ClientConfig = datasite.app.shared_state.client_config
perm_file = perm_file_path(str(client_config.datasite_path))

t0 = time.time()
Expand All @@ -123,34 +125,125 @@ def wait_for_datasite_setup(datasite: TestClient, timeout=5):
raise TimeoutError("Datasite setup took too long")


def create_random_file(datasite_client: TestClient) -> Path:
client_config: ClientConfig = datasite_client.app.shared_state.client_config
file_path = Path(client_config.datasite_path) / fake.file_name(extension="json")
def create_random_file(client_config: ClientConfig, sub_path: str = "") -> Path:
relative_path = Path(sub_path) / fake.file_name(extension="json")
file_path = client_config.datasite_path / relative_path
content = {"body": fake.text()}
file_path.write_text(json.dumps(content))
return file_path

path_in_datasite = file_path.relative_to(client_config.sync_folder)
return path_in_datasite


def assert_files_not_on_datasite(datasite: ClientConfig, files: list[Path]):
for file in files:
assert not (
datasite.sync_folder / file
).exists(), f"File {file} exists on datasite {datasite.email}"


def assert_files_on_datasite(datasite: ClientConfig, files: list[Path]):
for file in files:
assert (
datasite.sync_folder / file
).exists(), f"File {file} does not exist on datasite {datasite.email}"


def test_sync_file_to_server_snapshot(
tmp_path: Path, server_client: TestClient, datasite_1: TestClient
def assert_files_on_server(server_client: TestClient, files: list[Path]):
server_settings: ServerSettings = server_client.app_state["server_settings"]
for file in files:
assert (
server_settings.snapshot_folder / file
).exists(), f"File {file} does not exist on server"


def test_create_public_file(
server_client: TestClient, datasite_1: ClientConfig, datasite_2: ClientConfig
):
print(datasite_1.app.shared_state.client_config)
setup_datasite(datasite_1)
# Two datasites create and sync a random file each

do_sync(datasite_1.app.shared_state)
datasite_1_shared_state = SharedState(client_config=datasite_1)
datasite_2_shared_state = SharedState(client_config=datasite_2)

print(server_client.app_state["server_settings"].snapshot_folder)
file_path_1 = create_random_file(datasite_1, "public")
file_path_2 = create_random_file(datasite_2, "public")
assert_files_on_datasite(datasite_1, [file_path_1])
assert_files_on_datasite(datasite_2, [file_path_2])

file_path = create_random_file(datasite_1)
# client 1 syncs
do_sync(datasite_1_shared_state)
assert_files_on_server(server_client, [file_path_1])
assert_files_on_datasite(datasite_1, [file_path_1])

do_sync(datasite_1.app.shared_state)
# client 2 syncs
do_sync(datasite_2_shared_state)
assert_files_on_server(server_client, [file_path_1, file_path_2])
assert_files_on_datasite(datasite_1, [file_path_1])
assert_files_on_datasite(datasite_2, [file_path_1, file_path_2])

snapshot_file_path = (
server_client.app_state["server_settings"].snapshot_folder
/ datasite_1.app.shared_state.client_config.email
/ file_path.name
)
# client 1 syncs again
do_sync(datasite_1_shared_state)
assert_files_on_server(server_client, [file_path_1, file_path_2])
assert_files_on_datasite(datasite_1, [file_path_1, file_path_2])


def test_modify_public_file(
server_client: TestClient, datasite_1: ClientConfig, datasite_2: ClientConfig
):
# Two datasites create and sync a random file each

datasite_1_shared_state = SharedState(client_config=datasite_1)
datasite_2_shared_state = SharedState(client_config=datasite_2)

assert snapshot_file_path.exists()
file_path_1 = create_random_file(datasite_1, "public")
assert_files_on_datasite(datasite_1, [file_path_1])

print("test done")
# client 1 syncs
do_sync(datasite_1_shared_state)
assert_files_on_server(server_client, [file_path_1])

# client 2 syncs
do_sync(datasite_2_shared_state)
assert_files_on_datasite(datasite_2, [file_path_1])

# client 1 modifies
(datasite_1.sync_folder / file_path_1).write_text("modified")
do_sync(datasite_1_shared_state)

# client 2 gets the modification
do_sync(datasite_2_shared_state)
assert (datasite_2.sync_folder / file_path_1).read_text() == "modified"


@pytest.mark.skip("Delete works after a few seconds, is this intended behaviour?")
def test_delete_public_file(
server_client: TestClient, datasite_1: ClientConfig, datasite_2: ClientConfig
):
# Two datasites create and sync a random file each
datasite_1_shared_state = SharedState(client_config=datasite_1)
datasite_2_shared_state = SharedState(client_config=datasite_2)

file_path_1 = create_random_file(datasite_1, "public")
assert_files_on_datasite(datasite_1, [file_path_1])

# client 1 syncs
do_sync(datasite_1_shared_state)
assert_files_on_server(server_client, [file_path_1])

# client 2 syncs
do_sync(datasite_2_shared_state)
assert_files_on_datasite(datasite_2, [file_path_1])

# client 1 deletes
(datasite_1.sync_folder / file_path_1).unlink()

# deletion is only synced after a few seconds, so first sync does not delete
do_sync(datasite_1_shared_state)
do_sync(datasite_2_shared_state)
assert_files_on_datasite(datasite_2, [file_path_1])

# after a few seconds the file is deleted
time.sleep(5)
do_sync(datasite_1_shared_state)
do_sync(datasite_2_shared_state)
assert_files_on_datasite(datasite_2, [file_path_1])

0 comments on commit b875fcf

Please sign in to comment.