Skip to content

Commit

Permalink
replace FileChange with Pydantic model
Browse files Browse the repository at this point in the history
  • Loading branch information
abyesilyurt committed Oct 10, 2024
1 parent 949ba57 commit a8938a3
Show file tree
Hide file tree
Showing 5 changed files with 28 additions and 146 deletions.
5 changes: 2 additions & 3 deletions syftbox/client/plugins/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@

from syftbox.lib import (
DirState,
FileChange,
FileChangeKind,
FileInfo,
PermissionTree,
ResettableTimer,
Expand All @@ -20,6 +18,7 @@
hash_dir,
strtobin,
)
from syftbox.server.models import FileChange, FileChangeKind

CLIENT_CHANGELOG_FOLDER = "syft_changelog"
CLIENT_APPS = "apps"
Expand Down Expand Up @@ -100,7 +99,7 @@ def remove_empty_folders(leaf, current_path, root_dir):


# write operations
def diff_dirstate(old, new):
def diff_dirstate(old: FileChange, new: FileChange):
sync_folder = old.sync_folder
old_sub_path = old.sub_path
try:
Expand Down
130 changes: 1 addition & 129 deletions syftbox/lib/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import requests
from typing_extensions import Self

from syftbox.server.models import FileChangeKind
from syftbox.server.models import get_file_hash, get_file_last_modified

USER_GROUP_GLOBAL = "GLOBAL"

Expand Down Expand Up @@ -205,120 +205,6 @@ def strtobin(encoded_data):
return zlib.decompress(base64.b85decode(encoded_data.encode("utf-8")))


@dataclass
class FileChange(Jsonable):
    """A single file change (create / write / delete) inside a sync folder.

    The affected file lives at ``sync_folder/parent_path/sub_path``.
    ``file_hash`` is compared against ``get_file_hash`` of the local file and
    ``last_modified`` against its mtime (seconds since the epoch, as used by
    ``os.utime``).
    """

    kind: FileChangeKind
    parent_path: str
    sub_path: str
    file_hash: str
    last_modified: float
    sync_folder: str | None = None

    @property
    def kind_write(self) -> bool:
        """True when the change creates or overwrites a file."""
        return self.kind in (FileChangeKind.WRITE, FileChangeKind.CREATE)

    @property
    def kind_delete(self) -> bool:
        """True when the change removes a file."""
        return self.kind == FileChangeKind.DELETE

    def to_dict(self) -> dict:
        """Return the public fields as a dict, passing each value through ``pack``.

        Attributes whose name starts with ``_`` are skipped and ``kind`` is
        emitted as its enum value rather than the enum member.
        """
        output = {}
        for k, v in self.__dict__.items():
            if k.startswith("_"):
                continue
            if k == "kind":
                v = v.value
            output[k] = pack(v)
        return output

    @property
    def full_path(self) -> str:
        """Path of the file including the sync folder.

        Raises:
            TypeError: if ``sync_folder`` is still ``None``.
        """
        return self.sync_folder + "/" + self.parent_path + "/" + self.sub_path

    @property
    def internal_path(self) -> str:
        """Path of the file relative to the sync folder."""
        return self.parent_path + "/" + self.sub_path

    def hash_equal_or_none(self) -> bool:
        """Return True if no local file exists or its hash equals ``file_hash``."""
        if not os.path.exists(self.full_path):
            return True

        local_file_hash = get_file_hash(self.full_path)
        return self.file_hash == local_file_hash

    def newer(self) -> bool:
        """Return True if no local file exists or this change is at least as recent."""
        if not os.path.exists(self.full_path):
            return True

        local_last_modified = get_file_last_modified(self.full_path)
        return self.last_modified >= local_last_modified

    def read(self) -> bytes:
        """Return the raw bytes of the file at ``full_path``."""
        # NOTE: symlink -> syftlink conversion is currently disabled; the file
        # content is always returned as-is.
        with open(self.full_path, "rb") as f:
            return f.read()

    def write(self, data: bytes) -> bool:
        """Write ``data`` to ``full_path``; see ``write_to`` for the return value."""
        # NOTE: turning non-private syftlinks into symlinks is currently
        # disabled; the payload is always written as a regular file.
        return self.write_to(data, self.full_path)

    def delete(self) -> bool:
        """Delete the file at ``full_path``.

        Returns:
            True if the file was removed or did not exist, False on any other
            failure (which is printed).
        """
        path = self.full_path
        try:
            os.unlink(path)
        except FileNotFoundError:
            # Already gone. Matching on the exception type instead of the
            # message text keeps this working on every platform and locale.
            return True
        except Exception as e:
            print(f"Failed to delete file at {path}. {e}")
            return False
        return True

    def write_to(self, data: bytes, path: str) -> bool:
        """Write ``data`` to ``path`` and stamp the file with ``last_modified``.

        Parent directories are created as needed.

        Returns:
            True on success, False if writing or setting the timestamp failed
            (the error is printed).
        """
        base_dir = os.path.dirname(path)
        # A bare filename has an empty dirname, and os.makedirs("") raises.
        if base_dir:
            os.makedirs(base_dir, exist_ok=True)
        try:
            with open(path, "wb") as f:
                f.write(data)
            # Set the timestamps only after the file is closed, otherwise the
            # final flush would overwrite the mtime again.
            os.utime(
                path,
                (self.last_modified, self.last_modified),
                follow_symlinks=False,
            )
            return True
        except Exception as e:
            print("failed to write", path, e)
            return False


@dataclass
class DirState(Jsonable):
tree: dict[str, FileInfo]
Expand Down Expand Up @@ -347,20 +233,6 @@ def is_symlink(file_path) -> bool:
# return str(syft_link)


def get_file_last_modified(file_path: str) -> float:
    """Return the modification time of *file_path* in seconds since the epoch."""
    stat_result = os.stat(file_path)
    return stat_result.st_mtime


def get_file_hash(file_path: str) -> str:
    """Return the hex MD5 digest of the file at *file_path*.

    The file is hashed in fixed-size chunks so that arbitrarily large files
    do not have to fit into memory.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    # NOTE: hashing the syftlink of a symlink instead of its target is
    # currently disabled; the file content itself is always hashed.
    digest = hashlib.md5()
    with open(file_path, "rb") as file:
        while chunk := file.read(1024 * 1024):
            digest.update(chunk)
    return digest.hexdigest()


def ignore_dirs(directory: str, root: str, ignore_folders=None) -> bool:
if ignore_folders is not None:
for ignore_folder in ignore_folders:
Expand Down
18 changes: 16 additions & 2 deletions syftbox/server/models.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import hashlib
import os
from enum import Enum
from typing import Optional

from pydantic import BaseModel

from syftbox.lib.lib import get_file_hash, get_file_last_modified


class FileChangeKind(Enum):
CREATE: str = "create"
Expand Down Expand Up @@ -134,3 +133,18 @@ class WriteResponse(BaseModel):
# Response model carrying a list of datasite strings together with a status string.
# NOTE(review): presumably the body returned by the /list_datasites endpoint; confirm against the server.
class ListDatasitesResponse(BaseModel):
# Datasite identifiers included in the response.
datasites: list[str]
# Status string for the request; the allowed values are not visible in this excerpt.
status: str


def get_file_last_modified(file_path: str) -> float:
    """Return the modification time of *file_path* in seconds since the epoch."""
    stat_result = os.stat(file_path)
    return stat_result.st_mtime


def get_file_hash(file_path: str) -> str:
    """Return the hex MD5 digest of the file at *file_path*.

    The file is streamed through the hash in fixed-size chunks, so memory use
    stays constant regardless of file size.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    # NOTE: hashing the syftlink of a symlink instead of its target is
    # currently disabled; the file content itself is always hashed.
    chunk_size = 1024 * 1024
    digest = hashlib.md5()
    with open(file_path, "rb") as file:
        while chunk := file.read(chunk_size):
            digest.update(chunk)
    return digest.hexdigest()
2 changes: 1 addition & 1 deletion syftbox/server/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@

from syftbox import __version__
from syftbox.lib import (
FileChange,
Jsonable,
PermissionTree,
bintostr,
Expand All @@ -32,6 +31,7 @@
strtobin,
)
from syftbox.server.models import (
FileChange,
FileChangeKind,
ListDatasitesResponse,
WriteRequest,
Expand Down
19 changes: 8 additions & 11 deletions tests/server/endpoint_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@


@pytest.fixture(scope="function")
def client(snapshot_folder, monkeypatch, tmp_path):
def client(monkeypatch, tmp_path):
"""Every client gets their own snapshot folder at `tmp_path`"""
snapshot_folder = tmp_path / TEST_DATASITE_NAME
settings = ServerSettings.from_data_folder(snapshot_folder)
monkeypatch.setenv("SYFTBOX_DATA_FOLDER", str(settings.data_folder))
monkeypatch.setenv("SYFTBOX_SNAPSHOT_FOLDER", str(settings.snapshot_folder))
Expand All @@ -37,22 +38,16 @@ def test_register(client):


def test_write_file(client: TestClient):
# Setup: Create a temporary directory for testing
# Test data
test_file = "test_file.txt"
test_content = bintostr(b"Hello, World!")
email = "test@example.com"

request_data = {
"email": email,
"email": TEST_DATASITE_NAME,
"change": {
"kind": "write",
"parent_path": email,
"sub_path": test_file,
"parent_path": TEST_DATASITE_NAME,
"sub_path": "test_file.txt",
"file_hash": "some_hash",
"last_modified": time.time(),
},
"data": test_content,
"data": bintostr(b"Hello, World!"),
}

# Send POST request to /write endpoint
Expand All @@ -66,5 +61,7 @@ def test_list_datasites(client: TestClient):
response = client.get("/list_datasites")
assert response.status_code == 200

assert len(response.json()["datasites"])

response = client.get(f"/datasites/{TEST_DATASITE_NAME}/")
assert response.status_code == 200

0 comments on commit a8938a3

Please sign in to comment.