Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Standarize fsspec file access #397

Merged
merged 1 commit into from
Aug 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions src/fondant/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def _get_latest_matching_manifest(self) -> t.Union[Manifest, None]:
else:
manifest_file = matching_manifests[0]

return Manifest.from_file(manifest_file)
return Manifest.from_file(manifest_file, self.filesystem)

logger.info("No matching execution for component detected")

Expand Down Expand Up @@ -323,15 +323,15 @@ def upload_manifest(self, manifest: Manifest, save_path: t.Union[str, Path]):
f"{manifest.component_id}/manifest_{manifest.cache_key}.json"
)
Path(save_path_base_path).parent.mkdir(parents=True, exist_ok=True)
manifest.to_file(save_path_base_path)
manifest.to_file(save_path_base_path, self.filesystem)
logger.info(f"Saving output manifest to {save_path_base_path}")
# Write manifest to the native kfp artifact path that will be passed as an artifact
# and read by the next component
manifest.to_file(save_path)
manifest.to_file(save_path, self.filesystem)
else:
# Local runner
Path(save_path).parent.mkdir(parents=True, exist_ok=True)
manifest.to_file(save_path)
manifest.to_file(save_path, self.filesystem)
logger.info(f"Saving output manifest to {save_path}")


Expand Down Expand Up @@ -372,7 +372,7 @@ class TransformExecutor(Executor[Component]):
"""Base class for a Fondant transform component."""

def _load_or_create_manifest(self) -> Manifest:
return Manifest.from_file(self.input_manifest_path)
return Manifest.from_file(self.input_manifest_path, self.filesystem)

def _execute_component(
self,
Expand Down Expand Up @@ -514,7 +514,7 @@ def optional_fondant_arguments() -> t.List[str]:
return ["output_manifest_path"]

def _load_or_create_manifest(self) -> Manifest:
return Manifest.from_file(self.input_manifest_path)
return Manifest.from_file(self.input_manifest_path, self.filesystem)

def _execute_component(
self,
Expand Down
10 changes: 5 additions & 5 deletions src/fondant/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from pathlib import Path

import jsonschema.exceptions
from fsspec import open as fs_open
from fsspec import AbstractFileSystem
from jsonschema import Draft4Validator
from referencing import Registry, Resource
from referencing.jsonschema import DRAFT4
Expand Down Expand Up @@ -177,15 +177,15 @@ def create(
return cls(specification)

@classmethod
def from_file(cls, path: t.Union[str, Path]) -> "Manifest":
def from_file(cls, path: t.Union[str, Path], fs: AbstractFileSystem) -> "Manifest":
"""Load the manifest from the file specified by the provided path."""
with fs_open(path, encoding="utf-8") as file_:
with fs.open(path, encoding="utf-8") as file_:
specification = json.load(file_)
return cls(specification)

def to_file(self, path: t.Union[str, Path]) -> None:
def to_file(self, path: t.Union[str, Path], fs: AbstractFileSystem) -> None:
"""Dump the manifest to the file specified by the provided path."""
with fs_open(path, "w", encoding="utf-8") as file_:
with fs.open(path, "w", encoding="utf-8") as file_:
json.dump(self._specification, file_)

def copy(self) -> "Manifest":
Expand Down
5 changes: 4 additions & 1 deletion tests/test_component.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,10 @@ def mock_fs_created(path):
assert matching_execution_manifest.run_id == "test_pipeline_2023"
# Check that the previous component is not cached due to differing run IDs
assert (
executor._is_previous_cached(Manifest.from_file(input_manifest_path)) is False
executor._is_previous_cached(
Manifest.from_file(input_manifest_path, executor.filesystem),
)
is False
)


Expand Down
4 changes: 3 additions & 1 deletion tests/test_data_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from fondant.component_spec import ComponentSpec
from fondant.data_io import DaskDataLoader, DaskDataWriter
from fondant.manifest import Manifest
from fsspec.implementations.local import LocalFileSystem

manifest_path = Path(__file__).parent / "example_data/manifest.json"
component_spec_path = Path(__file__).parent / "example_data/components/1.yaml"
Expand All @@ -15,7 +16,8 @@

@pytest.fixture()
def manifest():
return Manifest.from_file(manifest_path)
fs = LocalFileSystem()
return Manifest.from_file(manifest_path, fs)


@pytest.fixture()
Expand Down
6 changes: 4 additions & 2 deletions tests/test_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pytest
from fondant.exceptions import InvalidManifest
from fondant.manifest import Field, Index, Manifest, Subset, Type
from fsspec.implementations.local import LocalFileSystem

manifest_path = Path(__file__).parent / "example_specs/manifests"

Expand Down Expand Up @@ -88,13 +89,14 @@ def test_set_base_path(valid_manifest):
def test_from_to_file(valid_manifest):
"""Test reading from and writing to file."""
tmp_path = "/tmp/manifest.json"
fs = LocalFileSystem()
with open(tmp_path, "w", encoding="utf-8") as f:
json.dump(valid_manifest, f)

manifest = Manifest.from_file(tmp_path)
manifest = Manifest.from_file(tmp_path, fs)
assert manifest.metadata == valid_manifest["metadata"]

manifest.to_file(tmp_path)
manifest.to_file(tmp_path, fs)
with open(tmp_path, encoding="utf-8") as f:
assert json.load(f) == valid_manifest

Expand Down
Loading