Skip to content

Commit

Permalink
Redesign base path file structure (#373)
Browse files Browse the repository at this point in the history
Addresses #368

First review this #372
  • Loading branch information
PhilippeMoussalli authored Aug 23, 2023
1 parent 36973db commit b9f3dea
Show file tree
Hide file tree
Showing 17 changed files with 275 additions and 272 deletions.
6 changes: 4 additions & 2 deletions src/fondant/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,8 @@ def _generate_spec(
command.extend(
[
"--output_manifest_path",
f"{path}/{component_name}/manifest.json",
f"{path}/{metadata.pipeline_name}/{metadata.run_id}/"
f"{component_name}/manifest.json",
],
)

Expand All @@ -160,7 +161,8 @@ def _generate_spec(
command.extend(
[
"--input_manifest_path",
f"{path}/{dependency}/manifest.json",
f"{path}/{metadata.pipeline_name}/{metadata.run_id}/"
f"{dependency}/manifest.json",
],
)

Expand Down
4 changes: 2 additions & 2 deletions src/fondant/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,9 +212,9 @@ def upload_manifest(self, manifest: Manifest, save_path: t.Union[str, Path]):

if is_kubeflow_output:
# Save to the expected base path directory
safe_component_name = self.spec.name.replace(" ", "_").lower()
save_path_base_path = (
f"{manifest.base_path}/{safe_component_name}/manifest.json"
f"{manifest.base_path}/{manifest.pipeline_name}/{manifest.run_id}/"
f"{manifest.component_id}/manifest.json"
)
Path(save_path_base_path).parent.mkdir(parents=True, exist_ok=True)
manifest.to_file(save_path_base_path)
Expand Down
8 changes: 4 additions & 4 deletions src/fondant/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ def create(

specification = {
"metadata": metadata.to_dict(),
"index": {"location": f"/index/{run_id}/{component_id}"},
"index": {"location": f"/{pipeline_name}/{run_id}/{component_id}/index"},
"subsets": {},
}
return cls(specification)
Expand Down Expand Up @@ -226,7 +226,7 @@ def add_subset(
raise ValueError(msg)

self._specification["subsets"][name] = {
"location": f"/{name}/{self.run_id}/{self.component_id}",
"location": f"/{self.pipeline_name}/{self.run_id}/{self.component_id}/{name}",
"fields": {name: type_.to_json() for name, type_ in fields},
}

Expand Down Expand Up @@ -254,7 +254,7 @@ def evolve( # noqa : PLR0912 (too many branches)
# Update index location as this is currently always rewritten
evolved_manifest.index._specification[
"location"
] = f"/index/{self.run_id}/{component_id}"
] = f"/{self.pipeline_name}/{self.run_id}/{component_id}/index"

# If additionalSubsets is False in consumes,
# Remove all subsets from the manifest that are not listed
Expand Down Expand Up @@ -305,7 +305,7 @@ def evolve( # noqa : PLR0912 (too many branches)
# Update subset location as this is currently always rewritten
evolved_manifest.subsets[subset_name]._specification[
"location"
] = f"/{subset_name}/{self.run_id}/{component_id}"
] = f"{self.pipeline_name}/{self.run_id}/{component_id}/{subset_name}"

# Subset is not yet in manifest, add it
else:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ services:
- '{"base_path": "/foo/bar", "pipeline_name": "test_pipeline", "run_id": "test_pipeline-20230101000000",
"component_id": "first_component"}'
- --output_manifest_path
- /foo/bar/first_component/manifest.json
- /foo/bar/test_pipeline/test_pipeline-20230101000000/first_component/manifest.json
- --storage_args
- a dummy string arg
- --input_partition_rows
Expand Down Expand Up @@ -38,7 +38,7 @@ services:
- '{"base_path": "/foo/bar", "pipeline_name": "test_pipeline", "run_id": "test_pipeline-20230101000000",
"component_id": "second_component"}'
- --output_manifest_path
- /foo/bar/second_component/manifest.json
- /foo/bar/test_pipeline/test_pipeline-20230101000000/second_component/manifest.json
- --storage_args
- a dummy string arg
- --input_partition_rows
Expand All @@ -50,7 +50,7 @@ services:
"array", "items": {"type": "float32"}}}}}, "args": {"storage_args": {"description":
"Storage arguments", "type": "str"}}}'
- --input_manifest_path
- /foo/bar/first_component/manifest.json
- /foo/bar/test_pipeline/test_pipeline-20230101000000/first_component/manifest.json
depends_on:
first_component:
condition: service_completed_successfully
Expand All @@ -64,7 +64,7 @@ services:
- '{"base_path": "/foo/bar", "pipeline_name": "test_pipeline", "run_id": "test_pipeline-20230101000000",
"component_id": "third_component"}'
- --output_manifest_path
- /foo/bar/third_component/manifest.json
- /foo/bar/test_pipeline/test_pipeline-20230101000000/third_component/manifest.json
- --storage_args
- a dummy string arg
- --input_partition_rows
Expand All @@ -78,7 +78,7 @@ services:
false}, "args": {"storage_args": {"description": "Storage arguments", "type":
"str"}}}'
- --input_manifest_path
- /foo/bar/second_component/manifest.json
- /foo/bar/test_pipeline/test_pipeline-20230101000000/second_component/manifest.json
depends_on:
second_component:
condition: service_completed_successfully
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ services:
- '{"base_path": "/foo/bar", "pipeline_name": "test_pipeline", "run_id": "test_pipeline-20230101000000",
"component_id": "first_component"}'
- --output_manifest_path
- /foo/bar/first_component/manifest.json
- /foo/bar/test_pipeline/test_pipeline-20230101000000/first_component/manifest.json
- --storage_args
- a dummy string arg
- --input_partition_rows
Expand All @@ -27,7 +27,7 @@ services:
- '{"base_path": "/foo/bar", "pipeline_name": "test_pipeline", "run_id": "test_pipeline-20230101000000",
"component_id": "image_cropping"}'
- --output_manifest_path
- /foo/bar/image_cropping/manifest.json
- /foo/bar/test_pipeline/test_pipeline-20230101000000/image_cropping/manifest.json
- --cropping_threshold
- '0'
- --padding
Expand All @@ -46,7 +46,7 @@ services:
for the image cropping. The padding is added to all borders of the image.",
"type": "int", "default": 10}}}'
- --input_manifest_path
- /foo/bar/first_component/manifest.json
- /foo/bar/test_pipeline/test_pipeline-20230101000000/first_component/manifest.json
depends_on:
first_component:
condition: service_completed_successfully
Expand Down
88 changes: 44 additions & 44 deletions tests/example_specs/evolution_examples/1/output_manifest.json
Original file line number Diff line number Diff line change
@@ -1,46 +1,46 @@
{
"metadata": {
"pipeline_name": "test_pipeline",
"base_path": "gs://bucket",
"run_id": "12345",
"component_id": "example_component"
},
"index": {
"location": "/index/12345/example_component"
},
"subsets": {
"images": {
"location": "/images/12345/example_component",
"fields": {
"width": {
"type": "int32"
},
"height": {
"type": "int32"
},
"data": {
"type": "binary"
}
"metadata":{
"pipeline_name":"test_pipeline",
"base_path":"gs://bucket",
"run_id":"12345",
"component_id":"example_component"
},
"index":{
"location":"/test_pipeline/12345/example_component/index"
},
"subsets":{
"images":{
"location":"/test_pipeline/12345/example_component/images",
"fields":{
"width":{
"type":"int32"
},
"height":{
"type":"int32"
},
"data":{
"type":"binary"
}
}
},
"captions":{
"location":"/test_pipeline/12345/example_component/captions",
"fields":{
"data":{
"type":"binary"
}
}
},
"embeddings":{
"location":"/test_pipeline/12345/example_component/embeddings",
"fields":{
"data":{
"type":"array",
"items":{
"type":"float32"
}
}
}
}
},
"captions": {
"location": "/captions/12345/example_component",
"fields": {
"data": {
"type": "binary"
}
}
},
"embeddings": {
"location": "/embeddings/12345/example_component",
"fields": {
"data": {
"type": "array",
"items": {
"type": "float32"
}
}
}
}
}
}
}
}
72 changes: 36 additions & 36 deletions tests/example_specs/evolution_examples/2/output_manifest.json
Original file line number Diff line number Diff line change
@@ -1,38 +1,38 @@
{
"metadata": {
"pipeline_name": "test_pipeline",
"base_path": "gs://bucket",
"run_id": "12345",
"component_id": "example_component"
},
"index": {
"location": "/index/12345/example_component"
},
"subsets": {
"images": {
"location": "/images/12345/example_component",
"fields": {
"width": {
"type": "int32"
},
"height": {
"type": "int32"
},
"data": {
"type": "binary"
}
"metadata":{
"pipeline_name":"test_pipeline",
"base_path":"gs://bucket",
"run_id":"12345",
"component_id":"example_component"
},
"index":{
"location":"/test_pipeline/12345/example_component/index"
},
"subsets":{
"images":{
"location":"/test_pipeline/12345/example_component/images",
"fields":{
"width":{
"type":"int32"
},
"height":{
"type":"int32"
},
"data":{
"type":"binary"
}
}
},
"embeddings":{
"location":"/test_pipeline/12345/example_component/embeddings",
"fields":{
"data":{
"type":"array",
"items":{
"type":"float32"
}
}
}
}
},
"embeddings": {
"location": "/embeddings/12345/example_component",
"fields": {
"data": {
"type": "array",
"items": {
"type": "float32"
}
}
}
}
}
}
}
}
60 changes: 30 additions & 30 deletions tests/example_specs/evolution_examples/3/output_manifest.json
Original file line number Diff line number Diff line change
@@ -1,32 +1,32 @@
{
"metadata": {
"pipeline_name": "test_pipeline",
"base_path": "gs://bucket",
"run_id": "12345",
"component_id": "example_component"
},
"index": {
"location": "/index/12345/example_component"
},
"subsets": {
"images": {
"location": "/images/12345/example_component",
"fields": {
"data": {
"type": "binary"
}
"metadata":{
"pipeline_name":"test_pipeline",
"base_path":"gs://bucket",
"run_id":"12345",
"component_id":"example_component"
},
"index":{
"location":"/test_pipeline/12345/example_component/index"
},
"subsets":{
"images":{
"location":"/test_pipeline/12345/example_component/images",
"fields":{
"data":{
"type":"binary"
}
}
},
"embeddings":{
"location":"/test_pipeline/12345/example_component/embeddings",
"fields":{
"data":{
"type":"array",
"items":{
"type":"float32"
}
}
}
}
},
"embeddings": {
"location": "/embeddings/12345/example_component",
"fields": {
"data": {
"type": "array",
"items": {
"type": "float32"
}
}
}
}
}
}
}
}
Loading

0 comments on commit b9f3dea

Please sign in to comment.