Skip to content

Commit

Permalink
Merge branch 'ml6team:main' into feature/commoncrawl-download-segments
Browse files Browse the repository at this point in the history
  • Loading branch information
shayorshay authored Aug 1, 2023
2 parents f774493 + 4161b19 commit 64abf3e
Show file tree
Hide file tree
Showing 7 changed files with 82 additions and 7 deletions.
4 changes: 2 additions & 2 deletions components/caption_images/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM --platform=linux/amd64 pytorch/pytorch:2.0.1-cuda11.7-cudnn8-devel
FROM --platform=linux/amd64 pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime

# System dependencies
RUN apt-get update && \
Expand All @@ -20,4 +20,4 @@ WORKDIR /component/src
# Copy over src-files
COPY src/ .

ENTRYPOINT ["python", "main.py"]
ENTRYPOINT ["python", "main.py"]
2 changes: 1 addition & 1 deletion components/caption_images/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def __init__(self, *args, model_id: str, batch_size: int, max_new_tokens: int) -
self.max_new_tokens = max_new_tokens

def transform(self, dataframe: pd.DataFrame) -> pd.DataFrame:
images = dataframe["images"]["data"].apply(
images = pd.Series(dataframe["images"]["data"]).apply(
process_image,
processor=self.processor,
device=self.device,
Expand Down
27 changes: 27 additions & 0 deletions components/caption_images/tests/test_caption_images.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import pandas as pd
import requests
from caption_images.src.main import CaptionImagesComponent
from fondant.abstract_component_test import AbstractComponentTest


class TestCaptionImagesComponent(AbstractComponentTest):
    """Run the shared component test against ``CaptionImagesComponent``."""

    def create_component(self):
        """Return the captioning component configured for the test."""
        return CaptionImagesComponent(
            model_id="Salesforce/blip-image-captioning-base",
            batch_size=4,
            max_new_tokens=2,
        )

    def create_input_data(self):
        """Download the sample images and return them as the input dataframe.

        Raises:
            requests.HTTPError: if a download answers with an error status.
            requests.Timeout: if a download does not respond in time.
        """
        image_urls = [
            "https://cdn.pixabay.com/photo/2023/06/29/09/52/angkor-thom-8096092_1280.jpg",
            "https://cdn.pixabay.com/photo/2023/07/19/18/56/japanese-beetle-8137606_1280.png",
        ]
        images = []
        for url in image_urls:
            # Without a timeout, requests waits forever on a stalled
            # connection and the test run would hang.
            response = requests.get(url, timeout=30)
            # Fail here rather than passing an HTTP error page to the model
            # as if it were image bytes.
            response.raise_for_status()
            images.append(response.content)
        return pd.DataFrame(
            {"images": {"data": images}},
        )

    def create_output_data(self):
        """Return the captions expected for the two sample images."""
        return pd.DataFrame(
            data={("captions", "text"): {0: "a motorcycle", 1: "a beetle"}},
        )
2 changes: 1 addition & 1 deletion components/segment_images/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM --platform=linux/amd64 pytorch/pytorch:2.0.1-cuda11.7-cudnn8-devel
FROM --platform=linux/amd64 pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime

# System dependencies
RUN apt-get update && \
Expand Down
2 changes: 1 addition & 1 deletion docs/custom_component.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ which is then imported in the `main.py` script.
The `Dockerfile` defines how to build the component into a Docker image. An example Dockerfile is defined below.

```bash
FROM --platform=linux/amd64 pytorch/pytorch:2.0.1-cuda11.7-cudnn8-devel
FROM --platform=linux/amd64 pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime

## System dependencies
RUN apt-get update && \
Expand Down
47 changes: 47 additions & 0 deletions src/fondant/abstract_component_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from abc import ABC, abstractmethod

import pandas as pd
import pytest


class AbstractComponentTest(ABC):
    """Base class for component tests built around one ``transform`` check.

    Concrete test classes provide the component, its input data and the
    expected output data; the inherited ``test_transform`` compares the
    component's result with that expectation.
    """

    @abstractmethod
    def create_component(self):
        """Return the component instance that is to be tested.

        Concrete test classes must override this method.
        """
        raise NotImplementedError

    @abstractmethod
    def create_input_data(self):
        """Return the input data passed to the component's ``transform``.

        Concrete test classes must override this method.
        """
        raise NotImplementedError

    @abstractmethod
    def create_output_data(self):
        """Return the output data ``transform`` is expected to produce.

        Concrete test classes must override this method.
        """
        raise NotImplementedError

    @pytest.fixture(autouse=True)
    def __setUp(self):
        """Prepare the component and its data before each test method.

        Shared setup steps for all component tests belong here.
        """
        self.component = self.create_component()
        self.input_data = self.create_input_data()
        self.expected_output_data = self.create_output_data()

    def test_transform(self):
        """Check that ``transform`` returns the expected dataframe.

        The component's output for the prepared input is compared with the
        expected output using ``pd.testing.assert_frame_equal``.
        """
        actual = self.component.transform(self.input_data)
        pd.testing.assert_frame_equal(actual, self.expected_output_data)
5 changes: 3 additions & 2 deletions src/fondant/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,9 @@ def upload_manifest(self, manifest: Manifest, save_path: t.Union[str, Path]):
if is_kubeflow_output:
# Save to the expected base path directory
safe_component_name = self.spec.name.replace(" ", "_").lower()
base_path = self.metadata["base_path"]
save_path_base_path = f"{base_path}/{safe_component_name}/manifest.json"
save_path_base_path = (
f"{manifest.base_path}/{safe_component_name}/manifest.json"
)
Path(save_path_base_path).parent.mkdir(parents=True, exist_ok=True)
manifest.to_file(save_path_base_path)
logger.info(f"Saving output manifest to {save_path_base_path}")
Expand Down

0 comments on commit 64abf3e

Please sign in to comment.