Skip to content

Commit

Permalink
Add component install extra and update others (#592)
Browse files Browse the repository at this point in the history
Follow-up on #587
  • Loading branch information
RobbeSneyders authored Nov 6, 2023
1 parent cfb01c7 commit 071726c
Show file tree
Hide file tree
Showing 29 changed files with 51 additions and 35 deletions.
2 changes: 1 addition & 1 deletion components/caption_images/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt
# Install Fondant
# This is split from other requirements to leverage caching
ARG FONDANT_VERSION=main
RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}
RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}

# Set the working directory to the component folder
WORKDIR /component
Expand Down
2 changes: 1 addition & 1 deletion components/chunk_text/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt
# Install Fondant
# This is split from other requirements to leverage caching
ARG FONDANT_VERSION=main
RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}
RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}

# Set the working directory to the component folder
WORKDIR /component
Expand Down
2 changes: 1 addition & 1 deletion components/download_images/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt
# Install Fondant
# This is split from other requirements to leverage caching
ARG FONDANT_VERSION=main
RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}
RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}

# Set the working directory to the component folder
WORKDIR /component
Expand Down
2 changes: 1 addition & 1 deletion components/embed_images/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt
# Install Fondant
# This is split from other requirements to leverage caching
ARG FONDANT_VERSION=main
RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}
RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}

# Set the working directory to the component folder
WORKDIR /component/src
Expand Down
2 changes: 1 addition & 1 deletion components/embed_text/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt
# Install Fondant
# This is split from other requirements to leverage caching
ARG FONDANT_VERSION=main
RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}
RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}

# Set the working directory to the component folder
WORKDIR /component
Expand Down
2 changes: 1 addition & 1 deletion components/embedding_based_laion_retrieval/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt
# Install Fondant
# This is split from other requirements to leverage caching
ARG FONDANT_VERSION=main
RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}
RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}

# Set the working directory to the component folder
WORKDIR /component/src
Expand Down
2 changes: 1 addition & 1 deletion components/filter_image_resolution/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt
# Install Fondant
# This is split from other requirements to leverage caching
ARG FONDANT_VERSION=main
RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}
RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}

# Set the working directory to the component folder
WORKDIR /component/src
Expand Down
2 changes: 1 addition & 1 deletion components/image_cropping/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt
# Install Fondant
# This is split from other requirements to leverage caching
ARG FONDANT_VERSION=main
RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}
RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}

# Set the working directory to the component folder
WORKDIR /component/src
Expand Down
2 changes: 1 addition & 1 deletion components/image_resolution_extraction/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt
# Install Fondant
# This is split from other requirements to leverage caching
ARG FONDANT_VERSION=main
RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}
RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}

# Set the working directory to the component folder
WORKDIR /component/src
Expand Down
2 changes: 1 addition & 1 deletion components/index_weaviate/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt
# Install Fondant
# This is split from other requirements to leverage caching
ARG FONDANT_VERSION=main
RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}
RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}

# Set the working directory to the component folder
WORKDIR /component/src
Expand Down
2 changes: 1 addition & 1 deletion components/language_filter/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt
# Install Fondant
# This is split from other requirements to leverage caching
ARG FONDANT_VERSION=main
RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}
RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}

# Set the working directory to the component folder
WORKDIR /component/src
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Unit test for language filter component."""
import pandas as pd
from fondant.component_spec import ComponentSpec
from fondant.core.component_spec import ComponentSpec

from components.language_filter.src.main import LanguageFilterComponent

Expand Down
2 changes: 1 addition & 1 deletion components/load_from_files/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt
# Install Fondant
# This is split from other requirements to leverage caching
ARG FONDANT_VERSION=main
RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}
RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}

# Set the working directory to the component folder
WORKDIR /component/src
Expand Down
2 changes: 1 addition & 1 deletion components/load_from_hf_hub/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt
# Install Fondant
# This is split from other requirements to leverage caching
ARG FONDANT_VERSION=main
RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}
RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}

# Set the working directory to the component folder
WORKDIR /component/src
Expand Down
2 changes: 1 addition & 1 deletion components/load_from_hf_hub/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import dask.dataframe as dd
import pandas as pd
from fondant.component import DaskLoadComponent
from fondant.component_spec import ComponentSpec
from fondant.core.component_spec import ComponentSpec

logger = logging.getLogger(__name__)

Expand Down
2 changes: 1 addition & 1 deletion components/load_from_parquet/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt
# Install Fondant
# This is split from other requirements to leverage caching
ARG FONDANT_VERSION=main
RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}
RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}

# Set the working directory to the component folder
WORKDIR /component/src
Expand Down
2 changes: 1 addition & 1 deletion components/load_from_parquet/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import dask.dataframe as dd
import pandas as pd
from fondant.component import DaskLoadComponent
from fondant.component_spec import ComponentSpec
from fondant.core.component_spec import ComponentSpec

logger = logging.getLogger(__name__)

Expand Down
2 changes: 1 addition & 1 deletion components/minhash_generator/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt
# Install Fondant
# This is split from other requirements to leverage caching
ARG FONDANT_VERSION=main
RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}
RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}

# Set the working directory to the component folder
WORKDIR /component/src
Expand Down
2 changes: 1 addition & 1 deletion components/minhash_generator/tests/component_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Unit test for minhash generation component."""
import pandas as pd
from fondant.component_spec import ComponentSpec
from fondant.core.component_spec import ComponentSpec

from components.minhash_generator.src.main import MinHashGeneratorComponent

Expand Down
2 changes: 1 addition & 1 deletion components/prompt_based_laion_retrieval/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt
# Install Fondant
# This is split from other requirements to leverage caching
ARG FONDANT_VERSION=main
RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}
RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}

# Set the working directory to the component folder
WORKDIR /component/src
Expand Down
2 changes: 1 addition & 1 deletion components/resize_images/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt
# Install Fondant
# This is split from other requirements to leverage caching
ARG FONDANT_VERSION=main
RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}
RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}

# Set the working directory to the component folder
WORKDIR /component/src
Expand Down
2 changes: 1 addition & 1 deletion components/segment_images/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt
# Install Fondant
# This is split from other requirements to leverage caching
ARG FONDANT_VERSION=main
RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}
RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}

# Set the working directory to the component folder
WORKDIR /component/src
Expand Down
2 changes: 1 addition & 1 deletion components/text_length_filter/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt
# Install Fondant
# This is split from other requirements to leverage caching
ARG FONDANT_VERSION=main
RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}
RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}

# Set the working directory to the component folder
WORKDIR /component/src
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Unit test for text length filter component."""
import pandas as pd
from fondant.component_spec import ComponentSpec
from fondant.core.component_spec import ComponentSpec

from components.text_length_filter.src.main import TextLengthFilterComponent

Expand Down
2 changes: 1 addition & 1 deletion components/text_normalization/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt
# Install Fondant
# This is split from other requirements to leverage caching
ARG FONDANT_VERSION=main
RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}
RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}

# Set the working directory to the component folder
WORKDIR /component
Expand Down
2 changes: 1 addition & 1 deletion components/write_to_hf_hub/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ RUN pip3 install --no-cache-dir -r requirements.txt
# Install Fondant
# This is split from other requirements to leverage caching
ARG FONDANT_VERSION=main
RUN pip3 install fondant[aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}
RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}

# Set the working directory to the component folder
WORKDIR /component/src
Expand Down
2 changes: 1 addition & 1 deletion components/write_to_hf_hub/src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import huggingface_hub
from datasets.features.features import generate_from_arrow_type
from fondant.component import DaskWriteComponent
from fondant.component_spec import ComponentSpec
from fondant.core.component_spec import ComponentSpec
from PIL import Image

logger = logging.getLogger(__name__)
Expand Down
21 changes: 13 additions & 8 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,26 +42,31 @@ classifiers = [

[tool.poetry.dependencies]
python = ">= 3.8, <3.11"
dask = {extras = ["dataframe", "distributed", "diagnostics"], version = ">= 2023.4.1"}

fsspec = ">= 2023.4.0"
importlib-resources = { version = ">= 1.3", python = "<3.9" }
jsonschema = ">= 4.18"
pyarrow = ">= 11.0.0"

fsspec = { version = ">= 2023.4.0", optional = true}
dask = { version = ">= 2023.4.1", extras = ["dataframe", "distributed", "diagnostics"], optional = true }

gcsfs = { version = ">= 2023.10.0", optional = true }
s3fs = { version = ">= 2023.4.0", optional = true }
adlfs = { version = ">= 2023.4.0", optional = true }

docker = {version = ">= 6.1.3", optional = true }
kfp = { version = "2.3.0", optional = true, extras =["kubernetes"] }
pandas = { version = ">= 1.3.5", optional = true }
google-cloud-aiplatform = { version = "1.34.0", optional = true}

[tool.poetry.extras]
aws = ["fsspec", "s3fs"]
azure = ["fsspec", "adlfs"]
gcp = ["fsspec", "gcsfs"]
kfp = ["kfp"]
vertex = ["kfp", "google-cloud-aiplatform"]
component = ["dask"]

aws = ["s3fs"]
azure = ["adlfs"]
gcp = ["gcsfs"]

kfp = ["docker", "kfp"]
vertex = ["docker", "kfp", "google-cloud-aiplatform"]
docker = ["docker"]

[tool.poetry.group.test.dependencies]
Expand Down
11 changes: 11 additions & 0 deletions src/fondant/component/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
# Fail fast with an actionable message when fondant was installed without the
# optional `component` extra. `dask` is the package that extra pulls in (see
# the `component` entry under `[tool.poetry.extras]` in pyproject.toml), so it
# is used here as the probe.
try:
    # Imported only so that a missing package raises ImportError. The `noqa`
    # stops linters from removing the "unused" import and leaving a bare
    # `pass`, which would make the except branch unreachable.
    import dask  # noqa: F401
except ImportError:
    # Adjacent string literals are concatenated, so the first one must end
    # with a space to keep the two sentences separated.
    msg = (
        "You need to install fondant using the `component` extra to develop or run a component. "
        "You can install it with `pip install fondant[component]`"
    )
    raise SystemExit(
        msg,
    )

from .component import ( # noqa
BaseComponent,
Component,
Expand Down

0 comments on commit 071726c

Please sign in to comment.