From db1c38907edf1e7c71edb8d8057063e0015ab8b8 Mon Sep 17 00:00:00 2001 From: Maroun Touma Date: Wed, 23 Oct 2024 14:31:14 -0500 Subject: [PATCH] Update dockerfiles and dependencies for all existing transforms Signed-off-by: Maroun Touma --- transforms/code/code2parquet/python/Dockerfile | 5 +++-- transforms/code/code2parquet/ray/Dockerfile | 15 +++++++-------- transforms/code/code2parquet/ray/pyproject.toml | 2 +- transforms/code/code_profiler/python/Dockerfile | 5 +++-- transforms/code/code_profiler/ray/Dockerfile | 9 +++++---- transforms/code/code_profiler/ray/pyproject.toml | 3 +-- transforms/code/code_quality/python/Dockerfile | 5 +++-- transforms/code/code_quality/ray/Dockerfile | 8 ++++---- transforms/code/code_quality/ray/pyproject.toml | 2 +- transforms/code/header_cleanser/python/Dockerfile | 5 +++-- transforms/code/header_cleanser/ray/Dockerfile | 9 +++++---- .../code/header_cleanser/ray/pyproject.toml | 2 +- transforms/code/license_select/python/Dockerfile | 5 +++-- transforms/code/license_select/ray/Dockerfile | 12 ++++++------ transforms/code/license_select/ray/pyproject.toml | 2 +- transforms/code/malware/python/Dockerfile | 5 +++-- transforms/code/malware/ray/Dockerfile | 12 ++++++------ transforms/code/malware/ray/pyproject.toml | 2 +- transforms/code/proglang_select/python/Dockerfile | 5 +++-- transforms/code/proglang_select/ray/Dockerfile | 8 ++++---- .../code/proglang_select/ray/pyproject.toml | 2 +- .../code/repo_level_ordering/ray/Dockerfile | 7 +++---- .../code/repo_level_ordering/ray/pyproject.toml | 2 +- transforms/language/doc_quality/python/Dockerfile | 6 ++++-- transforms/language/doc_quality/ray/Dockerfile | 9 +++++---- .../language/doc_quality/ray/pyproject.toml | 2 +- .../language/html2parquet/python/Dockerfile | 6 ++++-- transforms/language/html2parquet/ray/Dockerfile | 9 +++++---- .../language/html2parquet/ray/requirements.txt | 2 +- transforms/language/lang_id/python/Dockerfile | 5 +++-- transforms/language/lang_id/ray/Dockerfile | 
9 +++++---- transforms/language/lang_id/ray/pyproject.toml | 2 +- transforms/language/pdf2parquet/python/Dockerfile | 5 +++-- transforms/language/pdf2parquet/ray/Dockerfile | 10 +++++----- .../language/pdf2parquet/ray/requirements.txt | 2 +- .../language/pii_redactor/python/Dockerfile | 5 +++-- transforms/language/pii_redactor/ray/Dockerfile | 9 +++++---- .../language/pii_redactor/ray/pyproject.toml | 2 +- .../language/text_encoder/python/Dockerfile | 5 +++-- transforms/language/text_encoder/ray/Dockerfile | 9 ++++----- .../language/text_encoder/ray/pyproject.toml | 2 +- transforms/universal/doc_id/python/Dockerfile | 5 +++-- transforms/universal/doc_id/ray/Dockerfile | 9 +++++---- transforms/universal/doc_id/ray/pyproject.toml | 2 +- transforms/universal/ededup/python/Dockerfile | 5 +++-- transforms/universal/ededup/ray/Dockerfile | 9 +++++---- transforms/universal/ededup/ray/pyproject.toml | 2 +- transforms/universal/fdedup/ray/Dockerfile | 9 +++++---- transforms/universal/fdedup/ray/pyproject.toml | 2 +- transforms/universal/filter/python/Dockerfile | 5 +++-- transforms/universal/filter/ray/Dockerfile | 9 +++++---- transforms/universal/filter/ray/pyproject.toml | 2 +- transforms/universal/hap/python/Dockerfile | 5 +++-- transforms/universal/hap/ray/Dockerfile | 11 ++++++----- transforms/universal/hap/ray/requirements.txt | 2 +- transforms/universal/noop/python/Dockerfile | 5 +++-- transforms/universal/noop/ray/Dockerfile | 9 +++++---- transforms/universal/noop/ray/pyproject.toml | 2 +- transforms/universal/profiler/python/Dockerfile | 5 +++-- transforms/universal/profiler/ray/Dockerfile | 10 ++++++---- transforms/universal/profiler/ray/pyproject.toml | 2 +- transforms/universal/resize/python/Dockerfile | 5 +++-- transforms/universal/resize/ray/Dockerfile | 11 ++++++----- transforms/universal/resize/ray/pyproject.toml | 2 +- .../universal/tokenization/python/Dockerfile | 5 +++-- transforms/universal/tokenization/ray/Dockerfile | 9 +++++---- 
.../universal/tokenization/ray/pyproject.toml | 2 +- 67 files changed, 205 insertions(+), 170 deletions(-) diff --git a/transforms/code/code2parquet/python/Dockerfile b/transforms/code/code2parquet/python/Dockerfile index f94301a9c..ffbb71465 100644 --- a/transforms/code/code2parquet/python/Dockerfile +++ b/transforms/code/code2parquet/python/Dockerfile @@ -9,11 +9,12 @@ RUN pip install --no-cache-dir pytest RUN useradd -ms /bin/bash dpk USER dpk WORKDIR /home/dpk +ARG WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=dpk:root data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . +COPY --chown=dpk:root data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME} # END OF STEPS destined for a data-prep-kit base image diff --git a/transforms/code/code2parquet/ray/Dockerfile b/transforms/code/code2parquet/ray/Dockerfile index 495acbb54..35eed7233 100644 --- a/transforms/code/code2parquet/ray/Dockerfile +++ b/transforms/code/code2parquet/ray/Dockerfile @@ -7,14 +7,13 @@ RUN pip install --upgrade --no-cache-dir pip # install pytest RUN pip install --no-cache-dir pytest -# Copy in the data processing framework source/project and install it -# This is expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . -COPY --chown=ray:users data-processing-lib-ray/ data-processing-lib-ray/ -RUN cd data-processing-lib-ray && pip install --no-cache-dir -e . -COPY --chown=ray:users python-transform/ python-transform/ -RUN cd python-transform && pip install --no-cache-dir -e . 
+ARG WHEEL_FILE_NAME + +# Copy and install data processing libraries +# These are expected to be placed in the docker context before this is run (see the make image). +COPY --chown=ray:users data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME}[ray] + COPY --chown=ray:users python-transform/ python-transform/ RUN cd python-transform && pip install --no-cache-dir -e . diff --git a/transforms/code/code2parquet/ray/pyproject.toml b/transforms/code/code2parquet/ray/pyproject.toml index 120d080dc..b4627e3f0 100644 --- a/transforms/code/code2parquet/ray/pyproject.toml +++ b/transforms/code/code2parquet/ray/pyproject.toml @@ -10,7 +10,7 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsky@ibm.com" }, ] dependencies = [ - "data-prep-toolkit-ray==0.2.2.dev1", + "data-prep-toolkit[ray]==0.2.2.dev1", "dpk-code2parquet-transform-python==0.2.2.dev1", "parameterized", "pandas", diff --git a/transforms/code/code_profiler/python/Dockerfile b/transforms/code/code_profiler/python/Dockerfile index f64e0adab..ea5145db5 100644 --- a/transforms/code/code_profiler/python/Dockerfile +++ b/transforms/code/code_profiler/python/Dockerfile @@ -9,11 +9,12 @@ RUN pip install --no-cache-dir pytest RUN useradd -ms /bin/bash dpk USER dpk WORKDIR /home/dpk +ARG WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=dpk:root data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . 
+COPY --chown=dpk:root data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME} COPY --chown=dpk:root src/ src/ COPY --chown=dpk:root pyproject.toml pyproject.toml diff --git a/transforms/code/code_profiler/ray/Dockerfile b/transforms/code/code_profiler/ray/Dockerfile index b41ec614a..ec0d5e8ce 100644 --- a/transforms/code/code_profiler/ray/Dockerfile +++ b/transforms/code/code_profiler/ray/Dockerfile @@ -6,12 +6,13 @@ RUN pip install --upgrade --no-cache-dir pip # install pytest RUN pip install --no-cache-dir pytest +ARG WHEEL_FILE_NAME + # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . -COPY --chown=ray:users data-processing-lib-ray/ data-processing-lib-ray/ -RUN cd data-processing-lib-ray && pip install --no-cache-dir -e . +COPY --chown=ray:users data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME}[ray] + COPY --chown=ray:users python-transform/ python-transform/ RUN cd python-transform && pip install --no-cache-dir -e . 
diff --git a/transforms/code/code_profiler/ray/pyproject.toml b/transforms/code/code_profiler/ray/pyproject.toml index 5cc051bbe..6725b0ba1 100644 --- a/transforms/code/code_profiler/ray/pyproject.toml +++ b/transforms/code/code_profiler/ray/pyproject.toml @@ -10,8 +10,7 @@ authors = [ ] dependencies = [ "dpk-code-profiler-transform-python==0.2.2.dev0", - "data-prep-toolkit-ray==0.2.2.dev0", - "data-prep-toolkit==0.2.2.dev0", + "data-prep-toolkit[ray]==0.2.2.dev1", ] [build-system] diff --git a/transforms/code/code_quality/python/Dockerfile b/transforms/code/code_quality/python/Dockerfile index b25a57ca1..82588f4cd 100644 --- a/transforms/code/code_quality/python/Dockerfile +++ b/transforms/code/code_quality/python/Dockerfile @@ -9,11 +9,12 @@ RUN pip install --no-cache-dir pytest RUN useradd -ms /bin/bash dpk USER dpk WORKDIR /home/dpk +ARG WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=dpk:root data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . +COPY --chown=dpk:root data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME} # END OF STEPS destined for a data-prep-kit base image diff --git a/transforms/code/code_quality/ray/Dockerfile b/transforms/code/code_quality/ray/Dockerfile index bb136f498..6bfe0a535 100644 --- a/transforms/code/code_quality/ray/Dockerfile +++ b/transforms/code/code_quality/ray/Dockerfile @@ -10,13 +10,13 @@ RUN pip install --no-cache-dir pytest USER root RUN mkdir -p /home/ray/.cache && chmod -R 777 /home/ray/.cache USER ray +ARG WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). 
-COPY --chown=ray:users data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . -COPY --chown=ray:users data-processing-lib-ray/ data-processing-lib-ray/ -RUN cd data-processing-lib-ray && pip install --no-cache-dir -e . +COPY --chown=ray:users data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME}[ray] + COPY --chown=ray:users python-transform/ python-transform/ RUN cd python-transform && pip install --no-cache-dir -e . diff --git a/transforms/code/code_quality/ray/pyproject.toml b/transforms/code/code_quality/ray/pyproject.toml index 457678a6e..ac56ee0f5 100644 --- a/transforms/code/code_quality/ray/pyproject.toml +++ b/transforms/code/code_quality/ray/pyproject.toml @@ -10,7 +10,7 @@ authors = [ ] dependencies = [ "dpk-code-quality-transform-python==0.2.2.dev1", - "data-prep-toolkit-ray==0.2.2.dev1", + "data-prep-toolkit[ray]==0.2.2.dev1", ] [build-system] diff --git a/transforms/code/header_cleanser/python/Dockerfile b/transforms/code/header_cleanser/python/Dockerfile index 84831bcd2..cf12ca53c 100644 --- a/transforms/code/header_cleanser/python/Dockerfile +++ b/transforms/code/header_cleanser/python/Dockerfile @@ -17,11 +17,12 @@ WORKDIR /home/dpk # Create directories to mount volumnes for processing data outside of the image. # RUN mkdir input && mkdir output +ARG WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=dpk:root data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . 
+COPY --chown=dpk:root data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME} # END OF STEPS destined for a data-prep-kit base image diff --git a/transforms/code/header_cleanser/ray/Dockerfile b/transforms/code/header_cleanser/ray/Dockerfile index 16f8cf69c..8f049e48a 100644 --- a/transforms/code/header_cleanser/ray/Dockerfile +++ b/transforms/code/header_cleanser/ray/Dockerfile @@ -3,12 +3,13 @@ FROM docker.io/rayproject/ray:2.24.0-py310 # install pytest RUN pip install --no-cache-dir pytest +ARG WHEEL_FILE_NAME + # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . -COPY --chown=ray:users data-processing-lib-ray/ data-processing-lib-ray/ -RUN cd data-processing-lib-ray && pip install --no-cache-dir -e . +COPY --chown=ray:users data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME}[ray] + COPY --chown=ray:users python-transform/ python-transform RUN cd python-transform && pip install --no-cache-dir -e . 
diff --git a/transforms/code/header_cleanser/ray/pyproject.toml b/transforms/code/header_cleanser/ray/pyproject.toml index f99feaba7..3acb075f9 100644 --- a/transforms/code/header_cleanser/ray/pyproject.toml +++ b/transforms/code/header_cleanser/ray/pyproject.toml @@ -10,7 +10,7 @@ authors = [ ] dependencies = [ "dpk-header-cleanser-transform-python==0.2.2.dev1", - "data-prep-toolkit-ray==0.2.2.dev1", + "data-prep-toolkit[ray]==0.2.2.dev1", "scancode-toolkit==32.1.0", ] diff --git a/transforms/code/license_select/python/Dockerfile b/transforms/code/license_select/python/Dockerfile index 2fa9f9426..6f5d1dacc 100644 --- a/transforms/code/license_select/python/Dockerfile +++ b/transforms/code/license_select/python/Dockerfile @@ -8,11 +8,12 @@ RUN pip install --no-cache-dir pytest RUN useradd -ms /bin/bash dpk USER dpk WORKDIR /home/dpk +ARG WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=dpk:root data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . +COPY --chown=dpk:root data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME} # END OF STEPS destined for a data-prep-kit base image diff --git a/transforms/code/license_select/ray/Dockerfile b/transforms/code/license_select/ray/Dockerfile index 39e4674bb..c271743bd 100644 --- a/transforms/code/license_select/ray/Dockerfile +++ b/transforms/code/license_select/ray/Dockerfile @@ -6,13 +6,13 @@ RUN pip install --upgrade --no-cache-dir pip # install pytest RUN pip install --no-cache-dir pytest +ARG WHEEL_FILE_NAME + +# Copy and install data processing libraries +# These are expected to be placed in the docker context before this is run (see the make image). 
+COPY --chown=ray:users data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME}[ray] -# Copy in the data processing framework source/project and install it -# This is expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . -COPY --chown=ray:users data-processing-lib-ray/ data-processing-lib-ray/ -RUN cd data-processing-lib-ray && pip install --no-cache-dir -e . COPY --chown=ray:users python-transform/ python-transform/ RUN cd python-transform && pip install --no-cache-dir -e . diff --git a/transforms/code/license_select/ray/pyproject.toml b/transforms/code/license_select/ray/pyproject.toml index 0d0634ef3..5307e1783 100644 --- a/transforms/code/license_select/ray/pyproject.toml +++ b/transforms/code/license_select/ray/pyproject.toml @@ -11,7 +11,7 @@ authors = [ ] dependencies = [ "dpk-license-select-transform-python==0.2.2.dev1", - "data-prep-toolkit-ray==0.2.2.dev1", + "data-prep-toolkit[ray]==0.2.2.dev1", ] [build-system] diff --git a/transforms/code/malware/python/Dockerfile b/transforms/code/malware/python/Dockerfile index 0051cccd3..a5a6ee439 100644 --- a/transforms/code/malware/python/Dockerfile +++ b/transforms/code/malware/python/Dockerfile @@ -30,11 +30,12 @@ COPY --chown=dpk:root --from=clamav-local /var/lib/clamav/ /var/lib/clamav/ COPY --chown=dpk:root --from=clamav-local /etc/clamav/clamd.conf /etc/clamav/clamd.conf COPY --chown=dpk:root --from=clamav-local /var/log/clamav/clamav.log /var/log/clamav/clamav.log COPY --chown=dpk:root --from=clamav-local /var/run/clamav /var/run/clamav +ARG WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). 
-COPY --chown=dpk:root data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . +COPY --chown=dpk:root data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME} COPY --chown=dpk:root src/ src/ COPY --chown=dpk:root pyproject.toml pyproject.toml diff --git a/transforms/code/malware/ray/Dockerfile b/transforms/code/malware/ray/Dockerfile index 9bf1e4d55..533f36a21 100644 --- a/transforms/code/malware/ray/Dockerfile +++ b/transforms/code/malware/ray/Dockerfile @@ -36,13 +36,13 @@ COPY --from=clamav-local --chown=ray:0 /etc/clamav/clamd.conf /etc/clamav/clamd. COPY --from=clamav-local --chown=ray:0 /var/log/clamav /var/log/clamav COPY --from=clamav-local --chown=ray:0 /var/run/clamav /var/run/clamav +ARG WHEEL_FILE_NAME + +# Copy and install data processing libraries +# These are expected to be placed in the docker context before this is run (see the make image). +COPY --chown=ray:users data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME}[ray] -# Copy in the data processing framework source/project and install it -# This is expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . -COPY --chown=ray:users data-processing-lib-ray/ data-processing-lib-ray/ -RUN cd data-processing-lib-ray && pip install --no-cache-dir -e . COPY --chown=ray:users python-transform/ python-transform/ RUN cd python-transform && pip install --no-cache-dir -e . 
diff --git a/transforms/code/malware/ray/pyproject.toml b/transforms/code/malware/ray/pyproject.toml index 6abc2af60..b04f6ab2b 100644 --- a/transforms/code/malware/ray/pyproject.toml +++ b/transforms/code/malware/ray/pyproject.toml @@ -10,7 +10,7 @@ authors = [ ] dependencies = [ "dpk-malware-transform-python==0.2.2.dev1", - "data-prep-toolkit-ray==0.2.2.dev1", + "data-prep-toolkit[ray]==0.2.2.dev1", ] [build-system] diff --git a/transforms/code/proglang_select/python/Dockerfile b/transforms/code/proglang_select/python/Dockerfile index 3186862f0..8cf50b321 100644 --- a/transforms/code/proglang_select/python/Dockerfile +++ b/transforms/code/proglang_select/python/Dockerfile @@ -9,11 +9,12 @@ RUN pip install --no-cache-dir pytest RUN useradd -ms /bin/bash dpk USER dpk WORKDIR /home/dpk +ARG WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=dpk:root data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . +COPY --chown=dpk:root data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME} # END OF STEPS destined for a data-prep-kit base image diff --git a/transforms/code/proglang_select/ray/Dockerfile b/transforms/code/proglang_select/ray/Dockerfile index 7eb896037..5586899a4 100644 --- a/transforms/code/proglang_select/ray/Dockerfile +++ b/transforms/code/proglang_select/ray/Dockerfile @@ -6,13 +6,13 @@ RUN pip install --upgrade --no-cache-dir pip # install pytest RUN pip install --no-cache-dir pytest +ARG WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . 
-COPY --chown=ray:users data-processing-lib-ray/ data-processing-lib-ray/ -RUN cd data-processing-lib-ray && pip install --no-cache-dir -e . +COPY --chown=ray:users data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME}[ray] + COPY --chown=ray:users python-transform/ python-transform/ RUN cd python-transform && pip install --no-cache-dir -e . diff --git a/transforms/code/proglang_select/ray/pyproject.toml b/transforms/code/proglang_select/ray/pyproject.toml index a74372f49..5b2a488f1 100644 --- a/transforms/code/proglang_select/ray/pyproject.toml +++ b/transforms/code/proglang_select/ray/pyproject.toml @@ -10,7 +10,7 @@ authors = [ ] dependencies = [ "dpk-proglang-select-transform-python==0.2.2.dev1", - "data-prep-toolkit-ray==0.2.2.dev1", + "data-prep-toolkit[ray]==0.2.2.dev1", ] [build-system] diff --git a/transforms/code/repo_level_ordering/ray/Dockerfile b/transforms/code/repo_level_ordering/ray/Dockerfile index e98ee8d76..ea35593ce 100644 --- a/transforms/code/repo_level_ordering/ray/Dockerfile +++ b/transforms/code/repo_level_ordering/ray/Dockerfile @@ -6,13 +6,12 @@ RUN pip install --upgrade --no-cache-dir pip # install pytest RUN pip install --no-cache-dir pytest +ARG WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . -COPY --chown=ray:users data-processing-lib-ray/ data-processing-lib-ray/ -RUN cd data-processing-lib-ray && pip install --no-cache-dir -e . 
+COPY --chown=ray:users data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME}[ray] COPY --chown=ray:users src/ src/ COPY --chown=ray:users pyproject.toml pyproject.toml diff --git a/transforms/code/repo_level_ordering/ray/pyproject.toml b/transforms/code/repo_level_ordering/ray/pyproject.toml index f66d2c9d1..d29f704e4 100644 --- a/transforms/code/repo_level_ordering/ray/pyproject.toml +++ b/transforms/code/repo_level_ordering/ray/pyproject.toml @@ -11,7 +11,7 @@ authors = [ { name = "Shanmukha Guttula", email = "shagutt1@in.ibm.com" }, ] dependencies = [ - "data-prep-toolkit-ray==0.2.2.dev1", + "data-prep-toolkit[ray]==0.2.2.dev1", "networkx==3.3", "colorlog==6.8.2", "func-timeout==4.3.5", diff --git a/transforms/language/doc_quality/python/Dockerfile b/transforms/language/doc_quality/python/Dockerfile index 10dca4999..a1dd879d5 100644 --- a/transforms/language/doc_quality/python/Dockerfile +++ b/transforms/language/doc_quality/python/Dockerfile @@ -10,10 +10,12 @@ RUN useradd -ms /bin/bash dpk USER dpk WORKDIR /home/dpk +ARG WHEEL_FILE_NAME + # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=dpk:root data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . 
+COPY --chown=dpk:root data-processing-dist/ data-processing-dist/ +RUN pip install data-processing-dist/${WHEEL_FILE_NAME} # END OF STEPS destined for a data-prep-kit base image diff --git a/transforms/language/doc_quality/ray/Dockerfile b/transforms/language/doc_quality/ray/Dockerfile index 14d84f207..5c21f5b7d 100644 --- a/transforms/language/doc_quality/ray/Dockerfile +++ b/transforms/language/doc_quality/ray/Dockerfile @@ -6,13 +6,14 @@ RUN pip install --upgrade --no-cache-dir pip # install pytest RUN pip install --no-cache-dir pytest +ARG WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . -COPY --chown=ray:users data-processing-lib-ray/ data-processing-lib-ray/ -RUN cd data-processing-lib-ray && pip install --no-cache-dir -e . +COPY --chown=ray:users data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME}[ray] + +## Copy the python version of the transform COPY --chown=ray:users python-transform/ python-transform/ RUN cd python-transform && pip install --no-cache-dir -e . 
diff --git a/transforms/language/doc_quality/ray/pyproject.toml b/transforms/language/doc_quality/ray/pyproject.toml index a4aba9a3a..821298910 100644 --- a/transforms/language/doc_quality/ray/pyproject.toml +++ b/transforms/language/doc_quality/ray/pyproject.toml @@ -10,7 +10,7 @@ authors = [ ] dependencies = [ "dpk-doc_quality-transform-python==0.2.2.dev1", - "data-prep-toolkit-ray==0.2.2.dev1" + "data-prep-toolkit[ray]==0.2.2.dev1", ] [build-system] diff --git a/transforms/language/html2parquet/python/Dockerfile b/transforms/language/html2parquet/python/Dockerfile index 8c2f493c7..973e1fa74 100644 --- a/transforms/language/html2parquet/python/Dockerfile +++ b/transforms/language/html2parquet/python/Dockerfile @@ -10,10 +10,12 @@ RUN useradd -ms /bin/bash dpk USER dpk WORKDIR /home/dpk +ARG WHEEL_FILE_NAME + # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=dpk:root data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . +COPY --chown=dpk:root data-processing-dist/ data-processing-dist/ +RUN pip install data-processing-dist/${WHEEL_FILE_NAME} # END OF STEPS destined for a data-prep-kit base image diff --git a/transforms/language/html2parquet/ray/Dockerfile b/transforms/language/html2parquet/ray/Dockerfile index d3de64999..523c49630 100644 --- a/transforms/language/html2parquet/ray/Dockerfile +++ b/transforms/language/html2parquet/ray/Dockerfile @@ -6,13 +6,14 @@ RUN pip install --upgrade --no-cache-dir pip # install pytest RUN pip install --no-cache-dir pytest +ARG WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . 
-COPY --chown=ray:users data-processing-lib-ray/ data-processing-lib-ray/ -RUN cd data-processing-lib-ray && pip install --no-cache-dir -e . +COPY --chown=ray:users data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME}[ray] + +## Copy the python version of the transform COPY --chown=ray:users python-transform/ python-transform/ RUN cd python-transform && pip install --no-cache-dir -e . diff --git a/transforms/language/html2parquet/ray/requirements.txt b/transforms/language/html2parquet/ray/requirements.txt index d4c7abc1b..2454b186e 100644 --- a/transforms/language/html2parquet/ray/requirements.txt +++ b/transforms/language/html2parquet/ray/requirements.txt @@ -1,3 +1,3 @@ dpk-html2parquet-transform-python==0.2.2.dev1 -data-prep-toolkit-ray==0.2.2.dev1 +data-prep-toolkit[ray]==0.2.2.dev1 trafilatura==1.12.0 \ No newline at end of file diff --git a/transforms/language/lang_id/python/Dockerfile b/transforms/language/lang_id/python/Dockerfile index f1bcc1bdd..f989da4e6 100644 --- a/transforms/language/lang_id/python/Dockerfile +++ b/transforms/language/lang_id/python/Dockerfile @@ -9,11 +9,12 @@ RUN pip install --no-cache-dir pytest RUN useradd -ms /bin/bash dpk USER dpk WORKDIR /home/dpk +ARG WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=dpk:root data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . 
+COPY --chown=dpk:root data-processing-dist/ data-processing-dist/ +RUN pip install data-processing-dist/${WHEEL_FILE_NAME} # END OF STEPS destined for a data-prep-kit base image diff --git a/transforms/language/lang_id/ray/Dockerfile b/transforms/language/lang_id/ray/Dockerfile index f61fbbb3a..4fd4b7b90 100644 --- a/transforms/language/lang_id/ray/Dockerfile +++ b/transforms/language/lang_id/ray/Dockerfile @@ -6,6 +6,7 @@ RUN pip install --upgrade --no-cache-dir pip # install pytest RUN pip install --no-cache-dir pytest +ARG WHEEL_FILE_NAME # set up environment required to install and use huggingface and fasttext USER root @@ -15,10 +16,10 @@ USER ray # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . -COPY --chown=ray:users data-processing-lib-ray/ data-processing-lib-ray/ -RUN cd data-processing-lib-ray && pip install --no-cache-dir -e . +COPY --chown=ray:users data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME}[ray] + +## Copy the python version of the transform COPY --chown=ray:users python-transform/ python-transform/ RUN cd python-transform && pip install --no-cache-dir -e . 
diff --git a/transforms/language/lang_id/ray/pyproject.toml b/transforms/language/lang_id/ray/pyproject.toml index 60ff39947..b9ae910a2 100644 --- a/transforms/language/lang_id/ray/pyproject.toml +++ b/transforms/language/lang_id/ray/pyproject.toml @@ -10,7 +10,7 @@ authors = [ ] dependencies = [ "dpk-lang_id-transform-python==0.2.2.dev1", - "data-prep-toolkit-ray==0.2.2.dev1", + "data-prep-toolkit[ray]==0.2.2.dev1", ] [build-system] diff --git a/transforms/language/pdf2parquet/python/Dockerfile b/transforms/language/pdf2parquet/python/Dockerfile index 9fae370e7..35d06d43e 100644 --- a/transforms/language/pdf2parquet/python/Dockerfile +++ b/transforms/language/pdf2parquet/python/Dockerfile @@ -17,11 +17,12 @@ USER dpk WORKDIR /home/dpk ARG PIP_INSTALL_EXTRA_ARGS +ARG WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=dpk:root data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . +COPY --chown=dpk:root data-processing-dist/ data-processing-dist/ +RUN pip install data-processing-dist/${WHEEL_FILE_NAME} # END OF STEPS destined for a data-prep-kit base image diff --git a/transforms/language/pdf2parquet/ray/Dockerfile b/transforms/language/pdf2parquet/ray/Dockerfile index 178fdcd9b..b491ff09a 100644 --- a/transforms/language/pdf2parquet/ray/Dockerfile +++ b/transforms/language/pdf2parquet/ray/Dockerfile @@ -7,6 +7,7 @@ RUN pip install --upgrade --no-cache-dir pip # install pytest RUN pip install --no-cache-dir pytest +ARG PIP_INSTALL_EXTRA_ARGS ARG PIP_INSTALL_EXTRA_ARGS RUN \ @@ -15,13 +16,12 @@ RUN \ && sudo apt-get install -y libgl1 libglib2.0-0 curl wget \ && sudo apt-get clean - # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). 
-COPY --chown=ray:users data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . -COPY --chown=ray:users data-processing-lib-ray/ data-processing-lib-ray/ -RUN cd data-processing-lib-ray && pip install --no-cache-dir -e . +COPY --chown=ray:users data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME}[ray] + +## Copy the python version of the transform COPY --chown=ray:users python-transform/ python-transform/ RUN cd python-transform && pip install ${PIP_INSTALL_EXTRA_ARGS} --no-cache-dir -e . diff --git a/transforms/language/pdf2parquet/ray/requirements.txt b/transforms/language/pdf2parquet/ray/requirements.txt index af70c0354..afe1c8bbf 100644 --- a/transforms/language/pdf2parquet/ray/requirements.txt +++ b/transforms/language/pdf2parquet/ray/requirements.txt @@ -1,5 +1,5 @@ dpk-pdf2parquet-transform-python==0.2.2.dev1 -data-prep-toolkit-ray==0.2.2.dev1 +data-prep-toolkit[ray]==0.2.2.dev1 docling-core==1.7.2 docling-ibm-models==2.0.0 deepsearch-glm==0.22.0 diff --git a/transforms/language/pii_redactor/python/Dockerfile b/transforms/language/pii_redactor/python/Dockerfile index 437bf8220..d3b640256 100644 --- a/transforms/language/pii_redactor/python/Dockerfile +++ b/transforms/language/pii_redactor/python/Dockerfile @@ -9,11 +9,12 @@ RUN pip install --no-cache-dir pytest RUN useradd -ms /bin/bash dpk USER dpk WORKDIR /home/dpk +ARG WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=dpk:root data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . 
+COPY --chown=dpk:root data-processing-dist/ data-processing-dist/ +RUN pip install data-processing-dist/${WHEEL_FILE_NAME} # END OF STEPS destined for a data-prep-kit base image diff --git a/transforms/language/pii_redactor/ray/Dockerfile b/transforms/language/pii_redactor/ray/Dockerfile index 831a632aa..60f9a293d 100644 --- a/transforms/language/pii_redactor/ray/Dockerfile +++ b/transforms/language/pii_redactor/ray/Dockerfile @@ -5,13 +5,15 @@ RUN pip install --upgrade --no-cache-dir pip # install pytest RUN pip install --no-cache-dir pytest +ARG PIP_INSTALL_EXTRA_ARGS +ARG WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . -COPY --chown=ray:users data-processing-lib-ray/ data-processing-lib-ray/ -RUN cd data-processing-lib-ray && pip install --no-cache-dir -e . +COPY --chown=ray:users data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME}[ray] + +## Copy the python version of the transform COPY --chown=ray:users python-transform/ python-transform/ RUN cd python-transform && pip install --no-cache-dir -e . 
diff --git a/transforms/language/pii_redactor/ray/pyproject.toml b/transforms/language/pii_redactor/ray/pyproject.toml index 1ef96511a..e6e35cd55 100644 --- a/transforms/language/pii_redactor/ray/pyproject.toml +++ b/transforms/language/pii_redactor/ray/pyproject.toml @@ -11,7 +11,7 @@ authors = [ ] dependencies = [ "dpk_pii_redactor_transform_python==0.2.2.dev1", - "data-prep-toolkit-ray==0.2.2.dev1", + "data-prep-toolkit[ray]==0.2.2.dev1", "presidio-analyzer>=2.2.355", "presidio-anonymizer>=2.2.355", "flair>=0.14.0", diff --git a/transforms/language/text_encoder/python/Dockerfile b/transforms/language/text_encoder/python/Dockerfile index 86023a440..969635ca0 100644 --- a/transforms/language/text_encoder/python/Dockerfile +++ b/transforms/language/text_encoder/python/Dockerfile @@ -11,11 +11,12 @@ USER dpk WORKDIR /home/dpk ARG PIP_INSTALL_EXTRA_ARGS +ARG WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=dpk:root data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . +COPY --chown=dpk:root data-processing-dist/ data-processing-dist/ +RUN pip install data-processing-dist/${WHEEL_FILE_NAME} # END OF STEPS destined for a data-prep-kit base image diff --git a/transforms/language/text_encoder/ray/Dockerfile b/transforms/language/text_encoder/ray/Dockerfile index 249bf2448..33df87171 100644 --- a/transforms/language/text_encoder/ray/Dockerfile +++ b/transforms/language/text_encoder/ray/Dockerfile @@ -5,13 +5,13 @@ FROM ${BASE_IMAGE} RUN pip install --no-cache-dir pytest ARG PIP_INSTALL_EXTRA_ARGS - +ARG WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). 
-COPY --chown=ray:users data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . -COPY --chown=ray:users data-processing-lib-ray/ data-processing-lib-ray/ -RUN cd data-processing-lib-ray && pip install --no-cache-dir -e . +COPY --chown=ray:users data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME}[ray] + +## Copy the python version of the transform COPY --chown=ray:users python-transform/ python-transform/ RUN cd python-transform && pip install ${PIP_INSTALL_EXTRA_ARGS} --no-cache-dir -e . diff --git a/transforms/language/text_encoder/ray/pyproject.toml b/transforms/language/text_encoder/ray/pyproject.toml index 95e29638f..777d65eae 100644 --- a/transforms/language/text_encoder/ray/pyproject.toml +++ b/transforms/language/text_encoder/ray/pyproject.toml @@ -12,7 +12,7 @@ authors = [ ] dependencies = [ "dpk-text_encoder-transform-python==0.2.2.dev1", - "data-prep-toolkit-ray==0.2.2.dev1", + "data-prep-toolkit[ray]==0.2.2.dev1", ] [build-system] diff --git a/transforms/universal/doc_id/python/Dockerfile b/transforms/universal/doc_id/python/Dockerfile index 6f478cb33..1c5288c8a 100644 --- a/transforms/universal/doc_id/python/Dockerfile +++ b/transforms/universal/doc_id/python/Dockerfile @@ -9,11 +9,12 @@ RUN pip install --no-cache-dir pytest RUN useradd -ms /bin/bash dpk USER dpk WORKDIR /home/dpk +ARG WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=dpk:root data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . 
+COPY --chown=dpk:root data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME} COPY --chown=dpk:root src/ src/ COPY --chown=dpk:root pyproject.toml pyproject.toml diff --git a/transforms/universal/doc_id/ray/Dockerfile b/transforms/universal/doc_id/ray/Dockerfile index 34c331ce5..6a72f9660 100644 --- a/transforms/universal/doc_id/ray/Dockerfile +++ b/transforms/universal/doc_id/ray/Dockerfile @@ -6,13 +6,14 @@ RUN pip install --upgrade --no-cache-dir pip # install pytest RUN pip install --no-cache-dir pytest +ARG WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . -COPY --chown=ray:users data-processing-lib-ray/ data-processing-lib-ray/ -RUN cd data-processing-lib-ray && pip install --no-cache-dir -e . +COPY --chown=ray:users data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME}[ray] + +## Copy the python version of the transform COPY --chown=ray:users python-transform/ python-transform/ RUN cd python-transform && pip install --no-cache-dir -e . 
diff --git a/transforms/universal/doc_id/ray/pyproject.toml b/transforms/universal/doc_id/ray/pyproject.toml index 03530dff2..59e51f6df 100644 --- a/transforms/universal/doc_id/ray/pyproject.toml +++ b/transforms/universal/doc_id/ray/pyproject.toml @@ -11,7 +11,7 @@ authors = [ ] dependencies = [ "dpk_doc_id_transform_python==0.2.2.dev1", - "data-prep-toolkit-ray==0.2.2.dev1" + "data-prep-toolkit[ray]==0.2.2.dev1", ] [build-system] diff --git a/transforms/universal/ededup/python/Dockerfile b/transforms/universal/ededup/python/Dockerfile index df9f3ce64..ee5d7c86c 100644 --- a/transforms/universal/ededup/python/Dockerfile +++ b/transforms/universal/ededup/python/Dockerfile @@ -9,11 +9,12 @@ RUN pip install --no-cache-dir pytest RUN useradd -ms /bin/bash dpk USER dpk WORKDIR /home/dpk +ARG WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=dpk:root data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . +COPY --chown=dpk:root data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME} COPY --chown=dpk:root src/ src/ COPY --chown=dpk:root pyproject.toml pyproject.toml diff --git a/transforms/universal/ededup/ray/Dockerfile b/transforms/universal/ededup/ray/Dockerfile index c48c2c575..7fe69ba4a 100644 --- a/transforms/universal/ededup/ray/Dockerfile +++ b/transforms/universal/ededup/ray/Dockerfile @@ -6,13 +6,14 @@ RUN pip install --upgrade --no-cache-dir pip # install pytest RUN pip install --no-cache-dir pytest +ARG WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . 
-COPY --chown=ray:users data-processing-lib-ray/ data-processing-lib-ray/ -RUN cd data-processing-lib-ray && pip install --no-cache-dir -e . +COPY --chown=ray:users data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME}[ray] + +## Copy the python version of the transform COPY --chown=ray:users python-transform/ python-transform/ RUN cd python-transform && pip install --no-cache-dir -e . diff --git a/transforms/universal/ededup/ray/pyproject.toml b/transforms/universal/ededup/ray/pyproject.toml index 84a892180..57e85e256 100644 --- a/transforms/universal/ededup/ray/pyproject.toml +++ b/transforms/universal/ededup/ray/pyproject.toml @@ -10,7 +10,7 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsky@ibm.com" }, ] dependencies = [ - "data-prep-toolkit-ray==0.2.2.dev1", + "data-prep-toolkit[ray]==0.2.2.dev1", "dpk_ededup_transform_python==0.2.2.dev1", "tqdm==4.66.3", ] diff --git a/transforms/universal/fdedup/ray/Dockerfile b/transforms/universal/fdedup/ray/Dockerfile index 27d101bb8..6077586b2 100644 --- a/transforms/universal/fdedup/ray/Dockerfile +++ b/transforms/universal/fdedup/ray/Dockerfile @@ -6,13 +6,14 @@ RUN pip install --upgrade --no-cache-dir pip # install pytest RUN pip install --no-cache-dir pytest +ARG WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . -COPY --chown=ray:users data-processing-lib-ray/ data-processing-lib-ray/ -RUN cd data-processing-lib-ray && pip install --no-cache-dir -e . 
+COPY --chown=ray:users data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME}[ray] + +## Copy the python version of the transform # Install ray project source COPY --chown=ray:users src/ src/ diff --git a/transforms/universal/fdedup/ray/pyproject.toml b/transforms/universal/fdedup/ray/pyproject.toml index 54fd83a00..3f5168773 100644 --- a/transforms/universal/fdedup/ray/pyproject.toml +++ b/transforms/universal/fdedup/ray/pyproject.toml @@ -10,7 +10,7 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsky@ibm.com" }, ] dependencies = [ - "data-prep-toolkit-ray==0.2.2.dev1", + "data-prep-toolkit[ray]==0.2.2.dev1", "mmh3>=4.1.0", "xxhash==3.4.1", "tqdm==4.66.3", diff --git a/transforms/universal/filter/python/Dockerfile b/transforms/universal/filter/python/Dockerfile index 5df52a36e..fafc8a850 100644 --- a/transforms/universal/filter/python/Dockerfile +++ b/transforms/universal/filter/python/Dockerfile @@ -9,11 +9,12 @@ RUN pip install --no-cache-dir pytest RUN useradd -ms /bin/bash dpk USER dpk WORKDIR /home/dpk +ARG WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=dpk:root data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . 
+COPY --chown=dpk:root data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME} # END OF STEPS destined for a data-prep-kit base image diff --git a/transforms/universal/filter/ray/Dockerfile b/transforms/universal/filter/ray/Dockerfile index 540e8fcaf..9e5328e47 100644 --- a/transforms/universal/filter/ray/Dockerfile +++ b/transforms/universal/filter/ray/Dockerfile @@ -5,13 +5,14 @@ RUN pip install --upgrade --no-cache-dir pip # install pytest RUN pip install --no-cache-dir pytest +ARG WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . -COPY --chown=ray:users data-processing-lib-ray/ data-processing-lib-ray/ -RUN cd data-processing-lib-ray && pip install --no-cache-dir -e . +COPY --chown=ray:users data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME}[ray] + +## Copy the python version of the transform COPY --chown=ray:users python-transform/ python-transform RUN cd python-transform && pip install --no-cache-dir -e . 
diff --git a/transforms/universal/filter/ray/pyproject.toml b/transforms/universal/filter/ray/pyproject.toml index 9d2f84325..3d66c64d7 100644 --- a/transforms/universal/filter/ray/pyproject.toml +++ b/transforms/universal/filter/ray/pyproject.toml @@ -10,7 +10,7 @@ authors = [ ] dependencies = [ "dpk-filter-transform-python==0.2.2.dev1", - "data-prep-toolkit-ray==0.2.2.dev1", + "data-prep-toolkit[ray]==0.2.2.dev1", ] [build-system] diff --git a/transforms/universal/hap/python/Dockerfile b/transforms/universal/hap/python/Dockerfile index 6926bdbd2..67984c7a0 100644 --- a/transforms/universal/hap/python/Dockerfile +++ b/transforms/universal/hap/python/Dockerfile @@ -9,11 +9,12 @@ RUN pip install --no-cache-dir pytest RUN useradd -ms /bin/bash dpk USER dpk WORKDIR /home/dpk +ARG WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=dpk:root data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . +COPY --chown=dpk:root data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME} # END OF STEPS destined for a data-prep-kit base image diff --git a/transforms/universal/hap/ray/Dockerfile b/transforms/universal/hap/ray/Dockerfile index 42005e9ba..799d9aeca 100644 --- a/transforms/universal/hap/ray/Dockerfile +++ b/transforms/universal/hap/ray/Dockerfile @@ -5,13 +5,14 @@ RUN pip install --upgrade --no-cache-dir pip # install pytest RUN pip install --no-cache-dir pytest +ARG WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . 
-COPY --chown=ray:users data-processing-lib-ray/ data-processing-lib-ray/ -RUN cd data-processing-lib-ray && pip install --no-cache-dir -e . +COPY --chown=ray:users data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME}[ray] + +## Copy the python version of the transform COPY --chown=ray:users python-transform/ python-transform/ RUN cd python-transform && pip install --no-cache-dir -e . @@ -20,7 +21,7 @@ RUN cd python-transform && pip install --no-cache-dir -e . COPY --chown=ray:users src/ src/ COPY --chown=ray:users pyproject.toml pyproject.toml -RUN pip install --no-cache-dir -e . +RUN pip install --no-cache-dir -r requirements.txt -e . # copy the main() entry point to the image COPY ./src/hap_transform_ray.py . diff --git a/transforms/universal/hap/ray/requirements.txt b/transforms/universal/hap/ray/requirements.txt index 6b7f46c5f..b92c9d64c 100644 --- a/transforms/universal/hap/ray/requirements.txt +++ b/transforms/universal/hap/ray/requirements.txt @@ -1,4 +1,4 @@ -data-prep-toolkit-ray==0.2.2.dev1 +data-prep-toolkit[ray]==0.2.2.dev1 dpk-hap-transform-python==0.2.2.dev1 nltk==3.9.1 transformers==4.38.2 diff --git a/transforms/universal/noop/python/Dockerfile b/transforms/universal/noop/python/Dockerfile index 84ea63ffd..ce93bf57a 100644 --- a/transforms/universal/noop/python/Dockerfile +++ b/transforms/universal/noop/python/Dockerfile @@ -9,11 +9,12 @@ RUN pip install --no-cache-dir pytest RUN useradd -ms /bin/bash dpk USER dpk WORKDIR /home/dpk +ARG WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=dpk:root data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . 
+COPY --chown=dpk:root data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME} # END OF STEPS destined for a data-prep-kit base image diff --git a/transforms/universal/noop/ray/Dockerfile b/transforms/universal/noop/ray/Dockerfile index 035744aa8..dc84bab00 100644 --- a/transforms/universal/noop/ray/Dockerfile +++ b/transforms/universal/noop/ray/Dockerfile @@ -5,13 +5,14 @@ RUN pip install --upgrade --no-cache-dir pip # install pytest RUN pip install --no-cache-dir pytest +ARG WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . -COPY --chown=ray:users data-processing-lib-ray/ data-processing-lib-ray/ -RUN cd data-processing-lib-ray && pip install --no-cache-dir -e . +COPY --chown=ray:users data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME}[ray] + +## Copy the python version of the transform COPY --chown=ray:users python-transform/ python-transform/ RUN cd python-transform && pip install --no-cache-dir -e . 
diff --git a/transforms/universal/noop/ray/pyproject.toml b/transforms/universal/noop/ray/pyproject.toml index c73f5c67a..519b63b29 100644 --- a/transforms/universal/noop/ray/pyproject.toml +++ b/transforms/universal/noop/ray/pyproject.toml @@ -11,7 +11,7 @@ authors = [ ] dependencies = [ "dpk-noop-transform-python==0.2.2.dev1", - "data-prep-toolkit-ray==0.2.2.dev1", + "data-prep-toolkit[ray]==0.2.2.dev1", ] [build-system] diff --git a/transforms/universal/profiler/python/Dockerfile b/transforms/universal/profiler/python/Dockerfile index 9aa921f5e..abb6577b0 100644 --- a/transforms/universal/profiler/python/Dockerfile +++ b/transforms/universal/profiler/python/Dockerfile @@ -9,11 +9,12 @@ RUN pip install --no-cache-dir pytest RUN useradd -ms /bin/bash dpk USER dpk WORKDIR /home/dpk +ARG WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=dpk:root data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . +COPY --chown=dpk:root data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME} COPY --chown=dpk:root src/ src/ COPY --chown=dpk:root pyproject.toml pyproject.toml diff --git a/transforms/universal/profiler/ray/Dockerfile b/transforms/universal/profiler/ray/Dockerfile index 6c7235249..73ee23527 100644 --- a/transforms/universal/profiler/ray/Dockerfile +++ b/transforms/universal/profiler/ray/Dockerfile @@ -7,12 +7,14 @@ RUN pip install --upgrade --no-cache-dir pip # install pytest RUN pip install --no-cache-dir pytest +ARG WHEEL_FILE_NAME + # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . 
-COPY --chown=ray:users data-processing-lib-ray/ data-processing-lib-ray/ -RUN cd data-processing-lib-ray && pip install --no-cache-dir -e . +COPY --chown=ray:users data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME}[ray] + +## Copy the python version of the transform COPY --chown=ray:users python-transform/ python-transform/ RUN cd python-transform && pip install --no-cache-dir -e . diff --git a/transforms/universal/profiler/ray/pyproject.toml b/transforms/universal/profiler/ray/pyproject.toml index 0b3ef4b55..a67ffb305 100644 --- a/transforms/universal/profiler/ray/pyproject.toml +++ b/transforms/universal/profiler/ray/pyproject.toml @@ -9,7 +9,7 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsky@ibm.com" }, ] dependencies = [ - "data-prep-toolkit-ray==0.2.2.dev1", + "data-prep-toolkit[ray]==0.2.2.dev1", "dpk_profiler_transform_python==0.2.2.dev1", "tqdm==4.66.3", ] diff --git a/transforms/universal/resize/python/Dockerfile b/transforms/universal/resize/python/Dockerfile index 9caa3565c..be4c3c485 100644 --- a/transforms/universal/resize/python/Dockerfile +++ b/transforms/universal/resize/python/Dockerfile @@ -9,11 +9,12 @@ RUN pip install --no-cache-dir pytest RUN useradd -ms /bin/bash dpk USER dpk WORKDIR /home/dpk +ARG WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=dpk:root data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . 
+COPY --chown=dpk:root data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME} # Install python project source COPY --chown=dpk:users src/ src/ diff --git a/transforms/universal/resize/ray/Dockerfile b/transforms/universal/resize/ray/Dockerfile index 27e7b64b5..f2873ba2e 100644 --- a/transforms/universal/resize/ray/Dockerfile +++ b/transforms/universal/resize/ray/Dockerfile @@ -6,13 +6,14 @@ RUN pip install --upgrade --no-cache-dir pip # Install pytest so we can test the image later RUN pip install --no-cache-dir pytest +ARG WHEEL_FILE_NAME -# Copy and install data processing libraries +# Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=ray:users data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . -COPY --chown=ray:users data-processing-lib-ray/ data-processing-lib-ray/ -RUN cd data-processing-lib-ray && pip install --no-cache-dir -e . +COPY --chown=ray:users data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME}[ray] + +## Copy the python version of the transform COPY --chown=ray:users python-transform/ python-transform/ RUN cd python-transform && pip install --no-cache-dir -e . 
diff --git a/transforms/universal/resize/ray/pyproject.toml b/transforms/universal/resize/ray/pyproject.toml index 38043bb7e..df0928fa2 100644 --- a/transforms/universal/resize/ray/pyproject.toml +++ b/transforms/universal/resize/ray/pyproject.toml @@ -11,7 +11,7 @@ authors = [ ] dependencies = [ "dpk-resize-transform-python==0.2.2.dev1", - "data-prep-toolkit-ray==0.2.2.dev1", + "data-prep-toolkit[ray]==0.2.2.dev1", ] [build-system] diff --git a/transforms/universal/tokenization/python/Dockerfile b/transforms/universal/tokenization/python/Dockerfile index e1eea7e40..3e940576f 100644 --- a/transforms/universal/tokenization/python/Dockerfile +++ b/transforms/universal/tokenization/python/Dockerfile @@ -9,11 +9,12 @@ RUN pip install --no-cache-dir pytest RUN useradd -ms /bin/bash dpk USER dpk WORKDIR /home/dpk +ARG WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). -COPY --chown=dpk:root data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . +COPY --chown=dpk:root data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME} # END OF STEPS destined for a data-prep-kit base image diff --git a/transforms/universal/tokenization/ray/Dockerfile b/transforms/universal/tokenization/ray/Dockerfile index 8b7e78c27..f78438833 100644 --- a/transforms/universal/tokenization/ray/Dockerfile +++ b/transforms/universal/tokenization/ray/Dockerfile @@ -6,13 +6,14 @@ RUN pip install --upgrade --no-cache-dir pip # Install pytest so we can test the image later RUN pip install --no-cache-dir pytest +ARG WHEEL_FILE_NAME # Copy and install data processing libraries # These are expected to be placed in the docker context before this is run (see the make image). 
-COPY --chown=ray:users data-processing-lib-python/ data-processing-lib-python/ -RUN cd data-processing-lib-python && pip install --no-cache-dir -e . -COPY --chown=ray:users data-processing-lib-ray/ data-processing-lib-ray/ -RUN cd data-processing-lib-ray && pip install --no-cache-dir -e . +COPY --chown=ray:users data-processing-dist data-processing-dist +RUN pip install data-processing-dist/${WHEEL_FILE_NAME}[ray] + +## Copy the python version of the transform COPY --chown=ray:users python-transform/ python-transform RUN cd python-transform && pip install --no-cache-dir -r requirements.txt && pip install --no-cache-dir -e . diff --git a/transforms/universal/tokenization/ray/pyproject.toml b/transforms/universal/tokenization/ray/pyproject.toml index a1ef73dd8..997987a46 100644 --- a/transforms/universal/tokenization/ray/pyproject.toml +++ b/transforms/universal/tokenization/ray/pyproject.toml @@ -10,7 +10,7 @@ authors = [ ] dependencies = [ "dpk-tokenization-transform-python==0.2.2.dev1", - "data-prep-toolkit-ray==0.2.2.dev1", + "data-prep-toolkit[ray]==0.2.2.dev1", ] [build-system]