From 79c60b8688ed71389d2b1ecfe404e541011f2434 Mon Sep 17 00:00:00 2001
From: David Wood
Date: Fri, 17 May 2024 14:36:32 -0400
Subject: [PATCH 1/2] rename make targets to be ray-specific

Signed-off-by: David Wood
---
Review notes (free text in this position is not part of the commit message):
  * transforms/.make.transforms: .transforms.ray-build now depends on
    .transforms.ray-venv. It previously named .transforms.venv, whose rule is
    removed by this patch, so make would have had nothing to run for the venv
    step of "build".
  * transforms/.make.transforms: the .PHONY declaration above the renamed venv
    rule now names .transforms.ray-venv instead of the removed .transforms.venv.
  * .make.defaults: comment wording on the added copy-lib / ray-lib-src-image
    rules corrected; no recipe changes.
  * NOTE(review): the post-image blob ids on the "index" lines of .make.defaults
    and transforms/.make.transforms were not recomputed after these edits.
  * NOTE(review): recipe indentation (tabs) and the position of blank context
    lines were laid out to satisfy the hunk line counts; confirm against the
    tree, or regenerate with git format-patch, before applying.

 .make.defaults                                | 33 +++++++++-------
 examples/Makefile                             |  2 +-
 kfp/kfp_ray_components/Makefile               | 14 ++-----
 tools/ingest2parquet/Makefile                 |  6 +--
 transforms/.make.transforms                   | 28 +++++++-------
 transforms/code/code_quality/ray/Makefile     | 12 +++---
 transforms/code/malware/ray/Makefile          | 14 +++----
 transforms/code/proglang_select/ray/Makefile  | 12 +++---
 transforms/universal/doc_id/ray/Makefile      | 12 +++---
 transforms/universal/ededup/ray/Makefile      | 10 ++---
 transforms/universal/fdedup/ray/Makefile      | 10 ++---
 transforms/universal/filter/ray/Makefile      | 12 +++---
 transforms/universal/noop/ray/.dockerignore   |  1 +
 transforms/universal/noop/ray/.gitignore      | 38 +++++++++++++++++++
 transforms/universal/noop/ray/Makefile        | 12 +++---
 .../universal/tokenization/ray/Makefile       | 12 +++---
 16 files changed, 132 insertions(+), 96 deletions(-)
 create mode 100644 transforms/universal/noop/ray/.dockerignore
 create mode 100644 transforms/universal/noop/ray/.gitignore

diff --git a/.make.defaults b/.make.defaults
index 8dc87c502..914c610d7 100644
--- a/.make.defaults
+++ b/.make.defaults
@@ -1,5 +1,5 @@
 #######################################################################################
-# Include this to get access to a common set of rules for reuse in transform projects.
+# Include this to get access to a common set of rules for reuse in Makefile-based projects.
 # include ../../.make.defaults
 #
 # Before including, the following must be defined:
@@ -169,24 +169,31 @@ __check_defined = \
 	--build-arg BUILD_DATE=$(shell date -u +'%Y-%m-%dT%H:%M:%SZ') \
 	--build-arg GIT_COMMIT=$(shell git log -1 --format=%h) .
 
-.PHONY: .defaults.lib-src-image
-.defaults.lib-src-image:: # Must be called with a DOCKER_IMAGE= settings.
-	@# Help: Build the $(DOCKER_IMAGE) using the $(DOCKER_FILE), requirements.txt and install data-prep-lib source
-	rm -rf data-processing-lib
-	mkdir data-processing-lib
-	# Copy with -p so docker cachine works when copying this into the image
-	cp -p -R $(DPK_RAY_LIB_DIR)/src data-processing-lib
-	cp -p $(DPK_RAY_LIB_DIR)/pyproject.toml data-processing-lib
-	cp -p $(DPK_RAY_LIB_DIR)/README.md data-processing-lib
+# Copy the library source tree at LIB_PATH (src, pyproject.toml and README.md) into a fresh LIB_NAME directory.
+# Generally used to copy source from within the repo into a local directory for use by a Dockerfile
+.PHONY: .defaults.copy-lib
+.defaults.copy-lib:
+	rm -rf ${LIB_NAME}
+	mkdir ${LIB_NAME}
+	cp -p -R ${LIB_PATH}/src ${LIB_NAME}
+	cp -p -R ${LIB_PATH}/pyproject.toml ${LIB_NAME}
+	cp -p -R ${LIB_PATH}/README.md ${LIB_NAME}
+
+# Build an image using the local Dockerfile and make the data-processing-lib
+# available in the current directory for use by the Dockerfile (i.e. to install the library).
+.PHONY: .defaults.ray-lib-src-image
+.defaults.ray-lib-src-image:: # Must be called with a DOCKER_IMAGE= setting.
+	@# Help: Build the $(DOCKER_IMAGE) using the $(DOCKER_FILE), requirements.txt and install data-processing-lib source
+	$(MAKE) LIB_PATH=$(DPK_RAY_LIB_DIR) LIB_NAME=data-processing-lib .defaults.copy-lib
 	$(MAKE) DOCKER_IMAGE=$(DOCKER_IMAGE) .defaults.image
 	rm -rf data-processing-lib
 
 
-.PHONY: .defaults.install-lib-src
-.defaults.lib-src-venv:: .defaults.venv
+.PHONY: .defaults.ray-lib-src-venv
+.defaults.ray-lib-src-venv:: .defaults.venv
 	@# Help: Install the source from the data processing library for $(PYTHON)
 	@echo Installing source from data processing library for venv
-	source venv/bin/activate; \
+	@source venv/bin/activate; \
 	pip install pytest; \
 	pip uninstall -y data-prep-toolkit; \
 	if [ ! -z "$(EXTRA_INDEX_URL)" ]; then \
diff --git a/examples/Makefile b/examples/Makefile
index 71a12e6b9..83bf09e58 100644
--- a/examples/Makefile
+++ b/examples/Makefile
@@ -5,7 +5,7 @@ include ../.make.defaults
 
 clean:: .defaults.clean
 
-venv:: .defaults.lib-src-venv
+venv:: .defaults.ray-lib-src-venv
 	@# Help: Install requirements from all transforms into a venv
 	. ./venv/bin/activate && bash ./prepare_env.sh
 
diff --git a/kfp/kfp_ray_components/Makefile b/kfp/kfp_ray_components/Makefile
index ea60f37e9..75771290b 100644
--- a/kfp/kfp_ray_components/Makefile
+++ b/kfp/kfp_ray_components/Makefile
@@ -12,20 +12,12 @@ DOCKER_IMG=${DOCKER_HOSTNAME}/${DOCKER_NAMESPACE}/${DOCKER_NAME}:${DOCKER_IMAGE_
 
 # Include the common rules.
 # Use "make help" to see them.
-include ../../.make.defaults
-
-.PHONY: .copy-lib
-.copy-lib:
-	rm -rf ${LIB_NAME}
-	mkdir ${LIB_NAME}
-	cp -p -R ${LIB_PATH}/src ${LIB_NAME}
-	cp -p -R ${LIB_PATH}/pyproject.toml ${LIB_NAME}
-	cp -p -R ${LIB_PATH}/README.md ${LIB_NAME}
+include $(REPOROOT)/.make.defaults
 
 .PHONY: .lib-src-image
 .lib-src-image::
-	$(MAKE) .copy-lib LIB_PATH=$(REPOROOT)/data-processing-lib/ray LIB_NAME=data-processing-lib
-	$(MAKE) .copy-lib LIB_PATH=$(REPOROOT)/kfp/kfp_support_lib LIB_NAME=kfp_support_lib
+	$(MAKE) .defaults.copy-lib LIB_PATH=$(DPK_RAY_LIB_DIR) LIB_NAME=data-processing-lib
+	$(MAKE) .defaults.copy-lib LIB_PATH=$(REPOROOT)/kfp/kfp_support_lib LIB_NAME=kfp_support_lib
 	$(MAKE) DOCKER_IMAGE=$(DOCKER_IMAGE) .defaults.image
 	rm -rf data-processing-lib
 	rm -rf kfp_support_lib
diff --git a/tools/ingest2parquet/Makefile b/tools/ingest2parquet/Makefile
index 8c63932ab..e4d1ebbad 100644
--- a/tools/ingest2parquet/Makefile
+++ b/tools/ingest2parquet/Makefile
@@ -9,7 +9,7 @@ DOCKER_IMAGE_NAME?=ingest2parquet
 
 DOCKER_IMAGE=${DOCKER_REGISTRY_ENDPOINT}/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION)
 
-venv:: .defaults.lib-src-venv
+venv:: .defaults.ray-lib-src-venv
 
 build:: venv image
 
@@ -17,7 +17,7 @@ test:: venv test-src test-image
 
 clean:: .defaults.clean
 
-image:: .defaults.lib-src-image
+image:: .defaults.ray-lib-src-image
 
 test-src:: .defaults.test-src test-local
 
@@ -42,4 +42,4 @@ run-s3-sample:
 	@# Help: Run src/ingest2parquet_s3.py file (if it exists). Assumes minio has beens started.
 	@echo ""
 	@echo "You may want to stop the minio server now (see make help)"
-	@echo ""
\ No newline at end of file
+	@echo ""
diff --git a/transforms/.make.transforms b/transforms/.make.transforms
index 627b9d8e3..fc90599fd 100644
--- a/transforms/.make.transforms
+++ b/transforms/.make.transforms
@@ -91,7 +91,7 @@ extra-help:
 # We use "pip" instead of "$(PIP)" below because otherwise if the user has overriddent PYTHON
 # they will end up installing into that PYTHON and NOT the venv.
-.PHONY: .transforms.venv
-.transforms.venv:: .defaults.lib-src-venv
+.PHONY: .transforms.ray-venv
+.transforms.ray-venv:: .defaults.ray-lib-src-venv
 
 .PHONY: .transforms.check_env
 .transforms.check_env::
@@ -100,26 +100,24 @@ extra-help:
 # Create the docker image making sure the preloaded models are available to copy into the image
 # We copy the library/framework source here so it can be installed into the docker image and
 # pip installed inside the Dockerfile
-.PHONY: .transforms.image
-.transforms.image:: .defaults.lib-src-image
+.PHONY: .transforms.ray-image
+.transforms.ray-image:: .defaults.ray-lib-src-image
 
-.PHONY: .transforms.lib-src-venv
-.transforms.lib-src-venv:: .defaults.lib-src-venv
-
-.PHONY: .transforms.build
-.transforms.build:: .transforms.venv .transforms.image
+.PHONY: .transforms.ray-build
+.transforms.ray-build:: .transforms.ray-venv .transforms.ray-image
 	@# Help: Create the venv and build the transform image
 
-.PHONY: .transforms.test
-.transforms.test:: .transforms.test-src .transforms.test-image
+.PHONY: .transforms.ray-test
+.transforms.ray-test:: .transforms.test-src .transforms.ray-test-image
 	@# Help: Run both source and image level tests.
 
+# Assumes the runtime-specific venv has already been built.
 .PHONY: .transforms.test-src
 .transforms.test-src:: .defaults.test-src .transforms.test-locals
 	@# Help: Run the transform's tests and any '*local' .py files
 
-.PHONY: .transforms.test-image
-.transforms.test-image:: .transforms.image .defaults.test-image-pytest
+.PHONY: .transforms.ray-test-image
+.transforms.ray-test-image:: .transforms.ray-image .defaults.test-image-pytest
 
 .PHONY: .transforms.test-image-pytest
 .transforms.test-image-pytest:: .defaults.test-image-pytest
@@ -136,8 +134,8 @@ test-locals:: .transforms.test-locals
 .transforms.test-locals:: .defaults.test-locals
 
 # Build the image the transform way and then use the default publishing
-.PHONY: .transforms.publish
-.transforms.publish:: .transforms.image .defaults.publish
+.PHONY: .transforms.ray-publish
+.transforms.ray-publish:: .transforms.ray-image .defaults.publish
 
 .PHONY: .transforms-check-exists
 .transforms-check-exists:
diff --git a/transforms/code/code_quality/ray/Makefile b/transforms/code/code_quality/ray/Makefile
index c2de41dd0..3ba6cd29c 100644
--- a/transforms/code/code_quality/ray/Makefile
+++ b/transforms/code/code_quality/ray/Makefile
@@ -12,22 +12,22 @@ DOCKER_IMAGE_VERSION=${CODE_QUALITY_VERSION}
 clean:: .transforms.clean
 
 # Use default rule inherited from makefile.common
-test:: .transforms.test
+test:: .transforms.ray-test
 
 # Use default rule inherited from makefile.common
-image:: .transforms.image
+image:: .transforms.ray-image
 
 # Use default rule inherited from makefile.common
-build:: .transforms.build
+build:: .transforms.ray-build
 
 # Use default rule inherited from makefile.common
-venv:: .transforms.venv
+venv:: .transforms.ray-venv
 
 test-src:: .transforms.test-src
 
-test-image:: .transforms.test-image
+test-image:: .transforms.ray-test-image
 
-publish:: .transforms.publish
+publish:: .transforms.ray-publish
 
 setup:: .transforms.setup
 
diff --git a/transforms/code/malware/ray/Makefile b/transforms/code/malware/ray/Makefile
index df034f5ff..6bfe273b4 100644
--- a/transforms/code/malware/ray/Makefile
+++ b/transforms/code/malware/ray/Makefile
@@ -12,12 +12,12 @@ DOCKER_IMAGE_VERSION=${MALWARE_VERSION}
 
 OS := $(shell uname -s)
 ifeq ($(OS),Darwin)
-venv:: .transforms.venv
+venv:: .transforms.ray-venv
 	@./install_clamd_mac.sh
 	@echo -e "if [ ! -e /var/run/clamav/clamd.ctl ]; then\n clamd --config-file=\$$(brew --prefix)/etc/clamav/clamd.conf\nfi" >> ./venv/bin/activate
 	@sed -i '' -e "s#deactivate () {#deactivate () {\nps aux | grep \"[c]lamd --config-file=\$$(brew --prefix)/etc/clamav/clamd.conf\" | awk '{print \$$2}' | xargs kill\nrm -f /var/run/clamav/clamd.ctl#" ./venv/bin/activate
 else
-venv:: .transforms.venv
+venv:: .transforms.ray-venv
 	@sudo mkdir -p /var/run/clamav && sudo chmod 777 /var/run/clamav
 	@$(DOCKER) build --target clamav-local -f Dockerfile -t clamav-local:latest .
 	@echo -e "$(DOCKER) run -d --name clamav -v /var/run/clamav:/var/run/clamav clamav-local:latest" >> ./venv/bin/activate
@@ -25,19 +25,19 @@ venv:: .transforms.venv
 	@sed -i -e "s/deactivate () {/deactivate () {\nfor c in \`$(DOCKER) ps -a | grep clamav | cut -f1 -d ' '\`; do $(DOCKER) rm -f -v \$$c; done/" ./venv/bin/activate
 endif
 
-build:: .transforms.build
+build:: .transforms.ray-build
 
-test:: .transforms.test
+test:: .transforms.ray-test
 
 clean:: .transforms.clean
 
-image:: .transforms.image
+image:: .transforms.ray-image
 
 test-src:: .transforms.test-src
 
-test-image:: .transforms.test-image
+test-image:: .transforms.ray-test-image
 
-publish:: .transforms.publish
+publish:: .transforms.ray-publish
 
 setup:: .transforms.setup
 
diff --git a/transforms/code/proglang_select/ray/Makefile b/transforms/code/proglang_select/ray/Makefile
index 3aa7f15a4..f26f8d213 100644
--- a/transforms/code/proglang_select/ray/Makefile
+++ b/transforms/code/proglang_select/ray/Makefile
@@ -10,23 +10,23 @@ TRANSFORM_NAME=proglang_select
 # $(REPOROOT)/.make.versions file contains the versions
 DOCKER_IMAGE_VERSION=${PROGLANG_SELECT_VERSION}
 
-venv:: .transforms.venv
+venv:: .transforms.ray-venv
 
-build:: .transforms.build
+build:: .transforms.ray-build
 
-test:: .transforms.test
+test:: .transforms.ray-test
 
 clean:: .transforms.clean
 
-image:: .transforms.image
+image:: .transforms.ray-image
 
 test-src:: .transforms.test-src
 
 setup:: .transforms.setup
 
-test-image:: .transforms.test-image
+test-image:: .transforms.ray-test-image
 
-publish:: .transforms.publish
+publish:: .transforms.ray-publish
 
 run-cli-ray-sample: .transforms.run-cli-ray-sample
 
diff --git a/transforms/universal/doc_id/ray/Makefile b/transforms/universal/doc_id/ray/Makefile
index 8803c990f..76c744936 100644
--- a/transforms/universal/doc_id/ray/Makefile
+++ b/transforms/universal/doc_id/ray/Makefile
@@ -10,23 +10,23 @@ TRANSFORM_NAME=doc_id
 # $(REPOROOT)/.make.versions file contains the versions
 DOCKER_IMAGE_VERSION=${DOC_ID_VERSION}
 
-venv:: .transforms.venv
+venv:: .transforms.ray-venv
 
-build:: .transforms.build
+build:: .transforms.ray-build
 
-test:: .transforms.test
+test:: .transforms.ray-test
 
 clean:: .transforms.clean
 
-image:: .transforms.image
+image:: .transforms.ray-image
 
 test-src:: .transforms.test
 
 setup:: .transforms.setup
 
-test-image:: .transforms.test-image
+test-image:: .transforms.ray-test-image
 
-publish:: .transforms.publish
+publish:: .transforms.ray-publish
 
 run-cli-ray-sample:
 	$(MAKE) RUN_FILE=$(TRANSFORM_NAME)_transform.py \
diff --git a/transforms/universal/ededup/ray/Makefile b/transforms/universal/ededup/ray/Makefile
index 05e390e8b..697e95e93 100644
--- a/transforms/universal/ededup/ray/Makefile
+++ b/transforms/universal/ededup/ray/Makefile
@@ -10,15 +10,15 @@ TRANSFORM_NAME=ededup
 # $(REPOROOT)/.make.versions file contains the versions
 DOCKER_IMAGE_VERSION=${EDEDUP_VERSION}
 
-venv:: .transforms.venv
+venv:: .transforms.ray-venv
 
-build:: .transforms.build
+build:: .transforms.ray-build
 
-test:: .transforms.test
+test:: .transforms.ray-test
 
 clean:: .transforms.clean
 
-image:: .transforms.image
+image:: .transforms.ray-image
 
 test-src:: .transforms.test-src
 
@@ -28,7 +28,7 @@ test-image:: .transforms.test-image-help
 	@echo WARNING: Skipping pytest version of this test until we have a test/test_ededup.py file.
 
 
-publish:: .transforms.publish
+publish:: .transforms.ray-publish
 
 run-cli-ray-sample:
 	$(MAKE) RUN_FILE=$(TRANSFORM_NAME)_transform.py \
diff --git a/transforms/universal/fdedup/ray/Makefile b/transforms/universal/fdedup/ray/Makefile
index 6ff8ecdab..e1bda74cc 100644
--- a/transforms/universal/fdedup/ray/Makefile
+++ b/transforms/universal/fdedup/ray/Makefile
@@ -10,15 +10,15 @@ TRANSFORM_NAME=fdedup
 # $(REPOROOT)/.make.versions file contains the versions
 DOCKER_IMAGE_VERSION=${FDEDUP_VERSION}
 
-venv:: .transforms.venv
+venv:: .transforms.ray-venv
 
-build:: .transforms.build
+build:: .transforms.ray-build
 
-test:: .transforms.test
+test:: .transforms.ray-test
 
 clean:: .transforms.clean
 
-image:: .transforms.image
+image:: .transforms.ray-image
 
 test-src:: .transforms.test-src
 
@@ -27,7 +27,7 @@ setup:: .transforms.setup
 test-image:: .transforms.test-image-help
 	@echo WARNING: Skipping pytest test until we have a test/test_fdedup.py file.
 
-publish:: .transforms.publish
+publish:: .transforms.ray-publish
 
 run-cli-ray-sample:
 	$(MAKE) RUN_FILE=$(TRANSFORM_NAME)_transform.py \
diff --git a/transforms/universal/filter/ray/Makefile b/transforms/universal/filter/ray/Makefile
index 824de8b92..855f91945 100644
--- a/transforms/universal/filter/ray/Makefile
+++ b/transforms/universal/filter/ray/Makefile
@@ -11,23 +11,23 @@ TRANSFORM_NAME=filter
 # $(REPOROOT)/.make.versions file contains the versions
 DOCKER_IMAGE_VERSION=${FILTER_VERSION}
 
-venv:: .transforms.venv
+venv:: .transforms.ray-venv
 
-build:: .transforms.build
+build:: .transforms.ray-build
 
-test:: .transforms.test
+test:: .transforms.ray-test
 
 clean:: .transforms.clean
 
-image:: .transforms.image
+image:: .transforms.ray-image
 
 test-src:: .transforms.test-src
 
 setup:: .transforms.setup
 
-test-image:: .transforms.test-image
+test-image:: .transforms.ray-test-image
 
-publish:: .transforms.publish
+publish:: .transforms.ray-publish
 
 run-cli-ray-sample:
 	$(MAKE) RUN_FILE=$(TRANSFORM_NAME)_transform.py \
diff --git a/transforms/universal/noop/ray/.dockerignore b/transforms/universal/noop/ray/.dockerignore
new file mode 100644
index 000000000..f7275bbbd
--- /dev/null
+++ b/transforms/universal/noop/ray/.dockerignore
@@ -0,0 +1 @@
+venv/
diff --git a/transforms/universal/noop/ray/.gitignore b/transforms/universal/noop/ray/.gitignore
new file mode 100644
index 000000000..3ea7fd4ab
--- /dev/null
+++ b/transforms/universal/noop/ray/.gitignore
@@ -0,0 +1,38 @@
+test-data/output
+output/*
+/output/
+data-processing-lib/
+
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+
+# Distribution / packaging
+bin/
+build/
+develop-eggs/
+dist/
+eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+.tox/
+htmlcov
+.coverage
+.cache
+nosetests.xml
+coverage.xml
\ No newline at end of file
diff --git a/transforms/universal/noop/ray/Makefile b/transforms/universal/noop/ray/Makefile
index 4dc866d62..b2e5a88a0 100644
--- a/transforms/universal/noop/ray/Makefile
+++ b/transforms/universal/noop/ray/Makefile
@@ -10,23 +10,23 @@ TRANSFORM_NAME=noop
 # $(REPOROOT)/.make.versions file contains the versions
 DOCKER_IMAGE_VERSION=${NOOP_VERSION}
 
-venv:: .transforms.venv
+venv:: .transforms.ray-venv
 
-build:: .transforms.build
+build:: .transforms.ray-build
 
-test:: .transforms.test
+test:: .transforms.ray-test
 
 clean:: .transforms.clean
 
-image:: .transforms.image
+image:: .transforms.ray-image
 
 test-src:: .transforms.test-src
 
 setup:: .transforms.setup
 
-test-image:: .transforms.test-image
+test-image:: .transforms.ray-test-image
 
-publish:: .transforms.publish
+publish:: .transforms.ray-publish
 
 run-cli-ray-sample: .transforms.run-cli-ray-sample
 
diff --git a/transforms/universal/tokenization/ray/Makefile b/transforms/universal/tokenization/ray/Makefile
index 6d2ab0326..bea983ba1 100644
--- a/transforms/universal/tokenization/ray/Makefile
+++ b/transforms/universal/tokenization/ray/Makefile
@@ -11,23 +11,23 @@ TRANSFORM_NAME=tokenization
 
 DOCKER_IMAGE_VERSION=${TOKENIZER_VERSION}
 
-venv:: .transforms.venv
+venv:: .transforms.ray-venv
 
-build:: .transforms.build
+build:: .transforms.ray-build
 
-test:: .transforms.test
+test:: .transforms.ray-test
 
 clean:: .transforms.clean
 
-image:: .transforms.image
+image:: .transforms.ray-image
 
 test-src:: .transforms.test-src
 
 setup:: .transforms.setup
 
-test-image:: .transforms.test-image
+test-image:: .transforms.ray-test-image
 
-publish:: .transforms.publish
+publish:: .transforms.ray-publish
 
 run-cli-ray-sample:
 	$(MAKE) RUN_FILE=$(TRANSFORM_NAME)_transform.py \

From d7daa59e84e2d322b29031435986b367c37a0984 Mon Sep 17 00:00:00 2001
From: David Wood
Date: Fri, 17 May 2024 16:18:18 -0400
Subject: [PATCH 2/2] fix doc_id make test target

Signed-off-by: David Wood
---
 transforms/universal/doc_id/ray/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/transforms/universal/doc_id/ray/Makefile b/transforms/universal/doc_id/ray/Makefile
index 76c744936..9f3dbcbc2 100644
--- a/transforms/universal/doc_id/ray/Makefile
+++ b/transforms/universal/doc_id/ray/Makefile
@@ -20,7 +20,7 @@ clean:: .transforms.clean
 
 image:: .transforms.ray-image
 
-test-src:: .transforms.test
+test-src:: .transforms.test-src
 
 setup:: .transforms.setup
 