Skip to content

Commit

Permalink
Merge pull request #837 from IBM/transform-0.2.3.dev0
Browse files Browse the repository at this point in the history
  • Loading branch information
touma-I authored Dec 4, 2024
2 parents 8987261 + b2625d0 commit 6866f78
Show file tree
Hide file tree
Showing 61 changed files with 946 additions and 190 deletions.
75 changes: 20 additions & 55 deletions .make.defaults
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ __check_defined = \
# We create both local and remote tags. Local seems to be needed when using our spark
# base image. Remote seems to be needed by kfp.
.PHONY: .defaults.image
.defaults.image:: # Must be called with a DOCKER_IMAGE= settings.
.defaults.image:: # Must be called with a DOCKER_IMAGE_NAME= settings.
@# Help: Create the docker image $(DOCKER_LOCAL_IMAGE) and a tag for $(DOCKER_REMOTE_IMAGE)
$(call check_defined, DOCKER_IMAGE_NAME)
# The following touch seems to be needed to work around a docker build problem in which
Expand All @@ -222,14 +222,15 @@ __check_defined = \
if [ -e pyproject.toml ]; then \
touch pyproject.toml; \
fi
$(DOCKER) build -t $(DOCKER_LOCAL_IMAGE) $(DOCKER_BUILD_EXTRA_ARGS) \
$(DOCKER) build -f $(DOCKER_FILE) -t $(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) $(DOCKER_BUILD_EXTRA_ARGS) \
--platform $(DOCKER_PLATFORM) \
--build-arg EXTRA_INDEX_URL=$(EXTRA_INDEX_URL) \
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
--build-arg DPK_WHEEL_FILE_NAME=$(DPK_WHEEL_FILE_NAME) \
--build-arg BUILD_DATE=$(shell date -u +'%Y-%m-%dT%H:%M:%SZ') \
--build-arg GIT_COMMIT=$(shell git log -1 --format=%h) .
$(DOCKER) tag $(DOCKER_LOCAL_IMAGE) $(DOCKER_REMOTE_IMAGE)
$(DOCKER) tag $(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) $(DOCKER_REGISTRY_ENDPOINT)/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION)


# Copy a source tree in LIB_PATH, including src, pyproject.toml to LIB_NAME
# Generally used to copy source from within the repo into a local directory for use by a Dockerfile
Expand All @@ -244,24 +245,25 @@ __check_defined = \
cp -p ${LIB_PATH}/requirements.txt ${LIB_NAME}; \
fi


# Build and image using the local Dockerfile and make the data-processing-lib/python
# available in the current directory for use by the Dockerfile (i.e. to install the library).
#.PHONY: .defaults.python-lib-src-image
#.defaults.python-lib-src-image:: # Must be called with a DOCKER_LOCAL_IMAGE= settings.
# @# Help: Build the Python $(DOCKER_LOCAL_IMAGE) using the $(DOCKER_FILE), requirements.txt and data-processing-lib/python source
#ifeq ($(USE_REPO_LIB_SRC), 1)
# $(MAKE) LIB_PATH=$(DPK_PYTHON_LIB_DIR) LIB_NAME=data-processing-lib-python .defaults.copy-lib
#endif
# $(MAKE) DOCKER_IMAGE=$(DOCKER_LOCAL_IMAGE) .defaults.image
# -rm -rf data-processing-lib-python

.PHONY: .default.build-lib-wheel
.default.build-lib-wheel:
make -C $(REPOROOT)/data-processing-lib build-pkg-dist
$(MAKE) -C $(REPOROOT)/data-processing-lib build-pkg-dist
rm -rf data-processing-dist && mkdir data-processing-dist
cp $(REPOROOT)/data-processing-lib/dist/*.whl data-processing-dist


# Build an image using the local Dockerfile.
# Assumes the library wheel already exists under data-processing-dist/
# (.default.build-lib-wheel copies it there); this target does not build it.
# Must be called with a DOCKER_IMAGE_NAME= setting, which .defaults.image checks.
# The wheel path found under data-processing-dist/ is reduced to its file name
# and handed to .defaults.image as DPK_WHEEL_FILE_NAME.
.PHONY: .defaults.lib-whl-image
.defaults.lib-whl-image::
	@# Help: Build $(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_VERSION) using the wheel file for the library
	@$(eval LIB_WHEEL_FILE := $(shell find data-processing-dist/*.whl))
	$(eval LIB_WHEEL_FILE := $(shell basename $(LIB_WHEEL_FILE)))
	$(MAKE) DPK_WHEEL_FILE_NAME=$(LIB_WHEEL_FILE) .defaults.image


# Build an image using the local Dockerfile and make the wheel for data-processing-lib
# available in the current directory for use by the Dockerfile (i.e. to install the library).
.PHONY: .defaults.python-lib-whl-image
Expand All @@ -270,28 +272,9 @@ __check_defined = \
@# Help: Build the Python $(DOCKER_LOCAL_IMAGE) using the the wheel file for the library
@$(eval LIB_WHEEL_FILE := $(shell find data-processing-dist/*.whl))
$(eval LIB_WHEEL_FILE := $(shell basename $(LIB_WHEEL_FILE)))
$(MAKE) DOCKER_IMAGE=$(DOCKER_LOCAL_IMAGE) DPK_WHEEL_FILE_NAME=$(LIB_WHEEL_FILE) .defaults.image
$(MAKE) DPK_WHEEL_FILE_NAME=$(LIB_WHEEL_FILE) .defaults.image
-rm -rf data-processing-dist

# Build an image using the local Dockerfile and make the data-processing-lib/ray
# available in the current directory for use by the Dockerfile (i.e. to install the library).
# Note that this looks for the ../python directory, which is currently only used in the transform projects,
# but we add it here as a convenience to avoid duplicating a lot of this in transforms/.make.transforms.
#.PHONY: .defaults.ray-lib-src-image
#.defaults.ray-lib-src-image:: # Must be called with a DOCKER_LOCAL_IMAGE= settings.
# @# Help: Build the Ray $(DOCKER_LOCAL_IMAGE) using the $(DOCKER_FILE), requirements.txt and data-processing-libs source
#ifeq ($(USE_REPO_LIB_SRC), 1)
# $(MAKE) LIB_PATH=$(DPK_PYTHON_LIB_DIR) LIB_NAME=data-processing-lib-python .defaults.copy-lib
# $(MAKE) LIB_PATH=$(DPK_RAY_LIB_DIR) LIB_NAME=data-processing-lib-ray .defaults.copy-lib
#endif
# if [ -e ../python ]; then \
# $(MAKE) LIB_PATH=../python LIB_NAME=python-transform .defaults.copy-lib; \
# fi
# $(MAKE) DOCKER_IMAGE=$(DOCKER_LOCAL_IMAGE) .defaults.image
# -rm -rf data-processing-lib-python
# -rm -rf data-processing-lib-ray
# -rm -rf python-transform


# Build an image using the local Dockerfile and make the data-processing wheel
# available in the current directory for use by the Dockerfile (i.e. to install the library).
Expand All @@ -306,7 +289,7 @@ __check_defined = \
if [ -e ../python ]; then \
$(MAKE) LIB_PATH=../python LIB_NAME=python-transform .defaults.copy-lib; \
fi
$(MAKE) DOCKER_IMAGE=$(DOCKER_LOCAL_IMAGE) DPK_WHEEL_FILE_NAME=$(LIB_WHEEL_FILE) .defaults.image
$(MAKE) DPK_WHEEL_FILE_NAME=$(LIB_WHEEL_FILE) .defaults.image
-rm -rf python-transform
-rm -rf data-processing-dist

Expand All @@ -316,24 +299,6 @@ __check_defined = \
.defaults.spark-lib-base-image:
$(MAKE) -C $(DPK_SPARK_LIB_DIR) image

# Note that this looks for the ../python directory, which is currently only used in the transform projects,
# but we add it here as a convenience to avoid duplicating a lot of this in transforms/.make.transforms.
# Must be called with a DOCKER_LOCAL_IMAGE= settings.
#.PHONY: .defaults.spark-lib-src-image
#.defaults.spark-lib-src-image:: .defaults.spark-lib-base-image
# @# Help: Build the Spark $(DOCKER_LOCAL_IMAGE) using the $(DOCKER_FILE), requirements.txt and data-processing-libs source
# $(MAKE) IMAGE_NAME_TO_VERIFY=$(DOCKER_SPARK_BASE_IMAGE_NAME) .defaults.verify-image-availability
#ifeq ($(USE_REPO_LIB_SRC), 1)
# $(MAKE) LIB_PATH=$(DPK_PYTHON_LIB_DIR) LIB_NAME=data-processing-lib-python .defaults.copy-lib
# $(MAKE) LIB_PATH=$(DPK_SPARK_LIB_DIR) LIB_NAME=data-processing-lib-spark .defaults.copy-lib
#endif
# if [ -e ../python ]; then \
# $(MAKE) LIB_PATH=../python LIB_NAME=python-transform .defaults.copy-lib; \
# fi
# $(MAKE) DOCKER_IMAGE=$(DOCKER_LOCAL_IMAGE) BASE_IMAGE=$(DOCKER_SPARK_BASE_IMAGE) .defaults.image
# -rm -rf data-processing-lib-python
# -rm -rf data-processing-lib-spark
# -rm -rf python-transform

.PHONY: .defaults.spark-lib-whl-image
.defaults.spark-lib-whl-image:: .default.build-lib-wheel .defaults.spark-lib-base-image
Expand All @@ -345,7 +310,7 @@ __check_defined = \
if [ -e ../python ]; then \
$(MAKE) LIB_PATH=../python LIB_NAME=python-transform .defaults.copy-lib; \
fi
$(MAKE) DOCKER_IMAGE=$(DOCKER_LOCAL_IMAGE) BASE_IMAGE=$(DOCKER_SPARK_BASE_IMAGE) DPK_WHEEL_FILE_NAME=$(LIB_WHEEL_FILE) .defaults.image
$(MAKE) BASE_IMAGE=$(DOCKER_SPARK_BASE_IMAGE) DPK_WHEEL_FILE_NAME=$(LIB_WHEEL_FILE) .defaults.image
-rm -rf python-transform
-rm -rf data-processing-dist

Expand Down
2 changes: 1 addition & 1 deletion .make.versions
Original file line number Diff line number Diff line change
Expand Up @@ -66,4 +66,4 @@ endif
#
# If you change the versions numbers, be sure to run "make set-versions" to
# update version numbers across the transform (e.g., pyproject.toml).
TRANSFORMS_PKG_VERSION=0.2.3.dev0
TRANSFORMS_PKG_VERSION=0.2.3.dev1
2 changes: 1 addition & 1 deletion kfp/kfp_support_lib/shared_workflow_support/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ authors = [
dependencies = [
"requests",
"kubernetes",
"data-prep-toolkit[ray]==0.2.3.dev0",
"data-prep-toolkit[ray]>=0.2.3.dev0",
]

[build-system]
Expand Down
153 changes: 104 additions & 49 deletions transforms/.make.cicd.targets
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,15 @@
include $(REPOROOT)/transforms/.make.transforms

######################################################################
## Default setting for TRANSFORM_RUNTIME uses folder name-- Old layout
TRANSFORM_PYTHON_RUNTIME_SRC_FILE=-m dpk_$(TRANSFORM_NAME).transform
TRANSFORM_RAY_RUNTIME_SRC_FILE=-m dpk_$(TRANSFORM_NAME).ray.transform
TRANSFORM_PYTHON_RUNTIME_SRC_FILE=-m dpk_$(TRANSFORM_NAME).spark.transform
## Default setting for TRANSFORM_RUNTIME entry point:
# python -m dpk_html2parquet.ray.transform --help
# or
# python -m dpk_html2parquet.transform_python --help
#
TRANSFORM_PYTHON_SRC?="-m dpk_$(TRANSFORM_NAME).transform_python"
TRANSFORM_RAY_SRC?="-m dpk_$(TRANSFORM_NAME).ray.transform"
TRANSFORM_SPARK_SRC?="-m dpk_$(TRANSFORM_NAME).spark.transform"


venv:: .defaults.create-venv
source venv/bin/activate && $(PIP) install -e $(REPOROOT)/data-processing-lib[ray,spark]
Expand All @@ -19,7 +24,6 @@ venv:: .defaults.create-venv
source venv/bin/activate && $(PIP) install -r requirements.txt; \
fi;


test:: .transforms.test-src test-image

clean:: .transforms.clean
Expand All @@ -28,62 +32,113 @@ clean:: .transforms.clean
set-versions::

## We need to think how we want to do this going forward
build::

image::
@if [ -e Dockerfile ]; then \
$(MAKE) image-default ; \
else \
echo "Skipping image for $(shell pwd) since no Dockerfile is present"; \
build:: image

# Publish the image of every runtime that has a Dockerfile in this directory.
# For each of Dockerfile.python, Dockerfile.ray and Dockerfile.spark that exists,
# re-invoke make on .defaults.publish-image with DOCKER_REMOTE_IMAGE overridden to
# $(DOCKER_REGISTRY_ENDPOINT)/$(TRANSFORM_NAME)-<runtime>:$(DOCKER_IMAGE_VERSION).
# A runtime whose Dockerfile is absent is skipped without any message.
publish:
@if [ -e Dockerfile.python ]; then \
$(MAKE) DOCKER_REMOTE_IMAGE=$(DOCKER_REGISTRY_ENDPOINT)/$(TRANSFORM_NAME)-python:$(DOCKER_IMAGE_VERSION) \
.defaults.publish-image ; \
fi
@if [ -e Dockerfile.ray ]; then \
$(MAKE) DOCKER_REMOTE_IMAGE=$(DOCKER_REGISTRY_ENDPOINT)/$(TRANSFORM_NAME)-ray:$(DOCKER_IMAGE_VERSION) \
.defaults.publish-image ; \
fi
@if [ -e Dockerfile.spark ]; then \
$(MAKE) DOCKER_REMOTE_IMAGE=$(DOCKER_REGISTRY_ENDPOINT)/$(TRANSFORM_NAME)-spark:$(DOCKER_IMAGE_VERSION) \
.defaults.publish-image ; \
fi

test-image-sequence:: .defaults.lib-whl-image .transforms.test-image-help .transforms.clean

# Run the image test sequence for every runtime that has a Dockerfile here.
# The .default.build-lib-wheel prerequisite runs first; each existing
# Dockerfile.<runtime> then triggers a sub-make of test-image-sequence with the
# matching DOCKER_FILE, TRANSFORM_RUNTIME_SRC_FILE and DOCKER_IMAGE_NAME
# (the ray and spark branches additionally pass BASE_IMAGE).
# The final line removes the local data-processing-dist directory; its leading
# '-' tells make to carry on even if rm fails.
test-image:: .default.build-lib-wheel
@if [ -e Dockerfile.python ]; then \
$(MAKE) DOCKER_FILE=Dockerfile.python \
TRANSFORM_RUNTIME_SRC_FILE=$(TRANSFORM_PYTHON_SRC) \
DOCKER_IMAGE_NAME=$(TRANSFORM_NAME)-python \
test-image-sequence ; \
fi
@if [ -e Dockerfile.ray ]; then \
$(MAKE) DOCKER_FILE=Dockerfile.ray \
TRANSFORM_RUNTIME_SRC_FILE=$(TRANSFORM_RAY_SRC) \
DOCKER_IMAGE_NAME=$(TRANSFORM_NAME)-ray \
BASE_IMAGE=$(RAY_BASE_IMAGE) \
test-image-sequence ; \
fi
@if [ -e Dockerfile.spark ]; then \
$(MAKE) DOCKER_FILE=Dockerfile.spark \
TRANSFORM_RUNTIME_SRC_FILE=$(TRANSFORM_SPARK_SRC) \
DOCKER_IMAGE_NAME=$(TRANSFORM_NAME)-spark \
BASE_IMAGE=$(SPARK_BASE_IMAGE) \
test-image-sequence ; \
fi
-rm -rf data-processing-dist


publish::
@if [ -e Dockerfile ]; then \
$(MAKE) publish-default ; \
else \
echo "Skipping publish for $(shell pwd) since no Dockerfile is present"; \
# Build the python runtime image, but only when Dockerfile.python exists.
# Delegates to .defaults.lib-whl-image in a sub-make, passing
# DOCKER_FILE=Dockerfile.python and DOCKER_IMAGE_NAME=$(TRANSFORM_NAME)-python.
# No BASE_IMAGE is passed for this runtime.
image-python:
@if [ -e Dockerfile.python ]; then \
$(MAKE) DOCKER_FILE=Dockerfile.python \
DOCKER_IMAGE_NAME=$(TRANSFORM_NAME)-python \
.defaults.lib-whl-image ; \
fi

publish-image::
@if [ -e Dockerfile ]; then \
$(MAKE) publish-image-default ; \
else \
echo "Skipping publish-image for $(shell pwd) since no Dockerfile is present"; \
# Build the ray runtime image, but only when Dockerfile.ray exists.
# Delegates to .defaults.lib-whl-image in a sub-make, passing
# DOCKER_FILE=Dockerfile.ray, DOCKER_IMAGE_NAME=$(TRANSFORM_NAME)-ray and
# BASE_IMAGE=$(RAY_BASE_IMAGE).
image-ray:
@if [ -e Dockerfile.ray ]; then \
$(MAKE) DOCKER_FILE=Dockerfile.ray \
DOCKER_IMAGE_NAME=$(TRANSFORM_NAME)-ray \
BASE_IMAGE=$(RAY_BASE_IMAGE) \
.defaults.lib-whl-image ; \
fi

test-image::
@if [ -e Dockerfile ]; then \
$(MAKE) test-image-default ; \
else \
echo "Skipping test-image for $(shell pwd) since no Dockerfile is present"; \
# Build the spark runtime image, but only when Dockerfile.spark exists.
# Delegates to .defaults.lib-whl-image in a sub-make, passing
# DOCKER_FILE=Dockerfile.spark, DOCKER_IMAGE_NAME=$(TRANSFORM_NAME)-spark and
# BASE_IMAGE=$(SPARK_BASE_IMAGE).
image-spark:
@if [ -e Dockerfile.spark ]; then \
$(MAKE) DOCKER_FILE=Dockerfile.spark \
DOCKER_IMAGE_NAME=$(TRANSFORM_NAME)-spark \
BASE_IMAGE=$(SPARK_BASE_IMAGE) \
.defaults.lib-whl-image ; \
fi

# Build all possible images unless a specific runtime is specified.
# With BUILD_SPECIFIC_RUNTIME empty, image-python, image-ray and image-spark are
# all invoked; when it is set to python, ray or spark only the matching target
# is invoked. The .default.build-lib-wheel prerequisite runs before any of them.
# String comparison uses the POSIX '=' operator rather than the bash-only '==',
# so the selection also works when recipes run under a plain /bin/sh.
# The final line removes the local data-processing-dist directory; its leading
# '-' tells make to carry on even if rm fails.
image:: .default.build-lib-wheel
	@if [ -z "$(BUILD_SPECIFIC_RUNTIME)" ] || [ "$(BUILD_SPECIFIC_RUNTIME)" = "python" ]; then \
		$(MAKE) image-python ; \
	fi
	@if [ -z "$(BUILD_SPECIFIC_RUNTIME)" ] || [ "$(BUILD_SPECIFIC_RUNTIME)" = "ray" ]; then \
		$(MAKE) image-ray ; \
	fi
	@if [ -z "$(BUILD_SPECIFIC_RUNTIME)" ] || [ "$(BUILD_SPECIFIC_RUNTIME)" = "spark" ]; then \
		$(MAKE) image-spark ; \
	fi
	-rm -rf data-processing-dist

test-src:: .transforms.test-src

setup:: .transforms.setup

publish-default:: publish-image

publish-image-default:: .defaults.publish-image

test-image-default:: image .transforms.test-image-help .defaults.test-image-pytest .transforms.clean

build-lib-wheel:
make -C $(REPOROOT)/data-processing-lib build-pkg-dist

image-default:: build-lib-wheel
@$(eval LIB_WHEEL_FILE := $(shell find $(REPOROOT)/data-processing-lib/dist/*.whl))
rm -fr dist && mv $(REPOROOT)/data-processing-lib/dist .
$(eval WHEEL_FILE_NAME := $(shell basename $(LIB_WHEEL_FILE)))
$(DOCKER) build -t $(DOCKER_IMAGE_NAME) $(DOCKER_BUILD_EXTRA_ARGS) \
--platform $(DOCKER_PLATFORM) \
--build-arg EXTRA_INDEX_URL=$(EXTRA_INDEX_URL) \
--build-arg BASE_IMAGE=$(RAY_BASE_IMAGE) \
--build-arg BUILD_DATE=$(shell date -u +'%Y-%m-%dT%H:%M:%SZ') \
--build-arg WHEEL_FILE_NAME=$(WHEEL_FILE_NAME) \
--build-arg TRANSFORM_NAME=$(TRANSFORM_NAME) \
--build-arg GIT_COMMIT=$(shell git log -1 --format=%h) .
$(DOCKER) tag $(DOCKER_LOCAL_IMAGE) $(DOCKER_REMOTE_IMAGE)
rm -fr dist
kind-load-image:: .transforms.kind-load-image

# Forward workflow-venv to the kfp_ray sub-make when a kfp_ray entry exists in
# this directory; otherwise do nothing. TRANSFORM_NAME is passed through.
# The .PHONY name must match the target exactly, or the target is not phony.
.PHONY: workflow-venv
workflow-venv:
	if [ -e kfp_ray ]; then \
		$(MAKE) -C kfp_ray TRANSFORM_NAME=$(TRANSFORM_NAME) workflow-venv; \
	fi

# Forward workflow-test to the kfp_ray sub-make when a kfp_ray entry exists in
# this directory; otherwise do nothing. TRANSFORM_NAME is passed through.
.PHONY: workflow-test
workflow-test:
if [ -e kfp_ray ]; then \
$(MAKE) -C kfp_ray TRANSFORM_NAME=$(TRANSFORM_NAME) workflow-test; \
fi

# Forward workflow-upload to the kfp_ray sub-make when a kfp_ray entry exists in
# this directory; otherwise do nothing. TRANSFORM_NAME is passed through.
.PHONY: workflow-upload
workflow-upload:
if [ -e kfp_ray ]; then \
$(MAKE) -C kfp_ray TRANSFORM_NAME=$(TRANSFORM_NAME) workflow-upload; \
fi

# Forward workflow-build to the kfp_ray sub-make when a kfp_ray entry exists in
# this directory; otherwise do nothing. TRANSFORM_NAME is passed through.
.PHONY: workflow-build
workflow-build:
if [ -e kfp_ray ]; then \
$(MAKE) -C kfp_ray TRANSFORM_NAME=$(TRANSFORM_NAME) workflow-build; \
fi


2 changes: 1 addition & 1 deletion transforms/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ build-pkg-dist:
-rm -fr src
mkdir src
# Copy all the src folders recursively (not clear if they have subfolders)
for x in $(shell find . | grep '[ray| python]/src$$') ; do \
for x in $(shell find . | grep '[ray| python | spark]/src$$') ; do \
echo $$x ; \
if [ -d "$$x" ]; then \
cp -r $$x/* src ; \
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/code2parquet/python/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
data-prep-toolkit==0.2.3.dev0
data-prep-toolkit>=0.2.3.dev0
parameterized
pandas
2 changes: 1 addition & 1 deletion transforms/code/code2parquet/ray/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ authors = [
{ name = "Boris Lublinsky", email = "blublinsky@ibm.com" },
]
dependencies = [
"data-prep-toolkit[ray]==0.2.3.dev0",
"data-prep-toolkit[ray]>=0.2.3.dev0",
"dpk-code2parquet-transform-python==0.2.3.dev0",
"parameterized",
"pandas",
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/code_profiler/python/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
data-prep-toolkit==0.2.3.dev0
data-prep-toolkit>=0.2.3.dev0
parameterized
pandas
aiolimiter==1.1.0
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/code_profiler/ray/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ authors = [
]
dependencies = [
"dpk-code-profiler-transform-python==0.2.3.dev0",
"data-prep-toolkit[ray]==0.2.3.dev0",
"data-prep-toolkit[ray]>=0.2.3.dev0",
]

[build-system]
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/code_quality/python/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
data-prep-toolkit==0.2.3.dev0
data-prep-toolkit>=0.2.3.dev0
bs4==0.0.2
transformers==4.38.2
2 changes: 1 addition & 1 deletion transforms/code/code_quality/ray/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ authors = [
]
dependencies = [
"dpk-code-quality-transform-python==0.2.3.dev0",
"data-prep-toolkit[ray]==0.2.3.dev0",
"data-prep-toolkit[ray]>=0.2.3.dev0",
]

[build-system]
Expand Down
2 changes: 1 addition & 1 deletion transforms/code/header_cleanser/python/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
data-prep-toolkit==0.2.3.dev0
data-prep-toolkit>=0.2.3.dev0
scancode-toolkit==32.1.0 ; platform_system != 'Darwin'

Loading

0 comments on commit 6866f78

Please sign in to comment.