From 3dd1c9c2324b2c6297018032683106283842e0b2 Mon Sep 17 00:00:00 2001 From: David Wood Date: Wed, 19 Jun 2024 16:21:35 -0400 Subject: [PATCH 01/32] Some new thoughts on cutting a release, especially scripts/release.sh Signed-off-by: David Wood --- .make.versions | 63 ++++++++++--------- Makefile | 30 ++++++--- scripts/release.sh | 44 +++++++++++++ .../universal/profiler/kfp_ray/Makefile | 9 ++- 4 files changed, 103 insertions(+), 43 deletions(-) create mode 100644 scripts/release.sh diff --git a/.make.versions b/.make.versions index 3328b5a9a..e4de65483 100644 --- a/.make.versions +++ b/.make.versions @@ -8,50 +8,55 @@ # % make RELEASE_VERSION_SUFFIX= set-version # % git push, tag, etc. # % make build, publish, etc. -RELEASE_VERSION_SUFFIX=.dev6 +DPK_MAJOR_VERSION=0 +DPK_MINOR_VERSION=2 +DPK_MICRO_VERSION=0 +DPK_VERSION_SUFFIX=.dev6 + +DPK_VERSION=$(DPK_MAJOR_VERSION).$(DPK_MINOR_VERSION).$(DPK_MICRO_VERSION)$(DPK_VERSION_SUFFIX) # Data prep lab wheel version -DPK_LIB_VERSION=0.2.0$(RELEASE_VERSION_SUFFIX) -DPK_LIB_KFP_VERSION=0.2.0$(RELEASE_VERSION_SUFFIX) -DPK_LIB_KFP_VERSION_v2=0.2.0$(RELEASE_VERSION_SUFFIX) -DPK_LIB_KFP_SHARED=0.2.0$(RELEASE_VERSION_SUFFIX) +DPK_LIB_VERSION=$(DPK_VERSION) +DPK_LIB_KFP_VERSION=$(DPK_VERSION) +DPK_LIB_KFP_VERSION_v2=$(DPK_VERSION) +DPK_LIB_KFP_SHARED=$(DPK_VERSION) # Begin transform versions/tags -BLOCKLIST_VERSION=0.4.2$(RELEASE_VERSION_SUFFIX) +BLOCKLIST_VERSION=$(DPK_VERSION) -DOC_ID_RAY_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX) -DOC_ID_SPARK_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX) +DOC_ID_RAY_VERSION=$(DPK_VERSION) +DOC_ID_SPARK_VERSION=$(DPK_VERSION) -EDEDUP_RAY_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX) +EDEDUP_RAY_VERSION=$(DPK_VERSION) -FDEDUP_RAY_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX) +FDEDUP_RAY_VERSION=$(DPK_VERSION) -FILTER_PYTHON_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX) -FILTER_RAY_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX) -FILTER_SPARK_VERSION=0.2.0$(RELEASE_VERSION_SUFFIX) +FILTER_PYTHON_VERSION=$(DPK_VERSION) 
+FILTER_RAY_VERSION=$(DPK_VERSION) +FILTER_SPARK_VERSION=$(DPK_VERSION) -NOOP_PYTHON_VERSION=0.9.0$(RELEASE_VERSION_SUFFIX) -NOOP_RAY_VERSION=0.9.0$(RELEASE_VERSION_SUFFIX) -NOOP_SPARK_VERSION=0.2.0$(RELEASE_VERSION_SUFFIX) -PROFILER_VERSION=0.2.0$(RELEASE_VERSION_SUFFIX) +NOOP_PYTHON_VERSION=$(DPK_VERSION) +NOOP_RAY_VERSION=$(DPK_VERSION) +NOOP_SPARK_VERSION=$(DPK_VERSION) +PROFILER_VERSION=$(DPK_VERSION) -RESIZE_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX) +RESIZE_VERSION=$(DPK_VERSION) -LANG_ID_PYTHON_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX) -LANG_ID_RAY_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX) +LANG_ID_PYTHON_VERSION=$(DPK_VERSION) +LANG_ID_RAY_VERSION=$(DPK_VERSION) -TOKENIZATION_RAY_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX) -TOKENIZATION_PYTHON_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX) +TOKENIZATION_RAY_VERSION=$(DPK_VERSION) +TOKENIZATION_PYTHON_VERSION=$(DPK_VERSION) -MALWARE_RAY_VERSION=0.5.0$(RELEASE_VERSION_SUFFIX) +MALWARE_RAY_VERSION=$(DPK_VERSION) -PROGLANG_SELECT_RAY_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX) +PROGLANG_SELECT_RAY_VERSION=$(DPK_VERSION) -CODE_QUALITY_RAY_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX) +CODE_QUALITY_RAY_VERSION=$(DPK_VERSION) -INGEST_TO_PARQUET_RAY_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX) -INGEST_TO_PARQUET_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX) +INGEST_TO_PARQUET_RAY_VERSION=$(DPK_VERSION) +INGEST_TO_PARQUET_VERSION=$(DPK_VERSION) -KFP_DOCKER_VERSION=0.2.0$(RELEASE_VERSION_SUFFIX) -KFP_DOCKER_VERSION_v2=0.2.0$(RELEASE_VERSION_SUFFIX) +KFP_DOCKER_VERSION=$(DPK_VERSION) +KFP_DOCKER_VERSION_v2=$(DPK_VERSION) diff --git a/Makefile b/Makefile index 39cfa87ad..5e9a70e82 100644 --- a/Makefile +++ b/Makefile @@ -51,15 +51,27 @@ set-versions: @# Help: Recursively $@ in all subdirs @$(MAKE) RULE=$@ .recurse -lib-release: - @# Help: Publish data-prep-kit $(DPK_LIB_VERSION) and data-prep-kit-kfp $(DPK_LIB_KFP_VERSION) libraries to pypi - @$(MAKE) -C $(DPK_PYTHON_LIB_DIR) build publish - @$(MAKE) -C $(DPK_RAY_LIB_DIR) build publish - @$(MAKE) -C 
$(DPK_SPARK_LIB_DIR) build publish - @$(MAKE) -C kfp/kfp_support_lib build publish - @echo "" - @echo "This modified files in the repo. Please be sure to commit/push back to the repository." - @echo "" +#set-release-verions: +# @# Help: Update all internally used versions to not include the release suffix. +# @$(MAKE) DPK_VERSION_SUFFIX= set-versions + +#lib-release: +# @# Help: Set versions to be unsuffixed and publish libraries +# @$(MAKE) set-release-versions +# @$(MAKE) publish-lib + +show-version: + @echo $(DPK_VERSION) + +#publish-lib: +# @# Help: Publish data-prep-kit $(DPK_LIB_VERSION) and data-prep-kit-kfp $(DPK_LIB_KFP_VERSION) libraries to pypi +# @$(MAKE) -C $(DPK_PYTHON_LIB_DIR) build publish +# @$(MAKE) -C $(DPK_RAY_LIB_DIR) build publish +# @$(MAKE) -C $(DPK_SPARK_LIB_DIR) build publish +# @$(MAKE) -C kfp/kfp_support_lib build publish +# @echo "" +# @echo "This modified files in the repo. Please be sure to commit/push back to the repository." +# @echo "" diff --git a/scripts/release.sh b/scripts/release.sh new file mode 100644 index 000000000..1b12dfedd --- /dev/null +++ b/scripts/release.sh @@ -0,0 +1,44 @@ +DEFAULT_BRANCH=dev +# Assume this file is in the reporoot/scripts directory +reporoot=$(dirname $0)/.. +cd $reporoot + +# Make sure we're starting from the base branch +# git checkout $DEFAULT_BRANCH + +# Get the currently defined version w/o any suffix. 
This is the next release version +# version=$(make DPK_VERSION_SUFFIX= show-version) +# tag=v$version + +# Create a new branch for this version and switch to it +# git branch -r release/$tag + +# Remove the release suffix in this branch +# cat .make.versions | sed -e 's/^DPK_VERSION_SUFFIX.*/DPK_VERSION_SUFFIX=/' > tt +# mv tt .make.version + +# Apply the unsuffixed version to the repo and check it into this release branch +# make set-versions +# git add -A +# git commit -s -m "Cut release $version" +# git push origin +# git tag -a -s -m "Cut release $version" $tag +# git push origin $tag + +# Now build with the updated version +# make build publish + +# Now go back to the default branch so we can bump the minor version number and reset the version suffix +# git branch $DEFAULT_BRANCH + +# Change to the next development version (bumped minor version with suffix). +minor=$(cat .make.versions | grep '^DPK_MINOR_VERSION=' | sed -e 's/DPK_MINOR_VERSION=\([0-9]*\).*/\1/') +minor=$(($minor + 1)) +#cat .make.versions | sed -e "s/^DPK_MINOR_VERSION=.*/DPK_MINOR_VERSION=$minor/" \ +# -e "s/^DPK_VERSION_SUFFIX=.*/DPK_VERSION_SUFFIX=.dev0/" > tt +#mv tt .make.versions + +# Push the version change back to the origin +# git add -A +# git commit -s -m "Bump minor version to $minor after cutting release $version" +# git push origin diff --git a/transforms/universal/profiler/kfp_ray/Makefile b/transforms/universal/profiler/kfp_ray/Makefile index 30739f491..4111e1555 100644 --- a/transforms/universal/profiler/kfp_ray/Makefile +++ b/transforms/universal/profiler/kfp_ray/Makefile @@ -29,8 +29,6 @@ image:: load-image:: -set-versions: workflow-reconcile-requirements - .PHONY: workflow-build workflow-build: workflow-venv $(MAKE) $(YAML_WF) @@ -45,8 +43,9 @@ workflow-upload: workflow-build $(MAKE) .transforms_workflows.upload-pipeline PIPELINE_FILE=$$file; \ done -.PHONY: workflow-reconcile-requirements -workflow-reconcile-requirements: +.PHONY: set-versions +set-versions: @for file in 
$(PYTHON_WF); do \ - $(MAKE) .transforms_workflows.reconcile-requirements PIPELINE_FILE=$$file; \ + $(MAKE) .workflows.set-versions PIPELINE_FILE=$$file; \ done + From 04fb1dc7bdc48d6255cd04c14893fbfd142b5b83 Mon Sep 17 00:00:00 2001 From: David Wood Date: Thu, 20 Jun 2024 11:17:32 -0400 Subject: [PATCH 02/32] Minor updates to release.sh script Signed-off-by: David Wood n# --- scripts/release.sh | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/scripts/release.sh b/scripts/release.sh index 1b12dfedd..68caf8b00 100644 --- a/scripts/release.sh +++ b/scripts/release.sh @@ -26,12 +26,20 @@ cd $reporoot # git push origin $tag # Now build with the updated version +# Requires quay credentials in the environment! +# DPL_DOCKER_REGISTRY_USER=dataprep1 +# DPK_DOCKER_REGISTRY_KEY=... +# Requires pypi credentials in the environment! +# DPK_PYPI_USER=__token__ +# DPK_PYPI_TOKEN=... +# make -C transforms/noop DPK_VERSION_SUFFIX=.dev7 build publish # make build publish # Now go back to the default branch so we can bump the minor version number and reset the version suffix # git branch $DEFAULT_BRANCH # Change to the next development version (bumped minor version with suffix). 
+# Do we want to control major vs minor bump minor=$(cat .make.versions | grep '^DPK_MINOR_VERSION=' | sed -e 's/DPK_MINOR_VERSION=\([0-9]*\).*/\1/') minor=$(($minor + 1)) #cat .make.versions | sed -e "s/^DPK_MINOR_VERSION=.*/DPK_MINOR_VERSION=$minor/" \ From 98a41b5edd1c1c375abdae903e4f64a571bb2199 Mon Sep 17 00:00:00 2001 From: David Wood Date: Thu, 20 Jun 2024 12:01:36 -0400 Subject: [PATCH 03/32] Update repo.md for 2 make users and add workflow test of global make rules Signed-off-by: David Wood --- .github/workflows/test.yml | 8 +++ doc/repo.md | 128 +++++++++++++++---------------------- 2 files changed, 61 insertions(+), 75 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 034fb57f9..2da55da6a 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -9,6 +9,14 @@ on: branches: - "dev" jobs: + test-make: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Test top-level recursive make targets. + run: | + make -n clean test build publish set-versions test-python-lib: runs-on: ubuntu-latest steps: diff --git a/doc/repo.md b/doc/repo.md index 66f884719..8d89c58a1 100644 --- a/doc/repo.md +++ b/doc/repo.md @@ -2,11 +2,17 @@ # Repository structure * data_processing_lib - provides the core transform framework and library -supporting data transformations in a Ray cluster +supporting data transformations in 3 runtimes + * python + * ray + * spark + * transform * universal - * ededup + * noop + * python * ray + * spark * kfp_ray * ... * code @@ -24,84 +30,56 @@ supporting data transformations in a Ray cluster # Build and Makefiles Makefiles are used for operations performed across all projects in the directory tree. -Using specific rules from the top of the repository tree will recurse their execution -into subdirectories until subdirectories provide a Makefile that implements the action -and/or recurses further. 
For example, -```shell -make test -``` -will apply the `make test` rule into all sub-directories supporting such recursion. -Try `make help` to see the set of available targets in a directory. For example, -from the root of the repo... -``` -Target Description ------- ----------- -build Recursively build in all subdirs -clean Recursively clean in all subdirs -setup Recursively setup in all subdirs -test Recursively test in all subdirs -``` -or from a transform project directory -``` -cd transforms/universal/noop -make help -Target Description ------- ----------- -build Create the venv and build the transform image -clean Clean up the virtual environment. -conventions Check transform project conventions and make recommendations, if needed. -image Create the docker image quay.io/dataprep1/data-prep-kit/noop:0.7 -publish Publish the quay.io/dataprep1/data-prep-kit/noop:0.7 to quay.io container registry -setup Do nothing, since nothing to setup by default. -test Run both source and image level tests. -test-image Test an quay.io/dataprep1/data-prep-kit/noop:0.7 use test source inside the image. -test-locals Run the *local*.py files in the src directory -test-src Run the transform's tests and any '*local' .py files -venv Install the source from the data processing library for python -workflow-build Recursively make workflow-build in subdirs -workflow-reconcile-requirements Recursively make workflow-reconcile-requirements in all subdirs -workflow-test Recursively make workflow-test in subdirs -workflow-upload Recursively make workflow-upload in subdirs -workflow-venv Recursively make workflow-venv in subdirs -``` +There are two types of users envisioned to use the make files. -The `workflow-` related Makefile targets are dedicated for handling the [Kubeflow Pipelines](https://github.com/kubeflow/pipelines) workflows for the specified transforms. 
+ * adminstrators - perform git actions and release management + * developers - work with core libraries and transforms -Overridable macro values include the following: -DOCKER - the name of the docker executable to use. DOCKER=docker -DOCKER_FILE - the name of the docker file to use. DOCKER_FILE=Dockerfile -DOCKER_REGISTRY_ENDPOINT - the docker registry location to publish images. DOCKER_REGISTRY_ENDPOINT=quay.io/dataprep1/data-prep-kit -DOCKER_HOSTNAME - the name of the docker registry to use. DOCKER_HOSTNAME=quay.io -DOCKER_NAMESPACE - the name space to use in the registry. DOCKER_NAMESPACE=dataprep1 -DOCKER_NAME - the name under the name space where images are publishes. DOCKER_NAME=data-prep-kit -DOCKER_REGISTRY_USER - the docker user to use. DOCKER_REGISTRY_USER=dataprep1 -DOCKER_REGISTRY_KEY - the docker user to use. DOCKER_REGISTRY_KEY=secret -PYTHON - the python executable to use. PYTHON=python -DOCKER_IMAGE_NAME - the name of the docker image to produce. DOCKER_IMAGE_NAME=noop -TRANSFORM_SRC_FILE is the base name of the python source file containing the main() (e.g. noop_local_ray.py) +Each directory has access to a `make help` target that will show all available targets. -Macros that require definition in the including Makefile -REPOROOT defines the root directory of this repository (such as ../../..) -TRANSFORM_NAME defines the name of the transform and is used to define defaults for... - DOCKER_IMAGE_NAME and TRANSFORM_SRC_FILE. For, example 'noop' -DOCKER_IMAGE_VERSION - the version of the docker image to produce. DOCKER_IMAGE_VERSION=0.7 -``` +## Administrators +Generally, administrators will issue make commands from the top of the repository to, for example +publish a new release. The top level make file provides a set of targets that +are executed recursively, which as a result are expected to be implementd by +sub-directories. 
These and their semantics as expected to be implemented, +as appropriate, in the sub-directories are as follows: -If you'd like to build each component separately, you can move into the sub-directories as desired. -If planning to develop and/or use on Apple Mac please see these [considerations](mac.md). +* clean - Restore the directory to as close to initial repository clone state as possible. +* build - Build all components contained in a given sub-directory. +This might include pypi distributions, images, etc. +* test - Test all components contained in a given sub-directory. +* publish - Publish any components in sub-directory. +* set-versions - apply the DPK_VERSION to all published components. +This might include things published to pypi or the docker registry. -## Data Prep Kit Library -To build the wheel for the data processing library and publish it to a pypi... -```shell -cd data-processing-lib -make test build publish -``` +Sub-directories are free to define these as empty/no-op targets, but generally are required +to define them unless a parent directory does not recurse into the directory. + +## Developers +Generally, developers will be working in a python project directory +(e.g., data-processing-lib/python, transforms/universal/filter, etc.) +and can issue the administrator's make targets or others that might be defined locally +(e.g., venv, test-image, test-src in transform projects). +Key targets are as follows: + +* venv - creates the virtual environment from either a pyproject.toml or requirements.txt file. +* publish - publish libraries or docker images as appropriate. +This is generally only used during release generation. + +If working with an IDE, one generally make the venv, then configures the IDE to +reference the venv and src directories. + +Transform projects generally include these transform project-specific targets for convenience, +which are triggered with the the `test` target. 
+ +* test-src - test python tests in the test directory +* test-image - build and test the docker image for the transform + +### Transforms and KFP +The kfp_ray directories in the transform projects provide +`workflow-` targets and are dedicated to handling the +[Kubeflow Pipelines](https://github.com/kubeflow/pipelines) +workflows for the specified transforms. -## Transforms -To create all transform images and publish them (by default to quay.io) -```shell -cd transforms -make venv test-src -make image test-image publish ``` From 2d006a5f4009d34f5a477feb252e13f0170a8b75 Mon Sep 17 00:00:00 2001 From: David Wood Date: Thu, 20 Jun 2024 12:52:17 -0400 Subject: [PATCH 04/32] Update release.sh script for testing Signed-off-by: David Wood --- scripts/release.sh | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/scripts/release.sh b/scripts/release.sh index 68caf8b00..f32484845 100644 --- a/scripts/release.sh +++ b/scripts/release.sh @@ -1,3 +1,4 @@ +<<<<<<< Updated upstream DEFAULT_BRANCH=dev # Assume this file is in the reporoot/scripts directory reporoot=$(dirname $0)/.. @@ -5,6 +6,37 @@ cd $reporoot # Make sure we're starting from the base branch # git checkout $DEFAULT_BRANCH +======= +debug=echo +# Assume this file is in the reporoot/scripts directory +reporoot=$(dirname $0)/.. 
+cd $reporoot + +# Make sure required env vars are set +if [ -z "$DPK_DOCKER_REGISTRY_USER" ]; then + echo DPK_DOCKER_REGISTRY_USER env var must be set + exit 1 +elif [ -z "$DPK_DOCKER_REGISTRY_KEY" ]; then + echo DPK_DOCKER_REGISTRY_KEY env var must be set + exit 1 +elif [ -z "$DPK_PYPI_USER" ]; then + echo DPK_PYPI_USER env var must be set + exit 1 +elif [ -z "$DPK_PYPI_TOKEN" ]; then + echo DPK_DPYP_TOKEN env var must be set + exit 1 +fi + +if [ -z "$debug" ]; then + DEFAULT_BRANCH=dev +else + DEFAULT_BRANCH=releasing +fi + +# Make sure we're starting from the base branch +get fetch +git checkout $DEFAULT_BRANCH +>>>>>>> Stashed changes # Get the currently defined version w/o any suffix. This is the next release version # version=$(make DPK_VERSION_SUFFIX= show-version) @@ -18,12 +50,22 @@ cd $reporoot # mv tt .make.version # Apply the unsuffixed version to the repo and check it into this release branch +<<<<<<< Updated upstream # make set-versions # git add -A # git commit -s -m "Cut release $version" # git push origin # git tag -a -s -m "Cut release $version" $tag # git push origin $tag +======= + make set-versions +git add -A +git commit -s -m "Cut release $version" +git push origin +git tag -a -s -m "Cut release $version" $tag +git push --set-upstream origin release/$tag +git push origin $tag +>>>>>>> Stashed changes # Now build with the updated version # Requires quay credentials in the environment! @@ -36,7 +78,11 @@ cd $reporoot # make build publish # Now go back to the default branch so we can bump the minor version number and reset the version suffix +<<<<<<< Updated upstream # git branch $DEFAULT_BRANCH +======= +git checkout $DEFAULT_BRANCH +>>>>>>> Stashed changes # Change to the next development version (bumped minor version with suffix). 
# Do we want to control major vs minor bump From cc6b3aadda83e2e80d59fcf8ad3332993b62f735 Mon Sep 17 00:00:00 2001 From: David Wood Date: Thu, 20 Jun 2024 12:57:56 -0400 Subject: [PATCH 05/32] more testing or release.sh script Signed-off-by: David Wood --- scripts/release.sh | 63 +++++++++++++++++----------------------------- 1 file changed, 23 insertions(+), 40 deletions(-) diff --git a/scripts/release.sh b/scripts/release.sh index f32484845..256dfdb3c 100644 --- a/scripts/release.sh +++ b/scripts/release.sh @@ -1,12 +1,3 @@ -<<<<<<< Updated upstream -DEFAULT_BRANCH=dev -# Assume this file is in the reporoot/scripts directory -reporoot=$(dirname $0)/.. -cd $reporoot - -# Make sure we're starting from the base branch -# git checkout $DEFAULT_BRANCH -======= debug=echo # Assume this file is in the reporoot/scripts directory reporoot=$(dirname $0)/.. @@ -36,61 +27,53 @@ fi # Make sure we're starting from the base branch get fetch git checkout $DEFAULT_BRANCH ->>>>>>> Stashed changes # Get the currently defined version w/o any suffix. 
This is the next release version -# version=$(make DPK_VERSION_SUFFIX= show-version) -# tag=v$version +version=$(make DPK_VERSION_SUFFIX= show-version) + +if [ -z "$debug"]; then + tag=v$version +else + tag=test$version +fi # Create a new branch for this version and switch to it -# git branch -r release/$tag +git checkout -b release/$tag # Remove the release suffix in this branch -# cat .make.versions | sed -e 's/^DPK_VERSION_SUFFIX.*/DPK_VERSION_SUFFIX=/' > tt -# mv tt .make.version +if [ -z "$debug"]; then + cat .make.versions | sed -e 's/^DPK_VERSION_SUFFIX.*/DPK_VERSION_SUFFIX=/' > tt + mv tt .make.version +fi # Apply the unsuffixed version to the repo and check it into this release branch -<<<<<<< Updated upstream -# make set-versions -# git add -A -# git commit -s -m "Cut release $version" -# git push origin -# git tag -a -s -m "Cut release $version" $tag -# git push origin $tag -======= - make set-versions +make set-versions git add -A git commit -s -m "Cut release $version" git push origin git tag -a -s -m "Cut release $version" $tag git push --set-upstream origin release/$tag git push origin $tag ->>>>>>> Stashed changes # Now build with the updated version -# Requires quay credentials in the environment! -# DPL_DOCKER_REGISTRY_USER=dataprep1 -# DPK_DOCKER_REGISTRY_KEY=... -# Requires pypi credentials in the environment! -# DPK_PYPI_USER=__token__ -# DPK_PYPI_TOKEN=... 
-# make -C transforms/noop DPK_VERSION_SUFFIX=.dev7 build publish -# make build publish +# Requires quay credentials in the environment, DPL_DOCKER_REGISTRY_USER, DPK_DOCKER_REGISTRY_KEY +# Requires pypi credentials in the environment, DPK_PYPI_USER=, DPK_PYPI_TOKEN +if [ -z "$debug" ]; then + make build publish +else + echo make -C transforms/universal/noop build publish +fi # Now go back to the default branch so we can bump the minor version number and reset the version suffix -<<<<<<< Updated upstream -# git branch $DEFAULT_BRANCH -======= git checkout $DEFAULT_BRANCH ->>>>>>> Stashed changes # Change to the next development version (bumped minor version with suffix). # Do we want to control major vs minor bump minor=$(cat .make.versions | grep '^DPK_MINOR_VERSION=' | sed -e 's/DPK_MINOR_VERSION=\([0-9]*\).*/\1/') minor=$(($minor + 1)) -#cat .make.versions | sed -e "s/^DPK_MINOR_VERSION=.*/DPK_MINOR_VERSION=$minor/" \ -# -e "s/^DPK_VERSION_SUFFIX=.*/DPK_VERSION_SUFFIX=.dev0/" > tt -#mv tt .make.versions +cat .make.versions | sed -e "s/^DPK_MINOR_VERSION=.*/DPK_MINOR_VERSION=$minor/" \ + -e "s/^DPK_VERSION_SUFFIX=.*/DPK_VERSION_SUFFIX=.dev0/" > tt +mv tt .make.versions # Push the version change back to the origin # git add -A From 4fc4f9cce337e940bb54a16cdc4817c1d7989dc3 Mon Sep 17 00:00:00 2001 From: David Wood Date: Thu, 20 Jun 2024 13:01:40 -0400 Subject: [PATCH 06/32] More release.sh Signed-off-by: David Wood --- .make.versions | 4 ++-- scripts/release.sh | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.make.versions b/.make.versions index e4de65483..f3bd20b21 100644 --- a/.make.versions +++ b/.make.versions @@ -9,9 +9,9 @@ # % git push, tag, etc. # % make build, publish, etc. 
DPK_MAJOR_VERSION=0 -DPK_MINOR_VERSION=2 +DPK_MINOR_VERSION=3 DPK_MICRO_VERSION=0 -DPK_VERSION_SUFFIX=.dev6 +DPK_VERSION_SUFFIX=.dev0 DPK_VERSION=$(DPK_MAJOR_VERSION).$(DPK_MINOR_VERSION).$(DPK_MICRO_VERSION)$(DPK_VERSION_SUFFIX) diff --git a/scripts/release.sh b/scripts/release.sh index 256dfdb3c..e209878f9 100644 --- a/scripts/release.sh +++ b/scripts/release.sh @@ -1,4 +1,5 @@ debug=echo +dbg_suffix=.dev7 # Assume this file is in the reporoot/scripts directory reporoot=$(dirname $0)/.. cd $reporoot @@ -44,6 +45,9 @@ git checkout -b release/$tag if [ -z "$debug"]; then cat .make.versions | sed -e 's/^DPK_VERSION_SUFFIX.*/DPK_VERSION_SUFFIX=/' > tt mv tt .make.version +else + cat .make.versions | sed -e "s/^DPK_VERSION_SUFFIX.*/DPK_VERSION_SUFFIX=$dbg_suffix/" > tt + mv tt .make.version fi # Apply the unsuffixed version to the repo and check it into this release branch From 8de2c498918f5aeedf14bea15cbdc344f1453478 Mon Sep 17 00:00:00 2001 From: David Wood Date: Thu, 20 Jun 2024 13:08:00 -0400 Subject: [PATCH 07/32] Revert to dev versions Signed-off-by: David Wood --- .make.versions | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.make.versions b/.make.versions index f3bd20b21..e4de65483 100644 --- a/.make.versions +++ b/.make.versions @@ -9,9 +9,9 @@ # % git push, tag, etc. # % make build, publish, etc. 
DPK_MAJOR_VERSION=0 -DPK_MINOR_VERSION=3 +DPK_MINOR_VERSION=2 DPK_MICRO_VERSION=0 -DPK_VERSION_SUFFIX=.dev0 +DPK_VERSION_SUFFIX=.dev6 DPK_VERSION=$(DPK_MAJOR_VERSION).$(DPK_MINOR_VERSION).$(DPK_MICRO_VERSION)$(DPK_VERSION_SUFFIX) From e49cbaa2e7c2c10b7d5c86f77c4469dc768b7a64 Mon Sep 17 00:00:00 2001 From: David Wood Date: Thu, 20 Jun 2024 13:08:59 -0400 Subject: [PATCH 08/32] release.sh Signed-off-by: David Wood --- scripts/release.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/release.sh b/scripts/release.sh index e209878f9..d784a323e 100644 --- a/scripts/release.sh +++ b/scripts/release.sh @@ -22,7 +22,7 @@ fi if [ -z "$debug" ]; then DEFAULT_BRANCH=dev else - DEFAULT_BRANCH=releasing + DEFAULT_BRANCH=releasing-copy fi # Make sure we're starting from the base branch From db921b9950ac00ed084a27d3c78486f175d51d4a Mon Sep 17 00:00:00 2001 From: David Wood Date: Thu, 20 Jun 2024 13:15:27 -0400 Subject: [PATCH 09/32] release.sh Signed-off-by: David Wood --- scripts/release.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/release.sh b/scripts/release.sh index d784a323e..9b9382cd7 100644 --- a/scripts/release.sh +++ b/scripts/release.sh @@ -44,10 +44,10 @@ git checkout -b release/$tag # Remove the release suffix in this branch if [ -z "$debug"]; then cat .make.versions | sed -e 's/^DPK_VERSION_SUFFIX.*/DPK_VERSION_SUFFIX=/' > tt - mv tt .make.version + mv tt .make.versions else cat .make.versions | sed -e "s/^DPK_VERSION_SUFFIX.*/DPK_VERSION_SUFFIX=$dbg_suffix/" > tt - mv tt .make.version + mv tt .make.versions fi # Apply the unsuffixed version to the repo and check it into this release branch From 1451277712f0ce312ab312b056b77715048228e5 Mon Sep 17 00:00:00 2001 From: David Wood Date: Thu, 20 Jun 2024 13:33:34 -0400 Subject: [PATCH 10/32] release.sh Signed-off-by: David Wood --- scripts/release.sh | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/scripts/release.sh 
b/scripts/release.sh index 9b9382cd7..da52f1f11 100644 --- a/scripts/release.sh +++ b/scripts/release.sh @@ -39,7 +39,17 @@ else fi # Create a new branch for this version and switch to it -git checkout -b release/$tag +release_branch=release/$tag +if [ ! -z "$debug"]; then + # delete local tag and branch + git tag --delete $tag + git branch --delete $release_branch + # delete remote tag and branch + git push --delete origin $tag + git push --delete origin $release_branch +fi +git checkout -b $release_branch + # Remove the release suffix in this branch if [ -z "$debug"]; then @@ -56,7 +66,7 @@ git add -A git commit -s -m "Cut release $version" git push origin git tag -a -s -m "Cut release $version" $tag -git push --set-upstream origin release/$tag +git push --set-upstream origin $release_branch git push origin $tag # Now build with the updated version From c276b71b961dbe4e212cf927c48d18abb345edbb Mon Sep 17 00:00:00 2001 From: David Wood Date: Thu, 20 Jun 2024 13:36:16 -0400 Subject: [PATCH 11/32] release.sh Signed-off-by: David Wood --- scripts/release.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/release.sh b/scripts/release.sh index da52f1f11..296119be8 100644 --- a/scripts/release.sh +++ b/scripts/release.sh @@ -32,7 +32,7 @@ git checkout $DEFAULT_BRANCH # Get the currently defined version w/o any suffix. This is the next release version version=$(make DPK_VERSION_SUFFIX= show-version) -if [ -z "$debug"]; then +if [ -z "$debug" ]; then tag=v$version else tag=test$version @@ -40,7 +40,7 @@ fi # Create a new branch for this version and switch to it release_branch=release/$tag -if [ ! -z "$debug"]; then +if [ ! 
-z "$debug" ]; then # delete local tag and branch git tag --delete $tag git branch --delete $release_branch @@ -52,7 +52,7 @@ git checkout -b $release_branch # Remove the release suffix in this branch -if [ -z "$debug"]; then +if [ -z "$debug" ]; then cat .make.versions | sed -e 's/^DPK_VERSION_SUFFIX.*/DPK_VERSION_SUFFIX=/' > tt mv tt .make.versions else From a026584c7f8926261f404d8804a7a38109993e66 Mon Sep 17 00:00:00 2001 From: David Wood Date: Thu, 20 Jun 2024 13:44:56 -0400 Subject: [PATCH 12/32] release.sh Signed-off-by: David Wood --- scripts/release.sh | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/scripts/release.sh b/scripts/release.sh index 296119be8..f40acc968 100644 --- a/scripts/release.sh +++ b/scripts/release.sh @@ -62,11 +62,10 @@ fi # Apply the unsuffixed version to the repo and check it into this release branch make set-versions -git add -A -git commit -s -m "Cut release $version" -git push origin -git tag -a -s -m "Cut release $version" $tag +git status +git commit -s -a -m "Cut release $version" git push --set-upstream origin $release_branch +git tag -a -s -m "Cut release $version" $tag git push origin $tag # Now build with the updated version @@ -90,6 +89,5 @@ cat .make.versions | sed -e "s/^DPK_MINOR_VERSION=.*/DPK_MINOR_VERSION=$minor/" mv tt .make.versions # Push the version change back to the origin -# git add -A -# git commit -s -m "Bump minor version to $minor after cutting release $version" +# git commit -s -a -m "Bump minor version to $minor after cutting release $version" # git push origin From 4b6f41bf71ad00530104d823ad55dbced10123ca Mon Sep 17 00:00:00 2001 From: David Wood Date: Thu, 20 Jun 2024 13:53:20 -0400 Subject: [PATCH 13/32] release.sh Signed-off-by: David Wood --- scripts/release.sh | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/release.sh b/scripts/release.sh index f40acc968..97da15a85 100644 --- a/scripts/release.sh +++ b/scripts/release.sh @@ -74,7 +74,7 @@ 
git push origin $tag if [ -z "$debug" ]; then make build publish else - echo make -C transforms/universal/noop build publish + make -C transforms/universal/noop build publish fi # Now go back to the default branch so we can bump the minor version number and reset the version suffix @@ -89,5 +89,6 @@ cat .make.versions | sed -e "s/^DPK_MINOR_VERSION=.*/DPK_MINOR_VERSION=$minor/" mv tt .make.versions # Push the version change back to the origin -# git commit -s -a -m "Bump minor version to $minor after cutting release $version" -# git push origin +git commit -s -a -m "Bump minor version to $minor after cutting release $version into branch $release_branch" +git diff $DEFAULT_BRANCH origin/$DEFAULT_BRANCH +#git push origin From 15317ef4725b58d8aa909fc10f1a164230e122df Mon Sep 17 00:00:00 2001 From: David Wood Date: Thu, 20 Jun 2024 14:04:29 -0400 Subject: [PATCH 14/32] release.sh Signed-off-by: David Wood --- scripts/release.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/release.sh b/scripts/release.sh index 97da15a85..77fd24b28 100644 --- a/scripts/release.sh +++ b/scripts/release.sh @@ -74,7 +74,8 @@ git push origin $tag if [ -z "$debug" ]; then make build publish else - make -C transforms/universal/noop build publish + # make -C data-processing-lib/spark image # Build the base image required by spark + make -C transforms/universal/noop/python build publish fi # Now go back to the default branch so we can bump the minor version number and reset the version suffix From 7d5ca76e9668d146f315d44703c3f51c64f22f48 Mon Sep 17 00:00:00 2001 From: David Wood Date: Thu, 20 Jun 2024 14:40:53 -0400 Subject: [PATCH 15/32] updated repo.md on makefiles Signed-off-by: David Wood --- doc/repo.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/doc/repo.md b/doc/repo.md index 8d89c58a1..7a1259eba 100644 --- a/doc/repo.md +++ b/doc/repo.md @@ -41,7 +41,7 @@ Each directory has access to a `make help` target that will show all 
available t Generally, administrators will issue make commands from the top of the repository to, for example publish a new release. The top level make file provides a set of targets that are executed recursively, which as a result are expected to be implementd by -sub-directories. These and their semantics as expected to be implemented, +sub-directories. These and their semantics are expected to be implemented, as appropriate, in the sub-directories are as follows: * clean - Restore the directory to as close to initial repository clone state as possible. @@ -49,8 +49,8 @@ as appropriate, in the sub-directories are as follows: This might include pypi distributions, images, etc. * test - Test all components contained in a given sub-directory. * publish - Publish any components in sub-directory. -* set-versions - apply the DPK_VERSION to all published components. This might include things published to pypi or the docker registry. +* set-versions - apply the DPK_VERSION to all published components. Sub-directories are free to define these as empty/no-op targets, but generally are required to define them unless a parent directory does not recurse into the directory. @@ -58,7 +58,8 @@ to define them unless a parent directory does not recurse into the directory. ## Developers Generally, developers will be working in a python project directory (e.g., data-processing-lib/python, transforms/universal/filter, etc.) -and can issue the administrator's make targets or others that might be defined locally +and can issue the administrator's make targets (e.g., build, test, etc.) +or others that might be defined locally (e.g., venv, test-image, test-src in transform projects). Key targets are as follows: @@ -66,8 +67,8 @@ Key targets are as follows: * publish - publish libraries or docker images as appropriate. This is generally only used during release generation.
-If working with an IDE, one generally make the venv, then configures the IDE to -reference the venv and src directories. +If working with an IDE, one generally makes the venv, then configures the IDE to +reference the venv, src and test directories. Transform projects generally include these transform project-specific targets for convenience, which are triggered with the the `test` target. From c85c68a1e899ebbe3f0592087922b9b978545cc2 Mon Sep 17 00:00:00 2001 From: David Wood Date: Thu, 20 Jun 2024 14:46:45 -0400 Subject: [PATCH 16/32] add publish target to superworkflows Makefile Signed-off-by: David Wood --- kfp/superworkflows/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kfp/superworkflows/Makefile b/kfp/superworkflows/Makefile index 7ad02a71b..d655f90c6 100644 --- a/kfp/superworkflows/Makefile +++ b/kfp/superworkflows/Makefile @@ -4,6 +4,9 @@ include ${REPOROOT}/.make.defaults KFPv2 ?= 0 +.PHONY: publish +publish: + .PHONY: workflow-venv workflow-venv: ifeq ($(KFPv2), 1) From 2a7bcb929d63611e67d8520cdd52dd11eac6be5f Mon Sep 17 00:00:00 2001 From: David Wood Date: Thu, 20 Jun 2024 17:30:41 -0400 Subject: [PATCH 17/32] More release work Added docs in .make.versions Add RELEASE.md. 
Fix checking for pypi credentials Signed-off-by: David Wood --- .make.defaults | 3 --- .make.versions | 10 ++++---- .secrets.baseline | 1 + RELEASE.md | 57 ++++++++++++++++++++++++++++++++++++++++++++++ scripts/release.sh | 31 ++++++++++++++----------- 5 files changed, 82 insertions(+), 20 deletions(-) create mode 100644 RELEASE.md diff --git a/.make.defaults b/.make.defaults index 151afab4e..62688be58 100644 --- a/.make.defaults +++ b/.make.defaults @@ -540,6 +540,3 @@ MINIO_ADMIN_PWD= localminiosecretkey fi ${PYTHON} -m twine check dist/* ${PYTHON} -m twine upload --verbose --non-interactive dist/* - #@echo "create a git tag to reference published version" - #@git tag ${TAG} - #@git push origin ${TAG} diff --git a/.make.versions b/.make.versions index 970dd90ee..8884642a6 100644 --- a/.make.versions +++ b/.make.versions @@ -4,13 +4,15 @@ # numbers TO BE published on the NEXT release/publishing of artifacts. ################################################################################ -# do -# % make RELEASE_VERSION_SUFFIX= set-version -# % git push, tag, etc. -# % make build, publish, etc. + +# The major version is generally changed manually when a breaking change to apis is made in the libraries, for example. DPK_MAJOR_VERSION=0 +# The minor version is incremented manually when significant features have been added that are backward compatible with the previous major.minor release. DPK_MINOR_VERSION=2 +# The micro version is incremented AUTOMATICALLY by the release.sh script when a new release is set. DPK_MICRO_VERSION=0 +# The suffix is generally always set in the main/development branch and only nulled out when creating release branches. +# It can be manually incremented, for example, to allow publishing a new intermediate version wheel to pypi.
DPK_VERSION_SUFFIX=.dev6 DPK_VERSION=$(DPK_MAJOR_VERSION).$(DPK_MINOR_VERSION).$(DPK_MICRO_VERSION)$(DPK_VERSION_SUFFIX) diff --git a/.secrets.baseline b/.secrets.baseline index 25be48bab..45e93b490 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -132,3 +132,4 @@ "hash": null } } + diff --git a/RELEASE.md b/RELEASE.md new file mode 100644 index 000000000..91e4b8ca9 --- /dev/null +++ b/RELEASE.md @@ -0,0 +1,57 @@ +# Release Management + +Releases are created from the main repository branch using the version +numbers, including an intermediate version suffix, +defined in `.make.versions`. +The following points are important: + +1. `.make.versions` contains the version to be used when publishing the **next** release. +1. The main branch always has the version suffix set to .dev, which +allows intermediate publishing from the dev branch using version X.Y.Z.dev. +2. In general, a common version number is used for all published pypi wheels and docker images. +3. The `scripts/release.sh` script automates the following: + 1. Creating a `releases/vX.Y.Z` branch and `vX.Y.Z` tag + 2. Nulling out the version suffix in the new branch's `.make.versions` file. + 3. Applying the unsuffixed versions to the artifacts published from the repo. + 4. Building and publishing the wheels to pypi and images to a docker registry. + 5. Incrementing the micro version and resetting the suffix in the main branch. + +# Cutting the release +Creating the release requires running the `release.sh` script and optionally +generating a release on github. The latter can be performed manually +once the `release.sh` script has done its work. + +## release.sh +Running `release.sh` requires credentials to publish to the various cloud locations.
+ +For docker registry publishing, the following environment variables/credentials are needed: + +* DPK_DOCKER_REGISTRY_USER - user used with the registry defined in DOCKER_HOST in `.make.defaults` +* DPK_DOCKER_REGISTRY_KEY - key/password for docker registry user. + +To publish to pypi, the credentials in the `~/.pypirc` file are used (let us know if there is a way to do +this with environment variables). +See [pypi](https://packaging.python.org/en/latest/specifications/pypirc/) for details. + +To see the version that will be published, +``` +make DPK_VERSION_SUFFIX= show-version +``` +This will print, for example, 1.2.3. + +To generate the release: +```shell +bash scripts/release.sh +``` + +## Github release +After running the `release.sh` script to create tag `vX.Y.Z` and branch `releases/vX.Y.Z`: +1. Go to the [releases page](https://github.com/IBM/data-prep-kit/releases). +2. Select `Draft a new release` +3. Select `Choose a tag -> vX.Y.Z` +4. Press `Generate release notes` +5. Add a title (e.g., Release X.Y.Z) +6. Add any additional release notes. +7. Press `Publish release` + + diff --git a/scripts/release.sh b/scripts/release.sh index 77fd24b28..63045a829 100644 --- a/scripts/release.sh +++ b/scripts/release.sh @@ -11,13 +11,15 @@ if [ -z "$DPK_DOCKER_REGISTRY_USER" ]; then elif [ -z "$DPK_DOCKER_REGISTRY_KEY" ]; then echo DPK_DOCKER_REGISTRY_KEY env var must be set exit 1 -elif [ -z "$DPK_PYPI_USER" ]; then - echo DPK_PYPI_USER env var must be set - exit 1 -elif [ -z "$DPK_PYPI_TOKEN" ]; then - echo DPK_DPYP_TOKEN env var must be set - exit 1 fi +if [ ! -e ~/.pypirc ]; then + cat << EOF +You need a ~/.pypirc containing pypi.org credentials. +See https://packaging.python.org/en/latest/specifications/pypirc/ for details. +EOF + exit +fi +exit if [ -z "$debug" ]; then DEFAULT_BRANCH=dev @@ -39,7 +41,7 @@ else fi # Create a new branch for this version and switch to it -release_branch=release/$tag +release_branch=releases/$tag if [ !
-z "$debug" ]; then # delete local tag and branch git tag --delete $tag @@ -83,13 +85,16 @@ git checkout $DEFAULT_BRANCH # Change to the next development version (bumped minor version with suffix). # Do we want to control major vs minor bump -minor=$(cat .make.versions | grep '^DPK_MINOR_VERSION=' | sed -e 's/DPK_MINOR_VERSION=\([0-9]*\).*/\1/') -minor=$(($minor + 1)) -cat .make.versions | sed -e "s/^DPK_MINOR_VERSION=.*/DPK_MINOR_VERSION=$minor/" \ +micro=$(cat .make.versions | grep '^DPK_MICRO_VERSION=' | sed -e 's/DPK_MICRO_VERSION=\([0-9]*\).*/\1/') +micro=$(($micro + 1)) +cat .make.versions | sed -e "s/^DPK_MICRO_VERSION=.*/DPK_MICRO_VERSION=$micro/" \ -e "s/^DPK_VERSION_SUFFIX=.*/DPK_VERSION_SUFFIX=.dev0/" > tt mv tt .make.versions # Push the version change back to the origin -git commit -s -a -m "Bump minor version to $minor after cutting release $version into branch $release_branch" -git diff $DEFAULT_BRANCH origin/$DEFAULT_BRANCH -#git push origin +next_version=$(make show-version) +git commit -s -a -m "Bump micro version to $next_version after cutting release $version into branch $release_branch" +git diff origin/$DEFAULT_BRANCH $DEFAULT_BRANCH +if [ -z "$debug" ]; then + git push origin +fi From 12f315bcd72567dbbb10e49d02cda5fc43d8515a Mon Sep 17 00:00:00 2001 From: David Wood Date: Thu, 20 Jun 2024 18:11:41 -0400 Subject: [PATCH 18/32] add publish target to kind/Makefile Signed-off-by: David Wood --- kind/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kind/Makefile b/kind/Makefile index a80417333..be54808e1 100644 --- a/kind/Makefile +++ b/kind/Makefile @@ -33,6 +33,9 @@ endif $(MAKE) cluster-deploy @echo "setup-cluster completed" +.PHONY: publish +publish: + populate-data:: @# Help: Populate test data in Minio cd ${KIND_SCRIPTS} && ./populate_minio.sh From 6c0d4ab47e813f2a59ff2730cb0c57dc96e3c922 Mon Sep 17 00:00:00 2001 From: David Wood Date: Fri, 21 Jun 2024 09:20:08 -0400 Subject: [PATCH 19/32] install ../python for spark transforms 
in .make.defaults Signed-off-by: David Wood --- .make.defaults | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.make.defaults b/.make.defaults index 62688be58..2e36e368f 100644 --- a/.make.defaults +++ b/.make.defaults @@ -331,6 +331,9 @@ __check_defined = \ $(MAKE) PIP_TARGET=data-prep-toolkit-spark .defaults.pip-uninstall; \ $(MAKE) PYTHON_PROJECT_DIR=$(DPK_PYTHON_LIB_DIR) .defaults.install-src-venv; \ $(MAKE) PYTHON_PROJECT_DIR=$(DPK_SPARK_LIB_DIR) .defaults.install-src-venv; \ + if [ -d ../python ]; then \ + $(MAKE) PYTHON_PROJECT_DIR=../python .defaults.install-src-venv; \ + fi echo Installed source from Spark processing library for `which $(PYTHON)` # Run tests in test directory from that dir after adding ../src to PYTHONPATH From f6bd444292d4c1d346531e611bf5980d79c3a088 Mon Sep 17 00:00:00 2001 From: David Wood Date: Fri, 21 Jun 2024 09:43:33 -0400 Subject: [PATCH 20/32] fix kfp set versions, and noop versions Signed-off-by: David Wood --- .make.defaults | 2 +- transforms/universal/noop/python/pyproject.toml | 2 +- transforms/universal/noop/spark/pyproject.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.make.defaults b/.make.defaults index 2e36e368f..6d8695973 100644 --- a/.make.defaults +++ b/.make.defaults @@ -515,7 +515,7 @@ MINIO_ADMIN_PWD= localminiosecretkey cat pyproject.toml | sed \ -e 's/"data-prep-toolkit-ray\(..\).*",/"data-prep-toolkit-ray\1$(DPK_LIB_VERSION)",/' \ -e 's/"data-prep-toolkit-spark\(..\).*",/"data-prep-toolkit-spark\1$(DPK_LIB_VERSION)",/' \ - -e 's/"data-prep-toolkit-kfp\(..\).*",/"data-prep-toolkit-spark\1$(DPK_LIB_KFP_VERSION)",/' \ + -e 's/"data-prep-toolkit-kfp\(..\).*",/"data-prep-toolkit-kfp\1$(DPK_LIB_KFP_VERSION)",/' \ -e 's/"data-prep-toolkit\([=><][=><]\).*",/"data-prep-toolkit\1$(DPK_LIB_VERSION)",/' \ > tt.toml mv tt.toml pyproject.toml diff --git a/transforms/universal/noop/python/pyproject.toml b/transforms/universal/noop/python/pyproject.toml index ad5b86597..cdf9c2740 100644 --- 
a/transforms/universal/noop/python/pyproject.toml +++ b/transforms/universal/noop/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_noop_transform_python" -version = "0.9.0.dev6" +version = "0.2.0.dev6" requires-python = ">=3.10" description = "NOOP Python Transform" license = {text = "Apache-2.0"} diff --git a/transforms/universal/noop/spark/pyproject.toml b/transforms/universal/noop/spark/pyproject.toml index b83bab57a..5ac47b3da 100644 --- a/transforms/universal/noop/spark/pyproject.toml +++ b/transforms/universal/noop/spark/pyproject.toml @@ -10,7 +10,7 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsky@ibm.com" }, ] dependencies = [ - "dpk-noop-transform-python==0.9.0.dev6", + "dpk-noop-transform-python==0.2.0.dev6", "data-prep-toolkit-spark==0.2.0.dev6", ] From 103f55989af7708eac4ff7cce6ed07c29662313f Mon Sep 17 00:00:00 2001 From: David Wood Date: Fri, 21 Jun 2024 10:09:17 -0400 Subject: [PATCH 21/32] Fix versioning in .make* and typo in noop/ray/pyproject.tmo Signed-off-by: David Wood --- .make.defaults | 2 -- transforms/.make.transforms | 2 +- transforms/universal/noop/ray/pyproject.toml | 4 ++-- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.make.defaults b/.make.defaults index 6d8695973..e31a294a2 100644 --- a/.make.defaults +++ b/.make.defaults @@ -255,8 +255,6 @@ __check_defined = \ # PYTHON_PROJECT_DIR is expected to have src and pyproject.toml .PHONY: .defaults.install-src-venv .defaults.install-src-venv:: - @# Help: Install Ray and Python data processing library source into existing venv - @echo Installing Ray and Python data processing library source to existing venv @source venv/bin/activate; \ if [ ! 
-z "$(EXTRA_INDEX_URL)" ]; then \ extra_url='--extra-index-url $(EXTRA_INDEX_URL)'; \ diff --git a/transforms/.make.transforms b/transforms/.make.transforms index e8b5a82ed..e206e3434 100644 --- a/transforms/.make.transforms +++ b/transforms/.make.transforms @@ -324,6 +324,6 @@ minio-stop: .transforms.set-versions: $(MAKE) TOML_VERSION=$(DOCKER_IMAGE_VERSION) .defaults.update-toml cat pyproject.toml | sed \ - -e 's/dpk-$(TRANSFORM_NAME)-transform-python\([=<>][=<>]\).*/dpk-$(TRANSFORM_NAME)-transform-python\1$(TRANSFORM_PYTHON_VERSION)",/' \ + -e 's/"dpk-$(TRANSFORM_NAME)-transform-python\([=<>][=<>]\).*",/"dpk-$(TRANSFORM_NAME)-transform-python\1$(TRANSFORM_PYTHON_VERSION)",/' \ > tt.toml mv tt.toml pyproject.toml diff --git a/transforms/universal/noop/ray/pyproject.toml b/transforms/universal/noop/ray/pyproject.toml index 8357ad284..bc757abe9 100644 --- a/transforms/universal/noop/ray/pyproject.toml +++ b/transforms/universal/noop/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_noop_transform_ray" -version = "0.9.0.dev6" +version = "0.2.0.dev6" requires-python = ">=3.10" description = "NOOP Ray Transform" license = {text = "Apache-2.0"} @@ -10,7 +10,7 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsky@ibm.com" }, ] dependencies = [ - "dpk-noop-transform-python==0.9.0.dev6", + "dpk-noop-transform-python==0.2.0.dev6", "data-prep-toolkit-ray==0.2.0.dev6", ] From 7f7a26421ed269dda4065b0dfd0c420e3daa3aef Mon Sep 17 00:00:00 2001 From: David Wood Date: Fri, 21 Jun 2024 11:17:12 -0400 Subject: [PATCH 22/32] comments and dont created empty pyproject.toml in .make.defaults Signed-off-by: David Wood --- .make.defaults | 32 +++++++++++++++++++------------- .make.versions | 4 ++++ 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/.make.defaults b/.make.defaults index 9fdd96ee8..77294d43f 100644 --- a/.make.defaults +++ b/.make.defaults @@ -493,30 +493,36 @@ MINIO_ADMIN_PWD= localminiosecretkey # Expects TOML_VERSION .PHONY: 
.defaults.update-toml .defaults.update-toml: - $(MAKE) TOML_VERSION=$(TOML_VERSION) .defaults.set-toml-version - $(MAKE) .defaults.update-toml-lib-dep-versions + if [ -e pyproject.toml ]; then \ + $(MAKE) TOML_VERSION=$(TOML_VERSION) .defaults.__set-toml-version; \ + $(MAKE) .defaults.__update-toml-lib-dep-versions; \ + fi # Changes the version field of the pyproject.toml file to the given version # Expects TOML_VERSION -.PHONY: .defaults.set-toml-version -.defaults.set-toml-version: +.PHONY: .defaults.__set-toml-version +.defaults.__set-toml-version: @# Help: Set the version= field of pyproject.toml - cat pyproject.toml | sed -e \ - 's/^version[ ]*=.*/version = "'${TOML_VERSION}'"/' \ - > tt.toml - mv tt.toml pyproject.toml + if [ -e pyproject.toml ]; then \ + cat pyproject.toml | sed -e \ + 's/^version[ ]*=.*/version = "'${TOML_VERSION}'"/' \ + > tt.toml; \ + mv tt.toml pyproject.toml; \ + fi # Updates the versions references to our repo source as defined in .make.versions -.PHONY: .defaults.update-toml-lib-dep-versions -.defaults.update-toml-lib-dep-versions: +.PHONY: .defaults.__update-toml-lib-dep-versions +.defaults.__update-toml-lib-dep-versions: @# Help: Update pyproject.toml to depend on lib versions defined in .make.versions - cat pyproject.toml | sed \ + if [ -e pyproject.toml ]; then \ + cat pyproject.toml | sed \ -e 's/"data-prep-toolkit-ray\(..\).*",/"data-prep-toolkit-ray\1$(DPK_LIB_VERSION)",/' \ -e 's/"data-prep-toolkit-spark\(..\).*",/"data-prep-toolkit-spark\1$(DPK_LIB_VERSION)",/' \ -e 's/"data-prep-toolkit-kfp\(..\).*",/"data-prep-toolkit-kfp\1$(DPK_LIB_KFP_VERSION)",/' \ -e 's/"data-prep-toolkit\([=><][=><]\).*",/"data-prep-toolkit\1$(DPK_LIB_VERSION)",/' \ - > tt.toml - mv tt.toml pyproject.toml + > tt.toml; \ + mv tt.toml pyproject.toml; \ + fi # Build the distribution, usually in preparation for publishing using ith the .defaults.publish-dist target .PHONY: .defaults.build-dist diff --git a/.make.versions b/.make.versions index 
b2020a28f..95a7f53ca 100644 --- a/.make.versions +++ b/.make.versions @@ -2,6 +2,10 @@ # Here we attempt to capture/define all the version numbers used across the # repository in Makefile format. These are generally considered the version # numbers TO BE published on the NEXT release/publishing of artifacts. +# +# NOTE: If you modify any of the version numbers, you MUST run "make set-versions" +# from the top of the repo to have the new versions applied throughout the repo. +# ################################################################################ From 52e4eb71127f37988293fe263fdbe94b3bfc0b9f Mon Sep 17 00:00:00 2001 From: David Wood Date: Fri, 21 Jun 2024 11:23:11 -0400 Subject: [PATCH 23/32] update release.sh to apply final version changes Signed-off-by: David Wood --- scripts/release.sh | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/release.sh b/scripts/release.sh index 63045a829..859d24684 100644 --- a/scripts/release.sh +++ b/scripts/release.sh @@ -54,6 +54,7 @@ git checkout -b $release_branch # Remove the release suffix in this branch +# Apply the unsuffixed version to the repo and check it into this release branch if [ -z "$debug" ]; then cat .make.versions | sed -e 's/^DPK_VERSION_SUFFIX.*/DPK_VERSION_SUFFIX=/' > tt mv tt .make.versions @@ -61,9 +62,10 @@ else cat .make.versions | sed -e "s/^DPK_VERSION_SUFFIX.*/DPK_VERSION_SUFFIX=$dbg_suffix/" > tt mv tt .make.versions fi - -# Apply the unsuffixed version to the repo and check it into this release branch +# Apply the version change to all files in the repo make set-versions + +# Commit the changes to the release branch and tag it git status git commit -s -a -m "Cut release $version" git push --set-upstream origin $release_branch @@ -72,7 +74,6 @@ git push origin $tag # Now build with the updated version # Requires quay credentials in the environment, DPL_DOCKER_REGISTRY_USER, DPK_DOCKER_REGISTRY_KEY -# Requires pypi credentials in the environment, 
DPK_PYPI_USER=, DPK_PYPI_TOKEN if [ -z "$debug" ]; then make build publish else @@ -84,12 +85,13 @@ fi git checkout $DEFAULT_BRANCH # Change to the next development version (bumped minor version with suffix). -# Do we want to control major vs minor bump micro=$(cat .make.versions | grep '^DPK_MICRO_VERSION=' | sed -e 's/DPK_MICRO_VERSION=\([0-9]*\).*/\1/') micro=$(($micro + 1)) cat .make.versions | sed -e "s/^DPK_MICRO_VERSION=.*/DPK_MICRO_VERSION=$micro/" \ -e "s/^DPK_VERSION_SUFFIX=.*/DPK_VERSION_SUFFIX=.dev0/" > tt mv tt .make.versions +# Apply the version change to all files in the repo +make set-versions # Push the version change back to the origin next_version=$(make show-version) From 6211b50637086f04495533b37244206f608e918a Mon Sep 17 00:00:00 2001 From: David Wood Date: Fri, 21 Jun 2024 12:25:09 -0400 Subject: [PATCH 24/32] Version updates, version setting fixes, noop spark Dockerfile, Signed-off-by: David Wood --- .../ray/kfp_v1/superworkflow_code_sample_wf.py | 14 +++++++------- .../ray/kfp_v1/superworkflow_dedups_sample_wf.py | 6 +++--- transforms/.gitignore | 1 + transforms/.make.transforms | 10 +++++++--- .../code/code_quality/kfp_ray/code_quality_wf.py | 2 +- transforms/code/code_quality/python/pyproject.toml | 2 +- transforms/code/code_quality/ray/Makefile | 9 +-------- transforms/code/code_quality/ray/pyproject.toml | 4 ++-- transforms/code/ingest_2_parquet/ray/Makefile | 3 +-- transforms/code/malware/kfp_ray/malware_wf.py | 2 +- transforms/code/malware/python/pyproject.toml | 2 +- transforms/code/malware/ray/Makefile | 3 +-- transforms/code/malware/ray/pyproject.toml | 4 ++-- .../proglang_select/kfp_ray/proglang_select_wf.py | 2 +- .../code/proglang_select/python/pyproject.toml | 2 +- transforms/code/proglang_select/ray/Makefile | 3 +-- transforms/code/proglang_select/ray/pyproject.toml | 4 ++-- .../lang_id/kfp_ray/lang_id_multiple_wf.py | 2 +- transforms/language/lang_id/kfp_ray/lang_id_wf.py | 2 +- 
transforms/language/lang_id/python/pyproject.toml | 2 +- transforms/language/lang_id/ray/pyproject.toml | 4 ++-- transforms/universal/doc_id/kfp_ray/doc_id_wf.py | 2 +- transforms/universal/doc_id/ray/Makefile | 3 +-- transforms/universal/doc_id/spark/Makefile | 3 +-- transforms/universal/ededup/kfp_ray/ededup_wf.py | 2 +- transforms/universal/ededup/ray/Makefile | 3 +-- transforms/universal/fdedup/kfp_ray/fdedup_wf.py | 2 +- transforms/universal/fdedup/ray/Makefile | 3 +-- transforms/universal/filter/kfp_ray/filter_wf.py | 2 +- transforms/universal/filter/python/pyproject.toml | 2 +- transforms/universal/filter/ray/Makefile | 7 +------ transforms/universal/filter/ray/pyproject.toml | 4 ++-- transforms/universal/filter/spark/Makefile | 3 +-- .../universal/noop/kfp_ray/noop_multiple_wf.py | 2 +- transforms/universal/noop/kfp_ray/noop_wf.py | 2 +- transforms/universal/noop/spark/Dockerfile | 5 ++--- transforms/universal/profiler/ray/Makefile | 3 +-- .../tokenization/kfp_ray/tokenization_wf.py | 2 +- .../universal/tokenization/python/pyproject.toml | 2 +- transforms/universal/tokenization/ray/Makefile | 7 +------ .../universal/tokenization/ray/pyproject.toml | 4 ++-- 41 files changed, 62 insertions(+), 84 deletions(-) diff --git a/kfp/superworkflows/ray/kfp_v1/superworkflow_code_sample_wf.py b/kfp/superworkflows/ray/kfp_v1/superworkflow_code_sample_wf.py index a00d8434f..f7a46fb7e 100644 --- a/kfp/superworkflows/ray/kfp_v1/superworkflow_code_sample_wf.py +++ b/kfp/superworkflows/ray/kfp_v1/superworkflow_code_sample_wf.py @@ -15,13 +15,13 @@ run_fuzzy_dedup_op = comp.load_component_from_file(component_spec_path + "executeSubWorkflowComponent.yaml") run_tokenization_op = comp.load_component_from_file(component_spec_path + "executeSubWorkflowComponent.yaml") -proglang_select_image = "quay.io/dataprep1/data-prep-kit/proglang_select-ray:0.4.0.dev6" -code_quality_image = "quay.io/dataprep1/data-prep-kit/code_quality-ray:0.4.0.dev6" -malware_image = 
"quay.io/dataprep1/data-prep-kit/malware-ray:0.5.0.dev6" -doc_id_image = "quay.io/dataprep1/data-prep-kit/doc_id-ray:0.4.0.dev6" -ededup_image = "quay.io/dataprep1/data-prep-kit/ededup-ray:0.4.0.dev6" -fdedup_image = "quay.io/dataprep1/data-prep-kit/fdedup-ray:0.4.0.dev6" -tokenizer_image = "quay.io/dataprep1/data-prep-kit/tokenization-ray:0.4.0.dev6" +proglang_select_image = "quay.io/dataprep1/data-prep-kit/proglang_select-ray:0.2.0.dev6" +code_quality_image = "quay.io/dataprep1/data-prep-kit/code_quality-ray:0.2.0.dev6" +malware_image = "quay.io/dataprep1/data-prep-kit/malware-ray:0.2.0.dev6" +doc_id_image = "quay.io/dataprep1/data-prep-kit/doc_id-ray:0.2.0.dev6" +ededup_image = "quay.io/dataprep1/data-prep-kit/ededup-ray:0.2.0.dev6" +fdedup_image = "quay.io/dataprep1/data-prep-kit/fdedup-ray:0.2.0.dev6" +tokenizer_image = "quay.io/dataprep1/data-prep-kit/tokenization-ray:0.2.0.dev6" # Pipeline to invoke execution on remote resource diff --git a/kfp/superworkflows/ray/kfp_v1/superworkflow_dedups_sample_wf.py b/kfp/superworkflows/ray/kfp_v1/superworkflow_dedups_sample_wf.py index 07c831718..fde560b35 100644 --- a/kfp/superworkflows/ray/kfp_v1/superworkflow_dedups_sample_wf.py +++ b/kfp/superworkflows/ray/kfp_v1/superworkflow_dedups_sample_wf.py @@ -12,9 +12,9 @@ run_exact_dedup_op = comp.load_component_from_file(component_spec_path + "executeSubWorkflowComponent.yaml") run_fuzzy_dedup_op = comp.load_component_from_file(component_spec_path + "executeSubWorkflowComponent.yaml") -doc_id_image = "quay.io/dataprep1/data-prep-kit/doc_id-ray:0.4.0.dev6" -ededup_image = "quay.io/dataprep1/data-prep-kit/ededup-ray:0.4.0.dev6" -fdedup_image = "quay.io/dataprep1/data-prep-kit/fdedup-ray:0.4.0.dev6" +doc_id_image = "quay.io/dataprep1/data-prep-kit/doc_id-ray:0.2.0.dev6" +ededup_image = "quay.io/dataprep1/data-prep-kit/ededup-ray:0.2.0.dev6" +fdedup_image = "quay.io/dataprep1/data-prep-kit/fdedup-ray:0.2.0.dev6" # Pipeline to invoke execution on remote resource @dsl.pipeline( 
diff --git a/transforms/.gitignore b/transforms/.gitignore index ce6cb6566..2272a1fac 100644 --- a/transforms/.gitignore +++ b/transforms/.gitignore @@ -2,3 +2,4 @@ /**/data-processing-lib-python /**/data-processing-lib-spark /**/data-processing-lib-ray +/**/python-transform diff --git a/transforms/.make.transforms b/transforms/.make.transforms index e206e3434..8b9bfa051 100644 --- a/transforms/.make.transforms +++ b/transforms/.make.transforms @@ -323,7 +323,11 @@ minio-stop: # Requires version number of Python image as TRANSFORM_PYTHON_VERSION to be set when called .transforms.set-versions: $(MAKE) TOML_VERSION=$(DOCKER_IMAGE_VERSION) .defaults.update-toml - cat pyproject.toml | sed \ + if [ -e pyproject.toml ]; then \ + dash_name=$$(echo $(TRANSFORM_NAME) | sed -e 's/_/-/g'); \ + cat pyproject.toml | sed \ -e 's/"dpk-$(TRANSFORM_NAME)-transform-python\([=<>][=<>]\).*",/"dpk-$(TRANSFORM_NAME)-transform-python\1$(TRANSFORM_PYTHON_VERSION)",/' \ - > tt.toml - mv tt.toml pyproject.toml + -e 's/"dpk-'$${dash_name}'-transform-python\([=<>][=<>]\).*",/"dpk-'$${dash_name}'-transform-python\1$(TRANSFORM_PYTHON_VERSION)",/' \ + > tt.toml; \ + mv tt.toml pyproject.toml; \ + fi diff --git a/transforms/code/code_quality/kfp_ray/code_quality_wf.py b/transforms/code/code_quality/kfp_ray/code_quality_wf.py index 86d61bc5f..0542eb283 100644 --- a/transforms/code/code_quality/kfp_ray/code_quality_wf.py +++ b/transforms/code/code_quality/kfp_ray/code_quality_wf.py @@ -21,7 +21,7 @@ EXEC_SCRIPT_NAME: str = "code_quality_transform_ray.py" PREFIX: str = "" -task_image = "quay.io/dataprep1/data-prep-kit/code_quality-ray:0.4.0.dev6" +task_image = "quay.io/dataprep1/data-prep-kit/code_quality-ray:0.2.0.dev6" # components base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.0.dev6" diff --git a/transforms/code/code_quality/python/pyproject.toml b/transforms/code/code_quality/python/pyproject.toml index e5118d415..3a2af8238 100644 --- 
a/transforms/code/code_quality/python/pyproject.toml +++ b/transforms/code/code_quality/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_code_quality_transform_python" -version = "0.4.0.dev6" +version = "0.2.0.dev6" requires-python = ">=3.10" description = "Code Quality Python Transform" license = {text = "Apache-2.0"} diff --git a/transforms/code/code_quality/ray/Makefile b/transforms/code/code_quality/ray/Makefile index 29d8331ee..88c6e02fc 100644 --- a/transforms/code/code_quality/ray/Makefile +++ b/transforms/code/code_quality/ray/Makefile @@ -8,16 +8,12 @@ TRANSFORM_NAME=code_quality # $(REPOROOT)/.make.versions file contains the versions DOCKER_IMAGE_VERSION=${CODE_QUALITY_RAY_VERSION} -# Use default rule inherited from makefile.common clean:: .transforms.clean -# Use default rule inherited from makefile.common test:: .transforms.ray-test -# Use default rule inherited from makefile.common image:: .transforms.ray-image -# Use default rule inherited from makefile.common venv:: .transforms.ray-venv test-src:: .transforms.test-src @@ -32,16 +28,13 @@ publish-image:: .transforms.publish-image-ray setup:: .transforms.setup -# distribution versions is the same as image version. 
set-versions: - $(MAKE) TOML_VERSION=$(DOCKER_IMAGE_VERSION) .defaults.update-toml + $(MAKE) TRANSFORM_PYTHON_VERSION=$(DOCKER_IMAGE_VERSION) .transforms.set-versions build-dist:: set-versions .defaults.build-dist publish-dist:: .defaults.publish-dist -setup:: .transforms.setup - run-cli-sample: .transforms.run-cli-ray-sample run-local-sample: .transforms.run-local-ray-sample diff --git a/transforms/code/code_quality/ray/pyproject.toml b/transforms/code/code_quality/ray/pyproject.toml index 1b706ae25..aa0cc46eb 100644 --- a/transforms/code/code_quality/ray/pyproject.toml +++ b/transforms/code/code_quality/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_code_quality_transform_ray" -version = "0.4.0.dev6" +version = "0.2.0.dev6" requires-python = ">=3.10" description = "Code Quality Ray Transform" license = {text = "Apache-2.0"} @@ -9,7 +9,7 @@ authors = [ { name = "Shivdeep Singh", email = "shivdeep.singh@ibm.com" }, ] dependencies = [ - "dpk-code-quality-transform-python==0.4.0.dev6", + "dpk-code-quality-transform-python==0.2.0.dev6", "data-prep-toolkit-ray==0.2.0.dev6", ] diff --git a/transforms/code/ingest_2_parquet/ray/Makefile b/transforms/code/ingest_2_parquet/ray/Makefile index f23d1c518..1a11dc9ac 100644 --- a/transforms/code/ingest_2_parquet/ray/Makefile +++ b/transforms/code/ingest_2_parquet/ray/Makefile @@ -30,9 +30,8 @@ publish:: publish-dist publish-image publish-image:: .transforms.publish-image-ray -# distribution versions is the same as image version. 
set-versions: - $(MAKE) TOML_VERSION=$(DOCKER_IMAGE_VERSION) .defaults.update-toml + $(MAKE) TRANSFORM_PYTHON_VERSION=$(DOCKER_IMAGE_VERSION) .transforms.set-versions build-dist:: set-versions .defaults.build-dist diff --git a/transforms/code/malware/kfp_ray/malware_wf.py b/transforms/code/malware/kfp_ray/malware_wf.py index dd34933d3..4bc26c0e8 100644 --- a/transforms/code/malware/kfp_ray/malware_wf.py +++ b/transforms/code/malware/kfp_ray/malware_wf.py @@ -21,7 +21,7 @@ # the name of the job script EXEC_SCRIPT_NAME: str = "malware_transform_ray.py" -task_image = "quay.io/dataprep1/data-prep-kit/malware-ray:0.5.0.dev6" +task_image = "quay.io/dataprep1/data-prep-kit/malware-ray:0.2.0.dev6" # components base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.0.dev6" diff --git a/transforms/code/malware/python/pyproject.toml b/transforms/code/malware/python/pyproject.toml index 563503b5f..3130a9c72 100644 --- a/transforms/code/malware/python/pyproject.toml +++ b/transforms/code/malware/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_malware_transform_python" -version = "0.5.0.dev6" +version = "0.2.0.dev6" requires-python = ">=3.10" description = "Malware Python Transform" license = {text = "Apache-2.0"} diff --git a/transforms/code/malware/ray/Makefile b/transforms/code/malware/ray/Makefile index 6813f1b45..6e68976f7 100644 --- a/transforms/code/malware/ray/Makefile +++ b/transforms/code/malware/ray/Makefile @@ -39,9 +39,8 @@ publish-image:: .transforms.publish-image-ray setup:: .transforms.setup -# distribution versions is the same as image version. 
set-versions: - $(MAKE) TOML_VERSION=$(DOCKER_IMAGE_VERSION) .defaults.update-toml + $(MAKE) TRANSFORM_PYTHON_VERSION=$(DOCKER_IMAGE_VERSION) .transforms.set-versions build-dist:: set-versions .defaults.build-dist diff --git a/transforms/code/malware/ray/pyproject.toml b/transforms/code/malware/ray/pyproject.toml index bd630831c..45970387e 100644 --- a/transforms/code/malware/ray/pyproject.toml +++ b/transforms/code/malware/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_malware_transform_ray" -version = "0.5.0.dev6" +version = "0.2.0.dev6" requires-python = ">=3.10" description = "Malware Ray Transform" license = {text = "Apache-2.0"} @@ -9,7 +9,7 @@ authors = [ { name = "Takuya Goto", email = "tkyg@jp.ibm.com" }, ] dependencies = [ - "dpk-malware-transform-python==0.5.0.dev6", + "dpk-malware-transform-python==0.2.0.dev6", "data-prep-toolkit-ray==0.2.0.dev6", ] diff --git a/transforms/code/proglang_select/kfp_ray/proglang_select_wf.py b/transforms/code/proglang_select/kfp_ray/proglang_select_wf.py index e3c41666e..c8f9c2e98 100644 --- a/transforms/code/proglang_select/kfp_ray/proglang_select_wf.py +++ b/transforms/code/proglang_select/kfp_ray/proglang_select_wf.py @@ -21,7 +21,7 @@ # the name of the job script EXEC_SCRIPT_NAME: str = "proglang_select_transform_ray.py" -task_image = "quay.io/dataprep1/data-prep-kit/proglang_select-ray:0.4.0.dev6" +task_image = "quay.io/dataprep1/data-prep-kit/proglang_select-ray:0.2.0.dev6" # components base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.0.dev6" diff --git a/transforms/code/proglang_select/python/pyproject.toml b/transforms/code/proglang_select/python/pyproject.toml index 072cd7d17..4eddd1769 100644 --- a/transforms/code/proglang_select/python/pyproject.toml +++ b/transforms/code/proglang_select/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_proglang_select_transform_python" -version = "0.4.0.dev6" +version = "0.2.0.dev6" requires-python = ">=3.10" description = 
"Programming Language Selection Python Transform" license = {text = "Apache-2.0"} diff --git a/transforms/code/proglang_select/ray/Makefile b/transforms/code/proglang_select/ray/Makefile index 0eea52289..d46f951ea 100644 --- a/transforms/code/proglang_select/ray/Makefile +++ b/transforms/code/proglang_select/ray/Makefile @@ -30,9 +30,8 @@ publish:: publish-dist publish-image publish-image:: .transforms.publish-image-ray -# distribution versions is the same as image version. set-versions: - $(MAKE) TOML_VERSION=$(DOCKER_IMAGE_VERSION) .defaults.update-toml + $(MAKE) TRANSFORM_PYTHON_VERSION=$(DOCKER_IMAGE_VERSION) .transforms.set-versions build-dist:: set-versions .defaults.build-dist diff --git a/transforms/code/proglang_select/ray/pyproject.toml b/transforms/code/proglang_select/ray/pyproject.toml index 471bd5364..ad9ed878f 100644 --- a/transforms/code/proglang_select/ray/pyproject.toml +++ b/transforms/code/proglang_select/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_proglang_select_transform_ray" -version = "0.4.0.dev6" +version = "0.2.0.dev6" requires-python = ">=3.10" description = "Programming Language Selection Ray Transform" license = {text = "Apache-2.0"} @@ -9,7 +9,7 @@ authors = [ { name = "Shivdeep Singh", email = "shivdeep.singh@ibm.com" }, ] dependencies = [ - "dpk-proglang-select-transform-python==0.4.0.dev6", + "dpk-proglang-select-transform-python==0.2.0.dev6", "data-prep-toolkit-ray==0.2.0.dev6", ] diff --git a/transforms/language/lang_id/kfp_ray/lang_id_multiple_wf.py b/transforms/language/lang_id/kfp_ray/lang_id_multiple_wf.py index d49beb7e5..439fb21d2 100644 --- a/transforms/language/lang_id/kfp_ray/lang_id_multiple_wf.py +++ b/transforms/language/lang_id/kfp_ray/lang_id_multiple_wf.py @@ -17,7 +17,7 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/lang_id-ray:0.4.0.dev6" +task_image = "quay.io/dataprep1/data-prep-kit/lang_id-ray:0.2.0.dev6" # 
the name of the job script EXEC_SCRIPT_NAME: str = "lang_id_transform_ray.py" diff --git a/transforms/language/lang_id/kfp_ray/lang_id_wf.py b/transforms/language/lang_id/kfp_ray/lang_id_wf.py index eda4f5309..b849cdd77 100644 --- a/transforms/language/lang_id/kfp_ray/lang_id_wf.py +++ b/transforms/language/lang_id/kfp_ray/lang_id_wf.py @@ -17,7 +17,7 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/lang_id-ray:0.4.0.dev6" +task_image = "quay.io/dataprep1/data-prep-kit/lang_id-ray:0.2.0.dev6" # the name of the job script EXEC_SCRIPT_NAME: str = "lang_id_transform_ray.py" diff --git a/transforms/language/lang_id/python/pyproject.toml b/transforms/language/lang_id/python/pyproject.toml index d5ae9e4a5..126df7fee 100644 --- a/transforms/language/lang_id/python/pyproject.toml +++ b/transforms/language/lang_id/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_lang_id_transform_python" -version = "0.4.0.dev6" +version = "0.2.0.dev6" requires-python = ">=3.10" description = "Language Identification Python Transform" license = {text = "Apache-2.0"} diff --git a/transforms/language/lang_id/ray/pyproject.toml b/transforms/language/lang_id/ray/pyproject.toml index f230d193c..80cbb47ff 100644 --- a/transforms/language/lang_id/ray/pyproject.toml +++ b/transforms/language/lang_id/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_lang_id_transform_ray" -version = "0.4.0.dev6" +version = "0.2.0.dev6" requires-python = ">=3.10" description = "Language Identification Ray Transform" license = {text = "Apache-2.0"} @@ -9,7 +9,7 @@ authors = [ { name = "Daiki Tsuzuku", email = "dtsuzuku@jp.ibm.com" } ] dependencies = [ - "dpk-lang_id-transform-python==0.4.0.dev6", + "dpk-lang_id-transform-python==0.2.0.dev6", "data-prep-toolkit-ray==0.2.0.dev6" ] diff --git a/transforms/universal/doc_id/kfp_ray/doc_id_wf.py b/transforms/universal/doc_id/kfp_ray/doc_id_wf.py index deb647441..25c5d779f 
100644 --- a/transforms/universal/doc_id/kfp_ray/doc_id_wf.py +++ b/transforms/universal/doc_id/kfp_ray/doc_id_wf.py @@ -17,7 +17,7 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/doc_id-ray:0.4.0.dev6" +task_image = "quay.io/dataprep1/data-prep-kit/doc_id-ray:0.2.0.dev6" # the name of the job script EXEC_SCRIPT_NAME: str = "doc_id_transform_ray.py" diff --git a/transforms/universal/doc_id/ray/Makefile b/transforms/universal/doc_id/ray/Makefile index b5867b6ef..a6d16869e 100644 --- a/transforms/universal/doc_id/ray/Makefile +++ b/transforms/universal/doc_id/ray/Makefile @@ -32,9 +32,8 @@ publish-image:: .transforms.publish-image-ray setup:: .transforms.setup -# distribution versions is the same as image version. set-versions: - $(MAKE) TOML_VERSION=$(DOCKER_IMAGE_VERSION) .defaults.update-toml + $(MAKE) TRANSFORM_PYTHON_VERSION=$(DOCKER_IMAGE_VERSION) .transforms.set-versions build-dist:: set-versions .defaults.build-dist diff --git a/transforms/universal/doc_id/spark/Makefile b/transforms/universal/doc_id/spark/Makefile index c93122c17..f9c3aac94 100644 --- a/transforms/universal/doc_id/spark/Makefile +++ b/transforms/universal/doc_id/spark/Makefile @@ -28,9 +28,8 @@ publish:: publish-dist publish-image publish-image:: .transforms.publish-image-spark -# distribution versions is the same as image version. 
set-versions: - $(MAKE) TOML_VERSION=$(DOCKER_IMAGE_VERSION) .defaults.update-toml + $(MAKE) TRANSFORM_PYTHON_VERSION=$(DOCKER_IMAGE_VERSION) .transforms.set-versions build-dist:: set-versions .defaults.build-dist diff --git a/transforms/universal/ededup/kfp_ray/ededup_wf.py b/transforms/universal/ededup/kfp_ray/ededup_wf.py index f8c1f0c4a..6ad1e323e 100644 --- a/transforms/universal/ededup/kfp_ray/ededup_wf.py +++ b/transforms/universal/ededup/kfp_ray/ededup_wf.py @@ -18,7 +18,7 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/ededup-ray:0.4.0.dev6" +task_image = "quay.io/dataprep1/data-prep-kit/ededup-ray:0.2.0.dev6" # the name of the job script EXEC_SCRIPT_NAME: str = "ededup_transform_ray.py" diff --git a/transforms/universal/ededup/ray/Makefile b/transforms/universal/ededup/ray/Makefile index bf79f20cb..4af058ffe 100644 --- a/transforms/universal/ededup/ray/Makefile +++ b/transforms/universal/ededup/ray/Makefile @@ -28,9 +28,8 @@ publish:: publish-dist publish-image publish-image:: .transforms.publish-image-ray -# distribution versions is the same as image version. 
set-versions: - $(MAKE) TOML_VERSION=$(DOCKER_IMAGE_VERSION) .defaults.update-toml + $(MAKE) TRANSFORM_PYTHON_VERSION=$(DOCKER_IMAGE_VERSION) .transforms.set-versions build-dist:: set-versions .defaults.build-dist diff --git a/transforms/universal/fdedup/kfp_ray/fdedup_wf.py b/transforms/universal/fdedup/kfp_ray/fdedup_wf.py index 5c43acb10..df50a4af3 100644 --- a/transforms/universal/fdedup/kfp_ray/fdedup_wf.py +++ b/transforms/universal/fdedup/kfp_ray/fdedup_wf.py @@ -18,7 +18,7 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/fdedup-ray:0.4.0.dev6" +task_image = "quay.io/dataprep1/data-prep-kit/fdedup-ray:0.2.0.dev6" # the name of the job script EXEC_SCRIPT_NAME: str = "fdedup_transform_ray.py" diff --git a/transforms/universal/fdedup/ray/Makefile b/transforms/universal/fdedup/ray/Makefile index 19f2be284..7f9cb0026 100644 --- a/transforms/universal/fdedup/ray/Makefile +++ b/transforms/universal/fdedup/ray/Makefile @@ -28,9 +28,8 @@ publish:: publish-dist publish-image publish-image:: .transforms.publish-image-ray -# distribution versions is the same as image version. 
set-versions: - $(MAKE) TOML_VERSION=$(DOCKER_IMAGE_VERSION) .defaults.update-toml + $(MAKE) TRANSFORM_PYTHON_VERSION=$(DOCKER_IMAGE_VERSION) .transforms.set-versions build-dist:: set-versions .defaults.build-dist diff --git a/transforms/universal/filter/kfp_ray/filter_wf.py b/transforms/universal/filter/kfp_ray/filter_wf.py index 782026d10..9de907c55 100644 --- a/transforms/universal/filter/kfp_ray/filter_wf.py +++ b/transforms/universal/filter/kfp_ray/filter_wf.py @@ -21,7 +21,7 @@ EXEC_SCRIPT_NAME: str = "filter_transform_ray.py" PREFIX: str = "" -task_image = "quay.io/dataprep1/data-prep-kit/filter-ray:0.4.0.dev6" +task_image = "quay.io/dataprep1/data-prep-kit/filter-ray:0.2.0.dev6" # components base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.0.dev6" diff --git a/transforms/universal/filter/python/pyproject.toml b/transforms/universal/filter/python/pyproject.toml index 1af0da972..42ad8ea1c 100644 --- a/transforms/universal/filter/python/pyproject.toml +++ b/transforms/universal/filter/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_filter_transform_python" -version = "0.4.0.dev6" +version = "0.2.0.dev6" requires-python = ">=3.10" description = "Filter Transform for Python" license = {text = "Apache-2.0"} diff --git a/transforms/universal/filter/ray/Makefile b/transforms/universal/filter/ray/Makefile index 69f2060d7..cb9fe049b 100644 --- a/transforms/universal/filter/ray/Makefile +++ b/transforms/universal/filter/ray/Makefile @@ -31,13 +31,8 @@ publish:: publish-dist publish-image publish-image:: .transforms.publish-image-ray -# distribution versions is the same as image version. 
set-versions: - $(MAKE) TOML_VERSION=$(DOCKER_IMAGE_VERSION) .defaults.update-toml - cat pyproject.toml | sed \ - -e 's/"dpk-filter-transform-python\(..\).*",/"dpk-filter-transform-python\1$(FILTER_PYTHON_VERSION)",/' \ - > tt.toml - mv tt.toml pyproject.toml + $(MAKE) TRANSFORM_PYTHON_VERSION=$(DOCKER_IMAGE_VERSION) .transforms.set-versions build-dist:: set-versions .defaults.build-dist diff --git a/transforms/universal/filter/ray/pyproject.toml b/transforms/universal/filter/ray/pyproject.toml index 678f60133..4773cbcc5 100644 --- a/transforms/universal/filter/ray/pyproject.toml +++ b/transforms/universal/filter/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_filter_transform_ray" -version = "0.4.0.dev6" +version = "0.2.0.dev6" requires-python = ">=3.10" description = "Filter Transform for Ray" license = {text = "Apache-2.0"} @@ -9,7 +9,7 @@ authors = [ { name = "Constantin Adam", email = "cmadam@us.ibm.com" }, ] dependencies = [ - "dpk-filter-transform-python==0.4.0.dev6", + "dpk-filter-transform-python==0.2.0.dev6", "data-prep-toolkit-ray==0.2.0.dev6", ] diff --git a/transforms/universal/filter/spark/Makefile b/transforms/universal/filter/spark/Makefile index 275da5362..849275ea2 100644 --- a/transforms/universal/filter/spark/Makefile +++ b/transforms/universal/filter/spark/Makefile @@ -31,9 +31,8 @@ publish:: publish-dist publish-image publish-image:: .transforms.publish-image-spark -# distribution versions is the same as image version. 
set-versions: - $(MAKE) TOML_VERSION=$(DOCKER_IMAGE_VERSION) .defaults.update-toml + $(MAKE) TRANSFORM_PYTHON_VERSION=$(DOCKER_IMAGE_VERSION) .transforms.set-versions build-dist:: set-versions .defaults.build-dist diff --git a/transforms/universal/noop/kfp_ray/noop_multiple_wf.py b/transforms/universal/noop/kfp_ray/noop_multiple_wf.py index 51d6b1686..1aa18fd1a 100644 --- a/transforms/universal/noop/kfp_ray/noop_multiple_wf.py +++ b/transforms/universal/noop/kfp_ray/noop_multiple_wf.py @@ -17,7 +17,7 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/noop-ray:0.9.0.dev6" +task_image = "quay.io/dataprep1/data-prep-kit/noop-ray:0.2.0.dev6" # the name of the job script EXEC_SCRIPT_NAME: str = "noop_transform_ray.py" diff --git a/transforms/universal/noop/kfp_ray/noop_wf.py b/transforms/universal/noop/kfp_ray/noop_wf.py index 830af418a..600e3146c 100644 --- a/transforms/universal/noop/kfp_ray/noop_wf.py +++ b/transforms/universal/noop/kfp_ray/noop_wf.py @@ -17,7 +17,7 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/noop-ray:0.9.0.dev6" +task_image = "quay.io/dataprep1/data-prep-kit/noop-ray:0.2.0.dev6" # the name of the job script EXEC_SCRIPT_NAME: str = "noop_transform_ray.py" diff --git a/transforms/universal/noop/spark/Dockerfile b/transforms/universal/noop/spark/Dockerfile index e60dbacaa..3fb44635e 100644 --- a/transforms/universal/noop/spark/Dockerfile +++ b/transforms/universal/noop/spark/Dockerfile @@ -13,9 +13,8 @@ COPY --chown=spark:root data-processing-lib-python/ data-processing-lib-python/ RUN cd data-processing-lib-python && pip install --no-cache-dir -e . COPY --chown=spark:root data-processing-lib-spark/ data-processing-lib-spark/ RUN cd data-processing-lib-spark && pip install --no-cache-dir -e . 
- -#COPY requirements.txt requirements.txt -#RUN pip install --no-cache-dir -r requirements.txt +COPY --chown=spark:root python-transform/ python-transform/ +RUN cd python-transform && pip install --no-cache-dir -e . COPY --chown=root:root src/ src/ COPY --chown=root:root pyproject.toml pyproject.toml diff --git a/transforms/universal/profiler/ray/Makefile b/transforms/universal/profiler/ray/Makefile index 01a117b5f..918bd286e 100644 --- a/transforms/universal/profiler/ray/Makefile +++ b/transforms/universal/profiler/ray/Makefile @@ -28,9 +28,8 @@ publish:: publish-dist publish-image publish-image:: .transforms.publish-image-ray -# distribution versions is the same as image version. set-versions: - $(MAKE) TOML_VERSION=$(DOCKER_IMAGE_VERSION) .defaults.update-toml + $(MAKE) TRANSFORM_PYTHON_VERSION=$(DOCKER_IMAGE_VERSION) .transforms.set-versions build-dist:: set-versions .defaults.build-dist diff --git a/transforms/universal/tokenization/kfp_ray/tokenization_wf.py b/transforms/universal/tokenization/kfp_ray/tokenization_wf.py index 76e31b1f9..f147df186 100644 --- a/transforms/universal/tokenization/kfp_ray/tokenization_wf.py +++ b/transforms/universal/tokenization/kfp_ray/tokenization_wf.py @@ -20,7 +20,7 @@ # the name of the job script EXEC_SCRIPT_NAME: str = "tokenization_transform_ray.py" -task_image = "quay.io/dataprep1/data-prep-kit/tokenization-ray:0.4.0.dev6" +task_image = "quay.io/dataprep1/data-prep-kit/tokenization-ray:0.2.0.dev6" # components base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.0.dev6" diff --git a/transforms/universal/tokenization/python/pyproject.toml b/transforms/universal/tokenization/python/pyproject.toml index 6bf310c7c..c5cb72346 100644 --- a/transforms/universal/tokenization/python/pyproject.toml +++ b/transforms/universal/tokenization/python/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_tokenization_transform_python" keywords = ["tokenizer", "data", "data preprocessing", "data preparation", "llm", 
"generative", "ai", "fine-tuning", "llmapps" ] -version = "0.4.0.dev6" +version = "0.2.0.dev6" requires-python = ">=3.10" description = "Tokenization Transform for Python" license = {text = "Apache-2.0"} diff --git a/transforms/universal/tokenization/ray/Makefile b/transforms/universal/tokenization/ray/Makefile index 0231c442c..8f1378697 100644 --- a/transforms/universal/tokenization/ray/Makefile +++ b/transforms/universal/tokenization/ray/Makefile @@ -30,13 +30,8 @@ publish-image:: .transforms.publish-image-ray setup:: .transforms.setup -# distribution versions is the same as image version. set-versions: - $(MAKE) TOML_VERSION=$(DOCKER_IMAGE_VERSION) .defaults.update-toml - cat pyproject.toml | sed \ - -e 's/"dpk-tokenization-transform-python\(..\).*",/"dpk-tokenization-transform-python\1$(TOKENIZATION_PYTHON_VERSION)",/' \ - > tt.toml - mv tt.toml pyproject.toml + $(MAKE) TRANSFORM_PYTHON_VERSION=$(DOCKER_IMAGE_VERSION) .transforms.set-versions build-dist:: set-versions .defaults.build-dist diff --git a/transforms/universal/tokenization/ray/pyproject.toml b/transforms/universal/tokenization/ray/pyproject.toml index b813dbd5a..e09e64932 100644 --- a/transforms/universal/tokenization/ray/pyproject.toml +++ b/transforms/universal/tokenization/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_tokenization_transform_ray" -version = "0.4.0.dev6" +version = "0.2.0.dev6" requires-python = ">=3.10" description = "Tokenization Transform for Ray" license = {text = "Apache-2.0"} @@ -9,7 +9,7 @@ authors = [ { name = "Xuan-Hong Dang", email = "xuan-hong.dang@ibm.com"}, ] dependencies = [ - "dpk-tokenization-transform-python==0.4.0.dev6", + "dpk-tokenization-transform-python==0.2.0.dev6", "data-prep-toolkit-ray==0.2.0.dev6", ] From 51fe74c118fa8251bb71231997fb27bfb614b48b Mon Sep 17 00:00:00 2001 From: David Wood Date: Fri, 21 Jun 2024 16:09:29 -0400 Subject: [PATCH 25/32] set-versions inputcode2parquet' Signed-off-by: David Wood --- 
transforms/code/inputcode2parquet/python/pyproject.toml | 2 +- transforms/code/inputcode2parquet/ray/pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/transforms/code/inputcode2parquet/python/pyproject.toml b/transforms/code/inputcode2parquet/python/pyproject.toml index 4fa309d6b..a632fcf2e 100644 --- a/transforms/code/inputcode2parquet/python/pyproject.toml +++ b/transforms/code/inputcode2parquet/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_inputcode2parquet_transform_python" -version = "0.4.0.dev6" +version = "0.2.0.dev6" requires-python = ">=3.10" description = "inputcode2parquet Python Transform" license = {text = "Apache-2.0"} diff --git a/transforms/code/inputcode2parquet/ray/pyproject.toml b/transforms/code/inputcode2parquet/ray/pyproject.toml index 747b43cfc..2b3a0e70b 100644 --- a/transforms/code/inputcode2parquet/ray/pyproject.toml +++ b/transforms/code/inputcode2parquet/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_dpk_inputcode2parquet_transform_ray" -version = "0.4.0.dev6" +version = "0.2.0.dev6" requires-python = ">=3.10" description = "inputcode2parquet Ray Transform" license = {text = "Apache-2.0"} From 3db00f7d10ac3e0cb0da9ddd894472dd1bf441c2 Mon Sep 17 00:00:00 2001 From: David Wood Date: Fri, 21 Jun 2024 17:10:56 -0400 Subject: [PATCH 26/32] restore INGEST_TO_PARQUET_VERSION removed in recent commit" Signed-off-by: David Wood --- .make.versions | 1 + 1 file changed, 1 insertion(+) diff --git a/.make.versions b/.make.versions index 2ae80c74b..b0999919d 100644 --- a/.make.versions +++ b/.make.versions @@ -66,6 +66,7 @@ CODE_QUALITY_RAY_VERSION=$(DPK_VERSION) INPUT_CODE_TO_PARQUET_PYTHON_VERSION=$(DPK_VERSION) INPUT_CODE_TO_PARQUET_RAY_VERSION=$(DPK_VERSION) +INGEST_TO_PARQUET_VERSION=$(DPK_VERSION) KFP_DOCKER_VERSION=$(DPK_VERSION) KFP_DOCKER_VERSION_v2=$(DPK_VERSION) From caa42b4159c38f3703b870396ec2b72cf6500a5e Mon Sep 17 00:00:00 2001 From: David Wood Date: Fri, 21 Jun 2024 17:17:17 
-0400 Subject: [PATCH 27/32] updated repo.md to point to include link to transform conventions Signed-off-by: David Wood --- doc/repo.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/repo.md b/doc/repo.md index 7a1259eba..a44a5a7bf 100644 --- a/doc/repo.md +++ b/doc/repo.md @@ -1,5 +1,7 @@ # Repository Structure and Use +Here we discuss the structure, use and approach to code management in the repo. + # Repository structure * data_processing_lib - provides the core transform framework and library supporting data transformations in 3 runtimes @@ -76,6 +78,9 @@ which are triggered with the the `test` target. * test-src - test python tests in the test directory * test-image - build and test the docker image for the transform +Please also consult [transform project conventions](../transforms#transform-project-conventions) for +additional considerations when developing transforms. + ### Transforms and KFP The kfp_ray directories in the transform projects provide `workflow-` targets and are dedicated to handling the From b43683f94178c4c179d1173a46d8f3554dd48277 Mon Sep 17 00:00:00 2001 From: David Wood Date: Fri, 21 Jun 2024 17:18:48 -0400 Subject: [PATCH 28/32] fix transform link added to repo.md in last commit Signed-off-by: David Wood --- doc/repo.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/repo.md b/doc/repo.md index a44a5a7bf..353282f6f 100644 --- a/doc/repo.md +++ b/doc/repo.md @@ -78,7 +78,7 @@ which are triggered with the the `test` target. * test-src - test python tests in the test directory * test-image - build and test the docker image for the transform -Please also consult [transform project conventions](../transforms#transform-project-conventions) for +Please also consult [transform project conventions](../transforms/README.md#transform-project-conventions) for additional considerations when developing transforms. 
### Transforms and KFP From 67d3357a5239f07286a400707eee0a5a0974d313 Mon Sep 17 00:00:00 2001 From: David Wood Date: Sun, 23 Jun 2024 13:28:08 -0400 Subject: [PATCH 29/32] fixed code2parquet versions from last merge and uninstall libs in better order in .make.defaults Signed-off-by: David Wood --- .make.defaults | 4 ++-- transforms/code/code2parquet/kfp_ray/code2parquet_wf.py | 2 +- transforms/code/code2parquet/ray/pyproject.toml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.make.defaults b/.make.defaults index 78bce78ea..374d1d33c 100644 --- a/.make.defaults +++ b/.make.defaults @@ -304,8 +304,8 @@ __check_defined = \ @echo Installing Ray and Python data processing library source to existing venv @source venv/bin/activate; \ pip install pytest; \ - $(MAKE) PIP_TARGET=data-prep-toolkit .defaults.pip-uninstall; \ $(MAKE) PIP_TARGET=data-prep-toolkit-ray .defaults.pip-uninstall; \ + $(MAKE) PIP_TARGET=data-prep-toolkit .defaults.pip-uninstall; \ $(MAKE) PYTHON_PROJECT_DIR=$(DPK_PYTHON_LIB_DIR) .defaults.install-src-venv; \ $(MAKE) PYTHON_PROJECT_DIR=$(DPK_RAY_LIB_DIR) .defaults.install-src-venv; \ echo Installed source from Ray data processing library for `which $(PYTHON)`; \ @@ -325,8 +325,8 @@ __check_defined = \ @echo Installing Spark and Python data processing library source to existing venv @source venv/bin/activate; \ pip install pytest; \ - $(MAKE) PIP_TARGET=data-prep-toolkit .defaults.pip-uninstall; \ $(MAKE) PIP_TARGET=data-prep-toolkit-spark .defaults.pip-uninstall; \ + $(MAKE) PIP_TARGET=data-prep-toolkit .defaults.pip-uninstall; \ $(MAKE) PYTHON_PROJECT_DIR=$(DPK_PYTHON_LIB_DIR) .defaults.install-src-venv; \ $(MAKE) PYTHON_PROJECT_DIR=$(DPK_SPARK_LIB_DIR) .defaults.install-src-venv; \ if [ -d ../python ]; then \ diff --git a/transforms/code/code2parquet/kfp_ray/code2parquet_wf.py b/transforms/code/code2parquet/kfp_ray/code2parquet_wf.py index 3efcb0d5a..8c5f08ebc 100644 --- 
a/transforms/code/code2parquet/kfp_ray/code2parquet_wf.py +++ b/transforms/code/code2parquet/kfp_ray/code2parquet_wf.py @@ -21,7 +21,7 @@ # the name of the job script EXEC_SCRIPT_NAME: str = "code2parquet_transform_ray.py" -task_image = "quay.io/dataprep1/data-prep-kit/code2parquet-ray:0.4.0.dev6" +task_image = "quay.io/dataprep1/data-prep-kit/code2parquet-ray:0.2.0.dev6" # components diff --git a/transforms/code/code2parquet/ray/pyproject.toml b/transforms/code/code2parquet/ray/pyproject.toml index 3fdabe384..e6acdde20 100644 --- a/transforms/code/code2parquet/ray/pyproject.toml +++ b/transforms/code/code2parquet/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] -name = "dpk_dpk_inputcode2parquet_transform_ray" name = "dpk_code2parquet_transform_ray" +version = "0.2.0.dev6" requires-python = ">=3.10" description = "code2parquet Ray Transform" license = {text = "Apache-2.0"} @@ -11,7 +11,7 @@ authors = [ ] dependencies = [ "data-prep-toolkit-ray==0.2.0.dev6", - "dpk-code2parquet-transform-python==0.4.0.dev6", + "dpk-code2parquet-transform-python==0.2.0.dev6", "parameterized", "pandas", ] From 40ccc4c3e32018d65c03c5e20ea99cd4254a2898 Mon Sep 17 00:00:00 2001 From: David Wood Date: Mon, 24 Jun 2024 12:03:49 -0400 Subject: [PATCH 30/32] remove redundant pip uninstall in .make.defaults Signed-off-by: David Wood --- .make.defaults | 1 - 1 file changed, 1 deletion(-) diff --git a/.make.defaults b/.make.defaults index 5febaeb2e..be2d30dee 100644 --- a/.make.defaults +++ b/.make.defaults @@ -307,7 +307,6 @@ __check_defined = \ pip install pytest pytest-cov; \ $(MAKE) PIP_TARGET=data-prep-toolkit-ray .defaults.pip-uninstall; \ $(MAKE) PIP_TARGET=data-prep-toolkit .defaults.pip-uninstall; \ - $(MAKE) PIP_TARGET=data-prep-toolkit .defaults.pip-uninstall; \ $(MAKE) PYTHON_PROJECT_DIR=$(DPK_PYTHON_LIB_DIR) .defaults.install-src-venv; \ $(MAKE) PYTHON_PROJECT_DIR=$(DPK_RAY_LIB_DIR) .defaults.install-src-venv; \ echo Installed source from Ray data processing library for `which 
$(PYTHON)`; \ From 0123b7ebe556146792bfd4e34437275e4d58c8bd Mon Sep 17 00:00:00 2001 From: David Wood Date: Mon, 24 Jun 2024 12:56:50 -0400 Subject: [PATCH 31/32] Fix transform version settings when python transform code is referenced Signed-off-by: David Wood --- transforms/code/code_quality/ray/Makefile | 2 +- transforms/code/malware/ray/Makefile | 2 +- transforms/code/proglang_select/ray/Makefile | 2 +- transforms/universal/doc_id/ray/Makefile | 2 +- transforms/universal/ededup/ray/Makefile | 2 +- transforms/universal/fdedup/ray/Makefile | 2 +- transforms/universal/filter/ray/Makefile | 2 +- transforms/universal/profiler/ray/Makefile | 2 +- transforms/universal/tokenization/ray/Makefile | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/transforms/code/code_quality/ray/Makefile b/transforms/code/code_quality/ray/Makefile index 88c6e02fc..9b8b7f794 100644 --- a/transforms/code/code_quality/ray/Makefile +++ b/transforms/code/code_quality/ray/Makefile @@ -29,7 +29,7 @@ publish-image:: .transforms.publish-image-ray setup:: .transforms.setup set-versions: - $(MAKE) TRANSFORM_PYTHON_VERSION=$(DOCKER_IMAGE_VERSION) .transforms.set-versions + $(MAKE) TRANSFORM_PYTHON_VERSION=$(CODE_QUALITY_PYTHON_VERSION) .transforms.set-versions build-dist:: set-versions .defaults.build-dist diff --git a/transforms/code/malware/ray/Makefile b/transforms/code/malware/ray/Makefile index 6e68976f7..91885cc40 100644 --- a/transforms/code/malware/ray/Makefile +++ b/transforms/code/malware/ray/Makefile @@ -40,7 +40,7 @@ publish-image:: .transforms.publish-image-ray setup:: .transforms.setup set-versions: - $(MAKE) TRANSFORM_PYTHON_VERSION=$(DOCKER_IMAGE_VERSION) .transforms.set-versions + $(MAKE) TRANSFORM_PYTHON_VERSION=$(MALWARE_PYTHON_VERSION) .transforms.set-versions build-dist:: set-versions .defaults.build-dist diff --git a/transforms/code/proglang_select/ray/Makefile b/transforms/code/proglang_select/ray/Makefile index d46f951ea..0ee89a9c9 100644 --- 
a/transforms/code/proglang_select/ray/Makefile +++ b/transforms/code/proglang_select/ray/Makefile @@ -31,7 +31,7 @@ publish:: publish-dist publish-image publish-image:: .transforms.publish-image-ray set-versions: - $(MAKE) TRANSFORM_PYTHON_VERSION=$(DOCKER_IMAGE_VERSION) .transforms.set-versions + $(MAKE) TRANSFORM_PYTHON_VERSION=$(PROGLANG_SELECT_PYTHON_VERSION) .transforms.set-versions build-dist:: set-versions .defaults.build-dist diff --git a/transforms/universal/doc_id/ray/Makefile b/transforms/universal/doc_id/ray/Makefile index 9d9700eec..df477b498 100644 --- a/transforms/universal/doc_id/ray/Makefile +++ b/transforms/universal/doc_id/ray/Makefile @@ -31,7 +31,7 @@ publish:: publish-dist publish-image publish-image:: .transforms.publish-image-ray set-versions: - $(MAKE) TRANSFORM_PYTHON_VERSION=$(DOCKER_IMAGE_VERSION) .transforms.set-versions + $(MAKE) TRANSFORM_PYTHON_VERSION=not-used .transforms.set-versions build-dist:: set-versions .defaults.build-dist diff --git a/transforms/universal/ededup/ray/Makefile b/transforms/universal/ededup/ray/Makefile index 4af058ffe..25bcdd97b 100644 --- a/transforms/universal/ededup/ray/Makefile +++ b/transforms/universal/ededup/ray/Makefile @@ -29,7 +29,7 @@ publish:: publish-dist publish-image publish-image:: .transforms.publish-image-ray set-versions: - $(MAKE) TRANSFORM_PYTHON_VERSION=$(DOCKER_IMAGE_VERSION) .transforms.set-versions + $(MAKE) TRANSFORM_PYTHON_VERSION=not-used .transforms.set-versions build-dist:: set-versions .defaults.build-dist diff --git a/transforms/universal/fdedup/ray/Makefile b/transforms/universal/fdedup/ray/Makefile index 7f9cb0026..0a765650b 100644 --- a/transforms/universal/fdedup/ray/Makefile +++ b/transforms/universal/fdedup/ray/Makefile @@ -29,7 +29,7 @@ publish:: publish-dist publish-image publish-image:: .transforms.publish-image-ray set-versions: - $(MAKE) TRANSFORM_PYTHON_VERSION=$(DOCKER_IMAGE_VERSION) .transforms.set-versions + $(MAKE) TRANSFORM_PYTHON_VERSION=not-used 
.transforms.set-versions build-dist:: set-versions .defaults.build-dist diff --git a/transforms/universal/filter/ray/Makefile b/transforms/universal/filter/ray/Makefile index cb9fe049b..1bf6ece6e 100644 --- a/transforms/universal/filter/ray/Makefile +++ b/transforms/universal/filter/ray/Makefile @@ -32,7 +32,7 @@ publish:: publish-dist publish-image publish-image:: .transforms.publish-image-ray set-versions: - $(MAKE) TRANSFORM_PYTHON_VERSION=$(DOCKER_IMAGE_VERSION) .transforms.set-versions + $(MAKE) TRANSFORM_PYTHON_VERSION=$(FILTER_PYTHON_VERSION) .transforms.set-versions build-dist:: set-versions .defaults.build-dist diff --git a/transforms/universal/profiler/ray/Makefile b/transforms/universal/profiler/ray/Makefile index 918bd286e..5c53ae38a 100644 --- a/transforms/universal/profiler/ray/Makefile +++ b/transforms/universal/profiler/ray/Makefile @@ -29,7 +29,7 @@ publish:: publish-dist publish-image publish-image:: .transforms.publish-image-ray set-versions: - $(MAKE) TRANSFORM_PYTHON_VERSION=$(DOCKER_IMAGE_VERSION) .transforms.set-versions + $(MAKE) TRANSFORM_PYTHON_VERSION=not-used .transforms.set-versions build-dist:: set-versions .defaults.build-dist diff --git a/transforms/universal/tokenization/ray/Makefile b/transforms/universal/tokenization/ray/Makefile index 8f1378697..989dfb778 100644 --- a/transforms/universal/tokenization/ray/Makefile +++ b/transforms/universal/tokenization/ray/Makefile @@ -31,7 +31,7 @@ publish-image:: .transforms.publish-image-ray setup:: .transforms.setup set-versions: - $(MAKE) TRANSFORM_PYTHON_VERSION=$(DOCKER_IMAGE_VERSION) .transforms.set-versions + $(MAKE) TRANSFORM_PYTHON_VERSION=$(TOKENIZATION_PYTHON_VERSION) .transforms.set-versions build-dist:: set-versions .defaults.build-dist From 87a7549b5a2fc5591408e997ae13ae442bb573b6 Mon Sep 17 00:00:00 2001 From: David Wood Date: Mon, 24 Jun 2024 16:30:05 -0400 Subject: [PATCH 32/32] .make.versions changes - remove redancy and unused macros, and define section for the repo's 
version dependencies. Signed-off-by: David Wood --- .make.versions | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/.make.versions b/.make.versions index 374871290..ee9bd3c1b 100644 --- a/.make.versions +++ b/.make.versions @@ -8,6 +8,8 @@ # ################################################################################ +# See below for versions the repo depends on + # This major versions is generally changed manually when a breaking change to apis is made in the libraries, for example. DPK_MAJOR_VERSION=0 @@ -30,16 +32,6 @@ DPK_LIB_KFP_SHARED=$(DPK_VERSION) KFP_DOCKER_VERSION=$(DPK_VERSION) KFP_DOCKER_VERSION_v2=$(DPK_VERSION) -KFP_v2=2.7.0 -KFP_v1=1.8.22 -RAY=2.24.0 - -ifeq ($(KFPv2), 1) - WORKFLOW_SUPPORT_LIB=kfp_v2_workflow_support -else - WORKFLOW_SUPPORT_LIB=kfp_v1_workflow_support -endif - # Begin transform versions/tags BLOCKLIST_VERSION=$(DPK_VERSION) @@ -60,8 +52,6 @@ NOOP_SPARK_VERSION=$(DPK_VERSION) PROFILER_VERSION=$(DPK_VERSION) PROFILER_RAY_VERSION=$(DPK_VERSION) -RESIZE_VERSION=$(DPK_VERSION) - LANG_ID_PYTHON_VERSION=$(DPK_VERSION) LANG_ID_RAY_VERSION=$(DPK_VERSION) @@ -76,7 +66,6 @@ PROGLANG_SELECT_RAY_VERSION=$(DPK_VERSION) CODE_QUALITY_RAY_VERSION=$(DPK_VERSION) CODE_QUALITY_PYTHON_VERSION=$(DPK_VERSION) -CODE_QUALITY_RAY_VERSION=$(DPK_VERSION) CODE2PARQUET_PYTHON_VERSION=$(DPK_VERSION) CODE2PARQUET_RAY_VERSION=$(DPK_VERSION) @@ -85,3 +74,17 @@ INGEST_TO_PARQUET_VERSION=$(DPK_VERSION) KFP_DOCKER_VERSION=$(DPK_VERSION) KFP_DOCKER_VERSION_v2=$(DPK_VERSION) +################## ################## ################## ################## ################## ################## +# Begin versions that the repo depends on. + +KFP_v2=2.7.0 +KFP_v1=1.8.22 +RAY=2.24.0 + +ifeq ($(KFPv2), 1) + WORKFLOW_SUPPORT_LIB=kfp_v2_workflow_support +else + WORKFLOW_SUPPORT_LIB=kfp_v1_workflow_support +endif + +