diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6bd1d783b..af61c07a2 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -9,6 +9,14 @@ on: branches: - "dev" jobs: + test-make: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Test top-level recursive make targets. + run: | + make -n clean test build publish set-versions test-python-lib: runs-on: ubuntu-latest steps: diff --git a/.make.defaults b/.make.defaults index 11b4c8a83..be2d30dee 100644 --- a/.make.defaults +++ b/.make.defaults @@ -255,8 +255,6 @@ __check_defined = \ # PYTHON_PROJECT_DIR is expected to have src and pyproject.toml .PHONY: .defaults.install-src-venv .defaults.install-src-venv:: - @# Help: Install Ray and Python data processing library source into existing venv - @echo Installing Ray and Python data processing library source to existing venv @source venv/bin/activate; \ if [ ! -z "$(EXTRA_INDEX_URL)" ]; then \ extra_url='--extra-index-url $(EXTRA_INDEX_URL)'; \ @@ -305,9 +303,10 @@ __check_defined = \ @# Help: Install Ray and Python data processing library source into existing venv @echo Installing Ray and Python data processing library source to existing venv @source venv/bin/activate; \ + pip install pytest; \ pip install pytest pytest-cov; \ - $(MAKE) PIP_TARGET=data-prep-toolkit .defaults.pip-uninstall; \ $(MAKE) PIP_TARGET=data-prep-toolkit-ray .defaults.pip-uninstall; \ + $(MAKE) PIP_TARGET=data-prep-toolkit .defaults.pip-uninstall; \ $(MAKE) PYTHON_PROJECT_DIR=$(DPK_PYTHON_LIB_DIR) .defaults.install-src-venv; \ $(MAKE) PYTHON_PROJECT_DIR=$(DPK_RAY_LIB_DIR) .defaults.install-src-venv; \ echo Installed source from Ray data processing library for `which $(PYTHON)`; \ @@ -327,8 +326,8 @@ __check_defined = \ @echo Installing Spark and Python data processing library source to existing venv @source venv/bin/activate; \ pip install pytest; \ - $(MAKE) PIP_TARGET=data-prep-toolkit .defaults.pip-uninstall; 
\ $(MAKE) PIP_TARGET=data-prep-toolkit-spark .defaults.pip-uninstall; \ + $(MAKE) PIP_TARGET=data-prep-toolkit .defaults.pip-uninstall; \ $(MAKE) PYTHON_PROJECT_DIR=$(DPK_PYTHON_LIB_DIR) .defaults.install-src-venv; \ $(MAKE) PYTHON_PROJECT_DIR=$(DPK_SPARK_LIB_DIR) .defaults.install-src-venv; \ if [ -d ../python ]; then \ @@ -495,32 +494,38 @@ MINIO_ADMIN_PWD= localminiosecretkey # Expects TOML_VERSION .PHONY: .defaults.update-toml .defaults.update-toml: - $(MAKE) TOML_VERSION=$(TOML_VERSION) .defaults.set-toml-version - $(MAKE) .defaults.update-toml-lib-dep-versions + if [ -e pyproject.toml ]; then \ + $(MAKE) TOML_VERSION=$(TOML_VERSION) .defaults.__set-toml-version; \ + $(MAKE) .defaults.__update-toml-lib-dep-versions; \ + fi # Changes the version field of the pyproject.toml file to the given version # Expects TOML_VERSION -.PHONY: .defaults.set-toml-version -.defaults.set-toml-version: +.PHONY: .defaults.__set-toml-version +.defaults.__set-toml-version: @# Help: Set the version= field of pyproject.toml - @cat pyproject.toml | sed -e \ - 's/^version[ ]*=.*/version = "'${TOML_VERSION}'"/' \ - > tt.toml - @mv tt.toml pyproject.toml + if [ -e pyproject.toml ]; then \ + cat pyproject.toml | sed -e \ + 's/^version[ ]*=.*/version = "'${TOML_VERSION}'"/' \ + > tt.toml; \ + mv tt.toml pyproject.toml; \ + fi # Updates the versions references to our repo source as defined in .make.versions -.PHONY: .defaults.update-toml-lib-dep-versions -.defaults.update-toml-lib-dep-versions: +.PHONY: .defaults.__update-toml-lib-dep-versions +.defaults.__update-toml-lib-dep-versions: @# Help: Update pyproject.toml to depend on lib versions defined in .make.versions - @cat pyproject.toml | sed \ + @if [ -e pyproject.toml ]; then \ + cat pyproject.toml | sed \ -e 's/"data-prep-toolkit-ray\(..\).*",/"data-prep-toolkit-ray\1$(DPK_LIB_VERSION)",/' \ -e 's/"data-prep-toolkit-spark\(..\).*",/"data-prep-toolkit-spark\1$(DPK_LIB_VERSION)",/' \ -e 
's/"data-prep-toolkit-kfp\([=><][=><]\).*",/"data-prep-toolkit-kfp\1$(DPK_LIB_KFP_VERSION)",/' \ -e 's/"data-prep-toolkit\([=><][=><]\).*",/"data-prep-toolkit\1$(DPK_LIB_VERSION)",/' \ -e 's/"ray\[default\]\([=><][=><]\).*",/"ray\[default\]\1$(RAY)",/' \ -e 's/"data-prep-toolkit-kfp-shared\(..\).*",/"data-prep-toolkit-kfp-shared\1$(DPK_LIB_KFP_VERSION)",/' \ - > tt.toml - mv tt.toml pyproject.toml + > tt.toml; \ + mv tt.toml pyproject.toml; \ + fi # Build the distribution, usually in preparation for publishing using ith the .defaults.publish-dist target .PHONY: .defaults.build-dist @@ -545,6 +550,3 @@ MINIO_ADMIN_PWD= localminiosecretkey fi ${PYTHON} -m twine check dist/* ${PYTHON} -m twine upload --verbose --non-interactive dist/* - #@echo "create a git tag to reference published version" - #@git tag ${TAG} - #@git push origin ${TAG} diff --git a/.make.versions b/.make.versions index d19d66fa2..ee9bd3c1b 100644 --- a/.make.versions +++ b/.make.versions @@ -2,69 +2,89 @@ # Here we attempt to capture/define all the version numbers used across the # repository in Makefile format. These are generally considered the version # numbers TO BE published on the NEXT release/publishing of artifacts. +# +# NOTE: If you modify any of the version numbers, you MUST run "make set-versions" +# from the top of the repo to have the new versions applied throughout the repo. +# ################################################################################ -# do -# % make RELEASE_VERSION_SUFFIX= set-version -# % git push, tag, etc. -# % make build, publish, etc. 
-RELEASE_VERSION_SUFFIX=.dev6 +# See below for versions the repo depends on -# Data prep lab wheel version -DPK_LIB_VERSION=0.2.0$(RELEASE_VERSION_SUFFIX) -DPK_LIB_KFP_VERSION=0.2.0$(RELEASE_VERSION_SUFFIX) -KFP_DOCKER_VERSION=0.2.0$(RELEASE_VERSION_SUFFIX) -KFP_DOCKER_VERSION_v2=0.2.0$(RELEASE_VERSION_SUFFIX) +# The major version is generally changed manually when a breaking change to apis is made in the libraries, for example. +DPK_MAJOR_VERSION=0 +# The minor version is incremented manually when significant features have been added that are backward compatible with the previous major.minor release. +DPK_MINOR_VERSION=2 +# The micro version is incremented AUTOMATICALLY by the release.sh script when a new release is set. +DPK_MICRO_VERSION=0 +# The suffix is generally always set in the main/development branch and only nulled out when creating release branches. +# It can be manually incremented, for example, to allow publishing a new intermediate version wheel to pypi. +DPK_VERSION_SUFFIX=.dev6 -KFP_v2=2.7.0 -KFP_v1=1.8.22 -RAY=2.24.0 +DPK_VERSION=$(DPK_MAJOR_VERSION).$(DPK_MINOR_VERSION).$(DPK_MICRO_VERSION)$(DPK_VERSION_SUFFIX) -ifeq ($(KFPv2), 1) - WORKFLOW_SUPPORT_LIB=kfp_v2_workflow_support -else - WORKFLOW_SUPPORT_LIB=kfp_v1_workflow_support -endif +# Data prep lab wheel version +DPK_LIB_VERSION=$(DPK_VERSION) +DPK_LIB_KFP_VERSION=$(DPK_VERSION) +DPK_LIB_KFP_VERSION_v2=$(DPK_VERSION) +DPK_LIB_KFP_SHARED=$(DPK_VERSION) + +KFP_DOCKER_VERSION=$(DPK_VERSION) +KFP_DOCKER_VERSION_v2=$(DPK_VERSION) # Begin transform versions/tags -BLOCKLIST_VERSION=0.4.2$(RELEASE_VERSION_SUFFIX) +BLOCKLIST_VERSION=$(DPK_VERSION) -DOC_ID_RAY_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX) -DOC_ID_SPARK_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX) +DOC_ID_RAY_VERSION=$(DPK_VERSION) +DOC_ID_SPARK_VERSION=$(DPK_VERSION) -EDEDUP_RAY_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX) +EDEDUP_RAY_VERSION=$(DPK_VERSION) -FDEDUP_RAY_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX) +FDEDUP_RAY_VERSION=$(DPK_VERSION) 
-FILTER_PYTHON_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX) -FILTER_RAY_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX) -FILTER_SPARK_VERSION=0.2.0$(RELEASE_VERSION_SUFFIX) +FILTER_PYTHON_VERSION=$(DPK_VERSION) +FILTER_RAY_VERSION=$(DPK_VERSION) +FILTER_SPARK_VERSION=$(DPK_VERSION) -NOOP_PYTHON_VERSION=0.9.0$(RELEASE_VERSION_SUFFIX) -NOOP_RAY_VERSION=0.9.0$(RELEASE_VERSION_SUFFIX) -NOOP_SPARK_VERSION=0.2.0$(RELEASE_VERSION_SUFFIX) -PROFILER_RAY_VERSION=0.2.0$(RELEASE_VERSION_SUFFIX) +NOOP_PYTHON_VERSION=$(DPK_VERSION) +NOOP_RAY_VERSION=$(DPK_VERSION) +NOOP_SPARK_VERSION=$(DPK_VERSION) +PROFILER_VERSION=$(DPK_VERSION) +PROFILER_RAY_VERSION=$(DPK_VERSION) -RESIZE_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX) +LANG_ID_PYTHON_VERSION=$(DPK_VERSION) +LANG_ID_RAY_VERSION=$(DPK_VERSION) -LANG_ID_PYTHON_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX) -LANG_ID_RAY_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX) +TOKENIZATION_RAY_VERSION=$(DPK_VERSION) +TOKENIZATION_PYTHON_VERSION=$(DPK_VERSION) -TOKENIZATION_RAY_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX) -TOKENIZATION_PYTHON_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX) +MALWARE_RAY_VERSION=$(DPK_VERSION) +MALWARE_PYTHON_VERSION=$(DPK_VERSION) -MALWARE_PYTHON_VERSION=0.5.0$(RELEASE_VERSION_SUFFIX) -MALWARE_RAY_VERSION=0.5.0$(RELEASE_VERSION_SUFFIX) +PROGLANG_SELECT_PYTHON_VERSION=$(DPK_VERSION) +PROGLANG_SELECT_RAY_VERSION=$(DPK_VERSION) -PROGLANG_SELECT_RAY_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX) -PROGLANG_SELECT_PYTHON_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX) +CODE_QUALITY_RAY_VERSION=$(DPK_VERSION) +CODE_QUALITY_PYTHON_VERSION=$(DPK_VERSION) -CODE_QUALITY_PYTHON_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX) -CODE_QUALITY_RAY_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX) +CODE2PARQUET_PYTHON_VERSION=$(DPK_VERSION) +CODE2PARQUET_RAY_VERSION=$(DPK_VERSION) +INGEST_TO_PARQUET_VERSION=$(DPK_VERSION) -CODE2PARQUET_PYTHON_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX) -CODE2PARQUET_RAY_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX) +KFP_DOCKER_VERSION=$(DPK_VERSION) +KFP_DOCKER_VERSION_v2=$(DPK_VERSION) + 
+################## ################## ################## ################## ################## ################## +# Begin versions that the repo depends on. + +KFP_v2=2.7.0 +KFP_v1=1.8.22 +RAY=2.24.0 + +ifeq ($(KFPv2), 1) + WORKFLOW_SUPPORT_LIB=kfp_v2_workflow_support +else + WORKFLOW_SUPPORT_LIB=kfp_v1_workflow_support +endif -INGEST_TO_PARQUET_VERSION=0.4.0$(RELEASE_VERSION_SUFFIX) diff --git a/.secrets.baseline b/.secrets.baseline index 25be48bab..45e93b490 100644 --- a/.secrets.baseline +++ b/.secrets.baseline @@ -132,3 +132,4 @@ "hash": null } } + diff --git a/Makefile b/Makefile index 39cfa87ad..5e9a70e82 100644 --- a/Makefile +++ b/Makefile @@ -51,15 +51,27 @@ set-versions: @# Help: Recursively $@ in all subdirs @$(MAKE) RULE=$@ .recurse -lib-release: - @# Help: Publish data-prep-kit $(DPK_LIB_VERSION) and data-prep-kit-kfp $(DPK_LIB_KFP_VERSION) libraries to pypi - @$(MAKE) -C $(DPK_PYTHON_LIB_DIR) build publish - @$(MAKE) -C $(DPK_RAY_LIB_DIR) build publish - @$(MAKE) -C $(DPK_SPARK_LIB_DIR) build publish - @$(MAKE) -C kfp/kfp_support_lib build publish - @echo "" - @echo "This modified files in the repo. Please be sure to commit/push back to the repository." - @echo "" +#set-release-verions: +# @# Help: Update all internally used versions to not include the release suffix. +# @$(MAKE) DPK_VERSION_SUFFIX= set-versions + +#lib-release: +# @# Help: Set versions to be unsuffixed and publish libraries +# @$(MAKE) set-release-versions +# @$(MAKE) publish-lib + +show-version: + @echo $(DPK_VERSION) + +#publish-lib: +# @# Help: Publish data-prep-kit $(DPK_LIB_VERSION) and data-prep-kit-kfp $(DPK_LIB_KFP_VERSION) libraries to pypi +# @$(MAKE) -C $(DPK_PYTHON_LIB_DIR) build publish +# @$(MAKE) -C $(DPK_RAY_LIB_DIR) build publish +# @$(MAKE) -C $(DPK_SPARK_LIB_DIR) build publish +# @$(MAKE) -C kfp/kfp_support_lib build publish +# @echo "" +# @echo "This modified files in the repo. Please be sure to commit/push back to the repository." 
+# @echo "" diff --git a/RELEASE.md b/RELEASE.md new file mode 100644 index 000000000..91e4b8ca9 --- /dev/null +++ b/RELEASE.md @@ -0,0 +1,57 @@ +# Release Management + +Releases are created from the main repository branch using the version +numbers, including an intermediate version suffix, +defined in `.make.versions`. +The following points are important: + +1. `.make.versions` contains the version to be used when publishing the **next** release. +1. The main branch always has the version suffix set to .dev, which +allows intermediate publishing from the dev branch using version X.Y.Z.dev. +2. In general, a common version number is used for all published pypi wheels and docker images. +3. The `scripts/release.sh` script automates the following: + 1. Creating a `releases/vX.Y.Z` branch and `vX.Y.Z` tag + 2. Nulling out the version suffix in the new branch's `.make.versions` file. + 3. Applying the unsuffixed versions to the artifacts published from the repo. + 4. Building and publishing the wheels to pypi and images to a docker registry. + 5. Incrementing the micro version and resetting the suffix in the main branch. + +# Cutting the release +Creating the release requires running the `release.sh` script and optionally +generating a release on github. The latter can be performed manually +once the `release.sh` script has done its work. + +## release.sh +Running `release.sh` requires credentials to publish to the various cloud locations. + +For docker registry publishing, the following environment variables/credentials are needed: + +* DPK_DOCKER_REGISTRY_USER - user used with the registry defined in DOCKER_HOST in `.make.defaults` +* DPK_DOCKER_REGISTRY_KEY - key/password for docker registry user. + +To publish to pypi, the credentials in the `~/.pypirc` file are used (let us know if there is a way to do +this with environment variables). +See [pypi](https://packaging.python.org/en/latest/specifications/pypirc/) for details. 
+ +To see the version that will be published, +``` +make DPK_VERSION_SUFFIX= show-version +``` +This will print, for example, 1.2.3. + +To generate the release: +```shell +bash scripts/release.sh +``` + +## Github release +After running the `release.sh` script, which creates tag `vX.Y.Z` and branch `releases/vX.Y.Z`: +1. Go to the [releases page](https://github.com/IBM/data-prep-kit/releases). +2. Select `Draft a new release` +3. Select `Choose a tag -> vX.Y.Z` +4. Press `Generate release notes` +5. Add a title (e.g., Release X.Y.Z) +6. Add any additional release notes. +7. Press `Publish release` + + diff --git a/doc/repo.md b/doc/repo.md index 435cd8409..dcaa2f254 100644 --- a/doc/repo.md +++ b/doc/repo.md @@ -1,12 +1,20 @@ # Repository Structure and Use +Here we discuss the structure, use and approach to code management in the repo. + # Repository structure * data_processing_lib - provides the core transform framework and library -supporting data transformations in a Ray cluster +supporting data transformations in 3 runtimes + * python + * ray + * spark + * transform * universal - * ededup + * noop + * python * ray + * spark * kfp_ray * ... * code @@ -24,83 +32,60 @@ supporting data transformations in a Ray cluster # Build and Makefiles Makefiles are used for operations performed across all projects in the directory tree. -Using specific rules from the top of the repository tree will recurse their execution -into subdirectories until subdirectories provide a Makefile that implements the action -and/or recurses further. For example, -```shell -make test -``` -will apply the `make test` rule into all sub-directories supporting such recursion. -Try `make help` to see the set of available targets in a directory. For example, -from the root of the repo... 
-``` -Target Description ------- ----------- -build Recursively build in all subdirs -clean Recursively clean in all subdirs -setup Recursively setup in all subdirs -test Recursively test in all subdirs -``` -or from a transform project directory -``` -cd transforms/universal/noop -make help -Target Description ------- ----------- -build Create the venv and build the transform image -clean Clean up the virtual environment. -conventions Check transform project conventions and make recommendations, if needed. -image Create the docker image quay.io/dataprep1/data-prep-kit/noop:0.7 -publish Publish the quay.io/dataprep1/data-prep-kit/noop:0.7 to quay.io container registry -setup Build kfp workflow for noop. -test Run both source and image level tests. -test-image Test an quay.io/dataprep1/data-prep-kit/noop:0.7 use test source inside the image. -test-locals Run the *local*.py files in the src directory -test-src Run the transform's tests and any '*local' .py files -venv Install the source from the data processing library for python -workflow-reconcile-requirements Recursively make workflow-reconcile-requirements in all subdirs -workflow-test Recursively make workflow-test in subdirs -workflow-upload Recursively make workflow-upload in subdirs -workflow-venv Recursively make workflow-venv in subdirs -``` +There are two types of users envisioned to use the make files. -The `workflow-` related Makefile targets are dedicated for handling the [Kubeflow Pipelines](https://github.com/kubeflow/pipelines) workflows for the specified transforms. - -Overridable macro values include the following: -DOCKER - the name of the docker executable to use. DOCKER=docker -DOCKER_FILE - the name of the docker file to use. DOCKER_FILE=Dockerfile -DOCKER_REGISTRY_ENDPOINT - the docker registry location to publish images. DOCKER_REGISTRY_ENDPOINT=quay.io/dataprep1/data-prep-kit -DOCKER_HOSTNAME - the name of the docker registry to use. 
DOCKER_HOSTNAME=quay.io -DOCKER_NAMESPACE - the name space to use in the registry. DOCKER_NAMESPACE=dataprep1 -DOCKER_NAME - the name under the name space where images are publishes. DOCKER_NAME=data-prep-kit -DOCKER_REGISTRY_USER - the docker user to use. DOCKER_REGISTRY_USER=dataprep1 -DOCKER_REGISTRY_KEY - the docker user to use. DOCKER_REGISTRY_KEY=secret -PYTHON - the python executable to use. PYTHON=python -DOCKER_IMAGE_NAME - the name of the docker image to produce. DOCKER_IMAGE_NAME=noop -TRANSFORM_SRC_FILE is the base name of the python source file containing the main() (e.g. noop_local_ray.py) - -Macros that require definition in the including Makefile -REPOROOT defines the root directory of this repository (such as ../../..) -TRANSFORM_NAME defines the name of the transform and is used to define defaults for... - DOCKER_IMAGE_NAME and TRANSFORM_SRC_FILE. For, example 'noop' -DOCKER_IMAGE_VERSION - the version of the docker image to produce. DOCKER_IMAGE_VERSION=0.7 -``` +There are two types of users envisioned to use the make files. -If you'd like to build each component separately, you can move into the sub-directories as desired. -If planning to develop and/or use on Apple Mac please see these [considerations](mac.md). +* administrators - perform git actions and release management +* developers - work with core libraries and transforms -## Data Prep Kit Library -To build the wheel for the data processing library and publish it to a pypi... -```shell -cd data-processing-lib -make test build publish -``` +Each directory has access to a `make help` target that will show all available targets. -## Administrators +Generally, administrators will issue make commands from the top of the repository to, for example, +publish a new release. The top level make file provides a set of targets that +are executed recursively, which as a result are expected to be implemented by +sub-directories. 
These targets and their semantics, which are expected to be implemented, +as appropriate, in the sub-directories, are as follows: + +* clean - Restore the directory to as close to initial repository clone state as possible. +* build - Build all components contained in a given sub-directory. +This might include pypi distributions, images, etc. +* test - Test all components contained in a given sub-directory. +* publish - Publish any components in the sub-directory. +This might include things published to pypi or the docker registry. +* set-versions - apply the DPK_VERSION to all published components. + +Sub-directories are free to define these as empty/no-op targets, but generally are required +to define them unless a parent directory does not recurse into the directory. + +## Developers +Generally, developers will be working in a python project directory +(e.g., data-processing-lib/python, transforms/universal/filter, etc.) +and can issue the administrator's make targets (e.g., build, test, etc.) +or others that might be defined locally +(e.g., venv, test-image, test-src in transform projects). +Key targets are as follows: + +* venv - creates the virtual environment from either a pyproject.toml or requirements.txt file. +* publish - publish libraries or docker images as appropriate. +This is generally only used during release generation. + +If working with an IDE, one generally makes the venv, then configures the IDE to +reference the venv, src and test directories. + +Transform projects generally include these transform project-specific targets for convenience, +which are triggered with the `test` target. + +* test-src - test python tests in the test directory +* test-image - build and test the docker image for the transform + +Please also consult [transform project conventions](../transforms/README.md#transform-project-conventions) for +additional considerations when developing transforms. 
+ +### Transforms and KFP +The kfp_ray directories in the transform projects provide +`workflow-` targets and are dedicated to handling the +[Kubeflow Pipelines](https://github.com/kubeflow/pipelines) +workflows for the specified transforms. -## Transforms -To create all transform images and publish them (by default to quay.io) -```shell -cd transforms -make venv test-src -make image test-image publish ``` diff --git a/kfp/superworkflows/Makefile b/kfp/superworkflows/Makefile index 3fba19299..0b79f3b8e 100644 --- a/kfp/superworkflows/Makefile +++ b/kfp/superworkflows/Makefile @@ -4,6 +4,9 @@ include ${REPOROOT}/.make.defaults KFPv2 ?= 0 +.PHONY: publish +publish: + .PHONY: workflow-venv workflow-venv: ifeq ($(KFPv2), 1) diff --git a/kfp/superworkflows/ray/kfp_v1/superworkflow_code_sample_wf.py b/kfp/superworkflows/ray/kfp_v1/superworkflow_code_sample_wf.py index a00d8434f..f7a46fb7e 100644 --- a/kfp/superworkflows/ray/kfp_v1/superworkflow_code_sample_wf.py +++ b/kfp/superworkflows/ray/kfp_v1/superworkflow_code_sample_wf.py @@ -15,13 +15,13 @@ run_fuzzy_dedup_op = comp.load_component_from_file(component_spec_path + "executeSubWorkflowComponent.yaml") run_tokenization_op = comp.load_component_from_file(component_spec_path + "executeSubWorkflowComponent.yaml") -proglang_select_image = "quay.io/dataprep1/data-prep-kit/proglang_select-ray:0.4.0.dev6" -code_quality_image = "quay.io/dataprep1/data-prep-kit/code_quality-ray:0.4.0.dev6" -malware_image = "quay.io/dataprep1/data-prep-kit/malware-ray:0.5.0.dev6" -doc_id_image = "quay.io/dataprep1/data-prep-kit/doc_id-ray:0.4.0.dev6" -ededup_image = "quay.io/dataprep1/data-prep-kit/ededup-ray:0.4.0.dev6" -fdedup_image = "quay.io/dataprep1/data-prep-kit/fdedup-ray:0.4.0.dev6" -tokenizer_image = "quay.io/dataprep1/data-prep-kit/tokenization-ray:0.4.0.dev6" +proglang_select_image = "quay.io/dataprep1/data-prep-kit/proglang_select-ray:0.2.0.dev6" +code_quality_image = "quay.io/dataprep1/data-prep-kit/code_quality-ray:0.2.0.dev6" 
+malware_image = "quay.io/dataprep1/data-prep-kit/malware-ray:0.2.0.dev6" +doc_id_image = "quay.io/dataprep1/data-prep-kit/doc_id-ray:0.2.0.dev6" +ededup_image = "quay.io/dataprep1/data-prep-kit/ededup-ray:0.2.0.dev6" +fdedup_image = "quay.io/dataprep1/data-prep-kit/fdedup-ray:0.2.0.dev6" +tokenizer_image = "quay.io/dataprep1/data-prep-kit/tokenization-ray:0.2.0.dev6" # Pipeline to invoke execution on remote resource diff --git a/kfp/superworkflows/ray/kfp_v1/superworkflow_dedups_sample_wf.py b/kfp/superworkflows/ray/kfp_v1/superworkflow_dedups_sample_wf.py index 07c831718..fde560b35 100644 --- a/kfp/superworkflows/ray/kfp_v1/superworkflow_dedups_sample_wf.py +++ b/kfp/superworkflows/ray/kfp_v1/superworkflow_dedups_sample_wf.py @@ -12,9 +12,9 @@ run_exact_dedup_op = comp.load_component_from_file(component_spec_path + "executeSubWorkflowComponent.yaml") run_fuzzy_dedup_op = comp.load_component_from_file(component_spec_path + "executeSubWorkflowComponent.yaml") -doc_id_image = "quay.io/dataprep1/data-prep-kit/doc_id-ray:0.4.0.dev6" -ededup_image = "quay.io/dataprep1/data-prep-kit/ededup-ray:0.4.0.dev6" -fdedup_image = "quay.io/dataprep1/data-prep-kit/fdedup-ray:0.4.0.dev6" +doc_id_image = "quay.io/dataprep1/data-prep-kit/doc_id-ray:0.2.0.dev6" +ededup_image = "quay.io/dataprep1/data-prep-kit/ededup-ray:0.2.0.dev6" +fdedup_image = "quay.io/dataprep1/data-prep-kit/fdedup-ray:0.2.0.dev6" # Pipeline to invoke execution on remote resource @dsl.pipeline( diff --git a/kind/Makefile b/kind/Makefile index a80417333..be54808e1 100644 --- a/kind/Makefile +++ b/kind/Makefile @@ -33,6 +33,9 @@ endif $(MAKE) cluster-deploy @echo "setup-cluster completed" +.PHONY: publish +publish: + populate-data:: @# Help: Populate test data in Minio cd ${KIND_SCRIPTS} && ./populate_minio.sh diff --git a/scripts/release.sh b/scripts/release.sh new file mode 100644 index 000000000..859d24684 --- /dev/null +++ b/scripts/release.sh @@ -0,0 +1,102 @@ +debug=echo +dbg_suffix=.dev7 +# Assume this file 
is in the reporoot/scripts directory +reporoot=$(dirname $0)/.. +cd $reporoot + +# Make sure required env vars are set +if [ -z "$DPK_DOCKER_REGISTRY_USER" ]; then + echo DPK_DOCKER_REGISTRY_USER env var must be set + exit 1 +elif [ -z "$DPK_DOCKER_REGISTRY_KEY" ]; then + echo DPK_DOCKER_REGISTRY_KEY env var must be set + exit 1 +fi +if [ ! -e ~/.pypirc ]; then + cat << EOF +You need a ~/.pypirc containing pypi.org credentials. +See https://packaging.python.org/en/latest/specifications/pypirc/ for details. +EOF + exit +fi +exit + +if [ -z "$debug" ]; then + DEFAULT_BRANCH=dev +else + DEFAULT_BRANCH=releasing-copy +fi + +# Make sure we're starting from the base branch +get fetch +git checkout $DEFAULT_BRANCH + +# Get the currently defined version w/o any suffix. This is the next release version +version=$(make DPK_VERSION_SUFFIX= show-version) + +if [ -z "$debug" ]; then + tag=v$version +else + tag=test$version +fi + +# Create a new branch for this version and switch to it +release_branch=releases/$tag +if [ ! 
-z "$debug" ]; then + # delete local tag and branch + git tag --delete $tag + git branch --delete $release_branch + # delete remote tag and branch + git push --delete origin $tag + git push --delete origin $release_branch +fi +git checkout -b $release_branch + + +# Remove the release suffix in this branch +# Apply the unsuffixed version to the repo and check it into this release branch +if [ -z "$debug" ]; then + cat .make.versions | sed -e 's/^DPK_VERSION_SUFFIX.*/DPK_VERSION_SUFFIX=/' > tt + mv tt .make.versions +else + cat .make.versions | sed -e "s/^DPK_VERSION_SUFFIX.*/DPK_VERSION_SUFFIX=$dbg_suffix/" > tt + mv tt .make.versions +fi +# Apply the version change to all files in the repo +make set-versions + +# Commit the changes to the release branch and tag it +git status +git commit -s -a -m "Cut release $version" +git push --set-upstream origin $release_branch +git tag -a -s -m "Cut release $version" $tag +git push origin $tag + +# Now build with the updated version +# Requires quay credentials in the environment, DPK_DOCKER_REGISTRY_USER, DPK_DOCKER_REGISTRY_KEY +if [ -z "$debug" ]; then + make build publish +else + # make -C data-processing-lib/spark image # Build the base image required by spark + make -C transforms/universal/noop/python build publish +fi + +# Now go back to the default branch so we can bump the micro version number and reset the version suffix +git checkout $DEFAULT_BRANCH + +# Change to the next development version (bumped micro version with suffix). 
+micro=$(cat .make.versions | grep '^DPK_MICRO_VERSION=' | sed -e 's/DPK_MICRO_VERSION=\([0-9]*\).*/\1/') +micro=$(($micro + 1)) +cat .make.versions | sed -e "s/^DPK_MICRO_VERSION=.*/DPK_MICRO_VERSION=$micro/" \ + -e "s/^DPK_VERSION_SUFFIX=.*/DPK_VERSION_SUFFIX=.dev0/" > tt +mv tt .make.versions +# Apply the version change to all files in the repo +make set-versions + +# Push the version change back to the origin +next_version=$(make show-version) +git commit -s -a -m "Bump micro version to $next_version after cutting release $version into branch $release_branch" +git diff origin/$DEFAULT_BRANCH $DEFAULT_BRANCH +if [ -z "$debug" ]; then + git push origin +fi diff --git a/transforms/.gitignore b/transforms/.gitignore index ce6cb6566..2272a1fac 100644 --- a/transforms/.gitignore +++ b/transforms/.gitignore @@ -2,3 +2,4 @@ /**/data-processing-lib-python /**/data-processing-lib-spark /**/data-processing-lib-ray +/**/python-transform diff --git a/transforms/.make.transforms b/transforms/.make.transforms index 0cc8479b3..7b4be0dbb 100644 --- a/transforms/.make.transforms +++ b/transforms/.make.transforms @@ -323,14 +323,16 @@ minio-stop: # Requires version number of Python image as TRANSFORM_PYTHON_VERSION to be set when called # In addition, update Dockerfile with versions .transforms.set-versions: -ifneq ("$(wildcard pyproject.toml)","") $(call check_defined,DOCKER_IMAGE_VERSION) $(call check_defined, TRANSFORM_PYTHON_VERSION) $(MAKE) TOML_VERSION=$(DOCKER_IMAGE_VERSION) .defaults.update-toml - @cat pyproject.toml | sed \ - -e 's/dpk-$(TRANSFORM_NAME)-transform-python\([=<>][=<>]\).*/dpk-$(TRANSFORM_NAME)-transform-python\1$(TRANSFORM_PYTHON_VERSION)",/' \ - > tt.toml - @mv tt.toml pyproject.toml -endif + if [ -e pyproject.toml ]; then \ + dash_name=$$(echo $(TRANSFORM_NAME) | sed -e 's/_/-/g'); \ + cat pyproject.toml | sed \ + -e 's/"dpk-$(TRANSFORM_NAME)-transform-python\([=<>][=<>]\).*",/"dpk-$(TRANSFORM_NAME)-transform-python\1$(TRANSFORM_PYTHON_VERSION)",/' \ + -e 
's/"dpk-'$${dash_name}'-transform-python\([=<>][=<>]\).*",/"dpk-'$${dash_name}'-transform-python\1$(TRANSFORM_PYTHON_VERSION)",/' \ + > tt.toml; \ + mv tt.toml pyproject.toml; \ + fi $(REPOROOT)/scripts/transforms/update_dockerfile.sh ${RAY} diff --git a/transforms/code/code2parquet/kfp_ray/code2parquet_wf.py b/transforms/code/code2parquet/kfp_ray/code2parquet_wf.py index 3efcb0d5a..8c5f08ebc 100644 --- a/transforms/code/code2parquet/kfp_ray/code2parquet_wf.py +++ b/transforms/code/code2parquet/kfp_ray/code2parquet_wf.py @@ -21,7 +21,7 @@ # the name of the job script EXEC_SCRIPT_NAME: str = "code2parquet_transform_ray.py" -task_image = "quay.io/dataprep1/data-prep-kit/code2parquet-ray:0.4.0.dev6" +task_image = "quay.io/dataprep1/data-prep-kit/code2parquet-ray:0.2.0.dev6" # components diff --git a/transforms/code/code2parquet/python/pyproject.toml b/transforms/code/code2parquet/python/pyproject.toml index 935025000..8ca8f3e4e 100644 --- a/transforms/code/code2parquet/python/pyproject.toml +++ b/transforms/code/code2parquet/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_code2parquet_transform_python" -version = "0.4.0.dev6" +version = "0.2.0.dev6" requires-python = ">=3.10" description = "code2parquet Python Transform" license = {text = "Apache-2.0"} diff --git a/transforms/code/code2parquet/ray/pyproject.toml b/transforms/code/code2parquet/ray/pyproject.toml index 1d603e754..e6acdde20 100644 --- a/transforms/code/code2parquet/ray/pyproject.toml +++ b/transforms/code/code2parquet/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_code2parquet_transform_ray" -version = "0.4.0.dev6" +version = "0.2.0.dev6" requires-python = ">=3.10" description = "code2parquet Ray Transform" license = {text = "Apache-2.0"} @@ -11,7 +11,7 @@ authors = [ ] dependencies = [ "data-prep-toolkit-ray==0.2.0.dev6", - "dpk-code2parquet-transform-python==0.4.0.dev6", + "dpk-code2parquet-transform-python==0.2.0.dev6", "parameterized", "pandas", ] diff --git 
a/transforms/code/code_quality/kfp_ray/code_quality_wf.py b/transforms/code/code_quality/kfp_ray/code_quality_wf.py index 86d61bc5f..0542eb283 100644 --- a/transforms/code/code_quality/kfp_ray/code_quality_wf.py +++ b/transforms/code/code_quality/kfp_ray/code_quality_wf.py @@ -21,7 +21,7 @@ EXEC_SCRIPT_NAME: str = "code_quality_transform_ray.py" PREFIX: str = "" -task_image = "quay.io/dataprep1/data-prep-kit/code_quality-ray:0.4.0.dev6" +task_image = "quay.io/dataprep1/data-prep-kit/code_quality-ray:0.2.0.dev6" # components base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.0.dev6" diff --git a/transforms/code/code_quality/python/pyproject.toml b/transforms/code/code_quality/python/pyproject.toml index e5118d415..3a2af8238 100644 --- a/transforms/code/code_quality/python/pyproject.toml +++ b/transforms/code/code_quality/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_code_quality_transform_python" -version = "0.4.0.dev6" +version = "0.2.0.dev6" requires-python = ">=3.10" description = "Code Quality Python Transform" license = {text = "Apache-2.0"} diff --git a/transforms/code/code_quality/ray/Makefile b/transforms/code/code_quality/ray/Makefile index ee22fd547..9b8b7f794 100644 --- a/transforms/code/code_quality/ray/Makefile +++ b/transforms/code/code_quality/ray/Makefile @@ -8,16 +8,12 @@ TRANSFORM_NAME=code_quality # $(REPOROOT)/.make.versions file contains the versions DOCKER_IMAGE_VERSION=${CODE_QUALITY_RAY_VERSION} -# Use default rule inherited from makefile.common clean:: .transforms.clean -# Use default rule inherited from makefile.common test:: .transforms.ray-test -# Use default rule inherited from makefile.common image:: .transforms.ray-image -# Use default rule inherited from makefile.common venv:: .transforms.ray-venv test-src:: .transforms.test-src @@ -32,16 +28,13 @@ publish-image:: .transforms.publish-image-ray setup:: .transforms.setup -# distribution versions is the same as image version. 
set-versions: - $(MAKE) TRANSFORM_PYTHON_VERSION=${CODE_QUALITY_PYTHON_VERSION} TOML_VERSION=$(DOCKER_IMAGE_VERSION) .transforms.set-versions + $(MAKE) TRANSFORM_PYTHON_VERSION=$(CODE_QUALITY_PYTHON_VERSION) .transforms.set-versions build-dist:: set-versions .defaults.build-dist publish-dist:: .defaults.publish-dist -setup:: .transforms.setup - run-cli-sample: .transforms.run-cli-ray-sample run-local-sample: .transforms.run-local-ray-sample diff --git a/transforms/code/code_quality/ray/pyproject.toml b/transforms/code/code_quality/ray/pyproject.toml index 1b706ae25..aa0cc46eb 100644 --- a/transforms/code/code_quality/ray/pyproject.toml +++ b/transforms/code/code_quality/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_code_quality_transform_ray" -version = "0.4.0.dev6" +version = "0.2.0.dev6" requires-python = ">=3.10" description = "Code Quality Ray Transform" license = {text = "Apache-2.0"} @@ -9,7 +9,7 @@ authors = [ { name = "Shivdeep Singh", email = "shivdeep.singh@ibm.com" }, ] dependencies = [ - "dpk-code-quality-transform-python==0.4.0.dev6", + "dpk-code-quality-transform-python==0.2.0.dev6", "data-prep-toolkit-ray==0.2.0.dev6", ] diff --git a/transforms/code/malware/kfp_ray/malware_wf.py b/transforms/code/malware/kfp_ray/malware_wf.py index dd34933d3..4bc26c0e8 100644 --- a/transforms/code/malware/kfp_ray/malware_wf.py +++ b/transforms/code/malware/kfp_ray/malware_wf.py @@ -21,7 +21,7 @@ # the name of the job script EXEC_SCRIPT_NAME: str = "malware_transform_ray.py" -task_image = "quay.io/dataprep1/data-prep-kit/malware-ray:0.5.0.dev6" +task_image = "quay.io/dataprep1/data-prep-kit/malware-ray:0.2.0.dev6" # components base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.0.dev6" diff --git a/transforms/code/malware/python/pyproject.toml b/transforms/code/malware/python/pyproject.toml index 563503b5f..3130a9c72 100644 --- a/transforms/code/malware/python/pyproject.toml +++ b/transforms/code/malware/python/pyproject.toml @@ -1,6 
+1,6 @@ [project] name = "dpk_malware_transform_python" -version = "0.5.0.dev6" +version = "0.2.0.dev6" requires-python = ">=3.10" description = "Malware Python Transform" license = {text = "Apache-2.0"} diff --git a/transforms/code/malware/ray/Makefile b/transforms/code/malware/ray/Makefile index bd47bd504..91885cc40 100644 --- a/transforms/code/malware/ray/Makefile +++ b/transforms/code/malware/ray/Makefile @@ -39,9 +39,8 @@ publish-image:: .transforms.publish-image-ray setup:: .transforms.setup -# distribution versions is the same as image version. set-versions: - $(MAKE) TRANSFORM_PYTHON_VERSION=${MALWARE_PYTHON_VERSION} TOML_VERSION=$(MALWARE_RAY_VERSION) .transforms.set-versions + $(MAKE) TRANSFORM_PYTHON_VERSION=$(MALWARE_PYTHON_VERSION) .transforms.set-versions build-dist:: set-versions .defaults.build-dist diff --git a/transforms/code/malware/ray/pyproject.toml b/transforms/code/malware/ray/pyproject.toml index bd630831c..45970387e 100644 --- a/transforms/code/malware/ray/pyproject.toml +++ b/transforms/code/malware/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_malware_transform_ray" -version = "0.5.0.dev6" +version = "0.2.0.dev6" requires-python = ">=3.10" description = "Malware Ray Transform" license = {text = "Apache-2.0"} @@ -9,7 +9,7 @@ authors = [ { name = "Takuya Goto", email = "tkyg@jp.ibm.com" }, ] dependencies = [ - "dpk-malware-transform-python==0.5.0.dev6", + "dpk-malware-transform-python==0.2.0.dev6", "data-prep-toolkit-ray==0.2.0.dev6", ] diff --git a/transforms/code/proglang_select/kfp_ray/proglang_select_wf.py b/transforms/code/proglang_select/kfp_ray/proglang_select_wf.py index e3c41666e..c8f9c2e98 100644 --- a/transforms/code/proglang_select/kfp_ray/proglang_select_wf.py +++ b/transforms/code/proglang_select/kfp_ray/proglang_select_wf.py @@ -21,7 +21,7 @@ # the name of the job script EXEC_SCRIPT_NAME: str = "proglang_select_transform_ray.py" -task_image = "quay.io/dataprep1/data-prep-kit/proglang_select-ray:0.4.0.dev6" 
+task_image = "quay.io/dataprep1/data-prep-kit/proglang_select-ray:0.2.0.dev6" # components base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.0.dev6" diff --git a/transforms/code/proglang_select/python/pyproject.toml b/transforms/code/proglang_select/python/pyproject.toml index 072cd7d17..4eddd1769 100644 --- a/transforms/code/proglang_select/python/pyproject.toml +++ b/transforms/code/proglang_select/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_proglang_select_transform_python" -version = "0.4.0.dev6" +version = "0.2.0.dev6" requires-python = ">=3.10" description = "Programming Language Selection Python Transform" license = {text = "Apache-2.0"} diff --git a/transforms/code/proglang_select/ray/Makefile b/transforms/code/proglang_select/ray/Makefile index 78605c45c..0ee89a9c9 100644 --- a/transforms/code/proglang_select/ray/Makefile +++ b/transforms/code/proglang_select/ray/Makefile @@ -30,9 +30,8 @@ publish:: publish-dist publish-image publish-image:: .transforms.publish-image-ray -# distribution versions is the same as image version. 
set-versions: - $(MAKE) TRANSFORM_PYTHON_VERSION=${PROGLANG_SELECT_PYTHON_VERSION} TOML_VERSION=$(PROGLANG_SELECT_RAY_VERSION) .transforms.set-versions + $(MAKE) TRANSFORM_PYTHON_VERSION=$(PROGLANG_SELECT_PYTHON_VERSION) .transforms.set-versions build-dist:: set-versions .defaults.build-dist diff --git a/transforms/code/proglang_select/ray/pyproject.toml b/transforms/code/proglang_select/ray/pyproject.toml index 471bd5364..ad9ed878f 100644 --- a/transforms/code/proglang_select/ray/pyproject.toml +++ b/transforms/code/proglang_select/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_proglang_select_transform_ray" -version = "0.4.0.dev6" +version = "0.2.0.dev6" requires-python = ">=3.10" description = "Programming Language Selection Ray Transform" license = {text = "Apache-2.0"} @@ -9,7 +9,7 @@ authors = [ { name = "Shivdeep Singh", email = "shivdeep.singh@ibm.com" }, ] dependencies = [ - "dpk-proglang-select-transform-python==0.4.0.dev6", + "dpk-proglang-select-transform-python==0.2.0.dev6", "data-prep-toolkit-ray==0.2.0.dev6", ] diff --git a/transforms/language/lang_id/kfp_ray/lang_id_multiple_wf.py b/transforms/language/lang_id/kfp_ray/lang_id_multiple_wf.py index d49beb7e5..439fb21d2 100644 --- a/transforms/language/lang_id/kfp_ray/lang_id_multiple_wf.py +++ b/transforms/language/lang_id/kfp_ray/lang_id_multiple_wf.py @@ -17,7 +17,7 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/lang_id-ray:0.4.0.dev6" +task_image = "quay.io/dataprep1/data-prep-kit/lang_id-ray:0.2.0.dev6" # the name of the job script EXEC_SCRIPT_NAME: str = "lang_id_transform_ray.py" diff --git a/transforms/language/lang_id/kfp_ray/lang_id_wf.py b/transforms/language/lang_id/kfp_ray/lang_id_wf.py index eda4f5309..b849cdd77 100644 --- a/transforms/language/lang_id/kfp_ray/lang_id_wf.py +++ b/transforms/language/lang_id/kfp_ray/lang_id_wf.py @@ -17,7 +17,7 @@ from workflow_support.compile_utils import 
ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/lang_id-ray:0.4.0.dev6" +task_image = "quay.io/dataprep1/data-prep-kit/lang_id-ray:0.2.0.dev6" # the name of the job script EXEC_SCRIPT_NAME: str = "lang_id_transform_ray.py" diff --git a/transforms/language/lang_id/python/pyproject.toml b/transforms/language/lang_id/python/pyproject.toml index 000feab16..b881a15eb 100644 --- a/transforms/language/lang_id/python/pyproject.toml +++ b/transforms/language/lang_id/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_lang_id_transform_python" -version = "0.4.0.dev6" +version = "0.2.0.dev6" requires-python = ">=3.10" description = "Language Identification Python Transform" license = {text = "Apache-2.0"} diff --git a/transforms/language/lang_id/ray/pyproject.toml b/transforms/language/lang_id/ray/pyproject.toml index f230d193c..80cbb47ff 100644 --- a/transforms/language/lang_id/ray/pyproject.toml +++ b/transforms/language/lang_id/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_lang_id_transform_ray" -version = "0.4.0.dev6" +version = "0.2.0.dev6" requires-python = ">=3.10" description = "Language Identification Ray Transform" license = {text = "Apache-2.0"} @@ -9,7 +9,7 @@ authors = [ { name = "Daiki Tsuzuku", email = "dtsuzuku@jp.ibm.com" } ] dependencies = [ - "dpk-lang_id-transform-python==0.4.0.dev6", + "dpk-lang_id-transform-python==0.2.0.dev6", "data-prep-toolkit-ray==0.2.0.dev6" ] diff --git a/transforms/universal/doc_id/kfp_ray/doc_id_wf.py b/transforms/universal/doc_id/kfp_ray/doc_id_wf.py index deb647441..25c5d779f 100644 --- a/transforms/universal/doc_id/kfp_ray/doc_id_wf.py +++ b/transforms/universal/doc_id/kfp_ray/doc_id_wf.py @@ -17,7 +17,7 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/doc_id-ray:0.4.0.dev6" +task_image = "quay.io/dataprep1/data-prep-kit/doc_id-ray:0.2.0.dev6" # the name of the job script 
EXEC_SCRIPT_NAME: str = "doc_id_transform_ray.py" diff --git a/transforms/universal/doc_id/ray/Makefile b/transforms/universal/doc_id/ray/Makefile index 9c93b2771..df477b498 100644 --- a/transforms/universal/doc_id/ray/Makefile +++ b/transforms/universal/doc_id/ray/Makefile @@ -30,9 +30,8 @@ publish:: publish-dist publish-image publish-image:: .transforms.publish-image-ray -# distribution versions is the same as image version. set-versions: - $(MAKE) TRANSFORM_PYTHON_VERSION=${DOC_ID_PYTHON_VERSION} TOML_VERSION=$(DOCKER_IMAGE_VERSION) .transforms.set-versions + $(MAKE) TRANSFORM_PYTHON_VERSION=not-used .transforms.set-versions build-dist:: set-versions .defaults.build-dist diff --git a/transforms/universal/doc_id/spark/Makefile b/transforms/universal/doc_id/spark/Makefile index cf9cd7d0d..f9c3aac94 100644 --- a/transforms/universal/doc_id/spark/Makefile +++ b/transforms/universal/doc_id/spark/Makefile @@ -28,9 +28,8 @@ publish:: publish-dist publish-image publish-image:: .transforms.publish-image-spark -# distribution versions is the same as image version. 
set-versions: - $(MAKE) TOML_VERSION=$(DOCKER_IMAGE_VERSION) .transforms.set-versions + $(MAKE) TRANSFORM_PYTHON_VERSION=$(DOCKER_IMAGE_VERSION) .transforms.set-versions build-dist:: set-versions .defaults.build-dist diff --git a/transforms/universal/ededup/kfp_ray/ededup_wf.py b/transforms/universal/ededup/kfp_ray/ededup_wf.py index f8c1f0c4a..6ad1e323e 100644 --- a/transforms/universal/ededup/kfp_ray/ededup_wf.py +++ b/transforms/universal/ededup/kfp_ray/ededup_wf.py @@ -18,7 +18,7 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/ededup-ray:0.4.0.dev6" +task_image = "quay.io/dataprep1/data-prep-kit/ededup-ray:0.2.0.dev6" # the name of the job script EXEC_SCRIPT_NAME: str = "ededup_transform_ray.py" diff --git a/transforms/universal/ededup/ray/Makefile b/transforms/universal/ededup/ray/Makefile index d3e930d4f..25bcdd97b 100644 --- a/transforms/universal/ededup/ray/Makefile +++ b/transforms/universal/ededup/ray/Makefile @@ -28,9 +28,8 @@ publish:: publish-dist publish-image publish-image:: .transforms.publish-image-ray -# distribution versions is the same as image version. 
set-versions: - $(MAKE) TOML_VERSION=$(DOCKER_IMAGE_VERSION) .transforms.set-versions + $(MAKE) TRANSFORM_PYTHON_VERSION=not-used .transforms.set-versions build-dist:: set-versions .defaults.build-dist diff --git a/transforms/universal/fdedup/kfp_ray/fdedup_wf.py b/transforms/universal/fdedup/kfp_ray/fdedup_wf.py index 5c43acb10..df50a4af3 100644 --- a/transforms/universal/fdedup/kfp_ray/fdedup_wf.py +++ b/transforms/universal/fdedup/kfp_ray/fdedup_wf.py @@ -18,7 +18,7 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/fdedup-ray:0.4.0.dev6" +task_image = "quay.io/dataprep1/data-prep-kit/fdedup-ray:0.2.0.dev6" # the name of the job script EXEC_SCRIPT_NAME: str = "fdedup_transform_ray.py" diff --git a/transforms/universal/fdedup/ray/Makefile b/transforms/universal/fdedup/ray/Makefile index 2cde7c573..0a765650b 100644 --- a/transforms/universal/fdedup/ray/Makefile +++ b/transforms/universal/fdedup/ray/Makefile @@ -28,9 +28,8 @@ publish:: publish-dist publish-image publish-image:: .transforms.publish-image-ray -# distribution versions is the same as image version. 
set-versions: - $(MAKE) TOML_VERSION=$(DOCKER_IMAGE_VERSION) .transforms.set-versions + $(MAKE) TRANSFORM_PYTHON_VERSION=not-used .transforms.set-versions build-dist:: set-versions .defaults.build-dist diff --git a/transforms/universal/filter/kfp_ray/filter_wf.py b/transforms/universal/filter/kfp_ray/filter_wf.py index 782026d10..9de907c55 100644 --- a/transforms/universal/filter/kfp_ray/filter_wf.py +++ b/transforms/universal/filter/kfp_ray/filter_wf.py @@ -21,7 +21,7 @@ EXEC_SCRIPT_NAME: str = "filter_transform_ray.py" PREFIX: str = "" -task_image = "quay.io/dataprep1/data-prep-kit/filter-ray:0.4.0.dev6" +task_image = "quay.io/dataprep1/data-prep-kit/filter-ray:0.2.0.dev6" # components base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.0.dev6" diff --git a/transforms/universal/filter/python/pyproject.toml b/transforms/universal/filter/python/pyproject.toml index 1af0da972..42ad8ea1c 100644 --- a/transforms/universal/filter/python/pyproject.toml +++ b/transforms/universal/filter/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_filter_transform_python" -version = "0.4.0.dev6" +version = "0.2.0.dev6" requires-python = ">=3.10" description = "Filter Transform for Python" license = {text = "Apache-2.0"} diff --git a/transforms/universal/filter/ray/Makefile b/transforms/universal/filter/ray/Makefile index 4088c8e0a..1bf6ece6e 100644 --- a/transforms/universal/filter/ray/Makefile +++ b/transforms/universal/filter/ray/Makefile @@ -31,9 +31,8 @@ publish:: publish-dist publish-image publish-image:: .transforms.publish-image-ray -# distribution versions is the same as image version. 
set-versions: - $(MAKE) TRANSFORM_PYTHON_VERSION=${FILTER_PYTHON_VERSION} TOML_VERSION=$(DOCKER_IMAGE_VERSION) .transforms.set-versions + $(MAKE) TRANSFORM_PYTHON_VERSION=$(FILTER_PYTHON_VERSION) .transforms.set-versions build-dist:: set-versions .defaults.build-dist diff --git a/transforms/universal/filter/ray/pyproject.toml b/transforms/universal/filter/ray/pyproject.toml index 678f60133..4773cbcc5 100644 --- a/transforms/universal/filter/ray/pyproject.toml +++ b/transforms/universal/filter/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_filter_transform_ray" -version = "0.4.0.dev6" +version = "0.2.0.dev6" requires-python = ">=3.10" description = "Filter Transform for Ray" license = {text = "Apache-2.0"} @@ -9,7 +9,7 @@ authors = [ { name = "Constantin Adam", email = "cmadam@us.ibm.com" }, ] dependencies = [ - "dpk-filter-transform-python==0.4.0.dev6", + "dpk-filter-transform-python==0.2.0.dev6", "data-prep-toolkit-ray==0.2.0.dev6", ] diff --git a/transforms/universal/filter/spark/Makefile b/transforms/universal/filter/spark/Makefile index 4abc28ec1..849275ea2 100644 --- a/transforms/universal/filter/spark/Makefile +++ b/transforms/universal/filter/spark/Makefile @@ -31,9 +31,8 @@ publish:: publish-dist publish-image publish-image:: .transforms.publish-image-spark -# distribution versions is the same as image version. 
set-versions: - $(MAKE) TOML_VERSION=$(DOCKER_IMAGE_VERSION) .transforms.set-versions + $(MAKE) TRANSFORM_PYTHON_VERSION=$(DOCKER_IMAGE_VERSION) .transforms.set-versions build-dist:: set-versions .defaults.build-dist diff --git a/transforms/universal/noop/kfp_ray/noop_multiple_wf.py b/transforms/universal/noop/kfp_ray/noop_multiple_wf.py index 51d6b1686..1aa18fd1a 100644 --- a/transforms/universal/noop/kfp_ray/noop_multiple_wf.py +++ b/transforms/universal/noop/kfp_ray/noop_multiple_wf.py @@ -17,7 +17,7 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/noop-ray:0.9.0.dev6" +task_image = "quay.io/dataprep1/data-prep-kit/noop-ray:0.2.0.dev6" # the name of the job script EXEC_SCRIPT_NAME: str = "noop_transform_ray.py" diff --git a/transforms/universal/noop/kfp_ray/noop_wf.py b/transforms/universal/noop/kfp_ray/noop_wf.py index 830af418a..600e3146c 100644 --- a/transforms/universal/noop/kfp_ray/noop_wf.py +++ b/transforms/universal/noop/kfp_ray/noop_wf.py @@ -17,7 +17,7 @@ from workflow_support.compile_utils import ONE_HOUR_SEC, ONE_WEEK_SEC, ComponentUtils -task_image = "quay.io/dataprep1/data-prep-kit/noop-ray:0.9.0.dev6" +task_image = "quay.io/dataprep1/data-prep-kit/noop-ray:0.2.0.dev6" # the name of the job script EXEC_SCRIPT_NAME: str = "noop_transform_ray.py" diff --git a/transforms/universal/noop/python/pyproject.toml b/transforms/universal/noop/python/pyproject.toml index ad5b86597..cdf9c2740 100644 --- a/transforms/universal/noop/python/pyproject.toml +++ b/transforms/universal/noop/python/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_noop_transform_python" -version = "0.9.0.dev6" +version = "0.2.0.dev6" requires-python = ">=3.10" description = "NOOP Python Transform" license = {text = "Apache-2.0"} diff --git a/transforms/universal/noop/ray/pyproject.toml b/transforms/universal/noop/ray/pyproject.toml index 8357ad284..bc757abe9 100644 --- 
a/transforms/universal/noop/ray/pyproject.toml +++ b/transforms/universal/noop/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_noop_transform_ray" -version = "0.9.0.dev6" +version = "0.2.0.dev6" requires-python = ">=3.10" description = "NOOP Ray Transform" license = {text = "Apache-2.0"} @@ -10,7 +10,7 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsky@ibm.com" }, ] dependencies = [ - "dpk-noop-transform-python==0.9.0.dev6", + "dpk-noop-transform-python==0.2.0.dev6", "data-prep-toolkit-ray==0.2.0.dev6", ] diff --git a/transforms/universal/noop/spark/Dockerfile b/transforms/universal/noop/spark/Dockerfile index 4719b786a..3fb44635e 100644 --- a/transforms/universal/noop/spark/Dockerfile +++ b/transforms/universal/noop/spark/Dockerfile @@ -13,8 +13,7 @@ COPY --chown=spark:root data-processing-lib-python/ data-processing-lib-python/ RUN cd data-processing-lib-python && pip install --no-cache-dir -e . COPY --chown=spark:root data-processing-lib-spark/ data-processing-lib-spark/ RUN cd data-processing-lib-spark && pip install --no-cache-dir -e . - -COPY --chown=ray:users python-transform/ python-transform/ +COPY --chown=spark:root python-transform/ python-transform/ RUN cd python-transform && pip install --no-cache-dir -e . 
COPY --chown=root:root src/ src/ diff --git a/transforms/universal/noop/spark/pyproject.toml b/transforms/universal/noop/spark/pyproject.toml index b83bab57a..5ac47b3da 100644 --- a/transforms/universal/noop/spark/pyproject.toml +++ b/transforms/universal/noop/spark/pyproject.toml @@ -10,7 +10,7 @@ authors = [ { name = "Boris Lublinsky", email = "blublinsky@ibm.com" }, ] dependencies = [ - "dpk-noop-transform-python==0.9.0.dev6", + "dpk-noop-transform-python==0.2.0.dev6", "data-prep-toolkit-spark==0.2.0.dev6", ] diff --git a/transforms/universal/profiler/kfp_ray/Makefile b/transforms/universal/profiler/kfp_ray/Makefile index 7a16e1369..20d1864dc 100644 --- a/transforms/universal/profiler/kfp_ray/Makefile +++ b/transforms/universal/profiler/kfp_ray/Makefile @@ -42,4 +42,5 @@ workflow-test: setup workflow-upload: setup @for file in $(YAML_WF); do \ $(MAKE) .workflows.upload-pipeline PIPELINE_FILE=$$file; \ - done \ No newline at end of file + done + diff --git a/transforms/universal/profiler/ray/Makefile b/transforms/universal/profiler/ray/Makefile index 240d2c86a..5c53ae38a 100644 --- a/transforms/universal/profiler/ray/Makefile +++ b/transforms/universal/profiler/ray/Makefile @@ -28,9 +28,8 @@ publish:: publish-dist publish-image publish-image:: .transforms.publish-image-ray -# distribution versions is the same as image version. 
set-versions: - $(MAKE) TOML_VERSION=$(DOCKER_IMAGE_VERSION) .transforms.set-versions + $(MAKE) TRANSFORM_PYTHON_VERSION=not-used .transforms.set-versions build-dist:: set-versions .defaults.build-dist diff --git a/transforms/universal/tokenization/kfp_ray/tokenization_wf.py b/transforms/universal/tokenization/kfp_ray/tokenization_wf.py index 76e31b1f9..f147df186 100644 --- a/transforms/universal/tokenization/kfp_ray/tokenization_wf.py +++ b/transforms/universal/tokenization/kfp_ray/tokenization_wf.py @@ -20,7 +20,7 @@ # the name of the job script EXEC_SCRIPT_NAME: str = "tokenization_transform_ray.py" -task_image = "quay.io/dataprep1/data-prep-kit/tokenization-ray:0.4.0.dev6" +task_image = "quay.io/dataprep1/data-prep-kit/tokenization-ray:0.2.0.dev6" # components base_kfp_image = "quay.io/dataprep1/data-prep-kit/kfp-data-processing:0.2.0.dev6" diff --git a/transforms/universal/tokenization/python/pyproject.toml b/transforms/universal/tokenization/python/pyproject.toml index 6bf310c7c..c5cb72346 100644 --- a/transforms/universal/tokenization/python/pyproject.toml +++ b/transforms/universal/tokenization/python/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "dpk_tokenization_transform_python" keywords = ["tokenizer", "data", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ] -version = "0.4.0.dev6" +version = "0.2.0.dev6" requires-python = ">=3.10" description = "Tokenization Transform for Python" license = {text = "Apache-2.0"} diff --git a/transforms/universal/tokenization/ray/Makefile b/transforms/universal/tokenization/ray/Makefile index e32441fbb..989dfb778 100644 --- a/transforms/universal/tokenization/ray/Makefile +++ b/transforms/universal/tokenization/ray/Makefile @@ -30,9 +30,8 @@ publish-image:: .transforms.publish-image-ray setup:: .transforms.setup -# distribution versions is the same as image version. 
set-versions: - $(MAKE) TRANSFORM_PYTHON_VERSION=${TOKENIZATION_PYTHON_VERSION} TOML_VERSION=$(TOKENIZATION_RAY_VERSION) .transforms.set-versions + $(MAKE) TRANSFORM_PYTHON_VERSION=$(TOKENIZATION_PYTHON_VERSION) .transforms.set-versions build-dist:: set-versions .defaults.build-dist diff --git a/transforms/universal/tokenization/ray/pyproject.toml b/transforms/universal/tokenization/ray/pyproject.toml index b813dbd5a..e09e64932 100644 --- a/transforms/universal/tokenization/ray/pyproject.toml +++ b/transforms/universal/tokenization/ray/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "dpk_tokenization_transform_ray" -version = "0.4.0.dev6" +version = "0.2.0.dev6" requires-python = ">=3.10" description = "Tokenization Transform for Ray" license = {text = "Apache-2.0"} @@ -9,7 +9,7 @@ authors = [ { name = "Xuan-Hong Dang", email = "xuan-hong.dang@ibm.com"}, ] dependencies = [ - "dpk-tokenization-transform-python==0.4.0.dev6", + "dpk-tokenization-transform-python==0.2.0.dev6", "data-prep-toolkit-ray==0.2.0.dev6", ]