Skip to content

Commit

Permalink
Merge pull request #493 from IBM/build-transforms-wheel
Browse files Browse the repository at this point in the history
Build transforms wheel
  • Loading branch information
touma-I authored Sep 24, 2024
2 parents d159d61 + 33b8853 commit f5d9680
Show file tree
Hide file tree
Showing 65 changed files with 738 additions and 119 deletions.
49 changes: 49 additions & 0 deletions .github/workflows/test-packaging-python.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Source-level test workflow for transforms/packaging/python.
#
# Runs on manual dispatch, on pushes to "dev" / "releases/**" branches and to
# any tag, and on pull requests targeting "dev" / "releases/**". The path
# filters limit it to changes under transforms/packaging/python, excluding
# markdown, doc/, images/ and .gitignore files.
name: Test - transforms/packaging/python

on:
  workflow_dispatch:
  push:
    branches:
      - "dev"
      - "releases/**"
    tags:
      - "*"
    paths:
      - "transforms/packaging/python/**"
      - "!**.md"
      - "!**/doc/**"
      - "!**/images/**"
      - "!**.gitignore"
  pull_request:
    branches:
      - "dev"
      - "releases/**"
    paths:
      - "transforms/packaging/python/**"
      - "!**.md"
      - "!**/doc/**"
      - "!**/images/**"
      - "!**.gitignore"

# Least privilege: the steps below only check out the repository and run make
# on the runner, so the job's GITHUB_TOKEN is limited to reading repository
# contents.
permissions:
  contents: read

jobs:
  test-src:
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Free up space in github runner
        # Free space as indicated here: https://github.com/actions/runner-images/issues/2840#issuecomment-790492173
        # df -h is run before and after so the log shows how much was reclaimed.
        run: |
          df -h
          sudo rm -rf "/usr/local/share/boost"
          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
          sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup
          sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true
          df -h
      - name: Test transform source in transforms/packaging/python
        # A missing Makefile is not an error: the step just reports that
        # source testing is disabled and succeeds.
        run: |
          if [ -e "transforms/packaging/python/Makefile" ]; then
            make -C transforms/packaging/python DOCKER=docker test-src
          else
            echo "transforms/packaging/python/Makefile not found - source testing disabled for this transform."
          fi
49 changes: 49 additions & 0 deletions .github/workflows/test-packaging-ray.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Source-level test workflow for transforms/packaging/ray.
#
# Runs on manual dispatch, on pushes to "dev" / "releases/**" branches and to
# any tag, and on pull requests targeting "dev" / "releases/**". The path
# filters limit it to changes under transforms/packaging/ray, excluding
# markdown, doc/, images/ and .gitignore files.
name: Test - transforms/packaging/ray

on:
  workflow_dispatch:
  push:
    branches:
      - "dev"
      - "releases/**"
    tags:
      - "*"
    paths:
      - "transforms/packaging/ray/**"
      - "!**.md"
      - "!**/doc/**"
      - "!**/images/**"
      - "!**.gitignore"
  pull_request:
    branches:
      - "dev"
      - "releases/**"
    paths:
      - "transforms/packaging/ray/**"
      - "!**.md"
      - "!**/doc/**"
      - "!**/images/**"
      - "!**.gitignore"

# Least privilege: the steps below only check out the repository and run make
# on the runner, so the job's GITHUB_TOKEN is limited to reading repository
# contents.
permissions:
  contents: read

jobs:
  test-src:
    runs-on: ubuntu-22.04
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Free up space in github runner
        # Free space as indicated here: https://github.com/actions/runner-images/issues/2840#issuecomment-790492173
        # df -h is run before and after so the log shows how much was reclaimed.
        run: |
          df -h
          sudo rm -rf "/usr/local/share/boost"
          sudo rm -rf "$AGENT_TOOLSDIRECTORY"
          sudo rm -rf /usr/share/dotnet /opt/ghc /usr/local/lib/android /usr/local/share/powershell /usr/share/swift /usr/local/.ghcup
          sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true
          df -h
      - name: Test transform source in transforms/packaging/ray
        # A missing Makefile is not an error: the step just reports that
        # source testing is disabled and succeeds.
        run: |
          if [ -e "transforms/packaging/ray/Makefile" ]; then
            make -C transforms/packaging/ray DOCKER=docker test-src
          else
            echo "transforms/packaging/ray/Makefile not found - source testing disabled for this transform."
          fi
15 changes: 14 additions & 1 deletion .make.defaults
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,8 @@ endif
if [ -e requirements.txt ]; then \
echo Installing requirements from requirements.txt; \
pip install $(PIP_INSTALL_EXTRA_ARGS) $$extra_url -r requirements.txt; \
elif [ -e pyproject.toml ]; then \
fi; \
if [ -e pyproject.toml ]; then \
echo Installing from pyproject.toml; \
pip install $(PIP_INSTALL_EXTRA_ARGS) $$extra_url -e .; \
fi
Expand Down Expand Up @@ -587,6 +588,18 @@ MINIO_ADMIN_PWD= localminiosecretkey
> tt.toml; \
mv tt.toml pyproject.toml; \
fi
@if [ -e requirements.txt ]; then \
cat requirements.txt | sed \
-e 's/data-prep-toolkit-ray\([=><~][=]\).*/data-prep-toolkit-ray\1$(DPK_LIB_VERSION)/' \
-e 's/data-prep-toolkit-transforms\([=><~][=]\).*/data-prep-toolkit-transforms\1$(DPK_TRANSFORMS_VERSION)/' \
-e 's/data-prep-toolkit-spark\([=><~][=]\).*/data-prep-toolkit-spark\1$(DPK_LIB_VERSION)/' \
-e 's/data-prep-toolkit-kfp\([=><~][=]\).*/data-prep-toolkit-kfp\1$(DPK_LIB_KFP_VERSION)/' \
-e 's/data-prep-toolkit\([=><~][=]\).*/data-prep-toolkit\1$(DPK_LIB_VERSION)/' \
-e 's/ray\[default\]\([=><~][=]\).*/ray\[default\]\1$(RAY)/' \
-e 's/data-prep-toolkit-kfp-shared\(..\).*/data-prep-toolkit-kfp-shared\1$(DPK_LIB_KFP_VERSION)/' \
> tt.txt; \
mv tt.txt requirements.txt; \
fi

# Build the distribution, usually in preparation for publishing with the .defaults.publish-dist target
.PHONY: .defaults.build-dist
Expand Down
5 changes: 4 additions & 1 deletion .make.versions
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ DPK_MINOR_VERSION=2
DPK_MICRO_VERSION=1
# The suffix is generally always set in the main/development branch and only nulled out when creating release branches.
# It can be manually incremented, for example, to allow publishing a new intermediate version wheel to pypi.
DPK_VERSION_SUFFIX=.dev0
DPK_VERSION_SUFFIX=.dev3

DPK_VERSION=$(DPK_MAJOR_VERSION).$(DPK_MINOR_VERSION).$(DPK_MICRO_VERSION)$(DPK_VERSION_SUFFIX)

Expand Down Expand Up @@ -103,6 +103,8 @@ PII_REDACTOR_PYTHON_VERSION=$(DPK_VERSION)

HTML2PARQUET_PYTHON_VERSION=$(DPK_VERSION)

DPK_TRANSFORMS_VERSION=$(DPK_VERSION)

################## ################## ################## ################## ################## ##################
# Begin versions that the repo depends on.

Expand All @@ -117,3 +119,4 @@ ifeq ($(KFPv2), 1)
else
WORKFLOW_SUPPORT_LIB=kfp_v1_workflow_support
endif

2 changes: 1 addition & 1 deletion data-processing-lib/python/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "data_prep_toolkit"
version = "0.2.1.dev0"
version = "0.2.1.dev3"
requires-python = ">=3.10"
keywords = ["data", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ]
description = "Data Preparation Toolkit Library"
Expand Down
4 changes: 2 additions & 2 deletions data-processing-lib/ray/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "data_prep_toolkit_ray"
version = "0.2.1.dev0"
version = "0.2.1.dev3"
keywords = ["data", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ]
requires-python = ">=3.10"
description = "Data Preparation Toolkit Library for Ray"
Expand All @@ -11,7 +11,7 @@ authors = [
{ name = "Boris Lublinsky", email = "blublinsky@ibm.com" },
]
dependencies = [
"data-prep-toolkit==0.2.1.dev0",
"data-prep-toolkit>=0.2.1.dev3",
"ray[default]==2.24.0",
# These two are to fix security issues identified by quay.io
"fastapi>=0.110.2",
Expand Down
4 changes: 2 additions & 2 deletions data-processing-lib/spark/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "data_prep_toolkit_spark"
version = "0.2.1.dev0"
version = "0.2.1.dev3"
keywords = ["data", "data preprocessing", "data preparation", "llm", "generative", "ai", "fine-tuning", "llmapps" ]
requires-python = ">=3.10"
description = "Data Preparation Toolkit Library for Spark"
Expand All @@ -11,7 +11,7 @@ authors = [
{ name = "Boris Lublinsky", email = "blublinsk@ibm.com" },
]
dependencies = [
"data-prep-toolkit==0.2.1.dev0",
"data-prep-toolkit==0.2.1.dev3",
"pyspark>=3.5.2",
"psutil>=6.0.0"
]
Expand Down
6 changes: 3 additions & 3 deletions examples/notebooks/rag/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
## Data prep kit
data-prep-toolkit-transforms==0.2.1.dev1
data-prep-toolkit-transforms-ray==0.2.1.dev1
#data-prep-toolkit-transforms==0.2.1.dev1
#data-prep-toolkit-transforms-ray==0.2.1.dev1



Expand Down Expand Up @@ -53,4 +53,4 @@ ipython
ipywidgets
IProgress
chardet==5.2.0
charset-normalizer==3.3.2
charset-normalizer==3.3.2
4 changes: 2 additions & 2 deletions kfp/kfp_support_lib/kfp_v1_workflow_support/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "data_prep_toolkit_kfp_v1"
version = "0.2.1.dev0"
version = "0.2.1.dev3"
requires-python = ">=3.10,<3.12"
description = "Data Preparation Kit Library. KFP support"
license = {text = "Apache-2.0"}
Expand All @@ -13,7 +13,7 @@ authors = [
]
dependencies = [
"kfp==1.8.22",
"data-prep-toolkit-kfp-shared==0.2.1.dev0",
"data-prep-toolkit-kfp-shared==0.2.1.dev3",
]

[build-system]
Expand Down
6 changes: 3 additions & 3 deletions kfp/kfp_support_lib/kfp_v2_workflow_support/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "data_prep_toolkit_kfp_v2"
version = "0.2.1.dev0"
version = "0.2.1.dev3"
requires-python = ">=3.10,<3.12"
description = "Data Preparation Kit Library. KFP support"
license = {text = "Apache-2.0"}
Expand All @@ -12,9 +12,9 @@ authors = [
{ name = "Revital Eres", email = "eres@il.ibm.com" },
]
dependencies = [
"kfp==2.7.0",
"kfp==2.8.0",
"kfp-kubernetes==1.2.0",
"data-prep-toolkit-kfp-shared==0.2.1.dev0",
"data-prep-toolkit-kfp-shared==0.2.1.dev3",
]

[build-system]
Expand Down
4 changes: 2 additions & 2 deletions kfp/kfp_support_lib/shared_workflow_support/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "data_prep_toolkit_kfp_shared"
version = "0.2.1.dev0"
version = "0.2.1.dev3"
requires-python = ">=3.10,<3.12"
description = "Data Preparation Kit Library. KFP support"
license = {text = "Apache-2.0"}
Expand All @@ -14,7 +14,7 @@ authors = [
dependencies = [
"requests",
"kubernetes",
"data-prep-toolkit-ray==0.2.1.dev0",
"data-prep-toolkit-ray==0.2.1.dev3",
]

[build-system]
Expand Down
4 changes: 2 additions & 2 deletions transforms/code/code2parquet/python/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_code2parquet_transform_python"
version = "0.2.1.dev0"
version = "0.2.1.dev3"
requires-python = ">=3.10"
description = "code2parquet Python Transform"
license = {text = "Apache-2.0"}
Expand All @@ -10,7 +10,7 @@ authors = [
{ name = "Boris Lublinsky", email = "blublinsky@ibm.com" },
]
dependencies = [
"data-prep-toolkit==0.2.1.dev0",
"data-prep-toolkit==0.2.1.dev3",
"parameterized",
"pandas",
]
Expand Down
6 changes: 3 additions & 3 deletions transforms/code/code2parquet/ray/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_code2parquet_transform_ray"
version = "0.2.1.dev0"
version = "0.2.1.dev3"
requires-python = ">=3.10"
description = "code2parquet Ray Transform"
license = {text = "Apache-2.0"}
Expand All @@ -10,8 +10,8 @@ authors = [
{ name = "Boris Lublinsky", email = "blublinsky@ibm.com" },
]
dependencies = [
"data-prep-toolkit-ray==0.2.1.dev0",
"dpk-code2parquet-transform-python==0.2.1.dev0",
"data-prep-toolkit-ray==0.2.1.dev3",
"dpk-code2parquet-transform-python==0.2.1.dev3",
"parameterized",
"pandas",
]
Expand Down
4 changes: 2 additions & 2 deletions transforms/code/code_quality/python/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_code_quality_transform_python"
version = "0.2.1.dev0"
version = "0.2.1.dev3"
requires-python = ">=3.10"
description = "Code Quality Python Transform"
license = {text = "Apache-2.0"}
Expand All @@ -9,7 +9,7 @@ authors = [
{ name = "Shivdeep Singh", email = "shivdeep.singh@ibm.com" },
]
dependencies = [
"data-prep-toolkit==0.2.1.dev0",
"data-prep-toolkit==0.2.1.dev3",
"bs4==0.0.2",
"transformers==4.38.2",
]
Expand Down
6 changes: 3 additions & 3 deletions transforms/code/code_quality/ray/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_code_quality_transform_ray"
version = "0.2.1.dev0"
version = "0.2.1.dev3"
requires-python = ">=3.10"
description = "Code Quality Ray Transform"
license = {text = "Apache-2.0"}
Expand All @@ -9,8 +9,8 @@ authors = [
{ name = "Shivdeep Singh", email = "shivdeep.singh@ibm.com" },
]
dependencies = [
"dpk-code-quality-transform-python==0.2.1.dev0",
"data-prep-toolkit-ray==0.2.1.dev0",
"dpk-code-quality-transform-python==0.2.1.dev3",
"data-prep-toolkit-ray==0.2.1.dev3",
]

[build-system]
Expand Down
4 changes: 2 additions & 2 deletions transforms/code/header_cleanser/python/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_header_cleanser_transform_python"
version = "0.2.1.dev0"
version = "0.2.1.dev3"
requires-python = ">=3.10"
description = "License and Copyright Removal Transform for Python"
license = {text = "Apache-2.0"}
Expand All @@ -9,7 +9,7 @@ authors = [
{ name = "Yash kalathiya", email = "yashkalathiya164@gmail.com" },
]
dependencies = [
"data-prep-toolkit==0.2.1.dev0",
"data-prep-toolkit==0.2.1.dev3",
"scancode-toolkit==32.1.0",
]

Expand Down
6 changes: 3 additions & 3 deletions transforms/code/header_cleanser/ray/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_header_cleanser_transform_ray"
version = "0.2.1.dev0"
version = "0.2.1.dev3"
requires-python = ">=3.10"
description = "License and copyright removal Transform for Ray"
license = {text = "Apache-2.0"}
Expand All @@ -9,8 +9,8 @@ authors = [
{ name = "Yash kalathiya", email = "yashkalathiya164@gmail.com" },
]
dependencies = [
"dpk-header-cleanser-transform-python==0.2.1.dev0",
"data-prep-toolkit-ray==0.2.1.dev0",
"dpk-header-cleanser-transform-python==0.2.1.dev3",
"data-prep-toolkit-ray==0.2.1.dev3",
"scancode-toolkit==32.1.0",
]

Expand Down
4 changes: 2 additions & 2 deletions transforms/code/malware/python/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "dpk_malware_transform_python"
version = "0.2.1.dev0"
version = "0.2.1.dev3"
requires-python = ">=3.10"
description = "Malware Python Transform"
license = {text = "Apache-2.0"}
Expand All @@ -9,7 +9,7 @@ authors = [
{ name = "Takuya Goto", email = "tkyg@jp.ibm.com" },
]
dependencies = [
"data-prep-toolkit==0.2.1.dev0",
"data-prep-toolkit==0.2.1.dev3",
"clamd==1.0.2",
]

Expand Down
Loading

0 comments on commit f5d9680

Please sign in to comment.