Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move hack dirs to scripts dir #295

Merged
merged 7 commits into from
Jun 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,9 @@ jobs:
dir=("code" "universal") && index=$(($RANDOM % 2)) && subdirs=${dir[$index]} && transforms=($(find transforms/$subdirs/ -type d -maxdepth 1 ))
# First element is not really a subdir but rather the current dir so remove it and randomly choose a transform to run
set -- "${transforms[@]}" && shift && transforms=("$@") && size=${#transforms[@]} && index=$(($RANDOM % $size))
export ROOT_DIR=kind
source kind/hack/common.sh
export REPOROOT=$PWD
export KIND_SCRIPTS=$PWD/scripts/kind
source scripts/kind/common.sh
header_text "Running ${transforms[$index]} workflow test"
make -C ${transforms[$index]} workflow-test
header_text "Run ${transforms[$index]} completed"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def get_pipeline_by_name(self, name: str, np: int = 100) -> models.api_pipeline.
"""
try:
# Get all pipelines
pipelines = self.kfp_client.list_pipelines(page_size=np).pipelines
pipelines = self.kfp_client.list_pipelines(page_size=np, sort_by="created_at desc").pipelines
required = list(filter(lambda p: name in p.name, pipelines))
if len(required) != 1:
logger.warning(f"Failure to get pipeline. Number of pipelines with name {name} is {len(required)}")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def get_pipeline_by_name(self, name: str, np: int = 100) -> kfp_server_api.V2bet
]
}
)
result = self.kfp_client.list_pipelines(filter=pipeline_filter)
result = self.kfp_client.list_pipelines(filter=pipeline_filter, page_size=np, sort_by="created_at desc")
if result.pipelines is None:
return None
if len(result.pipelines) == 1:
Expand Down
6 changes: 3 additions & 3 deletions kind/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@ IGNORE := $(shell bash -c "sed -n /=/p ${REPOROOT}/kind/requirements.env | sed

include makeenv

export ROOT_DIR=${CURDIR}
# Include the common rules.
# Use "make help" to see them.
include ../.make.defaults

export TOOLS_DIR=${ROOT_DIR}/hack/tools
export TOOLS_DIR=${REPOROOT}/scripts/kind/tools
export KIND_SCRIPTS=${REPOROOT}/scripts/kind

export EXTERNAL_CLUSTER ?= 0
export DEPLOY_KUBEFLOW ?= 1
Expand All @@ -35,7 +35,7 @@ endif

populate-data::
@# Help: Populate test data in Minio
cd hack && ./populate_minio.sh
cd ${KIND_SCRIPTS} && ./populate_minio.sh

cluster-deploy::
@# Help: Deploy all required tools on existing cluster
Expand Down
22 changes: 11 additions & 11 deletions kind/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@ Run the following command to create the cluster:
cd /tmp
git clone https://github.com/IBM/data-prep-kit.git
cd data-prep-kit
ROOT_DIR=$PWD/kind/
kind create cluster --name dataprep --config ${ROOT_DIR}/hack/kind-cluster-config.yaml
export REPOROOT=$PWD
kind create cluster --name dataprep --config ${REPOROOT}/scripts/kind/kind-cluster-config.yaml
```

Note that by default this will create a kind cluster with 2 worker nodes. If you would like a different
number of nodes, modify the [cluster configuration](hack/kind-cluster-config.yaml)
number of nodes, modify the [cluster configuration](../scripts/kind/kind-cluster-config.yaml)

### Install KFP

Expand All @@ -31,7 +31,7 @@ Install [Kubeflow Pipelines](https://www.kubeflow.org/docs/components/pipelines/
# Set the required KFP version. The latest supported version is listed in the [requirements.env](./requirements.env) file.
# Currently, we support 1.8.5 for KFPv1 and 2.2.0 for KFP v2
export PIPELINE_VERSION=1.8.5
cd $ROOT_DIR/hack/tools/ && ./install_kubeflow.sh deploy && cd -
cd $REPOROOT/scripts/kind/tools/ && ./install_kubeflow.sh deploy && cd -
kubectl wait --for=condition=ready --all pod -n kubeflow --timeout=300s
```

Expand All @@ -40,7 +40,7 @@ kubectl wait --for=condition=ready --all pod -n kubeflow --timeout=300s
Install Kuberay:

```shell
cd $ROOT_DIR/hack/tools && KUBERAY_APISERVER=1.1.0 KUBERAY_OPERATOR=1.0.0 ./install_kuberay.sh deploy && cd -
cd $REPOROOT/scripts/kind/tools/ && KUBERAY_APISERVER=1.1.0 KUBERAY_OPERATOR=1.0.0 ./install_kuberay.sh deploy && cd -
kubectl wait --for=condition=ready --all pod -n kuberay --timeout=300s
```

Expand All @@ -52,7 +52,7 @@ To access the API server and Kubeflow pipeline UI externally, we make use NGINX
Install [Ingress NGINX](https://kind.sigs.k8s.io/docs/user/ingress/#ingress-nginx) for KFP, RAY and MinIO and wait for it to be ready:

```shell
${ROOT_DIR}/hack/tools/install_nginx.sh deploy
${REPOROOT}/scripts/kind/tools/install_nginx.sh deploy
kubectl wait --namespace ingress-nginx \
--for=condition=ready pod \
--selector=app.kubernetes.io/component=controller \
Expand All @@ -61,9 +61,9 @@ kubectl wait --namespace ingress-nginx \

To deploy the ingress for Ray API Server, KFP and MinIO execute the following:
```shell
kubectl apply -f $ROOT_DIR/hack/ray_api_server_ingress.yaml
kubectl apply -f $ROOT_DIR/hack/kfp_ingress.yaml
kubectl apply -f $ROOT_DIR/hack/minio_ingress.yaml
kubectl apply -f $REPOROOT/scripts/kind/ray_api_server_ingress.yaml
kubectl apply -f $REPOROOT/scripts/kind/kfp_ingress.yaml
kubectl apply -f $REPOROOT/scripts/kind/minio_ingress.yaml
```

Open the Kubeflow Pipelines UI at http://localhost:8080/
Expand All @@ -79,15 +79,15 @@ as the secret key.
A secret needs to be created for accessing MinIO using the following command:

```shell
kubectl apply -f $ROOT_DIR/hack/s3_secret.yaml
kubectl apply -f $REPOROOT/scripts/kind/s3_secret.yaml
```

#### Copy test data

Populating Minio server with test data can be done using `mc`. Use the following command:

```shell
$ROOT_DIR/hack/populate_minio.sh
$REPOROOT/scripts/kind/populate_minio.sh
```

This file creates an mc alias, creates the test bucket and copies the local test data into MinIO. If you need
Expand Down
29 changes: 0 additions & 29 deletions kind/hack/populate_minio.sh

This file was deleted.

2 changes: 1 addition & 1 deletion kind/hack/common.sh → scripts/kind/common.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ if [[ "$os" == "unknown" ]]; then
exit 1
fi

source ${ROOT_DIR}/hack/helper-functions.sh
source ${KIND_SCRIPTS}/helper-functions.sh

# Turn colors in this script off by setting the NO_COLOR variable in your
# environment to any value:
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
29 changes: 29 additions & 0 deletions scripts/kind/populate_minio.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
#!/usr/bin/env bash
#
# Populate the MinIO server used by the kind test cluster with test data.
#
# Registers an `mc` alias named `kfp`, creates the `test` bucket and copies
# the local test inputs of the transforms into that bucket.
#
# Environment:
#   MINIO_SERVER  MinIO endpoint URL; defaults to http://localhost:8090 when
#                 unset or empty.
#   REPOROOT      Repository root that contains the transforms/ tree. When
#                 unset or empty it is derived from this script's location,
#                 assuming the script lives in <repo>/scripts/kind/.
#
# The script is intentionally best-effort: a failing `mc` command (for example
# `mc mb` on a bucket that already exists) does not stop the remaining copies.

MINIO_SERVER="${MINIO_SERVER:-http://localhost:8090}"

# Without a repository root every source path below would silently resolve to
# /transforms/..., so fall back to the directory two levels above this script.
if [ -z "${REPOROOT:-}" ]; then
    REPOROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
fi

# Recursively copy a path below ${REPOROOT}/transforms into the kfp/test bucket.
#   $1  source path, relative to ${REPOROOT}/transforms
#   $2  destination path, relative to kfp/test
upload() {
    mc cp --recursive "${REPOROOT}/transforms/$1" "kfp/test/$2"
}

echo "creating minio alias to $MINIO_SERVER"
mc alias set kfp "$MINIO_SERVER" minio minio123

echo "creating test bucket"
mc mb kfp/test
echo "copying data"
# code modules
upload code/code_quality/ray/test-data/input/ code_quality/input
upload code/ingest_2_parquet/ray/test-data/input/data-processing-lib.zip ingest_2_parquet/input
upload code/ingest_2_parquet/ray/test-data/languages/ ingest_2_parquet/languages
upload code/proglang_select/ray/test-data/input/ proglang_select/input
upload code/proglang_select/ray/test-data/languages/ proglang_select/languages
upload code/malware/ray/test-data/input/ malware/input
# language
upload language/lang_id/ray/test-data/input/ lang_id/input
# universal
upload universal/doc_id/ray/test-data/input/ doc_id/input
upload universal/ededup/ray/test-data/input/ ededup/input
upload universal/fdedup/ray/test-data/input/ fdedup/input
upload universal/filter/ray/test-data/input/ filter/input
upload universal/noop/ray/test-data/input/ noop/input
upload universal/tokenization/ray/test-data/ds01/input/ tokenization/ds01/input

File renamed without changes.
11 changes: 6 additions & 5 deletions kind/hack/tools/ingress.sh → scripts/kind/tools/ingress.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,20 @@

op=$1

source ${ROOT_DIR}/hack/common.sh
source ${KIND_SCRIPTS}/common.sh

deploy() {
kubectl apply -f ${ROOT_DIR}/hack/ray_api_server_ingress.yaml
sleep 10
kubectl apply -f ${KIND_SCRIPTS}/ray_api_server_ingress.yaml
if [[ "${DEPLOY_KUBEFLOW}" -eq 1 ]]; then
kubectl apply -f ${ROOT_DIR}/hack/kfp_ingress.yaml
kubectl apply -f ${KIND_SCRIPTS}/kfp_ingress.yaml
fi
}

delete(){
kubectl delete -f ${ROOT_DIR}/hack/ray_api_server_ingress.yaml
kubectl delete -f ${KIND_SCRIPTS}/ray_api_server_ingress.yaml
if [[ "${DEPLOY_KUBEFLOW}" -eq 1 ]]; then
kubectl delete -f ${ROOT_DIR}/hack/kfp_ingress.yaml
kubectl delete -f ${KIND_SCRIPTS}/kfp_ingress.yaml
fi
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ MAX_RETRIES="${MAX_RETRIES:-5}"
EXIT_CODE=0

deploy() {
sed -i.back "s/tag: v[0-9].*/tag: v${KUBERAY_APISERVER}/" ${ROOT_DIR}/hack/ray_api_server_values.yaml
sed -i.back "s/tag: v[0-9].*/tag: v${KUBERAY_APISERVER}/" ${KIND_SCRIPTS}/ray_api_server_values.yaml
helm repo add kuberay https://ray-project.github.io/kuberay-helm/
helm repo update kuberay
helm install kuberay-operator kuberay/kuberay-operator -n kuberay --version ${KUBERAY_OPERATOR} --set image.pullPolicy=IfNotPresent --create-namespace
helm install -f ${ROOT_DIR}/hack/ray_api_server_values.yaml kuberay-apiserver kuberay/kuberay-apiserver -n kuberay --version ${KUBERAY_APISERVER} --set image.pullPolicy=IfNotPresent
helm install -f ${KIND_SCRIPTS}/ray_api_server_values.yaml kuberay-apiserver kuberay/kuberay-apiserver -n kuberay --version ${KUBERAY_APISERVER} --set image.pullPolicy=IfNotPresent
echo "Finished KubeRay deployment."
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ MAX_RETRIES="${MAX_RETRIES:-20}"
EXIT_CODE=0

deploy() {
kubectl apply -f ${ROOT_DIR}/hack/s3_secret.yaml
kubectl apply -f ${ROOT_DIR}/hack/minio_ingress.yaml
kubectl apply -f ${KIND_SCRIPTS}/s3_secret.yaml
kubectl apply -f ${KIND_SCRIPTS}/minio_ingress.yaml
}

wait(){
Expand All @@ -24,8 +24,8 @@ echo "ingress minio is ready"
}

delete(){
kubectl delete -f ${ROOT_DIR}/hack/s3_secret.yaml
kubectl delete -f ${ROOT_DIR}/hack/minio_ingress.yaml
kubectl delete -f ${KIND_SCRIPTS}/s3_secret.yaml
kubectl delete -f ${KIND_SCRIPTS}/minio_ingress.yaml
}

usage(){
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ op=$1
SLEEP_TIME="${SLEEP_TIME:-30}"
MAX_RETRIES="${MAX_RETRIES:-20}"
EXIT_CODE=0
NGINX_INSTALLATION_FILE="${ROOT_DIR}/hack/nginx_deploy.yaml"
NGINX_MINIO_INSTALLATION_FILE="${ROOT_DIR}/hack/nginx_deploy_minio.yaml"
NGINX_INSTALLATION_FILE="${KIND_SCRIPTS}/nginx_deploy.yaml"
NGINX_MINIO_INSTALLATION_FILE="${KIND_SCRIPTS}/nginx_deploy_minio.yaml"

source ${ROOT_DIR}/hack/common.sh
source ${KIND_SCRIPTS}/common.sh

deploy() {
kubectl apply -f "$NGINX_INSTALLATION_FILE"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ kind_create() {
echo "port 8090 is in use, please clear the port and try again"
exit 1
fi
kind create cluster --name $cluster_name --config ${ROOT_DIR}/hack/kind-cluster-config.yaml
kind create cluster --name $cluster_name --config ${KIND_SCRIPTS}/kind-cluster-config.yaml
}

usage(){
Expand Down
2 changes: 1 addition & 1 deletion transforms/.make.workflows
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ endef
.workflows.set-versions:
cd ${REPOROOT}/kfp/kfp_ray_components && $(MAKE) set-versions
cd ${REPOROOT}/kfp/kfp_ray_components && $(MAKE) .reconcile-requirements FILE=${CURDIR}/${PIPELINE_FILE}
${REPOROOT}/transforms/hack/update_workflow_tags.sh ${REPOROOT}/.make.versions ${CURDIR}/${PIPELINE_FILE}
${REPOROOT}/scripts/transforms/update_workflow_tags.sh ${REPOROOT}/.make.versions ${CURDIR}/${PIPELINE_FILE}

.PHONY: .workflows.compile-pipeline
.workflows.compile-pipeline:
Expand Down
9 changes: 7 additions & 2 deletions transforms/code/ingest_2_parquet/kfp_ray/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ YAML_WF := $(patsubst %.py, %.yaml, ${PYTHON_WF})

workflow-venv: .check_python_version ${WORKFLOW_VENV_ACTIVATE}

clean:: .defaults.clean

setup::

venv::

build::
Expand All @@ -17,12 +21,13 @@ test::

test-src::

test-image::
publish::

image::

load-image::
test-image::

load-image::


.PHONY: workflow-build
Expand Down