From ae1d74376b7daa86cd44361553e48f6d508e7ae0 Mon Sep 17 00:00:00 2001
From: Nagaram Prasad Addepally
Date: Thu, 15 Nov 2018 15:51:04 -0800
Subject: [PATCH 1/6] [SPARK-25957][K8S] Add ability to skip building optional
 k8s docker images

bin/docker-image-tool.sh tries to build all docker images (JVM, PySpark
and SparkR) by default. But not all Spark distributions are built with
SparkR, so the script fails on such distros.

With this change:
- We can skip building the optional docker images (PySpark and SparkR)
  by specifying the -pskip or -Rskip flags.
- We autodetect whether SparkR is installed in the build, and skip
  building the SparkR docker image when it is not.
- We skip pushing docker images that are not available locally.

Tested the following scenarios:
- On source code and a distro with SparkR support:
  - Run bin/docker-image-tool.sh -r <repo> -t <tag> build. Verify that
    the JVM, PySpark and SparkR docker images are built.
  - Run bin/docker-image-tool.sh -r <repo> -t <tag> -pskip -Rskip build.
    Verify that only the JVM docker image is built; building the
    PySpark and SparkR images is skipped.
- On source code and a distro without SparkR support:
  - Run bin/docker-image-tool.sh -r <repo> -t <tag> build. Verify that
    only the JVM and PySpark docker images are built; building the
    SparkR image is skipped.
- On a system with the JVM, PySpark and SparkR images built:
  - Run bin/docker-image-tool.sh -r <repo> -t <tag> push. Verify that
    all images are pushed to the docker registry.
- On a system with only the JVM and PySpark images built:
  - Run bin/docker-image-tool.sh -r <repo> -t <tag> push. Verify that
    only the JVM and PySpark images are pushed; pushing the SparkR
    image is skipped.
---
 bin/docker-image-tool.sh | 56 +++++++++++++++++++++++++---------------
 1 file changed, 35 insertions(+), 21 deletions(-)

diff --git a/bin/docker-image-tool.sh b/bin/docker-image-tool.sh
index aa5d847f4be2f..e1455932f339b 100755
--- a/bin/docker-image-tool.sh
+++ b/bin/docker-image-tool.sh
@@ -41,6 +41,18 @@ function image_ref {
   echo "$image"
 }
 
+function docker_push {
+  local image_name="$1"
+  if [ ! -z $(docker images -q "$(image_ref ${image_name})") ]; then
+    docker push "$(image_ref ${image_name})"
+    if [ $? -ne 0 ]; then
+      error "Failed to push $image_name Docker image."
+    fi
+  else
+    echo "$(image_ref ${image_name}) image not found. Skipping push for this image."
+  fi
+}
+
 function build {
   local BUILD_ARGS
   local IMG_PATH
@@ -102,33 +114,33 @@ function build {
     error "Failed to build Spark JVM Docker image, please refer to Docker build output for details."
   fi
 
-  docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
-    -t $(image_ref spark-py) \
-    -f "$PYDOCKERFILE" .
+  if [ "${PYDOCKERFILE}" != "skip" ]; then
+    docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
+      -t $(image_ref spark-py) \
+      -f "$PYDOCKERFILE" .
+    if [ $? -ne 0 ]; then
+      error "Failed to build PySpark Docker image, please refer to Docker build output for details."
+    fi
+  else
+    echo "Skipped building PySpark docker image."
+  fi
+
+  if [ "${RDOCKERFILE}" != "skip" ] && [ -d "${SPARK_HOME}/R/lib" ]; then
+    docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
+      -t $(image_ref spark-r) \
+      -f "$RDOCKERFILE" .
   if [ $? -ne 0 ]; then
-    error "Failed to build PySpark Docker image, please refer to Docker build output for details."
+    error "Failed to build SparkR Docker image, please refer to Docker build output for details."
   fi
-
-  docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
-    -t $(image_ref spark-r) \
-    -f "$RDOCKERFILE" .
-  if [ $? -ne 0 ]; then
-    error "Failed to build SparkR Docker image, please refer to Docker build output for details."
+  else
+    echo "Skipped building SparkR docker image."
   fi
 }
 
 function push {
-  docker push "$(image_ref spark)"
-  if [ $? -ne 0 ]; then
-    error "Failed to push Spark JVM Docker image."
-  fi
-  docker push "$(image_ref spark-py)"
-  if [ $? -ne 0 ]; then
-    error "Failed to push PySpark Docker image."
-  fi
-  docker push "$(image_ref spark-r)"
-  if [ $? -ne 0 ]; then
-    error "Failed to push SparkR Docker image."
-  fi
+  docker_push "spark"
+  docker_push "spark-py"
+  docker_push "spark-r"
 }
 
 function usage {
@@ -145,6 +157,8 @@ Options:
   -f file               Dockerfile to build for JVM based Jobs. By default builds the Dockerfile shipped with Spark.
   -p file               Dockerfile to build for PySpark Jobs. Builds Python dependencies and ships with Spark.
   -R file               Dockerfile to build for SparkR Jobs. Builds R dependencies and ships with Spark.
+  -pskip                Skip building PySpark docker image.
+  -Rskip                Skip building SparkR docker image.
   -r repo               Repository address.
   -t tag                Tag to apply to the built image, or to identify the image to be pushed.
   -m                    Use minikube's Docker daemon.
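The docker_push helper above keys its push-or-skip decision on whether `docker images -q` prints anything for the image reference. Below is a standalone sketch of that check, with `myrepo/spark:my-tag` as a placeholder reference; it quotes the command substitution, whereas the patch leaves it unquoted inside `[ ! -z ... ]`, which happens to work for empty output but is fragile:

```bash
#!/usr/bin/env bash
# Push an image only if it exists locally; skip instead of failing otherwise.
image="myrepo/spark:my-tag"  # placeholder reference

# `docker images -q <ref>` prints the local image ID, or nothing if absent.
if [ -n "$(docker images -q "$image")" ]; then
  docker push "$image" || echo "Failed to push $image" >&2
else
  echo "$image image not found. Skipping push for this image."
fi
```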
From 8887b5abd426f61d004e03918445187ccc836a46 Mon Sep 17 00:00:00 2001
From: Nagaram Prasad Addepally
Date: Fri, 16 Nov 2018 12:12:02 -0800
Subject: [PATCH 2/6] Address review comments

* Update usage to specify how to skip building a docker image
* Explicitly log a message when building the R image is skipped because
  R packages are not found
---
 bin/docker-image-tool.sh | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/bin/docker-image-tool.sh b/bin/docker-image-tool.sh
index e1455932f339b..14d6a9d009e9c 100755
--- a/bin/docker-image-tool.sh
+++ b/bin/docker-image-tool.sh
@@ -125,12 +125,16 @@ function build {
     echo "Skipped building PySpark docker image."
   fi
 
-  if [ "${RDOCKERFILE}" != "skip" ] && [ -d "${SPARK_HOME}/R/lib" ]; then
-    docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
-      -t $(image_ref spark-r) \
-      -f "$RDOCKERFILE" .
-  if [ $? -ne 0 ]; then
-    error "Failed to build SparkR Docker image, please refer to Docker build output for details."
+  if [ "${RDOCKERFILE}" != "skip" ]; then
+    if [ -d "${SPARK_HOME}/R/lib" ]; then
+      docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
+        -t $(image_ref spark-r) \
+        -f "$RDOCKERFILE" .
+      if [ $? -ne 0 ]; then
+        error "Failed to build SparkR Docker image, please refer to Docker build output for details."
+      fi
+    else
+      echo "SparkR artifacts not found. Skipped building SparkR docker image."
   fi
   else
     echo "Skipped building SparkR docker image."
@@ -156,9 +160,9 @@ Commands:
 Options:
   -f file               Dockerfile to build for JVM based Jobs. By default builds the Dockerfile shipped with Spark.
   -p file               Dockerfile to build for PySpark Jobs. Builds Python dependencies and ships with Spark.
+                        Specify 'skip' to skip building PySpark docker image.
   -R file               Dockerfile to build for SparkR Jobs. Builds R dependencies and ships with Spark.
-  -pskip                Skip building PySpark docker image.
-  -Rskip                Skip building SparkR docker image.
+                        Specify 'skip' to skip building SparkR docker image.
   -r repo               Repository address.
   -t tag                Tag to apply to the built image, or to identify the image to be pushed.
   -m                    Use minikube's Docker daemon.
@@ -178,6 +182,9 @@ Examples:
   - Build image in minikube with tag "testing"
     $0 -m -t testing build
 
+  - Skip building SparkR docker image
+    $0 -r repo -t tag -R skip build
+
   - Build and push image with tag "v2.3.0" to docker.io/myrepo
     $0 -r docker.io/myrepo -t v2.3.0 build
     $0 -r docker.io/myrepo -t v2.3.0 push
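With patch 2 applied, skipping is requested through the option value `skip` rather than dedicated flags, so a JVM-only build on a distro without SparkR would look like the following (`<repo>` and `<tag>` are placeholders):

```bash
# Build only the Spark JVM image; skip both optional binding images.
./bin/docker-image-tool.sh -r <repo> -t <tag> -p skip -R skip build
```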
From 11080c973407b6ffdb23a33f202a3cef0335a4f7 Mon Sep 17 00:00:00 2001
From: Nagaram Prasad Addepally
Date: Tue, 20 Nov 2018 11:48:44 -0800
Subject: [PATCH 3/6] Make building alternate language binding images opt-in
 rather than default
---
 bin/docker-image-tool.sh | 34 +++++++++++++---------------------
 1 file changed, 13 insertions(+), 21 deletions(-)

diff --git a/bin/docker-image-tool.sh b/bin/docker-image-tool.sh
index 14d6a9d009e9c..244cf0bf4e532 100755
--- a/bin/docker-image-tool.sh
+++ b/bin/docker-image-tool.sh
@@ -104,8 +104,8 @@ function build {
     base_img=$(image_ref spark)
   )
   local BASEDOCKERFILE=${BASEDOCKERFILE:-"$IMG_PATH/spark/Dockerfile"}
-  local PYDOCKERFILE=${PYDOCKERFILE:-"$IMG_PATH/spark/bindings/python/Dockerfile"}
-  local RDOCKERFILE=${RDOCKERFILE:-"$IMG_PATH/spark/bindings/R/Dockerfile"}
+  local PYDOCKERFILE=${PYDOCKERFILE:-false}
+  local RDOCKERFILE=${RDOCKERFILE:-false}
 
   docker build $NOCACHEARG "${BUILD_ARGS[@]}" \
     -t $(image_ref spark) \
@@ -114,30 +114,22 @@ function build {
     error "Failed to build Spark JVM Docker image, please refer to Docker build output for details."
   fi
 
-  if [ "${PYDOCKERFILE}" != "skip" ]; then
+  if [ "${PYDOCKERFILE}" != "false" ]; then
     docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
       -t $(image_ref spark-py) \
       -f "$PYDOCKERFILE" .
     if [ $? -ne 0 ]; then
       error "Failed to build PySpark Docker image, please refer to Docker build output for details."
     fi
-  else
-    echo "Skipped building PySpark docker image."
   fi
 
-  if [ "${RDOCKERFILE}" != "skip" ]; then
-    if [ -d "${SPARK_HOME}/R/lib" ]; then
-      docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
-        -t $(image_ref spark-r) \
-        -f "$RDOCKERFILE" .
-      if [ $? -ne 0 ]; then
-        error "Failed to build SparkR Docker image, please refer to Docker build output for details."
+  if [ "${RDOCKERFILE}" != "false" ]; then
+    docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
+      -t $(image_ref spark-r) \
+      -f "$RDOCKERFILE" .
+    if [ $? -ne 0 ]; then
+      error "Failed to build SparkR Docker image, please refer to Docker build output for details."
       fi
-    else
-      echo "SparkR artifacts not found. Skipped building SparkR docker image."
   fi
-  else
-    echo "Skipped building SparkR docker image."
-  fi
 }
 
@@ -159,10 +151,10 @@ Commands:
 
 Options:
   -f file               Dockerfile to build for JVM based Jobs. By default builds the Dockerfile shipped with Spark.
-  -p file               Dockerfile to build for PySpark Jobs. Builds Python dependencies and ships with Spark.
-                        Specify 'skip' to skip building PySpark docker image.
-  -R file               Dockerfile to build for SparkR Jobs. Builds R dependencies and ships with Spark.
-                        Specify 'skip' to skip building SparkR docker image.
+  -p file               (Optional) Dockerfile to build for PySpark Jobs. Builds Python dependencies and ships with Spark.
+                        Skips building PySpark docker image if not specified.
+  -R file               (Optional) Dockerfile to build for SparkR Jobs. Builds R dependencies and ships with Spark.
+                        Skips building SparkR docker image if not specified.
   -r repo               Repository address.
   -t tag                Tag to apply to the built image, or to identify the image to be pushed.
   -m                    Use minikube's Docker daemon.
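Patch 3 implements the opt-in behaviour with `${parameter:-word}` default expansion: PYDOCKERFILE and RDOCKERFILE fall back to the sentinel `false` unless the caller passed `-p`/`-R`, and each build step runs only when the value differs from the sentinel. A small illustration of the expansion (the Dockerfile path is hypothetical):

```bash
# ${VAR:-false} expands to "false" when VAR is unset or empty,
# and to VAR's own value otherwise.
unset PYDOCKERFILE
echo "${PYDOCKERFILE:-false}"    # prints: false

PYDOCKERFILE=bindings/python/Dockerfile
echo "${PYDOCKERFILE:-false}"    # prints: bindings/python/Dockerfile
```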
From 5ec7cb75bf476962dcabb160ff9a3672d9482bd5 Mon Sep 17 00:00:00 2001
From: Nagaram Prasad Addepally
Date: Tue, 20 Nov 2018 11:56:15 -0800
Subject: [PATCH 4/6] Update usage
---
 bin/docker-image-tool.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bin/docker-image-tool.sh b/bin/docker-image-tool.sh
index 244cf0bf4e532..e51201a77cb5d 100755
--- a/bin/docker-image-tool.sh
+++ b/bin/docker-image-tool.sh
@@ -174,8 +174,8 @@ Examples:
   - Build image in minikube with tag "testing"
     $0 -m -t testing build
 
-  - Skip building SparkR docker image
-    $0 -r repo -t tag -R skip build
+  - Build PySpark docker image
+    $0 -r docker.io/myrepo -t v2.3.0 -p kubernetes/dockerfiles/spark/bindings/python/Dockerfile build
 
   - Build and push image with tag "v2.3.0" to docker.io/myrepo
     $0 -r docker.io/myrepo -t v2.3.0 build
     $0 -r docker.io/myrepo -t v2.3.0 push

From 7601cbcd6a1ef20be8f6f74fdaa350418665f71d Mon Sep 17 00:00:00 2001
From: Nagaram Prasad Addepally
Date: Tue, 20 Nov 2018 13:20:06 -0800
Subject: [PATCH 5/6] Update setup-integration-test-env.sh to build PySpark
 and R images as per changes
---
 .../scripts/setup-integration-test-env.sh | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh b/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh
index a4a9f5b7da131..36e30d7b2cffb 100755
--- a/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh
+++ b/resource-managers/kubernetes/integration-tests/scripts/setup-integration-test-env.sh
@@ -72,10 +72,16 @@ then
   IMAGE_TAG=$(uuidgen);
   cd $UNPACKED_SPARK_TGZ
 
+  # Build PySpark image
+  LANGUAGE_BINDING_BUILD_ARGS="-p $UNPACKED_SPARK_TGZ/kubernetes/dockerfiles/spark/bindings/python/Dockerfile"
+
+  # Build SparkR image
+  LANGUAGE_BINDING_BUILD_ARGS="$LANGUAGE_BINDING_BUILD_ARGS -R $UNPACKED_SPARK_TGZ/kubernetes/dockerfiles/spark/bindings/R/Dockerfile"
+
   case $DEPLOY_MODE in
     cloud)
       # Build images
-      $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG build
+      $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG $LANGUAGE_BINDING_BUILD_ARGS build
 
       # Push images appropriately
       if [[ $IMAGE_REPO == gcr.io* ]] ;
@@ -89,13 +95,13 @@ then
     docker-for-desktop)
       # Only need to build as this will place it in our local Docker repo which is all
       # we need for Docker for Desktop to work so no need to also push
-      $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG build
+      $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -r $IMAGE_REPO -t $IMAGE_TAG $LANGUAGE_BINDING_BUILD_ARGS build
       ;;
 
     minikube)
       # Only need to build and if we do this with the -m option for minikube we will
       # build the images directly using the minikube Docker daemon so no need to push
-      $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -m -r $IMAGE_REPO -t $IMAGE_TAG build
+      $UNPACKED_SPARK_TGZ/bin/docker-image-tool.sh -m -r $IMAGE_REPO -t $IMAGE_TAG $LANGUAGE_BINDING_BUILD_ARGS build
       ;;
     *)
       echo "Unrecognized deploy mode $DEPLOY_MODE" && exit 1
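Patch 5 collects the `-p` and `-R` options in the single string LANGUAGE_BINDING_BUILD_ARGS and expands it unquoted inside the docker-image-tool.sh invocation, relying on the shell's word splitting to turn the string back into separate arguments. A minimal sketch of that pattern (paths are placeholders; it assumes none of them contains spaces):

```bash
# Accumulate optional flags in one string...
ARGS="-p /opt/spark/python/Dockerfile"
ARGS="$ARGS -R /opt/spark/R/Dockerfile"

# ...then expand it unquoted so it word-splits back into four arguments.
set -- $ARGS
printf '%s\n' "$@"   # -p, /opt/spark/python/Dockerfile, -R, /opt/spark/R/Dockerfile
```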
From 86a11a88c432313b2333b183bf8b03cf3483afa8 Mon Sep 17 00:00:00 2001
From: Nagaram Prasad Addepally
Date: Wed, 21 Nov 2018 10:36:18 -0800
Subject: [PATCH 6/6] Update docs/running-on-kubernetes.md file to reflect the
 changes
---
 docs/running-on-kubernetes.md | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md
index 905226877720a..4a8a73bafe592 100644
--- a/docs/running-on-kubernetes.md
+++ b/docs/running-on-kubernetes.md
@@ -76,6 +76,18 @@ $ ./bin/docker-image-tool.sh -r <repo> -t my-tag build
 $ ./bin/docker-image-tool.sh -r <repo> -t my-tag push
 ```
 
+By default `bin/docker-image-tool.sh` builds the docker image for running JVM jobs. You need to opt in to build the
+additional language binding docker images.
+
+Example usage is
+```bash
+# To build additional PySpark docker image
+$ ./bin/docker-image-tool.sh -r <repo> -t my-tag -p ./kubernetes/dockerfiles/spark/bindings/python/Dockerfile build
+
+# To build additional SparkR docker image
+$ ./bin/docker-image-tool.sh -r <repo> -t my-tag -R ./kubernetes/dockerfiles/spark/bindings/R/Dockerfile build
+```
+
 ## Cluster Mode
 
 To launch Spark Pi in cluster mode,
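Putting the final interface together: after patch 6, the JVM image is always built, and each binding image is built only when its Dockerfile is passed explicitly. A combined build-and-push sketch (`<repo>` is a placeholder):

```bash
# Build the JVM image plus both optional binding images, then push all three.
./bin/docker-image-tool.sh -r <repo> -t my-tag \
  -p ./kubernetes/dockerfiles/spark/bindings/python/Dockerfile \
  -R ./kubernetes/dockerfiles/spark/bindings/R/Dockerfile build
./bin/docker-image-tool.sh -r <repo> -t my-tag push
```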