From 0889fbaf959e25ebb79e691692a02a93962727d0 Mon Sep 17 00:00:00 2001
From: Qi Shao
Date: Mon, 3 Dec 2018 15:36:41 -0800
Subject: [PATCH] [SPARK-26083][K8S] Add Copy pyspark into corresponding dir cmd
 in pyspark Dockerfile

When I try to run `./bin/pyspark` in a pod in Kubernetes (image built without
changes from the pyspark Dockerfile), I get an error:
```
$SPARK_HOME/bin/pyspark --deploy-mode client --master k8s://https://$KUBERNETES_SERVICE_HOST:$KUBERNETES_SERVICE_PORT_HTTPS ...
Python 2.7.15 (default, Aug 22 2018, 13:24:18)
[GCC 6.4.0] on linux2
Type "help", "copyright", "credits" or "license" for more information.
Could not open PYTHONSTARTUP
IOError: [Errno 2] No such file or directory: '/opt/spark/python/pyspark/shell.py'
```
This happens because the `pyspark` folder doesn't exist under `/opt/spark/python/`.

## What changes were proposed in this pull request?

Added `COPY python/pyspark ${SPARK_HOME}/python/pyspark` to the pyspark Dockerfile to resolve the issue above.

## How was this patch tested?

Google Kubernetes Engine

Closes #23037 from AzureQ/master.

Authored-by: Qi Shao
Signed-off-by: Marcelo Vanzin
---
 bin/docker-image-tool.sh                                           | 1 +
 .../docker/src/main/dockerfiles/spark/bindings/python/Dockerfile   | 1 +
 2 files changed, 2 insertions(+)

diff --git a/bin/docker-image-tool.sh b/bin/docker-image-tool.sh
index fbf9c9e448fd1..4f66137eb1c7a 100755
--- a/bin/docker-image-tool.sh
+++ b/bin/docker-image-tool.sh
@@ -107,6 +107,7 @@ function create_dev_build_context {(
     "$PYSPARK_CTX/kubernetes/dockerfiles"
   mkdir "$PYSPARK_CTX/python"
   cp -r "python/lib" "$PYSPARK_CTX/python/lib"
+  cp -r "python/pyspark" "$PYSPARK_CTX/python/pyspark"
 
   local R_CTX="$CTX_DIR/sparkr"
   mkdir -p "$R_CTX/kubernetes"
diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile
index de1a0617b1cc5..36b91eb9a3aac 100644
--- a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile
+++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile
@@ -38,6 +38,7 @@ RUN apk add --no-cache python && \
     # Removed the .cache to save space
     rm -r /root/.cache
 
+COPY python/pyspark ${SPARK_HOME}/python/pyspark
 COPY python/lib ${SPARK_HOME}/python/lib
 ENV PYTHONPATH ${SPARK_HOME}/python/lib/pyspark.zip:${SPARK_HOME}/python/lib/py4j-*.zip
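
As a rough local smoke test (not part of the patch itself), one could rebuild the PySpark image from a Spark checkout and check that the previously missing `shell.py` is now baked into the image. The repository name and tag below are placeholders, and the exact `docker-image-tool.sh` flags and Dockerfile path are assumptions that may vary between Spark versions:
```
# Rebuild the images from the Spark source tree; -r/-t are placeholder values,
# and -p points at the PySpark Dockerfile touched by this patch.
./bin/docker-image-tool.sh -r docker.io/myrepo -t smoke-test \
  -p resource-managers/kubernetes/docker/src/main/dockerfiles/spark/bindings/python/Dockerfile \
  build

# The file the IOError complained about should now exist inside the image
# (spark-py is the image name docker-image-tool.sh conventionally produces).
docker run --rm docker.io/myrepo/spark-py:smoke-test \
  ls /opt/spark/python/pyspark/shell.py
```
If the `COPY` line is in place, the final `ls` prints the path instead of failing, and `./bin/pyspark` inside the pod can import its startup script.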