diff --git a/bin/pyspark b/bin/pyspark
index c39eb62ce5a82..01d42025c978e 100755
--- a/bin/pyspark
+++ b/bin/pyspark
@@ -27,10 +27,14 @@ source $FWDIR/bin/utils.sh
 
 SCALA_VERSION=2.10
 
-if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
+function usage() {
   echo "Usage: ./bin/pyspark [options]" 1>&2
   $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
   exit 0
+}
+
+if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
+  usage
 fi
 
 # Exit if the user hasn't compiled Spark
@@ -68,11 +72,11 @@ fi
 # Build up arguments list manually to preserve quotes and backslashes.
 # We export Spark submit arguments as an environment variable because shell.py must run as a
 # PYTHONSTARTUP script, which does not take in arguments. This is required for IPython notebooks.
-
-gatherSparkSubmitOpts $@
+SUBMIT_USAGE_FUNCTION=usage
+gatherSparkSubmitOpts "$@"
 PYSPARK_SUBMIT_ARGS=""
 whitespace="[[:space:]]"
-for i in ${SUBMISSION_OPTS[@]}; do
+for i in "${SUBMISSION_OPTS[@]}"; do
   if [[ $i =~ \" ]]; then i=$(echo $i | sed 's/\"/\\\"/g'); fi
   if [[ $i =~ $whitespace ]]; then i=\"$i\"; fi
   PYSPARK_SUBMIT_ARGS="$PYSPARK_SUBMIT_ARGS $i"
@@ -95,8 +99,8 @@ if [[ "$1" =~ \.py$ ]]; then
   echo -e "Use ./bin/spark-submit <python file>\n" 1>&2
   primary=$1
   shift
-  gatherSparkSubmitOpts $@
-  exec $FWDIR/bin/spark-submit ${SUBMISSION_OPTS[@]} $primary ${APPLICATION_OPTS[@]}
+  gatherSparkSubmitOpts "$@"
+  exec $FWDIR/bin/spark-submit "${SUBMISSION_OPTS[@]}" $primary "${APPLICATION_OPTS[@]}"
 else
   # Only use ipython if no command line arguments were provided [SPARK-1134]
   if [[ "$IPYTHON" = "1" ]]; then
diff --git a/bin/spark-shell b/bin/spark-shell
index c30fd5cb89303..8b7ccd7439551 100755
--- a/bin/spark-shell
+++ b/bin/spark-shell
@@ -31,14 +31,19 @@ set -o posix
 ## Global script variables
 FWDIR="$(cd `dirname $0`/..; pwd)"
 
+function usage() {
+  echo "Usage: ./bin/spark-shell [options]"
+  $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
+  exit 0
+}
+
 if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
-  echo "Usage: ./bin/spark-shell [options]"
-  $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
-  exit 0
+  usage
 fi
 
 source $FWDIR/bin/utils.sh
-gatherSparkSubmitOpts $@
+SUBMIT_USAGE_FUNCTION=usage
+gatherSparkSubmitOpts "$@"
 
 function main() {
   if $cygwin; then
@@ -49,11 +54,11 @@ function main() {
     # (see https://github.com/sbt/sbt/issues/562).
     stty -icanon min 1 -echo > /dev/null 2>&1
     export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Djline.terminal=unix"
-    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main ${SUBMISSION_OPTS[@]} spark-shell ${APPLICATION_OPTS[@]}
+    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
     stty icanon echo > /dev/null 2>&1
   else
     export SPARK_SUBMIT_OPTS
-    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main ${SUBMISSION_OPTS[@]} spark-shell ${APPLICATION_OPTS[@]}
+    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
   fi
 }
 
diff --git a/bin/utils.sh b/bin/utils.sh
index 9c56b99488e38..9c5fef1a42667 100644
--- a/bin/utils.sh
+++ b/bin/utils.sh
@@ -19,22 +19,24 @@
 
 # Gather all all spark-submit options into SUBMISSION_OPTS
 function gatherSparkSubmitOpts() {
+
+  if [ -z "$SUBMIT_USAGE_FUNCTION" ]; then
+
+    echo "Function for printing usage of $0 is not set." 1>&2
+    echo "Please set usage function to shell variable 'SUBMIT_USAGE_FUNCTION' in $0" 1>&2
+    exit 1
+  fi
+
   SUBMISSION_OPTS=()
   APPLICATION_OPTS=()
   while (($#)); do
     case $1 in
-      --master | --deploy-mode | --class | --name | --jars | --py-files | --files)
-        ;&
-
-      --conf | --properties-file | --driver-memory | --driver-java-options)
-        ;&
-
-      --driver-library-path | --driver-class-path | --executor-memory | --driver-cores)
-        ;&
-
+      --master | --deploy-mode | --class | --name | --jars | --py-files | --files | \
+      --conf | --properties-file | --driver-memory | --driver-java-options | \
+      --driver-library-path | --driver-class-path | --executor-memory | --driver-cores | \
       --total-executor-cores | --executor-cores | --queue | --num-executors | --archives)
         if [[ $# -lt 2 ]]; then
-          usage
+          "$SUBMIT_USAGE_FUNCTION"
          exit 1;
         fi
         SUBMISSION_OPTS+=($1); shift
diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py
index c7f7c1fe591b0..4672171328faf 100644
--- a/python/pyspark/java_gateway.py
+++ b/python/pyspark/java_gateway.py
@@ -39,7 +39,11 @@ def launch_gateway():
         submit_args = os.environ.get("PYSPARK_SUBMIT_ARGS")
         submit_args = submit_args if submit_args is not None else ""
         submit_args = shlex.split(submit_args)
-        command = [os.path.join(SPARK_HOME, script)] + submit_args + ["pyspark-shell"]
+        application_opts = os.environ.get("APPLICATION_OPTS")
+        application_opts = application_opts if application_opts is not None else ""
+        application_opts = shlex.split(application_opts)
+        command = [os.path.join(SPARK_HOME, script)] + submit_args + \
+            ["pyspark-shell"] + application_opts
         if not on_windows:
             # Don't send ctrl-c / SIGINT to the Java gateway:
             def preexec_func():
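
For context, the bin/utils.sh hunk does two things at once: it replaces the `;&` fall-through terminators (a bash 4.0 feature, not available in the bash 3.x that ships with OS X) with a single `|`-joined case pattern, and it routes the missing-argument error path through a caller-supplied usage function instead of a hard-coded `usage`. The sketch below is a trimmed-down illustration of those two shell patterns plus the `"$@"`/`"${arr[@]}"` quoting fix the diff applies everywhere; the script name, the `gather` function, and the shortened option list are invented for the example and are not the actual bin/utils.sh.

#!/usr/bin/env bash
# Illustrative sketch only -- not the real gatherSparkSubmitOpts.

function usage() {
  echo "Usage: ./example [options] [app arguments]" 1>&2
  exit 0
}

# utils.sh refuses to run unless the caller sets this variable, so each
# entry script (pyspark, spark-shell) can supply its own usage text.
SUBMIT_USAGE_FUNCTION=usage

function gather() {
  SUBMISSION_OPTS=()
  APPLICATION_OPTS=()
  while (($#)); do
    case $1 in
      # One pattern joined with '|' and line continuations; unlike
      # the ';&' fall-through, this also works on bash 3.x.
      --master | --name | \
      --conf)
        if [[ $# -lt 2 ]]; then
          "$SUBMIT_USAGE_FUNCTION"   # indirect call through the variable
          exit 1
        fi
        SUBMISSION_OPTS+=("$1"); shift   # the option itself
        SUBMISSION_OPTS+=("$1"); shift   # its value
        ;;
      *)
        APPLICATION_OPTS+=("$1"); shift
        ;;
    esac
  done
}

# Quoting matters here: a bare $@ or ${SUBMISSION_OPTS[@]} re-splits any
# argument containing whitespace, which is exactly what the diff fixes.
gather "$@"
printf 'submission:  %s\n' "${SUBMISSION_OPTS[@]}"
printf 'application: %s\n' "${APPLICATION_OPTS[@]}"

Running `./example --name "My App" app.py arg1` keeps `My App` as a single submission value only because of the added quotes; with the old unquoted forms it would be re-split into two words on its way to spark-submit.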