From 0309cf911cc0379b7de871463c25a7c105202f4b Mon Sep 17 00:00:00 2001
From: Diana Carroll
Date: Thu, 27 Feb 2014 17:39:43 -0500
Subject: [PATCH 1/3] SPARK-1134 bug with ipython prevents non-interactive use
 with spark; only call ipython if no command line arguments were supplied

---
 bin/pyspark | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/bin/pyspark b/bin/pyspark
index ed6f8da73035a..5a12516d70740 100755
--- a/bin/pyspark
+++ b/bin/pyspark
@@ -58,7 +58,8 @@ if [ -n "$IPYTHON_OPTS" ]; then
   IPYTHON=1
 fi
 
-if [[ "$IPYTHON" = "1" ]] ; then
+# Only use ipython if no command line arguments were provided [SPARK-1134]
+if [[ "$IPYTHON" = "1" && $# = 0 ]] ; then
   exec ipython $IPYTHON_OPTS
 else
   exec "$PYSPARK_PYTHON" "$@"

From 98b3828df7f3d0ef15dbe834eea1a7ff0ddd3c13 Mon Sep 17 00:00:00 2001
From: Diana Carroll
Date: Wed, 12 Mar 2014 10:27:44 -0700
Subject: [PATCH 2/3] Cleaned up text of yarn-client instructions. [SPARK-1234]

---
 docs/running-on-yarn.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index ee1d892a3b630..161a082e44313 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -99,16 +99,16 @@ With this mode, your application is actually run on the remote machine where the
 
 ## Launch spark application with yarn-client mode.
 
-With yarn-client mode, the application will be launched locally. Just like running application or spark-shell on Local / Mesos / Standalone mode. The launch method is also the similar with them, just make sure that when you need to specify a master url, use "yarn-client" instead. And you also need to export the env value for SPARK_JAR.
+With yarn-client mode, the application will be launched locally, as when running the application or spark-shell on Local / Mesos / Standalone mode. The method to launch is similar as with those modes, except you should specify "yarn-client" as the master URL. You also need to export the env value for SPARK_JAR.
 
 Configuration in yarn-client mode:
 
-In order to tune worker core/number/memory etc. You need to export environment variables or add them to the spark configuration file (./conf/spark_env.sh). The following are the list of options.
+In order to tune worker core/number/memory etc. you need to export environment variables or add them to the spark configuration file (./conf/spark_env.sh). The following are the list of options.
 
 * `SPARK_WORKER_INSTANCES`, Number of workers to start (Default: 2)
-* `SPARK_WORKER_CORES`, Number of cores for the workers (Default: 1).
+* `SPARK_WORKER_CORES`, Number of cores for the workers (Default: 1)
 * `SPARK_WORKER_MEMORY`, Memory per Worker (e.g. 1000M, 2G) (Default: 1G)
-* `SPARK_MASTER_MEMORY`, Memory for Master (e.g. 1000M, 2G) (Default: 512 Mb)
+* `SPARK_MASTER_MEMORY`, Memory for Master (e.g. 1000M, 2G) (Default: 512 M)
 * `SPARK_YARN_APP_NAME`, The name of your application (Default: Spark)
 * `SPARK_YARN_QUEUE`, The hadoop queue to use for allocation requests (Default: 'default')
 * `SPARK_YARN_DIST_FILES`, Comma separated list of files to be distributed with the job.
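The first patch above changes how `bin/pyspark` dispatches between IPython and the standard interpreter. A hypothetical session sketching the resulting behavior (the script name `my_script.py` is illustrative, and IPython is assumed to be installed):

```sh
# After the SPARK-1134 guard, IPython is used only for an interactive
# shell, i.e. when no positional arguments are supplied.
IPYTHON=1 ./bin/pyspark               # no args: exec ipython (REPL)
IPYTHON=1 ./bin/pyspark my_script.py  # args given: exec "$PYSPARK_PYTHON" "$@"
```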
From 1dfbc4dfc5592ef8ae363f975a8e23186ad2fac4 Mon Sep 17 00:00:00 2001
From: Diana Carroll
Date: Thu, 20 Mar 2014 11:02:16 -0400
Subject: [PATCH 3/3] fix typo in running-on-yarn default SPARK_MASTER_MEMORY
 values

---
 docs/running-on-yarn.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md
index 161a082e44313..9a170f1b1ff11 100644
--- a/docs/running-on-yarn.md
+++ b/docs/running-on-yarn.md
@@ -108,7 +108,7 @@ In order to tune worker core/number/memory etc. you need to export environment v
 * `SPARK_WORKER_INSTANCES`, Number of workers to start (Default: 2)
 * `SPARK_WORKER_CORES`, Number of cores for the workers (Default: 1)
 * `SPARK_WORKER_MEMORY`, Memory per Worker (e.g. 1000M, 2G) (Default: 1G)
-* `SPARK_MASTER_MEMORY`, Memory for Master (e.g. 1000M, 2G) (Default: 512 M)
+* `SPARK_MASTER_MEMORY`, Memory for Master (e.g. 1000M, 2G) (Default: 512M)
 * `SPARK_YARN_APP_NAME`, The name of your application (Default: Spark)
 * `SPARK_YARN_QUEUE`, The hadoop queue to use for allocation requests (Default: 'default')
 * `SPARK_YARN_DIST_FILES`, Comma separated list of files to be distributed with the job.
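With the series applied, a yarn-client launch following the documented options might look like the sketch below; the assembly jar path and the resource values are assumptions for illustration, not prescribed defaults:

```sh
# Hedged example of a yarn-client launch using the documented variables.
export SPARK_JAR=assembly/target/scala-2.10/spark-assembly-0.9.0-incubating-hadoop2.2.0.jar  # path is illustrative
export SPARK_WORKER_INSTANCES=4   # number of workers (Default: 2)
export SPARK_WORKER_CORES=2       # cores per worker (Default: 1)
export SPARK_WORKER_MEMORY=2G     # memory per worker (Default: 1G)
export SPARK_MASTER_MEMORY=512M   # master memory, matching the corrected default
MASTER=yarn-client ./bin/spark-shell
```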