Skip to content

Commit

Permalink
Merge pull request apache#345 from colorant/yarn
Browse files Browse the repository at this point in the history
support distributing extra files to worker for yarn client mode

So that users don't need to package all dependencies into a single assembly jar to use as the Spark app jar.
  • Loading branch information
tgravescs committed Jan 8, 2014
2 parents bb6a39a + 67af803 commit 6eef78d
Show file tree
Hide file tree
Showing 4 changed files with 7 additions and 3 deletions.
2 changes: 2 additions & 0 deletions docs/running-on-yarn.md
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@ For example:
SPARK_YARN_APP_JAR=examples/target/scala-{{site.SCALA_VERSION}}/spark-examples-assembly-{{site.SPARK_VERSION}}.jar \
MASTER=yarn-client ./bin/spark-shell

You can also send extra files to the YARN cluster for the workers to use by exporting a comma-separated list, e.g. SPARK_YARN_DIST_FILES=file1,file2,...

# Building Spark for Hadoop/YARN 2.2.x

See [Building Spark with Maven](building-with-maven.html) for instructions on how to build Spark using the Maven process.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ class Client(args: ClientArguments, conf: Configuration, sparkConf: SparkConf)
(System.getenv("SPARK_JAR") == null) -> "Error: You must set SPARK_JAR environment variable!",
(args.userJar == null) -> "Error: You must specify a user jar!",
(args.userClass == null) -> "Error: You must specify a user class!",
(args.numWorkers <= 0) -> "Error: You must specify atleast 1 worker!",
(args.numWorkers <= 0) -> "Error: You must specify at least 1 worker!",
(args.amMemory <= YarnAllocationHandler.MEMORY_OVERHEAD) -> ("Error: AM memory size must be " +
"greater than: " + YarnAllocationHandler.MEMORY_OVERHEAD),
(args.workerMemory <= YarnAllocationHandler.MEMORY_OVERHEAD) -> ("Error: Worker memory size " +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ private[spark] class YarnClientSchedulerBackend(
val defaultWorkerNumber = "1"

val userJar = System.getenv("SPARK_YARN_APP_JAR")
val distFiles = System.getenv("SPARK_YARN_DIST_FILES")
var workerCores = System.getenv("SPARK_WORKER_CORES")
var workerMemory = System.getenv("SPARK_WORKER_MEMORY")
var workerNumber = System.getenv("SPARK_WORKER_INSTANCES")
Expand All @@ -64,7 +65,8 @@ private[spark] class YarnClientSchedulerBackend(
"--worker-memory", workerMemory,
"--worker-cores", workerCores,
"--num-workers", workerNumber,
"--master-class", "org.apache.spark.deploy.yarn.WorkerLauncher"
"--master-class", "org.apache.spark.deploy.yarn.WorkerLauncher",
"--files", distFiles
)

val args = new ClientArguments(argsArray, conf)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ class Client(args: ClientArguments, conf: Configuration, sparkConf: SparkConf)
(System.getenv("SPARK_JAR") == null) -> "Error: You must set SPARK_JAR environment variable!",
(args.userJar == null) -> "Error: You must specify a user jar!",
(args.userClass == null) -> "Error: You must specify a user class!",
(args.numWorkers <= 0) -> "Error: You must specify atleast 1 worker!",
(args.numWorkers <= 0) -> "Error: You must specify at least 1 worker!",
(args.amMemory <= YarnAllocationHandler.MEMORY_OVERHEAD) -> ("Error: AM memory size must be" +
"greater than: " + YarnAllocationHandler.MEMORY_OVERHEAD),
(args.workerMemory <= YarnAllocationHandler.MEMORY_OVERHEAD) -> ("Error: Worker memory size" +
Expand Down

0 comments on commit 6eef78d

Please sign in to comment.