From 7757a70a0fd100b311486dce6616e4503a028ede Mon Sep 17 00:00:00 2001
From: "Susan X. Huynh"
Date: Mon, 30 Jan 2017 08:37:26 -0800
Subject: [PATCH] Added documentation for SparkR

---
 docs/limitations.md |  7 ++-----
 docs/quick-start.md |  4 ++++
 docs/run-job.md     |  3 ++-
 docs/spark-shell.md | 11 ++++++++++-
 4 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/docs/limitations.md b/docs/limitations.md
index 649bde9171c08..59622a5e9a749 100644
--- a/docs/limitations.md
+++ b/docs/limitations.md
@@ -5,11 +5,8 @@ feature_maturity: stable
 enterprise: 'no'
 ---
 
-* DC/OS Spark only supports submitting jars and Python scripts. It
-does not support R.
-
 * Mesosphere does not provide support for Spark app development,
-such as writing a Python app to process data from Kafka or writing 
+such as writing a Python app to process data from Kafka or writing
 Scala code to process data from HDFS.
 
 * Spark jobs run in Docker containers. The first time you run a
@@ -17,6 +14,6 @@ Spark job on a node, it might take longer than you expect because
 of the `docker pull`.
 
 * DC/OS Spark only supports running the Spark shell from within a
-DC/OS cluster. See the Spark Shell section for more information. 
+DC/OS cluster. See the Spark Shell section for more information.
 For interactive analytics, we recommend Zeppelin, which supports
 visualizations and dynamic dependency management.
diff --git a/docs/quick-start.md b/docs/quick-start.md
index c2f6379f40abc..89e371583043d 100644
--- a/docs/quick-start.md
+++ b/docs/quick-start.md
@@ -17,6 +17,10 @@ enterprise: 'no'
 
         $ dcos spark run --submit-args="https://downloads.mesosphere.com/spark/examples/pi.py 30"
 
+1. Run an R Spark job:
+
+        $ dcos spark run --submit-args="https://downloads.mesosphere.com/spark/examples/dataframe.R"
+
 1. View your job:
 
     Visit the Spark cluster dispatcher at
diff --git a/docs/run-job.md b/docs/run-job.md
index 28d483ebaf708..595d74fe27dda 100644
--- a/docs/run-job.md
+++ b/docs/run-job.md
@@ -12,9 +12,10 @@ more][13].
 
         $ dcos spark run --submit-args=`--class MySampleClass http://external.website/mysparkapp.jar 30`
 
-
         $ dcos spark run --submit-args="--py-files mydependency.py http://external.website/mysparkapp.py 30"
 
+        $ dcos spark run --submit-args="http://external.website/mysparkapp.R"
+
 `dcos spark run` is a thin wrapper around the standard Spark
 `spark-submit` script. You can submit arbitrary pass-through options
 to this script via the `--submit-args` options.
diff --git a/docs/spark-shell.md b/docs/spark-shell.md
index 8c00d6eba2d66..a73f06ef945e1 100644
--- a/docs/spark-shell.md
+++ b/docs/spark-shell.md
@@ -7,7 +7,7 @@ enterprise: 'no'
 # Interactive Spark Shell
 
 You can run Spark commands interactively in the Spark shell. The Spark shell is available
-in either Scala or Python.
+in either Scala, Python, or R.
 
 1. SSH into a node in the DC/OS cluster. [Learn how to SSH into your cluster and get the agent node ID](https://dcos.io/docs/latest/administration/access-node/sshcluster/).
 
@@ -27,6 +27,10 @@ in either Scala or Python.
 
         $ ./bin/pyspark --master mesos://:5050 --conf spark.mesos.executor.docker.image=mesosphere/spark:1.0.4-2.0.1 --conf spark.mesos.executor.home=/opt/spark/dist
 
+    Or, run the R Spark shell.
+
+        $ ./bin/sparkR --master mesos://:5050 --conf spark.mesos.executor.docker.image=mesosphere/spark:1.0.7-2.1.0-hadoop-2.6 --conf spark.mesos.executor.home=/opt/spark/dist
+
 1. Run Spark commands interactively.
 
     In the Scala shell:
@@ -38,3 +42,8 @@ in either Scala or Python.
 
         $ textFile = sc.textFile("/opt/spark/dist/README.md")
         $ textFile.count()
+
+    In the R shell:
+
+        $ df <- as.DataFrame(faithful)
+        $ head(df)
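
The `./bin/sparkR` shell documented above drops you into the SparkR DataFrame API bundled with Spark 2.x. As a minimal illustrative sketch (not part of the patch, and assuming that standard API), a session could continue past the `head(df)` example like this; `faithful` is R's built-in Old Faithful dataset, and `as.DataFrame`, `printSchema`, `count`, `groupBy`, `summarize`, and `n` are SparkR functions:

    # Convert the local R data.frame into a distributed SparkDataFrame
    df <- as.DataFrame(faithful)

    # Inspect the inferred schema and the number of rows
    printSchema(df)
    count(df)

    # Count how many times each 'waiting' value occurs
    waiting_counts <- summarize(groupBy(df, df$waiting), count = n(df$waiting))
    head(waiting_counts)

The point of the sketch is that these operations run on the Spark executors configured by the `--conf` flags in the shell command, not in the local R process, so the same code scales to data that would not fit in a single R session.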