From 6b00df2ca9cd20ba0169af1a8695410e9b26ff14 Mon Sep 17 00:00:00 2001 From: Baohe Zhang Date: Tue, 2 Jun 2020 01:44:37 -0500 Subject: [PATCH] Add README.md and update tag --- .../resources/oozie/spark_hdfs_lr/README.md | 17 +++++++++++++++++ .../oozie/spark_hdfs_lr/job.properties | 1 + .../resources/oozie/spark_hdfs_lr/workflow.xml | 6 +++--- 3 files changed, 21 insertions(+), 3 deletions(-) create mode 100644 src/main/resources/oozie/spark_hdfs_lr/README.md diff --git a/src/main/resources/oozie/spark_hdfs_lr/README.md b/src/main/resources/oozie/spark_hdfs_lr/README.md new file mode 100644 index 0000000000000..ab11ea72e8d4e --- /dev/null +++ b/src/main/resources/oozie/spark_hdfs_lr/README.md @@ -0,0 +1,17 @@ +Instructions for running this oozie application: + +- create a directory `spark_hdfs_lr/` in HDFS for the oozie application. + +- upload `workflow.xml` to `spark_hdfs_lr/apps/spark/`. + +- use `mvn clean package` to create the jar package of spark-starter if you haven't done so. + +- upload the jar package `spark-starter/target/spark-starter-2.0-SNAPSHOT-jar-with-dependencies.jar` to `spark_hdfs_lr/apps/lib/`. + +- upload the resource file `spark-starter/src/main/resources/data/lr_data.txt` to `spark_hdfs_lr/data/`. + +- update `nameNode` and `jobTracker` in `job.properties` if you are running on a cluster other than AR. + +- export OOZIE_URL, for example, `export OOZIE_URL=https://axonitered-oozie.red.ygrid.yahoo.com:4443/oozie/`.
+ +- submit the oozie job using `oozie job -run -config job.properties -auth KERBEROS` diff --git a/src/main/resources/oozie/spark_hdfs_lr/job.properties b/src/main/resources/oozie/spark_hdfs_lr/job.properties index 7a1e167d4f2e6..6304ed626629f 100644 --- a/src/main/resources/oozie/spark_hdfs_lr/job.properties +++ b/src/main/resources/oozie/spark_hdfs_lr/job.properties @@ -1,5 +1,6 @@ nameNode=hdfs://axonitered-nn1.red.ygrid.yahoo.com:8020 jobTracker=axonitered-jt1.red.ygrid.yahoo.com:8032 wfRoot=spark_hdfs_lr +sparkTag=spark_latest oozie.libpath=/user/${user.name}/${wfRoot}/apps/lib oozie.wf.application.path=${nameNode}/user/${user.name}/${wfRoot}/apps/spark diff --git a/src/main/resources/oozie/spark_hdfs_lr/workflow.xml b/src/main/resources/oozie/spark_hdfs_lr/workflow.xml index 61fa86970d4f3..1f4c4f901b036 100644 --- a/src/main/resources/oozie/spark_hdfs_lr/workflow.xml +++ b/src/main/resources/oozie/spark_hdfs_lr/workflow.xml @@ -11,7 +11,7 @@ oozie.action.sharelib.for.spark - spark_latest + ${sparkTag} yarn @@ -32,7 +32,7 @@ oozie.action.sharelib.for.spark - spark_latest + ${sparkTag} yarn @@ -53,7 +53,7 @@ oozie.action.sharelib.for.spark - spark_latest + ${sparkTag} yarn