[SPARK-556] Kerberos support in history server (apache#233)
* Kerberos support in the history server: added the krb5conf, principal, and keytab-secret-path config options, and updated marathon.json to use them.

* Build the history server stub universe in the Makefile, use a fixture to add stub repos.

* Adding history server to tests

* Fixed how stub universes are added. Added a job that logs to the history server; the test passes.

* Add configure_universe as a dependency

* Updated history server docs.

* Fixed the Makefile, made the user configurable, made the default user "nobody".

* Made the keytab path configurable

* Made spark-history package/service name in tests configurable from env var.
susanxhuynh authored Dec 18, 2017
1 parent 9be6d18 commit 772d064
Showing 13 changed files with 227 additions and 43 deletions.
17 changes: 13 additions & 4 deletions Makefile
@@ -112,6 +112,7 @@ $(CLI_DIST_DIR):
cli: $(CLI_DIST_DIR)

UNIVERSE_URL_PATH ?= stub-universe-url
HISTORY_URL_PATH := $(UNIVERSE_URL_PATH).history
$(UNIVERSE_URL_PATH): $(CLI_DIST_DIR) docker-dist
UNIVERSE_URL_PATH=$(UNIVERSE_URL_PATH) \
TEMPLATE_CLI_VERSION=$(CLI_VERSION) \
@@ -122,7 +123,15 @@ $(UNIVERSE_URL_PATH): $(CLI_DIST_DIR) docker-dist
$(CLI_DIST_DIR)/dcos-spark-darwin \
$(CLI_DIST_DIR)/dcos-spark-linux \
$(CLI_DIST_DIR)/dcos-spark.exe \
$(CLI_DIST_DIR)/*.whl;
$(CLI_DIST_DIR)/*.whl; \
UNIVERSE_URL_PATH=$(HISTORY_URL_PATH) \
TEMPLATE_DEFAULT_DOCKER_IMAGE=`cat docker-dist` \
$(TOOLS_DIR)/publish_aws.py \
spark-history \
$(ROOT_DIR)/history/package/; \
cat $(HISTORY_URL_PATH) >> $(UNIVERSE_URL_PATH);

stub-universe: $(UNIVERSE_URL_PATH)

DCOS_SPARK_TEST_JAR_PATH ?= $(ROOT_DIR)/dcos-spark-scala-tests-assembly-0.1-SNAPSHOT.jar
$(DCOS_SPARK_TEST_JAR_PATH):
@@ -176,8 +185,8 @@ test: test-env $(DCOS_SPARK_TEST_JAR_PATH) $(MESOS_SPARK_TEST_JAR_PATH) $(UNIVER
fi; \
fi; \
export CLUSTER_URL=`cat cluster-url`
$(TOOLS_DIR)/./dcos_login.py
dcos package repo add --index=0 spark-aws `cat stub-universe-url`
$(TOOLS_DIR)/./dcos_login.py; \
export STUB_UNIVERSE_URL=`cat $(UNIVERSE_URL_PATH)`; \
SCALA_TEST_JAR_PATH=$(DCOS_SPARK_TEST_JAR_PATH) \
TEST_JAR_PATH=$(MESOS_SPARK_TEST_JAR_PATH) \
S3_BUCKET=$(S3_BUCKET) \
Expand All @@ -187,7 +196,7 @@ test: test-env $(DCOS_SPARK_TEST_JAR_PATH) $(MESOS_SPARK_TEST_JAR_PATH) $(UNIVER
clean: clean-dist clean-cluster
rm -rf test-env
rm -rf $(CLI_DIST_DIR)
for f in "$(MESOS_SPARK_TEST_JAR_PATH)" "$(DCOS_SPARK_TEST_JAR_PATH)" "cluster-url" "$(UNIVERSE_URL_PATH)" "docker-build" "docker-dist" ; do \
for f in "$(MESOS_SPARK_TEST_JAR_PATH)" "$(DCOS_SPARK_TEST_JAR_PATH)" "cluster-url" "$(UNIVERSE_URL_PATH)" "$(HISTORY_URL_PATH)" "docker-build" "docker-dist" ; do \
[ ! -e $$f ] || rm $$f; \
done; \

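With this change, `make stub-universe` publishes both the Spark and spark-history stub universes and appends the history URL to the same file, so `$(UNIVERSE_URL_PATH)` ends up holding one URL per line. A minimal sketch of consuming that file by hand (the repo names here are illustrative, not part of the Makefile):

    # Build and publish both stub universes.
    make stub-universe

    # Register each stub universe with the attached cluster, Spark's first.
    i=0
    while read -r url; do
        dcos package repo add --index=0 "stub-universe-$i" "$url"
        i=$((i + 1))
    done < stub-universe-url
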
4 changes: 3 additions & 1 deletion docs/history-server.md
@@ -20,7 +20,9 @@ DC/OS Apache Spark includes The [Spark History Server][3]. Because the history s
1. Create `spark-history-options.json`:

{
"hdfs-config-url": "http://api.hdfs.marathon.l4lb.thisdcos.directory/v1/endpoints"
"service": {
"hdfs-config-url": "http://api.hdfs.marathon.l4lb.thisdcos.directory/v1/endpoints"
}
}

1. Install The Spark History Server:
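The install command itself is elided above; a sketch of that step, assuming the standard DC/OS CLI and the `spark-history` package name used throughout this repo:

    dcos package install spark-history --options=spark-history-options.json
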
21 changes: 11 additions & 10 deletions docs/kerberos.md
@@ -52,19 +52,20 @@ Spark (and all Kerberos-enabled) components need a valid `krb5.conf` file. You ca
1. Make sure your keytab is accessible from the DC/OS [Secret Store](https://docs.mesosphere.com/latest/security/secrets/).

1. If you've enabled the history server via `history-server.enabled`, you must also configure the principal and keytab for the history server. **WARNING**: The keytab contains secrets; in the current history server package the keytab is not stored securely. See [Limitations][9].
1. If you are using the history server, you must also configure the `krb5.conf`, principal, and keytab
for the history server.

Base64 encode your keytab:

cat spark.keytab | base64

And add the following to your configuration file:
Add the Kerberos configurations to your spark-history JSON configuration file:

{
"history-server": {
"service": {
"hdfs-config-url": "http://api.hdfs.marathon.l4lb.thisdcos.directory/v1/endpoints"
},
"security": {
"kerberos": {
"principal": "spark@REALM",
"keytab": "<base64 encoding>"
"krb5conf": "<base64_encoding>",
"principal": "<Kerberos principal>", # e.g. spark@REALM
"keytab": "<keytab secret path>" # e.g. __dcos_base64__hdfs_keytab
}
}
}
@@ -87,7 +88,7 @@ Submit the job with the keytab:
Submit the job with the ticket:

dcos spark run --submit-args="\
--kerberos-principal hdfs/name-0-node.hdfs.autoip.dcos.thisdcos.directory@LOCAL \
--kerberos-principal user@REALM \
--tgt-secret-path /__dcos_base64__tgt \
--conf ... --class MySparkJob <url> <args>"

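A sketch of preparing the three Kerberos values, assuming GNU coreutils `base64` and the DC/OS Enterprise CLI `security` subcommand (the secret path `__dcos_base64__hdfs_keytab` is the example value from the snippet above):

    # Base64-encode krb5.conf for the "krb5conf" field.
    base64 -w 0 krb5.conf > krb5.conf.base64

    # Base64-encode the keytab and store it in the secret store under the
    # path referenced by the "keytab" field.
    base64 -w 0 spark.keytab > spark.keytab.base64
    dcos security secrets create --value-file=spark.keytab.base64 __dcos_base64__hdfs_keytab
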
3 changes: 2 additions & 1 deletion history/bin/universe.sh
@@ -4,6 +4,7 @@ set -e -x -o pipefail

DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
HISTORY_DIR="${DIR}/.."
TOOLS_DIR="${DIR}/../../tools"

function check_env {
if [ -z "${DOCKER_IMAGE}" ]; then
@@ -15,7 +16,7 @@ function check_env {

function make_universe {
TEMPLATE_DEFAULT_DOCKER_IMAGE=${DOCKER_IMAGE} \
${COMMONS_DIR}/tools/ci_upload.py \
${TOOLS_DIR}/publish_aws.py \
spark-history \
${HISTORY_DIR}/package
}
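The script still requires `DOCKER_IMAGE` in the environment (enforced by `check_env`); a sketch of a direct invocation with a placeholder image tag:

    DOCKER_IMAGE=mesosphere/spark:<tag> ./history/bin/universe.sh
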
29 changes: 28 additions & 1 deletion history/package/config.json
@@ -30,7 +30,7 @@
"user": {
"description": "OS user",
"type": "string",
"default": "root"
"default": "nobody"
},
"docker-image": {
"description": "Docker image to run in. See https://hub.docker.com/r/mesosphere/spark/tags/ for options.",
@@ -63,6 +63,33 @@
}
},
"required": ["hdfs-config-url"]
},
"security": {
"description": "Security configuration properties",
"type": "object",
"properties": {
"kerberos": {
"description": "Kerberos configuration.",
"type": "object",
"properties": {
"krb5conf": {
"description": "Base64 encoded krb5.conf file to access your KDC.",
"type": "string",
"default": ""
},
"principal": {
"description": "Kerberos principal.",
"default": "",
"type": "string"
},
"keytab": {
"description": "Keytab path in the secret store.",
"default": "",
"type": "string"
}
}
}
}
}
},
"required": ["service"]
51 changes: 42 additions & 9 deletions history/package/marathon.json.mustache
@@ -2,27 +2,60 @@
"id": "{{service.name}}",
"cpus": {{service.cpus}},
"mem": {{service.mem}},
{{^security.kerberos.principal}}
"cmd": "SPARK_HISTORY_OPTS=\"-Dspark.history.ui.port=${PORT0} ${SPARK_HISTORY_OPTS}\" ./bin/spark-class org.apache.spark.deploy.history.HistoryServer",
{{/security.kerberos.principal}}
{{#security.kerberos.principal}}
"cmd": "SPARK_HISTORY_OPTS=\"-Dspark.history.ui.port=${PORT0} ${SPARK_HISTORY_OPTS} ${SPARK_HISTORY_KERBEROS_OPTS}\" ./bin/spark-class org.apache.spark.deploy.history.HistoryServer",
{{/security.kerberos.principal}}
"user": "{{service.user}}",
"env": {
{{#security.kerberos.krb5conf}}
"KRB5_CONFIG_BASE64": "{{security.kerberos.krb5conf}}",
{{/security.kerberos.krb5conf}}
"SPARK_USER": "{{service.user}}",
"APPLICATION_WEB_PROXY_BASE": "/service/{{service.name}}",
{{#security.kerberos.principal}}
"SPARK_HISTORY_KERBEROS_OPTS": "-Dspark.history.kerberos.enabled=true -Dspark.history.kerberos.principal={{security.kerberos.principal}} -Dspark.history.kerberos.keytab=/opt/spark/hdfs.keytab",
{{/security.kerberos.principal}}
"SPARK_HISTORY_OPTS": "-Dspark.history.fs.logDirectory={{service.log-dir}} -Dspark.history.fs.cleaner.enabled={{service.cleaner.enabled}} -Dspark.history.fs.cleaner.interval={{service.cleaner.interval}} -Dspark.history.fs.cleaner.maxAge={{service.cleaner.max-age}}"
},
"ports": [0],
"container": {
"type": "DOCKER",
"type": "MESOS",
"docker": {
"image": "{{service.docker-image}}",
"network": "HOST",
"forcePullImage": true,
"parameters": [
{
"key": "user",
"value": "{{service.user}}"
}
]
"forcePullImage": true
}
{{#security.kerberos.keytab}}
,
"volumes": [
{
"containerPath": "/opt/spark/hdfs.keytab",
"secret": "hdfs_keytab",
"hostPath": ""
}
]
{{/security.kerberos.keytab}}
},
{{#security.kerberos.keytab}}
"secrets": {
"hdfs_keytab": {
"source": "{{security.kerberos.keytab}}"
}
},
{{/security.kerberos.keytab}}
"healthChecks": [
{
"portIndex": 0,
"protocol": "MESOS_HTTP",
"path": "/",
"gracePeriodSeconds": 5,
"intervalSeconds": 60,
"timeoutSeconds": 10,
"maxConsecutiveFailures": 3
}
],
"labels": {
"DCOS_SERVICE_NAME": "{{service.name}}",
"DCOS_SERVICE_PORT_INDEX": "0",
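When `security.kerberos.principal` is set, the rendered app folds `SPARK_HISTORY_KERBEROS_OPTS` into `SPARK_HISTORY_OPTS`, so the container effectively launches the server as below (a sketch using the example principal from the docs; the keytab is mounted at `/opt/spark/hdfs.keytab` by the secret volume):

    SPARK_HISTORY_KERBEROS_OPTS="-Dspark.history.kerberos.enabled=true \
      -Dspark.history.kerberos.principal=spark@REALM \
      -Dspark.history.kerberos.keytab=/opt/spark/hdfs.keytab"
    SPARK_HISTORY_OPTS="-Dspark.history.ui.port=${PORT0} ${SPARK_HISTORY_OPTS} ${SPARK_HISTORY_KERBEROS_OPTS}" \
      ./bin/spark-class org.apache.spark.deploy.history.HistoryServer
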
2 changes: 1 addition & 1 deletion history/package/package.json
@@ -19,6 +19,6 @@
"analytics"
],
"website": "https://docs.mesosphere.com/service-docs/spark/",
"version": "2.1.0-1",
"version": "{{package-version}}",
"minDcosReleaseVersion": "1.8"
}
7 changes: 7 additions & 0 deletions tests/conftest.py
@@ -0,0 +1,7 @@
import pytest
import sdk_repository


@pytest.fixture(scope='session')
def configure_universe():
yield from sdk_repository.universe_session()
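
Tests opt into the stub repos by naming the fixture; `sdk_repository.universe_session()` reads the stub universe URLs from `STUB_UNIVERSE_URL`, adds them as package repos for the session, and removes them afterwards. A sketch of a local run, assuming the files produced by the Makefile targets above:

    CLUSTER_URL=$(cat cluster-url) \
    STUB_UNIVERSE_URL=$(cat stub-universe-url) \
    py.test -s tests/
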
36 changes: 36 additions & 0 deletions tests/resources/hdfsclient.json
@@ -0,0 +1,36 @@
{
"id": "hdfsclient",
"mem": 1024,
"user": "nobody",
"container": {
"type": "MESOS",
"docker": {
"image": "nvaziri/hdfs-client:dev",
"forcePullImage": true
},
"volumes": [
{
"containerPath": "/hadoop-2.6.0-cdh5.9.1/hdfs.keytab",
"secret": "hdfs_keytab",
"hostPath": ""
}
]
},
"secrets": {
"hdfs_keytab": {
"source": "__dcos_base64___keytab"
}
},
"networks": [
{
"mode": "host"
}
],
"env": {
"REALM": "LOCAL",
"KDC_ADDRESS": "kdc.marathon.autoip.dcos.thisdcos.directory:2500",
"JAVA_HOME": "/usr/lib/jvm/default-java",
"KRB5_CONFIG": "/etc/krb5.conf",
"HDFS_SERVICE_NAME": "hdfs"
}
}
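
Once the `__dcos_base64___keytab` secret exists, this fixture app can be deployed directly; a sketch with the standard Marathon CLI:

    dcos marathon app add tests/resources/hdfsclient.json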