diff --git a/Makefile b/Makefile
index 1e0e3af95d950..400d9f2d4c1eb 100644
--- a/Makefile
+++ b/Makefile
@@ -112,6 +112,7 @@ $(CLI_DIST_DIR):
 cli: $(CLI_DIST_DIR)
 
 UNIVERSE_URL_PATH ?= stub-universe-url
+HISTORY_URL_PATH := $(UNIVERSE_URL_PATH).history
 $(UNIVERSE_URL_PATH): $(CLI_DIST_DIR) docker-dist
 	UNIVERSE_URL_PATH=$(UNIVERSE_URL_PATH) \
 	TEMPLATE_CLI_VERSION=$(CLI_VERSION) \
@@ -122,7 +123,15 @@ $(UNIVERSE_URL_PATH): $(CLI_DIST_DIR) docker-dist
 		$(CLI_DIST_DIR)/dcos-spark-darwin \
 		$(CLI_DIST_DIR)/dcos-spark-linux \
 		$(CLI_DIST_DIR)/dcos-spark.exe \
-		$(CLI_DIST_DIR)/*.whl;
+		$(CLI_DIST_DIR)/*.whl; \
+	UNIVERSE_URL_PATH=$(HISTORY_URL_PATH) \
+	TEMPLATE_DEFAULT_DOCKER_IMAGE=`cat docker-dist` \
+	$(TOOLS_DIR)/publish_aws.py \
+		spark-history \
+		$(ROOT_DIR)/history/package/; \
+	cat $(HISTORY_URL_PATH) >> $(UNIVERSE_URL_PATH);
+
+stub-universe: $(UNIVERSE_URL_PATH)
 
 DCOS_SPARK_TEST_JAR_PATH ?= $(ROOT_DIR)/dcos-spark-scala-tests-assembly-0.1-SNAPSHOT.jar
 $(DCOS_SPARK_TEST_JAR_PATH):
@@ -176,8 +185,8 @@ test: test-env $(DCOS_SPARK_TEST_JAR_PATH) $(MESOS_SPARK_TEST_JAR_PATH) $(UNIVER
 		fi; \
 	fi; \
 	export CLUSTER_URL=`cat cluster-url`
-	$(TOOLS_DIR)/./dcos_login.py
-	dcos package repo add --index=0 spark-aws `cat stub-universe-url`
+	$(TOOLS_DIR)/./dcos_login.py; \
+	export STUB_UNIVERSE_URL=`cat $(UNIVERSE_URL_PATH)`; \
 	SCALA_TEST_JAR_PATH=$(DCOS_SPARK_TEST_JAR_PATH) \
 	TEST_JAR_PATH=$(MESOS_SPARK_TEST_JAR_PATH) \
 	S3_BUCKET=$(S3_BUCKET) \
@@ -187,7 +196,7 @@ test: test-env $(DCOS_SPARK_TEST_JAR_PATH) $(MESOS_SPARK_TEST_JAR_PATH) $(UNIVER
 
 clean: clean-dist clean-cluster
 	rm -rf test-env
 	rm -rf $(CLI_DIST_DIR)
-	for f in "$(MESOS_SPARK_TEST_JAR_PATH)" "$(DCOS_SPARK_TEST_JAR_PATH)" "cluster-url" "$(UNIVERSE_URL_PATH)" "docker-build" "docker-dist" ; do \
+	for f in "$(MESOS_SPARK_TEST_JAR_PATH)" "$(DCOS_SPARK_TEST_JAR_PATH)" "cluster-url" "$(UNIVERSE_URL_PATH)" "$(HISTORY_URL_PATH)" "docker-build" "docker-dist" ; do \
 		[ ! -e $$f ] || rm $$f; \
 	done; \
diff --git a/docs/history-server.md b/docs/history-server.md
index e4490f37cde08..c11b68a15680b 100644
--- a/docs/history-server.md
+++ b/docs/history-server.md
@@ -20,7 +20,9 @@ DC/OS Apache Spark includes The [Spark History Server][3]. Because the history s
 1. Create `spark-history-options.json`:
 
        {
-         "hdfs-config-url": "http://api.hdfs.marathon.l4lb.thisdcos.directory/v1/endpoints"
+         "service": {
+           "hdfs-config-url": "http://api.hdfs.marathon.l4lb.thisdcos.directory/v1/endpoints"
+         }
       }
 
 1. Install The Spark History Server:
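With the `spark-history-options.json` shown above in place, installation is a single CLI step. An illustrative command (assuming the package is published as `spark-history`, the name used throughout this change) would be:

    dcos package install spark-history --options=spark-history-options.json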
diff --git a/docs/kerberos.md b/docs/kerberos.md
index c312580e3518c..bf460f5c26598 100644
--- a/docs/kerberos.md
+++ b/docs/kerberos.md
@@ -52,19 +52,20 @@ Spark (and all Kerberos-enabed) components need a valid `krb5.conf` file. You ca
 
 1. Make sure your keytab is accessible from the DC/OS [Secret Store][https://docs.mesosphere.com/latest/security/secrets/].
 
-1. If you've enabled the history server via `history-server.enabled`, you must also configure the principal and keytab for the history server. **WARNING**: The keytab contains secrets, in the current history server package the keytab is not stored securely. See [Limitations][9]
+1. If you are using the history server, you must also configure the `krb5.conf`, principal, and keytab
+   for the history server.
 
-    Base64 encode your keytab:
-
-        cat spark.keytab | base64
-
-    And add the following to your configuration file:
+    Add the Kerberos configurations to your spark-history JSON configuration file:
 
         {
-            "history-server": {
+            "service": {
+                "hdfs-config-url": "http://api.hdfs.marathon.l4lb.thisdcos.directory/v1/endpoints"
+            },
+            "security": {
                 "kerberos": {
-                    "principal": "spark@REALM",
-                    "keytab": ""
+                    "krb5conf": "",
+                    "principal": "", # e.g. spark@REALM
+                    "keytab": "" # e.g. __dcos_base64__hdfs_keytab
                 }
             }
         }
@@ -87,7 +88,7 @@ Submit the job with the keytab:
 
 Submit the job with the ticket:
 
     dcos spark run --submit-args="\
-    --kerberos-principal hdfs/name-0-node.hdfs.autoip.dcos.thisdcos.directory@LOCAL \
+    --kerberos-principal user@REALM \
     --tgt-secret-path /__dcos_base64__tgt \
     --conf ... --class MySparkJob "
diff --git a/history/bin/universe.sh b/history/bin/universe.sh
index 70ab1cd00ba5c..048048074c1ce 100755
--- a/history/bin/universe.sh
+++ b/history/bin/universe.sh
@@ -4,6 +4,7 @@ set -e -x -o pipefail
 
 DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 HISTORY_DIR="${DIR}/.."
+TOOLS_DIR="${DIR}/../../tools"
 
 function check_env {
     if [ -z "${DOCKER_IMAGE}" ]; then
@@ -15,7 +16,7 @@ function check_env {
 
 function make_universe {
     TEMPLATE_DEFAULT_DOCKER_IMAGE=${DOCKER_IMAGE} \
-        ${COMMONS_DIR}/tools/ci_upload.py \
+        ${TOOLS_DIR}/publish_aws.py \
         spark-history \
         ${HISTORY_DIR}/package
 }
diff --git a/history/package/config.json b/history/package/config.json
index 59b36818df6c3..c3d0da2b3dc23 100644
--- a/history/package/config.json
+++ b/history/package/config.json
@@ -30,7 +30,7 @@
     "user": {
       "description": "OS user",
       "type": "string",
-      "default": "root"
+      "default": "nobody"
     },
     "docker-image": {
       "description": "Docker image to run in. See https://hub.docker.com/r/mesosphere/spark/tags/ for options.",
@@ -63,6 +63,33 @@
         }
       },
       "required": ["hdfs-config-url"]
+    },
+    "security": {
+      "description": "Security configuration properties",
+      "type": "object",
+      "properties": {
+        "kerberos": {
+          "description": "Kerberos configuration.",
+          "type": "object",
+          "properties": {
+            "krb5conf": {
+              "description": "Base64 encoded krb5.conf file to access your KDC.",
+              "type": "string",
+              "default": ""
+            },
+            "principal": {
+              "description": "Kerberos principal.",
+              "default": "",
+              "type": "string"
+            },
+            "keytab": {
+              "description": "Keytab path in the secret store.",
+              "default": "",
+              "type": "string"
+            }
+          }
+        }
+      }
     }
   },
   "required": ["service"]
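The `krb5conf` option defined above takes the contents of `krb5.conf` as a single base64 string. A minimal sketch of producing that value with GNU coreutils (`-w 0` disables line wrapping; on macOS, `base64 -i krb5.conf` typically gives the same unwrapped output):

    base64 -w 0 krb5.conf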
"SPARK_HISTORY_KERBEROS_OPTS": "-Dspark.history.kerberos.enabled=true -Dspark.history.kerberos.principal={{security.kerberos.principal}} -Dspark.history.kerberos.keytab=/opt/spark/hdfs.keytab", +{{/security.kerberos.principal}} "SPARK_HISTORY_OPTS": "-Dspark.history.fs.logDirectory={{service.log-dir}} -Dspark.history.fs.cleaner.enabled={{service.cleaner.enabled}} -Dspark.history.fs.cleaner.interval={{service.cleaner.interval}} -Dspark.history.fs.cleaner.maxAge={{service.cleaner.max-age}}" }, "ports": [0], "container": { - "type": "DOCKER", + "type": "MESOS", "docker": { "image": "{{service.docker-image}}", - "network": "HOST", - "forcePullImage": true, - "parameters": [ - { - "key": "user", - "value": "{{service.user}}" - } - ] + "forcePullImage": true } +{{#security.kerberos.keytab}} + , + "volumes": [ + { + "containerPath": "/opt/spark/hdfs.keytab", + "secret": "hdfs_keytab", + "hostPath": "" + } + ] +{{/security.kerberos.keytab}} }, +{{#security.kerberos.keytab}} + "secrets": { + "hdfs_keytab": { + "source": "{{security.kerberos.keytab}}" + } + }, +{{/security.kerberos.keytab}} + "healthChecks": [ + { + "portIndex": 0, + "protocol": "MESOS_HTTP", + "path": "/", + "gracePeriodSeconds": 5, + "intervalSeconds": 60, + "timeoutSeconds": 10, + "maxConsecutiveFailures": 3 + } + ], "labels": { "DCOS_SERVICE_NAME": "{{service.name}}", "DCOS_SERVICE_PORT_INDEX": "0", diff --git a/history/package/package.json b/history/package/package.json index b779a665103a5..16323d76f957a 100644 --- a/history/package/package.json +++ b/history/package/package.json @@ -19,6 +19,6 @@ "analytics" ], "website": "https://docs.mesosphere.com/service-docs/spark/", - "version": "2.1.0-1", + "version": "{{package-version}}", "minDcosReleaseVersion": "1.8" } diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000000000..3fa91059d6742 --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,7 @@ +import pytest +import sdk_repository + + +@pytest.fixture(scope='session') +def configure_universe(): + yield from sdk_repository.universe_session() diff --git a/tests/resources/hdfsclient.json b/tests/resources/hdfsclient.json new file mode 100644 index 0000000000000..027e9bc9cf47c --- /dev/null +++ b/tests/resources/hdfsclient.json @@ -0,0 +1,36 @@ +{ + "id": "hdfsclient", + "mem": 1024, + "user": "nobody", + "container": { + "type": "MESOS", + "docker": { + "image": "nvaziri/hdfs-client:dev", + "forcePullImage": true + }, + "volumes": [ + { + "containerPath": "/hadoop-2.6.0-cdh5.9.1/hdfs.keytab", + "secret": "hdfs_keytab", + "hostPath": "" + } + ] + }, + "secrets": { + "hdfs_keytab": { + "source": "__dcos_base64___keytab" + } + }, + "networks": [ + { + "mode": "host" + } + ], + "env": { + "REALM": "LOCAL", + "KDC_ADDRESS": "kdc.marathon.autoip.dcos.thisdcos.directory:2500", + "JAVA_HOME": "/usr/lib/jvm/default-java", + "KRB5_CONFIG": "/etc/krb5.conf", + "HDFS_SERVICE_NAME": "hdfs" + } +} diff --git a/tests/test_hdfs.py b/tests/test_hdfs.py index 40acb814f0850..fbdd4023df715 100644 --- a/tests/test_hdfs.py +++ b/tests/test_hdfs.py @@ -1,5 +1,6 @@ import itertools import logging +import os import pytest import json @@ -9,7 +10,9 @@ import sdk_cmd import sdk_hosts import sdk_install +import sdk_marathon import sdk_security +import sdk_tasks from tests import utils @@ -17,9 +20,13 @@ log = logging.getLogger(__name__) DEFAULT_HDFS_TASK_COUNT=10 GENERIC_HDFS_USER_PRINCIPAL = "hdfs@{realm}".format(realm=sdk_auth.REALM) +KEYTAB_SECRET_PATH = os.getenv("KEYTAB_SECRET_PATH", "__dcos_base64___keytab") # To do: change 
diff --git a/tests/test_hdfs.py b/tests/test_hdfs.py
index 40acb814f0850..fbdd4023df715 100644
--- a/tests/test_hdfs.py
+++ b/tests/test_hdfs.py
@@ -1,5 +1,6 @@
 import itertools
 import logging
+import os
 import pytest
 import json
 
@@ -9,7 +10,9 @@
 import sdk_cmd
 import sdk_hosts
 import sdk_install
+import sdk_marathon
 import sdk_security
+import sdk_tasks
 
 from tests import utils
 
@@ -17,9 +20,13 @@ log = logging.getLogger(__name__)
 
 DEFAULT_HDFS_TASK_COUNT=10
 GENERIC_HDFS_USER_PRINCIPAL = "hdfs@{realm}".format(realm=sdk_auth.REALM)
+KEYTAB_SECRET_PATH = os.getenv("KEYTAB_SECRET_PATH", "__dcos_base64___keytab")
 # To do: change when no longer using HDFS stub universe
 HDFS_PACKAGE_NAME='beta-hdfs'
 HDFS_SERVICE_NAME='hdfs'
+KERBEROS_ARGS = ["--kerberos-principal", GENERIC_HDFS_USER_PRINCIPAL,
+                 "--keytab-secret-path", "/{}".format(KEYTAB_SECRET_PATH)]
+HDFS_CLIENT_ID = "hdfsclient"
 
 
 @pytest.fixture(scope='module')
@@ -103,10 +110,54 @@ def configure_security_spark():
     yield from utils.spark_security_session()
 
 
+@pytest.fixture(scope='module')
+def setup_hdfs_client(hdfs_with_kerberos):
+    try:
+        curr_dir = os.path.dirname(os.path.realpath(__file__))
+        app_def_path = "{}/resources/hdfsclient.json".format(curr_dir)
+        with open(app_def_path) as f:
+            hdfsclient_app_def = json.load(f)
+        hdfsclient_app_def["id"] = HDFS_CLIENT_ID
+        hdfsclient_app_def["secrets"]["hdfs_keytab"]["source"] = KEYTAB_SECRET_PATH
+        sdk_marathon.install_app(hdfsclient_app_def)
+
+        sdk_auth.kinit(HDFS_CLIENT_ID, keytab="hdfs.keytab", principal=GENERIC_HDFS_USER_PRINCIPAL)
+        yield
+
+    finally:
+        sdk_marathon.destroy_app(HDFS_CLIENT_ID)
+
+
+@pytest.fixture(scope='module')
+def setup_history_server(hdfs_with_kerberos, setup_hdfs_client, configure_universe):
+    try:
+        sdk_tasks.task_exec(HDFS_CLIENT_ID, "bin/hdfs dfs -mkdir /history")
+
+        shakedown.install_package(
+            package_name=utils.HISTORY_PACKAGE_NAME,
+            options_json={
+                "service": {"hdfs-config-url": "http://api.{}.marathon.l4lb.thisdcos.directory/v1/endpoints"
+                            .format(HDFS_SERVICE_NAME)},
+                "security": {
+                    "kerberos": {
+                        "krb5conf": utils.HDFS_KRB5_CONF,
+                        "principal": GENERIC_HDFS_USER_PRINCIPAL,
+                        "keytab": KEYTAB_SECRET_PATH
+                    }
+                }
+            },
+            wait_for_completion=True  # wait for it to become healthy
+        )
+        yield
+
+    finally:
+        sdk_marathon.destroy_app(utils.HISTORY_SERVICE_NAME)
+
+
 @pytest.fixture(scope='module', autouse=True)
-def setup_spark(hdfs_with_kerberos, configure_security_spark):
+def setup_spark(hdfs_with_kerberos, setup_history_server, configure_security_spark, configure_universe):
     try:
-        utils.require_spark(use_hdfs=True)
+        utils.require_spark(use_hdfs=True, use_history=True)
         yield
     finally:
         utils.teardown_spark()
@@ -116,22 +167,20 @@ def setup_spark(hdfs_with_kerberos, configure_security_spark):
 @pytest.mark.sanity
 def test_terasort_suite():
     jar_url = 'https://downloads.mesosphere.io/spark/examples/spark-terasort-1.1-jar-with-dependencies_2.11.jar'
-    kerberos_args = ["--kerberos-principal", "hdfs/name-0-node.hdfs.autoip.dcos.thisdcos.directory@LOCAL",
-                     "--keytab-secret-path", "/__dcos_base64___keytab"]
 
-    teragen_args=["--class", "com.github.ehiggs.spark.terasort.TeraGen"] + kerberos_args
+    teragen_args=["--class", "com.github.ehiggs.spark.terasort.TeraGen"] + KERBEROS_ARGS
     utils.run_tests(app_url=jar_url,
                     app_args="1g hdfs:///terasort_in",
                     expected_output="Number of records written",
                     args=teragen_args)
 
-    terasort_args = ["--class", "com.github.ehiggs.spark.terasort.TeraSort"] + kerberos_args
+    terasort_args = ["--class", "com.github.ehiggs.spark.terasort.TeraSort"] + KERBEROS_ARGS
     utils.run_tests(app_url=jar_url,
                     app_args="hdfs:///terasort_in hdfs:///terasort_out",
                     expected_output="",
                     args=terasort_args)
 
-    teravalidate_args = ["--class", "com.github.ehiggs.spark.terasort.TeraValidate"] + kerberos_args
+    teravalidate_args = ["--class", "com.github.ehiggs.spark.terasort.TeraValidate"] + KERBEROS_ARGS
     utils.run_tests(app_url=jar_url,
                     app_args="hdfs:///terasort_out hdfs:///terasort_validate",
                     expected_output="partitions are properly sorted",
@@ -156,9 +205,6 @@ def has_running_executors():
 
     JOB_SERVICE_NAME = "RecoverableNetworkWordCount"
 
-    kerberos_args = ["--kerberos-principal", "hdfs@LOCAL",
-                     "--keytab-secret-path", "/__dcos_base64___keytab"]
-
     job_args = ["--supervise",
                 "--class", "org.apache.spark.examples.streaming.RecoverableNetworkWordCount",
                 "--conf", "spark.cores.max=8",
@@ -167,7 +213,7 @@ def has_running_executors():
     driver_id = utils.submit_job(app_url=utils.SPARK_EXAMPLES,
                                  app_args="10.0.0.1 9090 hdfs:///netcheck hdfs:///outfile",
                                  app_name=utils.SPARK_APP_NAME,
-                                 args=(kerberos_args + job_args))
+                                 args=(KERBEROS_ARGS + job_args))
     log.info("Started supervised driver {}".format(driver_id))
     shakedown.wait_for(lambda: streaming_job_registered(),
                        ignore_exceptions=False,
@@ -198,3 +244,16 @@ def has_running_executors():
     shakedown.wait_for(lambda: streaming_job_is_not_running(),
                        ignore_exceptions=False,
                        timeout_seconds=600)
+
+
+@pytest.mark.skipif(not utils.hdfs_enabled(), reason='HDFS_ENABLED is false')
+@pytest.mark.sanity
+def test_history():
+    job_args = ["--class", "org.apache.spark.examples.SparkPi",
+                "--conf", "spark.eventLog.enabled=true",
+                "--conf", "spark.eventLog.dir=hdfs://hdfs/history"]
+    utils.run_tests(app_url=utils.SPARK_EXAMPLES,
+                    app_args="100",
+                    expected_output="Pi is roughly 3",
+                    app_name="/spark",
+                    args=(job_args + KERBEROS_ARGS))
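For a manual run outside the test harness, the same preparation that the `setup_history_server` fixture performs can be sketched from the DC/OS CLI (this assumes the `hdfsclient` app above is running and already holds a valid Kerberos ticket, as `setup_hdfs_client` arranges via kinit):

    dcos task exec hdfsclient bin/hdfs dfs -mkdir /history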
"hdfs@LOCAL", - "--keytab-secret-path", "/__dcos_base64___keytab"] - job_args = ["--supervise", "--class", "org.apache.spark.examples.streaming.RecoverableNetworkWordCount", "--conf", "spark.cores.max=8", @@ -167,7 +213,7 @@ def has_running_executors(): driver_id = utils.submit_job(app_url=utils.SPARK_EXAMPLES, app_args="10.0.0.1 9090 hdfs:///netcheck hdfs:///outfile", app_name=utils.SPARK_APP_NAME, - args=(kerberos_args + job_args)) + args=(KERBEROS_ARGS + job_args)) log.info("Started supervised driver {}".format(driver_id)) shakedown.wait_for(lambda: streaming_job_registered(), ignore_exceptions=False, @@ -198,3 +244,16 @@ def has_running_executors(): shakedown.wait_for(lambda: streaming_job_is_not_running(), ignore_exceptions=False, timeout_seconds=600) + + +@pytest.mark.skipif(not utils.hdfs_enabled(), reason='HDFS_ENABLED is false') +@pytest.mark.sanity +def test_history(): + job_args = ["--class", "org.apache.spark.examples.SparkPi", + "--conf", "spark.eventLog.enabled=true", + "--conf", "spark.eventLog.dir=hdfs://hdfs/history"] + utils.run_tests(app_url=utils.SPARK_EXAMPLES, + app_args="100", + expected_output="Pi is roughly 3", + app_name="/spark", + args=(job_args + KERBEROS_ARGS)) diff --git a/tests/test_kafka.py b/tests/test_kafka.py index acf52b58ff78c..ad45de01747aa 100644 --- a/tests/test_kafka.py +++ b/tests/test_kafka.py @@ -95,7 +95,7 @@ def configure_security_spark(): @pytest.fixture(scope='module', autouse=True) -def setup_spark(kerberized_kafka, configure_security_spark): +def setup_spark(kerberized_kafka, configure_security_spark, configure_universe): try: # need to do this here also in case this test is run first # and the jar hasn't been updated diff --git a/tests/test_spark.py b/tests/test_spark.py index b6ac15e8dc5f2..dc5c259739dcf 100644 --- a/tests/test_spark.py +++ b/tests/test_spark.py @@ -34,7 +34,7 @@ def configure_security(): @pytest.fixture(scope='module', autouse=True) -def setup_spark(configure_security): +def setup_spark(configure_security, configure_universe): try: utils.require_spark() utils.upload_file(os.environ["SCALA_TEST_JAR_PATH"]) diff --git a/tests/utils.py b/tests/utils.py index 36233eb5dc6a8..1f7109f307a1d 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -34,6 +34,9 @@ def _init_logging(): HDFS_KRB5_CONF='W2xpYmRlZmF1bHRzXQpkZWZhdWx0X3JlYWxtID0gTE9DQUwKZG5zX2xvb2t1cF9yZWFsbSA9IHRydWUKZG5zX2xvb2t1cF9rZGMgPSB0cnVlCnVkcF9wcmVmZXJlbmNlX2xpbWl0ID0gMQoKW3JlYWxtc10KICBMT0NBTCA9IHsKICAgIGtkYyA9IGtkYy5tYXJhdGhvbi5tZXNvczoyNTAwCiAgfQoKW2RvbWFpbl9yZWFsbV0KICAuaGRmcy5kY29zID0gTE9DQUwKICBoZGZzLmRjb3MgPSBMT0NBTAo=' SPARK_PACKAGE_NAME=os.getenv('SPARK_PACKAGE_NAME', 'spark') SPARK_EXAMPLES = "http://downloads.mesosphere.com/spark/assets/spark-examples_2.11-2.0.1.jar" +HISTORY_PACKAGE_NAME = os.getenv("HISTORY_PACKAGE_NAME", "spark-history") +HISTORY_SERVICE_NAME = os.getenv("HISTORY_SERVICE_NAME", "spark-history") + def hdfs_enabled(): return os.environ.get("HDFS_ENABLED") != "false" @@ -59,10 +62,10 @@ def streaming_job_running(job_name): return len([x for x in f.dict()["tasks"] if x["state"] == "TASK_RUNNING"]) > 0 -def require_spark(options=None, service_name=None, use_hdfs=False): +def require_spark(options=None, service_name=None, use_hdfs=False, use_history=False): LOGGER.info("Ensuring Spark is installed.") - _require_package(SPARK_PACKAGE_NAME, service_name, _get_spark_options(options, use_hdfs)) + _require_package(SPARK_PACKAGE_NAME, service_name, _get_spark_options(options, use_hdfs, use_history)) _wait_for_spark(service_name) 
diff --git a/tests/utils.py b/tests/utils.py
index 36233eb5dc6a8..1f7109f307a1d 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -34,6 +34,9 @@ def _init_logging():
 HDFS_KRB5_CONF='W2xpYmRlZmF1bHRzXQpkZWZhdWx0X3JlYWxtID0gTE9DQUwKZG5zX2xvb2t1cF9yZWFsbSA9IHRydWUKZG5zX2xvb2t1cF9rZGMgPSB0cnVlCnVkcF9wcmVmZXJlbmNlX2xpbWl0ID0gMQoKW3JlYWxtc10KICBMT0NBTCA9IHsKICAgIGtkYyA9IGtkYy5tYXJhdGhvbi5tZXNvczoyNTAwCiAgfQoKW2RvbWFpbl9yZWFsbV0KICAuaGRmcy5kY29zID0gTE9DQUwKICBoZGZzLmRjb3MgPSBMT0NBTAo='
 SPARK_PACKAGE_NAME=os.getenv('SPARK_PACKAGE_NAME', 'spark')
 SPARK_EXAMPLES = "http://downloads.mesosphere.com/spark/assets/spark-examples_2.11-2.0.1.jar"
+HISTORY_PACKAGE_NAME = os.getenv("HISTORY_PACKAGE_NAME", "spark-history")
+HISTORY_SERVICE_NAME = os.getenv("HISTORY_SERVICE_NAME", "spark-history")
+
 
 def hdfs_enabled():
     return os.environ.get("HDFS_ENABLED") != "false"
@@ -59,10 +62,10 @@ def streaming_job_running(job_name):
     return len([x for x in f.dict()["tasks"] if x["state"] == "TASK_RUNNING"]) > 0
 
 
-def require_spark(options=None, service_name=None, use_hdfs=False):
+def require_spark(options=None, service_name=None, use_hdfs=False, use_history=False):
     LOGGER.info("Ensuring Spark is installed.")
 
-    _require_package(SPARK_PACKAGE_NAME, service_name, _get_spark_options(options, use_hdfs))
+    _require_package(SPARK_PACKAGE_NAME, service_name, _get_spark_options(options, use_hdfs, use_history))
     _wait_for_spark(service_name)
     _require_spark_cli()
 
@@ -129,7 +132,7 @@ def no_spark_jobs(service_name):
     return len(driver_ips) == 0
 
 
-def _get_spark_options(options, use_hdfs):
+def _get_spark_options(options, use_hdfs, use_history):
     if options is None:
         options = {}
 
@@ -140,6 +143,12 @@ def _get_spark_options(options, use_hdfs):
         options["security"]["kerberos"] = options["security"].get("kerberos", {})
         options["security"]["kerberos"]["krb5conf"] = HDFS_KRB5_CONF
 
+    if use_history:
+        dcos_url = dcos.config.get_config_val("core.dcos_url")
+        history_url = urllib.parse.urljoin(dcos_url, "/service/{}".format(HISTORY_SERVICE_NAME))
+        options["service"] = options.get("service", {})
+        options["service"]["spark-history-server-url"] = history_url
+
     if is_strict():
         options["service"] = options.get("service", {})
         options["service"]["service_account"] = SPARK_SERVICE_ACCOUNT
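Taken together, the plumbing above means a job only appears in the history server if it writes event logs to the directory the server reads. A submission mirroring what `test_history` does, shown purely as an illustration (the example jar URL and the `hdfs://hdfs/history` log directory are the same values the tests use), would look roughly like:

    dcos spark run --submit-args="\
      --conf spark.eventLog.enabled=true \
      --conf spark.eventLog.dir=hdfs://hdfs/history \
      --class org.apache.spark.examples.SparkPi \
      http://downloads.mesosphere.com/spark/assets/spark-examples_2.11-2.0.1.jar 100"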