From 7196a62637ee116d580334bbf18d364a0726d1a6 Mon Sep 17 00:00:00 2001 From: Nick Date: Sat, 3 Sep 2016 20:25:36 +0200 Subject: [PATCH 1/4] Implement a Go Spark CLI For now this focuses on being a drop-in replacement for the current CLI, where an end user shouldn't notice any difference, except that the supported 'spark-submit' args now show up in the CLI's --help. The implementation provides the same arguments that spark-submit uses, but just converts those into a POST request that's sent to the dispatcher. This doesn't support things like maven lookup of jar dependencies, but those things were only applicable to client mode, or things like YARN, anyway. This implementation supports both the Binary CLI in 1.8+, while also providing a simple python wrapper package for backwards compatibility for 1.7. The stub-universe packaging will automatically fill in the required binary CLI SHAs for us. --- bin/build.sh | 18 +- bin/jenkins.sh | 7 +- bin/test.sh | 13 +- cli/Makefile | 20 +- cli/README.rst | 99 ---- cli/bin/binary.sh | 58 --- cli/bin/build-go.sh | 58 +++ cli/bin/build-python-wrapper.sh | 18 + cli/bin/clean.sh | 11 +- cli/bin/env.sh | 26 - cli/bin/packages.sh | 15 - cli/bin/test.sh | 9 - cli/binary/Dockerfile.linux-binary | 21 - cli/binary/binary.spec | 30 -- cli/dcos-spark/.gitignore | 3 + cli/dcos-spark/main.go | 180 +++++++ cli/dcos-spark/submit_builder.go | 360 ++++++++++++++ cli/dcos_spark/__init__.py | 0 cli/dcos_spark/cli.py | 121 ----- cli/dcos_spark/constants.py | 1 - cli/dcos_spark/data/config-schema/spark.json | 18 - cli/dcos_spark/discovery.py | 21 - cli/dcos_spark/log.py | 199 -------- cli/dcos_spark/service.py | 12 - cli/dcos_spark/spark_submit.py | 472 ------------------- cli/dcos_spark/version.py | 1 - cli/python/.gitignore | 6 + cli/python/README.txt | 15 + cli/python/bin_wrapper/__init__.py | 50 ++ cli/{ => python}/setup.cfg | 0 cli/python/setup.py | 48 ++ cli/requirements.txt | 4 - cli/setup.py | 112 ----- cli/tox.ini | 25 - manifest.json | 1 - package/command.json | 2 +- package/resource.json | 25 + 37 files changed, 786 insertions(+), 1293 deletions(-) delete mode 100644 cli/README.rst delete mode 100755 cli/bin/binary.sh create mode 100755 cli/bin/build-go.sh create mode 100755 cli/bin/build-python-wrapper.sh delete mode 100755 cli/bin/env.sh delete mode 100755 cli/bin/packages.sh delete mode 100755 cli/bin/test.sh delete mode 100644 cli/binary/Dockerfile.linux-binary delete mode 100644 cli/binary/binary.spec create mode 100644 cli/dcos-spark/.gitignore create mode 100644 cli/dcos-spark/main.go create mode 100644 cli/dcos-spark/submit_builder.go delete mode 100644 cli/dcos_spark/__init__.py delete mode 100644 cli/dcos_spark/cli.py delete mode 100644 cli/dcos_spark/constants.py delete mode 100644 cli/dcos_spark/data/config-schema/spark.json delete mode 100644 cli/dcos_spark/discovery.py delete mode 100644 cli/dcos_spark/log.py delete mode 100644 cli/dcos_spark/service.py delete mode 100644 cli/dcos_spark/spark_submit.py delete mode 100644 cli/dcos_spark/version.py create mode 100644 cli/python/.gitignore create mode 100644 cli/python/README.txt create mode 100644 cli/python/bin_wrapper/__init__.py rename cli/{ => python}/setup.cfg (100%) create mode 100644 cli/python/setup.py delete mode 100644 cli/requirements.txt delete mode 100644 cli/setup.py delete mode 100644 cli/tox.ini diff --git a/bin/build.sh b/bin/build.sh index 97af36a13364b..2472c103be4fb 100755 --- a/bin/build.sh +++ b/bin/build.sh @@ -6,7 +6,6 @@ # - stub universe zip to S3 # # Manifest config: -# 
cli_version - version label to use for CLI package # spark_uri - where fetch spark distribution from (or SPARK_DIST_URI if provided) # # ENV vars: @@ -25,13 +24,6 @@ configure_env() { echo "Using Spark dist URI: $SPARK_DIST_URI" fi - if [ -z "${CLI_VERSION}" ]; then - CLI_VERSION=$(cat $BASEDIR/manifest.json | jq .cli_version) - CLI_VERSION="${CLI_VERSION%\"}" - CLI_VERSION="${CLI_VERSION#\"}" - echo "Using CLI Version: $CLI_VERSION" - fi - if [ -z "$DOCKER_IMAGE" ]; then # determine image label based on git commit: if [ -n "$ghprbActualCommit" ]; then @@ -66,7 +58,7 @@ notify_github() { build_cli() { notify_github pending "Building CLI" - CLI_VERSION=$CLI_VERSION make --directory=$BASEDIR/cli env test packages + make --directory=$BASEDIR/cli all if [ $? -ne 0 ]; then notify_github failure "CLI build failed" exit 1 @@ -89,14 +81,16 @@ upload_cli_and_stub_universe() { # Build/upload package using custom template parameters: TEMPLATE_X_Y_Z => {{x-y-z}} TEMPLATE_SPARK_DIST_URI=${SPARK_DIST_URI} \ TEMPLATE_DOCKER_IMAGE=${DOCKER_IMAGE} \ - TEMPLATE_CLI_VERSION=${CLI_VERSION} \ ${COMMONS_TOOLS_DIR}/ci_upload.py \ spark \ ${BASEDIR}/package/ \ - ${BASEDIR}/cli/dist/*.whl + ${BASEDIR}/cli/dcos-spark/dcos-spark-darwin \ + ${BASEDIR}/cli/dcos-spark/dcos-spark-linux \ + ${BASEDIR}/cli/dcos-spark/dcos-spark.exe \ + ${BASEDIR}/cli/python/dist/*.whl } -# set CLI_VERSION, SPARK_URI, and DOCKER_IMAGE: +# set SPARK_URI and DOCKER_IMAGE: configure_env fetch_commons_tools diff --git a/bin/jenkins.sh b/bin/jenkins.sh index d2314bb56c51a..ce7ecb1ddbd94 100644 --- a/bin/jenkins.sh +++ b/bin/jenkins.sh @@ -93,11 +93,8 @@ function spark_test { export $(cat $WORKSPACE/stub-universe.properties) # run tests against build artifacts: CLUSTER_NAME=spark-package-${BUILD_NUMBER} \ - TEST_DIR=$(pwd)/../mesos-spark-integration-tests/ \ - DCOS_CHANNEL=testing/master \ - DCOS_USERNAME=bootstrapuser \ - DCOS_PASSWORD=deleteme \ - make test + TEST_DIR=$(pwd)/../mesos-spark-integration-tests/ \ + make test popd } diff --git a/bin/test.sh b/bin/test.sh index e7f79d913d38a..ed5a89f70d589 100755 --- a/bin/test.sh +++ b/bin/test.sh @@ -11,8 +11,6 @@ BIN_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" check_env() { # Check env early, before starting the cluster: if [ -z "$DOCKER_IMAGE" \ - -o -z "$DCOS_USERNAME" \ - -o -z "$DCOS_PASSWORD" \ -o -z "$STUB_UNIVERSE_URL" \ -o -z "$AWS_ACCESS_KEY_ID" \ -o -z "$AWS_SECRET_ACCESS_KEY" \ @@ -56,14 +54,6 @@ start_cluster() { configure_cli() { notify_github pending "Configuring CLI" - # EE - #TOKEN=$(python -c "import requests;js={'uid':'"${DCOS_USERNAME}"', 'password': '"${DCOS_PASSWORD}"'};r=requests.post('"${DCOS_URL}"/acs/api/v1/auth/login',json=js);print(r.json()['token'])") - - # # Open - # TOKEN=$(python -c "import requests; import sys; js = {'token':'"${DCOS_OAUTH_TOKEN}"'}; r=requests.post('"${DCOS_URL}"/acs/api/v1/auth/login',json=js); sys.stderr.write(str(r.json())); print(r.json()['token'])") - - # dcos config set core.dcos_acs_token "${TOKEN}" - dcos config set core.dcos_url "${DCOS_URL}" ${COMMONS_TOOLS_DIR}/dcos_login.py dcos config show @@ -109,6 +99,9 @@ run_tests() { source env/bin/activate pip install -r requirements.txt python test.py + if [ $? 
-ne 0 ]; then + notify_github failure "Tests failed" + fi popd } diff --git a/cli/Makefile b/cli/Makefile index e630ed1104c78..48e2c031f1d2c 100644 --- a/cli/Makefile +++ b/cli/Makefile @@ -1,18 +1,8 @@ -all: env test binary +all: go python-wrapper +go: + bin/build-go.sh +python-wrapper: go + bin/build-python-wrapper.sh clean: bin/clean.sh - -env: clean - bin/env.sh - -test: - bin/test.sh - -packages: - bin/packages.sh - -binary: env - pyinstaller binary/binary.spec - -.PHONY: binary diff --git a/cli/README.rst b/cli/README.rst deleted file mode 100644 index 913d6df4916aa..0000000000000 --- a/cli/README.rst +++ /dev/null @@ -1,99 +0,0 @@ -DCOS Spark Subcommand -========================== -Basic DCOS subcommand - -Setup ------ -#. Make sure you meet requirements for installing packages_ -#. Clone git repo for the dcos spark cli:: - - git clone git@github.com:mesosphere/dcos-spark.git - -#. Change directory to the repo directory:: - - cd dcos-spark - -#. Make sure that you have virtualenv installed. If not type:: - - sudo pip install virtualenv - -#. Create a virtualenv for the project:: - - make env - -Configure Environment and Run ------------------------------ - -#. TODO: Talk about how to configure the root dcos cli - -#. :code:`source` the setup file to add the :code:`dcos-spark` command line interface to your - :code:`PATH`:: - - source env/bin/activate - -#. Get started by calling the DCOS Spark CLI's help:: - - dcos-spark help - -When invoking dcos subcommands from the dev env it includes the :code:`dcos-` as a prefix, however when this is included into DCOS it will be a subcommand of dcos and will be invoked by :code:`dcos spark`. :code:`dcos-spark help` translates to :code:`dcos spark help` when installed into the dcos-cli. - -#. Leaving the env - - deactivate - -Binary: ------------ - -Create Binary: -############## - -#. Install pyinstaller:: - - pip install pyinstaller - -#. Create spark cli binary:: - - make binary - - -Running Tests: --------------- - -Setup -##### - -Tox, our test runner, tests against both Python 2.7 and Python 3.4 environments. - -If you're using OS X, be sure to use the officially distributed Python 3.4 installer_ since the -Homebrew version is missing a necessary library. - -Running -####### - -Tox will run unit and integration tests in both Python environments using a temporarily created -virtualenv. - -You should ensure :code:`DCOS_CONFIG` is set and that the config file points to the Marathon -instance you want to use for integration tests. - -There are two ways to run tests, you can either use the virtualenv created by :code:`make env` -above:: - - make test - -Or, assuming you have tox installed (via :code:`sudo pip install tox`):: - - tox - -Other Useful Commands -##################### - -#. List all of the supported test environments:: - - tox --listenvs - -#. Run a specific set of tests:: - - tox -e - -.. _packages: https://packaging.python.org/en/latest/installing.html#installing-requirements diff --git a/cli/bin/binary.sh b/cli/bin/binary.sh deleted file mode 100755 index 2daf86b9440ac..0000000000000 --- a/cli/bin/binary.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/bin/bash -e -# -# Copyright (C) 2015 Mesosphere, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -echo "Building binary..." -pyinstaller binary/binary.spec - -docker-check() { - time=2 - command="/bin/sh -c \"docker ps\"" - - if hash expect 2>/dev/null; then - expect -c "set echo \"-noecho\"; set timeout $time; spawn -noecho $command; expect timeout { exit 1 } eof { exit 0 }" - - if [ $? = 1 ] ; then - echo "Docker execution timed out. Make sure docker-machine start docker-vm is started." - exit 0; - fi - fi -} - -if [ "$(uname)" == "Darwin" ]; then - # Do something under Mac OS X platform - mkdir -p dist/darwin - mv dist/dcos-spark dist/darwin - shasum -a 256 dist/darwin/dcos-spark | awk '{print $1}' > dist/darwin/dcos-spark.sha - echo "Darin Build Complete!" - - # linux build on a darwin plaform if docker runs - docker-check - docker rmi -f spark-binary || true - docker rm spark-binary || true - docker build -f binary/Dockerfile.linux-binary -t spark-binary . - docker run --name spark-binary spark-binary - mkdir -p dist/linux - docker cp spark-binary:/dcos-spark/dist/linux dist/ - -elif [ "$(expr substr $(uname -s) 1 5)" == "Linux" ]; then - # Do something under GNU/Linux platform #statements - mkdir -p dist/linux - mv dist/dcos-spark dist/linux - sha256sum dist/linux/dcos-spark | awk '{print $1}' > dist/linux/dcos-spark.sha - echo "Linux Build Complete" -fi - -echo "Build finished!" diff --git a/cli/bin/build-go.sh b/cli/bin/build-go.sh new file mode 100755 index 0000000000000..ce31b0f721ef9 --- /dev/null +++ b/cli/bin/build-go.sh @@ -0,0 +1,58 @@ +#!/bin/bash + +set +x + +BIN_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +cd $BIN_DIR/.. + +if [ -z "$GOPATH" -o -z "$(which go)" ]; then + echo "Missing GOPATH environment variable or 'go' executable. Please configure a Go build environment." + exit 1 +fi + +# The name of the binary produced by Go: +if [ -z "$EXE_NAME" ]; then + EXE_NAME="dcos-spark" +fi + +print_file() { + # Only show 'file ' if that utility is available: often missing in CI builds. 
+ if [ -n "$(which file)" ]; then + file "$1" + fi + ls -l "$1" + echo "" +} + +# --- + +# go (static binaries containing the CLI itself) +cd $EXE_NAME/ + +# this may be omitted in 1.6+, left here for compatibility with 1.5: +export GO15VENDOREXPERIMENT=1 + +go get + +# available GOOS/GOARCH permutations are listed at: +# https://golang.org/doc/install/source#environment + +# windows: +GOOS=windows GOARCH=386 go build +print_file "${EXE_NAME}.exe" + +# osx (static build): +SUFFIX="-darwin" +CGO_ENABLED=0 GOOS=darwin GOARCH=386 go build \ + && mv -vf "${EXE_NAME}" "${EXE_NAME}${SUFFIX}" +# don't ever strip the darwin binary: results in a broken/segfaulty build +print_file "${EXE_NAME}${SUFFIX}" + +# linux (static build): +SUFFIX="-linux" +CGO_ENABLED=0 GOOS=linux GOARCH=386 go build \ + && mv -vf "${EXE_NAME}" "${EXE_NAME}${SUFFIX}" +case "$OSTYPE" in + linux*) strip "${EXE_NAME}${SUFFIX}" +esac +print_file "${EXE_NAME}${SUFFIX}" diff --git a/cli/bin/build-python-wrapper.sh b/cli/bin/build-python-wrapper.sh new file mode 100755 index 0000000000000..4ab308f25fcbf --- /dev/null +++ b/cli/bin/build-python-wrapper.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +set +x + +BIN_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +cd $BIN_DIR/../python/ + +print_file() { + # Only show 'file ' if that utility is available: often missing in CI builds. + if [ -n "$(which file)" ]; then + file "$1" + fi + ls -l "$1" + echo "" +} + +python setup.py bdist_wheel +print_file dist/*.whl diff --git a/cli/bin/clean.sh b/cli/bin/clean.sh index ed64b3620772c..b19b503dc6fee 100755 --- a/cli/bin/clean.sh +++ b/cli/bin/clean.sh @@ -1,8 +1,9 @@ -#!/bin/bash -e +#!/bin/bash -set -eux +set +x -BASEDIR=`dirname $0`/.. +BIN_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +cd $BIN_DIR/.. -rm -rf $BASEDIR/.tox $BASEDIR/env $BASEDIR/build $BASEDIR/dist -echo "Deleted virtualenv and test artifacts." +rm -f dcos-spark/dcos-spark* +rm -rf python/build/ python/dist/ python/bin_wrapper.egg-info/ diff --git a/cli/bin/env.sh b/cli/bin/env.sh deleted file mode 100755 index a307be507550d..0000000000000 --- a/cli/bin/env.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash - -set -ex - -BASEDIR=`dirname $0`/.. - -if [ ! -d "$BASEDIR/env" ]; then - virtualenv -q $BASEDIR/env --prompt='(dcos-spark) ' - echo "Virtualenv created." -fi - -if [ -f "$BASEDIR/env/bin/activate" ]; then - source $BASEDIR/env/bin/activate -else - $BASEDIR/env/Scripts/activate -fi - -echo "Virtualenv activated." - -if [ ! -f "$BASEDIR/env/updated" -o $BASEDIR/setup.py -nt $BASEDIR/env/updated ]; then - pip install -e $BASEDIR - touch $BASEDIR/env/updated - echo "Requirements installed." -fi - -pip install -r $BASEDIR/requirements.txt diff --git a/cli/bin/packages.sh b/cli/bin/packages.sh deleted file mode 100755 index 77f575311bbfa..0000000000000 --- a/cli/bin/packages.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash -e - -BIN_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -BASEDIR="${BIN_DIR}/.." - -if [ -n "$CLI_VERSION" ]; then - echo "Using CLI Version: $CLI_VERSION (default $(cat $BASEDIR/dcos_spark/version.py)" - echo "version = '${CLI_VERSION}'" > $BASEDIR/dcos_spark/version.py -fi - -echo "Building wheel..." -"$BASEDIR/env/bin/python" setup.py bdist_wheel - -echo "Building egg..." -"$BASEDIR/env/bin/python" setup.py sdist diff --git a/cli/bin/test.sh b/cli/bin/test.sh deleted file mode 100755 index ee171b12e6d63..0000000000000 --- a/cli/bin/test.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash -e - -BASEDIR=`dirname $0`/.. 
- -cd $BASEDIR - -# no idea why this is necessary -# PATH=$(pwd)/dist:$PATH -$BASEDIR/env/bin/tox diff --git a/cli/binary/Dockerfile.linux-binary b/cli/binary/Dockerfile.linux-binary deleted file mode 100644 index d714390ae039c..0000000000000 --- a/cli/binary/Dockerfile.linux-binary +++ /dev/null @@ -1,21 +0,0 @@ -# This image runs the dcos-cli test suite. - -FROM ubuntu:15.04 -MAINTAINER support@mesosphere.com - -RUN apt-get update && apt-get install -y \ - make \ - openssh-client \ - git \ - sudo \ -&& sudo apt-get update --fix-missing \ -&& sudo apt-get install -y python-dev build-essential \ -&& sudo apt-get install -y python-pip python-virtualenv \ -&& pip install pip --upgrade \ -&& pip install pyinstaller - -ADD . /dcos-spark -WORKDIR /dcos-spark -RUN make clean env packages - -RUN make binary diff --git a/cli/binary/binary.spec b/cli/binary/binary.spec deleted file mode 100644 index ca1ebe9d16538..0000000000000 --- a/cli/binary/binary.spec +++ /dev/null @@ -1,30 +0,0 @@ -# -*- mode: python -*- - -block_cipher = None - -a = Analysis(['../dcos_spark/cli.py'], - pathex=[os.getcwd(), 'env/lib/python2.7/site-packages'], - binaries=None, - datas=[('../dcos_spark/data/config-schema/*', 'dcos_spark/data/config-schema')], - hiddenimports=[], - hookspath=[], - runtime_hooks=[], - excludes=[], - win_no_prefer_redirects=False, - win_private_assemblies=False, - cipher=block_cipher) - -pyz = PYZ(a.pure, - a.zipped_data, - cipher=block_cipher) - -exe = EXE(pyz, - a.scripts, - a.binaries, - a.zipfiles, - a.datas, - name='dcos-spark', - debug=False, - strip=False, - upx=True, - console=True ) diff --git a/cli/dcos-spark/.gitignore b/cli/dcos-spark/.gitignore new file mode 100644 index 0000000000000..c19c92652f899 --- /dev/null +++ b/cli/dcos-spark/.gitignore @@ -0,0 +1,3 @@ +dcos-spark +dcos-spark-* +dcos-spark.exe diff --git a/cli/dcos-spark/main.go b/cli/dcos-spark/main.go new file mode 100644 index 0000000000000..183507053e0ad --- /dev/null +++ b/cli/dcos-spark/main.go @@ -0,0 +1,180 @@ +package main + +import ( + "encoding/json" + "errors" + "fmt" + "github.com/mesosphere/dcos-commons/cli" + "gopkg.in/alecthomas/kingpin.v2" + "log" + "strings" +) + +func main() { + app, err := cli.NewApp("0.1.0", "Mesosphere", "CLI for launching/accessing Spark jobs") + if err != nil { + log.Fatalf(err.Error()) + } + + cli.HandleCommonFlags(app, "spark", "Spark DC/OS CLI Module") + handleCommands(app) + + // Omit modname: + kingpin.MustParse(app.Parse(cli.GetArguments())) +} + +type SparkCommand struct { + submissionId string + + submitArgs string + submitDockerImage string + submitEnv map[string]string + + statusSkipMessage bool + + logFollow bool + logLines uint + logFile string +} + +func (cmd *SparkCommand) runSubmit(c *kingpin.ParseContext) error { + jsonPayload, err := submitJson(cmd.submitArgs, cmd.submitDockerImage, cmd.submitEnv) + if err != nil { + return err + } + httpResponse := cli.HTTPPostJSON( + fmt.Sprintf("/v1/submissions/create/%s", cmd.submissionId), jsonPayload) + + responseBytes := cli.GetResponseBytes(httpResponse) + responseJson := make(map[string]interface{}) + err = json.Unmarshal(responseBytes, &responseJson) + if err != nil { + return err + } + + err = checkSparkJSONResponse(responseJson) + if err != nil { + // Failure! Print the raw message: + cli.PrintJSONBytes(responseBytes, httpResponse.Request) + return err + } + + // Success! 
Print the submissionId value: + idObj, ok := responseJson["submissionId"] + if ok { + idString, ok := idObj.(string) + if ok { + // Match the older Python CLI's output: + fmt.Printf("Run job succeeded. Submission id: %s\n", idString) + } else { + log.Printf("Failed to convert 'submissionId' field value to string: %s", responseJson) + } + } else { + log.Printf("Failed to extract 'submissionId' field from JSON response: %s", responseJson) + } + return nil +} + +func (cmd *SparkCommand) runStatus(c *kingpin.ParseContext) error { + httpResponse := cli.HTTPGet(fmt.Sprintf("/v1/submissions/status/%s", cmd.submissionId)) + responseBytes := cli.GetResponseBytes(httpResponse) + cli.PrintJSONBytes(responseBytes, httpResponse.Request) + responseJson := make(map[string]interface{}) + err := json.Unmarshal(responseBytes, &responseJson) + if err != nil { + return err + } + + if !cmd.statusSkipMessage { + // Additionally, attempt to pretty-print the 'message' content, if any. + // This is populated when the task has finished. + messageObj, ok := responseJson["message"] + if ok { + messageString, ok := messageObj.(string) + if ok { + fmt.Printf("\nMessage:\n%s", messageString) + } else { + log.Printf("Failed to convert 'message' field value to string") + } + } + } + + return checkSparkJSONResponse(responseJson) +} + +func (cmd *SparkCommand) runLog(c *kingpin.ParseContext) error { + args := []string{"task", "log", "--completed"} + if cmd.logFollow { + args = append(args, "--follow") + } + if cmd.logLines != 0 { + args = append(args, fmt.Sprintf("--lines=%d", cmd.logLines)) + } + args = append(args, cmd.submissionId) + if len(cmd.logFile) != 0 { + args = append(args, cmd.logFile) + } + result, err := cli.RunCLICommand(args...) + // Always print output from CLI, which may contain user-facing errors (like 'log file not found'): + fmt.Printf("%s\n", strings.TrimRight(result, "\n")) + return err +} + +func (cmd *SparkCommand) runKill(c *kingpin.ParseContext) error { + httpResponse := cli.HTTPPost(fmt.Sprintf("/v1/submissions/kill/%s", cmd.submissionId)) + responseBytes := cli.GetResponseBytes(httpResponse) + cli.PrintJSONBytes(responseBytes, httpResponse.Request) + responseJson := make(map[string]interface{}) + err := json.Unmarshal(responseBytes, &responseJson) + if err != nil { + return err + } + return checkSparkJSONResponse(responseJson) +} + +func (cmd *SparkCommand) runWebui(c *kingpin.ParseContext) error { + // Hackish: Create the request we WOULD make, and print the resulting URL: + fmt.Printf("%s\n", cli.CreateHTTPRequest("GET", "/ui").URL.String()) + return nil +} + +func handleCommands(app *kingpin.Application) { + cmd := &SparkCommand{submitEnv: make(map[string]string)} + + run := app.Command("run", "Submits a new Spark job ala 'spark-submit'").Action(cmd.runSubmit) + + run.Flag("submit-args", fmt.Sprintf("Arguments matching what would be sent to 'spark-submit': %s", sparkSubmitHelp())).Required().PlaceHolder("ARGS").StringVar(&cmd.submitArgs) + run.Flag("docker-image", "Docker image to run the job within").StringVar(&cmd.submitDockerImage) + run.Flag("env", "Environment variable(s) to pass into the Spark job.").Short('E').PlaceHolder("ENVKEY=ENVVAL").StringMapVar(&cmd.submitEnv) + + status := app.Command("status", "Retrieves the status of a submitted Spark job").Action(cmd.runStatus) + status.Flag("skip-message", "Omit the additional printout of the 'message' field").BoolVar(&cmd.statusSkipMessage) + status.Arg("submission-id", "The ID of the Spark job").Required().StringVar(&cmd.submissionId) + + 
log := app.Command("log", "Retrieves a log file from a submitted Spark job").Action(cmd.runLog) + log.Flag("follow", "Dynamically update the log").BoolVar(&cmd.logFollow) + log.Flag("lines_count", "Print the last N lines.").Default("10").UintVar(&cmd.logLines) //TODO "lines"? + log.Flag("file", "Specify the sandbox file to print.").Default("stdout").StringVar(&cmd.logFile) + log.Arg("submission-id", "The ID of the Spark job").Required().StringVar(&cmd.submissionId) + + kill := app.Command("kill", "Aborts a submitted Spark job").Action(cmd.runKill) + kill.Arg("submission-id", "The ID of the Spark job").Required().StringVar(&cmd.submissionId) + + app.Command("webui", "Returns the Spark Web UI URL").Action(cmd.runWebui) +} + +func checkSparkJSONResponse(responseJson map[string]interface{}) error { + // Parse out the 'success' value returned by Spark, to ensure that we return an error code when success=false: + successObj, ok := responseJson["success"] + if !ok { + return errors.New("Missing 'success' field in response JSON") + } + successBool, ok := successObj.(bool) + if !ok { + return errors.New(fmt.Sprintf("Unable to convert 'success' field in response JSON to boolean: %s", successObj)) + } + if !successBool { + return errors.New("Spark returned success=false") + } + return nil +} diff --git a/cli/dcos-spark/submit_builder.go b/cli/dcos-spark/submit_builder.go new file mode 100644 index 0000000000000..203a12fc566bc --- /dev/null +++ b/cli/dcos-spark/submit_builder.go @@ -0,0 +1,360 @@ +package main + +import ( + "bufio" + "bytes" + "encoding/json" + "errors" + "fmt" + "github.com/mesosphere/dcos-commons/cli" + "gopkg.in/alecthomas/kingpin.v2" + "log" + "net/url" + "os" + "regexp" + "strings" +) + +var keyWhitespaceValPattern = regexp.MustCompile("(.+)\\s+(.+)") +var backslashNewlinePattern = regexp.MustCompile("\\s*\\\\s*\\n\\s+") + +type sparkVal struct { + flagName string + propName string + desc string + s string // used by string vals + b bool // used by bool vals +} +func (f *sparkVal) flag(section *kingpin.Application) *kingpin.FlagClause { + return section.Flag(f.flagName, fmt.Sprintf("%s (%s)", f.desc, f.propName)) +} +func newSparkVal(flagName, propName, desc string) *sparkVal { + return &sparkVal{flagName, propName, desc, "", false} +} + +type sparkArgs struct { + mainClass string + + propertiesFile string + properties map[string]string + + boolVals []*sparkVal + stringVals []*sparkVal + + appJar *url.URL + appArgs []string +} + +/* +Relevant files: +- http://arturmkrtchyan.com/apache-spark-hidden-rest-api +- https://github.com/apache/spark/blob/master/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java +- https://github.com/apache/spark/blob/master/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +- https://github.com/apache/spark/blob/master/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala + +To get POST requests from spark-submit: +- Open spark-2.0.0/conf/log4j.properties.template => set "DEBUG" => write as "log4j.properties" +- $ SPARK_JAVA_OPTS="-Dlog4j.debug=true -Dlog4j.configuration=log4j.properties ..." ./spark-2.0.0/bin/spark-submit ... + +Unsupported flags, omitted here: + +managed by us, user cannot change: +- deployMode: --deploy-mode (spark.submit.deployMode/DEPLOY_MODE) +- master: --master mesos://host:port (spark.master/MASTER) +officially unsupported in DC/OS Spark docs: +- pyFiles: --py-files many.zip,python.egg,files.py (spark.submit.pyFiles) +client mode only? 
doesn't seem to be used in POST call at all: +- proxyUser: --proxy-user SOMENAME +client mode only (downloads jars to local system): +- ivyRepoPath: (spark.jars.ivy) +- packages: --packages maven,coordinates,for,jars (spark.jars.packages) +- packagesExclusions: --exclude-packages groupId:artifactId,toExclude:fromClasspath (spark.jars.excludes) +- repositories: --repositories additional.remote,repositories.to.search +yarn only: +- archives: --archives +- executorCores: --executor-cores NUM (spark.executor.cores/SPARK_EXECUTOR_CORES) +- keytab: --keytab (spark.yarn.keytab) +- numExecutors: --num-executors (spark.executor.instances) +- principal: --principal (spark.yarn.principal) +- queue: --queue (spark.yarn.queue) +*/ +func sparkSubmitArgSetup() (*kingpin.Application, *sparkArgs) { + submit := kingpin.New("","") + submit.HelpFlag.Short('h').Hidden() + submit.UsageTemplate(` [args] +{{if .Context.Flags}} +Flags: +{{.Context.Flags|FlagsToTwoColumns|FormatTwoColumns}} +{{end}} +{{if .Context.Args}} +Args: +{{.Context.Args|ArgsToTwoColumns|FormatTwoColumns}} +{{end}} +`) + + args := &sparkArgs{"", "", make(map[string]string), make([]*sparkVal, 0), make([]*sparkVal, 0), new(url.URL), make([]string, 0)} + + submit.Flag("class", "Your application's main class (for Java / Scala apps). (REQUIRED)").Required().StringVar(&args.mainClass) // note: spark-submit can autodetect, but only for file://local.jar + submit.Flag("properties-file", "Path to file containing whitespace-separated Spark property defaults.").PlaceHolder("PATH").ExistingFileVar(&args.propertiesFile) + submit.Flag("conf", "Custom Spark configuration properties.").Short('D').PlaceHolder("PROP=VALUE").StringMapVar(&args.properties) + + val := newSparkVal("supervise", "spark.driver.supervise", "If given, restarts the driver on failure.") + val.flag(submit).BoolVar(&val.b) + args.boolVals = append(args.boolVals, val) + + val = newSparkVal("name", "spark.app.name", "A name for your application") + val.flag(submit).StringVar(&val.s) + args.stringVals = append(args.stringVals, val) + + val = newSparkVal("driver-cores", "spark.driver.cores", "Cores for driver.") + val.flag(submit).Default("1").StringVar(&val.s) + args.stringVals = append(args.stringVals, val) + + val = newSparkVal("driver-memory", "spark.driver.memory", "Memory for driver (e.g. 1000M, 2G).") + val.flag(submit).Default("1G").Envar("SPARK_DRIVER_MEMORY").StringVar(&val.s) + args.stringVals = append(args.stringVals, val) + + val = newSparkVal("driver-class-path", "spark.driver.extraClassPath", "Extra class path entries to pass to the driver. Note that jars added with --jars are automatically included in the classpath.") + val.flag(submit).StringVar(&val.s) + args.stringVals = append(args.stringVals, val) + + val = newSparkVal("driver-java-options", "spark.driver.extraJavaOptions", "Extra Java options to pass to the driver.") + val.flag(submit).StringVar(&val.s) + args.stringVals = append(args.stringVals, val) + + val = newSparkVal("driver-library-path", "spark.driver.extraLibraryPath", "Extra library path entries to pass to the driver.") + val.flag(submit).StringVar(&val.s) + args.stringVals = append(args.stringVals, val) + + val = newSparkVal("executor-memory", "spark.executor.memory", "Memory per executor (e.g. 
1000M, 2G)") + val.flag(submit).Default("1G").Envar("SPARK_EXECUTOR_MEMORY").StringVar(&val.s) + args.stringVals = append(args.stringVals, val) + + val = newSparkVal("total-executor-cores", "spark.cores.max", "Total cores for all executors.") + val.flag(submit).StringVar(&val.s) + args.stringVals = append(args.stringVals, val) + + val = newSparkVal("files", "spark.files", "Comma-separated list of file URLs to be placed in the working directory of each executor.") + val.flag(submit).StringVar(&val.s) + args.stringVals = append(args.stringVals, val) + + val = newSparkVal("jars", "spark.jars", "Comma-separated list of jar URLs to include on the driver and executor classpaths.") + val.flag(submit).StringVar(&val.s) + args.stringVals = append(args.stringVals, val) + + submit.Arg("jar", "Application jar to be run").Required().URLVar(&args.appJar) + submit.Arg("args", "Application arguments").StringsVar(&args.appArgs) + + return submit, args +} + +func sparkSubmitHelp() string { + app, _ := sparkSubmitArgSetup() + var buf bytes.Buffer + writer := bufio.NewWriter(&buf) + app.UsageWriter(writer) + app.Usage(make([]string, 0)) + writer.Flush() + return buf.String() +} + +func cleanUpSubmitArgs(argsStr string, boolVals []*sparkVal) []string { + // clean up any instances of shell-style escaped newlines: "arg1\\narg2" => "arg1 arg2" + argsCleaned := strings.TrimSpace(backslashNewlinePattern.ReplaceAllLiteralString(argsStr, " ")) + // HACK: spark-submit uses '--arg val' by convention, while kingpin only supports '--arg=val'. + // translate the former into the latter for kingpin to parse. + args := strings.Split(argsCleaned, " ") + argsEquals := make([]string, 0) + i := 0 +ARGLOOP: + for i < len(args) { + arg := args[i] + if !strings.HasPrefix(arg, "-") { + // looks like we've exited the flags entirely, and are now at the jar and/or args. + // any arguments without a dash at the front should've been joined to preceding keys. + // flush the rest and exit. + for i < len(args) { + arg = args[i] + argsEquals = append(argsEquals, arg) + i += 1 + } + break + } + // join this arg to the next arg if...: + // 1. we're not at the last arg in the array + // 2. we start with "--" + // 3. we don't already contain "=" (already joined) + // 4. 
we aren't a boolean value (no val to join) + if i < len(args) - 1 && strings.HasPrefix(arg, "--") && !strings.Contains(arg, "=") { + // check for boolean: + for _, boolVal := range(boolVals) { + if boolVal.flagName == arg[2:] { + argsEquals = append(argsEquals, arg) + i += 1 + continue ARGLOOP + } + } + // merge this --key against the following val to get --key=val + argsEquals = append(argsEquals, arg + "=" + args[i+1]) + i += 2 + } else { + // already joined or at the end, pass through: + argsEquals = append(argsEquals, arg) + i += 1 + } + } + if cli.Verbose { + fmt.Printf("Translated arguments: '%s'\n", argsEquals) + } + return argsEquals +} + +func getValsFromPropertiesFile(path string) map[string]string { + vals := make(map[string]string) + if len(path) == 0 { + return vals + } + + file, err := os.Open(path) + if err != nil { + log.Fatal(err) + } + defer file.Close() + + scanner := bufio.NewScanner(file) + for scanner.Scan() { + line := strings.TrimSpace(scanner.Text()) + if len(line) == 0 { + continue + } + if line[0] == '#' { + continue + } + result := keyWhitespaceValPattern.FindStringSubmatch(line) + if len(result) == 0 { + continue + } + vals[result[1]] = result[2] + } + return vals +} + +func getStringFromTree(m map[string]interface{}, path []string) (string, error) { + if len(path) == 0 { + return "", errors.New(fmt.Sprintf("empty path, nothing to navigate in: %s", m)) + } + obj, ok := m[path[0]] + if !ok { + return "", errors.New(fmt.Sprintf("unable to find key '%s' in map: %s", path[0], m)) + } + if len(path) == 1 { + ret, ok := obj.(string) + if !ok { + return "", errors.New(fmt.Sprintf("unable to cast map value '%s' (for key '%s') as string: %s", obj, path[0], m)) + } + return ret, nil + } else { + next, ok := obj.(map[string]interface{}) + if !ok { + return "", errors.New(fmt.Sprintf("unable to cast map value '%s' (for key '%s') as string=>object map: %s", obj, path[0], m)) + } + return getStringFromTree(next, path[1:]) + } +} + +func submitJson(argsStr string, dockerImage string, submitEnv map[string]string) (string, error) { + // first, import any values in the provided properties file (space separated "key val") + // then map applicable envvars + // then parse all -Dprop.key=propVal, and all --conf prop.key=propVal + // then map flags + + submit, args := sparkSubmitArgSetup() + + kingpin.MustParse(submit.Parse(cleanUpSubmitArgs(argsStr, args.boolVals))) + + for _, boolVal := range(args.boolVals) { + if boolVal.b { + args.properties[boolVal.propName] = "true" + } + } + for _, stringVal := range(args.stringVals) { + if len(stringVal.s) != 0 { + args.properties[stringVal.propName] = stringVal.s + } + } + + for k, v := range(getValsFromPropertiesFile(args.propertiesFile)) { + // populate value if not already present (due to envvars or args): + _, ok := args.properties[k] + if !ok { + args.properties[k] = v + } + } + + // insert/overwrite some default properties: + args.properties["spark.submit.deployMode"] = "cluster" + if (strings.EqualFold(cli.OptionalCLIConfigValue("core.ssl_verify"), "false")) { + args.properties["spark.ssl.noCertVerification"] = "true" + } + sparkMasterURL := cli.CreateURL("", "") + if sparkMasterURL.Scheme == "http" { + sparkMasterURL.Scheme = "mesos" + } else if sparkMasterURL.Scheme == "https" { + sparkMasterURL.Scheme = "mesos-ssl" + } else { + log.Fatalf("Unsupported protocol '%s': %s", sparkMasterURL.Scheme, sparkMasterURL.String()) + } + args.properties["spark.master"] = sparkMasterURL.String() + + // copy appResource to front of 'spark.jars' 
data: + jars, ok := args.properties["spark.jars"] + if !ok || len(jars) == 0 { + args.properties["spark.jars"] = args.appJar.String() + } else { + args.properties["spark.jars"] = fmt.Sprintf("%s,%s", args.appJar.String(), jars) + } + + // if spark.app.name is missing, set it to mainClass + _, ok = args.properties["spark.app.name"] + if !ok { + args.properties["spark.app.name"] = args.mainClass + } + + // fetch the spark task definition from Marathon: + url := cli.CreateURL("replaceme", "") + url.Path = fmt.Sprintf("/marathon/v2/apps/%s", cli.ServiceName) + responseBytes := cli.GetResponseBytes(cli.CheckHTTPResponse(cli.HTTPQuery(cli.CreateHTTPURLRequest("GET", url, "", "")))) + + responseJson := make(map[string]interface{}) + err := json.Unmarshal(responseBytes, &responseJson) + if err != nil { + return "", err + } + + docker_image, err := getStringFromTree(responseJson, []string{"app", "container", "docker", "image"}) + if err != nil { + return "", err + } + args.properties["spark.mesos.executor.docker.image"] = docker_image + hdfs_config_url, err := getStringFromTree(responseJson, []string{"app", "labels", "SPARK_HDFS_CONFIG_URL"}) + if err == nil && len(hdfs_config_url) != 0 { // fail silently: it's normal for this to be unset + hdfs_config_url = strings.TrimRight(hdfs_config_url, "/") + args.properties["spark.mesos.uris"] = fmt.Sprintf( + "%s/hdfs-site.xml,%s/core-site.xml", hdfs_config_url, hdfs_config_url) + } + + jsonMap := map[string]interface{} { + "action": "CreateSubmissionRequest", + "appArgs": args.appArgs, + "appResource": args.appJar.String(), + "clientSparkVersion": "2.0.0", + "environmentVariables": submitEnv, + "mainClass": args.mainClass, + "sparkProperties": args.properties, + } + jsonPayload, err := json.Marshal(jsonMap) + if err != nil { + return "", err + } + return string(jsonPayload), nil +} diff --git a/cli/dcos_spark/__init__.py b/cli/dcos_spark/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/cli/dcos_spark/cli.py b/cli/dcos_spark/cli.py deleted file mode 100644 index 44aa53c565730..0000000000000 --- a/cli/dcos_spark/cli.py +++ /dev/null @@ -1,121 +0,0 @@ -"""Run and manage Spark jobs - -Usage: - dcos spark --help - dcos spark --info - dcos spark --version - dcos spark --config-schema - dcos spark run --help - dcos spark run --submit-args= - [--docker-image=] - [--verbose] - dcos spark status [--verbose] - dcos spark log - [--follow] - [--lines_count=] - [--file=] - dcos spark kill [--verbose] - dcos spark webui - -Options: - --help Show this screen - --info Show info - --version Show version -""" -from __future__ import print_function - -import docopt -import pkg_resources -from dcos import mesos -from dcos_spark import discovery, log, spark_submit, version - - -def dispatcher(): - return discovery.get_spark_dispatcher() - - -def run_spark_job(args): - docker_image = args.get('--docker-image') or \ - spark_submit.spark_docker_image() - return spark_submit.submit_job( - dispatcher(), - args['--submit-args'], - docker_image, - args['--verbose']) - - -def show_spark_submit_help(): - return spark_submit.show_help() - - -def job_status(args): - return spark_submit.job_status( - dispatcher(), - args[''], - args['--verbose']) - - -def kill_job(args): - return spark_submit.kill_job( - dispatcher(), - args[''], - args['--verbose']) - - -def log_job(args): - dcos_client = mesos.DCOSClient() - task = mesos.get_master(dcos_client).task(args['']) - log_file = args.get('--file', "stdout") - if log_file is None: - log_file = "stdout" - 
mesos_file = mesos.MesosFile(log_file, task=task, dcos_client=dcos_client) - lines_count = args.get('--lines_count', "10") - if lines_count is None: - lines_count = "10" - return log.log_files([mesos_file], args['--follow'], int(lines_count)) - - -def print_webui(args): - print(discovery.get_spark_webui()) - return 0 - - -def print_schema(): - schema = pkg_resources.resource_string( - 'dcos_spark', - 'data/config-schema/spark.json').decode('utf-8') - print(schema) - - -def main(): - args = docopt.docopt( - __doc__, - version='dcos-spark version {}'.format(version.version), help=False) - - if args['--info']: - print(__doc__.split('\n')[0]) - elif args['--config-schema']: - print_schema() - elif args['run'] and args['--help']: - return show_spark_submit_help() - elif args['run']: - return run_spark_job(args) - elif args['status']: - return job_status(args) - elif args['kill']: - return kill_job(args) - elif args['webui']: - return print_webui(args) - elif args['log']: - return log_job(args) - elif args['--help']: - print(__doc__) - return 0 - else: - print(__doc__) - return 1 - - return 0 - -if __name__ == "__main__": - main() diff --git a/cli/dcos_spark/constants.py b/cli/dcos_spark/constants.py deleted file mode 100644 index 1efd49a1308b3..0000000000000 --- a/cli/dcos_spark/constants.py +++ /dev/null @@ -1 +0,0 @@ -PATH_ENV = 'PATH' diff --git a/cli/dcos_spark/data/config-schema/spark.json b/cli/dcos_spark/data/config-schema/spark.json deleted file mode 100644 index 48fc53be380c5..0000000000000 --- a/cli/dcos_spark/data/config-schema/spark.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "$schema": "http://json-schema.org/schema#", - "type": "object", - "properties": { - "app_id": { - "type": "string", - "title": "Spark App ID", - "description": "App ID that the Spark CLI is targetting to.", - "default": "spark" - }, - "distribution_directory": { - "type": "string", - "title": "Spark distribution directory", - "description": "Local directory used to download and run Spark distributions", - } - }, - "additionalProperties": false -} diff --git a/cli/dcos_spark/discovery.py b/cli/dcos_spark/discovery.py deleted file mode 100644 index d27c9342c6bc6..0000000000000 --- a/cli/dcos_spark/discovery.py +++ /dev/null @@ -1,21 +0,0 @@ -from __future__ import print_function - -import os - -from dcos import config -from dcos_spark import service -from six.moves import urllib - - -def get_spark_webui(): - base_url = config.get_config().get('core.dcos_url') - return base_url + '/service/' + service.app_id() + '/ui' - - -def get_spark_dispatcher(): - dcos_spark_url = os.getenv("DCOS_SPARK_URL") - if dcos_spark_url is not None: - return dcos_spark_url - - base_url = config.get_config().get('core.dcos_url') - return urllib.parse.urljoin(base_url, '/service/' + service.app_id() + '/') diff --git a/cli/dcos_spark/log.py b/cli/dcos_spark/log.py deleted file mode 100644 index 9f818dc53ff73..0000000000000 --- a/cli/dcos_spark/log.py +++ /dev/null @@ -1,199 +0,0 @@ -import functools -import sys -import time - -from dcos import emitting, util -from dcos.errors import DCOSException - -logger = util.get_logger(__name__) -emitter = emitting.FlatEmitter() - - -def _no_file_exception(): - return DCOSException('No files exist. Exiting.') - - -def log_files(mesos_files, follow, lines): - """Print the contents of the given `mesos_files`. Behaves like unix - tail. 
- - :param mesos_files: file objects to print - :type mesos_files: [MesosFile] - :param follow: same as unix tail's -f - :type follow: bool - :param lines: number of lines to print - :type lines: int - :rtype: None - """ - - fn = functools.partial(_read_last_lines, lines) - curr_header, mesos_files = _stream_files(None, fn, mesos_files) - if not mesos_files: - raise _no_file_exception() - - while follow: - # This flush is needed only for testing, since stdout is fully - # buffered (as opposed to line-buffered) when redirected to a - # pipe. So if we don't flush, our --follow tests, which use a - # pipe, never see the data - sys.stdout.flush() - - curr_header, mesos_files = _stream_files(curr_header, - _read_rest, - mesos_files) - if not mesos_files: - raise _no_file_exception() - time.sleep(1) - - -def _stream_files(curr_header, fn, mesos_files): - """Apply `fn` in parallel to each file in `mesos_files`. `fn` must - return a list of strings, and these strings are then printed - serially as separate lines. - - `curr_header` is the most recently printed header. It's used to - group lines. Each line has an associated header (e.g. a string - representation of the MesosFile it was read from), and we only - print the header before printing a line with a different header - than the previous line. This effectively groups lines together - when the have the same header. - - :param curr_header: Most recently printed header - :type curr_header: str - :param fn: function that reads a sequence of lines from a MesosFile - :type fn: MesosFile -> [str] - :param mesos_files: files to read - :type mesos_files: [MesosFile] - :returns: Returns the most recently printed header, and a list of - files that are still reachable. Once we detect a file is - unreachable, we stop trying to read from it. - :rtype: (str, [MesosFile]) - """ - - reachable_files = list(mesos_files) - - # TODO switch to map - for job, mesos_file in util.stream(fn, mesos_files): - try: - lines = job.result() - except DCOSException as e: - # The read function might throw an exception if read.json - # is unavailable, or if the file doesn't exist in the - # sandbox. In any case, we silently remove the file and - # continue. - logger.exception("Error reading file: {}".format(e)) - - reachable_files.remove(mesos_file) - continue - - if lines: - curr_header = _output(curr_header, - len(reachable_files) > 1, - str(mesos_file), - lines) - - return curr_header, reachable_files - - -def _output(curr_header, output_header, header, lines): - """Prints a sequence of lines. If `header` is different than - `curr_header`, first print the header. - - :param curr_header: most recently printed header - :type curr_header: str - :param output_header: whether or not to output the header - :type output_header: bool - :param header: header for `lines` - :type header: str - :param lines: lines to print - :type lines: [str] - :returns: `header` - :rtype: str - """ - - if lines: - if output_header and header != curr_header: - emitter.publish('===> {} <==='.format(header)) - for line in lines: - emitter.publish(line) - return header - - -# A liberal estimate of a line size. Used to estimate how much data -# we need to fetch from a file when we want to read N lines. -LINE_SIZE = 200 - - -def _read_last_lines(num_lines, mesos_file): - """Returns the last `num_lines` of a file, or less if the file is - smaller. Seeks to EOF. 
- - :param num_lines: number of lines to read - :type num_lines: int - :param mesos_file: file to read - :type mesos_file: MesosFile - :returns: lines read - :rtype: [str] - """ - - file_size = mesos_file.size() - - # estimate how much data we need to fetch to read `num_lines`. - fetch_size = LINE_SIZE * num_lines - - end = file_size - start = max(end - fetch_size, 0) - data = '' - while True: - # fetch data - mesos_file.seek(start) - data = mesos_file.read(end - start) + data - - # break if we have enough lines - data_tmp = _strip_trailing_newline(data) - lines = data_tmp.split('\n') - if len(lines) > num_lines: - ret = lines[-num_lines:] - break - elif start == 0: - ret = lines - break - - # otherwise shift our read window and repeat - end = start - start = max(end - fetch_size, 0) - - mesos_file.seek(file_size) - return ret - - -def _read_rest(mesos_file): - """ Reads the rest of the file, and returns the lines. - - :param mesos_file: file to read - :type mesos_file: MesosFile - :returns: lines read - :rtype: [str] - """ - data = mesos_file.read() - if data == '': - return [] - else: - data_tmp = _strip_trailing_newline(data) - return data_tmp.split('\n') - - -def _strip_trailing_newline(s): - """Returns a modified version of the string with the last character - truncated if it's a newline. - - :param s: string to trim - :type s: str - :returns: modified string - :rtype: str - """ - - if s == "": - return s - else: - return s[:-1] if s[-1] == '\n' else s diff --git a/cli/dcos_spark/service.py b/cli/dcos_spark/service.py deleted file mode 100644 index a3a9fc21417cf..0000000000000 --- a/cli/dcos_spark/service.py +++ /dev/null @@ -1,12 +0,0 @@ -from dcos import config - - -def app_id(): - try: - return config.get_config()["spark.app_id"] - except KeyError: - return "spark" - - -def set_app_id(app_id): - config.set_val("spark.app_id", app_id) diff --git a/cli/dcos_spark/spark_submit.py b/cli/dcos_spark/spark_submit.py deleted file mode 100644 index 159eed4a2016a..0000000000000 --- a/cli/dcos_spark/spark_submit.py +++ /dev/null @@ -1,472 +0,0 @@ -from __future__ import print_function - -import json -import logging -import os -import os.path -import posixpath -import re -import shutil -import ssl -import subprocess -import sys -import tarfile -import threading - -import requests -import six -from dcos import config, http, marathon, util -from dcos_spark import constants, service - -from six.moves import urllib -from six.moves.BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer -from six.moves.http_client import HTTPMessage - -# singleton storing the spark marathon app -app = None - - -logging.basicConfig() -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) - - -def spark_app(): - global app - if app: - return app - - client = marathon.create_client() - apps = client.get_apps() - for marathon_app in apps: - if marathon_app.get('labels', {}).get('DCOS_PACKAGE_FRAMEWORK_NAME') \ - == service.app_id(): - app = marathon_app - return app - - if not app: - sys.stderr.write('No spark app found in marathon. 
Quitting...\n') - sys.exit(1) - - -def partition(args, pred): - ain = [] - aout = [] - for x in args: - if pred(x): - ain.append(x) - else: - aout.append(x) - return (ain, aout) - - -def spark_docker_image(): - return spark_app()['container']['docker']['image'] - - -def _spark_dist_dir(): - dist_dir = config.get_config().get('spark.distribution_directory', - '~/.dcos/spark/dist') - return os.path.expanduser(dist_dir) - - -def spark_dist(): - """Returns the directory location of the local spark distribution. - Fetches it if it doesn't exist.""" - - app = spark_app() - spark_uri = app['labels']['SPARK_URI'] - - # .tgz - basename = posixpath.basename(spark_uri) - - # - root = posixpath.splitext(basename)[0] - - # data/ - data_dir = _spark_dist_dir() - if not os.path.exists(data_dir): - os.makedirs(data_dir) - - # data/ - spark_archive = os.path.join(data_dir, basename) - - # data/ - spark_dir = os.path.join(data_dir, root) - - # data/tmp - data_tmp_dir = os.path.join(data_dir, 'tmp') - - # only download spark if data/ doesn't yet exist - if not os.path.exists(spark_dir): - # download archive - print('Spark distribution {} not found locally.'.format(root)) - print('It looks like this is your first time running Spark!') - print('Downloading {}...'.format(spark_uri)) - - resp = http.request('GET', spark_uri, stream=True) - resp.raise_for_status() - - # write to data/ - with open(spark_archive, 'wb') as spark_archive_file: - for block in resp: - spark_archive_file.write(block) - - # extract to data/tmp/ - print('Extracting spark distribution {}...'.format(spark_archive)) - tf = tarfile.open(spark_archive) - tf.extractall(data_tmp_dir) - tf.close() - - # move from data/tmp/ to data/ - spark_tmp = os.path.join(data_tmp_dir, root) - shutil.copytree(spark_tmp, spark_dir) - - # clean up data/tmp/ and data/ - shutil.rmtree(spark_tmp) - os.remove(spark_archive) - print('Successfully fetched spark distribution {}!'.format(spark_uri)) - - return spark_dir - - -def spark_file(path): - return os.path.join(spark_dist(), path) - - -def show_help(): - submit_file = spark_file(os.path.join('bin', 'spark-submit')) - - command = [submit_file, "--help"] - - process = subprocess.Popen( - command, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - - stdout, stderr = process.communicate() - - for line in stderr.decode("utf-8").split("\n"): - if line.startswith("Usage:"): - continue - print(line) - - return 0 - - -def submit_job(dispatcher, args, docker_image, verbose=False): - (props, args) = partition(args.split(" "), lambda a: a.startswith("-D")) - - props = props + ["-Dspark.mesos.executor.docker.image=" + docker_image] - - hdfs_url = _get_spark_hdfs_url() - if hdfs_url is not None: - # urljoin only works as expected if the base URL ends with '/' - if hdfs_url[-1] != '/': - hdfs_url += '/' - hdfs_config_url = urllib.parse.urljoin(hdfs_url, 'hdfs-site.xml') - site_config_url = urllib.parse.urljoin(hdfs_url, 'core-site.xml') - props = props + ["-Dspark.mesos.uris={0},{1}".format(hdfs_config_url, - site_config_url)] - - response = run(dispatcher, args, verbose, props) - if response[0] is not None: - print("Run job succeeded. 
Submission id: " + - response[0]['submissionId']) - return response[1] - - -def job_status(dispatcher, submissionId, verbose=False): - response = run(dispatcher, ["--status", submissionId], verbose) - if response[0] is not None: - print("Submission ID: " + response[0]['submissionId']) - print("Driver state: " + response[0]['driverState']) - if 'message' in response[0]: - print("Last status: " + response[0]['message']) - elif response[1] == 0: - print("Job id '" + submissionId + "' is not found") - return response[1] - - -def kill_job(dispatcher, submissionId, verbose=False): - response = run(dispatcher, ["--kill", submissionId], verbose) - if response[0] is not None: - if bool(response[0]['success']): - success = "succeeded." - else: - success = "failed." - print("Kill job " + success) - print("Message: " + response[0]['message']) - return response[1] - - -def which(program): - """Returns the path to the named executable program. - - :param program: The program to locate: - :type program: str - :rtype: str - """ - - def is_exe(file_path): - return os.path.isfile(file_path) and os.access(file_path, os.X_OK) - - file_path, filename = os.path.split(program) - if file_path: - if is_exe(program): - return program - elif constants.PATH_ENV in os.environ: - for path in os.environ[constants.PATH_ENV].split(os.pathsep): - path = path.strip('"') - exe_file = os.path.join(path, program) - if is_exe(exe_file): - return exe_file - - return None - - -def check_java_version(java_path): - process = subprocess.Popen( - [java_path, "-version"], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - - stdout, stderr = process.communicate() - - lines = stderr.decode('utf8').split(os.linesep) - if len(lines) == 0: - print("Unable to check java version, error: no output detected " - "from " + java_path + " -version") - return False - - match = re.search("1\.(\d+)", lines[0]) - if match and int(match.group(1)) < 7: - print("DCOS Spark requires Java 1.7.x or greater to be installed, " - "found " + lines[0]) - return False - - return True - - -def check_java(): - java_executable = 'java.exe' if util.is_windows_platform() else 'java' - # Check if JAVA is in the PATH - if which(java_executable) is not None: - return check_java_version(java_executable) - - # Check if JAVA_HOME is set and find java - java_home = os.environ.get('JAVA_HOME') - - if java_home is not None: - java_path = os.path.join(java_home, "bin", java_executable) - if os.path.isfile(java_path): - return check_java_version(java_path) - - print("DCOS Spark requires Java 1.7.x to be installed, please install JRE") - return False - - -def run(dispatcher, args, verbose, props=[]): - """ - This method runs spark_submit with the passed in parameters. 
- ie: ./bin/spark-submit --deploy-mode cluster --class - org.apache.spark.examples.SparkPi --master mesos://10.127.131.174:8077 - --executor-memory 1G --total-executor-cores 100 --driver-memory 1G - http://10.127.131.174:8000/spark-examples_2.10-1.3.0-SNAPSHOT.jar 30 - """ - if not check_java(): - return (None, 1) - - proxying = _should_proxy(dispatcher) - proxy_thread = ProxyThread(_get_token() if proxying else None, dispatcher) - if proxying: - proxy_thread.start() - dispatcher = 'http://localhost:{}'.format(proxy_thread.port()) - - command = _get_command(dispatcher, args) - - extra_env = {"SPARK_JAVA_OPTS": ' '.join(props)} - env = dict(os.environ, **extra_env) - # On Windows python 2 complains about unicode in env - if util.is_windows_platform() and sys.version_info[0] < 3: - env = dict([str(key), str(value)] for key, value in env.iteritems()) - process = subprocess.Popen( - command, - env=env, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE) - - stdout, stderr = process.communicate() - - if proxying: - proxy_thread.proxy.shutdown() - proxy_thread.join() - - if verbose is True: - print("Ran command: " + " ".join(command)) - print("With added env vars: {0}".format(extra_env)) - print("Stdout:") - print(stdout) - print("Stderr:") - print(stderr) - - err = stderr.decode("utf-8") - if process.returncode != 0: - if "502 Bad Gateway" in err: - print("Spark service is not found in your DCOS cluster.") - return (None, process.returncode) - - if "500 Internal Server Error" in err: - print("Error reaching Spark cluster endpoint. Please make sure " - "Spark service is in running state in Marathon.") - return (None, process.returncode) - - print("Spark submit failed:") - print(stderr) - return (None, process.returncode) - else: - if "{" in err: - lines = err.splitlines() - jsonStr = "" - startScan = False - for l in lines: - if l.startswith("}") and startScan: - jsonStr += l + os.linesep - startScan = False - elif startScan: - jsonStr += l + os.linesep - elif l.startswith("{"): - startScan = True - jsonStr += l + os.linesep - - response = json.loads(jsonStr) - return (response, process.returncode) - return (None, process.returncode) - - -def _get_spark_hdfs_url(): - return spark_app()['labels'].get('SPARK_HDFS_CONFIG_URL') - - -def _get_command(dispatcher, args): - spark_executable = 'spark-submit.cmd' if util.is_windows_platform() \ - else 'spark-submit' - submit_file = spark_file(os.path.join('bin', spark_executable)) - - if dispatcher.startswith("https://"): - dispatcher = "mesos-ssl://" + dispatcher[8:] - else: - dispatcher = "mesos://" + dispatcher[7:] - - if _cert_verification(): - ssl_ops = [] - else: - ssl_ops = ["--conf", "spark.ssl.noCertVerification=true"] - - return [submit_file, "--deploy-mode", "cluster", "--master", - dispatcher] + ssl_ops + args - - -def _cert_verification(): - try: - core_verify_ssl = config.get_config()['core.ssl_verify'] - return str(core_verify_ssl).lower() in ['true', 'yes', '1'] - except: - return True - - -def _should_proxy(dispatcher): - resp = requests.get(dispatcher, verify=_cert_verification()) - return resp.status_code == 401 - - -def _get_token(): - dcos_url = config.get_config().get('core.dcos_url') - hostname = urllib.parse.urlparse(dcos_url).hostname - return http._get_dcos_auth(None, None, None, hostname).token - - -class ProxyThread(threading.Thread): - def __init__(self, token, dispatcher): - self.proxy = HTTPServer(('localhost', 0), ProxyHandler) - self.proxy.dispatcher = dispatcher - self.proxy._dcos_auth_token = token - super(ProxyThread, 
self).__init__() - - def run(self): - self.proxy.serve_forever() - - def port(self): - return self.proxy.socket.getsockname()[1] - - -class ProxyHandler(BaseHTTPRequestHandler): - MessageClass = HTTPMessage - - def do_GET(self): - self._request('GET') - - def do_POST(self): - self._request('POST') - - def _request(self, method): - self.server._dcos_auth_token - - url = self.server.dispatcher - if url.endswith('/'): - url = url[:-1] - url = url + self.path - - if method == 'POST': - if 'content-length' in self.headers: - body = self.rfile.read( - int(self.headers['content-length'])) - else: - body = six.b('') - req = urllib.request.Request(url, body) - else: - body = six.b('') - req = urllib.request.Request(url) - - logger.debug('=== BEGIN REQUEST ===') - logger.debug(url) - logger.debug('\n') - - for key, value in self.headers.items(): - # key, value = line.strip().split(':', 1) - logger.debug('{0}:{1}'.format(key, value)) - req.add_header(key, value) - - req.add_header( - 'Authorization', - 'token={}'.format(self.server._dcos_auth_token)) - - logger.debug('\n') - logger.debug(body) - - ctx = ssl.create_default_context() - if not _cert_verification(): - ctx.check_hostname = False - ctx.verify_mode = ssl.CERT_NONE - - try: - resp = urllib.request.urlopen(req, context=ctx) - except urllib.error.HTTPError as e: - resp = e - - self.send_response(resp.getcode()) - - logger.debug('=== BEGIN RESPONSE ===') - logger.debug(resp.getcode()) - - for key, value in resp.info().items(): - # key, value = header.strip().split(':', 1) - self.send_header(key, value) - logger.debug('{0}:{1}'.format(key, value)) - self.end_headers() - - body = resp.read() - self.wfile.write(body) - - logger.debug('\n') - logger.debug(body) diff --git a/cli/dcos_spark/version.py b/cli/dcos_spark/version.py deleted file mode 100644 index 2910900f28c55..0000000000000 --- a/cli/dcos_spark/version.py +++ /dev/null @@ -1 +0,0 @@ -version = 'SNAPSHOT' diff --git a/cli/python/.gitignore b/cli/python/.gitignore new file mode 100644 index 0000000000000..309df6e031315 --- /dev/null +++ b/cli/python/.gitignore @@ -0,0 +1,6 @@ +*.egg-info/ +*.pyc +dist/ +build/ +binaries/ +MANIFEST.in diff --git a/cli/python/README.txt b/cli/python/README.txt new file mode 100644 index 0000000000000..d6bbf89fe57b7 --- /dev/null +++ b/cli/python/README.txt @@ -0,0 +1,15 @@ +Python utility which wraps a set of architecture-specific binaries. + +This effectively allows us to use Binary CLI modules without breaking compatibility for DC/OS 1.7 users. + +Assumptions: +- All binaries should be in a 'binaries' subdir under bin_wrapper/. +- Linux file should end in '-linux' +- MacOS file should end in '-darwin' +- Windows file should end in '.exe' + +Configuration: +See declarations at top of setup.py. 
+ +Usage: +python bin_wrapper/__init__.py test1 test2 test3 diff --git a/cli/python/bin_wrapper/__init__.py b/cli/python/bin_wrapper/__init__.py new file mode 100644 index 0000000000000..50539b79f26b2 --- /dev/null +++ b/cli/python/bin_wrapper/__init__.py @@ -0,0 +1,50 @@ +import os +import os.path +import subprocess +import sys + +# platform-specific executables are expected to end with one of these suffixes: +EXE_SUFFIX_DARWIN = '-darwin' +EXE_SUFFIX_LINUX = '-linux' +EXE_SUFFIX_WINDOWS = '.exe' + +# all executables are expected to live under this relative path (relative to this file): +EXE_DIRECTORY = 'binaries' + +def main(): + # determine suffix based on runtime platform: + if sys.platform.startswith('darwin'): + find_suffix = EXE_SUFFIX_DARWIN + elif sys.platform.startswith('linux'): + find_suffix = EXE_SUFFIX_LINUX + elif sys.platform.startswith('win32'): + find_suffix = EXE_SUFFIX_WINDOWS + else: + print('Unsupported system platform (expected darwin/linux/win32): {}'.format(sys.platform)) + return -1 + + here = os.path.abspath(os.path.dirname(__file__)) + + # get full path to directory and validate presence: + binpath = os.path.join(here, EXE_DIRECTORY) + if not os.path.exists(binpath): + print('Path {} not found.'.format(binpath)) + return -1 + if not os.path.isdir(binpath): + print('Path {} is not a directory.'.format(binpath)) + return -1 + + # find file with matching suffix in directory: + for filename in os.listdir(binpath): + if not filename.endswith(find_suffix): + continue + filepath = os.path.join(binpath, filename) + args = [filepath] + sys.argv[1:] + return subprocess.call(args) + + print('No executable in {} ending with "{}" was found.'.format(binpath, find_suffix)) + return -1 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/cli/setup.cfg b/cli/python/setup.cfg similarity index 100% rename from cli/setup.cfg rename to cli/python/setup.cfg diff --git a/cli/python/setup.py b/cli/python/setup.py new file mode 100644 index 0000000000000..855e9ae9c2063 --- /dev/null +++ b/cli/python/setup.py @@ -0,0 +1,48 @@ +import os.path +import setuptools +import shutil +import sys + +# These are the only things you should need to edit: + +# 1. Name used for the frontend executable in [pyenv]/bin. MUST be prefixed by 'dcos-': +exe_name = 'dcos-spark' + +# 2. 
Paths to the executables, relative to this file: +relative_bin_paths = [ + '../{0}/{0}-darwin'.format(exe_name), + '../{0}/{0}-linux'.format(exe_name), + '../{0}/{0}.exe'.format(exe_name) +] + +def main(): + here = os.path.abspath(os.path.dirname(__file__)) + package_dir = 'bin_wrapper' + + # wipe/recreate 'binaries' directory + binaries_dir = 'binaries' + bindirpath = os.path.join(here, package_dir, binaries_dir) + if os.path.exists(bindirpath): + shutil.rmtree(bindirpath) + os.makedirs(bindirpath) + # copy everything from ../BINNAME to ./bin_wrapper/binaries/BINNAME + for bin_path in relative_bin_paths: + shutil.copy( + os.path.join(here, bin_path), + os.path.join(bindirpath, os.path.basename(bin_path))) + + # run setup with generated MANIFEST.in + setuptools.setup( + name=package_dir, + version='0.0.1', + url='http://mesosphere.com', + packages=setuptools.find_packages(exclude=['contrib', 'docs', 'tests']), + entry_points={ 'console_scripts': [ '{}={}:main'.format(exe_name, package_dir) ] }, + package_data={ package_dir: [ os.path.join(binaries_dir, os.path.basename(f)) for f in relative_bin_paths] }) + + # clean up binaries afterwards: + shutil.rmtree(bindirpath) + return 0 + +if __name__ == '__main__': + sys.exit(main()) diff --git a/cli/requirements.txt b/cli/requirements.txt deleted file mode 100644 index b142560c39b63..0000000000000 --- a/cli/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -sphinx==1.3.1 -tox==2.2.1 -wheel==0.24.0 -mock==1.3.0 diff --git a/cli/setup.py b/cli/setup.py deleted file mode 100644 index 9cfcf244e1f43..0000000000000 --- a/cli/setup.py +++ /dev/null @@ -1,112 +0,0 @@ -import os -from setuptools import setup, find_packages -from codecs import open - -from dcos_spark import version - - -here = os.path.abspath(os.path.dirname(__file__)) - -# Get the long description from the relevant file -with open(os.path.join(here, 'README.rst'), encoding='utf-8') as f: - long_description = f.read() - - -setup( - name='dcos-spark', - - # Versions should comply with PEP440. For a discussion on single-sourcing - # the version across setup.py and the project code, see - # https://packaging.python.org/en/latest/single_source_version.html - version=version.version, - - description='DCOS Spark Command Line Interface', - long_description=long_description, - - # The project's main homepage. - url='https://github.com/mesosphere/dcos-spark', - - # Author details - author='Mesosphere, Inc.', - author_email='support@mesosphere.io', - - - # See https://pypi.python.org/pypi?%3Aaction=list_classifiers - classifiers=[ - # How mature is this project? Common values are - # 3 - Alpha - # 4 - Beta - # 5 - Production/Stable - 'Development Status :: 3 - Alpha', - - # Indicate who your project is intended for - 'Intended Audience :: Developers', - 'Intended Audience :: Information Technology', - - # Pick your license as you wish (should match "license" above) - 'License :: OSI Approved :: TODO: License', - - # Specify the Python versions you support here. In particular, ensure - # that you indicate whether you support Python 2, Python 3 or both. - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.6', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.2', - 'Programming Language :: Python :: 3.3', - 'Programming Language :: Python :: 3.4', - ], - - # What does your project relate to? 
- keywords='dcos command spark mesosphere', - - # You can just specify the packages manually here if your project is - # simple. Or you can use find_packages(). - packages=find_packages(exclude=['contrib', 'docs', 'tests*']), - - # List run-time dependencies here. These will be installed by pip when your - # project is installed. For an analysis of "install_requires" vs pip's - # requirements files see: - # https://packaging.python.org/en/latest/requirements.html - install_requires=[ - 'dcos==0.4.6', - 'docopt', - 'toml', - 'requests', - 'six>=1.9, <2.0' - ], - - # List additional groups of dependencies here (e.g. development - # dependencies). You can install these using the following syntax, for - # example: - # $ pip install -e .[dev,test] - extras_require={ - 'dev': ['check-manifest'], - 'test': ['coverage'], - }, - - # If there are data files included in your packages that need to be - # installed, specify them here. If using Python 2.6 or less, then these - # have to be included in MANIFEST.in as well. - - # Although 'package_data' is the preferred approach, in some case you may - # need to place data files outside of your packages. - # In this case, 'data_file' will be installed into '/my_data' - # data_files=[('my_data', ['data/data_file'])], - data_files=[], - - # To provide executable scripts, use entry points in preference to the - # "scripts" keyword. Entry points provide cross-platform support and allow - # pip to create the appropriate form of executable for the target platform. - entry_points={ - 'console_scripts': [ - 'dcos-spark=dcos_spark.cli:main', - ], - }, - - package_data={ - 'dcos_spark': [ - 'data/config-schema/spark.json' - ] - } -) diff --git a/cli/tox.ini b/cli/tox.ini deleted file mode 100644 index 8eabd62d1b63a..0000000000000 --- a/cli/tox.ini +++ /dev/null @@ -1,25 +0,0 @@ -[tox] -envlist = py{27,3}-integration, syntax - -[testenv] -deps = - pytest - pytest-cov - mock - -[testenv:syntax] -deps = - flake8 - isort - -commands = - flake8 --verbose dcos_spark tests - isort --recursive --check-only --diff --verbose dcos_spark tests - -[testenv:py27-integration] -commands = - py.test -vv tests/integration - -[testenv:py3-integration] -commands = - py.test -vv tests/integration diff --git a/manifest.json b/manifest.json index 6a6226af69cc4..079edd3df43a7 100644 --- a/manifest.json +++ b/manifest.json @@ -1,4 +1,3 @@ { - "cli_version": "0.5.19", "spark_uri": "https://downloads.mesosphere.com/spark/assets/spark-2.0.0.tgz" } diff --git a/package/command.json b/package/command.json index e46176fa3e592..ac1775affbe82 100644 --- a/package/command.json +++ b/package/command.json @@ -1,5 +1,5 @@ { "pip": [ - "{{artifact-dir}}/dcos_spark-{{cli-version}}-py2.py3-none-any.whl" + "{{artifact-dir}}/bin_wrapper-0.0.1-py2.py3-none-any.whl" ] } diff --git a/package/resource.json b/package/resource.json index a8ffad97e94f0..2ce3497006b0f 100644 --- a/package/resource.json +++ b/package/resource.json @@ -10,5 +10,30 @@ "icon-medium": "https://downloads.mesosphere.io/spark/assets/icon-service-spark-medium.png", "icon-small": "https://downloads.mesosphere.io/spark/assets/icon-service-spark-small.png", "icon-large": "https://downloads.mesosphere.io/spark/assets/icon-service-spark-large.png" + }, + "cli":{ + "binaries":{ + "darwin":{ + "x86-64":{ + "contentHash":[ { "algo":"sha256", "value":"{{sha256:dcos-spark-darwin}}" } ], + "kind":"executable", + "url":"{{artifact-dir}}/dcos-spark-darwin" + } + }, + "linux":{ + "x86-64":{ + "contentHash":[ { "algo":"sha256", 
"value":"{{sha256:dcos-spark-linux}}" } ], + "kind":"executable", + "url":"{{artifact-dir}}/dcos-spark-linux" + } + }, + "windows":{ + "x86-64":{ + "contentHash":[ { "algo":"sha256", "value":"{{sha256:dcos-spark.exe}}" } ], + "kind":"executable", + "url":"{{artifact-dir}}/dcos-spark.exe" + } + } + } } } From f4de6c9526111f86e3565911bf15f7429f49027e Mon Sep 17 00:00:00 2001 From: Nick Date: Thu, 15 Sep 2016 16:51:14 -0700 Subject: [PATCH 2/4] Fix S3 paths: jenkins.sh's spark_test wants DEV_S3_* --- bin/build.sh | 2 +- bin/jenkins-package-test.sh | 23 +++++++++++++++++++++-- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/bin/build.sh b/bin/build.sh index 981097dfa5f58..dd5685cf8727a 100755 --- a/bin/build.sh +++ b/bin/build.sh @@ -84,7 +84,7 @@ upload_cli_and_stub_universe() { TEMPLATE_SPARK_DIST_URI=${SPARK_DIST_URI} \ TEMPLATE_DOCKER_IMAGE=${DOCKER_IMAGE} \ TEMPLATE_PACKAGE_VERSION=${VERSION} \ - ARTIFACT_DIR="https://downloads.mesosphere.com/spark/assets" \ + ARTIFACT_DIR="https://${S3_BUCKET}.s3.amazonaws.com/${S3_PREFIX}" \ S3_URL="s3://${S3_BUCKET}/${S3_PREFIX}" \ ${COMMONS_TOOLS_DIR}/ci_upload.py \ spark \ diff --git a/bin/jenkins-package-test.sh b/bin/jenkins-package-test.sh index 998472563b579..5797d5fce6337 100755 --- a/bin/jenkins-package-test.sh +++ b/bin/jenkins-package-test.sh @@ -1,8 +1,27 @@ #!/bin/bash +export VERSION=${ghprbActualCommit} +if [ -z "$VERSION" ]; then + export VERSION=${GIT_COMMIT} +fi + +export DOCKER_IMAGE=mesosphere/spark-dev:${VERSION} + export S3_BUCKET=infinity-artifacts -export S3_PREFIX=spark/ -export DOCKER_IMAGE=mesosphere/spark-dev:${GIT_COMMIT} +export S3_PREFIX=autodelete7d/spark/${VERSION} +# fill in any missing DEV_* AWS envvars required by test.sh: +if [ -z "$DEV_S3_BUCKET" ]; then + export DEV_S3_BUCKET=$S3_BUCKET +fi +if [ -z "$DEV_S3_PREFIX" ]; then + export DEV_S3_PREFIX=$S3_PREFIX +fi +if [ -z "$DEV_AWS_ACCESS_KEY_ID" ]; then + export DEV_AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID +fi +if [ -z "$DEV_AWS_SECRET_ACCESS_KEY" ]; then + export DEV_AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY +fi source spark-build/bin/jenkins.sh From 0a2842a604036423207f1b39c5fc79d88ecf2eb2 Mon Sep 17 00:00:00 2001 From: Nick Date: Fri, 16 Sep 2016 11:28:14 -0700 Subject: [PATCH 3/4] Make test.py more PEP-ey, return error if tests fail Don't bother with binary CLI for now. Seems to cause linker problems on alpine. --- bin/test.sh | 2 ++ tests/test.py | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/bin/test.sh b/bin/test.sh index 2b98a5689b08d..1f6b6a582b5f9 100755 --- a/bin/test.sh +++ b/bin/test.sh @@ -55,6 +55,7 @@ configure_cli() { notify_github pending "Configuring CLI" dcos config set core.dcos_url "${DCOS_URL}" + dcos config set core.ssl_verify false ${COMMONS_TOOLS_DIR}/dcos_login.py dcos config show dcos package repo add --index=0 spark-test "${STUB_UNIVERSE_URL}" @@ -106,6 +107,7 @@ run_tests() { python test.py if [ $? 
-ne 0 ]; then notify_github failure "Tests failed" + exit 1 fi popd } diff --git a/tests/test.py b/tests/test.py index 58fa758da4a65..69b1d6fc01a00 100644 --- a/tests/test.py +++ b/tests/test.py @@ -12,6 +12,7 @@ import subprocess import shakedown + def upload_jar(jar): conn = S3Connection(os.environ['AWS_ACCESS_KEY_ID'], os.environ['AWS_SECRET_ACCESS_KEY']) bucket = conn.get_bucket(os.environ['S3_BUCKET']) @@ -43,6 +44,7 @@ def submit_job(jar_url): match = re.search(regex, stdout) return match.group(1) + def task_log(task_id): cmd = "dcos task log --completed --lines=1000 {}".format(task_id) print('Running {}'.format(cmd)) @@ -59,4 +61,6 @@ def main(): print(log) assert "All tests passed" in log -main() + +if __name__ == '__main__': + main() From 954c3710ca06851dd0db1a48d6449f3e34d42629 Mon Sep 17 00:00:00 2001 From: Nick Date: Fri, 16 Sep 2016 14:00:29 -0700 Subject: [PATCH 4/4] Add quotes to avoid issues if BIN_DIR contains spaces --- cli/bin/build-go.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/bin/build-go.sh b/cli/bin/build-go.sh index ce31b0f721ef9..c181632d09c30 100755 --- a/cli/bin/build-go.sh +++ b/cli/bin/build-go.sh @@ -3,7 +3,7 @@ set +x BIN_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" -cd $BIN_DIR/.. +cd "$BIN_DIR"/.. if [ -z "$GOPATH" -o -z "$(which go)" ]; then echo "Missing GOPATH environment variable or 'go' executable. Please configure a Go build environment."
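
A quick way to sanity-check the new wrapper packaging end to end is sketched below. This is only a rough local smoke test, assuming the three platform binaries have already been produced under cli/dcos-spark/ (the location expected by relative_bin_paths in cli/python/setup.py) and that setuptools plus the 'wheel' package are available in the active Python environment; the repo's own build scripts may wrap these steps differently.

    # Build the wheel; setup.py copies the platform binaries into bin_wrapper/binaries/ first.
    cd cli/python
    python setup.py bdist_wheel

    # Install the result (the same wheel that package/command.json points pip at);
    # the glob hedges the exact python tag of the generated wheel.
    pip install dist/bin_wrapper-0.0.1-*.whl

    # The 'dcos-spark' console script from entry_points locates the bundled binary whose
    # suffix matches the current platform and forwards all arguments to it.
    dcos-spark --help

Because the wrapper only re-executes the bundled binary for the current platform, the pip-based subcommand installation used for DC/OS 1.7 (package/command.json) ends up exposing the same CLI surface as the native binaries listed in package/resource.json.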