From 8916021e187054bb285548886158d68308ae77ae Mon Sep 17 00:00:00 2001 From: yanghua Date: Wed, 1 Mar 2023 23:09:35 +0800 Subject: [PATCH] [Addon #579] Refactor the spark-workload parameter definition Signed-off-by: yanghua --- .../spark-kubernetes-operator/sparkapp.yaml | 18 +-- .../spark-kubernetes-operator/README.md | 116 ++++++++++-------- .../definitions/spark-workload.cue | 44 +++---- 3 files changed, 93 insertions(+), 85 deletions(-) diff --git a/examples/spark-kubernetes-operator/sparkapp.yaml b/examples/spark-kubernetes-operator/sparkapp.yaml index 487c8e4b..34966ac4 100644 --- a/examples/spark-kubernetes-operator/sparkapp.yaml +++ b/examples/spark-kubernetes-operator/sparkapp.yaml @@ -22,11 +22,13 @@ spec: hostPath: path: "/tmp" type: Directory - driverCores: 1 - executorCores: 1 - driverVolumeMounts: - - name: "test-volume" - mountPath: "/tmp" - executorVolumeMounts: - - name: "test-volume" - mountPath: "/tmp" + driver: + cores: 1 + volumeMounts: + - name: "test-volume" + mountPath: "/tmp" + executor: + cores: 1 + volumeMounts: + - name: "test-volume" + mountPath: "/tmp" diff --git a/experimental/addons/spark-kubernetes-operator/README.md b/experimental/addons/spark-kubernetes-operator/README.md index 2299d023..eac7402f 100644 --- a/experimental/addons/spark-kubernetes-operator/README.md +++ b/experimental/addons/spark-kubernetes-operator/README.md @@ -34,39 +34,71 @@ vela ls -A | grep spark ``` vela show spark-workload # Specification -+----------------------+------------------------------------------------------------------------------------------------------+-------------------------------------------------+----------+---------+ -| NAME | DESCRIPTION | TYPE | REQUIRED | DEFAULT | -+----------------------+------------------------------------------------------------------------------------------------------+-------------------------------------------------+----------+---------+ -| name | Specify the spark application name. 
| string | true | | -| namespace | Specify the namespace for spark application to install. | string | true | | -| type | Specify the application language type, e.g. "Scala", "Python", "Java" or "R". | string | true | | -| pythonVersion | Specify the python version. | string | false | | -| mode | Specify the deploy mode, e.go "cluster", "client" or "in-cluster-client". | string | true | | -| image | Specify the container image for the driver, executor, and init-container. | string | true | | -| imagePullPolicy | Specify the image pull policy for the driver, executor, and init-container. | string | true | | -| mainClass | Specify the fully-qualified main class of the Spark application. | string | true | | -| mainApplicationFile | Specify the path to a bundled JAR, Python, or R file of the application. | string | true | | -| sparkVersion | Specify the version of Spark the application uses. | string | true | | -| driverCores | Specify the number of CPU cores to request for the driver pod. | int | true | | -| executorCores | Specify the number of CPU cores to request for the executor pod. | int | true | | -| arguments | Specify a list of arguments to be passed to the application. | []string | false | | -| sparkConf | Specify the config information carries user-specified Spark configuration properties as they would | map[string]string | false | | -| | use the "--conf" option in spark-submit. | | | | -| hadoopConf | Specify the config information carries user-specified Hadoop configuration properties as they would | map[string]string | false | | -| | use the the "--conf" option in spark-submit. The SparkApplication controller automatically adds | | | | -| | prefix "spark.hadoop." to Hadoop configuration properties. | | | | -| sparkConfigMap | Specify the name of the ConfigMap containing Spark configuration files such as log4j.properties. 
The | string | false | | -| | controller will add environment variable SPARK_CONF_DIR to the path where the ConfigMap is mounted | | | | -| | to. | | | | -| hadoopConfigMap | Specify the name of the ConfigMap containing Hadoop configuration files such as core-site.xml. The | string | false | | -| | controller will add environment variable HADOOP_CONF_DIR to the path where the ConfigMap is mounted | | | | -| | to. | | | | -| volumes | Specify the list of Kubernetes volumes that can be mounted by the driver and/or executors. | [[]volumes](#volumes) | false | | -| driverVolumeMounts | Specify the volumes listed in "parameter.volumes" to mount into the main container’s filesystem for | [[]driverVolumeMounts](#drivervolumemounts) | false | | -| | driver pod. | | | | -| executorVolumeMounts | Specify the volumes listed in "parameter.volumes" to mount into the main container’s filesystem for | [[]executorVolumeMounts](#executorvolumemounts) | false | | -| | executor pod. | | | | -+----------------------+------------------------------------------------------------------------------------------------------+-------------------------------------------------+----------+---------+ ++---------------------+------------------------------------------------------------------------------------------------------+-----------------------+----------+---------+ +| NAME | DESCRIPTION | TYPE | REQUIRED | DEFAULT | ++---------------------+------------------------------------------------------------------------------------------------------+-----------------------+----------+---------+ +| name | Specify the spark application name. | string | true | | +| namespace | Specify the namespace for spark application to install. | string | true | | +| type | Specify the application language type, e.g. "Scala", "Python", "Java" or "R". | string | true | | +| pythonVersion | Specify the python version. 
| string | false | | +| mode | Specify the deploy mode, e.g. "cluster", "client" or "in-cluster-client". | string | true | | +| image | Specify the container image for the driver, executor, and init-container. | string | true | | +| imagePullPolicy | Specify the image pull policy for the driver, executor, and init-container. | string | true | | +| mainClass | Specify the fully-qualified main class of the Spark application. | string | true | | +| mainApplicationFile | Specify the path to a bundled JAR, Python, or R file of the application. | string | true | | +| sparkVersion | Specify the version of Spark the application uses. | string | true | | +| driver | Specify the driver spec request for the driver pod. | [driver](#driver) | true | | +| executor | Specify the executor spec request for the executor pod. | [executor](#executor) | true | | +| arguments | Specify a list of arguments to be passed to the application. | []string | false | | +| sparkConf | Specify the config information carries user-specified Spark configuration properties as they would | map[string]string | false | | +| | use the "--conf" option in spark-submit. | | | | +| hadoopConf | Specify the config information carries user-specified Hadoop configuration properties as they would | map[string]string | false | | +| | use the "--conf" option in spark-submit. The SparkApplication controller automatically adds | | | | +| | prefix "spark.hadoop." to Hadoop configuration properties. | | | | +| sparkConfigMap | Specify the name of the ConfigMap containing Spark configuration files such as log4j.properties. The | string | false | | +| | controller will add environment variable SPARK_CONF_DIR to the path where the ConfigMap is mounted | | | | +| | to. | | | | +| hadoopConfigMap | Specify the name of the ConfigMap containing Hadoop configuration files such as core-site.xml. 
The | string | false | | +| | controller will add environment variable HADOOP_CONF_DIR to the path where the ConfigMap is mounted | | | | +| | to. | | | | +| volumes | Specify the list of Kubernetes volumes that can be mounted by the driver and/or executors. | [[]volumes](#volumes) | false | | ++---------------------+------------------------------------------------------------------------------------------------------+-----------------------+----------+---------+ + + +## driver ++--------------+-------------+---------------------------------+----------+---------+ +| NAME | DESCRIPTION | TYPE | REQUIRED | DEFAULT | ++--------------+-------------+---------------------------------+----------+---------+ +| cores | | int | true | | +| volumeMounts | | [[]volumeMounts](#volumemounts) | false | | ++--------------+-------------+---------------------------------+----------+---------+ + + +### volumeMounts ++-----------+-------------+--------+----------+---------+ +| NAME | DESCRIPTION | TYPE | REQUIRED | DEFAULT | ++-----------+-------------+--------+----------+---------+ +| name | | string | true | | +| mountPath | | string | true | | ++-----------+-------------+--------+----------+---------+ + + +## executor ++--------------+-------------+---------------------------------+----------+---------+ +| NAME | DESCRIPTION | TYPE | REQUIRED | DEFAULT | ++--------------+-------------+---------------------------------+----------+---------+ +| cores | | int | true | | +| volumeMounts | | [[]volumeMounts](#volumemounts) | false | | ++--------------+-------------+---------------------------------+----------+---------+ + + +### volumeMounts ++-----------+-------------+--------+----------+---------+ +| NAME | DESCRIPTION | TYPE | REQUIRED | DEFAULT | ++-----------+-------------+--------+----------+---------+ +| name | | string | true | | +| mountPath | | string | true | | ++-----------+-------------+--------+----------+---------+ ## volumes @@ -85,24 +117,6 @@ vela show spark-workload | 
path | | string | true | | | type | | string | false | Directory | +------+-------------+--------+----------+-----------+ - - -## driverVolumeMounts -+-----------+-------------+--------+----------+---------+ -| NAME | DESCRIPTION | TYPE | REQUIRED | DEFAULT | -+-----------+-------------+--------+----------+---------+ -| name | | string | true | | -| mountPath | | string | true | | -+-----------+-------------+--------+----------+---------+ - - -## executorVolumeMounts -+-----------+-------------+--------+----------+---------+ -| NAME | DESCRIPTION | TYPE | REQUIRED | DEFAULT | -+-----------+-------------+--------+----------+---------+ -| name | | string | true | | -| mountPath | | string | true | | -+-----------+-------------+--------+----------+---------+ ``` # Example for how to run a component typed spark-cluster in application diff --git a/experimental/addons/spark-kubernetes-operator/definitions/spark-workload.cue b/experimental/addons/spark-kubernetes-operator/definitions/spark-workload.cue index 616e2391..f7959eb8 100644 --- a/experimental/addons/spark-kubernetes-operator/definitions/spark-workload.cue +++ b/experimental/addons/spark-kubernetes-operator/definitions/spark-workload.cue @@ -28,10 +28,22 @@ template: { mainApplicationFile: string // +usage=Specify the version of Spark the application uses sparkVersion: string - // +usage=Specify the number of CPU cores to request for the driver pod - driverCores: int - // +usage=Specify the number of CPU cores to request for the executor pod - executorCores: int + // +usage=Specify the driver spec request for the driver pod + driver: { + cores: int + volumeMounts?: [...{ + name: string + mountPath: string + }] + } + // +usage=Specify the executor spec request for the executor pod + executor: { + cores: int + volumeMounts?: [...{ + name: string + mountPath: string + }] + } // +usage=Specify a list of arguments to be passed to the application arguments?: [...string] // +usage=Specify the config information carries 
user-specified Spark configuration properties as they would use the "--conf" option in spark-submit @@ -50,16 +62,6 @@ template: { type: *"Directory" | string } }] - // +usage=Specify the volumes listed in "parameter.volumes" to mount into the main container’s filesystem for driver pod - driverVolumeMounts?: [...{ - name: string - mountPath: string - }] - // +usage=Specify the volumes listed in "parameter.volumes" to mount into the main container’s filesystem for executor pod - executorVolumeMounts?: [...{ - name: string - mountPath: string - }] } output: { @@ -95,18 +97,8 @@ template: { mainClass: parameter.mainClass mainApplicationFile: parameter.mainApplicationFile sparkVersion: parameter.sparkVersion - driver: { - cores: parameter.driverCores - if parameter.driverVolumeMounts != _|_ { - volumeMounts: parameter.driverVolumeMounts - } - } - executor: { - cores: parameter.executorCores - if parameter.executorVolumeMounts != _|_ { - volumeMounts: parameter.executorVolumeMounts - } - } + driver: parameter.driver + executor: parameter.executor if parameter.volumes != _|_ { volumes: parameter.volumes }