Skip to content

Commit

Permalink
ksonnet changes to support deploying the v1alpha2 TFJob operator. (ku…
Browse files Browse the repository at this point in the history
…beflow#851)

* ksonnet changes to support deploying the v1alpha2 TFJob operator.

* K8s doesn't support installing multiple versions of a CRD in the same cluster.
* So we add an option to choose which version to support.
* We don't add an E2E test in this PR because it's not straightforward.
  Currently the E2E test deploys Kubeflow in a different namespace but not
  a different cluster. This won't allow us to simultaneously test both
  versions.

* So to test multiple versions we will need to spin up a separate GKE cluster.

Related to: kubeflow#599

* Autoformat jsonnet.
  • Loading branch information
jlewi authored and k8s-ci-robot committed May 24, 2018
1 parent 62d07ef commit 13d4afa
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 14 deletions.
3 changes: 2 additions & 1 deletion kubeflow/core/prototypes/all.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
// @optionalParam disks string null Comma separated list of Google persistent disks to attach to jupyter environments.
// @optionalParam cloud string null String identifying the cloud to customize the deployment for.
// @optionalParam tfAmbassadorServiceType string ClusterIP The service type for the API Gateway.
// @optionalParam tfJobImage string gcr.io/kubeflow-images-public/tf_operator:v20180329-a7511ff The image for the TfJob controller.
// @optionalParam tfJobImage string gcr.io/kubeflow-images-public/tf_operator:v20180522-77375baf The image for the TfJob controller.
// @optionalParam tfDefaultImage string null The default image to use for TensorFlow.
// @optionalParam tfJobUiServiceType string ClusterIP The service type for the UI.
// @optionalParam jupyterHubServiceType string ClusterIP The service type for Jupyterhub.
Expand All @@ -16,6 +16,7 @@
// @optionalParam jupyterNotebookPVCMount string null Mount path for PVC. Set empty to disable PVC
// @optionalParam reportUsage string false Whether or not to report Kubeflow usage to kubeflow.org.
// @optionalParam usageId string unknown_cluster Optional id to use when reporting usage to kubeflow.org
// @optionalParam tfJobVersion string v1alpha1 which version of the TFJob operator to use

local k = import "k.libsonnet";
local all = import "kubeflow/core/all.libsonnet";
Expand Down
3 changes: 2 additions & 1 deletion kubeflow/core/prototypes/tf-job-operator.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@
// @optionalParam namespace string null Namespace to use for the components. It is automatically inherited from the environment if not set.
// @optionalParam cloud string null String identifying the cloud to customize the deployment for.
// @optionalParam tfAmbassadorServiceType string ClusterIP The service type for the API Gateway.
// @optionalParam tfJobImage string gcr.io/kubeflow-images-public/tf_operator:v20180329-a7511ff The image for the TfJob controller.
// @optionalParam tfJobImage string gcr.io/kubeflow-images-public/tf_operator:v20180522-77375baf The image for the TfJob controller.
// @optionalParam tfDefaultImage string null The default image to use for TensorFlow.
// @optionalParam tfJobUiServiceType string ClusterIP The service type for the UI.
// @optionalParam tfJobVersion string v1alpha1 which version of the TFJob operator to use

// TODO(https://github.com/ksonnet/ksonnet/issues/235): ks param set args won't work if the arg starts with "--".

Expand Down
116 changes: 104 additions & 12 deletions kubeflow/core/tf-job-operator.libsonnet
Original file line number Diff line number Diff line change
@@ -1,17 +1,27 @@
{
all(params):: [
$.parts(params.namespace).tfJobDeploy(params.tfJobImage),
$.parts(params.namespace).configMap(params.cloud, params.tfDefaultImage),
$.parts(params.namespace).serviceAccount,
$.parts(params.namespace).operatorRole,
$.parts(params.namespace).operatorRoleBinding,
$.parts(params.namespace).crd,
$.parts(params.namespace).uiRole,
$.parts(params.namespace).uiRoleBinding,
$.parts(params.namespace).uiService(params.tfJobUiServiceType),
$.parts(params.namespace).uiServiceAccount,
$.parts(params.namespace).ui(params.tfJobImage),
],

$.parts(params.namespace).configMap(params.cloud, params.tfDefaultImage),
$.parts(params.namespace).serviceAccount,
$.parts(params.namespace).operatorRole,
$.parts(params.namespace).operatorRoleBinding,
$.parts(params.namespace).uiRole,
$.parts(params.namespace).uiRoleBinding,
$.parts(params.namespace).uiService(params.tfJobUiServiceType),
$.parts(params.namespace).uiServiceAccount,
$.parts(params.namespace).ui(params.tfJobImage),
] +

if params.tfJobVersion == "v1alpha2" then
[
$.parts(params.namespace).crdv1alpha2,
$.parts(params.namespace).tfJobDeployV1Alpha2(params.tfJobImage),
]
else
[
$.parts(params.namespace).crd,
$.parts(params.namespace).tfJobDeploy(params.tfJobImage),
],

parts(namespace):: {
crd: {
Expand All @@ -31,6 +41,23 @@
},
},

// CustomResourceDefinition that registers the TFJob kind in the
// kubeflow.org group at version v1alpha2.
crdv1alpha2: {
  // The kind / singular / plural names under which the resource is exposed.
  local resourceNames = {
    plural: "tfjobs",
    singular: "tfjob",
    kind: "TFJob",
  },

  kind: "CustomResourceDefinition",
  apiVersion: "apiextensions.k8s.io/v1beta1",
  metadata: {
    name: "tfjobs.kubeflow.org",
  },
  spec: {
    version: "v1alpha2",
    group: "kubeflow.org",
    names: resourceNames,
  },
},

tfJobDeploy(image): {
apiVersion: "extensions/v1beta1",
kind: "Deployment",
Expand Down Expand Up @@ -97,6 +124,71 @@
},
}, // tfJobDeploy

// Deployment that runs the v1alpha2 TFJob operator binary
// (/opt/kubeflow/tf-operator.v2) as a single replica.
//
// image: container image used for the operator container.
// The Deployment is placed in the `namespace` of the enclosing parts(namespace).
tfJobDeployV1Alpha2(image): {
  // Builds an env entry whose value is read from a field of the pod itself.
  local podFieldEnv(envName, path) = {
    name: envName,
    valueFrom: {
      fieldRef: {
        fieldPath: path,
      },
    },
  },

  // Shared between the volume declaration and the container's mount.
  local configVolumeName = "config-volume",

  // The operator container; the config volume is mounted at /etc/config.
  local operatorContainer = {
    name: "tf-job-operator",
    image: image,
    command: [
      "/opt/kubeflow/tf-operator.v2",
      "--alsologtostderr",
      "-v=1",
    ],
    env: [
      podFieldEnv("MY_POD_NAMESPACE", "metadata.namespace"),
      podFieldEnv("MY_POD_NAME", "metadata.name"),
    ],
    volumeMounts: [
      {
        name: configVolumeName,
        mountPath: "/etc/config",
      },
    ],
  },

  // Volume backed by the tf-job-operator-config ConfigMap.
  local configVolume = {
    name: configVolumeName,
    configMap: {
      name: "tf-job-operator-config",
    },
  },

  kind: "Deployment",
  apiVersion: "extensions/v1beta1",
  metadata: {
    name: "tf-job-operator-v1alpha2",
    namespace: namespace,
  },
  spec: {
    replicas: 1,
    template: {
      metadata: {
        labels: {
          name: "tf-job-operator",
        },
      },
      spec: {
        serviceAccountName: "tf-job-operator",
        containers: [operatorContainer],
        volumes: [configVolume],
      },
    },
  },
},  // tfJobDeployV1Alpha2

// Default values for the controller config.
defaultControllerConfig(tfDefaultImage):: {
grpcServerFilePath: "/opt/mlkube/grpc_tensorflow_server/grpc_tensorflow_server.py",
Expand Down

0 comments on commit 13d4afa

Please sign in to comment.