From 81e0706db378caaceb0eeb6a395aa5d0be0c66f2 Mon Sep 17 00:00:00 2001 From: avelichk Date: Thu, 15 Oct 2020 21:33:55 +0100 Subject: [PATCH] Fix few comments --- .../experiments/v1beta1/constants.go | 18 +++++++-------- .../experiments/v1beta1/experiment_types.go | 3 ++- .../experiment/experiment_controller_test.go | 13 ++++++----- .../trial/trial_controller_test.go | 22 +++++++++++-------- .../v1beta1/experiment/validator/validator.go | 4 ++-- .../v1beta1/pod/inject_webhook_test.go | 2 +- 6 files changed, 34 insertions(+), 28 deletions(-) diff --git a/pkg/apis/controller/experiments/v1beta1/constants.go b/pkg/apis/controller/experiments/v1beta1/constants.go index a9077fb0ebb..073660db2bb 100644 --- a/pkg/apis/controller/experiments/v1beta1/constants.go +++ b/pkg/apis/controller/experiments/v1beta1/constants.go @@ -16,32 +16,32 @@ limitations under the License. package v1beta1 const ( - // DefaultTrialParallelCount is the default value of spec.parallelTrialCount + // DefaultTrialParallelCount is the default value of spec.parallelTrialCount. DefaultTrialParallelCount = 3 - // DefaultTrialConfigMapName is the default value of spec.trialTemplate.configMapName for Trial template + // DefaultTrialConfigMapName is the default value of spec.trialTemplate.configMapName for Trial template. DefaultTrialConfigMapName = "trial-template" - // DefaultTrialTemplatePath is the default value of spec.trialTemplate.TemplatePath + // DefaultTrialTemplatePath is the default value of spec.trialTemplate.TemplatePath. DefaultTrialTemplatePath = "defaultTrialTemplate.yaml" - // DefaultResumePolicy is the default value of spec.resumePolicy + // DefaultResumePolicy is the default value of spec.resumePolicy. DefaultResumePolicy = LongRunning - // DefaultJobSuccessCondition is the default value of spec.trialTemplate.successCondition for Job + // DefaultJobSuccessCondition is the default value of spec.trialTemplate.successCondition for Job. DefaultJobSuccessCondition = "status.conditions.#(type==\"Complete\")#|#(status==\"True\")#" - // DefaultJobFailureCondition is the default value of spec.trialTemplate.failureCondition for Job + // DefaultJobFailureCondition is the default value of spec.trialTemplate.failureCondition for Job. DefaultJobFailureCondition = "status.conditions.#(type==\"Failed\")#|#(status==\"True\")#" - // DefaultKubeflowJobSuccessCondition is the default value of spec.trialTemplate.successCondition for Kubeflow Job + // DefaultKubeflowJobSuccessCondition is the default value of spec.trialTemplate.successCondition for Kubeflow Job. DefaultKubeflowJobSuccessCondition = "status.conditions.#(type==\"Succeeded\")#|#(status==\"True\")#" - // DefaultKubeflowJobFailureCondition is the default value of spec.trialTemplate.failureCondition for Kubeflow Job + // DefaultKubeflowJobFailureCondition is the default value of spec.trialTemplate.failureCondition for Kubeflow Job. DefaultKubeflowJobFailureCondition = "status.conditions.#(type==\"Failed\")#|#(status==\"True\")#" ) var ( - // DefaultKubeflowJobPrimaryPodLabels is the default value of spec.trialTemplate.primaryPodLabels for Kubeflow Job + // DefaultKubeflowJobPrimaryPodLabels is the default value of spec.trialTemplate.primaryPodLabels for Kubeflow Job. DefaultKubeflowJobPrimaryPodLabels = map[string]string{"job-role": "master"} ) diff --git a/pkg/apis/controller/experiments/v1beta1/experiment_types.go b/pkg/apis/controller/experiments/v1beta1/experiment_types.go index 0721af52466..dc945f9838b 100644 --- a/pkg/apis/controller/experiments/v1beta1/experiment_types.go +++ b/pkg/apis/controller/experiments/v1beta1/experiment_types.go @@ -208,7 +208,8 @@ type TrialTemplate struct { // List of parameters that are used in trial template TrialParameters []TrialParameterSpec `json:"trialParameters,omitempty"` - // Labels that determines if pod needs to be injected by Katib sidecar container + // Labels that determines if pod needs to be injected by Katib sidecar container. + // If PrimaryPodLabels is omitted, metrics collector wraps all Trial's pods. PrimaryPodLabels map[string]string `json:"primaryPodLabels,omitempty"` // Name of training container where actual model training is running diff --git a/pkg/controller.v1beta1/experiment/experiment_controller_test.go b/pkg/controller.v1beta1/experiment/experiment_controller_test.go index b0289dda5eb..670c44e8bb3 100644 --- a/pkg/controller.v1beta1/experiment/experiment_controller_test.go +++ b/pkg/controller.v1beta1/experiment/experiment_controller_test.go @@ -33,9 +33,10 @@ import ( ) const ( - experimentName = "test-experiment" - trialName = "test-trial" - namespace = "default" + experimentName = "test-experiment" + trialName = "test-trial" + namespace = "default" + primaryContainer = "tensorflow" timeout = time.Second * 40 ) @@ -372,7 +373,7 @@ func newFakeInstance() *experimentsv1beta1.Experiment { Spec: v1.PodSpec{ Containers: []v1.Container{ { - Name: "tensorflow", + Name: primaryContainer, Image: "gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0", Command: []string{ "python", @@ -416,7 +417,7 @@ func newFakeInstance() *experimentsv1beta1.Experiment { ResumePolicy: experimentsv1beta1.NeverResume, TrialTemplate: &experimentsv1beta1.TrialTemplate{ PrimaryPodLabels: experimentsv1beta1.DefaultKubeflowJobPrimaryPodLabels, - PrimaryContainerName: "tensorflow", + PrimaryContainerName: primaryContainer, SuccessCondition: experimentsv1beta1.DefaultKubeflowJobSuccessCondition, FailureCondition: experimentsv1beta1.DefaultKubeflowJobFailureCondition, TrialParameters: []experimentsv1beta1.TrialParameterSpec{ @@ -513,7 +514,7 @@ func newFakeTFJob() *tfv1.TFJob { Spec: v1.PodSpec{ Containers: []v1.Container{ { - Name: "tensorflow", + Name: primaryContainer, Image: "gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0", Command: []string{ "python", diff --git a/pkg/controller.v1beta1/trial/trial_controller_test.go b/pkg/controller.v1beta1/trial/trial_controller_test.go index 819869fed99..f191d602cf3 100644 --- a/pkg/controller.v1beta1/trial/trial_controller_test.go +++ b/pkg/controller.v1beta1/trial/trial_controller_test.go @@ -37,7 +37,7 @@ const ( tfJobName = "test-tfjob" batchJobName = "test-job" objectiveMetric = "accuracy" - timeout = time.Second * 5 + timeout = time.Second * 40 ) var trialKey = types.NamespacedName{Name: trialName, Namespace: namespace} @@ -53,8 +53,8 @@ func TestAdd(t *testing.T) { mgr, err := manager.New(cfg, manager.Options{}) g.Expect(err).NotTo(gomega.HaveOccurred()) - // Set fake trial resources. - // TFJob is installed, MPIJob is missed to test error message. + // Set Trial resources. + // TFJob controller is installed, MPIJob controller is missed. trialResources := trialutil.GvkListFlag{ { Group: "kubeflow.org", @@ -130,7 +130,7 @@ func TestReconcileTFJob(t *testing.T) { }() // Empty result for GetTrialObservationLog. - // If objective metrics are not parsed metrics collector reports "unavailable" value to DB. + // If objective metrics are not parsed, metrics collector reports "unavailable" value to DB. observationLog := &api_pb.GetObservationLogReply{ ObservationLog: &api_pb.ObservationLog{ MetricLogs: []*api_pb.MetricLog{ @@ -435,6 +435,8 @@ func TestGetObjectiveMetricValue(t *testing.T) { } func newFakeTrialTFJob() *trialsv1beta1.Trial { + primaryContainer := "tensorflow" + tfJob := &tfv1.TFJob{ TypeMeta: metav1.TypeMeta{ APIVersion: "kubeflow.org/v1", @@ -453,7 +455,7 @@ func newFakeTrialTFJob() *trialsv1beta1.Trial { Spec: v1.PodSpec{ Containers: []v1.Container{ { - Name: "tensorflow", + Name: primaryContainer, Image: "gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0", Command: []string{ "python", @@ -474,7 +476,7 @@ func newFakeTrialTFJob() *trialsv1beta1.Trial { Spec: v1.PodSpec{ Containers: []v1.Container{ { - Name: "tensorflow", + Name: primaryContainer, Image: "gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0", Command: []string{ "python", @@ -500,7 +502,7 @@ func newFakeTrialTFJob() *trialsv1beta1.Trial { }, Spec: trialsv1beta1.TrialSpec{ PrimaryPodLabels: experimentsv1beta1.DefaultKubeflowJobPrimaryPodLabels, - PrimaryContainerName: "tensorflow", + PrimaryContainerName: primaryContainer, SuccessCondition: experimentsv1beta1.DefaultKubeflowJobSuccessCondition, FailureCondition: experimentsv1beta1.DefaultKubeflowJobFailureCondition, Objective: &commonv1beta1.ObjectiveSpec{ @@ -518,6 +520,8 @@ func newFakeTrialTFJob() *trialsv1beta1.Trial { } func newFakeTrialBatchJob() *trialsv1beta1.Trial { + primaryContainer := "training-container" + job := &batchv1.Job{ TypeMeta: metav1.TypeMeta{ APIVersion: "batch/v1", @@ -532,7 +536,7 @@ func newFakeTrialBatchJob() *trialsv1beta1.Trial { Spec: corev1.PodSpec{ Containers: []corev1.Container{ { - Name: "training-container", + Name: primaryContainer, Image: "docker.io/kubeflowkatib/mxnet-mnist", Command: []string{ "python3", @@ -555,7 +559,7 @@ func newFakeTrialBatchJob() *trialsv1beta1.Trial { Namespace: namespace, }, Spec: trialsv1beta1.TrialSpec{ - PrimaryContainerName: "training-container", + PrimaryContainerName: primaryContainer, SuccessCondition: experimentsv1beta1.DefaultJobSuccessCondition, FailureCondition: experimentsv1beta1.DefaultJobFailureCondition, Objective: &commonv1beta1.ObjectiveSpec{ diff --git a/pkg/webhook/v1beta1/experiment/validator/validator.go b/pkg/webhook/v1beta1/experiment/validator/validator.go index a52b8802b41..75a80eb268b 100644 --- a/pkg/webhook/v1beta1/experiment/validator/validator.go +++ b/pkg/webhook/v1beta1/experiment/validator/validator.go @@ -323,7 +323,7 @@ func (g *DefaultValidator) validateTrialJob(runSpec *unstructured.Unstructured) tfJob := &tfv1.TFJob{} err := runtime.DefaultUnstructuredConverter.FromUnstructured(runSpec.Object, &tfJob) if err != nil { - return fmt.Errorf("Unable to convert spec.TrialTemplate to %v: %v", gvk.Kind, err) + return fmt.Errorf("Unable to convert spec.TrialTemplate: %v to %v: %v", runSpec.Object, gvk.Kind, err) } err = validatePatchJob(runSpec, tfJob, gvk.Kind) if err != nil { @@ -333,7 +333,7 @@ func (g *DefaultValidator) validateTrialJob(runSpec *unstructured.Unstructured) pyTorchJob := &pytorchv1.PyTorchJob{} err := runtime.DefaultUnstructuredConverter.FromUnstructured(runSpec.Object, &pyTorchJob) if err != nil { - return fmt.Errorf("Unable to convert spec.TrialTemplate to %v: %v", gvk.Kind, err) + return fmt.Errorf("Unable to convert spec.TrialTemplate: %v to %v: %v", runSpec.Object, gvk.Kind, err) } err = validatePatchJob(runSpec, pyTorchJob, gvk.Kind) if err != nil { diff --git a/pkg/webhook/v1beta1/pod/inject_webhook_test.go b/pkg/webhook/v1beta1/pod/inject_webhook_test.go index 928c0e7ab57..c42674371f9 100644 --- a/pkg/webhook/v1beta1/pod/inject_webhook_test.go +++ b/pkg/webhook/v1beta1/pod/inject_webhook_test.go @@ -140,7 +140,7 @@ func TestWrapWorkerContainer(t *testing.T) { }, PathKind: common.FileKind, Err: true, - TestDescription: "Training pod doesn't have primary container name", + TestDescription: "Training pod doesn't have primary container", }, }