Skip to content

Commit

Permalink
Fix few comments
Browse files Browse the repository at this point in the history
  • Loading branch information
andreyvelich committed Oct 15, 2020
1 parent 4d51fc3 commit 964798f
Show file tree
Hide file tree
Showing 6 changed files with 34 additions and 28 deletions.
18 changes: 9 additions & 9 deletions pkg/apis/controller/experiments/v1beta1/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,32 +16,32 @@ limitations under the License.
package v1beta1

const (
// DefaultTrialParallelCount is the default value of spec.parallelTrialCount
// DefaultTrialParallelCount is the default value of spec.parallelTrialCount.
DefaultTrialParallelCount = 3

// DefaultTrialConfigMapName is the default value of spec.trialTemplate.configMapName for Trial template
// DefaultTrialConfigMapName is the default value of spec.trialTemplate.configMapName for Trial template.
DefaultTrialConfigMapName = "trial-template"

// DefaultTrialTemplatePath is the default value of spec.trialTemplate.TemplatePath
// DefaultTrialTemplatePath is the default value of spec.trialTemplate.TemplatePath.
DefaultTrialTemplatePath = "defaultTrialTemplate.yaml"

// DefaultResumePolicy is the default value of spec.resumePolicy
// DefaultResumePolicy is the default value of spec.resumePolicy.
DefaultResumePolicy = LongRunning

// DefaultJobSuccessCondition is the default value of spec.trialTemplate.successCondition for Job
// DefaultJobSuccessCondition is the default value of spec.trialTemplate.successCondition for Job.
DefaultJobSuccessCondition = "status.conditions.#(type==\"Complete\")#|#(status==\"True\")#"

// DefaultJobFailureCondition is the default value of spec.trialTemplate.failureCondition for Job
// DefaultJobFailureCondition is the default value of spec.trialTemplate.failureCondition for Job.
DefaultJobFailureCondition = "status.conditions.#(type==\"Failed\")#|#(status==\"True\")#"

// DefaultKubeflowJobSuccessCondition is the default value of spec.trialTemplate.successCondition for Kubeflow Job
// DefaultKubeflowJobSuccessCondition is the default value of spec.trialTemplate.successCondition for Kubeflow Job.
DefaultKubeflowJobSuccessCondition = "status.conditions.#(type==\"Succeeded\")#|#(status==\"True\")#"

// DefaultKubeflowJobFailureCondition is the default value of spec.trialTemplate.failureCondition for Kubeflow Job
// DefaultKubeflowJobFailureCondition is the default value of spec.trialTemplate.failureCondition for Kubeflow Job.
DefaultKubeflowJobFailureCondition = "status.conditions.#(type==\"Failed\")#|#(status==\"True\")#"
)

var (
// DefaultKubeflowJobPrimaryPodLabels is the default value of spec.trialTemplate.primaryPodLabels for Kubeflow Job
// DefaultKubeflowJobPrimaryPodLabels is the default value of spec.trialTemplate.primaryPodLabels for Kubeflow Job.
DefaultKubeflowJobPrimaryPodLabels = map[string]string{"job-role": "master"}
)
3 changes: 2 additions & 1 deletion pkg/apis/controller/experiments/v1beta1/experiment_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,8 @@ type TrialTemplate struct {
// List of parameters that are used in trial template
TrialParameters []TrialParameterSpec `json:"trialParameters,omitempty"`

// Labels that determines if pod needs to be injected by Katib sidecar container
// Labels that determines if pod needs to be injected by Katib sidecar container.
// If PrimaryPodLabels is omitted, metrics collector wraps all Trial's pods.
PrimaryPodLabels map[string]string `json:"primaryPodLabels,omitempty"`

// Name of training container where actual model training is running
Expand Down
13 changes: 7 additions & 6 deletions pkg/controller.v1beta1/experiment/experiment_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,10 @@ import (
)

const (
experimentName = "test-experiment"
trialName = "test-trial"
namespace = "default"
experimentName = "test-experiment"
trialName = "test-trial"
namespace = "default"
primaryContainer = "tensorflow"

timeout = time.Second * 40
)
Expand Down Expand Up @@ -372,7 +373,7 @@ func newFakeInstance() *experimentsv1beta1.Experiment {
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "tensorflow",
Name: primaryContainer,
Image: "gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0",
Command: []string{
"python",
Expand Down Expand Up @@ -416,7 +417,7 @@ func newFakeInstance() *experimentsv1beta1.Experiment {
ResumePolicy: experimentsv1beta1.NeverResume,
TrialTemplate: &experimentsv1beta1.TrialTemplate{
PrimaryPodLabels: experimentsv1beta1.DefaultKubeflowJobPrimaryPodLabels,
PrimaryContainerName: "tensorflow",
PrimaryContainerName: primaryContainer,
SuccessCondition: experimentsv1beta1.DefaultKubeflowJobSuccessCondition,
FailureCondition: experimentsv1beta1.DefaultKubeflowJobFailureCondition,
TrialParameters: []experimentsv1beta1.TrialParameterSpec{
Expand Down Expand Up @@ -513,7 +514,7 @@ func newFakeTFJob() *tfv1.TFJob {
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "tensorflow",
Name: primaryContainer,
Image: "gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0",
Command: []string{
"python",
Expand Down
22 changes: 13 additions & 9 deletions pkg/controller.v1beta1/trial/trial_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ const (
tfJobName = "test-tfjob"
batchJobName = "test-job"
objectiveMetric = "accuracy"
timeout = time.Second * 5
timeout = time.Second * 40
)

var trialKey = types.NamespacedName{Name: trialName, Namespace: namespace}
Expand All @@ -53,8 +53,8 @@ func TestAdd(t *testing.T) {
mgr, err := manager.New(cfg, manager.Options{})
g.Expect(err).NotTo(gomega.HaveOccurred())

// Set fake trial resources.
// TFJob is installed, MPIJob is missed to test error message.
// Set Trial resources.
// TFJob controller is installed, MPIJob controller is missed.
trialResources := trialutil.GvkListFlag{
{
Group: "kubeflow.org",
Expand Down Expand Up @@ -130,7 +130,7 @@ func TestReconcileTFJob(t *testing.T) {
}()

// Empty result for GetTrialObservationLog.
// If objective metrics are not parsed metrics collector reports "unavailable" value to DB.
// If objective metrics are not parsed, metrics collector reports "unavailable" value to DB.
observationLog := &api_pb.GetObservationLogReply{
ObservationLog: &api_pb.ObservationLog{
MetricLogs: []*api_pb.MetricLog{
Expand Down Expand Up @@ -435,6 +435,8 @@ func TestGetObjectiveMetricValue(t *testing.T) {
}

func newFakeTrialTFJob() *trialsv1beta1.Trial {
primaryContainer := "tensorflow"

tfJob := &tfv1.TFJob{
TypeMeta: metav1.TypeMeta{
APIVersion: "kubeflow.org/v1",
Expand All @@ -453,7 +455,7 @@ func newFakeTrialTFJob() *trialsv1beta1.Trial {
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "tensorflow",
Name: primaryContainer,
Image: "gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0",
Command: []string{
"python",
Expand All @@ -474,7 +476,7 @@ func newFakeTrialTFJob() *trialsv1beta1.Trial {
Spec: v1.PodSpec{
Containers: []v1.Container{
{
Name: "tensorflow",
Name: primaryContainer,
Image: "gcr.io/kubeflow-ci/tf-mnist-with-summaries:1.0",
Command: []string{
"python",
Expand All @@ -500,7 +502,7 @@ func newFakeTrialTFJob() *trialsv1beta1.Trial {
},
Spec: trialsv1beta1.TrialSpec{
PrimaryPodLabels: experimentsv1beta1.DefaultKubeflowJobPrimaryPodLabels,
PrimaryContainerName: "tensorflow",
PrimaryContainerName: primaryContainer,
SuccessCondition: experimentsv1beta1.DefaultKubeflowJobSuccessCondition,
FailureCondition: experimentsv1beta1.DefaultKubeflowJobFailureCondition,
Objective: &commonv1beta1.ObjectiveSpec{
Expand All @@ -518,6 +520,8 @@ func newFakeTrialTFJob() *trialsv1beta1.Trial {
}

func newFakeTrialBatchJob() *trialsv1beta1.Trial {
primaryContainer := "training-container"

job := &batchv1.Job{
TypeMeta: metav1.TypeMeta{
APIVersion: "batch/v1",
Expand All @@ -532,7 +536,7 @@ func newFakeTrialBatchJob() *trialsv1beta1.Trial {
Spec: corev1.PodSpec{
Containers: []corev1.Container{
{
Name: "training-container",
Name: primaryContainer,
Image: "docker.io/kubeflowkatib/mxnet-mnist",
Command: []string{
"python3",
Expand All @@ -555,7 +559,7 @@ func newFakeTrialBatchJob() *trialsv1beta1.Trial {
Namespace: namespace,
},
Spec: trialsv1beta1.TrialSpec{
PrimaryContainerName: "training-container",
PrimaryContainerName: primaryContainer,
SuccessCondition: experimentsv1beta1.DefaultJobSuccessCondition,
FailureCondition: experimentsv1beta1.DefaultJobFailureCondition,
Objective: &commonv1beta1.ObjectiveSpec{
Expand Down
4 changes: 2 additions & 2 deletions pkg/webhook/v1beta1/experiment/validator/validator.go
Original file line number Diff line number Diff line change
Expand Up @@ -323,7 +323,7 @@ func (g *DefaultValidator) validateTrialJob(runSpec *unstructured.Unstructured)
tfJob := &tfv1.TFJob{}
err := runtime.DefaultUnstructuredConverter.FromUnstructured(runSpec.Object, &tfJob)
if err != nil {
return fmt.Errorf("Unable to convert spec.TrialTemplate to %v: %v", gvk.Kind, err)
return fmt.Errorf("Unable to convert spec.TrialTemplate: %v to %v: %v", runSpec.Object, gvk.Kind, err)
}
err = validatePatchJob(runSpec, tfJob, gvk.Kind)
if err != nil {
Expand All @@ -333,7 +333,7 @@ func (g *DefaultValidator) validateTrialJob(runSpec *unstructured.Unstructured)
pyTorchJob := &pytorchv1.PyTorchJob{}
err := runtime.DefaultUnstructuredConverter.FromUnstructured(runSpec.Object, &pyTorchJob)
if err != nil {
return fmt.Errorf("Unable to convert spec.TrialTemplate to %v: %v", gvk.Kind, err)
return fmt.Errorf("Unable to convert spec.TrialTemplate: %v to %v: %v", runSpec.Object, gvk.Kind, err)
}
err = validatePatchJob(runSpec, pyTorchJob, gvk.Kind)
if err != nil {
Expand Down
2 changes: 1 addition & 1 deletion pkg/webhook/v1beta1/pod/inject_webhook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ func TestWrapWorkerContainer(t *testing.T) {
},
PathKind: common.FileKind,
Err: true,
TestDescription: "Training pod doesn't have primary container name",
TestDescription: "Training pod doesn't have primary container",
},
}

Expand Down

0 comments on commit 964798f

Please sign in to comment.