Skip to content

Commit

Permalink
improve error messages and logging
Browse files Browse the repository at this point in the history
  • Loading branch information
danielvegamyhre committed Feb 13, 2024
1 parent 09bc836 commit 3b0257c
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 6 deletions.
28 changes: 26 additions & 2 deletions api/jobset/v1alpha2/jobset_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,24 @@ import (
corev1 "k8s.io/api/core/v1"
)

const (
// This is the error message returned by IsDNS1035Label when the given input
// is longer than 63 characters.
dns1035MaxLengthExceededErrorMsg = "must be no more than 63 characters"

// Error message returned by JobSet validation if the generated child jobs
// will be longer than 63 characters.
jobNameTooLongErrorMsg = "JobSet name is too long, job names generated for this JobSet will exceed 63 characters"

// Error message returned by JobSet validation if the generated pod names
// will be longer than 63 characters.
podNameTooLongErrorMsg = "JobSet name is too long, job names generated for this JobSet will exceed 63 characters"

// Error message returned by JobSet validation if the network subdomain
// will be longer than 63 characters.
subdomainTooLongErrMsg = ".spec.network.subdomain is too long, must be less than 63 characters"
)

func (js *JobSet) SetupWebhookWithManager(mgr ctrl.Manager) error {
return ctrl.NewWebhookManagedBy(mgr).
For(js).
Expand Down Expand Up @@ -96,6 +114,9 @@ func (js *JobSet) ValidateCreate() (admission.Warnings, error) {

// Since subdomain name is also used as service name, it must adhere to RFC 1035 as well.
for _, errMessage := range validation.IsDNS1035Label(js.Spec.Network.Subdomain) {
if strings.Contains(errMessage, dns1035MaxLengthExceededErrorMsg) {
errMessage = subdomainTooLongErrMsg
}
allErrs = append(allErrs, fmt.Errorf(errMessage))
}
}
Expand All @@ -112,6 +133,9 @@ func (js *JobSet) ValidateCreate() (admission.Warnings, error) {
// Use the largest job index as it will have the longest name.
longestJobName := placement.GenJobName(js.Name, rjob.Name, int(rjob.Replicas-1))
for _, errMessage := range validation.IsDNS1035Label(longestJobName) {
if strings.Contains(errMessage, dns1035MaxLengthExceededErrorMsg) {
errMessage = jobNameTooLongErrorMsg
}
allErrs = append(allErrs, fmt.Errorf(errMessage))
}
// Check that the generated pod names for the replicated job is DNS 1035 compliant.
Expand All @@ -122,8 +146,8 @@ func (js *JobSet) ValidateCreate() (admission.Warnings, error) {
// Add 5 char suffix to the deterministic part of the pod name to validate the full pod name is compliant.
longestPodName := placement.GenPodName(js.Name, rjob.Name, maxJobIndex, maxPodIndex) + "-abcde"
for _, errMessage := range validation.IsDNS1035Label(longestPodName) {
if strings.Contains(errMessage, "must be no more than 63 characters") {
errMessage = "JobSet name must be shorter; pod names generated for this JobSet will exceed 63 characters."
if strings.Contains(errMessage, dns1035MaxLengthExceededErrorMsg) {
errMessage = podNameTooLongErrorMsg
}
allErrs = append(allErrs, fmt.Errorf(errMessage))
}
Expand Down
6 changes: 3 additions & 3 deletions api/jobset/v1alpha2/jobset_webhook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -532,7 +532,7 @@ func TestValidateCreate(t *testing.T) {
},
},
want: errors.Join(
fmt.Errorf("must be no more than 63 characters"),
fmt.Errorf(subdomainTooLongErrMsg),
),
},
{
Expand Down Expand Up @@ -576,7 +576,7 @@ func TestValidateCreate(t *testing.T) {
},
},
want: errors.Join(
fmt.Errorf("must be no more than 63 characters"),
fmt.Errorf(jobNameTooLongErrorMsg),
),
},
{
Expand All @@ -603,7 +603,7 @@ func TestValidateCreate(t *testing.T) {
},
},
want: errors.Join(
fmt.Errorf("JobSet name must be shorter; pod names generated for this JobSet will exceed 63 characters."),
fmt.Errorf(podNameTooLongErrorMsg),
),
},
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/webhooks/pod_admission_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ func (p *podWebhook) leaderPodForFollower(ctx context.Context, pod *corev1.Pod)
log := ctrl.LoggerFrom(ctx)
leaderPodName, err := genLeaderPodName(pod)
if err != nil {
log.Error(err, "getting leader pod name for follower pod")
log.V(3).Info("leader pod (%s) for follower pod does not exist or has not been scheduled yet", leaderPodName)
return nil, err
}

Expand Down

0 comments on commit 3b0257c

Please sign in to comment.