Skip to content

Commit

Permalink
Merge pull request #3746 from liuyuanchun11/fixPgWarning
Browse files Browse the repository at this point in the history
fix job controller reports duplicate warnings
  • Loading branch information
volcano-sh-bot authored Sep 27, 2024
2 parents 61ff6a7 + f731d36 commit 2d768d6
Show file tree
Hide file tree
Showing 2 changed files with 148 additions and 9 deletions.
30 changes: 23 additions & 7 deletions pkg/controllers/job/job_controller_actions.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"context"
"fmt"
"reflect"
"strings"
"sync"
"sync/atomic"

Expand Down Expand Up @@ -286,13 +287,7 @@ func (cc *jobcontroller) syncJob(jobInfo *apis.JobInfo, updateStatus state.Updat
if pg.Status.Phase != "" && pg.Status.Phase != scheduling.PodGroupPending {
syncTask = true
}

for _, condition := range pg.Status.Conditions {
if condition.Type == scheduling.PodGroupUnschedulableType {
cc.recorder.Eventf(job, v1.EventTypeWarning, string(batch.PodGroupPending),
fmt.Sprintf("PodGroup %s:%s unschedule,reason: %s", job.Namespace, job.Name, condition.Message))
}
}
cc.recordPodGroupEvent(job, pg)
}

var jobCondition batch.JobCondition
Expand Down Expand Up @@ -845,6 +840,27 @@ func (cc *jobcontroller) initJobStatus(job *batch.Job) (*batch.Job, error) {
return newJob, nil
}

func (cc *jobcontroller) recordPodGroupEvent(job *batch.Job, podGroup *scheduling.PodGroup) {
var latestCondition *scheduling.PodGroupCondition

// Get the latest condition by timestamp
for _, condition := range podGroup.Status.Conditions {
if condition.Status == v1.ConditionTrue {
if latestCondition == nil ||
condition.LastTransitionTime.Time.After(latestCondition.LastTransitionTime.Time) {
latestCondition = &condition
}
}
}

// If the latest condition is not scheduled, then a warning event is recorded
if latestCondition != nil && latestCondition.Type != scheduling.PodGroupScheduled {
cc.recorder.Eventf(job, v1.EventTypeWarning, string(batch.PodGroupPending),
fmt.Sprintf("PodGroup %s:%s %s, reason: %s", job.Namespace, job.Name,
strings.ToLower(string(latestCondition.Type)), latestCondition.Message))
}
}

func classifyAndAddUpPodBaseOnPhase(pod *v1.Pod, pending, running, succeeded, failed, unknown *int32) {
switch pod.Status.Phase {
case v1.PodPending:
Expand Down
127 changes: 125 additions & 2 deletions pkg/controllers/job/job_controller_actions_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,14 @@ import (
"context"
"errors"
"fmt"
"reflect"
"testing"
"time"

"github.com/agiledragon/gomonkey/v2"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"reflect"
"testing"
"k8s.io/client-go/tools/record"

"volcano.sh/apis/pkg/apis/batch/v1alpha1"
schedulingapi "volcano.sh/apis/pkg/apis/scheduling/v1beta1"
Expand Down Expand Up @@ -664,3 +667,123 @@ func TestDeleteJobPod(t *testing.T) {
})
}
}

func TestRecordPodGroupEvent(t *testing.T) {
job1 := &v1alpha1.Job{
ObjectMeta: metav1.ObjectMeta{
Name: "job1",
Namespace: "default",
ResourceVersion: "100",
UID: "e7f18111-1cec-11ea-b688-fa163ec79500",
},
Spec: v1alpha1.JobSpec{
Tasks: []v1alpha1.TaskSpec{},
},
Status: v1alpha1.JobStatus{
State: v1alpha1.JobState{
Phase: v1alpha1.Running,
},
},
}

pg1 := &schedulingapi.PodGroup{
ObjectMeta: metav1.ObjectMeta{
Name: "job1-e7f18111-1cec-11ea-b688-fa163ec79500",
Namespace: "default",
},
Spec: schedulingapi.PodGroupSpec{
MinResources: &v1.ResourceList{},
MinTaskMember: map[string]int32{},
},
Status: schedulingapi.PodGroupStatus{
Phase: schedulingapi.PodGroupRunning,
Conditions: []schedulingapi.PodGroupCondition{},
},
}
testcases := []struct {
Name string
pgConditions []schedulingapi.PodGroupCondition
ExpectEvent string
}{
{
Name: "pg with no conditions",
pgConditions: []schedulingapi.PodGroupCondition{},
ExpectEvent: "",
},
{
Name: "pg with Unschedulable condition",
pgConditions: []schedulingapi.PodGroupCondition{
{
Type: schedulingapi.PodGroupUnschedulableType,
Status: v1.ConditionTrue,
Reason: "test reason",
Message: "test message",
LastTransitionTime: metav1.NewTime(time.Date(2024, time.July, 1, 15, 4, 5, 0, time.UTC)),
},
},
ExpectEvent: "Warning PodGroupPending PodGroup default:job1 unschedulable, reason: test message",
},
{
Name: "pg with Unschedulable later than Scheduled",
pgConditions: []schedulingapi.PodGroupCondition{
{
Type: schedulingapi.PodGroupUnschedulableType,
Status: v1.ConditionTrue,
Reason: "test reason",
Message: "test message",
LastTransitionTime: metav1.NewTime(time.Date(2024, time.July, 1, 15, 15, 5, 0, time.UTC)),
},
{
Type: schedulingapi.PodGroupScheduled,
Status: v1.ConditionTrue,
Reason: "test reason",
Message: "test message",
LastTransitionTime: metav1.NewTime(time.Date(2024, time.July, 1, 15, 4, 5, 0, time.UTC)),
},
},
ExpectEvent: "Warning PodGroupPending PodGroup default:job1 unschedulable, reason: test message",
},
{
Name: "pg with Scheduled later than Unschedulable ",
pgConditions: []schedulingapi.PodGroupCondition{
{
Type: schedulingapi.PodGroupUnschedulableType,
Status: v1.ConditionTrue,
Reason: "test reason",
Message: "test message",
LastTransitionTime: metav1.NewTime(time.Date(2024, time.July, 1, 15, 15, 5, 0, time.UTC)),
},
{
Type: schedulingapi.PodGroupScheduled,
Status: v1.ConditionTrue,
Reason: "test reason",
Message: "test message",
LastTransitionTime: metav1.NewTime(time.Date(2024, time.July, 1, 15, 20, 5, 0, time.UTC)),
},
},
ExpectEvent: "",
},
}
for _, tt := range testcases {
t.Run(tt.Name, func(t *testing.T) {
fakeController := newFakeController()
fakeController.recorder = record.NewFakeRecorder(100)
pg1.Status.Conditions = tt.pgConditions
fakeController.recordPodGroupEvent(job1, pg1)
r := fakeController.recorder.(*record.FakeRecorder)
close(r.Events)
event := <-r.Events

if len(tt.ExpectEvent) == 0 {
if len(event) != 0 {
t.Errorf("Testcase %s failed, expect no event, but got %s", tt.Name, event)
}
} else {
if tt.ExpectEvent != event {
t.Errorf("Testcase %s failed, expect event: %s, but got %s", tt.Name, tt.ExpectEvent, event)
}
}
})
}

}

0 comments on commit 2d768d6

Please sign in to comment.