Skip to content

Commit

Permalink
fix: add watcher for job and avoid unnecessary reconcile (#527)
Browse files Browse the repository at this point in the history
This PR addresses two problems:

1) We should add a watcher for tuning jobs. Otherwise, if a tuning job
is deleted before it is done, the controller will not recreate it.
2) Currently, the gpu-provisioner updates the machine object with
heartbeat information every two minutes, which triggers a workspace
reconcile. In practice, if the machine stays in the ready state, we
don't need to reconcile the workspace. Add an additional machine event
filter to avoid unnecessary reconciles.
  • Loading branch information
Fei-Guo authored Jul 19, 2024
1 parent cefdab9 commit 137154b
Showing 1 changed file with 11 additions and 0 deletions.
11 changes: 11 additions & 0 deletions pkg/controllers/workspace_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,15 @@ import (
"github.com/samber/lo"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/tools/record"
"k8s.io/klog/v2"
"k8s.io/utils/clock"
"knative.dev/pkg/apis"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
Expand Down Expand Up @@ -635,6 +637,7 @@ func (c *WorkspaceReconciler) SetupWithManager(mgr ctrl.Manager) error {
For(&kaitov1alpha1.Workspace{}).
Owns(&appsv1.Deployment{}).
Owns(&appsv1.StatefulSet{}).
Owns(&batchv1.Job{}).
Watches(&v1alpha5.Machine{}, c.watchMachines()).
WithOptions(controller.Options{MaxConcurrentReconciles: 5})

Expand All @@ -658,6 +661,14 @@ func (c *WorkspaceReconciler) watchMachines() handler.EventHandler {
if !ok {
return nil
}
_, conditionFound := lo.Find(machineObj.GetConditions(), func(condition apis.Condition) bool {
return condition.Type == apis.ConditionReady &&
condition.Status == v1.ConditionTrue
})
if conditionFound && machineObj.DeletionTimestamp.IsZero() {
// No need to reconcile workspace if the machine is in READY state unless machine is deleted.
return nil
}
return []reconcile.Request{
{
NamespacedName: client.ObjectKey{
Expand Down

0 comments on commit 137154b

Please sign in to comment.