From 50b1d4599ef38ef2c7ba441ddd7499ca56aa4474 Mon Sep 17 00:00:00 2001 From: JimmyYang20 <yangjin39@huawei.com> Date: Thu, 17 Mar 2022 14:53:36 +0800 Subject: [PATCH] Fix downstream bug in IL bug: after deleting dataset, the event of deleting job cannot be sent to LC. Signed-off-by: JimmyYang20 <yangjin39@huawei.com> --- .../incrementallearning/downstream.go | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/pkg/globalmanager/controllers/incrementallearning/downstream.go b/pkg/globalmanager/controllers/incrementallearning/downstream.go index 84be05287..d323ff602 100644 --- a/pkg/globalmanager/controllers/incrementallearning/downstream.go +++ b/pkg/globalmanager/controllers/incrementallearning/downstream.go @@ -62,18 +62,15 @@ func (c *Controller) syncToEdge(eventType watch.EventType, obj interface{}) erro // more details at https://github.com/kubernetes/kubernetes/issues/3030 job.Kind = KindName - jobConditions := job.Status.Conditions - if len(jobConditions) == 0 { - return nil - } - dataName := job.Spec.Dataset.Name + // LC has dataset object on this node that may call dataset node + var dsNodeName string ds, err := c.client.Datasets(job.Namespace).Get(context.TODO(), dataName, metav1.GetOptions{}) if err != nil { - return fmt.Errorf("dataset(%s/%s) not found", job.Namespace, dataName) + klog.Errorf("not found job(name=%s/%s)'s dataset, error: %v", job.Kind, job.Name, err) + } else { + dsNodeName = ds.Spec.NodeName } - // LC has dataset object on this node that may call dataset node - dsNodeName := ds.Spec.NodeName var trainNodeName string var evalNodeName string @@ -102,6 +99,15 @@ func (c *Controller) syncToEdge(eventType watch.EventType, obj interface{}) erro return nil } + if dsNodeName == "" { + return nil + } + + jobConditions := job.Status.Conditions + if len(jobConditions) == 0 { + return nil + } + latestCondition := jobConditions[len(jobConditions)-1] currentType := latestCondition.Type jobStage := latestCondition.Stage