Skip to content

Commit

Permalink
Merge pull request kosmos-io#614 from OrangeBao/release-0.4.0-cert
Browse files Browse the repository at this point in the history
fix: node label lost
  • Loading branch information
kosmos-robot authored Jun 3, 2024
2 parents ffd32f7 + 666cd87 commit 902adfb
Show file tree
Hide file tree
Showing 7 changed files with 36 additions and 13 deletions.
2 changes: 1 addition & 1 deletion cmd/kubenest/operator/app/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,5 +58,5 @@ func (o *Options) AddFlags(flags *pflag.FlagSet) {
flags.BoolVar(&o.KubeNestOptions.ForceDestroy, "kube-nest-force-destroy", false, "Force destroy the node.If it set true.If set to true, Kubernetes will not evict the existing nodes on the node when joining nodes to the tenant's control plane, but will instead force destroy.")
flags.StringVar(&o.KubeNestOptions.AnpMode, "kube-nest-anp-mode", "tcp", "kube-apiserver network proxy mode, must be set to tcp or uds. uds mode the replicas for apiserver should be one, and tcp for multi apiserver replicas.")
flags.BoolVar(&o.KubeNestOptions.AdmissionPlugins, "kube-nest-admission-plugins", false, "kube-apiserver network disable-admission-plugins, false for - --disable-admission-plugins=License, true for remove the --disable-admission-plugins=License flag .")
flags.IntVar(&o.KubeNestOptions.ApiServerReplicas, "kube-nest-apiserver-replicas", 2, "virtual-cluster kube-apiserver replicas. default is 2.")
flags.IntVar(&o.KubeNestOptions.ApiServerReplicas, "kube-nest-apiserver-replicas", 1, "virtual-cluster kube-apiserver replicas. default is 2.")
}
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ func (r *GlobalNodeController) SyncTaint(ctx context.Context, globalNode *v1alph
err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
var targetNode v1.Node
if err := r.Get(ctx, types.NamespacedName{Name: globalNode.Name}, &targetNode); err != nil {
klog.Errorf("global-node-controller: SyncTaints: can not get global node, err: %s", globalNode.Name)
klog.Errorf("global-node-controller: SyncTaints: can not get host node, err: %s", globalNode.Name)
return err
}

Expand Down Expand Up @@ -175,15 +175,19 @@ func (r *GlobalNodeController) SyncState(ctx context.Context, globalNode *v1alph
}

func (r *GlobalNodeController) SyncLabel(ctx context.Context, globalNode *v1alpha1.GlobalNode) error {
if globalNode.Spec.State == v1alpha1.NodeInUse {
klog.V(4).Infof("global-node-controller: SyncLabel: node is in use %s, skip", globalNode.Name)
return nil
}
err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
rootNode, err := r.RootClientSet.CoreV1().Nodes().Get(ctx, globalNode.Name, metav1.GetOptions{})
if err != nil {
klog.Errorf("global-node-controller: SyncState: can not get root node: %s", globalNode.Name)
klog.Errorf("global-node-controller: SyncLabel: can not get root node: %s", globalNode.Name)
return err
}

if _, err = r.KosmosClient.KosmosV1alpha1().GlobalNodes().Get(ctx, globalNode.Name, metav1.GetOptions{}); err != nil {
klog.Errorf("global-node-controller: SyncState: can not get global node: %s", globalNode.Name)
klog.Errorf("global-node-controller: SyncLabel: can not get global node: %s", globalNode.Name)
return err
}

Expand All @@ -195,7 +199,7 @@ func (r *GlobalNodeController) SyncLabel(ctx context.Context, globalNode *v1alph
updateGlobalNode.Spec.Labels = rootNode.Labels

if _, err = r.KosmosClient.KosmosV1alpha1().GlobalNodes().Update(ctx, updateGlobalNode, metav1.UpdateOptions{}); err != nil {
klog.Errorf("global-node-controller: SyncState: update global node label failed, err: %s", err)
klog.Errorf("global-node-controller: SyncLabel: update global node label failed, err: %s", err)
return err
}
return nil
Expand Down Expand Up @@ -251,6 +255,10 @@ func (r *GlobalNodeController) Reconcile(ctx context.Context, request reconcile.
klog.Errorf("can not get root node: %s", globalNode.Name)
return reconcile.Result{RequeueAfter: utils.DefaultRequeueTime}, nil
}
if globalNode.Spec.State == v1alpha1.NodeInUse {
// wait globalNode free
return reconcile.Result{RequeueAfter: utils.DefaultRequeueTime}, nil
}

if err := r.SyncLabel(ctx, &globalNode); err != nil {
klog.Warningf("sync label %s error: %v", request.NamespacedName, err)
Expand Down
12 changes: 12 additions & 0 deletions pkg/kubenest/controller/virtualcluster.node.controller/env/env.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,3 +123,15 @@ func GetControlPlaneLabel() string {
}
return controllPlaneLabel
}

func GetWaitNodeReadTime() int {
readTimeSeconds := os.Getenv("WAIT_NODE_READ_TIME")
if len(readTimeSeconds) == 0 {
readTimeSeconds = "30"
}
num, err := strconv.Atoi(readTimeSeconds)
if err != nil {
klog.Fatalf("convert WAIT_NODE_READ_TIME failed, err: %s", err)
}
return num
}
Original file line number Diff line number Diff line change
Expand Up @@ -271,8 +271,8 @@ func NewWaitNodeReadyTask(isHost bool) Task {
Run: func(ctx context.Context, to TaskOpt, _ interface{}) (interface{}, error) {
isReady := false

waitFunc := func() {
waitCtx, cancel := context.WithTimeout(ctx, 30*time.Second) // total waiting time
waitFunc := func(timeout time.Duration) {
waitCtx, cancel := context.WithTimeout(ctx, timeout) // total waiting time
defer cancel()
wait.UntilWithContext(waitCtx, func(ctx context.Context) {
client := to.VirtualK8sClient
Expand All @@ -295,7 +295,7 @@ func NewWaitNodeReadyTask(isHost bool) Task {
}, 10*time.Second) // Interval time
}

waitFunc()
waitFunc(time.Duration(env.GetWaitNodeReadTime()) * time.Second)

if isReady {
return nil, nil
Expand All @@ -322,7 +322,7 @@ func NewWaitNodeReadyTask(isHost bool) Task {
}

klog.V(4).Infof("wait for the node to be ready again. %s", to.NodeInfo.Name)
waitFunc()
waitFunc(time.Duration(env.GetWaitNodeReadTime()*2) * time.Second)

if isReady {
return nil, nil
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package workflow

import (
"context"
"time"

"k8s.io/klog/v2"

Expand All @@ -11,7 +12,7 @@ import (

const (
retryCount = 0
maxRetries = 3
maxRetries = 5
)

type WorkflowData struct {
Expand All @@ -27,7 +28,9 @@ func RunWithRetry(ctx context.Context, task task.Task, opt task.TaskOpt, preArgs
if !task.Retry {
break
}
klog.V(4).Infof("work flow retry %d, task name: %s, err: %s", i, task.Name, err)
waitTime := 3 * (i + 1)
klog.V(4).Infof("work flow retry %d after %ds, task name: %s, err: %s", i, waitTime, task.Name, err)
time.Sleep(time.Duration(waitTime) * time.Second)
} else {
break
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/kubenest/tasks/coredns.go
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ func applyYMLTemplate(dynamicClient dynamic.Interface, manifestGlob string, temp
}

err = apiclient.TryRunCommand(func() error {
return util.CreateObject(dynamicClient, obj.GetNamespace(), obj.GetName(), &obj)
return util.ApplyObject(dynamicClient, &obj)
}, 3)
if err != nil {
return errors.Wrapf(err, "Create object error")
Expand Down
2 changes: 1 addition & 1 deletion pkg/kubenest/tasks/manifests_components.go
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ func applyTemplatedManifests(component string, dynamicClient dynamic.Interface,
obj = unstructured.Unstructured{Object: res}
}
err = apiclient.TryRunCommand(func() error {
return util.CreateObject(dynamicClient, obj.GetNamespace(), obj.GetName(), &obj)
return util.ApplyObject(dynamicClient, &obj)
}, 3)
if err != nil {
return errors.Wrapf(err, "Create object error")
Expand Down

0 comments on commit 902adfb

Please sign in to comment.