From 66e510a8ae2159f0abef37034781999ba8d789f3 Mon Sep 17 00:00:00 2001 From: Alexandre Mahdhaoui Date: Sat, 23 Mar 2024 18:00:03 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20prevent=20leader=20election=20wh?= =?UTF-8?q?en=20shutting=20down=20a=20non-elected=20manager?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When leader election is enabled, a non-leader manager would never start the LeaderElection runnable group. Thus, as the shutdown process calls the sync.Once Start func of the runnableGroup; it will start a new election. This change ensures `Start` is ineffective during shutdown. The test ensures the LeaderElection runnableGroup is not started during shutdown. Signed-off-by: Alexandre Mahdhaoui --- pkg/manager/internal.go | 2 + pkg/manager/manager_test.go | 93 +++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+) diff --git a/pkg/manager/internal.go b/pkg/manager/internal.go index a16f354a1b..fdb9d982d9 100644 --- a/pkg/manager/internal.go +++ b/pkg/manager/internal.go @@ -518,6 +518,8 @@ func (cm *controllerManager) engageStopProcedure(stopComplete <-chan struct{}) e // Stop all the leader election runnables, which includes reconcilers. cm.logger.Info("Stopping and waiting for leader election runnables") + // Prevent leader election when shutting down a non-elected manager + cm.runnables.LeaderElection.startOnce.Do(func() {}) cm.runnables.LeaderElection.StopAndWait(cm.shutdownCtx) // Stop the caches before the leader election runnables, this is an important diff --git a/pkg/manager/manager_test.go b/pkg/manager/manager_test.go index a88ccca00f..3336e0a4bd 100644 --- a/pkg/manager/manager_test.go +++ b/pkg/manager/manager_test.go @@ -378,6 +378,86 @@ var _ = Describe("manger.Manager", func() { Expect(cm.gracefulShutdownTimeout.Nanoseconds()).To(Equal(int64(0))) }) + + It("should prevent leader election when shutting down a non-elected manager", func() { + var rl resourcelock.Interface + m1, err := New(cfg, Options{ + LeaderElection: true, + LeaderElectionNamespace: "default", + LeaderElectionID: "test-leader-election-id", + newResourceLock: func(config *rest.Config, recorderProvider recorder.Provider, options leaderelection.Options) (resourcelock.Interface, error) { + var err error + rl, err = leaderelection.NewResourceLock(config, recorderProvider, options) + return rl, err + }, + HealthProbeBindAddress: "0", + Metrics: metricsserver.Options{BindAddress: "0"}, + PprofBindAddress: "0", + }) + Expect(err).ToNot(HaveOccurred()) + Expect(m1).ToNot(BeNil()) + Expect(rl.Describe()).To(Equal("default/test-leader-election-id")) + + m1cm, ok := m1.(*controllerManager) + Expect(ok).To(BeTrue()) + m1cm.onStoppedLeading = func() {} + + m2, err := New(cfg, Options{ + LeaderElection: true, + LeaderElectionNamespace: "default", + LeaderElectionID: "test-leader-election-id", + newResourceLock: func(config *rest.Config, recorderProvider recorder.Provider, options leaderelection.Options) (resourcelock.Interface, error) { + var err error + rl, err = leaderelection.NewResourceLock(config, recorderProvider, options) + return rl, err + }, + HealthProbeBindAddress: "0", + Metrics: metricsserver.Options{BindAddress: "0"}, + PprofBindAddress: "0", + }) + Expect(err).ToNot(HaveOccurred()) + Expect(m2).ToNot(BeNil()) + Expect(rl.Describe()).To(Equal("default/test-leader-election-id")) + + m1done := make(chan struct{}) + Expect(m1.Add(RunnableFunc(func(ctx context.Context) error { + defer GinkgoRecover() + close(m1done) + return nil + }))).To(Succeed()) + + ctx1, cancel1 := context.WithCancel(context.Background()) + defer cancel1() + go func() { + defer GinkgoRecover() + Expect(m1.Elected()).ShouldNot(BeClosed()) + Expect(m1.Start(ctx1)).NotTo(HaveOccurred()) + }() + <-m1.Elected() + <-m1done + + electionRunnable := &needElection{make(chan struct{})} + + Expect(m2.Add(electionRunnable)).To(Succeed()) + + ctx2, cancel2 := context.WithCancel(context.Background()) + m2done := make(chan struct{}) + go func() { + defer GinkgoRecover() + Expect(m2.Start(ctx2)).NotTo(HaveOccurred()) + close(m2done) + }() + Consistently(m2.Elected()).ShouldNot(Receive()) + + go func() { + defer GinkgoRecover() + Consistently(electionRunnable.ch).ShouldNot(Receive()) + }() + cancel2() + <-m2done + <-m1done + }) + It("should default ID to controller-runtime if ID is not set", func() { var rl resourcelock.Interface m1, err := New(cfg, Options{ @@ -1929,3 +2009,16 @@ func (f *fakeDeferredLoader) InjectScheme(scheme *runtime.Scheme) error { type metricsDefaultServer interface { GetBindAddr() string } + +type needElection struct { + ch chan struct{} +} + +func (n *needElection) Start(_ context.Context) error { + n.ch <- struct{}{} + return nil +} + +func (n *needElection) NeedLeaderElection() bool { + return true +}