From e8712af75ce74d231d76db1f667bd6fe10758337 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Thu, 6 Oct 2022 09:57:05 -0400 Subject: [PATCH 1/8] fix panic from keyring raft entries being written during upgrade During an upgrade to Nomad 1.4.0, if a server running 1.4.0 becomes the leader before one of the 1.3.x servers, the old server will crash because the keyring is initialized and writes a raft entry. Wait until all members are on a version that supports the keyring before initializing it. --- nomad/leader.go | 39 +++++++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/nomad/leader.go b/nomad/leader.go index 950931ce9fb0..56708f9f9ce6 100644 --- a/nomad/leader.go +++ b/nomad/leader.go @@ -294,10 +294,7 @@ func (s *Server) establishLeadership(stopCh chan struct{}) error { schedulerConfig := s.getOrCreateSchedulerConfig() // Create the first root key if it doesn't already exist - err := s.initializeKeyring() - if err != nil { - return err - } + go s.initializeKeyring(stopCh) // Initialize the ClusterID _, _ = s.ClusterID() @@ -1966,12 +1963,13 @@ func (s *Server) getOrCreateSchedulerConfig() *structs.SchedulerConfiguration { return config } +var minVersionKeyring = version.Must(version.NewVersion("1.4.0")) + // initializeKeyring creates the first root key if the leader doesn't // already have one. The metadata will be replicated via raft and then // the followers will get the key material from their own key // replication. -func (s *Server) initializeKeyring() error { - +func (s *Server) initializeKeyring(stopCh <-chan struct{}) error { store := s.fsm.State() keyMeta, err := store.GetActiveRootKeyMeta(nil) if err != nil { @@ -1981,6 +1979,35 @@ func (s *Server) initializeKeyring() error { return nil } + s.logger.Named("core").Trace("verifying cluster is ready to initialize keyring") + + versionCheck := func() bool { + members := s.serf.Members() + for _, member := range members { + build := member.Tags["build"] + memberVersion := version.Must(version.NewVersion(build)) + if memberVersion.LessThan(minVersionKeyring) { + return false + } + } + return true + } + + for { + select { + case <-stopCh: + return nil + default: + } + if versionCheck() { + break + } + } + // we might have lost leadershuip during the version check + if !s.IsLeader() { + return nil + } + s.logger.Named("core").Trace("initializing keyring") rootKey, err := structs.NewRootKey(structs.EncryptionAlgorithmAES256GCM) From 367a7454294f5bbb68274680fc426ba050db35ea Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Thu, 6 Oct 2022 10:13:45 -0400 Subject: [PATCH 2/8] fix version check --- nomad/encrypter_test.go | 24 ++++++++++++++++++++++++ nomad/leader.go | 2 +- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/nomad/encrypter_test.go b/nomad/encrypter_test.go index b006b503d1cc..95395cfb0f1c 100644 --- a/nomad/encrypter_test.go +++ b/nomad/encrypter_test.go @@ -7,7 +7,9 @@ import ( "testing" "time" + version "github.com/hashicorp/go-version" msgpackrpc "github.com/hashicorp/net-rpc-msgpackrpc" + "github.com/shoenig/test/must" "github.com/stretchr/testify/require" "github.com/hashicorp/nomad/ci" @@ -312,3 +314,25 @@ func TestEncrypter_SignVerify(t *testing.T) { require.Equal(t, alloc.JobID, got.JobID) require.Equal(t, "web", got.TaskName) } + +func TestEncrypter_VersionCheck(t *testing.T) { + + validVersions := []string{"1.4.0", "1.4.1", "1.5.2", "1.4.0-dev", "1.4.0-beta.1"} + invalidVersions := []string{"1.3.5", "1.3.5-dev"} + + for _, v := range validVersions { + memberVersion := version.Must(version.NewVersion(v)) + + must.True(t, memberVersion.Core().GreaterThanOrEqual(minVersionKeyring), + must.Sprintf("expected %v >= 1.4.0", v), + ) + } + + for _, v := range invalidVersions { + memberVersion := version.Must(version.NewVersion(v)) + must.True(t, memberVersion.Core().LessThan(minVersionKeyring), + must.Sprintf("expected %v < 1.4.0", v), + ) + } + +} diff --git a/nomad/leader.go b/nomad/leader.go index 56708f9f9ce6..dbe991a0d3e9 100644 --- a/nomad/leader.go +++ b/nomad/leader.go @@ -1986,7 +1986,7 @@ func (s *Server) initializeKeyring(stopCh <-chan struct{}) error { for _, member := range members { build := member.Tags["build"] memberVersion := version.Must(version.NewVersion(build)) - if memberVersion.LessThan(minVersionKeyring) { + if memberVersion.Core().LessThan(minVersionKeyring) { return false } } From ce81033aa3625abcc5f12a2ff1c937c22f07bf77 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Thu, 6 Oct 2022 10:30:45 -0400 Subject: [PATCH 3/8] enforce keyring has been initialized before getting active key --- nomad/encrypter.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nomad/encrypter.go b/nomad/encrypter.go index 7787154144e7..8b3851ff9aed 100644 --- a/nomad/encrypter.go +++ b/nomad/encrypter.go @@ -277,6 +277,9 @@ func (e *Encrypter) activeKeySetLocked() (*keyset, error) { if err != nil { return nil, err } + if keyMeta == nil { + return nil, fmt.Errorf("keyring has not been initialized yet") + } return e.keysetByIDLocked(keyMeta.KeyID) } From f21d30cf764dbd43d073deed077a034c30097059 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Thu, 6 Oct 2022 10:32:37 -0400 Subject: [PATCH 4/8] set minimum versions for plan apply tests to have keyring --- nomad/plan_apply_test.go | 2 +- nomad/worker_test.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nomad/plan_apply_test.go b/nomad/plan_apply_test.go index bb9523476266..fc21bc6e2ffc 100644 --- a/nomad/plan_apply_test.go +++ b/nomad/plan_apply_test.go @@ -243,7 +243,7 @@ func TestPlanApply_applyPlanWithNormalizedAllocs(t *testing.T) { ci.Parallel(t) s1, cleanupS1 := TestServer(t, func(c *Config) { - c.Build = "0.9.2" + c.Build = "1.4.0" }) defer cleanupS1() testutil.WaitForLeader(t, s1.RPC) diff --git a/nomad/worker_test.go b/nomad/worker_test.go index d8d5f4481095..a72304db92e9 100644 --- a/nomad/worker_test.go +++ b/nomad/worker_test.go @@ -488,7 +488,7 @@ func TestWorker_SubmitPlanNormalizedAllocations(t *testing.T) { s1, cleanupS1 := TestServer(t, func(c *Config) { c.NumSchedulers = 0 c.EnabledSchedulers = []string{structs.JobTypeService} - c.Build = "0.9.2" + c.Build = "1.4.0" }) defer cleanupS1() testutil.WaitForLeader(t, s1.RPC) From 3845b9cf8b88e934ff4aa524f42d8af1fbf37071 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Thu, 6 Oct 2022 10:46:51 -0400 Subject: [PATCH 5/8] log errors from initialization and use 'keyring' as component name --- nomad/leader.go | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/nomad/leader.go b/nomad/leader.go index dbe991a0d3e9..9b4439e6dc54 100644 --- a/nomad/leader.go +++ b/nomad/leader.go @@ -1969,17 +1969,21 @@ var minVersionKeyring = version.Must(version.NewVersion("1.4.0")) // already have one. The metadata will be replicated via raft and then // the followers will get the key material from their own key // replication. -func (s *Server) initializeKeyring(stopCh <-chan struct{}) error { +func (s *Server) initializeKeyring(stopCh <-chan struct{}) { + + logger := s.logger.Named("keyring") + store := s.fsm.State() keyMeta, err := store.GetActiveRootKeyMeta(nil) if err != nil { - return err + logger.Error("failed to get active key: %v", err) + return } if keyMeta != nil { - return nil + return } - s.logger.Named("core").Trace("verifying cluster is ready to initialize keyring") + logger.Trace("verifying cluster is ready to initialize keyring") versionCheck := func() bool { members := s.serf.Members() @@ -1996,7 +2000,7 @@ func (s *Server) initializeKeyring(stopCh <-chan struct{}) error { for { select { case <-stopCh: - return nil + return default: } if versionCheck() { @@ -2005,31 +2009,33 @@ func (s *Server) initializeKeyring(stopCh <-chan struct{}) error { } // we might have lost leadershuip during the version check if !s.IsLeader() { - return nil + return } - s.logger.Named("core").Trace("initializing keyring") + logger.Trace("initializing keyring") rootKey, err := structs.NewRootKey(structs.EncryptionAlgorithmAES256GCM) rootKey.Meta.SetActive() if err != nil { - return fmt.Errorf("could not initialize keyring: %v", err) + logger.Error("could not initialize keyring: %v", err) + return } err = s.encrypter.AddKey(rootKey) if err != nil { - return fmt.Errorf("could not add initial key to keyring: %v", err) + logger.Error("could not add initial key to keyring: %v", err) + return } if _, _, err = s.raftApply(structs.RootKeyMetaUpsertRequestType, structs.KeyringUpdateRootKeyMetaRequest{ RootKeyMeta: rootKey.Meta, }); err != nil { - return fmt.Errorf("could not initialize keyring: %v", err) + logger.Error("could not initialize keyring: %v", err) + return } - s.logger.Named("core").Info("initialized keyring", "id", rootKey.Meta.KeyID) - return nil + logger.Info("initialized keyring", "id", rootKey.Meta.KeyID) } func (s *Server) generateClusterID() (string, error) { From bae41f450fd5444687ebb3e65845240a47f966af Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Thu, 6 Oct 2022 12:20:35 -0400 Subject: [PATCH 6/8] changelog entry --- .changelog/14821.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .changelog/14821.txt diff --git a/.changelog/14821.txt b/.changelog/14821.txt new file mode 100644 index 000000000000..162f961007e5 --- /dev/null +++ b/.changelog/14821.txt @@ -0,0 +1,3 @@ +```release-note:bug +keyring: Fixed a panic that can occur during upgrades to 1.4.0 when initializing the keyring +``` From 7120265a0c6d0e7d75c7d87c8e000163e3ec3221 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Thu, 6 Oct 2022 12:27:44 -0400 Subject: [PATCH 7/8] use minimum version helper --- nomad/leader.go | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/nomad/leader.go b/nomad/leader.go index 9b4439e6dc54..4d82fd689dc7 100644 --- a/nomad/leader.go +++ b/nomad/leader.go @@ -1984,26 +1984,13 @@ func (s *Server) initializeKeyring(stopCh <-chan struct{}) { } logger.Trace("verifying cluster is ready to initialize keyring") - - versionCheck := func() bool { - members := s.serf.Members() - for _, member := range members { - build := member.Tags["build"] - memberVersion := version.Must(version.NewVersion(build)) - if memberVersion.Core().LessThan(minVersionKeyring) { - return false - } - } - return true - } - for { select { case <-stopCh: return default: } - if versionCheck() { + if ServersMeetMinimumVersion(s.serf.Members(), minVersionKeyring, true) { break } } From b98248a370658ea26a4b9e664ac92d634bb505b1 Mon Sep 17 00:00:00 2001 From: Tim Gross Date: Thu, 6 Oct 2022 12:28:59 -0400 Subject: [PATCH 8/8] remove unused test --- nomad/encrypter_test.go | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/nomad/encrypter_test.go b/nomad/encrypter_test.go index 95395cfb0f1c..b006b503d1cc 100644 --- a/nomad/encrypter_test.go +++ b/nomad/encrypter_test.go @@ -7,9 +7,7 @@ import ( "testing" "time" - version "github.com/hashicorp/go-version" msgpackrpc "github.com/hashicorp/net-rpc-msgpackrpc" - "github.com/shoenig/test/must" "github.com/stretchr/testify/require" "github.com/hashicorp/nomad/ci" @@ -314,25 +312,3 @@ func TestEncrypter_SignVerify(t *testing.T) { require.Equal(t, alloc.JobID, got.JobID) require.Equal(t, "web", got.TaskName) } - -func TestEncrypter_VersionCheck(t *testing.T) { - - validVersions := []string{"1.4.0", "1.4.1", "1.5.2", "1.4.0-dev", "1.4.0-beta.1"} - invalidVersions := []string{"1.3.5", "1.3.5-dev"} - - for _, v := range validVersions { - memberVersion := version.Must(version.NewVersion(v)) - - must.True(t, memberVersion.Core().GreaterThanOrEqual(minVersionKeyring), - must.Sprintf("expected %v >= 1.4.0", v), - ) - } - - for _, v := range invalidVersions { - memberVersion := version.Must(version.NewVersion(v)) - must.True(t, memberVersion.Core().LessThan(minVersionKeyring), - must.Sprintf("expected %v < 1.4.0", v), - ) - } - -}