From c2df97a6104aeeb91056c41bbe7f1393df296476 Mon Sep 17 00:00:00 2001 From: Erikson Tung Date: Tue, 7 Jul 2020 18:41:51 -0700 Subject: [PATCH] bottlerocket host update api integration Adds support for updating via the Bottlerocket Update API. The new platform is used if the node is labeled bottlerocket.aws/updater-interface-version=2.0.0 --- go.mod | 4 +- go.sum | 6 +- main.go | 7 +- pkg/agent/agent.go | 38 +++++- pkg/agent/agent_test.go | 17 ++- pkg/platform/api/api.go | 219 +++++++++++++++++++++++++++++++++++ pkg/platform/api/api_test.go | 152 ++++++++++++++++++++++++ pkg/platform/api/platform.go | 145 +++++++++++++++++++++++ pkg/platform/interface.go | 2 +- update-operator.yaml | 76 +++++++++++- 10 files changed, 639 insertions(+), 27 deletions(-) create mode 100644 pkg/platform/api/api.go create mode 100644 pkg/platform/api/api_test.go create mode 100644 pkg/platform/api/platform.go diff --git a/go.mod b/go.mod index 73ab8c1f..31f100bc 100644 --- a/go.mod +++ b/go.mod @@ -3,8 +3,7 @@ module github.com/bottlerocket-os/bottlerocket-update-operator go 1.12 require ( - github.com/coreos/go-systemd/v22 v22.0.0 - github.com/godbus/dbus/v5 v5.0.3 + github.com/Masterminds/semver v1.5.0 github.com/google/go-cmp v0.3.1 // indirect github.com/googleapis/gnostic v0.3.1 // indirect github.com/imdario/mergo v0.3.7 // indirect @@ -12,6 +11,7 @@ require ( github.com/karlseguin/expect v1.0.1 // indirect github.com/pkg/errors v0.8.1 github.com/sirupsen/logrus v1.4.2 + github.com/stretchr/testify v1.3.0 github.com/wsxiaoys/terminal v0.0.0-20160513160801-0940f3fc43a0 // indirect golang.org/x/crypto v0.0.0-20190829043050-9756ffdc2472 // indirect golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297 // indirect diff --git a/go.sum b/go.sum index 73509ad3..3f41ba7c 100644 --- a/go.sum +++ b/go.sum @@ -12,6 +12,8 @@ github.com/Azure/go-autorest/tracing v0.5.0/go.mod h1:r/s2XiOKccPW3HrqB+W0TQzfbt github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/MakeNowJust/heredoc v0.0.0-20170808103936-bb23615498cd/go.mod h1:64YHyfSL2R96J44Nlwm39UHepQbyR5q10x7iYa1ks2E= +github.com/Masterminds/semver v1.5.0 h1:H65muMkzWKEuNDnfl9d70GUjFniHKHRbFPGBuZ3QEww= +github.com/Masterminds/semver v1.5.0/go.mod h1:MB6lktGJrhw8PrUyiEoblNEGEQ+RzHPF078ddwwvV3Y= github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ= github.com/PuerkitoBio/purell v1.0.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= github.com/PuerkitoBio/purell v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= @@ -25,8 +27,6 @@ github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDk github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk= github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= -github.com/coreos/go-systemd/v22 v22.0.0 h1:XJIw/+VlJ+87J+doOxznsAWIdmWuViOVhkQamW5YV28= -github.com/coreos/go-systemd/v22 v22.0.0/go.mod h1:xO0FLkIi5MaZafQlIrOotqXZ90ih+1atmu1JpKERPPk= github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE= github.com/davecgh/go-spew v0.0.0-20151105211317-5215b55f46b2/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -58,8 +58,6 @@ github.com/go-openapi/spec v0.0.0-20160808142527-6aced65f8501/go.mod h1:J8+jY1nA github.com/go-openapi/spec v0.19.2/go.mod h1:sCxk3jxKgioEJikev4fgkNmwS+3kuYdJtcsZsD5zxMY= github.com/go-openapi/swag v0.0.0-20160704191624-1d0bd113de87/go.mod h1:DXUve3Dpr1UfpPtxFw+EFuQ41HhCWZfha5jSVRG7C7I= github.com/go-openapi/swag v0.19.2/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= -github.com/godbus/dbus/v5 v5.0.3 h1:ZqHaoEF7TBzh4jzPmqVhE/5A1z9of6orkAe5uHoAeME= -github.com/godbus/dbus/v5 v5.0.3/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/gogo/protobuf v1.2.2-0.20190723190241-65acae22fc9d h1:3PaI8p3seN09VjbTYC/QWlUZdZ1qS1zGjy7LH2Wt07I= github.com/gogo/protobuf v1.2.2-0.20190723190241-65acae22fc9d/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= diff --git a/main.go b/main.go index b31ced7b..d6074a0c 100644 --- a/main.go +++ b/main.go @@ -11,7 +11,6 @@ import ( "github.com/bottlerocket-os/bottlerocket-update-operator/pkg/controller" "github.com/bottlerocket-os/bottlerocket-update-operator/pkg/k8sutil" "github.com/bottlerocket-os/bottlerocket-update-operator/pkg/logging" - "github.com/bottlerocket-os/bottlerocket-update-operator/pkg/platform/updog" "github.com/bottlerocket-os/bottlerocket-update-operator/pkg/sigcontext" "github.com/pkg/errors" "k8s.io/client-go/kubernetes" @@ -89,11 +88,7 @@ func runController(ctx context.Context, kube kubernetes.Interface, nodeName stri func runAgent(ctx context.Context, kube kubernetes.Interface, nodeName string) error { log := logging.New("agent") - platform, err := updog.New() - if err != nil { - return errors.WithMessage(err, "could not setup platform for agent") - } - a, err := agent.New(log, kube, platform, nodeName) + a, err := agent.New(log, kube, nodeName) if err != nil { return err } diff --git a/pkg/agent/agent.go b/pkg/agent/agent.go index ee34d9a7..7cafaeda 100644 --- a/pkg/agent/agent.go +++ b/pkg/agent/agent.go @@ -13,6 +13,8 @@ import ( "github.com/bottlerocket-os/bottlerocket-update-operator/pkg/marker" "github.com/bottlerocket-os/bottlerocket-update-operator/pkg/nodestream" "github.com/bottlerocket-os/bottlerocket-update-operator/pkg/platform" + "github.com/bottlerocket-os/bottlerocket-update-operator/pkg/platform/api" + "github.com/bottlerocket-os/bottlerocket-update-operator/pkg/platform/updog" "github.com/bottlerocket-os/bottlerocket-update-operator/pkg/workgroup" "github.com/pkg/errors" @@ -67,18 +69,42 @@ type proc interface { KillProcess() error } -func New(log logging.Logger, kube kubernetes.Interface, plat platform.Platform, nodeName string) (*Agent, error) { +func New(log logging.Logger, kube kubernetes.Interface, nodeName string) (*Agent, error) { if nodeName == "" { return nil, errors.New("nodeName must be provided for Agent to manage") } - var nodeclient corev1.NodeInterface - if kube != nil { - nodeclient = kube.CoreV1().Nodes() + + nodeclient := kube.CoreV1().Nodes() + // Determine which platform to use depending on the updater interface version + node, err := nodeclient.Get(nodeName, v1meta.GetOptions{}) + if err != nil { + return nil, errors.New("failed to retrieve node information") } + // Get the updater interface version from the node label + var platform platform.Platform + platformVersion := node.Labels[marker.UpdaterInterfaceVersionKey] + switch platformVersion { + default: + // If the updater interface version is not specified, default to + // using Updog as the platform + log.Warn("unknown platform version specified, defaulting to using updog") + fallthrough + case "1.0.0": + platform, err = updog.New() + if err != nil { + return nil, errors.WithMessage(err, "could not setup Updog platform for agent") + } + case "2.0.0": + platform, err = api.New() + if err != nil { + return nil, errors.WithMessage(err, "could not setup Update API platform for agent") + } + } + return &Agent{ log: log, kube: kube, - platform: plat, + platform: platform, poster: &k8sPoster{log, nodeclient}, proc: &osProc{}, nodeName: nodeName, @@ -314,7 +340,7 @@ func (a *Agent) realize(in *intent.Intent) error { case marker.NodeActionUnknown, marker.NodeActionStabilize: log.Debug("sitrep") - _, err = a.platform.Status() + err = platform.Ping(a.platform) if err != nil { break } diff --git a/pkg/agent/agent_test.go b/pkg/agent/agent_test.go index 9816909b..8cb65fd3 100644 --- a/pkg/agent/agent_test.go +++ b/pkg/agent/agent_test.go @@ -5,11 +5,13 @@ import ( "testing" "github.com/bottlerocket-os/bottlerocket-update-operator/pkg/intent" + "github.com/bottlerocket-os/bottlerocket-update-operator/pkg/intent/cache" "github.com/bottlerocket-os/bottlerocket-update-operator/pkg/internal/intents" "github.com/bottlerocket-os/bottlerocket-update-operator/pkg/internal/testoutput" "github.com/bottlerocket-os/bottlerocket-update-operator/pkg/logging" "github.com/bottlerocket-os/bottlerocket-update-operator/pkg/marker" "github.com/bottlerocket-os/bottlerocket-update-operator/pkg/platform" + "gotest.tools/assert" ) @@ -82,12 +84,17 @@ func testAgent(t *testing.T) (*Agent, *testHooks) { Platform: &testPlatform{}, Proc: &testProc{}, } - a, err := New(testoutput.Logger(t, logging.New("agent")), nil, hooks.Platform, intents.NodeName) - if err != nil { - panic(err) + log := testoutput.Logger(t, logging.New("agent")) + a := &Agent{ + log: log, + kube: nil, + platform: hooks.Platform, + poster: hooks.Poster, + proc: hooks.Proc, + nodeName: intents.NodeName, + lastCache: cache.NewLastCache(), + tracker: newPostTracker(), } - a.poster = hooks.Poster - a.proc = hooks.Proc return a, hooks } diff --git a/pkg/platform/api/api.go b/pkg/platform/api/api.go new file mode 100644 index 00000000..049eaf6b --- /dev/null +++ b/pkg/platform/api/api.go @@ -0,0 +1,219 @@ +package api + +import ( + "context" + "encoding/json" + "io/ioutil" + "net" + "net/http" + "time" + + "github.com/pkg/errors" + + "github.com/bottlerocket-os/bottlerocket-update-operator/pkg/logging" +) + +const ( + bottlerocketAPISock = "/run/api.sock" + // The minimum required host Bottlerocket OS version is v0.4.1 because that's when the Update API + // was first added. https://github.com/bottlerocket-os/bottlerocket/releases/tag/v0.4.1 + minimumRequiredOSVer = "0.4.1" +) + +type updateState string + +const ( + stateIdle updateState = "Idle" + stateAvailable updateState = "Available" + stateStaged updateState = "Staged" + stateReady updateState = "Ready" +) + +type updateImage struct { + Arch string `json:"arch"` + Version string `json:"version"` + Variant string `json:"variant"` +} + +func (ui *updateImage) Identifier() interface{} { + return ui.Version +} + +type stagedImage struct { + Image updateImage `json:"image"` + NextToBoot bool `json:"next_to_boot"` +} + +type updateCommand string + +const ( + commandRefresh updateCommand = "refresh" + commandPrepare updateCommand = "prepare" + commandActivate updateCommand = "activate" + commandDeactivate updateCommand = "deactivate" +) + +type commandStatus string + +const ( + statusSuccess commandStatus = "Success" + Failed commandStatus = "Failed" + Unknown commandStatus = "Unknown" +) + +type commandResult struct { + CmdType updateCommand `json:"cmd_type"` + CmdStatus commandStatus `json:"cmd_status"` + Timestamp string `json:"timestamp"` + ExitStatus *int32 `json:"exit_status"` + Stderr *string `json:"stderr"` +} + +type updateStatus struct { + UpdateState updateState `json:"update_state"` + AvailableUpdates []string `json:"available_updates"` + ChosenUpdate *updateImage `json:"chosen_update"` + ActivePartition *stagedImage `json:"active_partition"` + StagingPartition *stagedImage `json:"staging_partition"` + MostRecentCommand *commandResult `json:"most_recent_command"` +} + +type apiClient struct { + log logging.Logger + httpClient *http.Client +} + +func newAPIClient() *apiClient { + return &apiClient{log: logging.New("update-api"), httpClient: &http.Client{ + Transport: &http.Transport{ + DialContext: func(ctx context.Context, _, _ string) (net.Conn, error) { + dialer := net.Dialer{} + return dialer.DialContext(ctx, "unix", bottlerocketAPISock) + }, + }, + // By default, Timeout is set to 0 which would mean no timeout. + // Set a 10 second timeout for all requests so we don't wait forever if the API fails to return a response. + // The Bottlerocket API should always immediately return a response regardless of the request. + // The 10 second value picked here is arbitrary and should be changed if it proves insufficient. + Timeout: 10 * time.Second, + }, + } +} + +func (c *apiClient) do(req *http.Request) (*http.Response, error) { + var response *http.Response + const maxAttempts = 5 + attempts := 0 + // Retry up to 5 times in case the Update API is busy; Waiting 10 seconds between each attempt. + for ; attempts < maxAttempts; attempts++ { + var err error + response, err = c.httpClient.Do(req) + if err != nil { + return nil, errors.Wrapf(err, "update API request error") + } + if response.StatusCode >= 200 && response.StatusCode < 300 { + // Response OK + break + } else if response.StatusCode == 423 { + if attempts < maxAttempts-1 { + c.log.Info("API server busy, retrying in 10 seconds ...") + // Retry after ten seconds if we get a 423 Locked response (update API busy) + time.Sleep(10 * time.Second) + continue + } + } + // API response was a non-transient error, bail out. + return response, errors.Errorf("bad http response, status code: %d", response.StatusCode) + } + if attempts == 5 { + return nil, errors.New("update API unavailable: retries exhausted") + } + return response, nil +} + +func (c *apiClient) Get(path string) (*http.Response, error) { + req, err := http.NewRequest(http.MethodGet, "http://unix"+path, nil) + if err != nil { + return nil, err + } + c.log.WithField("path", path).WithField("method", http.MethodGet).Debugf("update API request") + return c.do(req) +} + +func (c *apiClient) Post(path string) (*http.Response, error) { + req, err := http.NewRequest(http.MethodPost, "http://unix"+path, http.NoBody) + if err != nil { + return nil, err + } + c.log.WithField("path", path).WithField("method", http.MethodPost).Debugf("update API request") + return c.do(req) +} + +// GetUpdateStatus returns the update status from the update API +func (c *apiClient) GetUpdateStatus() (*updateStatus, error) { + response, err := c.Get("/updates/status") + if err != nil { + return nil, err + } + + var updateStatus updateStatus + body, err := ioutil.ReadAll(response.Body) + if err != nil { + return nil, err + } + err = json.Unmarshal(body, &updateStatus) + if err != nil { + return nil, err + } + return &updateStatus, nil +} + +func (c *apiClient) GetMostRecentCommand() (*commandResult, error) { + updateStatus, err := c.GetUpdateStatus() + if err != nil { + return nil, err + } + return updateStatus.MostRecentCommand, nil +} + +type osInfo struct { + VersionID string `json:"version_id"` +} + +func (c *apiClient) GetOSInfo() (*osInfo, error) { + response, err := c.Get("/os") + if err != nil { + return nil, err + } + + var osInfo osInfo + body, err := ioutil.ReadAll(response.Body) + if err != nil { + return nil, err + } + err = json.Unmarshal(body, &osInfo) + if err != nil { + return nil, err + } + return &osInfo, nil +} + +func (c *apiClient) RefreshUpdates() error { + _, err := c.Post("/actions/refresh-updates") + return err +} + +func (c *apiClient) PrepareUpdate() error { + _, err := c.Post("/actions/prepare-update") + return err +} + +func (c *apiClient) ActivateUpdate() error { + _, err := c.Post("/actions/activate-update") + return err +} + +func (c *apiClient) Reboot() error { + _, err := c.Post("/actions/reboot") + return err +} diff --git a/pkg/platform/api/api_test.go b/pkg/platform/api/api_test.go new file mode 100644 index 00000000..f64a77d1 --- /dev/null +++ b/pkg/platform/api/api_test.go @@ -0,0 +1,152 @@ +package api + +import ( + "encoding/json" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestUnmarshallUpdateStatus(t *testing.T) { + update_string := "Starting update to 0.4.0\n" + cases := []struct { + Name string + UpdateStatusJson []byte + Expected updateStatus + }{ + { + Name: "No update available after refresh", + UpdateStatusJson: []byte(`{"update_state":"Idle","available_updates":["0.4.0","0.3.4","0.3.3","0.3.2","0.3.1","0.3.0"],"chosen_update":null,"active_partition":{"image":{"arch":"x86_64","version":"0.4.0","variant":"aws-k8s-1.15"},"next_to_boot":true},"staging_partition":null,"most_recent_command":{"cmd_type":"refresh","cmd_status":"Success","timestamp":"2020-07-08T21:32:35.802253160Z","exit_status":0,"stderr":""}}`), + Expected: updateStatus{ + UpdateState: stateIdle, + AvailableUpdates: []string{"0.4.0", "0.3.4", "0.3.3", "0.3.2", "0.3.1", "0.3.0"}, + ChosenUpdate: nil, + ActivePartition: &stagedImage{ + Image: updateImage{ + Arch: "x86_64", + Version: "0.4.0", + Variant: "aws-k8s-1.15", + }, + NextToBoot: true, + }, + StagingPartition: nil, + MostRecentCommand: &commandResult{ + CmdType: commandRefresh, + CmdStatus: statusSuccess, + Timestamp: "2020-07-08T21:32:35.802253160Z", + ExitStatus: new(int32), + Stderr: new(string), + }, + }, + }, + { + Name: "Update available after refresh", + UpdateStatusJson: []byte(`{"update_state":"Available","available_updates":["0.4.0","0.3.4","0.3.3","0.3.2","0.3.1","0.3.0"],"chosen_update":{"arch":"x86_64","version":"0.4.0","variant":"aws-k8s-1.15"},"active_partition":{"image":{"arch":"x86_64","version":"0.3.2","variant":"aws-k8s-1.15"},"next_to_boot":true},"staging_partition":null,"most_recent_command":{"cmd_type":"refresh","cmd_status":"Success","timestamp":"2020-06-18T17:57:43.141433622Z","exit_status":0,"stderr":""}}`), + Expected: updateStatus{ + UpdateState: stateAvailable, + AvailableUpdates: []string{"0.4.0", "0.3.4", "0.3.3", "0.3.2", "0.3.1", "0.3.0"}, + ChosenUpdate: &updateImage{ + Arch: "x86_64", + Version: "0.4.0", + Variant: "aws-k8s-1.15", + }, + ActivePartition: &stagedImage{ + Image: updateImage{ + Arch: "x86_64", + Version: "0.3.2", + Variant: "aws-k8s-1.15", + }, + NextToBoot: true, + }, + StagingPartition: nil, + MostRecentCommand: &commandResult{ + CmdType: commandRefresh, + CmdStatus: statusSuccess, + Timestamp: "2020-06-18T17:57:43.141433622Z", + ExitStatus: new(int32), + Stderr: new(string), + }, + }, + }, + { + Name: "Update staged", + UpdateStatusJson: []byte(`{"update_state":"Staged","available_updates":["0.4.0","0.3.4","0.3.3","0.3.2","0.3.1","0.3.0"],"chosen_update":{"arch":"x86_64","version":"0.4.0","variant":"aws-k8s-1.15"},"active_partition":{"image":{"arch":"x86_64","version":"0.3.4","variant":"aws-k8s-1.15"},"next_to_boot":true},"staging_partition":{"image":{"arch":"x86_64","version":"0.4.0","variant":"aws-k8s-1.15"},"next_to_boot":false},"most_recent_command":{"cmd_type":"prepare","cmd_status":"Success","timestamp":"2020-07-10T06:44:58.766493367Z","exit_status":0,"stderr":"Starting update to 0.4.0\n"}}`), + Expected: updateStatus{ + UpdateState: stateStaged, + AvailableUpdates: []string{"0.4.0", "0.3.4", "0.3.3", "0.3.2", "0.3.1", "0.3.0"}, + ChosenUpdate: &updateImage{ + Arch: "x86_64", + Version: "0.4.0", + Variant: "aws-k8s-1.15", + }, + ActivePartition: &stagedImage{ + Image: updateImage{ + Arch: "x86_64", + Version: "0.3.4", + Variant: "aws-k8s-1.15", + }, + NextToBoot: true, + }, + StagingPartition: &stagedImage{ + Image: updateImage{ + Arch: "x86_64", + Version: "0.4.0", + Variant: "aws-k8s-1.15", + }, + NextToBoot: false, + }, + MostRecentCommand: &commandResult{ + CmdType: commandPrepare, + CmdStatus: statusSuccess, + Timestamp: "2020-07-10T06:44:58.766493367Z", + ExitStatus: new(int32), + Stderr: &update_string, + }, + }, + }, + { + Name: "Update ready", + UpdateStatusJson: []byte(`{"update_state":"Ready","available_updates":["0.4.0","0.3.4","0.3.3","0.3.2","0.3.1","0.3.0"],"chosen_update":{"arch":"x86_64","version":"0.4.0","variant":"aws-k8s-1.15"},"active_partition":{"image":{"arch":"x86_64","version":"0.3.4","variant":"aws-k8s-1.15"},"next_to_boot":false},"staging_partition":{"image":{"arch":"x86_64","version":"0.4.0","variant":"aws-k8s-1.15"},"next_to_boot":true},"most_recent_command":{"cmd_type":"activate","cmd_status":"Success","timestamp":"2020-07-10T06:47:19.903337270Z","exit_status":0,"stderr":""}}`), + Expected: updateStatus{ + UpdateState: stateReady, + AvailableUpdates: []string{"0.4.0", "0.3.4", "0.3.3", "0.3.2", "0.3.1", "0.3.0"}, + ChosenUpdate: &updateImage{ + Arch: "x86_64", + Version: "0.4.0", + Variant: "aws-k8s-1.15", + }, + ActivePartition: &stagedImage{ + Image: updateImage{ + Arch: "x86_64", + Version: "0.3.4", + Variant: "aws-k8s-1.15", + }, + NextToBoot: false, + }, + StagingPartition: &stagedImage{ + Image: updateImage{ + Arch: "x86_64", + Version: "0.4.0", + Variant: "aws-k8s-1.15", + }, + NextToBoot: true, + }, + MostRecentCommand: &commandResult{ + CmdType: commandActivate, + CmdStatus: statusSuccess, + Timestamp: "2020-07-10T06:47:19.903337270Z", + ExitStatus: new(int32), + Stderr: new(string), + }, + }, + }, + } + for _, tc := range cases { + t.Run(tc.Name, func(t *testing.T) { + var unmarshaledStatus updateStatus + err := json.Unmarshal(tc.UpdateStatusJson, &unmarshaledStatus) + assert.NoError(t, err, "failed to unmarshal into update status") + assert.Equal(t, tc.Expected, unmarshaledStatus) + }) + } +} diff --git a/pkg/platform/api/platform.go b/pkg/platform/api/platform.go new file mode 100644 index 00000000..51495375 --- /dev/null +++ b/pkg/platform/api/platform.go @@ -0,0 +1,145 @@ +package api + +import ( + "github.com/Masterminds/semver" + "github.com/pkg/errors" + + "github.com/bottlerocket-os/bottlerocket-update-operator/pkg/logging" + "github.com/bottlerocket-os/bottlerocket-update-operator/pkg/platform" +) + +// Assert Update-API as a platform implementor. +var _ platform.Platform = (*apiPlatform)(nil) + +type apiPlatform struct { + log logging.Logger + apiClient *apiClient +} + +func New() (*apiPlatform, error) { + return &apiPlatform{log: logging.New("platform"), apiClient: newAPIClient()}, nil +} + +type statusResponse struct { + osVersion *semver.Version +} + +func (sr *statusResponse) OK() bool { + // Bottlerocket OS version needs to be at least a certain version to support the Update API + constraint, err := semver.NewConstraint(">= " + minimumRequiredOSVer) + if err != nil { + return false + } + return constraint.Check(sr.osVersion) +} + +func (p apiPlatform) Status() (platform.Status, error) { + // Try to determine if the update API is supported in the Bottlerocket host + osInfo, err := p.apiClient.GetOSInfo() + if err != nil { + return nil, err + } + + osVersion, err := semver.NewVersion(osInfo.VersionID) + p.log.Info("current running OS version: ", osInfo.VersionID) + if err != nil { + return nil, errors.Wrap(err, "failed to parse 'version_id' field as semver") + } + return &statusResponse{osVersion: osVersion}, nil +} + +type listAvailableResponse struct { + chosenUpdate *updateImage +} + +func (lar *listAvailableResponse) Updates() []platform.Update { + return []platform.Update{lar.chosenUpdate} +} + +func (p apiPlatform) ListAvailable() (platform.Available, error) { + p.log.Debug("fetching list of available updates") + + // Refresh list of updates and check if there are any available + err := p.apiClient.RefreshUpdates() + if err != nil { + return nil, err + } + + updateStatus, err := p.apiClient.GetUpdateStatus() + if err != nil { + return nil, err + } + if updateStatus.MostRecentCommand.CmdType != commandRefresh && updateStatus.MostRecentCommand.CmdStatus != statusSuccess { + return nil, errors.New("failed to refresh updates or update action performed out of band") + + } + return &listAvailableResponse{chosenUpdate: updateStatus.ChosenUpdate}, nil +} + +func (p apiPlatform) Prepare(target platform.Update) error { + updateStatus, err := p.apiClient.GetUpdateStatus() + if err != nil { + return err + } + if updateStatus.UpdateState != stateAvailable && updateStatus.UpdateState != stateStaged { + return errors.Errorf("unexpected update state: %s, expecting state to be 'Available' or 'Staged'. update action performed out of band?", updateStatus.UpdateState) + } + + // Download the update and apply it to the inactive partition + err = p.apiClient.PrepareUpdate() + if err != nil { + return err + } + + commandResult, err := p.apiClient.GetMostRecentCommand() + if err != nil { + return err + } + if commandResult.CmdType != commandPrepare || commandResult.CmdStatus != statusSuccess { + return errors.New("failed to prepare update or update action performed out of band") + } + return nil +} + +func (p apiPlatform) Update(target platform.Update) error { + updateStatus, err := p.apiClient.GetUpdateStatus() + if err != nil { + return err + } + if updateStatus.UpdateState != stateStaged { + return errors.Errorf("unexpected update state: %s, expecting state to be 'Staged'. update action performed out of band?", updateStatus.UpdateState) + } + + // Activate the prepared update + + err = p.apiClient.ActivateUpdate() + if err != nil { + return err + } + + commandResult, err := p.apiClient.GetMostRecentCommand() + if err != nil { + return err + } + if commandResult.CmdType != commandActivate || commandResult.CmdStatus != statusSuccess { + return errors.New("failed to activate update or update action performed out of band") + } + return nil +} + +func (p apiPlatform) BootUpdate(target platform.Update, rebootNow bool) error { + updateStatus, err := p.apiClient.GetUpdateStatus() + if err != nil { + return err + } + if updateStatus.UpdateState != stateReady { + return errors.Errorf("unexpected update state: %s, expecting state to be 'Ready'. update action performed out of band?", updateStatus.UpdateState) + } + + // Reboot the host into the activated update + err = p.apiClient.Reboot() + if err != nil { + return err + } + return nil +} diff --git a/pkg/platform/interface.go b/pkg/platform/interface.go index bc718438..dc414719 100644 --- a/pkg/platform/interface.go +++ b/pkg/platform/interface.go @@ -50,7 +50,7 @@ type Update interface { func Ping(p Platform) error { status, err := p.Status() if err != nil { - return errors.WithMessage(err, "could not retrieve platform status") + return errors.Wrap(err, "could not retrieve platform status") } if !status.OK() { return errors.New("platform did not report OK status") diff --git a/update-operator.yaml b/update-operator.yaml index 15c370d1..19868f2c 100644 --- a/update-operator.yaml +++ b/update-operator.yaml @@ -117,6 +117,7 @@ spec: values: ["agent"] containers: - name: controller + # TODO Update this field to the new version that supports update API once that's released image: "328549459982.dkr.ecr.us-west-2.amazonaws.com/bottlerocket-update-operator:v0.1.3" imagePullPolicy: Always args: @@ -129,11 +130,12 @@ spec: valueFrom: fieldRef: fieldPath: spec.nodeName +# This DaemonSet is for Bottlerocket hosts that support updates through the Bottlerocket API --- apiVersion: apps/v1 kind: DaemonSet metadata: - name: update-operator-agent + name: update-operator-agent-update-api namespace: bottlerocket labels: update-operator: agent @@ -152,8 +154,76 @@ spec: requiredDuringSchedulingIgnoredDuringExecution: nodeSelectorTerms: - matchExpressions: - - key: bottlerocket.aws/updater-interface-version - operator: Exists + - key: "bottlerocket.aws/updater-interface-version" + operator: In + values: + - 2.0.0 + - key: "kubernetes.io/os" + operator: In + values: + - linux + - key: "kubernetes.io/arch" + operator: In + values: + - amd64 + hostPID: true + containers: + - name: agent + # TODO Update this field to the new version that supports update API once that's released + image: "328549459982.dkr.ecr.us-west-2.amazonaws.com/bottlerocket-update-operator:v0.1.3" + imagePullPolicy: Always + args: + - -agent + - -debug + - -nodeName + - $(NODE_NAME) + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + resources: + limits: + memory: 600Mi + requests: + cpu: 100m + memory: 600Mi + volumeMounts: + - name: bottlerocket-api-socket + mountPath: /run/api.sock + volumes: + - name: bottlerocket-api-socket + hostPath: + path: /run/api.sock + type: Socket +# This DaemonSet is for Bottlerocket hosts that can only support updates through updog +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: update-operator-agent-updog + namespace: bottlerocket + labels: + update-operator: agent +spec: + selector: + matchLabels: + update-operator: agent + template: + metadata: + labels: + update-operator: agent + spec: + serviceAccountName: update-operator-agent + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: "bottlerocket.aws/updater-interface-version" + operator: In + values: + - 1.0.0 - key: "kubernetes.io/os" operator: In values: