diff --git a/deploy/update-operator-api.yaml b/deploy/update-operator-api.yaml new file mode 100644 index 00000000..04f7fbda --- /dev/null +++ b/deploy/update-operator-api.yaml @@ -0,0 +1,193 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: bottlerocket +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: bottlerocket-update-operator-controller +rules: + - apiGroups: [""] + resources: ["nodes"] + verbs: ["get", "list", "watch", "update", "patch"] + # Allow the controller to remove Pods running on the Nodes that are updating. + - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "delete"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: bottlerocket-update-operator-controller +subjects: + - kind: ServiceAccount + name: update-operator-controller + namespace: bottlerocket +roleRef: + kind: ClusterRole + name: bottlerocket-update-operator-controller + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: bottlerocket-update-operator-agent +rules: + - apiGroups: [""] + resources: ["nodes"] + verbs: ["get", "list", "watch", "update", "patch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: bottlerocket-update-operator-agent +subjects: + - kind: ServiceAccount + name: update-operator-agent + namespace: bottlerocket +roleRef: + kind: ClusterRole + name: bottlerocket-update-operator-agent + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: update-operator-controller + namespace: bottlerocket + annotations: + kubernetes.io/service-account.name: update-operator-controller +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: update-operator-agent + namespace: bottlerocket + annotations: + kubernetes.io/service-account.name: update-operator-agent +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: 
update-operator-controller + namespace: bottlerocket + labels: + update-operator: controller +spec: + replicas: 1 + strategy: + rollingUpdate: + maxUnavailable: 100% + selector: + matchLabels: + update-operator: controller + template: + metadata: + namespace: bottlerocket + labels: + update-operator: controller + spec: + serviceAccountName: update-operator-controller + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: bottlerocket.aws/updater-interface-version + operator: Exists + - key: "kubernetes.io/os" + operator: In + values: + - linux + - key: "kubernetes.io/arch" + operator: In + values: + - amd64 + # Avoid update-operator's Agent Pods if possible. + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 10 + podAffinityTerm: + topologyKey: bottlerocket.aws/updater-interface-version + labelSelector: + matchExpressions: + - key: update-operator + operator: In + values: ["agent"] + containers: + - name: controller + image: "328549459982.dkr.ecr.us-west-2.amazonaws.com/bottlerocket-update-operator:v0.1.4" + imagePullPolicy: Always + args: + - -controller + - -debug + - -nodeName + - $(NODE_NAME) + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: update-operator-agent + namespace: bottlerocket + labels: + update-operator: agent +spec: + selector: + matchLabels: + update-operator: agent + template: + metadata: + labels: + update-operator: agent + spec: + serviceAccountName: update-operator-agent + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: bottlerocket.aws/updater-interface-version + operator: Exists + - key: "kubernetes.io/os" + operator: In + values: + - linux + - key: "kubernetes.io/arch" + operator: In + values: + - amd64 + hostPID: true + containers: + - name: agent + image: 
"328549459982.dkr.ecr.us-west-2.amazonaws.com/bottlerocket-update-operator:v0.1.4" + imagePullPolicy: Always + args: + - -agent + - -debug + - -nodeName + - $(NODE_NAME) + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + resources: + limits: + memory: 600Mi + requests: + cpu: 100m + memory: 600Mi + volumeMounts: + - name: bottlerocket-api-socket + mountPath: /run/api.sock + volumes: + - name: bottlerocket-api-socket + hostPath: + path: /run/api.sock + type: Socket diff --git a/deploy/update-operator-updog.yaml b/deploy/update-operator-updog.yaml new file mode 100644 index 00000000..15c370d1 --- /dev/null +++ b/deploy/update-operator-updog.yaml @@ -0,0 +1,198 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: bottlerocket +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: bottlerocket-update-operator-controller +rules: + - apiGroups: [""] + resources: ["nodes"] + verbs: ["get", "list", "watch", "update", "patch"] + # Allow the controller to remove Pods running on the Nodes that are updating. 
+ - apiGroups: [""] + resources: ["pods"] + verbs: ["get", "list", "delete"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: bottlerocket-update-operator-controller +subjects: + - kind: ServiceAccount + name: update-operator-controller + namespace: bottlerocket +roleRef: + kind: ClusterRole + name: bottlerocket-update-operator-controller + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: bottlerocket-update-operator-agent +rules: + - apiGroups: [""] + resources: ["nodes"] + verbs: ["get", "list", "watch", "update", "patch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: bottlerocket-update-operator-agent +subjects: + - kind: ServiceAccount + name: update-operator-agent + namespace: bottlerocket +roleRef: + kind: ClusterRole + name: bottlerocket-update-operator-agent + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: update-operator-controller + namespace: bottlerocket + annotations: + kubernetes.io/service-account.name: update-operator-controller +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: update-operator-agent + namespace: bottlerocket + annotations: + kubernetes.io/service-account.name: update-operator-agent +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: update-operator-controller + namespace: bottlerocket + labels: + update-operator: controller +spec: + replicas: 1 + strategy: + rollingUpdate: + maxUnavailable: 100% + selector: + matchLabels: + update-operator: controller + template: + metadata: + namespace: bottlerocket + labels: + update-operator: controller + spec: + serviceAccountName: update-operator-controller + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: bottlerocket.aws/updater-interface-version + operator: Exists + - key: 
"kubernetes.io/os" + operator: In + values: + - linux + - key: "kubernetes.io/arch" + operator: In + values: + - amd64 + # Avoid update-operator's Agent Pods if possible. + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 10 + podAffinityTerm: + topologyKey: bottlerocket.aws/updater-interface-version + labelSelector: + matchExpressions: + - key: update-operator + operator: In + values: ["agent"] + containers: + - name: controller + image: "328549459982.dkr.ecr.us-west-2.amazonaws.com/bottlerocket-update-operator:v0.1.3" + imagePullPolicy: Always + args: + - -controller + - -debug + - -nodeName + - $(NODE_NAME) + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: update-operator-agent + namespace: bottlerocket + labels: + update-operator: agent +spec: + selector: + matchLabels: + update-operator: agent + template: + metadata: + labels: + update-operator: agent + spec: + serviceAccountName: update-operator-agent + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: bottlerocket.aws/updater-interface-version + operator: Exists + - key: "kubernetes.io/os" + operator: In + values: + - linux + - key: "kubernetes.io/arch" + operator: In + values: + - amd64 + hostPID: true + containers: + - name: agent + image: "328549459982.dkr.ecr.us-west-2.amazonaws.com/bottlerocket-update-operator:v0.1.3" + imagePullPolicy: Always + # XXX: tty required to exec binaries that use `simplelog` until https://github.com/bottlerocket-os/bottlerocket/issues/576 is resolved. + tty: true + args: + - -agent + - -debug + - -nodeName + - $(NODE_NAME) + env: + - name: NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + securityContext: + # Required for executing OS update operations. 
+ privileged: true + resources: + limits: + memory: 600Mi + requests: + cpu: 100m + memory: 600Mi + volumeMounts: + - name: rootfs + mountPath: /.bottlerocket/rootfs + volumes: + - name: rootfs + hostPath: + path: / + type: Directory diff --git a/go.mod b/go.mod index 73ab8c1f..31f100bc 100644 --- a/go.mod +++ b/go.mod @@ -3,8 +3,7 @@ module github.com/bottlerocket-os/bottlerocket-update-operator go 1.12 require ( - github.com/coreos/go-systemd/v22 v22.0.0 - github.com/godbus/dbus/v5 v5.0.3 + github.com/Masterminds/semver v1.5.0 github.com/google/go-cmp v0.3.1 // indirect github.com/googleapis/gnostic v0.3.1 // indirect github.com/imdario/mergo v0.3.7 // indirect @@ -12,6 +11,7 @@ require ( github.com/karlseguin/expect v1.0.1 // indirect github.com/pkg/errors v0.8.1 github.com/sirupsen/logrus v1.4.2 + github.com/stretchr/testify v1.3.0 github.com/wsxiaoys/terminal v0.0.0-20160513160801-0940f3fc43a0 // indirect golang.org/x/crypto v0.0.0-20190829043050-9756ffdc2472 // indirect golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297 // indirect diff --git a/go.sum b/go.sum index 73509ad3..3f41ba7c 100644 --- a/go.sum +++ b/go.sum @@ -12,6 +12,8 @@ github.com/Azure/go-autorest/tracing v0.5.0/go.mod h1:r/s2XiOKccPW3HrqB+W0TQzfbt github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/MakeNowJust/heredoc v0.0.0-20170808103936-bb23615498cd/go.mod h1:64YHyfSL2R96J44Nlwm39UHepQbyR5q10x7iYa1ks2E= +github.com/Masterminds/semver v1.5.0 h1:H65muMkzWKEuNDnfl9d70GUjFniHKHRbFPGBuZ3QEww= +github.com/Masterminds/semver v1.5.0/go.mod h1:MB6lktGJrhw8PrUyiEoblNEGEQ+RzHPF078ddwwvV3Y= github.com/NYTimes/gziphandler v0.0.0-20170623195520-56545f4a5d46/go.mod h1:3wb06e3pkSAbeQ52E9H9iFoQsEEwGN64994WTCIhntQ= github.com/PuerkitoBio/purell v1.0.0/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= github.com/PuerkitoBio/purell 
v1.1.1/go.mod h1:c11w/QuzBsJSee3cPx9rAFu61PvFxuPbtSwDGJws/X0= @@ -25,8 +27,6 @@ github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDk github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk= github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= -github.com/coreos/go-systemd/v22 v22.0.0 h1:XJIw/+VlJ+87J+doOxznsAWIdmWuViOVhkQamW5YV28= -github.com/coreos/go-systemd/v22 v22.0.0/go.mod h1:xO0FLkIi5MaZafQlIrOotqXZ90ih+1atmu1JpKERPPk= github.com/cpuguy83/go-md2man v1.0.10/go.mod h1:SmD6nW6nTyfqj6ABTjUi3V3JVMnlJmwcJI5acqYI6dE= github.com/davecgh/go-spew v0.0.0-20151105211317-5215b55f46b2/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -58,8 +58,6 @@ github.com/go-openapi/spec v0.0.0-20160808142527-6aced65f8501/go.mod h1:J8+jY1nA github.com/go-openapi/spec v0.19.2/go.mod h1:sCxk3jxKgioEJikev4fgkNmwS+3kuYdJtcsZsD5zxMY= github.com/go-openapi/swag v0.0.0-20160704191624-1d0bd113de87/go.mod h1:DXUve3Dpr1UfpPtxFw+EFuQ41HhCWZfha5jSVRG7C7I= github.com/go-openapi/swag v0.19.2/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= -github.com/godbus/dbus/v5 v5.0.3 h1:ZqHaoEF7TBzh4jzPmqVhE/5A1z9of6orkAe5uHoAeME= -github.com/godbus/dbus/v5 v5.0.3/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= github.com/gogo/protobuf v1.2.2-0.20190723190241-65acae22fc9d h1:3PaI8p3seN09VjbTYC/QWlUZdZ1qS1zGjy7LH2Wt07I= github.com/gogo/protobuf v1.2.2-0.20190723190241-65acae22fc9d/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= diff --git a/main.go b/main.go index b31ced7b..d6074a0c 100644 --- a/main.go +++ b/main.go @@ -11,7 +11,6 @@ import ( 
"github.com/bottlerocket-os/bottlerocket-update-operator/pkg/controller" "github.com/bottlerocket-os/bottlerocket-update-operator/pkg/k8sutil" "github.com/bottlerocket-os/bottlerocket-update-operator/pkg/logging" - "github.com/bottlerocket-os/bottlerocket-update-operator/pkg/platform/updog" "github.com/bottlerocket-os/bottlerocket-update-operator/pkg/sigcontext" "github.com/pkg/errors" "k8s.io/client-go/kubernetes" @@ -89,11 +88,7 @@ func runController(ctx context.Context, kube kubernetes.Interface, nodeName stri func runAgent(ctx context.Context, kube kubernetes.Interface, nodeName string) error { log := logging.New("agent") - platform, err := updog.New() - if err != nil { - return errors.WithMessage(err, "could not setup platform for agent") - } - a, err := agent.New(log, kube, platform, nodeName) + a, err := agent.New(log, kube, nodeName) if err != nil { return err } diff --git a/pkg/agent/agent.go b/pkg/agent/agent.go index ee34d9a7..002ae6a5 100644 --- a/pkg/agent/agent.go +++ b/pkg/agent/agent.go @@ -13,6 +13,8 @@ import ( "github.com/bottlerocket-os/bottlerocket-update-operator/pkg/marker" "github.com/bottlerocket-os/bottlerocket-update-operator/pkg/nodestream" "github.com/bottlerocket-os/bottlerocket-update-operator/pkg/platform" + "github.com/bottlerocket-os/bottlerocket-update-operator/pkg/platform/api" + "github.com/bottlerocket-os/bottlerocket-update-operator/pkg/platform/updog" "github.com/bottlerocket-os/bottlerocket-update-operator/pkg/workgroup" "github.com/pkg/errors" @@ -67,18 +69,45 @@ type proc interface { KillProcess() error } -func New(log logging.Logger, kube kubernetes.Interface, plat platform.Platform, nodeName string) (*Agent, error) { +func New(log logging.Logger, kube kubernetes.Interface, nodeName string) (*Agent, error) { if nodeName == "" { return nil, errors.New("nodeName must be provided for Agent to manage") } var nodeclient corev1.NodeInterface + var platform platform.Platform if kube != nil { nodeclient = kube.CoreV1().Nodes() + 
// Determine which platform to use depending on the updater interface version
+		var node, err = nodeclient.Get(nodeName, v1meta.GetOptions{})
+		if err != nil {
+			return nil, errors.WithMessage(err, "failed to retrieve node information")
+		}
+		// Get the updater interface version from the node label
+		var platformVersion = node.Labels[marker.UpdaterInterfaceVersionKey]
+		switch platformVersion {
+		default:
+			// If the updater interface version is not specified, default to
+			// using Updog as the platform
+			log.Warn("unknown platform version specified, defaulting to using updog")
+			fallthrough
+		case "1.0.0":
+			platform, err = updog.New()
+			if err != nil {
+				return nil, errors.WithMessage(err, "could not setup Updog platform for agent")
+			}
+		case "2.0.0":
+			platform, err = api.New()
+			if err != nil {
+				return nil, errors.WithMessage(err, "could not setup Update API platform for agent")
+			}
+
+		}
 	}
+
 	return &Agent{
 		log:      log,
 		kube:     kube,
-		platform: plat,
+		platform: platform,
 		poster:   &k8sPoster{log, nodeclient},
 		proc:     &osProc{},
 		nodeName: nodeName,
@@ -314,7 +343,7 @@ func (a *Agent) realize(in *intent.Intent) error {
 
 	case marker.NodeActionUnknown, marker.NodeActionStabilize:
 		log.Debug("sitrep")
-		_, err = a.platform.Status()
+		err = platform.Ping(a.platform)
 		if err != nil {
 			break
 		}
diff --git a/pkg/agent/agent_test.go b/pkg/agent/agent_test.go
index 9816909b..528a3d66 100644
--- a/pkg/agent/agent_test.go
+++ b/pkg/agent/agent_test.go
@@ -82,8 +82,11 @@ func testAgent(t *testing.T) (*Agent, *testHooks) {
 		Platform: &testPlatform{},
 		Proc:     &testProc{},
 	}
-	a, err := New(testoutput.Logger(t, logging.New("agent")), nil, hooks.Platform, intents.NodeName)
+	a, err := New(testoutput.Logger(t, logging.New("agent")), nil, intents.NodeName)
+	// Check err before touching a: New returns a nil Agent alongside an error.
 	if err != nil {
+		panic(err)
 	}
+	a.platform = hooks.Platform
 	a.poster = hooks.Poster
 
diff --git a/pkg/platform/api/api.go b/pkg/platform/api/api.go
new file mode 100644
index 00000000..b7f4c8d6
--- /dev/null
+++ b/pkg/platform/api/api.go
@@ -0,0 +1,231 @@
+package api
+
+import (
+	"context"
+	"encoding/json"
+	"github.com/bottlerocket-os/bottlerocket-update-operator/pkg/logging"
+	"github.com/pkg/errors"
+	"io/ioutil"
+	"net"
+	"net/http"
+	"time"
+)
+
+// bottlerocketAPISock is the unix socket every API request is sent over.
+const bottlerocketAPISock = "/run/api.sock"
+
+// minimumRequiredOSVer is the oldest OS version considered to support the update API.
+const minimumRequiredOSVer = "0.3.4"
+
+const (
+	// maxAPIAttempts caps how many times a request is sent while the API answers 423 Locked.
+	maxAPIAttempts = 5
+	// apiRetryDelay is the wait between attempts while the update API is busy.
+	apiRetryDelay = 10 * time.Second
+)
+
+// updateState is the value of the "update_state" field of the update status.
+type updateState = string
+
+const (
+	Idle      updateState = "Idle"
+	Available updateState = "Available"
+	Staged    updateState = "Staged"
+	Ready     updateState = "Ready"
+)
+
+// UpdateImage identifies an OS image by architecture, version and variant.
+type UpdateImage struct {
+	Arch    string `json:"arch"`
+	Version string `json:"version"`
+	Variant string `json:"variant"`
+}
+
+// Identifier returns the image version.
+func (ui *UpdateImage) Identifier() interface{} {
+	return ui.Version
+}
+
+// StagedImage describes the image held by a partition and whether it boots next.
+type StagedImage struct {
+	Image      UpdateImage `json:"image"`
+	NextToBoot bool        `json:"next_to_boot"`
+}
+
+// updateCommand is the value of the "cmd_type" field of a command result.
+type updateCommand = string
+
+const (
+	refresh    updateCommand = "refresh"
+	prepare    updateCommand = "prepare"
+	activate   updateCommand = "activate"
+	deactivate updateCommand = "deactivate"
+)
+
+// commandStatus is the value of the "cmd_status" field of a command result.
+type commandStatus = string
+
+const (
+	Success commandStatus = "Success"
+	Failed  commandStatus = "Failed"
+	Unknown commandStatus = "Unknown"
+)
+
+// CommandResult is the "most_recent_command" object of the update status.
+type CommandResult struct {
+	CmdType    updateCommand `json:"cmd_type"`
+	CmdStatus  commandStatus `json:"cmd_status"`
+	Timestamp  string        `json:"timestamp"`
+	ExitStatus *int32        `json:"exit_status"`
+	Stderr     *string       `json:"stderr"`
+}
+
+// UpdateStatus is the JSON document decoded from GET /updates/status.
+type UpdateStatus struct {
+	UpdateState       updateState    `json:"update_state"`
+	AvailableUpdates  []string       `json:"available_updates"`
+	ChosenUpdate      *UpdateImage   `json:"chosen_update"`
+	ActivePartition   *StagedImage   `json:"active_partition"`
+	StagingPartition  *StagedImage   `json:"staging_partition"`
+	MostRecentCommand *CommandResult `json:"most_recent_command"`
+}
+
+// APIClient talks HTTP to the Bottlerocket API over its unix socket.
+type APIClient struct {
+	log        logging.Logger
+	httpClient *http.Client
+}
+
+// NewAPIClient returns a client whose transport always dials bottlerocketAPISock.
+func NewAPIClient() *APIClient {
+	return &APIClient{
+		log: logging.New("api"),
+		httpClient: &http.Client{
+			Transport: &http.Transport{
+				DialContext: func(ctx context.Context, _, _ string) (net.Conn, error) {
+					dialer := net.Dialer{}
+					return dialer.DialContext(ctx, "unix", bottlerocketAPISock)
+				},
+			},
+			// Set a 10 second timeout for all requests
+			Timeout: 10 * time.Second,
+		},
+	}
+}
+
+// makeAPIRequest sends method to path on the Bottlerocket API and returns the 2xx response.
+// Only a 423 Locked answer (update API busy) is retried, up to maxAPIAttempts times with
+// apiRetryDelay between attempts. On success the caller owns, and must close, the response
+// body; on any error the returned response is nil and no body is left open.
+func (c *APIClient) makeAPIRequest(method string, path string) (*http.Response, error) {
+	for attempt := 1; attempt <= maxAPIAttempts; attempt++ {
+		c.log.Debugf("making '%s' request to '%s' API endpoint", method, path)
+		var response *http.Response
+		var err error
+		switch method {
+		case "GET":
+			response, err = c.httpClient.Get("http://unix" + path)
+		case "POST":
+			response, err = c.httpClient.Post("http://unix"+path, "text/plain", http.NoBody)
+		default:
+			err = errors.Errorf("unsupported method %s", method)
+		}
+		if err != nil {
+			// Keep the underlying cause so callers can see why the request failed.
+			return nil, errors.Wrapf(err, "error when trying to '%s' '%s'", method, path)
+		}
+		if response.StatusCode >= 200 && response.StatusCode < 300 {
+			// Response OK
+			return response, nil
+		}
+		// Every other response is discarded here, so close its body before moving on.
+		response.Body.Close()
+		if response.StatusCode != http.StatusLocked {
+			return nil, errors.Errorf("bad http response when trying to '%s' '%s': status code %d", method, path, response.StatusCode)
+		}
+		if attempt < maxAPIAttempts {
+			c.log.Info("API server busy, retrying in 10 seconds ...")
+			// Retry after the delay if we get a 423 Locked response (update API busy)
+			time.Sleep(apiRetryDelay)
+		}
+	}
+	return nil, errors.New("update API unavailable: retries exhausted")
+}
+
+// getJSON GETs path and decodes the JSON response body into out.
+func (c *APIClient) getJSON(path string, out interface{}) error {
+	response, err := c.makeAPIRequest("GET", path)
+	if err != nil {
+		return err
+	}
+	defer response.Body.Close()
+	body, err := ioutil.ReadAll(response.Body)
+	if err != nil {
+		return err
+	}
+	return json.Unmarshal(body, out)
+}
+
+// postAction POSTs to an action endpoint and discards the response.
+func (c *APIClient) postAction(path string) error {
+	response, err := c.makeAPIRequest("POST", path)
+	if err != nil {
+		return err
+	}
+	// The body is unused; close it so the connection is not leaked.
+	response.Body.Close()
+	return nil
+}
+
+// GetUpdateStatus returns the update status from the update API
+func (c *APIClient) GetUpdateStatus() (*UpdateStatus, error) {
+	var updateStatus UpdateStatus
+	if err := c.getJSON("/updates/status", &updateStatus); err != nil {
+		return nil, err
+	}
+	return &updateStatus, nil
+}
+
+// GetMostRecentCommand returns the most recent command recorded in the update status.
+// The result is nil when the status carries a null "most_recent_command".
+func (c *APIClient) GetMostRecentCommand() (*CommandResult, error) {
+	updateStatus, err := c.GetUpdateStatus()
+	if err != nil {
+		return nil, err
+	}
+	return updateStatus.MostRecentCommand, nil
+}
+
+// OSInfoJSON is the subset of the GET /os response that is decoded.
+type OSInfoJSON struct {
+	VersionID string `json:"version_id"`
+}
+
+// GetOSInfo returns the OS information from the /os endpoint.
+func (c *APIClient) GetOSInfo() (*OSInfoJSON, error) {
+	var osInfo OSInfoJSON
+	if err := c.getJSON("/os", &osInfo); err != nil {
+		return nil, err
+	}
+	return &osInfo, nil
+}
+
+// RefreshUpdates POSTs to /actions/refresh-updates.
+func (c *APIClient) RefreshUpdates() error {
+	return c.postAction("/actions/refresh-updates")
+}
+
+// PrepareUpdate POSTs to /actions/prepare-update.
+func (c *APIClient) PrepareUpdate() error {
+	return c.postAction("/actions/prepare-update")
+}
+
+// ActivateUpdate POSTs to /actions/activate-update.
+func (c *APIClient) ActivateUpdate() error {
+	return c.postAction("/actions/activate-update")
+}
+
+// Reboot POSTs to /actions/reboot.
+func (c *APIClient) Reboot() error {
+	return c.postAction("/actions/reboot")
+}
diff --git a/pkg/platform/api/api_test.go b/pkg/platform/api/api_test.go
new file mode 100644
index 00000000..a1282ef8
--- /dev/null
+++ b/pkg/platform/api/api_test.go
@@ -0,0 +1,151 @@
+package api
+
+import (
+	"encoding/json"
+	"github.com/stretchr/testify/assert"
+	"testing"
+)
+
+func TestUnmarshallUpdateStatus(t *testing.T) {
+	update_string := "Starting update to 0.4.0\n"
+	cases := []struct {
+		Name             string
+		UpdateStatusJson []byte
+		Expected         UpdateStatus
+	}{
+		{
+			Name: "No update available after refresh",
+			UpdateStatusJson:
[]byte(`{"update_state":"Idle","available_updates":["0.4.0","0.3.4","0.3.3","0.3.2","0.3.1","0.3.0"],"chosen_update":null,"active_partition":{"image":{"arch":"x86_64","version":"0.4.0","variant":"aws-k8s-1.15"},"next_to_boot":true},"staging_partition":null,"most_recent_command":{"cmd_type":"refresh","cmd_status":"Success","timestamp":"2020-07-08T21:32:35.802253160Z","exit_status":0,"stderr":""}}`), + Expected: UpdateStatus{ + UpdateState: Idle, + AvailableUpdates: []string{"0.4.0", "0.3.4", "0.3.3", "0.3.2", "0.3.1", "0.3.0"}, + ChosenUpdate: nil, + ActivePartition: &StagedImage{ + Image: UpdateImage{ + Arch: "x86_64", + Version: "0.4.0", + Variant: "aws-k8s-1.15", + }, + NextToBoot: true, + }, + StagingPartition: nil, + MostRecentCommand: &CommandResult{ + CmdType: refresh, + CmdStatus: Success, + Timestamp: "2020-07-08T21:32:35.802253160Z", + ExitStatus: new(int32), + Stderr: new(string), + }, + }, + }, + { + Name: "Update available after refresh", + UpdateStatusJson: []byte(`{"update_state":"Available","available_updates":["0.4.0","0.3.4","0.3.3","0.3.2","0.3.1","0.3.0"],"chosen_update":{"arch":"x86_64","version":"0.4.0","variant":"aws-k8s-1.15"},"active_partition":{"image":{"arch":"x86_64","version":"0.3.2","variant":"aws-k8s-1.15"},"next_to_boot":true},"staging_partition":null,"most_recent_command":{"cmd_type":"refresh","cmd_status":"Success","timestamp":"2020-06-18T17:57:43.141433622Z","exit_status":0,"stderr":""}}`), + Expected: UpdateStatus{ + UpdateState: Available, + AvailableUpdates: []string{"0.4.0", "0.3.4", "0.3.3", "0.3.2", "0.3.1", "0.3.0"}, + ChosenUpdate: &UpdateImage{ + Arch: "x86_64", + Version: "0.4.0", + Variant: "aws-k8s-1.15", + }, + ActivePartition: &StagedImage{ + Image: UpdateImage{ + Arch: "x86_64", + Version: "0.3.2", + Variant: "aws-k8s-1.15", + }, + NextToBoot: true, + }, + StagingPartition: nil, + MostRecentCommand: &CommandResult{ + CmdType: refresh, + CmdStatus: Success, + Timestamp: "2020-06-18T17:57:43.141433622Z", + ExitStatus: 
new(int32), + Stderr: new(string), + }, + }, + }, + { + Name: "Update staged", + UpdateStatusJson: []byte(`{"update_state":"Staged","available_updates":["0.4.0","0.3.4","0.3.3","0.3.2","0.3.1","0.3.0"],"chosen_update":{"arch":"x86_64","version":"0.4.0","variant":"aws-k8s-1.15"},"active_partition":{"image":{"arch":"x86_64","version":"0.3.4","variant":"aws-k8s-1.15"},"next_to_boot":true},"staging_partition":{"image":{"arch":"x86_64","version":"0.4.0","variant":"aws-k8s-1.15"},"next_to_boot":false},"most_recent_command":{"cmd_type":"prepare","cmd_status":"Success","timestamp":"2020-07-10T06:44:58.766493367Z","exit_status":0,"stderr":"Starting update to 0.4.0\n"}}`), + Expected: UpdateStatus{ + UpdateState: Staged, + AvailableUpdates: []string{"0.4.0", "0.3.4", "0.3.3", "0.3.2", "0.3.1", "0.3.0"}, + ChosenUpdate: &UpdateImage{ + Arch: "x86_64", + Version: "0.4.0", + Variant: "aws-k8s-1.15", + }, + ActivePartition: &StagedImage{ + Image: UpdateImage{ + Arch: "x86_64", + Version: "0.3.4", + Variant: "aws-k8s-1.15", + }, + NextToBoot: true, + }, + StagingPartition: &StagedImage{ + Image: UpdateImage{ + Arch: "x86_64", + Version: "0.4.0", + Variant: "aws-k8s-1.15", + }, + NextToBoot: false, + }, + MostRecentCommand: &CommandResult{ + CmdType: prepare, + CmdStatus: Success, + Timestamp: "2020-07-10T06:44:58.766493367Z", + ExitStatus: new(int32), + Stderr: &update_string, + }, + }, + }, + { + Name: "Update ready", + UpdateStatusJson: []byte(`{"update_state":"Ready","available_updates":["0.4.0","0.3.4","0.3.3","0.3.2","0.3.1","0.3.0"],"chosen_update":{"arch":"x86_64","version":"0.4.0","variant":"aws-k8s-1.15"},"active_partition":{"image":{"arch":"x86_64","version":"0.3.4","variant":"aws-k8s-1.15"},"next_to_boot":false},"staging_partition":{"image":{"arch":"x86_64","version":"0.4.0","variant":"aws-k8s-1.15"},"next_to_boot":true},"most_recent_command":{"cmd_type":"activate","cmd_status":"Success","timestamp":"2020-07-10T06:47:19.903337270Z","exit_status":0,"stderr":""}}`), + 
+			Expected: UpdateStatus{
+				UpdateState:      Ready,
+				AvailableUpdates: []string{"0.4.0", "0.3.4", "0.3.3", "0.3.2", "0.3.1", "0.3.0"},
+				ChosenUpdate: &UpdateImage{
+					Arch:    "x86_64",
+					Version: "0.4.0",
+					Variant: "aws-k8s-1.15",
+				},
+				ActivePartition: &StagedImage{
+					Image: UpdateImage{
+						Arch:    "x86_64",
+						Version: "0.3.4",
+						Variant: "aws-k8s-1.15",
+					},
+					NextToBoot: false,
+				},
+				StagingPartition: &StagedImage{
+					Image: UpdateImage{
+						Arch:    "x86_64",
+						Version: "0.4.0",
+						Variant: "aws-k8s-1.15",
+					},
+					NextToBoot: true,
+				},
+				MostRecentCommand: &CommandResult{
+					CmdType:    activate,
+					CmdStatus:  Success,
+					Timestamp:  "2020-07-10T06:47:19.903337270Z",
+					ExitStatus: new(int32),
+					Stderr:     new(string),
+				},
+			},
+		},
+	}
+	for _, tc := range cases {
+		t.Run(tc.Name, func(t *testing.T) {
+			var unmarshaledStatus UpdateStatus
+			err := json.Unmarshal(tc.UpdateStatusJson, &unmarshaledStatus)
+			assert.NoError(t, err, "failed to unmarshal into update status")
+			assert.Equal(t, tc.Expected, unmarshaledStatus)
+		})
+	}
+}
diff --git a/pkg/platform/api/platform.go b/pkg/platform/api/platform.go
new file mode 100644
index 00000000..252b5342
--- /dev/null
+++ b/pkg/platform/api/platform.go
@@ -0,0 +1,164 @@
+package api
+
+import (
+	"github.com/Masterminds/semver"
+	"github.com/bottlerocket-os/bottlerocket-update-operator/pkg/logging"
+	"github.com/bottlerocket-os/bottlerocket-update-operator/pkg/platform"
+	"github.com/pkg/errors"
+)
+
+// Assert Update-API as a platform implementor.
+var _ platform.Platform = (*Platform)(nil)
+
+// Platform implements platform.Platform on top of the Bottlerocket update API client.
+type Platform struct {
+	log       logging.Logger
+	apiClient *APIClient
+}
+
+// New returns a Platform using a new APIClient; the returned error is always nil.
+func New() (*Platform, error) {
+	return &Platform{log: logging.New("platform"), apiClient: NewAPIClient()}, nil
+}
+
+// statusResponse holds the OS version parsed from the API's OS info.
+type statusResponse struct {
+	osVersion *semver.Version
+}
+
+// OK reports whether the OS version satisfies ">= minimumRequiredOSVer".
+func (sr *statusResponse) OK() bool {
+	// Bottlerocket OS version needs to be at least a certain version to support the Update API
+	constraint, err := semver.NewConstraint(">= " + minimumRequiredOSVer)
+	if err != nil || sr.osVersion == nil {
+		// Support cannot be confirmed without both a constraint and a version.
+		return false
+	}
+	return constraint.Check(sr.osVersion)
+}
+
+// Status fetches the OS info and returns its parsed version wrapped as a platform.Status.
+func (p Platform) Status() (platform.Status, error) {
+	// Try to determine if the update API is supported in the Bottlerocket host
+	osInfo, err := p.apiClient.GetOSInfo()
+	if err != nil {
+		return nil, err
+	}
+
+	p.log.Info("current running OS version: ", osInfo.VersionID)
+	osVersion, err := semver.NewVersion(osInfo.VersionID)
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to parse 'version_id' field as semver")
+	}
+	return &statusResponse{osVersion: osVersion}, nil
+}
+
+// listAvailableResponse holds the update chosen by the API, or nil when there is none.
+type listAvailableResponse struct {
+	chosenUpdate *UpdateImage
+}
+
+// Updates returns the chosen update as a one-element slice, or nil when none was chosen.
+func (lar *listAvailableResponse) Updates() []platform.Update {
+	if lar.chosenUpdate == nil {
+		return nil
+	}
+	return []platform.Update{lar.chosenUpdate}
+}
+
+// commandSucceeded reports whether result is a successful run of the wanted command.
+// A nil result (the status carried no most-recent command) counts as not succeeded.
+func commandSucceeded(result *CommandResult, want updateCommand) bool {
+	return result != nil && result.CmdType == want && result.CmdStatus == Success
+}
+
+// ListAvailable refreshes the list of updates and returns the update the API chose.
+// It fails unless the most recent command is a successful refresh.
+func (p Platform) ListAvailable() (platform.Available, error) {
+	p.log.Debug("fetching list of available updates")
+
+	// Refresh list of updates and check if there are any available
+	err := p.apiClient.RefreshUpdates()
+	if err != nil {
+		return nil, err
+	}
+
+	updateStatus, err := p.apiClient.GetUpdateStatus()
+	if err != nil {
+		return &listAvailableResponse{}, err
+	}
+	// The refresh must be the latest command AND have succeeded; either one failing is an error.
+	if !commandSucceeded(updateStatus.MostRecentCommand, refresh) {
+		return &listAvailableResponse{chosenUpdate: nil}, errors.New("failed to refresh updates or update action performed out of band")
+	}
+	return &listAvailableResponse{chosenUpdate: updateStatus.ChosenUpdate}, nil
+}
+
+// Prepare asks the API to prepare the update and verifies that the command succeeded.
+// target is not consulted; the API acts on its own chosen update.
+func (p Platform) Prepare(target platform.Update) error {
+	updateStatus, err := p.apiClient.GetUpdateStatus()
+	if err != nil {
+		return err
+	}
+	if updateStatus.UpdateState != Available && updateStatus.UpdateState != Staged {
+		return errors.Errorf("unexpected update state: %s, expecting state to be 'Available' or 'Staged'. update action performed out of band?", updateStatus.UpdateState)
+	}
+
+	// Download the update and apply it to the inactive partition
+	err = p.apiClient.PrepareUpdate()
+	if err != nil {
+		return err
+	}
+
+	commandResult, err := p.apiClient.GetMostRecentCommand()
+	if err != nil {
+		return err
+	}
+	if !commandSucceeded(commandResult, prepare) {
+		return errors.New("failed to prepare update or update action performed out of band")
+	}
+	return nil
+}
+
+// Update activates the staged update and verifies that the command succeeded.
+// target is not consulted; the API acts on its own staged update.
+func (p Platform) Update(target platform.Update) error {
+	updateStatus, err := p.apiClient.GetUpdateStatus()
+	if err != nil {
+		return err
+	}
+	if updateStatus.UpdateState != Staged {
+		return errors.Errorf("unexpected update state: %s, expecting state to be 'Staged'. update action performed out of band?", updateStatus.UpdateState)
+	}
+
+	// Activate the prepared update
+	err = p.apiClient.ActivateUpdate()
+	if err != nil {
+		return err
+	}
+
+	commandResult, err := p.apiClient.GetMostRecentCommand()
+	if err != nil {
+		return err
+	}
+	if !commandSucceeded(commandResult, activate) {
+		return errors.New("failed to activate update or update action performed out of band")
+	}
+	return nil
+}
+
+// BootUpdate reboots the host once the update state is 'Ready'.
+// target and rebootNow are not consulted; the reboot is always requested immediately.
+func (p Platform) BootUpdate(target platform.Update, rebootNow bool) error {
+	updateStatus, err := p.apiClient.GetUpdateStatus()
+	if err != nil {
+		return err
+	}
+	if updateStatus.UpdateState != Ready {
+		return errors.Errorf("unexpected update state: %s, expecting state to be 'Ready'. update action performed out of band?", updateStatus.UpdateState)
+	}
+
+	// Reboot the host into the activated update
+	return p.apiClient.Reboot()
+}
diff --git a/pkg/platform/interface.go b/pkg/platform/interface.go
index bc718438..dc414719 100644
--- a/pkg/platform/interface.go
+++ b/pkg/platform/interface.go
@@ -50,7 +50,7 @@ type Update interface {
 func Ping(p Platform) error {
 	status, err := p.Status()
 	if err != nil {
-		return errors.WithMessage(err, "could not retrieve platform status")
+		return errors.Wrap(err, "could not retrieve platform status")
 	}
 	if !status.OK() {
 		return errors.New("platform did not report OK status")
diff --git a/update-operator.yaml b/update-operator.yaml
deleted file mode 100644
index 15c370d1..00000000
--- a/update-operator.yaml
+++ /dev/null
@@ -1,198 +0,0 @@
-apiVersion: v1
-kind: Namespace
-metadata:
-  name: bottlerocket
----
-apiVersion: rbac.authorization.k8s.io/v1
-kind: ClusterRole
-metadata:
-  name: bottlerocket-update-operator-controller
-rules:
-  - apiGroups: [""]
-    resources: ["nodes"]
-    verbs: ["get", "list", "watch", "update", "patch"]
-  # Allow the controller to remove Pods running on the Nodes that are updating.
- - apiGroups: [""] - resources: ["pods"] - verbs: ["get", "list", "delete"] ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: bottlerocket-update-operator-controller -subjects: - - kind: ServiceAccount - name: update-operator-controller - namespace: bottlerocket -roleRef: - kind: ClusterRole - name: bottlerocket-update-operator-controller - apiGroup: rbac.authorization.k8s.io ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: bottlerocket-update-operator-agent -rules: - - apiGroups: [""] - resources: ["nodes"] - verbs: ["get", "list", "watch", "update", "patch"] ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRoleBinding -metadata: - name: bottlerocket-update-operator-agent -subjects: - - kind: ServiceAccount - name: update-operator-agent - namespace: bottlerocket -roleRef: - kind: ClusterRole - name: bottlerocket-update-operator-agent - apiGroup: rbac.authorization.k8s.io ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: update-operator-controller - namespace: bottlerocket - annotations: - kubernetes.io/service-account.name: update-operator-controller ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: update-operator-agent - namespace: bottlerocket - annotations: - kubernetes.io/service-account.name: update-operator-agent ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: update-operator-controller - namespace: bottlerocket - labels: - update-operator: controller -spec: - replicas: 1 - strategy: - rollingUpdate: - maxUnavailable: 100% - selector: - matchLabels: - update-operator: controller - template: - metadata: - namespace: bottlerocket - labels: - update-operator: controller - spec: - serviceAccountName: update-operator-controller - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: bottlerocket.aws/updater-interface-version - operator: Exists - - key: 
"kubernetes.io/os" - operator: In - values: - - linux - - key: "kubernetes.io/arch" - operator: In - values: - - amd64 - # Avoid update-operator's Agent Pods if possible. - podAntiAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 10 - podAffinityTerm: - topologyKey: bottlerocket.aws/updater-interface-version - labelSelector: - matchExpressions: - - key: update-operator - operator: In - values: ["agent"] - containers: - - name: controller - image: "328549459982.dkr.ecr.us-west-2.amazonaws.com/bottlerocket-update-operator:v0.1.3" - imagePullPolicy: Always - args: - - -controller - - -debug - - -nodeName - - $(NODE_NAME) - env: - - name: NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName ---- -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: update-operator-agent - namespace: bottlerocket - labels: - update-operator: agent -spec: - selector: - matchLabels: - update-operator: agent - template: - metadata: - labels: - update-operator: agent - spec: - serviceAccountName: update-operator-agent - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: bottlerocket.aws/updater-interface-version - operator: Exists - - key: "kubernetes.io/os" - operator: In - values: - - linux - - key: "kubernetes.io/arch" - operator: In - values: - - amd64 - hostPID: true - containers: - - name: agent - image: "328549459982.dkr.ecr.us-west-2.amazonaws.com/bottlerocket-update-operator:v0.1.3" - imagePullPolicy: Always - # XXX: tty required to exec binaries that use `simplelog` until https://github.com/bottlerocket-os/bottlerocket/issues/576 is resolved. - tty: true - args: - - -agent - - -debug - - -nodeName - - $(NODE_NAME) - env: - - name: NODE_NAME - valueFrom: - fieldRef: - fieldPath: spec.nodeName - securityContext: - # Required for executing OS update operations. 
- privileged: true - resources: - limits: - memory: 600Mi - requests: - cpu: 100m - memory: 600Mi - volumeMounts: - - name: rootfs - mountPath: /.bottlerocket/rootfs - volumes: - - name: rootfs - hostPath: - path: / - type: Directory diff --git a/update-operator.yaml b/update-operator.yaml new file mode 120000 index 00000000..a104ee05 --- /dev/null +++ b/update-operator.yaml @@ -0,0 +1 @@ +./deploy/update-operator-api.yaml \ No newline at end of file