From cbb09540c952e7dafbb5456bdc31eab6411d45a3 Mon Sep 17 00:00:00 2001 From: Joe Kratzat Date: Fri, 20 May 2022 08:50:07 -0400 Subject: [PATCH] feat: add support for MachinePool (#89) --- Dockerfile | 2 + Makefile | 13 +- api/v1beta1/ocicluster_types.go | 15 + api/v1beta1/zz_generated.deepcopy.go | 27 + cloud/ociutil/ociutil.go | 10 + cloud/scope/clients.go | 31 +- cloud/scope/cluster.go | 50 +- cloud/scope/cluster_test.go | 108 ++++ cloud/scope/machine.go | 2 + cloud/scope/machine_pool.go | 510 ++++++++++++++++++ cloud/services/computemanagement/client.go | 22 + ...tructure.cluster.x-k8s.io_ociclusters.yaml | 19 + ...ture.cluster.x-k8s.io_ocimachinepools.yaml | 164 ++++++ config/crd/kustomization.yaml | 1 + .../cainjection_in_ocimachinepools.yaml | 7 + config/manager/manager.yaml | 1 + config/rbac/role.yaml | 46 ++ exp/api/v1beta1/conditions_consts.go | 39 ++ exp/api/v1beta1/groupversion_type.go | 33 ++ exp/api/v1beta1/ocimachinepool_types.go | 148 +++++ exp/api/v1beta1/zz_generated.deepcopy.go | 236 ++++++++ exp/controllers/ocimachinepool_controller.go | 382 +++++++++++++ feature/feature.go | 38 ++ feature/gates.go | 35 ++ go.mod | 2 + kind-with-registry.sh | 64 +++ main.go | 31 +- templates/cluster-template-machinepool.yaml | 139 +++++ vendor/modules.txt | 2 + 29 files changed, 2159 insertions(+), 18 deletions(-) create mode 100644 cloud/scope/machine_pool.go create mode 100644 cloud/services/computemanagement/client.go create mode 100644 config/crd/bases/infrastructure.cluster.x-k8s.io_ocimachinepools.yaml create mode 100644 config/crd/patches/cainjection_in_ocimachinepools.yaml create mode 100644 exp/api/v1beta1/conditions_consts.go create mode 100644 exp/api/v1beta1/groupversion_type.go create mode 100644 exp/api/v1beta1/ocimachinepool_types.go create mode 100644 exp/api/v1beta1/zz_generated.deepcopy.go create mode 100644 exp/controllers/ocimachinepool_controller.go create mode 100644 feature/feature.go create mode 100644 feature/gates.go create mode 100755 
kind-with-registry.sh create mode 100644 templates/cluster-template-machinepool.yaml diff --git a/Dockerfile b/Dockerfile index d975dd5b..15a02898 100644 --- a/Dockerfile +++ b/Dockerfile @@ -15,6 +15,8 @@ COPY main.go main.go COPY api/ api/ COPY controllers/ controllers/ COPY cloud/ cloud/ +COPY exp/ exp/ +COPY feature/ feature/ COPY vendor/ vendor/ # Build diff --git a/Makefile b/Makefile index cc2d2d3e..3f7968d8 100644 --- a/Makefile +++ b/Makefile @@ -54,6 +54,9 @@ IMG ?= controller:latest # Produce CRDs that work back to Kubernetes 1.11 (no version conversion) CRD_OPTIONS ?= "crd" +# enable machine pool feature +EXP_MACHINE_POOL ?= false + # Set build time variables including version details LDFLAGS := $(shell source ./hack/version.sh; version::ldflags) @@ -91,10 +94,14 @@ help: ## Display this help. ##@ Development manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects. - $(CONTROLLER_GEN) $(CRD_OPTIONS) rbac:roleName=manager-role webhook paths="./api/..." output:crd:artifacts:config=config/crd/bases + $(CONTROLLER_GEN) $(CRD_OPTIONS) \ + rbac:roleName=manager-role webhook \ + paths="./api/..." \ + paths="./exp/api/..." \ + output:crd:artifacts:config=config/crd/bases generate: controller-gen ## Generate code containing DeepCopy, DeepCopyInto, and DeepCopyObject method implementations. - $(CONTROLLER_GEN) object:headerFile="hack/boilerplate.go.txt" paths="./api/..." + $(CONTROLLER_GEN) object:headerFile="hack/boilerplate.go.txt" paths="./api/..." paths="./exp/api/..." fmt: ## Run go fmt against code. go fmt ./... @@ -115,7 +122,7 @@ build: generate fmt vet ## Build manager binary. CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -ldflags "${LDFLAGS} -extldflags '-static'" -o bin/manager . run: manifests generate fmt vet ## Run a controller from your host. 
- go run ./main.go + go run ./main.go --feature-gates=MachinePool=${EXP_MACHINE_POOL} ## -------------------------------------- ## Linting diff --git a/api/v1beta1/ocicluster_types.go b/api/v1beta1/ocicluster_types.go index 2dfcdfd7..0359ab3b 100644 --- a/api/v1beta1/ocicluster_types.go +++ b/api/v1beta1/ocicluster_types.go @@ -71,6 +71,11 @@ type OCIClusterStatus struct { // +optional FailureDomains clusterv1.FailureDomains `json:"failureDomains,omitempty"` + // AvailabilityDomains encapsulates the clusters Availability Domain (AD) information in a map + // where the map key is the AD name and the struct is details about the AD. + // +optional + AvailabilityDomains map[string]OCIAvailabilityDomain `json:"availabilityDomains,omitempty"` + // +optional Ready bool `json:"ready"` // NetworkSpec encapsulates all things related to OCI network. @@ -99,6 +104,16 @@ type OCIClusterList struct { Items []OCICluster `json:"items"` } +// OCIAvailabilityDomain contains information about an Availability Domain (AD). +type OCIAvailabilityDomain struct { + + // Name is the AD's full name. Example: Uocm:PHX-AD-1 + Name string `json:"name,omitempty"` + + // FaultDomains a list of fault domain (FD) names. Example: ["FAULT-DOMAIN-1"] + FaultDomains []string `json:"faultDomains,omitempty"` +} + // GetConditions returns the list of conditions for an OCICluster API object. func (c *OCICluster) GetConditions() clusterv1.Conditions { return c.Status.Conditions diff --git a/api/v1beta1/zz_generated.deepcopy.go b/api/v1beta1/zz_generated.deepcopy.go index 5afe11ab..bdae014b 100644 --- a/api/v1beta1/zz_generated.deepcopy.go +++ b/api/v1beta1/zz_generated.deepcopy.go @@ -320,6 +320,26 @@ func (in *NetworkSpec) DeepCopy() *NetworkSpec { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *OCIAvailabilityDomain) DeepCopyInto(out *OCIAvailabilityDomain) { + *out = *in + if in.FaultDomains != nil { + in, out := &in.FaultDomains, &out.FaultDomains + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OCIAvailabilityDomain. +func (in *OCIAvailabilityDomain) DeepCopy() *OCIAvailabilityDomain { + if in == nil { + return nil + } + out := new(OCIAvailabilityDomain) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *OCICluster) DeepCopyInto(out *OCICluster) { *out = *in @@ -430,6 +450,13 @@ func (in *OCIClusterStatus) DeepCopyInto(out *OCIClusterStatus) { (*out)[key] = *val.DeepCopy() } } + if in.AvailabilityDomains != nil { + in, out := &in.AvailabilityDomains, &out.AvailabilityDomains + *out = make(map[string]OCIAvailabilityDomain, len(*in)) + for key, val := range *in { + (*out)[key] = *val.DeepCopy() + } + } if in.Conditions != nil { in, out := &in.Conditions, &out.Conditions *out = make(apiv1beta1.Conditions, len(*in)) diff --git a/cloud/ociutil/ociutil.go b/cloud/ociutil/ociutil.go index 4198bf22..f42a9d6a 100644 --- a/cloud/ociutil/ociutil.go +++ b/cloud/ociutil/ociutil.go @@ -103,6 +103,16 @@ func GetBaseLineOcpuOptimizationEnum(baseLineOcpuOptmimizationString string) (co return "", errors.New("invalid baseline cpu optimization parameter") } +// GetInstanceConfigBaseLineOcpuOptimizationEnum iterates over the valid baseline OCPUs to validate the passed in value +func GetInstanceConfigBaseLineOcpuOptimizationEnum(baseLineOcpuOptmimizationString string) (core.InstanceConfigurationLaunchInstanceShapeConfigDetailsBaselineOcpuUtilizationEnum, error) { + for _, e := range core.GetInstanceConfigurationLaunchInstanceShapeConfigDetailsBaselineOcpuUtilizationEnumValues() { + if string(e) == baseLineOcpuOptmimizationString { + return e, nil + } 
+ } + return "", errors.New("invalid baseline cpu optimization parameter") +} + // GetDefaultClusterTags creates and returns a map of the default tags for all clusters func GetDefaultClusterTags() map[string]string { tags := make(map[string]string) diff --git a/cloud/scope/clients.go b/cloud/scope/clients.go index 4e700500..9d481fae 100644 --- a/cloud/scope/clients.go +++ b/cloud/scope/clients.go @@ -21,6 +21,7 @@ import ( "github.com/go-logr/logr" "github.com/oracle/cluster-api-provider-oci/cloud/services/compute" + "github.com/oracle/cluster-api-provider-oci/cloud/services/computemanagement" identityClient "github.com/oracle/cluster-api-provider-oci/cloud/services/identity" nlb "github.com/oracle/cluster-api-provider-oci/cloud/services/networkloadbalancer" "github.com/oracle/cluster-api-provider-oci/cloud/services/vcn" @@ -34,10 +35,11 @@ import ( // OCIClients is the struct of all the needed OCI clients type OCIClients struct { - ComputeClient compute.ComputeClient - VCNClient vcn.Client - LoadBalancerClient nlb.NetworkLoadBalancerClient - IdentityClient identityClient.Client + ComputeClient compute.ComputeClient + ComputeManagementClient computemanagement.Client + VCNClient vcn.Client + LoadBalancerClient nlb.NetworkLoadBalancerClient + IdentityClient identityClient.Client } // ClientProvider defines the regional clients @@ -96,16 +98,18 @@ func createClients(region string, oCIAuthConfigProvider common.ConfigurationProv lbClient, err := createLbClient(region, oCIAuthConfigProvider, logger) identityClient, err := createIdentityClient(region, oCIAuthConfigProvider, logger) computeClient, err := createComputeClient(region, oCIAuthConfigProvider, logger) + computeManagementClient, err := createComputeManagementClient(region, oCIAuthConfigProvider, logger) if err != nil { return OCIClients{}, err } return OCIClients{ - VCNClient: vcnClient, - LoadBalancerClient: lbClient, - IdentityClient: identityClient, - ComputeClient: computeClient, + VCNClient: vcnClient, + 
LoadBalancerClient: lbClient, + IdentityClient: identityClient, + ComputeClient: computeClient, + ComputeManagementClient: computeManagementClient, }, err } @@ -152,3 +156,14 @@ func createComputeClient(region string, ociAuthConfigProvider common.Configurati return &computeClient, nil } + +func createComputeManagementClient(region string, ociAuthConfigProvider common.ConfigurationProvider, logger *logr.Logger) (*core.ComputeManagementClient, error) { + computeManagementClient, err := core.NewComputeManagementClientWithConfigurationProvider(ociAuthConfigProvider) + if err != nil { + logger.Error(err, "unable to create OCI Compute Management Client") + return nil, err + } + computeManagementClient.SetRegion(region) + + return &computeManagementClient, nil +} diff --git a/cloud/scope/cluster.go b/cloud/scope/cluster.go index 79c823bb..0c9aa27d 100644 --- a/cloud/scope/cluster.go +++ b/cloud/scope/cluster.go @@ -20,6 +20,7 @@ import ( "context" "fmt" "reflect" + "sigs.k8s.io/cluster-api/util/conditions" "strconv" "github.com/oracle/cluster-api-provider-oci/cloud/services/vcn" @@ -108,6 +109,7 @@ func NewClusterScope(params ClusterScopeParams) (*ClusterScope, error) { // PatchObject persists the cluster configuration and status. 
func (s *ClusterScope) PatchObject(ctx context.Context) error { + conditions.SetSummary(s.OCICluster) return s.patchHelper.Patch(ctx, s.OCICluster) } @@ -137,15 +139,21 @@ func (s *ClusterScope) IsResourceCreatedByClusterAPI(resourceFreeFormTags map[st // in case of single AD regions, the failure domain will be fault domain, in case of multi Ad regions, it will // be AD func (s *ClusterScope) setFailureDomains(ctx context.Context) error { - req := identity.ListAvailabilityDomainsRequest{CompartmentId: common.String(s.GetCompartmentId())} + reqAd := identity.ListAvailabilityDomainsRequest{CompartmentId: common.String(s.GetCompartmentId())} - resp, err := s.IdentityClient.ListAvailabilityDomains(ctx, req) + respAd, err := s.IdentityClient.ListAvailabilityDomains(ctx, reqAd) if err != nil { s.Logger.Error(err, "failed to list identity domains") return err } - numOfAds := len(resp.Items) + // build the AD list for cluster + err = s.setAvailabiltyDomainStatus(ctx, respAd.Items) + if err != nil { + return err + } + + numOfAds := len(respAd.Items) if numOfAds != 1 && numOfAds != 3 { err := errors.New(fmt.Sprintf("invalid number of Availability Domains, should be either 1 or 3, but got %d", numOfAds)) s.Logger.Error(err, "invalid number of Availability Domains") @@ -153,7 +161,7 @@ func (s *ClusterScope) setFailureDomains(ctx context.Context) error { } if numOfAds == 3 { - for i, ad := range resp.Items { + for i, ad := range respAd.Items { s.SetFailureDomain(strconv.Itoa(i+1), clusterv1.FailureDomainSpec{ ControlPlane: true, Attributes: map[string]string{AvailabilityDomain: *ad.Name}, @@ -162,7 +170,7 @@ func (s *ClusterScope) setFailureDomains(ctx context.Context) error { } else { req := identity.ListFaultDomainsRequest{ CompartmentId: common.String(s.GetCompartmentId()), - AvailabilityDomain: resp.Items[0].Name, + AvailabilityDomain: respAd.Items[0].Name, } resp, err := s.IdentityClient.ListFaultDomains(ctx, req) if err != nil { @@ -191,6 +199,38 @@ func (s 
*ClusterScope) SetFailureDomain(id string, spec clusterv1.FailureDomainS s.OCICluster.Status.FailureDomains[id] = spec } +// setAvailabiltyDomainStatus builds the OCIAvailabilityDomain list and sets the OCICluster's status with this list +// so that other parts of the provider have access to ADs and FDs without having to make multiple calls to identity. +func (s *ClusterScope) setAvailabiltyDomainStatus(ctx context.Context, ads []identity.AvailabilityDomain) error { + clusterAds := make(map[string]infrastructurev1beta1.OCIAvailabilityDomain) + for _, ad := range ads { + reqFd := identity.ListFaultDomainsRequest{ + CompartmentId: common.String(s.GetCompartmentId()), + AvailabilityDomain: ad.Name, + } + respFd, err := s.IdentityClient.ListFaultDomains(ctx, reqFd) + if err != nil { + s.Logger.Error(err, "failed to list fault domains") + return err + } + + var faultDomains []string + for _, fd := range respFd.Items { + faultDomains = append(faultDomains, *fd.Name) + } + + adName := *ad.Name + clusterAds[adName] = infrastructurev1beta1.OCIAvailabilityDomain{ + Name: adName, + FaultDomains: faultDomains, + } + } + + s.OCICluster.Status.AvailabilityDomains = clusterAds + + return nil +} + func (s *ClusterScope) IsTagsEqual(freeFromTags map[string]string, definedTags map[string]map[string]interface{}) bool { if reflect.DeepEqual(freeFromTags, s.GetFreeFormTags()) && reflect.DeepEqual(definedTags, s.GetDefinedTags()) { return true diff --git a/cloud/scope/cluster_test.go b/cloud/scope/cluster_test.go index 77a13ad6..cd56c333 100644 --- a/cloud/scope/cluster_test.go +++ b/cloud/scope/cluster_test.go @@ -97,6 +97,114 @@ func TestClusterScope_ReconcileFailureDomains(t *testing.T) { }, }}, nil) + identityClient.EXPECT().ListFaultDomains(gomock.Any(), gomock.Eq(identity.ListFaultDomainsRequest{ + CompartmentId: common.String("3ad"), + AvailabilityDomain: common.String("ad1"), + })).Return(identity.ListFaultDomainsResponse{Items: []identity.FaultDomain{ + { + Name: 
common.String("fault-domain-1"), + AvailabilityDomain: common.String("ad1"), + }, + { + Name: common.String("fault-domain-2"), + AvailabilityDomain: common.String("ad1"), + }, + { + Name: common.String("fault-domain-3"), + AvailabilityDomain: common.String("ad1"), + }, + }}, nil) + + identityClient.EXPECT().ListFaultDomains(gomock.Any(), gomock.Eq(identity.ListFaultDomainsRequest{ + CompartmentId: common.String("3ad"), + AvailabilityDomain: common.String("ad2"), + })).Return(identity.ListFaultDomainsResponse{Items: []identity.FaultDomain{ + { + Name: common.String("fault-domain-1"), + AvailabilityDomain: common.String("ad1"), + }, + { + Name: common.String("fault-domain-2"), + AvailabilityDomain: common.String("ad1"), + }, + { + Name: common.String("fault-domain-3"), + AvailabilityDomain: common.String("ad1"), + }, + }}, nil) + + identityClient.EXPECT().ListFaultDomains(gomock.Any(), gomock.Eq(identity.ListFaultDomainsRequest{ + CompartmentId: common.String("3ad"), + AvailabilityDomain: common.String("ad3"), + })).Return(identity.ListFaultDomainsResponse{Items: []identity.FaultDomain{ + { + Name: common.String("fault-domain-1"), + AvailabilityDomain: common.String("ad1"), + }, + { + Name: common.String("fault-domain-2"), + AvailabilityDomain: common.String("ad1"), + }, + { + Name: common.String("fault-domain-3"), + AvailabilityDomain: common.String("ad1"), + }, + }}, nil) + + identityClient.EXPECT().ListFaultDomains(gomock.Any(), gomock.Eq(identity.ListFaultDomainsRequest{ + CompartmentId: common.String("1ad"), + AvailabilityDomain: common.String("ad1"), + })).Return(identity.ListFaultDomainsResponse{Items: []identity.FaultDomain{ + { + Name: common.String("fd1"), + AvailabilityDomain: common.String("ad1"), + }, + { + Name: common.String("fd2"), + AvailabilityDomain: common.String("ad1"), + }, + { + Name: common.String("fd3"), + AvailabilityDomain: common.String("ad1"), + }, + }}, nil) + + identityClient.EXPECT().ListFaultDomains(gomock.Any(), 
gomock.Eq(identity.ListFaultDomainsRequest{ + CompartmentId: common.String("2ad"), + AvailabilityDomain: common.String("ad1"), + })).Return(identity.ListFaultDomainsResponse{Items: []identity.FaultDomain{ + { + Name: common.String("fd1"), + AvailabilityDomain: common.String("ad1"), + }, + { + Name: common.String("fd2"), + AvailabilityDomain: common.String("ad1"), + }, + { + Name: common.String("fd3"), + AvailabilityDomain: common.String("ad1"), + }, + }}, nil) + + identityClient.EXPECT().ListFaultDomains(gomock.Any(), gomock.Eq(identity.ListFaultDomainsRequest{ + CompartmentId: common.String("2ad"), + AvailabilityDomain: common.String("ad2"), + })).Return(identity.ListFaultDomainsResponse{Items: []identity.FaultDomain{ + { + Name: common.String("fd1"), + AvailabilityDomain: common.String("ad1"), + }, + { + Name: common.String("fd2"), + AvailabilityDomain: common.String("ad1"), + }, + { + Name: common.String("fd3"), + AvailabilityDomain: common.String("ad1"), + }, + }}, nil) + identityClient.EXPECT().ListFaultDomains(gomock.Any(), gomock.Eq(identity.ListFaultDomainsRequest{ CompartmentId: common.String("list-fd-error"), AvailabilityDomain: common.String("ad1"), diff --git a/cloud/scope/machine.go b/cloud/scope/machine.go index 5d5e3f3d..da00c346 100644 --- a/cloud/scope/machine.go +++ b/cloud/scope/machine.go @@ -21,6 +21,7 @@ import ( "encoding/base64" "fmt" "math/rand" + "sigs.k8s.io/cluster-api/util/conditions" "strconv" "time" @@ -330,6 +331,7 @@ func (m *MachineScope) GetMachineByDisplayName(ctx context.Context, name string) // PatchObject persists the cluster configuration and status. 
func (m *MachineScope) PatchObject(ctx context.Context) error { + conditions.SetSummary(m.OCIMachine) return m.patchHelper.Patch(ctx, m.OCIMachine) } diff --git a/cloud/scope/machine_pool.go b/cloud/scope/machine_pool.go new file mode 100644 index 00000000..8cb17ca8 --- /dev/null +++ b/cloud/scope/machine_pool.go @@ -0,0 +1,510 @@ +/* + Copyright (c) 2022 Oracle and/or its affiliates. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ + +package scope + +import ( + "context" + "encoding/base64" + "fmt" + "strconv" + "strings" + + "github.com/go-logr/logr" + infrastructurev1beta1 "github.com/oracle/cluster-api-provider-oci/api/v1beta1" + "github.com/oracle/cluster-api-provider-oci/cloud/ociutil" + "github.com/oracle/cluster-api-provider-oci/cloud/services/computemanagement" + expinfra1 "github.com/oracle/cluster-api-provider-oci/exp/api/v1beta1" + infrav1exp "github.com/oracle/cluster-api-provider-oci/exp/api/v1beta1" + "github.com/oracle/oci-go-sdk/v63/common" + "github.com/oracle/oci-go-sdk/v63/core" + "github.com/pkg/errors" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/klog/v2/klogr" + "k8s.io/utils/pointer" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + capierrors "sigs.k8s.io/cluster-api/errors" + expclusterv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" + "sigs.k8s.io/cluster-api/util/conditions" + "sigs.k8s.io/cluster-api/util/patch" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +const OCIMachinePoolKind = "OCIMachinePool" + +// 
MachineScopeParams defines the params need to create a new MachineScope +type MachinePoolScopeParams struct { + Logger *logr.Logger + Cluster *clusterv1.Cluster + MachinePool *expclusterv1.MachinePool + Client client.Client + ComputeManagementClient computemanagement.Client + OCICluster *infrastructurev1beta1.OCICluster + OCIMachinePool *expinfra1.OCIMachinePool + OCIMachine *infrastructurev1beta1.OCIMachine +} + +type MachinePoolScope struct { + *logr.Logger + Client client.Client + patchHelper *patch.Helper + Cluster *clusterv1.Cluster + MachinePool *expclusterv1.MachinePool + ComputeManagementClient computemanagement.Client + OCICluster *infrastructurev1beta1.OCICluster + OCIMachinePool *expinfra1.OCIMachinePool + OCIMachine *infrastructurev1beta1.OCIMachine +} + +// NewMachinePoolScope creates a MachinePoolScope given the MachinePoolScopeParams +func NewMachinePoolScope(params MachinePoolScopeParams) (*MachinePoolScope, error) { + if params.MachinePool == nil { + return nil, errors.New("failed to generate new scope from nil MachinePool") + } + if params.OCICluster == nil { + return nil, errors.New("failed to generate new scope from nil OCICluster") + } + + if params.Logger == nil { + log := klogr.New() + params.Logger = &log + } + helper, err := patch.NewHelper(params.OCIMachinePool, params.Client) + if err != nil { + return nil, errors.Wrap(err, "failed to init patch helper") + } + + return &MachinePoolScope{ + Logger: params.Logger, + Client: params.Client, + ComputeManagementClient: params.ComputeManagementClient, + Cluster: params.Cluster, + OCICluster: params.OCICluster, + patchHelper: helper, + MachinePool: params.MachinePool, + OCIMachinePool: params.OCIMachinePool, + }, nil +} + +// PatchObject persists the cluster configuration and status. +func (m *MachinePoolScope) PatchObject(ctx context.Context) error { + return m.patchHelper.Patch(ctx, m.OCIMachinePool) +} + +// Close closes the current scope persisting the cluster configuration and status. 
+func (m *MachinePoolScope) Close(ctx context.Context) error { + return m.PatchObject(ctx) +} + +// HasFailed returns true when the OCIMachinePool's Failure reason or Failure message is populated. +func (m *MachinePoolScope) HasFailed() bool { + return m.OCIMachinePool.Status.FailureReason != nil || m.OCIMachinePool.Status.FailureMessage != nil +} + +// GetInstanceConfigurationId returns the MachinePoolScope instance configuration id. +func (m *MachinePoolScope) GetInstanceConfigurationId() string { + return m.OCIMachinePool.Spec.InstanceConfiguration.InstanceConfigurationId +} + +// SetInstanceConfigurationIdStatus sets the MachinePool InstanceConfigurationId status. +func (m *MachinePoolScope) SetInstanceConfigurationIdStatus(id string) { + m.OCIMachinePool.Spec.InstanceConfiguration.InstanceConfigurationId = id +} + +// SetFailureMessage sets the OCIMachine status error message. +func (m *MachinePoolScope) SetFailureMessage(v error) { + m.OCIMachinePool.Status.FailureMessage = pointer.StringPtr(v.Error()) +} + +// SetFailureReason sets the OCIMachine status error reason. +func (m *MachinePoolScope) SetFailureReason(v capierrors.MachineStatusError) { + m.OCIMachinePool.Status.FailureReason = &v +} + +// SetReady sets the OCIMachine Ready Status. +func (m *MachinePoolScope) SetReady() { + m.OCIMachinePool.Status.Ready = true +} + +// GetWorkerMachineSubnet returns the WorkerRole core.Subnet id for the cluster +func (m *MachinePoolScope) GetWorkerMachineSubnet() *string { + for _, subnet := range m.OCICluster.Spec.NetworkSpec.Vcn.Subnets { + if subnet.Role == infrastructurev1beta1.WorkerRole { + return subnet.ID + } + } + return nil +} + +// GetBootstrapData returns the bootstrap data from the secret in the Machine's bootstrap.dataSecretName. 
+func (m *MachinePoolScope) GetBootstrapData() (string, error) { + if m.MachinePool.Spec.Template.Spec.Bootstrap.DataSecretName == nil { + return "", errors.New("error retrieving bootstrap data: linked MachinePool's bootstrap.dataSecretName is nil") + } + + secret := &corev1.Secret{} + key := types.NamespacedName{Namespace: m.MachinePool.Namespace, Name: *m.MachinePool.Spec.Template.Spec.Bootstrap.DataSecretName} + if err := m.Client.Get(context.TODO(), key, secret); err != nil { + return "", errors.Wrapf(err, "failed to retrieve bootstrap data secret for OCIMachinePool %s/%s", m.MachinePool.Namespace, m.MachinePool.Name) + } + + value, ok := secret.Data["value"] + if !ok { + return "", errors.New("error retrieving bootstrap data: secret value key is missing") + } + return string(value), nil +} + +// GetWorkerMachineNSG returns the worker role core.NetworkSecurityGroup id for the cluster +func (m *MachinePoolScope) GetWorkerMachineNSG() *string { + for _, nsg := range m.OCICluster.Spec.NetworkSpec.Vcn.NetworkSecurityGroups { + if nsg.Role == infrastructurev1beta1.WorkerRole { + return nsg.ID + } + } + return nil +} + +// BuildInstanceConfigurationShapeConfig builds the core.InstanceConfigurationLaunchInstanceShapeConfigDetails based +// on the MachinePoolScope +func (m *MachinePoolScope) BuildInstanceConfigurationShapeConfig() (core.InstanceConfigurationLaunchInstanceShapeConfigDetails, error) { + shapeConfig := core.InstanceConfigurationLaunchInstanceShapeConfigDetails{} + if (m.OCIMachinePool.Spec.ShapeConfig != expinfra1.ShapeConfig{}) { + ocpuString := m.OCIMachinePool.Spec.ShapeConfig.Ocpus + if ocpuString != "" { + ocpus, err := strconv.ParseFloat(ocpuString, 32) + if err != nil { + return core.InstanceConfigurationLaunchInstanceShapeConfigDetails{}, errors.New(fmt.Sprintf("ocpus provided %s is not a valid floating point", + ocpuString)) + } + shapeConfig.Ocpus = common.Float32(float32(ocpus)) + } + + memoryInGBsString := 
m.OCIMachinePool.Spec.ShapeConfig.MemoryInGBs + if memoryInGBsString != "" { + memoryInGBs, err := strconv.ParseFloat(memoryInGBsString, 32) + if err != nil { + return core.InstanceConfigurationLaunchInstanceShapeConfigDetails{}, errors.New(fmt.Sprintf("memoryInGBs provided %s is not a valid floating point", + memoryInGBsString)) + } + shapeConfig.MemoryInGBs = common.Float32(float32(memoryInGBs)) + } + baselineOcpuOptString := m.OCIMachinePool.Spec.ShapeConfig.BaselineOcpuUtilization + if baselineOcpuOptString != "" { + value, err := ociutil.GetInstanceConfigBaseLineOcpuOptimizationEnum(baselineOcpuOptString) + if err != nil { + return core.InstanceConfigurationLaunchInstanceShapeConfigDetails{}, err + } + shapeConfig.BaselineOcpuUtilization = value + } + } + + return shapeConfig, nil +} + +func (s *MachinePoolScope) BuildInstancePoolPlacement() ([]core.CreateInstancePoolPlacementConfigurationDetails, error) { + var placements []core.CreateInstancePoolPlacementConfigurationDetails + + ads := s.OCICluster.Status.AvailabilityDomains + + specPlacementDetails := s.OCIMachinePool.Spec.PlacementDetails + + // make sure user doesn't specify 3 ads when there is only one available + if len(specPlacementDetails) > len(ads) { + errMsg := fmt.Sprintf("Cluster has %d ADs specified and the machine pools spec has %d", len(ads), len(specPlacementDetails)) + return nil, errors.New(errMsg) + } + + // build placements from the user spec + for _, ad := range ads { + for _, specPlacment := range specPlacementDetails { + if strings.HasSuffix(ad.Name, strconv.Itoa(specPlacment.AvailabilityDomain)) { + placement := core.CreateInstancePoolPlacementConfigurationDetails{ + AvailabilityDomain: common.String(ad.Name), + PrimarySubnetId: s.GetWorkerMachineSubnet(), + FaultDomains: ad.FaultDomains, + } + s.Info("Adding machine placement for AD", "AD", ad.Name) + placements = append(placements, placement) + } + } + } + + // build placements if the user hasn't specified any + if len(placements) 
<= 0 { + for _, ad := range ads { + placement := core.CreateInstancePoolPlacementConfigurationDetails{ + AvailabilityDomain: common.String(ad.Name), + PrimarySubnetId: s.GetWorkerMachineSubnet(), + FaultDomains: ad.FaultDomains, + } + placements = append(placements, placement) + } + } + + return placements, nil +} + +// IsResourceCreatedByClusterAPI determines if the instance was created by the cluster using the +// tags created at instance launch. +func (s *MachinePoolScope) IsResourceCreatedByClusterAPI(resourceFreeFormTags map[string]string) bool { + tagsAddedByClusterAPI := ociutil.BuildClusterTags(string(s.OCICluster.GetOCIResourceIdentifier())) + for k, v := range tagsAddedByClusterAPI { + if resourceFreeFormTags[k] != v { + return false + } + } + return true +} + +// GetFreeFormTags gets the free form tags for the MachinePoolScope cluster and returns them +func (m *MachinePoolScope) GetFreeFormTags() map[string]string { + + tags := ociutil.BuildClusterTags(m.OCICluster.GetOCIResourceIdentifier()) + if m.OCICluster.Spec.FreeformTags != nil { + for k, v := range m.OCICluster.Spec.FreeformTags { + tags[k] = v + } + } + + return tags +} + +// ReconcileInstanceConfiguration works to try to reconcile the state of the instance configuration for the cluster +func (m *MachinePoolScope) ReconcileInstanceConfiguration(ctx context.Context) error { + var instanceConfiguration *core.InstanceConfiguration + instanceConfigurationId := m.GetInstanceConfigurationId() + + if len(instanceConfigurationId) > 0 { + req := core.GetInstanceConfigurationRequest{InstanceConfigurationId: common.String(instanceConfigurationId)} + instanceConfiguration, err := m.ComputeManagementClient.GetInstanceConfiguration(ctx, req) + if err == nil { + m.Info("instance configuration found", "InstanceConfigurationId", instanceConfiguration.Id) + m.SetInstanceConfigurationIdStatus(instanceConfigurationId) + return m.PatchObject(ctx) + } else { + return errors.Wrap(err, fmt.Sprintf("error getting 
instance configuration by id %s", instanceConfigurationId)) + } + } + + if instanceConfiguration == nil { + m.Info("Create new instance configuration") + + tags := m.GetFreeFormTags() + + cloudInitData, err := m.GetBootstrapData() + if err != nil { + return err + } + + metadata := m.OCIMachinePool.Spec.Metadata + if metadata == nil { + metadata = make(map[string]string) + } + metadata["user_data"] = base64.StdEncoding.EncodeToString([]byte(cloudInitData)) + + subnetId := m.GetWorkerMachineSubnet() + nsgId := m.GetWorkerMachineNSG() + + launchInstanceDetails := core.ComputeInstanceDetails{ + LaunchDetails: &core.InstanceConfigurationLaunchInstanceDetails{ + CompartmentId: common.String(m.OCICluster.Spec.CompartmentId), + DisplayName: common.String(m.OCIMachinePool.GetName()), + Shape: common.String(m.OCIMachinePool.Spec.InstanceConfiguration.InstanceDetails.Shape), + SourceDetails: core.InstanceConfigurationInstanceSourceViaImageDetails{ + ImageId: common.String(m.OCIMachinePool.Spec.ImageId), + }, + Metadata: metadata, + CreateVnicDetails: &core.InstanceConfigurationCreateVnicDetails{ + SubnetId: subnetId, + // TODO variablize AssignPublicIp in the future + AssignPublicIp: common.Bool(false), + NsgIds: []string{*nsgId}, + }, + }, + } + + shapeConfig, err := m.BuildInstanceConfigurationShapeConfig() + if err != nil { + conditions.MarkFalse(m.MachinePool, infrav1exp.LaunchTemplateReadyCondition, infrav1exp.LaunchTemplateCreateFailedReason, clusterv1.ConditionSeverityError, err.Error()) + m.Info("failed to create instance configuration due to shape config") + return err + } + if (shapeConfig != core.InstanceConfigurationLaunchInstanceShapeConfigDetails{}) { + launchInstanceDetails.LaunchDetails.ShapeConfig = &shapeConfig + } + + req := core.CreateInstanceConfigurationRequest{ + CreateInstanceConfiguration: core.CreateInstanceConfigurationDetails{ + CompartmentId: common.String(m.OCICluster.Spec.CompartmentId), + DisplayName: common.String(m.OCIMachinePool.GetName()), 
+ FreeformTags: tags, + InstanceDetails: launchInstanceDetails, + }, + } + + resp, err := m.ComputeManagementClient.CreateInstanceConfiguration(ctx, req) + if err != nil { + conditions.MarkFalse(m.MachinePool, infrav1exp.LaunchTemplateReadyCondition, infrav1exp.LaunchTemplateCreateFailedReason, clusterv1.ConditionSeverityError, err.Error()) + m.Info("failed to create instance configuration") + return err + } + + m.SetInstanceConfigurationIdStatus(*resp.Id) + return m.PatchObject(ctx) + } + + return nil +} + +// FindInstancePool attempts to find the instance pool by name and checks to make sure +// the instance pool was created by the cluster before returning the correct pool +func (m *MachinePoolScope) FindInstancePool(ctx context.Context) (*core.InstancePool, error) { + // We have to first list the pools to get the instance pool. + // List returns InstancePoolSummary which lacks some details of InstancePool + + reqList := core.ListInstancePoolsRequest{ + CompartmentId: common.String(m.OCICluster.Spec.CompartmentId), + DisplayName: common.String(m.OCIMachinePool.GetName()), + } + respList, err := m.ComputeManagementClient.ListInstancePools(ctx, reqList) + if err != nil { + return nil, errors.Wrapf(err, "failed to query OCIMachinePool by name") + } + + if len(respList.Items) <= 0 { + m.Info("No machine pool found", "machinepool-name", m.OCIMachinePool.GetName()) + return nil, nil + } + + var instancePoolSummary *core.InstancePoolSummary + for _, i := range respList.Items { + if m.IsResourceCreatedByClusterAPI(i.FreeformTags) { + instancePoolSummary = &i + break + } + } + if instancePoolSummary == nil { + m.Info("No machine pool found created by this cluster", "machinepool-name", m.OCIMachinePool.GetName()) + return nil, nil + } + + reqGet := core.GetInstancePoolRequest{ + InstancePoolId: instancePoolSummary.Id, + } + respGet, err := m.ComputeManagementClient.GetInstancePool(ctx, reqGet) + if err != nil { + return nil, errors.Wrapf(err, "failed to query 
OCIMachinePool with id %s", *instancePoolSummary.Id) + } + + if !m.IsResourceCreatedByClusterAPI(respGet.InstancePool.FreeformTags) { + return nil, errors.Errorf("failed to query OCIMachinePool not created by this cluster.") + } + + m.Info("Found existing instance pool", "id", instancePoolSummary.Id, "machinepool-name", m.OCIMachinePool.GetName()) + + return &respGet.InstancePool, nil +} + +// CreateInstancePool attempts to create an instance pool +func (m *MachinePoolScope) CreateInstancePool(ctx context.Context) (*core.InstancePool, error) { + if m.GetInstanceConfigurationId() == "" { + return nil, errors.New("OCIMachinePool has no InstanceConfigurationId for some reason") + } + + tags := m.GetFreeFormTags() + + // build placements + placements, err := m.BuildInstancePoolPlacement() + if err != nil { + return nil, errors.Wrapf(err, "unable to build instance pool placements") + } + + replicas := int(1) + if m.MachinePool.Spec.Replicas != nil { + replicas = int(*m.MachinePool.Spec.Replicas) + } + + m.Info("Creating Instance Pool") + req := core.CreateInstancePoolRequest{ + CreateInstancePoolDetails: core.CreateInstancePoolDetails{ + CompartmentId: common.String(m.OCICluster.Spec.CompartmentId), + InstanceConfigurationId: common.String(m.GetInstanceConfigurationId()), + Size: common.Int(replicas), + DisplayName: common.String(m.OCIMachinePool.GetName()), + + PlacementConfigurations: placements, + FreeformTags: tags, + }, + } + instancePool, err := m.ComputeManagementClient.CreateInstancePool(ctx, req) + if err != nil { + return nil, errors.Wrapf(err, "failed to create OCIMachinePool") + } + m.Info("Created Instance Pool", "id", instancePool.Id) + + return &instancePool.InstancePool, nil +} + +// UpdatePool attempts to update the instance pool +func (m *MachinePoolScope) UpdatePool(ctx context.Context, instancePool *core.InstancePool) error { + + if instancePoolNeedsUpdates(m, instancePool) { + m.Info("updating instance pool") + + replicas := int(1) + if 
m.MachinePool.Spec.Replicas != nil { + replicas = int(*m.MachinePool.Spec.Replicas) + } + + req := core.UpdateInstancePoolRequest{InstancePoolId: instancePool.Id, + UpdateInstancePoolDetails: core.UpdateInstancePoolDetails{Size: common.Int(replicas)}} + + if _, err := m.ComputeManagementClient.UpdateInstancePool(ctx, req); err != nil { + return errors.Wrap(err, "unable to update instance pool") + } + } + + return nil +} + +func (m *MachinePoolScope) TerminateInstancePool(ctx context.Context, instancePool *core.InstancePool) error { + m.Info("Terminating instance pool", "id", instancePool.Id, "lifecycleState", instancePool.LifecycleState) + req := core.TerminateInstancePoolRequest{InstancePoolId: instancePool.Id} + if _, err := m.ComputeManagementClient.TerminateInstancePool(ctx, req); err != nil { + return err + } + + return nil +} + +// instancePoolNeedsUpdates compares the incoming OCIMachinePool spec against the existing instance pool. +func instancePoolNeedsUpdates(machinePoolScope *MachinePoolScope, instancePool *core.InstancePool) bool { + + // Allow pool resize + if machinePoolScope.MachinePool.Spec.Replicas != nil { + if instancePool.Size == nil || int(*machinePoolScope.MachinePool.Spec.Replicas) != *instancePool.Size { + return true + } + } else if instancePool.Size != nil { + return true + } + + // todo subnet diff + + return false +} diff --git a/cloud/services/computemanagement/client.go b/cloud/services/computemanagement/client.go new file mode 100644 index 00000000..4b0b3a64 --- /dev/null +++ b/cloud/services/computemanagement/client.go @@ -0,0 +1,22 @@ +package computemanagement + +import ( + "context" + + "github.com/oracle/oci-go-sdk/v63/core" +) + +type Client interface { + // Instance Pool + CreateInstancePool(ctx context.Context, request core.CreateInstancePoolRequest) (response core.CreateInstancePoolResponse, err error) + GetInstancePool(ctx context.Context, request core.GetInstancePoolRequest) (response core.GetInstancePoolResponse, err error) + 
TerminateInstancePool(ctx context.Context, request core.TerminateInstancePoolRequest) (response core.TerminateInstancePoolResponse, err error) + UpdateInstancePool(ctx context.Context, request core.UpdateInstancePoolRequest) (response core.UpdateInstancePoolResponse, err error) + ListInstancePools(ctx context.Context, request core.ListInstancePoolsRequest) (response core.ListInstancePoolsResponse, err error) + + // Instance Configuration + CreateInstanceConfiguration(ctx context.Context, request core.CreateInstanceConfigurationRequest) (response core.CreateInstanceConfigurationResponse, err error) + GetInstanceConfiguration(ctx context.Context, request core.GetInstanceConfigurationRequest) (response core.GetInstanceConfigurationResponse, err error) + ListInstanceConfigurations(ctx context.Context, request core.ListInstanceConfigurationsRequest) (response core.ListInstanceConfigurationsResponse, err error) + DeleteInstanceConfiguration(ctx context.Context, request core.DeleteInstanceConfigurationRequest) (response core.DeleteInstanceConfigurationResponse, err error) +} diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_ociclusters.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_ociclusters.yaml index f53ecb43..3393af5f 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_ociclusters.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_ociclusters.yaml @@ -946,6 +946,25 @@ spec: status: description: OCIClusterStatus defines the observed state of OCICluster properties: + availabilityDomains: + additionalProperties: + description: OCIAvailabilityDomain contains information about an + Availability Domain (AD). + properties: + faultDomains: + description: 'FaultDomains a list of fault domain (FD) names. + Example: ["FAULT-DOMAIN-1"]' + items: + type: string + type: array + name: + description: 'Name is the AD''s full name. 
Example: Uocm:PHX-AD-1' + type: string + type: object + description: AvailabilityDomains encapsulates the clusters Availability + Domain (AD) information in a map where the map key is the AD name + and the struct is details about the AD. + type: object conditions: description: NetworkSpec encapsulates all things related to OCI network. items: diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_ocimachinepools.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_ocimachinepools.yaml new file mode 100644 index 00000000..d52b5cff --- /dev/null +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_ocimachinepools.yaml @@ -0,0 +1,164 @@ + +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.7.0 + creationTimestamp: null + name: ocimachinepools.infrastructure.cluster.x-k8s.io +spec: + group: infrastructure.cluster.x-k8s.io + names: + kind: OCIMachinePool + listKind: OCIMachinePoolList + plural: ocimachinepools + singular: ocimachinepool + scope: Namespaced + versions: + - name: v1beta1 + schema: + openAPIV3Schema: + properties: + apiVersion: + description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. 
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: OCIMachinePoolSpec defines the desired state of OCIMachinePool + properties: + imageId: + description: OCID of the image to be used to launch the instance. + type: string + instanceConfiguration: + properties: + instanceConfigurationId: + type: string + instanceDetails: + properties: + shape: + type: string + type: object + type: object + metadata: + additionalProperties: + type: string + description: Custom metadata key/value pairs that you provide, such + as the SSH public key required to connect to the instance. + type: object + placementDetails: + items: + properties: + availabilityDomain: + description: The availability domain to place instances. + type: integer + required: + - availabilityDomain + type: object + type: array + providerID: + description: ProviderID is the ARN of the associated InstancePool + type: string + shapeConfig: + description: The shape configuration of the instance, applicable for + flex instances. + properties: + baselineOcpuUtilization: + description: 'The baseline OCPU utilization for a subcore burstable + VM instance. Leave this attribute blank for a non-burstable + instance, or explicitly specify non-burstable with `BASELINE_1_1`. + The following values are supported: - `BASELINE_1_8` - baseline + usage is 1/8 of an OCPU. - `BASELINE_1_2` - baseline usage is + 1/2 of an OCPU. - `BASELINE_1_1` - baseline usage is an entire + OCPU. This represents a non-burstable instance.' + type: string + memoryInGBs: + description: The total amount of memory available to the instance, + in gigabytes. + type: string + ocpus: + description: The total number of OCPUs available to the instance. 
+ type: string + type: object + type: object + status: + description: OCIMachinePoolStatus defines the observed state of OCIMachinePool + properties: + conditions: + description: Conditions defines current service state of the OCIMachinePool. + items: + description: Condition defines an observation of a Cluster API resource + operational state. + properties: + lastTransitionTime: + description: Last time the condition transitioned from one status + to another. This should be when the underlying condition changed. + If that is not known, then using the time when the API field + changed is acceptable. + format: date-time + type: string + message: + description: A human readable message indicating details about + the transition. This field may be empty. + type: string + reason: + description: The reason for the condition's last transition + in CamelCase. The specific API may choose whether or not this + field is considered a guaranteed API. This field may not be + empty. + type: string + severity: + description: Severity provides an explicit classification of + Reason code, so the users or machines can immediately understand + the current situation and act accordingly. The Severity field + MUST be set only when Status=False. + type: string + status: + description: Status of the condition, one of True, False, Unknown. + type: string + type: + description: Type of condition in CamelCase or in foo.example.com/CamelCase. + Many .condition.type values are consistent across resources + like Available, but because arbitrary conditions can be useful + (see .node.status.conditions), the ability to deconflict is + important. + type: string + required: + - lastTransitionTime + - status + - type + type: object + type: array + failureMessage: + type: string + failureReason: + description: MachineStatusError defines errors states for Machine + objects. + type: string + ready: + description: Ready is true when the provider resource is ready. 
+ type: boolean + replicas: + description: Replicas is the most recently observed number of replicas + format: int32 + type: integer + type: object + type: object + served: true + storage: true + subresources: + status: {} +status: + acceptedNames: + kind: "" + plural: "" + conditions: [] + storedVersions: [] diff --git a/config/crd/kustomization.yaml b/config/crd/kustomization.yaml index 698803c3..06ec247f 100644 --- a/config/crd/kustomization.yaml +++ b/config/crd/kustomization.yaml @@ -8,6 +8,7 @@ commonLabels: resources: - bases/infrastructure.cluster.x-k8s.io_ociclusters.yaml - bases/infrastructure.cluster.x-k8s.io_ocimachines.yaml +- bases/infrastructure.cluster.x-k8s.io_ocimachinepools.yaml - bases/infrastructure.cluster.x-k8s.io_ocimachinetemplates.yaml - bases/infrastructure.cluster.x-k8s.io_ociclustertemplates.yaml #+kubebuilder:scaffold:crdkustomizeresource diff --git a/config/crd/patches/cainjection_in_ocimachinepools.yaml b/config/crd/patches/cainjection_in_ocimachinepools.yaml new file mode 100644 index 00000000..a439dd5a --- /dev/null +++ b/config/crd/patches/cainjection_in_ocimachinepools.yaml @@ -0,0 +1,7 @@ +# The following patch adds a directive for certmanager to inject CA into the CRD +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) + name: ocimachinepools.infrastructure.cluster.x-k8s.io diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 861cfc5f..0e6ee9e9 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -29,6 +29,7 @@ spec: - /manager args: - "--leader-elect" + - "--feature-gates=MachinePool=${EXP_MACHINE_POOL:=false}" - "--metrics-bind-address=127.0.0.1:8080" image: controller:latest name: manager diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 38e7fcda..a3d84735 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -69,6 +69,43 @@ 
rules: - get - patch - update +- apiGroups: + - infrastructure.cluster.x-k8s.io + resources: + - ocimachinepools + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - infrastructure.cluster.x-k8s.io + resources: + - ocimachinepools + - ocimachinepools/status + verbs: + - get + - list + - watch +- apiGroups: + - infrastructure.cluster.x-k8s.io + resources: + - ocimachinepools/status + verbs: + - get + - patch + - update +- apiGroups: + - infrastructure.cluster.x-k8s.io + resources: + - ocimachinepools/status + verbs: + - get + - patch + - update - apiGroups: - cluster.x-k8s.io resources: @@ -87,6 +124,15 @@ rules: - get - list - watch +- apiGroups: + - cluster.x-k8s.io + resources: + - machinepools + - machinepools/status + verbs: + - get + - list + - watch - apiGroups: - "" resources: diff --git a/exp/api/v1beta1/conditions_consts.go b/exp/api/v1beta1/conditions_consts.go new file mode 100644 index 00000000..8cb8d1a8 --- /dev/null +++ b/exp/api/v1beta1/conditions_consts.go @@ -0,0 +1,39 @@ +/* +Copyright (c) 2022 Oracle and/or its affiliates. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1beta1 + +import clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + +const ( + // InstancePoolReadyCondition reports on current status of the Instance Pool. Ready indicates the group is provisioned. 
+ InstancePoolReadyCondition clusterv1.ConditionType = "InstancePoolReady" + // InstancePoolNotFoundReason used when the Instance Pool couldn't be retrieved. + InstancePoolNotFoundReason = "InstancePoolNotFound" + // InstancePoolProvisionFailedReason used for failures during Instance Pool provisioning. + InstancePoolProvisionFailedReason = "InstancePoolProvisionFailed" + // InstancePoolDeletionInProgress Instance Pool is in a deletion in progress state. + InstancePoolDeletionInProgress = "InstancePoolDeletionInProgress" + // InstancePoolNotReadyReason used when the instance pool is in a pending state. + InstancePoolNotReadyReason = "InstancePoolNotReady" + + // LaunchTemplateReadyCondition represents the status of an OCIMachinePool's associated Instance Template. + LaunchTemplateReadyCondition clusterv1.ConditionType = "LaunchTemplateReady" + // LaunchTemplateNotFoundReason is used when an associated Launch Template can't be found. + LaunchTemplateNotFoundReason = "LaunchTemplateNotFound" + // LaunchTemplateCreateFailedReason used for failures during Launch Template creation. + LaunchTemplateCreateFailedReason = "LaunchTemplateCreateFailed" +) diff --git a/exp/api/v1beta1/groupversion_type.go b/exp/api/v1beta1/groupversion_type.go new file mode 100644 index 00000000..90bce315 --- /dev/null +++ b/exp/api/v1beta1/groupversion_type.go @@ -0,0 +1,33 @@ +/* +Copyright (c) 2022 Oracle and/or its affiliates. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package v1beta1 + +import ( + "k8s.io/apimachinery/pkg/runtime/schema" + "sigs.k8s.io/controller-runtime/pkg/scheme" +) + +var ( + // GroupVersion is group version used to register these objects + GroupVersion = schema.GroupVersion{Group: "infrastructure.cluster.x-k8s.io", Version: "v1beta1"} + + // SchemeBuilder is used to add go types to the GroupVersionKind scheme + SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} + + // AddToScheme adds the types in this group-version to the given scheme. + AddToScheme = SchemeBuilder.AddToScheme +) diff --git a/exp/api/v1beta1/ocimachinepool_types.go b/exp/api/v1beta1/ocimachinepool_types.go new file mode 100644 index 00000000..8395fef9 --- /dev/null +++ b/exp/api/v1beta1/ocimachinepool_types.go @@ -0,0 +1,148 @@ +/* +Copyright (c) 2022 Oracle and/or its affiliates. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1beta1 + +import ( + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/errors" +) + +// +kubebuilder:object:generate=true +// +groupName=infrastructure.cluster.x-k8s.io + +// Constants block. +const ( + // MachinePoolFinalizer is the finalizer for the machine pool. 
+ MachinePoolFinalizer = "ocimachinepool.infrastructure.cluster.x-k8s.io" +) + +// OCIMachinePoolSpec defines the desired state of OCIMachinePool +type OCIMachinePoolSpec struct { + // ProviderID is the OCID of the associated InstancePool + // +optional + ProviderID *string `json:"providerID,omitempty"` + + // OCID of the image to be used to launch the instance. + ImageId string `json:"imageId,omitempty"` + + // Custom metadata key/value pairs that you provide, such as the SSH public key + // required to connect to the instance. + Metadata map[string]string `json:"metadata,omitempty"` + + // The shape configuration of the instance, applicable for flex instances. + ShapeConfig ShapeConfig `json:"shapeConfig,omitempty"` + + PlacementDetails []PlacementDetails `json:"placementDetails,omitempty"` + + InstanceConfiguration InstanceConfiguration `json:"instanceConfiguration,omitempty"` +} + +type InstanceConfiguration struct { + InstanceConfigurationId string `json:"instanceConfigurationId,omitempty"` + InstanceDetails InstanceDetails `json:"instanceDetails,omitempty"` +} + +type PlacementDetails struct { + // The availability domain to place instances. + AvailabilityDomain int `mandatory:"true" json:"availabilityDomain"` +} + +type InstanceDetails struct { + Shape string `json:"shape,omitempty"` +} + +// LaunchDetails Instance launch details for creating an instance from an instance configuration +// https://docs.oracle.com/en-us/iaas/api/#/en/iaas/20160918/datatypes/InstanceConfigurationLaunchInstanceDetails +type LaunchDetails struct { + // Custom metadata key/value pairs that you provide, such as the SSH public key + // required to connect to the instance. 
+ Metadata map[string]string `json:"metadata,omitempty"` + + Shape string `json:"shape,omitempty"` +} + +// ShapeConfig defines the configuration options for the compute instance shape +// https://docs.oracle.com/en-us/iaas/api/#/en/iaas/20160918/datatypes/LaunchInstanceShapeConfigDetails +type ShapeConfig struct { + // The total number of OCPUs available to the instance. + Ocpus string `json:"ocpus,omitempty"` + + // The total amount of memory available to the instance, in gigabytes. + MemoryInGBs string `json:"memoryInGBs,omitempty"` + + // The baseline OCPU utilization for a subcore burstable VM instance. Leave this attribute blank for a + // non-burstable instance, or explicitly specify non-burstable with `BASELINE_1_1`. + // The following values are supported: + // - `BASELINE_1_8` - baseline usage is 1/8 of an OCPU. + // - `BASELINE_1_2` - baseline usage is 1/2 of an OCPU. + // - `BASELINE_1_1` - baseline usage is an entire OCPU. This represents a non-burstable instance. + BaselineOcpuUtilization string `json:"baselineOcpuUtilization,omitempty"` +} + +// OCIMachinePoolStatus defines the observed state of OCIMachinePool +type OCIMachinePoolStatus struct { + // Ready is true when the provider resource is ready. + // +optional + Ready bool `json:"ready"` + + // Replicas is the most recently observed number of replicas + // +optional + Replicas int32 `json:"replicas"` + + // Conditions defines current service state of the OCIMachinePool. 
+ // +optional + Conditions clusterv1.Conditions `json:"conditions,omitempty"` + + FailureReason *errors.MachineStatusError `json:"failureReason,omitempty"` + + FailureMessage *string `json:"failureMessage,omitempty"` +} + +//+kubebuilder:object:root=true +//+kubebuilder:subresource:status + +type OCIMachinePool struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec OCIMachinePoolSpec `json:"spec,omitempty"` + Status OCIMachinePoolStatus `json:"status,omitempty"` +} + +// +kubebuilder:object:root=true + +// OCIMachinePoolList contains a list of OCIMachinePool. +type OCIMachinePoolList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []OCIMachinePool `json:"items"` +} + +// GetConditions returns the list of conditions for an OCIMachine API object. +func (m *OCIMachinePool) GetConditions() clusterv1.Conditions { + return m.Status.Conditions +} + +// SetConditions will set the given conditions on an OCIMachine object. +func (m *OCIMachinePool) SetConditions(conditions clusterv1.Conditions) { + m.Status.Conditions = conditions +} + +func init() { + SchemeBuilder.Register(&OCIMachinePool{}, &OCIMachinePoolList{}) +} diff --git a/exp/api/v1beta1/zz_generated.deepcopy.go b/exp/api/v1beta1/zz_generated.deepcopy.go new file mode 100644 index 00000000..55996de2 --- /dev/null +++ b/exp/api/v1beta1/zz_generated.deepcopy.go @@ -0,0 +1,236 @@ +//go:build !ignore_autogenerated +// +build !ignore_autogenerated + +/* +Copyright (c) 2022, Oracle and/or its affiliates. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +// Code generated by controller-gen. DO NOT EDIT. + +package v1beta1 + +import ( + runtime "k8s.io/apimachinery/pkg/runtime" + apiv1beta1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/errors" +) + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *InstanceConfiguration) DeepCopyInto(out *InstanceConfiguration) { + *out = *in + out.InstanceDetails = in.InstanceDetails +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InstanceConfiguration. +func (in *InstanceConfiguration) DeepCopy() *InstanceConfiguration { + if in == nil { + return nil + } + out := new(InstanceConfiguration) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *InstanceDetails) DeepCopyInto(out *InstanceDetails) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new InstanceDetails. +func (in *InstanceDetails) DeepCopy() *InstanceDetails { + if in == nil { + return nil + } + out := new(InstanceDetails) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *LaunchDetails) DeepCopyInto(out *LaunchDetails) { + *out = *in + if in.Metadata != nil { + in, out := &in.Metadata, &out.Metadata + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LaunchDetails. +func (in *LaunchDetails) DeepCopy() *LaunchDetails { + if in == nil { + return nil + } + out := new(LaunchDetails) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *OCIMachinePool) DeepCopyInto(out *OCIMachinePool) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OCIMachinePool. +func (in *OCIMachinePool) DeepCopy() *OCIMachinePool { + if in == nil { + return nil + } + out := new(OCIMachinePool) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *OCIMachinePool) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *OCIMachinePoolList) DeepCopyInto(out *OCIMachinePoolList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]OCIMachinePool, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OCIMachinePoolList. 
+func (in *OCIMachinePoolList) DeepCopy() *OCIMachinePoolList { + if in == nil { + return nil + } + out := new(OCIMachinePoolList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *OCIMachinePoolList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *OCIMachinePoolSpec) DeepCopyInto(out *OCIMachinePoolSpec) { + *out = *in + if in.ProviderID != nil { + in, out := &in.ProviderID, &out.ProviderID + *out = new(string) + **out = **in + } + if in.Metadata != nil { + in, out := &in.Metadata, &out.Metadata + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + out.ShapeConfig = in.ShapeConfig + if in.PlacementDetails != nil { + in, out := &in.PlacementDetails, &out.PlacementDetails + *out = make([]PlacementDetails, len(*in)) + copy(*out, *in) + } + out.InstanceConfiguration = in.InstanceConfiguration +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OCIMachinePoolSpec. +func (in *OCIMachinePoolSpec) DeepCopy() *OCIMachinePoolSpec { + if in == nil { + return nil + } + out := new(OCIMachinePoolSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *OCIMachinePoolStatus) DeepCopyInto(out *OCIMachinePoolStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make(apiv1beta1.Conditions, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.FailureReason != nil { + in, out := &in.FailureReason, &out.FailureReason + *out = new(errors.MachineStatusError) + **out = **in + } + if in.FailureMessage != nil { + in, out := &in.FailureMessage, &out.FailureMessage + *out = new(string) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new OCIMachinePoolStatus. +func (in *OCIMachinePoolStatus) DeepCopy() *OCIMachinePoolStatus { + if in == nil { + return nil + } + out := new(OCIMachinePoolStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PlacementDetails) DeepCopyInto(out *PlacementDetails) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PlacementDetails. +func (in *PlacementDetails) DeepCopy() *PlacementDetails { + if in == nil { + return nil + } + out := new(PlacementDetails) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ShapeConfig) DeepCopyInto(out *ShapeConfig) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ShapeConfig. 
+func (in *ShapeConfig) DeepCopy() *ShapeConfig { + if in == nil { + return nil + } + out := new(ShapeConfig) + in.DeepCopyInto(out) + return out +} diff --git a/exp/controllers/ocimachinepool_controller.go b/exp/controllers/ocimachinepool_controller.go new file mode 100644 index 00000000..8466628e --- /dev/null +++ b/exp/controllers/ocimachinepool_controller.go @@ -0,0 +1,382 @@ +/* + * + * Copyright (c) 2022, Oracle and/or its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * / + * + */ + +package controllers + +import ( + "context" + "fmt" + "time" + + "github.com/go-logr/logr" + infrastructurev1beta1 "github.com/oracle/cluster-api-provider-oci/api/v1beta1" + "github.com/oracle/cluster-api-provider-oci/cloud/scope" + infrav1exp "github.com/oracle/cluster-api-provider-oci/exp/api/v1beta1" + "github.com/oracle/oci-go-sdk/v63/common" + "github.com/oracle/oci-go-sdk/v63/core" + "github.com/pkg/errors" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/tools/record" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + capierrors "sigs.k8s.io/cluster-api/errors" + expclusterv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" + "sigs.k8s.io/cluster-api/util" + "sigs.k8s.io/cluster-api/util/annotations" + "sigs.k8s.io/cluster-api/util/conditions" + "sigs.k8s.io/cluster-api/util/predicates" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + "sigs.k8s.io/controller-runtime/pkg/source" +) + +// OCIMachinePoolReconciler reconciles a OCIMachinePool object +type OCIMachinePoolReconciler struct { + client.Client + Scheme *runtime.Scheme + Recorder record.EventRecorder + ClientProvider *scope.ClientProvider + Region string +} + +//+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=ocimachinepools,verbs=get;list;watch;create;update;patch;delete +//+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=ocimachinepools/status,verbs=get;update;patch +//+kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinepools;machinepools/status,verbs=get;list;watch 
+//+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=ocimachinepools/finalizers,verbs=update + +// Reconcile is part of the main kubernetes reconciliation loop which aims to +// move the current state of the machinepool closer to the desired state. +// +// For more details, check Reconcile and its Result here: +// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.8.3/pkg/reconcile +func (r *OCIMachinePoolReconciler) Reconcile(ctx context.Context, req ctrl.Request) (result ctrl.Result, reterr error) { + logger := log.FromContext(ctx) + logger.Info("Got reconciliation event for machine pool") + + // Fetch the OCIMachinePool. + ociMachinePool := &infrav1exp.OCIMachinePool{} + err := r.Get(ctx, req.NamespacedName, ociMachinePool) + if err != nil { + if apierrors.IsNotFound(err) { + return ctrl.Result{}, nil + } + return ctrl.Result{}, err + } + + // Fetch the CAPI MachinePool + machinePool, err := getOwnerMachinePool(ctx, r.Client, ociMachinePool.ObjectMeta) + if err != nil { + return reconcile.Result{}, err + } + if machinePool == nil { + logger.Info("MachinePool Controller has not yet set OwnerRef") + return reconcile.Result{}, nil + } + logger = logger.WithValues("machinePool", machinePool.Name) + + // Fetch the Cluster. + cluster, err := util.GetClusterFromMetadata(ctx, r.Client, machinePool.ObjectMeta) + if err != nil { + logger.Info("MachinePool is missing cluster label or cluster does not exist") + return reconcile.Result{}, nil + } + logger = logger.WithValues("cluster", cluster.Name) + + // Return early if the object or Cluster is paused. + if annotations.IsPaused(cluster, ociMachinePool) { + logger.Info("OCIMachinePool or linked Cluster is marked as paused. 
Won't reconcile")
+		return ctrl.Result{}, nil
+	}
+
+	ociCluster := &infrastructurev1beta1.OCICluster{}
+	ociClusterName := client.ObjectKey{
+		Namespace: cluster.Namespace,
+		Name:      cluster.Name,
+	}
+
+	if err := r.Client.Get(ctx, ociClusterName, ociCluster); err != nil {
+		logger.Info("Cluster is not available yet")
+		r.Recorder.Eventf(ociMachinePool, corev1.EventTypeWarning, "ClusterNotAvailable", "Cluster is not available yet")
+		logger.V(2).Info("OCICluster is not available yet")
+		return ctrl.Result{}, nil
+	}
+
+	regionOverride := r.Region
+	if len(ociCluster.Spec.Region) > 0 {
+		regionOverride = ociCluster.Spec.Region
+	}
+	if len(regionOverride) <= 0 {
+		return ctrl.Result{}, errors.New("OCIMachinePoolReconciler Region can't be nil")
+	}
+
+	clients, err := r.ClientProvider.GetOrBuildClient(regionOverride)
+	if err != nil {
+		return ctrl.Result{}, errors.Wrap(err, "couldn't get the clients for region")
+	}
+
+	// Create the machine pool scope
+	machinePoolScope, err := scope.NewMachinePoolScope(scope.MachinePoolScopeParams{
+		Client:                  r.Client,
+		ComputeManagementClient: clients.ComputeManagementClient,
+		Logger:                  &logger,
+		Cluster:                 cluster,
+		OCICluster:              ociCluster,
+		MachinePool:             machinePool,
+		OCIMachinePool:          ociMachinePool,
+	})
+	if err != nil {
+		return ctrl.Result{}, errors.Errorf("failed to create scope: %+v", err)
+	}
+
+	// Always close the scope when exiting this function so we can persist any OCIMachinePool changes.
+	defer func() {
+		if err := machinePoolScope.Close(ctx); err != nil && reterr == nil {
+			reterr = err
+		}
+	}()
+
+	// Handle deleted machines
+	if !ociMachinePool.DeletionTimestamp.IsZero() {
+		return r.reconcileDelete(ctx, machinePoolScope)
+	}
+
+	// Handle non-deleted machines
+	return r.reconcileNormal(ctx, logger, machinePoolScope)
+}
+
+// SetupWithManager sets up the controller with the Manager. 
+func (r *OCIMachinePoolReconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, options controller.Options) error { + logger := log.FromContext(ctx) + return ctrl.NewControllerManagedBy(mgr). + WithOptions(options). + For(&infrav1exp.OCIMachinePool{}). + Watches( + &source.Kind{Type: &expclusterv1.MachinePool{}}, + handler.EnqueueRequestsFromMapFunc(machinePoolToInfrastructureMapFunc(infrav1exp. + GroupVersion.WithKind(scope.OCIMachinePoolKind), logger)), + ). + WithEventFilter(predicates.ResourceNotPaused(ctrl.LoggerFrom(ctx))). + Complete(r) +} + +func machinePoolToInfrastructureMapFunc(gvk schema.GroupVersionKind, logger logr.Logger) handler.MapFunc { + return func(o client.Object) []reconcile.Request { + m, ok := o.(*expclusterv1.MachinePool) + if !ok { + panic(fmt.Sprintf("Expected a MachinePool but got a %T", o)) + } + + gk := gvk.GroupKind() + // Return early if the GroupKind doesn't match what we expect + infraGK := m.Spec.Template.Spec.InfrastructureRef.GroupVersionKind().GroupKind() + if gk != infraGK { + logger.V(4).Info("gk does not match", "gk", gk, "infraGK", infraGK) + return nil + } + + return []reconcile.Request{ + { + NamespacedName: client.ObjectKey{ + Namespace: m.Namespace, + Name: m.Spec.Template.Spec.InfrastructureRef.Name, + }, + }, + } + } +} + +// getOwnerMachinePool returns the MachinePool object owning the current resource. +func getOwnerMachinePool(ctx context.Context, c client.Client, obj metav1.ObjectMeta) (*expclusterv1.MachinePool, error) { + for _, ref := range obj.OwnerReferences { + if ref.Kind != "MachinePool" { + continue + } + gv, err := schema.ParseGroupVersion(ref.APIVersion) + if err != nil { + return nil, errors.WithStack(err) + } + if gv.Group == expclusterv1.GroupVersion.Group { + return getMachinePoolByName(ctx, c, obj.Namespace, ref.Name) + } + } + return nil, nil +} + +// getMachinePoolByName finds and return a Machine object using the specified params. 
+func getMachinePoolByName(ctx context.Context, c client.Client, namespace, name string) (*expclusterv1.MachinePool, error) { + m := &expclusterv1.MachinePool{} + key := client.ObjectKey{Name: name, Namespace: namespace} + if err := c.Get(ctx, key, m); err != nil { + return nil, err + } + return m, nil +} + +func (r *OCIMachinePoolReconciler) reconcileNormal(ctx context.Context, logger logr.Logger, machinePoolScope *scope.MachinePoolScope) (ctrl.Result, error) { + machinePoolScope.Info("Handling reconcile OCIMachinePool") + + // If the OCIMachinePool is in an error state, return early. + if machinePoolScope.HasFailed() { + machinePoolScope.Info("Error state detected, skipping reconciliation") + + return ctrl.Result{}, nil + } + + // If the OCIMachinePool doesn't have our finalizer, add it. + controllerutil.AddFinalizer(machinePoolScope.OCIMachinePool, infrav1exp.MachinePoolFinalizer) + // Register the finalizer immediately to avoid orphaning OCI resources on delete + if err := machinePoolScope.PatchObject(ctx); err != nil { + return reconcile.Result{}, err + } + + if !machinePoolScope.Cluster.Status.InfrastructureReady { + logger.Info("Cluster infrastructure is not ready yet") + return reconcile.Result{}, nil + } + + // Make sure bootstrap data is available and populated. 
+	if machinePoolScope.MachinePool.Spec.Template.Spec.Bootstrap.DataSecretName == nil {
+		r.Recorder.Event(machinePoolScope.OCIMachinePool, corev1.EventTypeNormal, infrastructurev1beta1.WaitingForBootstrapDataReason, "Bootstrap data secret reference is not yet available")
+		conditions.MarkFalse(machinePoolScope.OCIMachinePool, infrastructurev1beta1.InstanceReadyCondition, infrastructurev1beta1.WaitingForBootstrapDataReason, clusterv1.ConditionSeverityInfo, "")
+		logger.Info("Bootstrap data secret reference is not yet available")
+		return reconcile.Result{}, nil
+	}
+
+	// get or create the InstanceConfiguration
+	// https://docs.oracle.com/en-us/iaas/api/#/en/iaas/20160918/InstanceConfiguration/
+	if err := machinePoolScope.ReconcileInstanceConfiguration(ctx); err != nil {
+		r.Recorder.Eventf(machinePoolScope.OCIMachinePool, corev1.EventTypeWarning, "FailedLaunchTemplateReconcile", "Failed to reconcile launch template: %v", err)
+		machinePoolScope.Error(err, "failed to reconcile launch template")
+		return ctrl.Result{}, err
+	}
+
+	// set the LaunchTemplateReady condition
+	conditions.MarkTrue(machinePoolScope.OCIMachinePool, infrav1exp.LaunchTemplateReadyCondition)
+
+	// Find existing Instance Pool
+	instancePool, err := machinePoolScope.FindInstancePool(ctx)
+	if err != nil {
+		conditions.MarkUnknown(machinePoolScope.OCIMachinePool, infrav1exp.InstancePoolReadyCondition, infrav1exp.InstancePoolNotFoundReason, err.Error())
+		return ctrl.Result{}, err
+	}
+
+	if instancePool == nil {
+		if _, err := machinePoolScope.CreateInstancePool(ctx); err != nil {
+			conditions.MarkFalse(machinePoolScope.OCIMachinePool, infrav1exp.InstancePoolReadyCondition, infrav1exp.InstancePoolProvisionFailedReason, clusterv1.ConditionSeverityError, err.Error())
+			return ctrl.Result{}, err
+		}
+		r.Recorder.Eventf(machinePoolScope.OCIMachinePool, corev1.EventTypeNormal, "SuccessfulCreate", "Created new Instance Pool: %s", machinePoolScope.OCIMachinePool.GetName())
+		return ctrl.Result{}, nil
+	}
+
+	if err := 
machinePoolScope.UpdatePool(ctx, instancePool); err != nil { + r.Recorder.Eventf(machinePoolScope.OCIMachinePool, corev1.EventTypeWarning, "FailedUpdate", "Failed to update instance pool: %v", err) + machinePoolScope.Error(err, "error updating OCIMachinePool") + return ctrl.Result{}, err + } + + machinePoolScope.Info("OCI Compute Instance Pool found", "InstancePoolId", *instancePool.Id) + machinePoolScope.OCIMachinePool.Spec.ProviderID = common.String(fmt.Sprintf("oci://%s", *instancePool.Id)) + + switch instancePool.LifecycleState { + case core.InstancePoolLifecycleStateProvisioning, core.InstancePoolLifecycleStateStarting: + machinePoolScope.Info("Instance Pool is pending") + conditions.MarkFalse(machinePoolScope.OCIMachinePool, infrav1exp.InstancePoolReadyCondition, infrav1exp.InstancePoolNotReadyReason, clusterv1.ConditionSeverityInfo, "") + return reconcile.Result{RequeueAfter: 10 * time.Second}, nil + case core.InstancePoolLifecycleStateRunning: + machinePoolScope.Info("Instance pool is active") + + // record the event only when pool goes from not ready to ready state + r.Recorder.Eventf(machinePoolScope.OCIMachinePool, corev1.EventTypeNormal, "InstancePoolReady", + "Instance pool is in ready state") + conditions.MarkTrue(machinePoolScope.OCIMachinePool, infrav1exp.InstancePoolReadyCondition) + machinePoolScope.SetReady() + default: + conditions.MarkFalse(machinePoolScope.OCIMachinePool, infrav1exp.InstancePoolReadyCondition, infrav1exp.InstancePoolProvisionFailedReason, clusterv1.ConditionSeverityError, "") + machinePoolScope.SetFailureReason(capierrors.CreateMachineError) + machinePoolScope.SetFailureMessage(errors.Errorf("Instance Pool status %q is unexpected", instancePool.LifecycleState)) + r.Recorder.Eventf(machinePoolScope.OCIMachinePool, corev1.EventTypeWarning, "ReconcileError", + "Instance pool has invalid lifecycle state %s", instancePool.LifecycleState) + return reconcile.Result{}, errors.New(fmt.Sprintf("instance pool has invalid lifecycle state 
%s", instancePool.LifecycleState)) + } + + return ctrl.Result{}, nil +} + +func (r *OCIMachinePoolReconciler) reconcileDelete(ctx context.Context, machinePoolScope *scope.MachinePoolScope) (_ ctrl.Result, reterr error) { + machinePoolScope.Info("Handling deleted OCIMachinePool") + + // Find existing Instance Pool + instancePool, err := machinePoolScope.FindInstancePool(ctx) + if err != nil { + conditions.MarkUnknown(machinePoolScope.OCIMachinePool, infrav1exp.InstancePoolReadyCondition, infrav1exp.InstancePoolNotFoundReason, err.Error()) + return ctrl.Result{}, err + } + + if instancePool == nil { + machinePoolScope.V(2).Info("Unable to locate instance pool", "id", machinePoolScope.OCIMachinePool.Spec.ProviderID) + r.Recorder.Eventf(machinePoolScope.OCIMachinePool, corev1.EventTypeNormal, infrav1exp.InstancePoolNotFoundReason, "Unable to find matching instance pool") + } else { + switch instancePool.LifecycleState { + case core.InstancePoolLifecycleStateTerminating: + // Instance Pool is already deleting + machinePoolScope.OCIMachinePool.Status.Ready = false + conditions.MarkFalse(machinePoolScope.OCIMachinePool, infrav1exp.InstancePoolReadyCondition, infrav1exp.InstancePoolDeletionInProgress, clusterv1.ConditionSeverityWarning, "") + r.Recorder.Eventf(machinePoolScope.OCIMachinePool, corev1.EventTypeWarning, "DeletionInProgress", "Instance Pool deletion in progress: %s - %s", instancePool.DisplayName, instancePool.Id) + machinePoolScope.Info("Instance Pool is already deleting", "displayName", instancePool.DisplayName, "id", instancePool.Id) + case core.InstancePoolLifecycleStateTerminated: + machinePoolScope.Info("Instance Pool is already deleted", "displayName", instancePool.DisplayName, "id", instancePool.Id) + default: + if err := machinePoolScope.TerminateInstancePool(ctx, instancePool); err != nil { + r.Recorder.Eventf(machinePoolScope.OCIMachinePool, corev1.EventTypeWarning, "FailedDelete", "Failed to delete instance pool %q: %v", instancePool.Id, err) + 
return ctrl.Result{}, errors.Wrap(err, "failed to delete instance pool")
+			}
+		}
+	}
+
+	instanceConfigurationId := machinePoolScope.GetInstanceConfigurationId()
+	if len(instanceConfigurationId) <= 0 {
+		machinePoolScope.V(2).Info("Unable to locate instance configuration")
+		r.Recorder.Eventf(machinePoolScope.OCIMachinePool, corev1.EventTypeNormal, infrav1exp.InstancePoolNotFoundReason, "Unable to find matching instance configuration")
+		controllerutil.RemoveFinalizer(machinePoolScope.OCIMachinePool, infrav1exp.MachinePoolFinalizer)
+		return ctrl.Result{}, nil
+	}
+
+	machinePoolScope.Info("deleting instance configuration", "id", instanceConfigurationId)
+	req := core.DeleteInstanceConfigurationRequest{InstanceConfigurationId: common.String(instanceConfigurationId)}
+	if _, err := machinePoolScope.ComputeManagementClient.DeleteInstanceConfiguration(ctx, req); err != nil {
+		r.Recorder.Eventf(machinePoolScope.OCIMachinePool, corev1.EventTypeWarning, "FailedDelete", "Failed to delete instance configuration %q: %v", instanceConfigurationId, err)
+		return ctrl.Result{}, errors.Wrap(err, "failed to delete instance configuration")
+	}
+
+	machinePoolScope.Info("successfully deleted instance pool and instance configuration")
+
+	// remove finalizer
+	controllerutil.RemoveFinalizer(machinePoolScope.OCIMachinePool, infrav1exp.MachinePoolFinalizer)
+
+	return ctrl.Result{}, nil
+}
diff --git a/feature/feature.go b/feature/feature.go
new file mode 100644
index 00000000..f296625f
--- /dev/null
+++ b/feature/feature.go
@@ -0,0 +1,38 @@
+/*
+Copyright (c) 2022 Oracle and/or its affiliates.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package feature + +import ( + "k8s.io/apimachinery/pkg/util/runtime" + "k8s.io/component-base/featuregate" +) + +const ( + // MachinePool is used to enable instance pool support + MachinePool featuregate.Feature = "MachinePool" +) + +func init() { + runtime.Must(MutableGates.Add(defaultCAPOCIFeatureGates)) +} + +// defaultCAPOCIFeatureGates consists of all known capa-specific feature keys. +// To add a new feature, define a key for it above and add it here. +var defaultCAPOCIFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{ + // Every feature should be initiated here: + MachinePool: {Default: false, PreRelease: featuregate.Alpha}, +} diff --git a/feature/gates.go b/feature/gates.go new file mode 100644 index 00000000..831f0911 --- /dev/null +++ b/feature/gates.go @@ -0,0 +1,35 @@ +/* +Copyright (c) 2022 Oracle and/or its affiliates. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package feature + +import ( + "k8s.io/component-base/featuregate" + + "sigs.k8s.io/cluster-api/feature" +) + +var ( + // MutableGates is a mutable version of DefaultFeatureGate. 
+ // Only top-level commands/options setup and the k8s.io/component-base/featuregate/testing package should make use of this. + // Tests that need to modify featuregate gates for the duration of their test should use: + // defer featuregatetesting.SetFeatureGateDuringTest(t, utilfeature.DefaultFeatureGate, features., )() + MutableGates featuregate.MutableFeatureGate = feature.MutableGates + + // Gates is a shared global FeatureGate. + // Top-level commands/options setup that needs to modify this featuregate gate should use DefaultMutableFeatureGate. + Gates featuregate.FeatureGate = MutableGates +) diff --git a/go.mod b/go.mod index 099b1f01..01128058 100644 --- a/go.mod +++ b/go.mod @@ -9,10 +9,12 @@ require ( github.com/onsi/gomega v1.17.0 github.com/oracle/oci-go-sdk/v63 v63.0.0 github.com/pkg/errors v0.9.1 + github.com/spf13/pflag v1.0.5 gopkg.in/yaml.v2 v2.4.0 k8s.io/api v0.23.0 k8s.io/apimachinery v0.23.0 k8s.io/client-go v0.23.0 + k8s.io/component-base v0.23.0 k8s.io/klog/v2 v2.30.0 k8s.io/utils v0.0.0-20210930125809-cb0fa318a74b sigs.k8s.io/cluster-api v1.0.1-0.20211111175208-4cc2fce2111a diff --git a/kind-with-registry.sh b/kind-with-registry.sh new file mode 100755 index 00000000..b027aa26 --- /dev/null +++ b/kind-with-registry.sh @@ -0,0 +1,64 @@ +#!/bin/sh +# +# /* +# Copyright (c) 2022, Oracle and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# */ +# +# + +set -o errexit + +# create registry container unless it already exists +reg_name='kind-registry' +reg_port='5000' +running="$(docker inspect -f '{{.State.Running}}' "${reg_name}" 2>/dev/null || true)" +if [ "${running}" != 'true' ]; then + docker run \ + -d --restart=always -p "127.0.0.1:${reg_port}:5000" --name "${reg_name}" \ + registry:2 +fi + +# create a cluster with the local registry enabled in containerd +cat <