Skip to content

Commit

Permalink
Merge pull request openshift#7730 from nutanix-cloud-native/nutanix-f…
Browse files Browse the repository at this point in the history
…ailuredomains

SPLAT-1272: Support Nutanix Failure Domains
  • Loading branch information
openshift-merge-bot[bot] committed Nov 28, 2023
2 parents 08e2e52 + 5f01650 commit 8032847
Show file tree
Hide file tree
Showing 26 changed files with 1,786 additions and 120 deletions.
93 changes: 89 additions & 4 deletions data/data/install.openshift.io_installconfigs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -740,6 +740,13 @@ spec:
cores to assign a vm.
format: int64
type: integer
failureDomains:
description: FailureDomains optionally configures a list
of failure domain names that will be applied to the MachinePool
items:
type: string
type: array
x-kubernetes-list-type: set
memoryMiB:
description: Memory is the size of a VM's memory in MiB.
format: int64
Expand Down Expand Up @@ -1674,6 +1681,13 @@ spec:
cores to assign a vm.
format: int64
type: integer
failureDomains:
description: FailureDomains optionally configures a list of
failure domain names that will be applied to the MachinePool
items:
type: string
type: array
x-kubernetes-list-type: set
memoryMiB:
description: Memory is the size of a VM's memory in MiB.
format: int64
Expand Down Expand Up @@ -3483,7 +3497,7 @@ spec:
uniqueItems: true
clusterOSImage:
description: ClusterOSImage overrides the url provided in rhcos.json
to download the RHCOS Image
to download the RHCOS Image.
type: string
defaultMachinePlatform:
description: DefaultMachinePlatform is the default configuration
Expand Down Expand Up @@ -3544,6 +3558,13 @@ spec:
cores to assign a vm.
format: int64
type: integer
failureDomains:
description: FailureDomains optionally configures a list of
failure domain names that will be applied to the MachinePool
items:
type: string
type: array
x-kubernetes-list-type: set
memoryMiB:
description: Memory is the size of a VM's memory in MiB.
format: int64
Expand Down Expand Up @@ -3577,6 +3598,70 @@ spec:
- type
type: object
type: object
failureDomains:
description: FailureDomains configures failure domains for the
Nutanix platform.
items:
description: FailureDomain configures failure domain information
for the Nutanix platform.
properties:
name:
description: Name defines the unique name of a failure domain.
maxLength: 64
minLength: 1
pattern: ^[0-9A-Za-z_.-@/]+$
type: string
prismElement:
description: prismElement holds the identification (name,
uuid) and the optional endpoint address and port of the
Nutanix Prism Element. When a cluster-wide proxy is installed,
by default, this endpoint will be accessed via the proxy.
Should you wish for communication with this endpoint not
to be proxied, please add the endpoint to the proxy spec.noProxy
list.
properties:
endpoint:
description: Endpoint holds the address and port of
the Prism Element
properties:
address:
description: address is the endpoint address (DNS
name or IP address) of the Nutanix Prism Central
or Element (cluster)
type: string
port:
description: port is the port number to access the
Nutanix Prism Central or Element (cluster)
format: int32
type: integer
required:
- address
- port
type: object
name:
description: Name is the name of the Prism Element (cluster)
type: string
uuid:
description: UUID is the UUID of the Prism Element (cluster)
type: string
required:
- uuid
type: object
subnetUUIDs:
description: SubnetUUIDs identifies the network subnets
of the Prism Element. Currently we only support one subnet
for a failure domain.
items:
type: string
minItems: 1
type: array
x-kubernetes-list-type: atomic
required:
- name
- prismElement
- subnetUUIDs
type: object
type: array
ingressVIP:
description: 'DeprecatedIngressVIP is the virtual IP address for
ingress. Deprecated: use IngressVIPs'
Expand Down Expand Up @@ -3615,7 +3700,8 @@ spec:
type: object
prismCentral:
description: PrismCentral is the endpoint (address and port) and
credentials to connect to the Prism Central.
credentials to connect to the Prism Central. This serves as
the default Prism-Central.
properties:
endpoint:
description: Endpoint holds the address and port of the Prism
Expand Down Expand Up @@ -3652,7 +3738,7 @@ spec:
description: PrismElements holds a list of Prism Elements (clusters).
A Prism Element encompasses all Nutanix resources (VMs, subnets,
etc.) used to host the OpenShift cluster. Currently only a single
Prism Element may be defined.
Prism Element may be defined. This serves as the default Prism-Element.
items:
description: PrismElement holds the uuid, endpoint of the Prism
Element (cluster)
Expand Down Expand Up @@ -3682,7 +3768,6 @@ spec:
description: UUID is the UUID of the Prism Element (cluster)
type: string
required:
- endpoint
- uuid
type: object
type: array
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ require (
github.com/microsoftgraph/msgraph-sdk-go v0.47.0
github.com/nutanix-cloud-native/prism-go-client v0.2.1-0.20220804130801-c8a253627c64
github.com/onsi/gomega v1.28.1
github.com/openshift/api v0.0.0-20231120145327-841b3aa7251d
github.com/openshift/api v0.0.0-20231120222239-b86761094ee3
github.com/openshift/assisted-image-service v0.0.0-20230829160050-0b98ec74397b
github.com/openshift/assisted-service/api v0.0.0
github.com/openshift/assisted-service/client v0.0.0
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -954,8 +954,8 @@ github.com/opencontainers/selinux v1.8.2/go.mod h1:MUIHuUEvKB1wtJjQdOyYRgOnLD2xA
github.com/openshift/api v0.0.0-20200326160804-ecb9283fe820/go.mod h1:RKMJ5CBnljLfnej+BJ/xnOWc3kZDvJUaIAEq2oKSPtE=
github.com/openshift/api v0.0.0-20200827090112-c05698d102cf/go.mod h1:M3xexPhgM8DISzzRpuFUy+jfPjQPIcs9yqEYj17mXV8=
github.com/openshift/api v0.0.0-20200829102639-8a3a835f1acf/go.mod h1:M3xexPhgM8DISzzRpuFUy+jfPjQPIcs9yqEYj17mXV8=
github.com/openshift/api v0.0.0-20231120145327-841b3aa7251d h1:8vowDTdM3QSfReuzk7L+66yguCySV8ayKkQX7uEpef4=
github.com/openshift/api v0.0.0-20231120145327-841b3aa7251d/go.mod h1:qNtV0315F+f8ld52TLtPvrfivZpdimOzTi3kn9IVbtU=
github.com/openshift/api v0.0.0-20231120222239-b86761094ee3 h1:nLhV2lbWrJ3E3hx0/97G3ZZvppC67cNwo+CLp7/PAbA=
github.com/openshift/api v0.0.0-20231120222239-b86761094ee3/go.mod h1:qNtV0315F+f8ld52TLtPvrfivZpdimOzTi3kn9IVbtU=
github.com/openshift/assisted-image-service v0.0.0-20230829160050-0b98ec74397b h1:wLVEgmzQjs3t4Z96gZzSLF/ws6ULliAks7z1lozNJrE=
github.com/openshift/assisted-image-service v0.0.0-20230829160050-0b98ec74397b/go.mod h1:KTt/pnfs9gt0McDPrb0zVTkwd0xIFNik/ZJROIBzsbc=
github.com/openshift/assisted-service/api v0.0.0-20230831114549-1922eda29cf8 h1:+fZLKbycDo4JeLwPGVSAgf2XPaJGLM341l9ZfrrlxG0=
Expand Down
34 changes: 28 additions & 6 deletions pkg/asset/installconfig/nutanix/validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ package nutanix

import (
"context"
"fmt"
"strconv"

"github.com/pkg/errors"
"k8s.io/apimachinery/pkg/util/validation/field"

"github.com/openshift/installer/pkg/types"
Expand All @@ -24,8 +24,11 @@ func Validate(ic *types.InstallConfig) error {
// provisioned infrastructure. In this case, self-hosted networking is a requirement
// when the installer creates infrastructure for nutanix clusters.
func ValidateForProvisioning(ic *types.InstallConfig) error {
errList := field.ErrorList{}
parentPath := field.NewPath("platform", "nutanix")

if ic.Platform.Nutanix == nil {
return field.Required(field.NewPath("platform", "nutanix"), "nutanix validation requires a nutanix platform configuration")
return field.Required(parentPath, "nutanix validation requires a nutanix platform configuration")
}

p := ic.Platform.Nutanix
Expand All @@ -35,24 +38,43 @@ func ValidateForProvisioning(ic *types.InstallConfig) error {
p.PrismCentral.Username,
p.PrismCentral.Password)
if err != nil {
return field.InternalError(field.NewPath("platform", "nutanix"), errors.Wrapf(err, "unable to connect to Prism Central %q", p.PrismCentral.Endpoint.Address))
return field.Invalid(parentPath.Child("prismCentral"), p.PrismCentral.Endpoint.Address, fmt.Sprintf("failed to connect to the prism-central with the configured credentials: %v", err))
}

// validate whether a prism element with the UUID actually exists
for _, pe := range p.PrismElements {
_, err = nc.V3.GetCluster(pe.UUID)
if err != nil {
return field.InternalError(field.NewPath("platform", "nutanix", "prismElements"), errors.Wrapf(err, "prism element UUID %s does not correspond to a valid prism element in Prism", pe.UUID))
errList = append(errList, field.Invalid(parentPath.Child("prismElements"), pe.UUID, fmt.Sprintf("the prism element %s's UUID does not correspond to a valid prism element in Prism: %v", pe.Name, err)))
}
}

// validate whether a subnet with the UUID actually exists
for _, subnetUUID := range p.SubnetUUIDs {
_, err = nc.V3.GetSubnet(subnetUUID)
if err != nil {
return field.InternalError(field.NewPath("platform", "nutanix", "subnetUUIDs"), errors.Wrapf(err, "subnet UUID %s does not correspond to a valid subnet in Prism", subnetUUID))
errList = append(errList, field.Invalid(parentPath.Child("subnetUUIDs"), subnetUUID, fmt.Sprintf("the subnet UUID does not correspond to a valid subnet in Prism: %v", err)))
}
}

return nil
// validate each FailureDomain configuration
for _, fd := range p.FailureDomains {
// validate whether the prism element with the UUID exists
_, err = nc.V3.GetCluster(fd.PrismElement.UUID)
if err != nil {
errList = append(errList, field.Invalid(parentPath.Child("failureDomains", "prismElements"), fd.PrismElement.UUID,
fmt.Sprintf("the failure domain %s configured prism element UUID does not correspond to a valid prism element in Prism: %v", fd.Name, err)))
}

// validate whether a subnet with the UUID actually exists
for _, subnetUUID := range fd.SubnetUUIDs {
_, err = nc.V3.GetSubnet(subnetUUID)
if err != nil {
errList = append(errList, field.Invalid(parentPath.Child("failureDomains", "subnetUUIDs"), subnetUUID,
fmt.Sprintf("the failure domain %s configured subnet UUID does not correspond to a valid subnet in Prism: %v", fd.Name, err)))
}
}
}

return errList.ToAggregate()
}
72 changes: 59 additions & 13 deletions pkg/asset/machines/nutanix/machines.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"

configv1 "github.com/openshift/api/config/v1"
machinev1 "github.com/openshift/api/machine/v1"
machineapi "github.com/openshift/api/machine/v1beta1"
"github.com/openshift/installer/pkg/types"
Expand All @@ -26,18 +27,31 @@ func Machines(clusterID string, config *types.InstallConfig, pool *types.Machine
platform := config.Platform.Nutanix
mpool := pool.Platform.Nutanix

failureDomains := make([]*nutanix.FailureDomain, 0, len(mpool.FailureDomains))
for _, fdName := range mpool.FailureDomains {
fd, err := platform.GetFailureDomainByName(fdName)
if err != nil {
return nil, nil, err
}
failureDomains = append(failureDomains, fd)
}

total := int64(1)
if pool.Replicas != nil {
total = *pool.Replicas
}
var machines []machineapi.Machine
machineSetProvider := &machinev1.NutanixMachineProviderConfig{}
var machineSetProvider *machinev1.NutanixMachineProviderConfig
for idx := int64(0); idx < total; idx++ {
provider, err := provider(clusterID, platform, mpool, osImage, userDataSecret)

var failureDomain *nutanix.FailureDomain
if len(failureDomains) > 0 {
failureDomain = failureDomains[idx%int64(len(failureDomains))]
}
provider, err := provider(clusterID, platform, mpool, osImage, userDataSecret, failureDomain)
if err != nil {
return nil, nil, fmt.Errorf("failed to create provider: %w", err)
}

machine := machineapi.Machine{
TypeMeta: metav1.TypeMeta{
APIVersion: "machine.openshift.io/v1beta1",
Expand All @@ -59,7 +73,7 @@ func Machines(clusterID string, config *types.InstallConfig, pool *types.Machine
// we don't need to set Versions, because we control those via operators.
},
}
*machineSetProvider = *provider
machineSetProvider = provider.DeepCopy()
machines = append(machines, machine)
}

Expand Down Expand Up @@ -96,23 +110,50 @@ func Machines(clusterID string, config *types.InstallConfig, pool *types.Machine
"machine.openshift.io/cluster-api-machine-type": role,
},
},
Spec: machineapi.MachineSpec{
ProviderSpec: machineapi.ProviderSpec{
Value: &runtime.RawExtension{Object: machineSetProvider},
},
},
},
},
},
}

if len(failureDomains) > 0 {
fdRefs := make([]machinev1.NutanixFailureDomainReference, 0, len(failureDomains))
for _, fd := range failureDomains {
fdRefs = append(fdRefs, machinev1.NutanixFailureDomainReference{Name: fd.Name})
}
controlPlaneMachineSet.Spec.Template.OpenShiftMachineV1Beta1Machine.FailureDomains = &machinev1.FailureDomains{
Platform: configv1.NutanixPlatformType,
Nutanix: fdRefs,
}

// Reset the providerSpec fields related to the failure domain
machineSetProvider.Cluster = machinev1.NutanixResourceIdentifier{}
machineSetProvider.Subnets = []machinev1.NutanixResourceIdentifier{}
machineSetProvider.FailureDomain = nil
}

controlPlaneMachineSet.Spec.Template.OpenShiftMachineV1Beta1Machine.Spec = machineapi.MachineSpec{
ProviderSpec: machineapi.ProviderSpec{
Value: &runtime.RawExtension{Object: machineSetProvider},
},
}

return machines, controlPlaneMachineSet, nil
}

func provider(clusterID string, platform *nutanix.Platform, mpool *nutanix.MachinePool, osImage string, userDataSecret string) (*machinev1.NutanixMachineProviderConfig, error) {
func provider(clusterID string, platform *nutanix.Platform, mpool *nutanix.MachinePool, osImage string, userDataSecret string, failureDomain *nutanix.FailureDomain) (*machinev1.NutanixMachineProviderConfig, error) {
// cluster
peUUID := platform.PrismElements[0].UUID
if failureDomain != nil {
peUUID = failureDomain.PrismElement.UUID
}

// subnets
subnets := []machinev1.NutanixResourceIdentifier{}
for _, subnetUUID := range platform.SubnetUUIDs {
subnetUUIDs := platform.SubnetUUIDs
if failureDomain != nil {
subnetUUIDs = failureDomain.SubnetUUIDs
}
for _, subnetUUID := range subnetUUIDs {
subnet := machinev1.NutanixResourceIdentifier{
Type: machinev1.NutanixIdentifierUUID,
UUID: &subnetUUID,
Expand All @@ -126,7 +167,7 @@ func provider(clusterID string, platform *nutanix.Platform, mpool *nutanix.Machi
Kind: "NutanixMachineProviderConfig",
},
UserDataSecret: &corev1.LocalObjectReference{Name: userDataSecret},
CredentialsSecret: &corev1.LocalObjectReference{Name: "nutanix-credentials"},
CredentialsSecret: &corev1.LocalObjectReference{Name: nutanix.CredentialsSecretName},
Image: machinev1.NutanixResourceIdentifier{
Type: machinev1.NutanixIdentifierName,
Name: &osImage,
Expand All @@ -137,11 +178,16 @@ func provider(clusterID string, platform *nutanix.Platform, mpool *nutanix.Machi
MemorySize: resource.MustParse(fmt.Sprintf("%dMi", mpool.MemoryMiB)),
Cluster: machinev1.NutanixResourceIdentifier{
Type: machinev1.NutanixIdentifierUUID,
UUID: &platform.PrismElements[0].UUID,
UUID: &peUUID,
},
SystemDiskSize: resource.MustParse(fmt.Sprintf("%dGi", mpool.OSDisk.DiskSizeGiB)),
}

// FailureDomain
if failureDomain != nil {
providerCfg.FailureDomain = &machinev1.NutanixFailureDomainReference{Name: failureDomain.Name}
}

if len(mpool.BootType) != 0 {
providerCfg.BootType = mpool.BootType
}
Expand Down
Loading

0 comments on commit 8032847

Please sign in to comment.