Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for EKS accelerated AMIs based on AL2023 #7996

Merged
merged 1 commit into from
Oct 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions pkg/ami/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,16 @@ import (
// Variations of image classes.
//
// Each constant names one AMI variant and is used as the int key of the
// per-image-family maps built by MakeImageSearchPatterns. Resolve starts from
// ImageClassGeneral and switches to another class based on the instance type
// (Nvidia, Neuron or ARM checks).
//
// NOTE(review): this listing is a rendered diff without +/- markers. The file
// header reports "4 additions & 2 deletions", so ImageClassGPU looks like the
// removed line, superseded by ImageClassNvidia and ImageClassNeuron. Confirm
// against the merged file before relying on the numeric iota values.
const (
// Default class, used when no more specific class is selected.
ImageClassGeneral = iota
ImageClassGPU
// Selected for instance types matched by instanceutils.IsNvidiaInstanceType.
ImageClassNvidia
// Selected for instance types matched by instanceutils.IsNeuronInstanceType.
ImageClassNeuron
// Selected for instance types matched by instanceutils.IsARMInstanceType.
ImageClassARM
)

// ImageClasses is a list of image class names.
//
// The entries are the identifiers of the image class constants spelled as
// strings, listed in the same order as the constants are declared, so indexing
// this slice with an image class value yields that class's name.
//
// NOTE(review): this listing is a rendered diff without +/- markers;
// "ImageClassGPU" looks like the removed entry, replaced by "ImageClassNvidia"
// and "ImageClassNeuron". Confirm against the merged file.
var ImageClasses = []string{
"ImageClassGeneral",
"ImageClassGPU",
"ImageClassNvidia",
"ImageClassNeuron",
"ImageClassARM",
}

Expand Down
23 changes: 16 additions & 7 deletions pkg/ami/auto_resolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,14 @@ func MakeImageSearchPatterns(version string) map[string]map[int]string {
return map[string]map[int]string{
api.NodeImageFamilyAmazonLinux2023: {
ImageClassGeneral: fmt.Sprintf("amazon-eks-node-al2023-x86_64-standard-%s-v*", version),
ImageClassNvidia: fmt.Sprintf("amazon-eks-node-al2023-x86_64-nvidia-*-%s-v*", version),
ImageClassNeuron: fmt.Sprintf("amazon-eks-node-al2023-x86_64-neuron-%s-v*", version),
ImageClassARM: fmt.Sprintf("amazon-eks-node-al2023-arm64-standard-%s-v*", version),
},
api.NodeImageFamilyAmazonLinux2: {
ImageClassGeneral: fmt.Sprintf("amazon-eks-node-%s-v*", version),
ImageClassGPU: fmt.Sprintf("amazon-eks-gpu-node-%s-*", version),
ImageClassNvidia: fmt.Sprintf("amazon-eks-gpu-node-%s-*", version),
ImageClassNeuron: fmt.Sprintf("amazon-eks-gpu-node-%s-*", version),
ImageClassARM: fmt.Sprintf("amazon-eks-arm64-node-%s-*", version),
},
api.NodeImageFamilyUbuntuPro2204: {
Expand Down Expand Up @@ -90,16 +93,22 @@ func (r *AutoResolver) Resolve(ctx context.Context, region, version, instanceTyp

imageClasses := MakeImageSearchPatterns(version)[imageFamily]
namePattern := imageClasses[ImageClassGeneral]
if instanceutils.IsGPUInstanceType(instanceType) {
var ok bool
switch {
case instanceutils.IsNvidiaInstanceType(instanceType):
namePattern, ok = imageClasses[ImageClassNvidia]
if !ok {
logger.Critical("image family %s doesn't support Nvidia GPU image class", imageFamily)
return "", NewErrFailedResolution(region, version, instanceType, imageFamily)
}
case instanceutils.IsNeuronInstanceType(instanceType):
var ok bool
namePattern, ok = imageClasses[ImageClassGPU]
namePattern, ok = imageClasses[ImageClassNeuron]
if !ok {
logger.Critical("image family %s doesn't support GPU image class", imageFamily)
logger.Critical("image family %s doesn't support Neuron GPU image class", imageFamily)
return "", NewErrFailedResolution(region, version, instanceType, imageFamily)
}
}

if instanceutils.IsARMInstanceType(instanceType) {
case instanceutils.IsARMInstanceType(instanceType):
var ok bool
namePattern, ok = imageClasses[ImageClassARM]
if !ok {
Expand Down
16 changes: 14 additions & 2 deletions pkg/ami/ssm_resolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ func MakeSSMParameterName(version, instanceType, imageFamily string) (string, er

switch imageFamily {
case api.NodeImageFamilyAmazonLinux2023:
return fmt.Sprintf("/aws/service/eks/optimized-ami/%s/%s/%s/standard/recommended/%s",
version, utils.ToKebabCase(imageFamily), instanceEC2ArchName(instanceType), fieldName), nil
return fmt.Sprintf("/aws/service/eks/optimized-ami/%s/%s/%s/%s/recommended/%s",
version, utils.ToKebabCase(imageFamily), instanceEC2ArchName(instanceType), imageType(imageFamily, instanceType, version), fieldName), nil
case api.NodeImageFamilyAmazonLinux2:
return fmt.Sprintf("/aws/service/eks/optimized-ami/%s/%s/recommended/%s", version, imageType(imageFamily, instanceType, version), fieldName), nil
case api.NodeImageFamilyWindowsServer2019CoreContainer,
Expand Down Expand Up @@ -102,6 +102,10 @@ func MakeManagedSSMParameterName(version string, amiType ekstypes.AMITypes) stri
switch amiType {
case ekstypes.AMITypesAl2023X8664Standard:
return fmt.Sprintf("/aws/service/eks/optimized-ami/%s/%s/x86_64/standard/recommended/release_version", version, utils.ToKebabCase(api.NodeImageFamilyAmazonLinux2023))
case ekstypes.AMITypesAl2023X8664Nvidia:
return fmt.Sprintf("/aws/service/eks/optimized-ami/%s/%s/x86_64/nvidia/recommended/release_version", version, utils.ToKebabCase(api.NodeImageFamilyAmazonLinux2023))
case ekstypes.AMITypesAl2023X8664Neuron:
return fmt.Sprintf("/aws/service/eks/optimized-ami/%s/%s/x86_64/neuron/recommended/release_version", version, utils.ToKebabCase(api.NodeImageFamilyAmazonLinux2023))
case ekstypes.AMITypesAl2023Arm64Standard:
return fmt.Sprintf("/aws/service/eks/optimized-ami/%s/%s/arm64/standard/recommended/release_version", version, utils.ToKebabCase(api.NodeImageFamilyAmazonLinux2023))
case ekstypes.AMITypesAl2X8664:
Expand Down Expand Up @@ -138,6 +142,14 @@ func ubuntuArchName(instanceType string) string {
func imageType(imageFamily, instanceType, version string) string {
family := utils.ToKebabCase(imageFamily)
switch imageFamily {
case api.NodeImageFamilyAmazonLinux2023:
if instanceutils.IsNvidiaInstanceType(instanceType) {
return "nvidia"
}
if instanceutils.IsNeuronInstanceType(instanceType) {
return "neuron"
}
return "standard"
case api.NodeImageFamilyBottlerocket:
if instanceutils.IsNvidiaInstanceType(instanceType) {
return fmt.Sprintf("%s-%s", version, "nvidia")
Expand Down
2 changes: 1 addition & 1 deletion pkg/apis/eksctl.io/v1alpha5/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ func SetManagedNodeGroupDefaults(ng *ManagedNodeGroup, meta *ClusterMeta, contro
// When using custom AMIs, we want the user to explicitly specify AMI family.
// Thus, we only set up default AMI family when no custom AMI is being used.
if ng.AMIFamily == "" && ng.AMI == "" {
if isMinVer, _ := utils.IsMinVersion(Version1_30, meta.Version); isMinVer && !instanceutils.IsGPUInstanceType(ng.InstanceType) &&
if isMinVer, _ := utils.IsMinVersion(Version1_30, meta.Version); isMinVer &&
!instanceutils.IsARMGPUInstanceType(ng.InstanceType) {
ng.AMIFamily = NodeImageFamilyAmazonLinux2023
} else {
Expand Down
36 changes: 12 additions & 24 deletions pkg/apis/eksctl.io/v1alpha5/gpu_validation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,22 +40,16 @@ var _ = Describe("GPU instance support", func() {
assertValidationError(e, api.ValidateManagedNodeGroup(0, mng))
},
Entry("AL2023 INF", gpuInstanceEntry{
amiFamily: api.NodeImageFamilyAmazonLinux2023,
gpuInstanceType: "inf1.xlarge",
expectUnsupportedErr: true,
instanceTypeName: "Inferentia",
amiFamily: api.NodeImageFamilyAmazonLinux2023,
gpuInstanceType: "inf1.xlarge",
}),
Entry("AL2023 TRN", gpuInstanceEntry{
amiFamily: api.NodeImageFamilyAmazonLinux2023,
gpuInstanceType: "trn1.2xlarge",
expectUnsupportedErr: true,
instanceTypeName: "Trainium",
amiFamily: api.NodeImageFamilyAmazonLinux2023,
gpuInstanceType: "trn1.2xlarge",
}),
Entry("AL2023 NVIDIA", gpuInstanceEntry{
amiFamily: api.NodeImageFamilyAmazonLinux2023,
gpuInstanceType: "g4dn.xlarge",
expectUnsupportedErr: true,
instanceTypeName: "GPU",
amiFamily: api.NodeImageFamilyAmazonLinux2023,
gpuInstanceType: "g4dn.xlarge",
}),
Entry("AL2", gpuInstanceEntry{
gpuInstanceType: "asdf",
Expand Down Expand Up @@ -107,22 +101,16 @@ var _ = Describe("GPU instance support", func() {

},
Entry("AL2023 INF", gpuInstanceEntry{
amiFamily: api.NodeImageFamilyAmazonLinux2023,
gpuInstanceType: "inf1.xlarge",
expectUnsupportedErr: true,
instanceTypeName: "Inferentia",
amiFamily: api.NodeImageFamilyAmazonLinux2023,
gpuInstanceType: "inf1.xlarge",
}),
Entry("AL2023 TRN", gpuInstanceEntry{
amiFamily: api.NodeImageFamilyAmazonLinux2023,
gpuInstanceType: "trn1.2xlarge",
expectUnsupportedErr: true,
instanceTypeName: "Trainium",
amiFamily: api.NodeImageFamilyAmazonLinux2023,
gpuInstanceType: "trn1.2xlarge",
}),
Entry("AL2023 NVIDIA", gpuInstanceEntry{
amiFamily: api.NodeImageFamilyAmazonLinux2023,
gpuInstanceType: "g4dn.xlarge",
expectUnsupportedErr: true,
instanceTypeName: "GPU",
amiFamily: api.NodeImageFamilyAmazonLinux2023,
gpuInstanceType: "g4dn.xlarge",
}),
Entry("AL2", gpuInstanceEntry{
gpuInstanceType: "g4dn.xlarge",
Expand Down
18 changes: 9 additions & 9 deletions pkg/apis/eksctl.io/v1alpha5/validation.go
Original file line number Diff line number Diff line change
Expand Up @@ -661,12 +661,10 @@ func validateNodeGroupBase(np NodePool, path string, controlPlaneOnOutposts bool

instanceType := SelectInstanceType(np)

if ng.AMIFamily == NodeImageFamilyAmazonLinux2023 && instanceutils.IsNvidiaInstanceType(instanceType) {
return ErrUnsupportedInstanceTypes("GPU", NodeImageFamilyAmazonLinux2023,
fmt.Sprintf("EKS accelerated AMIs based on %s will be available at a later date", NodeImageFamilyAmazonLinux2023))
}

if ng.AMIFamily != NodeImageFamilyAmazonLinux2 && ng.AMIFamily != NodeImageFamilyBottlerocket && ng.AMIFamily != "" {
if ng.AMIFamily != NodeImageFamilyAmazonLinux2023 &&
ng.AMIFamily != NodeImageFamilyAmazonLinux2 &&
ng.AMIFamily != NodeImageFamilyBottlerocket &&
ng.AMIFamily != "" {
if instanceutils.IsNvidiaInstanceType(instanceType) {
logger.Warning(GPUDriversWarning(ng.AMIFamily))
}
Expand All @@ -676,12 +674,14 @@ func validateNodeGroupBase(np NodePool, path string, controlPlaneOnOutposts bool
}
}

if ng.AMIFamily != NodeImageFamilyAmazonLinux2 && ng.AMIFamily != "" {
// Only AL2 supports Inferentia hosts.
if ng.AMIFamily != NodeImageFamilyAmazonLinux2 &&
ng.AMIFamily != NodeImageFamilyAmazonLinux2023 &&
ng.AMIFamily != "" {
// Only AL2 and AL2023 support Inferentia hosts.
if instanceutils.IsInferentiaInstanceType(instanceType) {
return ErrUnsupportedInstanceTypes("Inferentia", ng.AMIFamily, fmt.Sprintf("please use %s instead", NodeImageFamilyAmazonLinux2))
}
// Only AL2 supports Trainium hosts.
// Only AL2 and AL2023 support Trainium hosts.
if instanceutils.IsTrainiumInstanceType(instanceType) {
return ErrUnsupportedInstanceTypes("Trainium", ng.AMIFamily, fmt.Sprintf("please use %s instead", NodeImageFamilyAmazonLinux2))
}
Expand Down
57 changes: 31 additions & 26 deletions pkg/cfn/builder/managed_nodegroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -263,41 +263,45 @@ func validateLaunchTemplate(launchTemplateData *ec2types.ResponseLaunchTemplateD

func getAMIType(ng *api.ManagedNodeGroup, instanceType string) ekstypes.AMITypes {
amiTypeMapping := map[string]struct {
X86x64 ekstypes.AMITypes
X86GPU ekstypes.AMITypes
ARM ekstypes.AMITypes
ARMGPU ekstypes.AMITypes
X86x64 ekstypes.AMITypes
X86Nvidia ekstypes.AMITypes
X86Neuron ekstypes.AMITypes
ARM ekstypes.AMITypes
ARMGPU ekstypes.AMITypes
}{
api.NodeImageFamilyAmazonLinux2023: {
X86x64: ekstypes.AMITypesAl2023X8664Standard,
ARM: ekstypes.AMITypesAl2023Arm64Standard,
X86x64: ekstypes.AMITypesAl2023X8664Standard,
X86Nvidia: ekstypes.AMITypesAl2023X8664Nvidia,
X86Neuron: ekstypes.AMITypesAl2023X8664Neuron,
ARM: ekstypes.AMITypesAl2023Arm64Standard,
},
api.NodeImageFamilyAmazonLinux2: {
X86x64: ekstypes.AMITypesAl2X8664,
X86GPU: ekstypes.AMITypesAl2X8664Gpu,
ARM: ekstypes.AMITypesAl2Arm64,
X86x64: ekstypes.AMITypesAl2X8664,
X86Nvidia: ekstypes.AMITypesAl2X8664Gpu,
X86Neuron: ekstypes.AMITypesAl2X8664Gpu,
ARM: ekstypes.AMITypesAl2Arm64,
},
api.NodeImageFamilyBottlerocket: {
X86x64: ekstypes.AMITypesBottlerocketX8664,
X86GPU: ekstypes.AMITypesBottlerocketX8664Nvidia,
ARM: ekstypes.AMITypesBottlerocketArm64,
ARMGPU: ekstypes.AMITypesBottlerocketArm64Nvidia,
X86x64: ekstypes.AMITypesBottlerocketX8664,
X86Nvidia: ekstypes.AMITypesBottlerocketX8664Nvidia,
ARM: ekstypes.AMITypesBottlerocketArm64,
ARMGPU: ekstypes.AMITypesBottlerocketArm64Nvidia,
},
api.NodeImageFamilyWindowsServer2019FullContainer: {
X86x64: ekstypes.AMITypesWindowsFull2019X8664,
X86GPU: ekstypes.AMITypesWindowsFull2019X8664,
X86x64: ekstypes.AMITypesWindowsFull2019X8664,
X86Nvidia: ekstypes.AMITypesWindowsFull2019X8664,
},
api.NodeImageFamilyWindowsServer2019CoreContainer: {
X86x64: ekstypes.AMITypesWindowsCore2019X8664,
X86GPU: ekstypes.AMITypesWindowsCore2019X8664,
X86x64: ekstypes.AMITypesWindowsCore2019X8664,
X86Nvidia: ekstypes.AMITypesWindowsCore2019X8664,
},
api.NodeImageFamilyWindowsServer2022FullContainer: {
X86x64: ekstypes.AMITypesWindowsFull2022X8664,
X86GPU: ekstypes.AMITypesWindowsFull2022X8664,
X86x64: ekstypes.AMITypesWindowsFull2022X8664,
X86Nvidia: ekstypes.AMITypesWindowsFull2022X8664,
},
api.NodeImageFamilyWindowsServer2022CoreContainer: {
X86x64: ekstypes.AMITypesWindowsCore2022X8664,
X86GPU: ekstypes.AMITypesWindowsCore2022X8664,
X86x64: ekstypes.AMITypesWindowsCore2022X8664,
X86Nvidia: ekstypes.AMITypesWindowsCore2022X8664,
},
}

Expand All @@ -307,13 +311,14 @@ func getAMIType(ng *api.ManagedNodeGroup, instanceType string) ekstypes.AMITypes
}

switch {
case instanceutils.IsGPUInstanceType(instanceType):
if instanceutils.IsARMInstanceType(instanceType) {
return amiType.ARMGPU
}
return amiType.X86GPU
case instanceutils.IsARMGPUInstanceType(instanceType):
return amiType.ARMGPU
case instanceutils.IsARMInstanceType(instanceType):
return amiType.ARM
case instanceutils.IsNvidiaInstanceType(instanceType):
return amiType.X86Nvidia
case instanceutils.IsNeuronInstanceType(instanceType):
return amiType.X86Neuron
default:
return amiType.X86x64
}
Expand Down
23 changes: 17 additions & 6 deletions pkg/cfn/builder/managed_nodegroup_ami_type_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,23 +77,24 @@ var _ = DescribeTable("Managed Nodegroup AMI type", func(e amiTypeEntry) {
expectedAMIType: "AL2_x86_64",
}),

Entry("AMI type", amiTypeEntry{
Entry("default Nvidia GPU instance type", amiTypeEntry{
nodeGroup: &api.ManagedNodeGroup{
NodeGroupBase: &api.NodeGroupBase{
Name: "test",
Name: "test",
InstanceType: "p2.xlarge",
},
},
expectedAMIType: "AL2023_x86_64_STANDARD",
expectedAMIType: "AL2023_x86_64_NVIDIA",
}),

Entry("default GPU instance type", amiTypeEntry{
Entry("default Neuron GPU instance type", amiTypeEntry{
nodeGroup: &api.ManagedNodeGroup{
NodeGroupBase: &api.NodeGroupBase{
Name: "test",
InstanceType: "p2.xlarge",
InstanceType: "inf1.2xlarge",
},
},
expectedAMIType: "AL2_x86_64_GPU",
expectedAMIType: "AL2023_x86_64_NEURON",
}),

Entry("AL2 GPU instance type", amiTypeEntry{
Expand All @@ -107,6 +108,16 @@ var _ = DescribeTable("Managed Nodegroup AMI type", func(e amiTypeEntry) {
expectedAMIType: "AL2_x86_64_GPU",
}),

Entry("default ARM instance type", amiTypeEntry{
nodeGroup: &api.ManagedNodeGroup{
NodeGroupBase: &api.NodeGroupBase{
Name: "test",
InstanceType: "a1.2xlarge",
},
},
expectedAMIType: "AL2023_ARM_64_STANDARD",
}),

Entry("AL2 ARM instance type", amiTypeEntry{
nodeGroup: &api.ManagedNodeGroup{
NodeGroupBase: &api.NodeGroupBase{
Expand Down
Loading