[DRAFT] Add capacity memory overhead generation #4517

Draft · wants to merge 2 commits into main
2 changes: 1 addition & 1 deletion hack/code/bandwidth_gen.go
@@ -132,7 +132,7 @@ func main() {

// Format and print to the file
formatted := lo.Must(format.Source([]byte(fmt.Sprintf(fileFormat, license, body))))
- file := lo.Must(os.Create(flag.Args()[0]))
+ file := lo.Must(os.Create(flag.Arg(0)))
lo.Must(file.Write(formatted))
file.Close()
}
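The swap from flag.Args()[0] to flag.Arg(0) trades a panicking index expression for the bounds-safe accessor: flag.Arg(0) returns an empty string when no positional argument was supplied. A minimal standalone sketch (not part of this PR) illustrating the difference:

package main

import (
	"flag"
	"fmt"
)

func main() {
	flag.Parse()
	// flag.Arg(0) returns "" when no positional argument exists, so this never panics.
	fmt.Printf("first positional arg: %q\n", flag.Arg(0))
	// By contrast, flag.Args()[0] indexes the underlying slice directly and panics
	// with "index out of range" when the program is invoked with no arguments.
}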
212 changes: 212 additions & 0 deletions hack/code/memory_overhead_gen.go
@@ -0,0 +1,212 @@
/*
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
"bytes"
"context"
"flag"
"fmt"
"go/format"
"os"
"sort"
"time"

"github.com/avast/retry-go"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/samber/lo"
"go.uber.org/zap"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/util/workqueue"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/config"

"github.com/aws/karpenter-core/pkg/apis/v1alpha5"
"github.com/aws/karpenter-core/pkg/operator/scheme"
"github.com/aws/karpenter/pkg/apis"
"github.com/aws/karpenter/pkg/apis/settings"
"github.com/aws/karpenter/pkg/apis/v1alpha1"
"github.com/aws/karpenter/pkg/providers/instancetype"
)

var pollingOptions = []retry.Option{
retry.Attempts(100), // This whole poll should take ~500s
retry.Delay(time.Second * 5),
retry.DelayType(retry.FixedDelay),
retry.LastErrorOnly(true),
}

var outFile string
var clusterName string

func init() {
flag.StringVar(&clusterName, "cluster-name", "", "cluster name to use when passing subnets into GetInstanceTypes()")
flag.StringVar(&outFile, "out-file", "zz_generated.memory_overhead.go", "file to output the generated data")
flag.Parse()

lo.Must0(apis.AddToScheme(scheme.Scheme))
}

type InstanceTypeMemoryData struct {
Name string
Reported int64
Actual int64
}

const fileFormat = `
//go:build !ignore_autogenerated

%s
package instancetype

// GENERATED FILE. DO NOT EDIT DIRECTLY.
// Update hack/code/memory_overhead_gen.go and re-generate to edit

var (
MemoryOverheadMebibytes = map[string]int64{
%s
}
)
`

func main() {
logger := lo.Must(zap.NewProduction()).Sugar()
restConfig := config.GetConfigOrDie()
kubeClient := lo.Must(client.New(restConfig, client.Options{Scheme: scheme.Scheme}))
ctx := context.Background()
ctx = settings.ToContext(ctx, &settings.Settings{ClusterName: clusterName, IsolatedVPC: true, VMMemoryOverheadPercent: 0})

nodeTemplate := &v1alpha1.AWSNodeTemplate{
ObjectMeta: metav1.ObjectMeta{
Name: "default",
},
// Bottlerocket has the highest memory overhead among the supported AMI families (AL2, Ubuntu, Windows, Bottlerocket)
Spec: v1alpha1.AWSNodeTemplateSpec{
AWS: v1alpha1.AWS{
AMIFamily: &v1alpha1.AMIFamilyBottlerocket,
SecurityGroupSelector: map[string]string{"karpenter.sh/discovery": clusterName},
SubnetSelector: map[string]string{"karpenter.sh/discovery": clusterName},
},
},
}
lo.Must0(kubeClient.Create(ctx, nodeTemplate))
defer func() {
lo.Must0(kubeClient.Delete(ctx, nodeTemplate))
}()

sess := session.Must(session.NewSession())
ec2Client := ec2.New(sess)

var instanceTypes []InstanceTypeMemoryData
lo.Must0(ec2Client.DescribeInstanceTypesPagesWithContext(ctx, &ec2.DescribeInstanceTypesInput{}, func(page *ec2.DescribeInstanceTypesOutput, _ bool) bool {
for _, it := range page.InstanceTypes {
instanceTypes = append(instanceTypes, InstanceTypeMemoryData{
Name: aws.StringValue(it.InstanceType),
Reported: aws.Int64Value(it.MemoryInfo.SizeInMiB),
})
}
return true
}))
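// Launch one Machine per instance type and record the memory capacity reported by the node
// that registers for it. workqueue.ParallelizeUntil (from k8s.io/client-go) runs the closure
// below for every index in [0, len(instanceTypes)) with at most 100 concurrent workers.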
workqueue.ParallelizeUntil(ctx, 100, len(instanceTypes), func(i int) {
machine := &v1alpha5.Machine{
ObjectMeta: metav1.ObjectMeta{
GenerateName: "default-",
},
Spec: v1alpha5.MachineSpec{
Requirements: []v1.NodeSelectorRequirement{
{
Key: v1.LabelInstanceTypeStable,
Operator: v1.NodeSelectorOpIn,
Values: []string{instanceTypes[i].Name},
},
{
Key: v1alpha5.LabelCapacityType,
Operator: v1.NodeSelectorOpIn,
Values: []string{v1alpha5.CapacityTypeOnDemand},
},
},
MachineTemplateRef: &v1alpha5.MachineTemplateRef{
Name: nodeTemplate.Name,
},
},
}
if err := kubeClient.Create(ctx, machine); err != nil {
logger.With("instance-type", instanceTypes[i].Name, "machine", machine.Name).Error(err)
return
}
logger.With("instance-type", instanceTypes[i].Name, "machine", machine.Name).Infof("creating machine for instance type")
// Wait until the corresponding node registers and reports its allocatable
node := &v1.Node{}
if err := retry.Do(func() error {
m := &v1alpha5.Machine{}
if err := kubeClient.Get(ctx, client.ObjectKeyFromObject(machine), m); err != nil {
if errors.IsNotFound(err) {
return retry.Unrecoverable(err)
}
return err
}
if m.Status.NodeName == "" {
return fmt.Errorf("node status hasn't populated")
}
if err := kubeClient.Get(ctx, types.NamespacedName{Name: m.Status.NodeName}, node); err != nil {
return err
}
if node.Status.Allocatable == nil {
return fmt.Errorf("node allocatable details haven't populated yet")
}
return nil
}, pollingOptions...); err != nil {
logger.With("instance-type", instanceTypes[i].Name, "machine", machine.Name).Error(err)
return
}
logger.With("instance-type", instanceTypes[i].Name, "machine", machine.Name, "capacity", node.Status.Capacity).Infof("retrieved capacity information for instance")
instanceTypes[i].Actual = node.Status.Capacity.Memory().Value() / 1024 / 1024
if err := kubeClient.Delete(ctx, machine); err != nil {
logger.With("instance-type", instanceTypes[i].Name, "machine", machine.Name).Error(err)
}
})

// If we didn't get updated data, we should use the existing data
addedTypes := 0
for k, v := range instancetype.MemoryOverheadMebibytes {
if _, ok := lo.Find(instanceTypes, func(i InstanceTypeMemoryData) bool {
return i.Name == k
}); !ok {
addedTypes++
instanceTypes = append(instanceTypes, InstanceTypeMemoryData{Name: k, Reported: v})
}
}
logger.With("count", addedTypes).Info("utilized instance types from existing data")

// Sort the data so it has a consistent ordering
sort.Slice(instanceTypes, func(i, j int) bool {
return instanceTypes[i].Name < instanceTypes[j].Name
})

src := &bytes.Buffer{}
for _, it := range instanceTypes {
src.WriteString(fmt.Sprintf("\"%s\": %d,\n", it.Name, it.Reported-it.Actual))
}

// Format and print to the file
license := lo.Must(os.ReadFile("hack/boilerplate.go.txt"))
formatted := lo.Must(format.Source([]byte(fmt.Sprintf(fileFormat, license, src.String()))))
lo.Must0(os.WriteFile(outFile, formatted, 0644))
}
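For reference, the file written to --out-file is rendered from the fileFormat template above, with one map entry per instance type holding Reported minus Actual mebibytes. A hypothetical example of the generated output (instance type names and values below are illustrative placeholders, not measured data):

//go:build !ignore_autogenerated

// ... license text from hack/boilerplate.go.txt ...

package instancetype

// GENERATED FILE. DO NOT EDIT DIRECTLY.
// Update hack/code/memory_overhead_gen.go and re-generate to edit

var (
	MemoryOverheadMebibytes = map[string]int64{
		"c5.large":  373,
		"m5.xlarge": 696,
	}
)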
12 changes: 8 additions & 4 deletions pkg/providers/instancetype/types.go
@@ -62,7 +62,7 @@ func NewInstanceType(ctx context.Context, info *ec2.InstanceTypeInfo, kc *corev1
Overhead: &cloudprovider.InstanceTypeOverhead{
KubeReserved: kubeReservedResources(cpu(info), pods(ctx, info, amiFamily, kc), ENILimitedPods(ctx, info), amiFamily, kc),
SystemReserved: systemReservedResources(kc),
- EvictionThreshold: evictionThreshold(memory(ctx, info), ephemeralStorage(amiFamily, nodeClass.Spec.BlockDeviceMappings), amiFamily, kc),
+ EvictionThreshold: evictionThreshold(memory(info), ephemeralStorage(amiFamily, nodeClass.Spec.BlockDeviceMappings), amiFamily, kc),
},
}
}
@@ -175,7 +175,7 @@ func computeCapacity(ctx context.Context, info *ec2.InstanceTypeInfo, amiFamily

resourceList := v1.ResourceList{
v1.ResourceCPU: *cpu(info),
- v1.ResourceMemory: *memory(ctx, info),
+ v1.ResourceMemory: *memory(info),
v1.ResourceEphemeralStorage: *ephemeralStorage(amiFamily, blockDeviceMappings),
v1.ResourcePods: *pods(ctx, info, amiFamily, kc),
v1alpha1.ResourceAWSPodENI: *awsPodENI(ctx, aws.StringValue(info.InstanceType)),
@@ -195,15 +195,19 @@ func cpu(info *ec2.InstanceTypeInfo) *resource.Quantity {
return resources.Quantity(fmt.Sprint(*info.VCpuInfo.DefaultVCpus))
}

- func memory(ctx context.Context, info *ec2.InstanceTypeInfo) *resource.Quantity {
+ func memory(info *ec2.InstanceTypeInfo) *resource.Quantity {
sizeInMib := *info.MemoryInfo.SizeInMiB
// Gravitons have an extra 64 MiB of CMA (contiguous memory allocator) reserved memory that we can't use
if len(info.ProcessorInfo.SupportedArchitectures) > 0 && *info.ProcessorInfo.SupportedArchitectures[0] == "arm64" {
sizeInMib -= 64
}
mem := resources.Quantity(fmt.Sprintf("%dMi", sizeInMib))
// Account for VM overhead in calculation
- mem.Sub(resource.MustParse(fmt.Sprintf("%dMi", int64(math.Ceil(float64(mem.Value())*awssettings.FromContext(ctx).VMMemoryOverheadPercent/1024/1024)))))
+ if overhead, ok := MemoryOverheadMebibytes[aws.StringValue(info.InstanceType)]; ok {
+ mem.Sub(resource.MustParse(fmt.Sprintf("%dMi", int64(math.Ceil(float64(overhead)*1.05))))) // add a 5% buffer on top of generated overhead
+ } else {
+ mem.Sub(resource.MustParse(fmt.Sprintf("%dMi", int64(math.Ceil(float64(mem.Value())*0.075/1024/1024)))))
+ }
return mem
}
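To make the new overhead math concrete, here is a standalone sketch with hypothetical numbers (not entries from the generated table): a generated entry of 377 MiB is padded by 5% before being subtracted, while an instance type with no entry falls back to subtracting 7.5% of its reported memory.

package main

import (
	"fmt"
	"math"
)

func main() {
	// Branch 1: a generated entry exists, e.g. a measured overhead of 377 MiB.
	// A 5% buffer is applied before subtracting: ceil(377 * 1.05) = 396 MiB.
	fmt.Println(int64(math.Ceil(377 * 1.05)))

	// Branch 2: no generated entry, so fall back to 7.5% of the reported memory.
	// For an 8192 MiB instance: ceil(8192 * 0.075) = 615 MiB subtracted.
	reportedBytes := float64(8192) * 1024 * 1024
	fmt.Println(int64(math.Ceil(reportedBytes * 0.075 / 1024 / 1024)))
}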
