Skip to content

Commit

Permalink
Add capacity difference generation
Browse files Browse the repository at this point in the history
  • Loading branch information
jonathan-innis committed Sep 4, 2023
1 parent 65f44c6 commit 439b6fe
Show file tree
Hide file tree
Showing 4 changed files with 945 additions and 2 deletions.
2 changes: 1 addition & 1 deletion hack/code/bandwidth_gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ func main() {

// Format and print to the file
formatted := lo.Must(format.Source([]byte(fmt.Sprintf(fileFormat, license, body))))
file := lo.Must(os.Create(flag.Args()[0]))
file := lo.Must(os.Create(flag.Arg(0)))
lo.Must(file.Write(formatted))
file.Close()
}
Expand Down
212 changes: 212 additions & 0 deletions hack/code/memory_overhead_gen.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
/*
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
"bytes"
"context"
"flag"
"fmt"
"go/format"
"os"
"sort"
"time"

"github.com/avast/retry-go"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/samber/lo"
"go.uber.org/zap"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/client-go/util/workqueue"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/client/config"

"github.com/aws/karpenter-core/pkg/apis/v1alpha5"
"github.com/aws/karpenter-core/pkg/operator/scheme"
"github.com/aws/karpenter/pkg/apis"
"github.com/aws/karpenter/pkg/apis/settings"
"github.com/aws/karpenter/pkg/apis/v1alpha1"
"github.com/aws/karpenter/pkg/providers/instancetype"
)

var pollingOptions = []retry.Option{
retry.Attempts(100), // This whole poll should take ~500s
retry.Delay(time.Second * 5),
retry.DelayType(retry.FixedDelay),
retry.LastErrorOnly(true),
}

var outFile string
var clusterName string

func init() {
flag.StringVar(&clusterName, "cluster-name", "", "cluster name to use when passing subnets into GetInstanceTypes()")
flag.StringVar(&outFile, "out-file", "zz_generated.memory_overhead.go", "file to output the generated data")
flag.Parse()

lo.Must0(apis.AddToScheme(scheme.Scheme))
}

type InstanceTypeMemoryData struct {
Name string
Reported int64
Actual int64
}

const fileFormat = `
//go:build !ignore_autogenerated
%s
package instancetype
// GENERATED FILE. DO NOT EDIT DIRECTLY.
// Update hack/code/memory_overhead_gen.go and re-generate to edit
var (
MemoryOverheadMebibytes = map[string]int64{
%s
}
)
`

func main() {
logger := lo.Must(zap.NewProduction()).Sugar()
restConfig := config.GetConfigOrDie()
kubeClient := lo.Must(client.New(restConfig, client.Options{Scheme: scheme.Scheme}))
ctx := context.Background()
ctx = settings.ToContext(ctx, &settings.Settings{ClusterName: clusterName, IsolatedVPC: true, VMMemoryOverheadPercent: 0})

nodeTemplate := &v1alpha1.AWSNodeTemplate{
ObjectMeta: metav1.ObjectMeta{
Name: "default",
},
// Bottlerocket has the highest overhead between AL2, Ubuntu, Windows, and BR
Spec: v1alpha1.AWSNodeTemplateSpec{
AWS: v1alpha1.AWS{
AMIFamily: &v1alpha1.AMIFamilyBottlerocket,
SecurityGroupSelector: map[string]string{"karpenter.sh/discovery": clusterName},
SubnetSelector: map[string]string{"karpenter.sh/discovery": clusterName},
},
},
}
lo.Must0(kubeClient.Create(ctx, nodeTemplate))
defer func() {
lo.Must0(kubeClient.Delete(ctx, nodeTemplate))
}()

sess := session.Must(session.NewSession())
ec2Client := ec2.New(sess)

var instanceTypes []InstanceTypeMemoryData
lo.Must0(ec2Client.DescribeInstanceTypesPagesWithContext(ctx, &ec2.DescribeInstanceTypesInput{}, func(page *ec2.DescribeInstanceTypesOutput, _ bool) bool {
for _, it := range page.InstanceTypes {
instanceTypes = append(instanceTypes, InstanceTypeMemoryData{
Name: aws.StringValue(it.InstanceType),
Reported: aws.Int64Value(it.MemoryInfo.SizeInMiB),
})
}
return true
}))
workqueue.ParallelizeUntil(ctx, 100, len(instanceTypes), func(i int) {
machine := &v1alpha5.Machine{
ObjectMeta: metav1.ObjectMeta{
GenerateName: "default-",
},
Spec: v1alpha5.MachineSpec{
Requirements: []v1.NodeSelectorRequirement{
{
Key: v1.LabelInstanceTypeStable,
Operator: v1.NodeSelectorOpIn,
Values: []string{instanceTypes[i].Name},
},
{
Key: v1alpha5.LabelCapacityType,
Operator: v1.NodeSelectorOpIn,
Values: []string{v1alpha5.CapacityTypeOnDemand},
},
},
MachineTemplateRef: &v1alpha5.MachineTemplateRef{
Name: nodeTemplate.Name,
},
},
}
if err := kubeClient.Create(ctx, machine); err != nil {
logger.With("instance-type", instanceTypes[i].Name, "machine", machine.Name).Error(err)
return
}
logger.With("instance-type", instanceTypes[i].Name, "machine", machine.Name).Infof("creating machine for instance type")
// Wait until the corresponding node registers and reports its allocatable
node := &v1.Node{}
if err := retry.Do(func() error {
m := &v1alpha5.Machine{}
if err := kubeClient.Get(ctx, client.ObjectKeyFromObject(machine), m); err != nil {
if errors.IsNotFound(err) {
return retry.Unrecoverable(err)
}
return err
}
if m.Status.NodeName == "" {
return fmt.Errorf("node status hasn't populated")
}
if err := kubeClient.Get(ctx, types.NamespacedName{Name: m.Status.NodeName}, node); err != nil {
return err
}
if node.Status.Allocatable == nil {
return fmt.Errorf("node allocatable details haven't populated yet")
}
return nil
}, pollingOptions...); err != nil {
logger.With("instance-type", instanceTypes[i].Name, "machine", machine.Name).Error(err)
return
}
logger.With("instance-type", instanceTypes[i].Name, "machine", machine.Name, "capacity", node.Status.Capacity).Infof("retrieved capacity information for instance")
instanceTypes[i].Actual = node.Status.Capacity.Memory().Value() / 1024 / 1024
if err := kubeClient.Delete(ctx, machine); err != nil {
logger.With("instance-type", instanceTypes[i].Name, "machine", machine.Name).Error(err)
}
})

// If we didn't get updated data, we should use the existing data
addedTypes := 0
for k, v := range instancetype.MemoryOverheadMebibytes {
if _, ok := lo.Find(instanceTypes, func(i InstanceTypeMemoryData) bool {
return i.Name == k
}); !ok {
addedTypes++
instanceTypes = append(instanceTypes, InstanceTypeMemoryData{Name: k, Reported: v})
}
}
logger.With("count", addedTypes).Info("utilized instance types from existing data")

// Sort the data so it has a consistent ordering
sort.Slice(instanceTypes, func(i, j int) bool {
return instanceTypes[i].Name < instanceTypes[j].Name
})

src := &bytes.Buffer{}
for _, it := range instanceTypes {
src.WriteString(fmt.Sprintf("\"%s\": %d,\n", it.Name, it.Reported-it.Actual))
}

// Format and print to the file
license := lo.Must(os.ReadFile("hack/boilerplate.go.txt"))
formatted := lo.Must(format.Source([]byte(fmt.Sprintf(fileFormat, license, src.String()))))
lo.Must0(os.WriteFile(outFile, formatted, 0644))
}
6 changes: 5 additions & 1 deletion pkg/providers/instancetype/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,11 @@ func memory(ctx context.Context, info *ec2.InstanceTypeInfo) *resource.Quantity
}
mem := resources.Quantity(fmt.Sprintf("%dMi", sizeInMib))
// Account for VM overhead in calculation
mem.Sub(resource.MustParse(fmt.Sprintf("%dMi", int64(math.Ceil(float64(mem.Value())*awssettings.FromContext(ctx).VMMemoryOverheadPercent/1024/1024)))))
if overhead, ok := MemoryOverheadMebibytes[aws.StringValue(info.InstanceType)]; ok {
mem.Sub(resource.MustParse(fmt.Sprintf("%dMi", int64(math.Ceil(float64(overhead)*1.05))))) // add a 5% buffer on top of generated overhead
} else {
mem.Sub(resource.MustParse(fmt.Sprintf("%dMi", int64(math.Ceil(float64(mem.Value())*0.075/1024/1024)))))
}
return mem
}

Expand Down
Loading

0 comments on commit 439b6fe

Please sign in to comment.