Skip to content

Commit

Permalink
🐛: ec2/byoip: fix EIP leak when creating machine
Browse files Browse the repository at this point in the history
The instance creation flow is creating by default EIP to instances even
if the BYO IP flow is set. BYO IPv4 creates and associates the EIP to
instance after it is created, preventing paying for additional EIP
(amazon-provided) when creating the instance when the BYO IPv4 Pool is
defined to be used by the machine.

Furthermore, the fix provides additional checks to prevent duplicated EIP
in the BYO IP reconciliation loop. The extra checks include running the
EIP association many times, while the EIP is already associated, and
failures in the log when running the EIP association prematurely - when
the instance isn't ready, Eg ec2 in pending state.
  • Loading branch information
mtulio committed Jun 28, 2024
1 parent 1313226 commit d5882fa
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 42 deletions.
2 changes: 1 addition & 1 deletion controllers/awsmachine_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -541,7 +541,7 @@ func (r *AWSMachineReconciler) reconcileNormal(_ context.Context, machineScope *
// a BYOIP without duplication.
if pool := machineScope.GetElasticIPPool(); pool != nil {
if err := ec2svc.ReconcileElasticIPFromPublicPool(pool, instance); err != nil {
machineScope.Error(err, "failed to associate elastic IP address")
machineScope.Error(err, "failed to reconcile BYO Public IPv4")
return ctrl.Result{}, err
}
}
Expand Down
35 changes: 31 additions & 4 deletions pkg/cloud/services/ec2/eip.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/service/ec2"
"k8s.io/utils/ptr"

infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2"
"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/record"
Expand All @@ -17,16 +18,42 @@ func getElasticIPRoleName(instanceID string) string {

// ReconcileElasticIPFromPublicPool reconciles the elastic IP from a custom Public IPv4 Pool.
func (s *Service) ReconcileElasticIPFromPublicPool(pool *infrav1.ElasticIPPool, instance *infrav1.Instance) error {
// TODO: check if the instance is in the state allowing EIP association.
// Expected instance states: pending or running
// https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-lifecycle.html
iip := ptr.Deref(instance.PublicIP, "")
s.scope.Debug("Reconciling machine with custom Public IPv4 Pool", "instance-id", instance.ID, "instance-state", instance.State, "instance-public-ip", iip, "publicIpv4PoolID", pool.PublicIpv4Pool)

// Requeue when the instance is not ready to be associated.
if instance.State != infrav1.InstanceStateRunning {
err := fmt.Errorf("unable to reconcile Elastic IP Pool to instance %q with state: %v", instance.ID, instance.State)
s.scope.Error(err, "failed to set Elastic IP Pool to machine", "publicIpv4PoolID", pool.PublicIpv4Pool)
return err
}

// Prevent running association every reconciliation when it is already done.
addrs, err := s.netService.GetAddresses(getElasticIPRoleName(instance.ID))
if err != nil {
s.scope.Error(err, "error checking if addresses exists for Elastic IP Pool to machine", "eip-role", getElasticIPRoleName(instance.ID))
return err
}
if len(addrs.Addresses) > 0 {
if len(addrs.Addresses) != 1 {
return fmt.Errorf("unexpected number of EIPs allocated to the role. expected 1, got %d", len(addrs.Addresses))
}
addr := addrs.Addresses[0]
// address is already associated.
if addr.AssociationId != nil && addr.InstanceId != nil && *addr.InstanceId == instance.ID {
s.scope.Debug("machine is already associated with an Elastic IP with custom Public IPv4 pool", "eip", addr.AllocationId, "eip-address", addr.PublicIp, "eip-associationID", addr.AssociationId, "eip-instance", addr.InstanceId)
return nil
}
}

// Associate EIP.
if err := s.getAndAssociateAddressesToInstance(pool, getElasticIPRoleName(instance.ID), instance.ID); err != nil {
return fmt.Errorf("failed to reconcile Elastic IP: %w", err)
}
return nil
}

// ReleaseElasticIP releases a specific elastic IP based on the instance role.
// ReleaseElasticIP releases a specific Elastic IP based on the instance role.
func (s *Service) ReleaseElasticIP(instanceID string) error {
return s.netService.ReleaseAddressByRole(getElasticIPRoleName(instanceID))
}
Expand Down
22 changes: 7 additions & 15 deletions pkg/cloud/services/ec2/instances.go
Original file line number Diff line number Diff line change
Expand Up @@ -563,21 +563,13 @@ func (s *Service) runInstance(role string, i *infrav1.Instance) (*infrav1.Instan

input.NetworkInterfaces = netInterfaces
} else {
if ptr.Deref(i.PublicIPOnLaunch, false) {
input.NetworkInterfaces = []*ec2.InstanceNetworkInterfaceSpecification{
{
DeviceIndex: aws.Int64(0),
SubnetId: aws.String(i.SubnetID),
Groups: aws.StringSlice(i.SecurityGroupIDs),
AssociatePublicIpAddress: i.PublicIPOnLaunch,
},
}
} else {
input.SubnetId = aws.String(i.SubnetID)

if len(i.SecurityGroupIDs) > 0 {
input.SecurityGroupIds = aws.StringSlice(i.SecurityGroupIDs)
}
input.NetworkInterfaces = []*ec2.InstanceNetworkInterfaceSpecification{
{
DeviceIndex: aws.Int64(0),
SubnetId: aws.String(i.SubnetID),
Groups: aws.StringSlice(i.SecurityGroupIDs),
AssociatePublicIpAddress: i.PublicIPOnLaunch,
},
}
}

Expand Down
66 changes: 48 additions & 18 deletions pkg/cloud/services/ec2/instances_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -672,11 +672,16 @@ func TestCreateInstance(t *testing.T) {
}, nil)
m.
RunInstancesWithContext(context.TODO(), &ec2.RunInstancesInput{
ImageId: aws.String("abc"),
InstanceType: aws.String("m5.2xlarge"),
KeyName: aws.String("default"),
SecurityGroupIds: aws.StringSlice([]string{"2", "3"}),
SubnetId: aws.String("subnet-3"),
ImageId: aws.String("abc"),
InstanceType: aws.String("m5.2xlarge"),
KeyName: aws.String("default"),
NetworkInterfaces: []*ec2.InstanceNetworkInterfaceSpecification{
{
DeviceIndex: aws.Int64(0),
SubnetId: aws.String("subnet-3"),
Groups: aws.StringSlice([]string{"2", "3"}),
},
},
TagSpecifications: []*ec2.TagSpecification{
{
ResourceType: aws.String("instance"),
Expand Down Expand Up @@ -870,11 +875,16 @@ func TestCreateInstance(t *testing.T) {
}, nil)
m.
RunInstancesWithContext(context.TODO(), &ec2.RunInstancesInput{
ImageId: aws.String("abc"),
InstanceType: aws.String("m5.2xlarge"),
KeyName: aws.String("default"),
SecurityGroupIds: aws.StringSlice([]string{"4", "3"}),
SubnetId: aws.String("subnet-5"),
ImageId: aws.String("abc"),
InstanceType: aws.String("m5.2xlarge"),
KeyName: aws.String("default"),
NetworkInterfaces: []*ec2.InstanceNetworkInterfaceSpecification{
{
DeviceIndex: aws.Int64(0),
SubnetId: aws.String("subnet-5"),
Groups: aws.StringSlice([]string{"4", "3"}),
},
},
TagSpecifications: []*ec2.TagSpecification{
{
ResourceType: aws.String("instance"),
Expand Down Expand Up @@ -3246,8 +3256,13 @@ func TestCreateInstance(t *testing.T) {
Placement: &ec2.Placement{
Tenancy: &tenancy,
},
SecurityGroupIds: []*string{aws.String("2"), aws.String("3")},
SubnetId: aws.String("subnet-1"),
NetworkInterfaces: []*ec2.InstanceNetworkInterfaceSpecification{
{
DeviceIndex: aws.Int64(0),
SubnetId: aws.String("subnet-1"),
Groups: []*string{aws.String("2"), aws.String("3")},
},
},
TagSpecifications: []*ec2.TagSpecification{
{
ResourceType: aws.String("instance"),
Expand Down Expand Up @@ -3405,8 +3420,13 @@ func TestCreateInstance(t *testing.T) {
Placement: &ec2.Placement{
GroupName: aws.String("placement-group1"),
},
SecurityGroupIds: []*string{aws.String("2"), aws.String("3")},
SubnetId: aws.String("subnet-1"),
NetworkInterfaces: []*ec2.InstanceNetworkInterfaceSpecification{
{
DeviceIndex: aws.Int64(0),
SubnetId: aws.String("subnet-1"),
Groups: []*string{aws.String("2"), aws.String("3")},
},
},
TagSpecifications: []*ec2.TagSpecification{
{
ResourceType: aws.String("instance"),
Expand Down Expand Up @@ -3585,8 +3605,13 @@ func TestCreateInstance(t *testing.T) {
Tenancy: &tenancy,
GroupName: aws.String("placement-group1"),
},
SecurityGroupIds: []*string{aws.String("2"), aws.String("3")},
SubnetId: aws.String("subnet-1"),
NetworkInterfaces: []*ec2.InstanceNetworkInterfaceSpecification{
{
DeviceIndex: aws.Int64(0),
SubnetId: aws.String("subnet-1"),
Groups: []*string{aws.String("2"), aws.String("3")},
},
},
TagSpecifications: []*ec2.TagSpecification{
{
ResourceType: aws.String("instance"),
Expand Down Expand Up @@ -3726,8 +3751,13 @@ func TestCreateInstance(t *testing.T) {
GroupName: aws.String("placement-group1"),
PartitionNumber: aws.Int64(2),
},
SecurityGroupIds: []*string{aws.String("2"), aws.String("3")},
SubnetId: aws.String("subnet-1"),
NetworkInterfaces: []*ec2.InstanceNetworkInterfaceSpecification{
{
DeviceIndex: aws.Int64(0),
SubnetId: aws.String("subnet-1"),
Groups: aws.StringSlice([]string{"2", "3"}),
},
},
TagSpecifications: []*ec2.TagSpecification{
{
ResourceType: aws.String("instance"),
Expand Down
13 changes: 9 additions & 4 deletions pkg/cloud/services/network/eips.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,12 +191,17 @@ func (s *Service) GetOrAllocateAddresses(pool *infrav1.ElasticIPPool, num int, r
return s.getOrAllocateAddresses(num, role, pool)
}

// GetAddresses returns the address associated to a given role.
func (s *Service) GetAddresses(role string) (*ec2.DescribeAddressesOutput, error) {
return s.describeAddresses(role)
}

// ReleaseAddressByRole releases EIP addresses filtering by tag CAPA provider role.
func (s *Service) ReleaseAddressByRole(role string) error {
clusterFilter := []*ec2.Filter{filter.EC2.Cluster(s.scope.Name())}
clusterFilter = append(clusterFilter, filter.EC2.ProviderRole(role))

return s.releaseAddressesWithFilter(clusterFilter)
return s.releaseAddressesWithFilter([]*ec2.Filter{
filter.EC2.ClusterOwned(s.scope.Name()),
filter.EC2.ProviderRole(role),
})
}

// setByoPublicIpv4 check if the config has Public IPv4 Pool defined, then
Expand Down

0 comments on commit d5882fa

Please sign in to comment.