Skip to content

Commit

Permalink
Merge pull request openshift#8584 from r4f4/aws-delete-s3-bucket
Browse files Browse the repository at this point in the history
OCPBUGS-35037: aws: delete ignition bucket on bootstrap destroy
  • Loading branch information
openshift-merge-bot[bot] authored Jun 13, 2024
2 parents 38c6f89 + 9d17c12 commit 972c722
Show file tree
Hide file tree
Showing 4 changed files with 127 additions and 20 deletions.
7 changes: 6 additions & 1 deletion pkg/asset/manifests/aws/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ func GenerateClusterAssets(ic *installconfig.InstallConfig, clusterID *installco
},
},
S3Bucket: &capa.S3Bucket{
Name: fmt.Sprintf("openshift-bootstrap-data-%s", clusterID.InfraID),
Name: GetIgnitionBucketName(clusterID.InfraID),
PresignedURLDuration: &metav1.Duration{Duration: 1 * time.Hour},
BestEffortDeleteObjects: ptr.To(ic.Config.AWS.BestEffortDeleteIgnition),
},
Expand Down Expand Up @@ -265,3 +265,8 @@ func GenerateClusterAssets(ic *installconfig.InstallConfig, clusterID *installco
},
}, nil
}

// GetIgnitionBucketName returns the name of the bucket for the given cluster.
func GetIgnitionBucketName(infraID string) string {
return fmt.Sprintf("openshift-bootstrap-data-%s", infraID)
}
106 changes: 93 additions & 13 deletions pkg/infrastructure/aws/clusterapi/aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ import (
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/ec2"
"github.com/aws/aws-sdk-go/service/elbv2"
"github.com/aws/aws-sdk-go/service/s3"
"github.com/aws/aws-sdk-go/service/s3/s3manager"
"github.com/sirupsen/logrus"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/utils/ptr"
Expand All @@ -30,12 +32,15 @@ var (
_ clusterapi.PreProvider = (*Provider)(nil)
_ clusterapi.InfraReadyProvider = (*Provider)(nil)
_ clusterapi.BootstrapDestroyer = (*Provider)(nil)
_ clusterapi.PostDestroyer = (*Provider)(nil)

errNotFound = errors.New("not found")
)

// Provider implements AWS CAPI installation.
type Provider struct{}
type Provider struct {
bestEffortDeleteIgnition bool
}

// Name gives the name of the provider, AWS.
func (*Provider) Name() string { return awstypes.Name }
Expand Down Expand Up @@ -220,25 +225,28 @@ func getHostedZoneIDForNLB(ctx context.Context, awsSession *session.Session, reg

// DestroyBootstrap removes aws bootstrap resources not handled
// by the deletion of the bootstrap machine by the capi controllers.
func (*Provider) DestroyBootstrap(ctx context.Context, in clusterapi.BootstrapDestroyInput) error {
if err := removeSSHRule(ctx, in.Client, in.Metadata.InfraID); err != nil {
return fmt.Errorf("failed to remove bootstrap SSH rule: %w", err)
}
return nil
}

// removeSSHRule removes the SSH rule for accessing the bootstrap node
// by removing the rule from the cluster spec and updating the object.
func removeSSHRule(ctx context.Context, cl k8sClient.Client, infraID string) error {
func (p *Provider) DestroyBootstrap(ctx context.Context, in clusterapi.BootstrapDestroyInput) error {
awsCluster := &capa.AWSCluster{}
key := k8sClient.ObjectKey{
Name: infraID,
Name: in.Metadata.InfraID,
Namespace: capiutils.Namespace,
}
if err := cl.Get(ctx, key, awsCluster); err != nil {
if err := in.Client.Get(ctx, key, awsCluster); err != nil {
return fmt.Errorf("failed to get AWSCluster: %w", err)
}

// Save this value for use in the post-destroy hook since we don't have capi running anymore by that point.
p.bestEffortDeleteIgnition = ptr.Deref(awsCluster.Spec.S3Bucket.BestEffortDeleteObjects, false)

if err := removeSSHRule(ctx, in.Client, in.Metadata.InfraID, awsCluster); err != nil {
return fmt.Errorf("failed to remove bootstrap SSH rule: %w", err)
}
return nil
}

// removeSSHRule removes the SSH rule for accessing the bootstrap node
// by removing the rule from the cluster spec and updating the object.
func removeSSHRule(ctx context.Context, cl k8sClient.Client, infraID string, awsCluster *capa.AWSCluster) error {
postBootstrapRules := []capa.IngressRule{}
for _, rule := range awsCluster.Spec.NetworkSpec.AdditionalControlPlaneIngressRules {
if strings.EqualFold(rule.Description, awsmanifest.BootstrapSSHDescription) {
Expand All @@ -254,6 +262,10 @@ func removeSSHRule(ctx context.Context, cl k8sClient.Client, infraID string) err
}
logrus.Debug("Updated AWSCluster to remove bootstrap SSH rule")

key := k8sClient.ObjectKey{
Name: infraID,
Namespace: capiutils.Namespace,
}
timeout := 15 * time.Minute
untilTime := time.Now().Add(timeout)
warnTime := time.Now().Add(5 * time.Minute)
Expand Down Expand Up @@ -299,3 +311,71 @@ func removeSSHRule(ctx context.Context, cl k8sClient.Client, infraID string) err

return nil
}

// PostDestroy deletes the ignition bucket after capi stopped running, so it won't try to reconcile the bucket.
func (p *Provider) PostDestroy(ctx context.Context, in clusterapi.PostDestroyerInput) error {
region := in.Metadata.AWS.Region
session, err := awsconfig.GetSessionWithOptions(
awsconfig.WithRegion(region),
awsconfig.WithServiceEndpoints(region, in.Metadata.AWS.ServiceEndpoints),
)
if err != nil {
return fmt.Errorf("failed to create aws session: %w", err)
}

bucketName := awsmanifest.GetIgnitionBucketName(in.Metadata.InfraID)
if err := removeS3Bucket(ctx, session, bucketName); err != nil {
if p.bestEffortDeleteIgnition {
logrus.Warnf("failed to delete ignition bucket %s: %v", bucketName, err)
return nil
}
return fmt.Errorf("failed to delete ignition bucket %s: %w", bucketName, err)
}

return nil
}

// removeS3Bucket deletes an s3 bucket given its name.
func removeS3Bucket(ctx context.Context, session *session.Session, bucketName string) error {
client := s3.New(session)

iter := s3manager.NewDeleteListIterator(client, &s3.ListObjectsInput{
Bucket: aws.String(bucketName),
})
err := s3manager.NewBatchDeleteWithClient(client).Delete(ctx, iter)
if err != nil && !isBucketNotFound(err) {
return err
}
logrus.Debugf("bucket %q emptied", bucketName)

if _, err := client.DeleteBucketWithContext(ctx, &s3.DeleteBucketInput{Bucket: aws.String(bucketName)}); err != nil {
if isBucketNotFound(err) {
logrus.Debugf("bucket %q already deleted", bucketName)
return nil
}
return err
}
return nil
}

func isBucketNotFound(err interface{}) bool {
switch s3Err := err.(type) {
case awserr.Error:
if s3Err.Code() == s3.ErrCodeNoSuchBucket {
return true
}
origErr := s3Err.OrigErr()
if origErr != nil {
return isBucketNotFound(origErr)
}
case s3manager.Error:
if s3Err.OrigErr != nil {
return isBucketNotFound(s3Err.OrigErr)
}
case s3manager.Errors:
if len(s3Err) == 1 {
return isBucketNotFound(s3Err[0])
}
}
return false
}
23 changes: 17 additions & 6 deletions pkg/infrastructure/clusterapi/clusterapi.go
Original file line number Diff line number Diff line change
Expand Up @@ -386,9 +386,9 @@ func (i *InfraProvider) DestroyBootstrap(ctx context.Context, dir string) error

machineDeletionTimeout := 5 * time.Minute
logrus.Infof("Waiting up to %v for bootstrap machine deletion %s/%s...", machineDeletionTimeout, machineNamespace, machineName)
ctx, cancel := context.WithTimeout(ctx, machineDeletionTimeout)
wait.UntilWithContext(ctx, func(context.Context) {
err := sys.Client().Get(ctx, client.ObjectKey{
cctx, cancel := context.WithTimeout(ctx, machineDeletionTimeout)
wait.UntilWithContext(cctx, func(context.Context) {
err := sys.Client().Get(cctx, client.ObjectKey{
Name: machineName,
Namespace: machineNamespace,
}, &clusterv1.Machine{})
Expand All @@ -402,14 +402,25 @@ func (i *InfraProvider) DestroyBootstrap(ctx context.Context, dir string) error
}
}, 2*time.Second)

err = ctx.Err()
err = cctx.Err()
if err != nil && !errors.Is(err, context.Canceled) {
logrus.Warnf("Timeout deleting bootstrap machine: %s", err)
} else {
logrus.Infof("Finished destroying bootstrap resources")
}
clusterapi.System().Teardown()

if p, ok := i.impl.(PostDestroyer); ok {
postDestroyInput := PostDestroyerInput{
Metadata: *metadata,
}
if err := p.PostDestroy(ctx, postDestroyInput); err != nil {
return fmt.Errorf("failed during post-destroy hook: %w", err)
}
logrus.Debugf("Finished running post-destroy hook")
} else {
logrus.Infof("no post-destroy requirements for the %s provider", i.impl.Name())
}

logrus.Infof("Finished destroying bootstrap resources")
return nil
}

Expand Down
11 changes: 11 additions & 0 deletions pkg/infrastructure/clusterapi/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,3 +106,14 @@ type BootstrapDestroyInput struct {
Client client.Client
Metadata types.ClusterMetadata
}

// PostDestroyer allows platform-specific behavior after bootstrap has been destroyed and
// ClusterAPI has stopped running.
type PostDestroyer interface {
PostDestroy(ctx context.Context, in PostDestroyerInput) error
}

// PostDestroyerInput collects args passed to the PostDestroyer hook.
type PostDestroyerInput struct {
Metadata types.ClusterMetadata
}

0 comments on commit 972c722

Please sign in to comment.