From 7afffaf4ffc10f78cecfca6b73efe9de35f832cb Mon Sep 17 00:00:00 2001 From: Ilya Dmitrichenko Date: Mon, 1 Apr 2019 16:08:57 +0100 Subject: [PATCH 1/3] Add kops logging retryer --- pkg/eks/api.go | 1 + pkg/eks/logging_retryer.go | 69 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) create mode 100644 pkg/eks/logging_retryer.go diff --git a/pkg/eks/api.go b/pkg/eks/api.go index 0786464574..e51a200bf2 100644 --- a/pkg/eks/api.go +++ b/pkg/eks/api.go @@ -305,6 +305,7 @@ func (c *ClusterProvider) newSession(spec *api.ProviderConfig, endpoint string, } config = config.WithCredentialsChainVerboseErrors(true) + config = request.WithRetryer(config, newLoggingRetryer()) if logger.Level >= api.AWSDebugLevel { config = config.WithLogLevel(aws.LogDebug | aws.LogDebugWithHTTPBody | diff --git a/pkg/eks/logging_retryer.go b/pkg/eks/logging_retryer.go new file mode 100644 index 0000000000..de37f4b4a1 --- /dev/null +++ b/pkg/eks/logging_retryer.go @@ -0,0 +1,69 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package eks + +import ( + "fmt" + "time" + + "github.com/kris-nova/logger" + + "github.com/aws/aws-sdk-go/aws/client" + "github.com/aws/aws-sdk-go/aws/request" +) + +const maxRetries = 13 + +// LoggingRetryer adds some logging when we are retrying, so we have some idea what is happening +// Right now it is very basic - e.g. it only logs when we retry (so doesn't log when we fail due to too many retries) +// It was copied from k8s.io/kops/upup/pkg/fi/cloudup/awsup/logging_retryer.go; the original version used glog, and +// didn't export the constructor +type LoggingRetryer struct { + client.DefaultRetryer +} + +var _ request.Retryer = &LoggingRetryer{} + +func newLoggingRetryer() *LoggingRetryer { + return &LoggingRetryer{ + client.DefaultRetryer{NumMaxRetries: maxRetries}, + } +} + +// RetryRules extends on DefaultRetryer.RetryRules +func (l LoggingRetryer) RetryRules(r *request.Request) time.Duration { + duration := l.DefaultRetryer.RetryRules(r) + + service := r.ClientInfo.ServiceName + name := "?" + if r.Operation != nil { + name = r.Operation.Name + } + methodDescription := service + "/" + name + + var errorDescription string + if r.Error != nil { + // We could check aws error Code & Message, but we expect them to be in the string + errorDescription = fmt.Sprintf("%v", r.Error) + } else { + errorDescription = fmt.Sprintf("%d %s", r.HTTPResponse.StatusCode, r.HTTPResponse.Status) + } + + logger.Debug("retryable error (%s) from %s - will retry after delay of %v", errorDescription, methodDescription, duration) + + return duration +} From 48a6fdec913fbc269fa9a2b57a86ebc1211bab78 Mon Sep 17 00:00:00 2001 From: Ilya Dmitrichenko Date: Mon, 1 Apr 2019 18:18:48 +0100 Subject: [PATCH 2/3] Remove unnecessary `ListStacks` call --- pkg/eks/api.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pkg/eks/api.go b/pkg/eks/api.go index e51a200bf2..6ee4400720 100644 --- a/pkg/eks/api.go +++ b/pkg/eks/api.go @@ -207,12 +207,6 @@ func (c *ClusterProvider) CheckAuth() error { c.Status.iamRoleARN = *output.Arn logger.Debug("role ARN for the current session is %q", c.Status.iamRoleARN) } - { - input := &cloudformation.ListStacksInput{} - if _, err := c.Provider.CloudFormation().ListStacks(input); err != nil { - return errors.Wrap(err, "checking AWS CloudFormation access – cannot list stacks") - } - } return nil } From 180a5ecb446c92bd9e1e4a50b3c5118c1e04a518 Mon Sep 17 00:00:00 2001 From: Ilya Dmitrichenko Date: Mon, 1 Apr 2019 18:24:20 +0100 Subject: [PATCH 3/3] Filter deleted stacks by default This often causes a lot of paging, and we only need to list deleted stacks explicitly in one special case that requires double check if stack had already been deleted when an error has occurred. --- pkg/cfn/manager/api.go | 28 ++++++++++++++++++++++++---- pkg/eks/eks.go | 2 +- pkg/eks/eks_test.go | 37 +++++++++++++++++++++++++++++-------- 3 files changed, 54 insertions(+), 13 deletions(-) diff --git a/pkg/cfn/manager/api.go b/pkg/cfn/manager/api.go index cb4eb6308b..c51ebf8408 100644 --- a/pkg/cfn/manager/api.go +++ b/pkg/cfn/manager/api.go @@ -174,7 +174,9 @@ func (c *StackCollection) ListStacks(nameRegex string, statusFilters ...string) if err != nil { return nil, errors.Wrap(err, "cannot list stacks") } - input := &cloudformation.ListStacksInput{} + input := &cloudformation.ListStacksInput{ + StackStatusFilter: defaultStackStatusFilter(), + } if len(statusFilters) > 0 { input.StackStatusFilter = aws.StringSlice(statusFilters) } @@ -201,9 +203,27 @@ func (c *StackCollection) ListStacks(nameRegex string, statusFilters ...string) return stacks, nil } -// ListReadyStacks gets all of CloudFormation stacks with READY status -func (c *StackCollection) ListReadyStacks(nameRegex string) ([]*Stack, error) { - return c.ListStacks(nameRegex, cloudformation.StackStatusCreateComplete) +func defaultStackStatusFilter() []*string { + return aws.StringSlice( + []string{ + cloudformation.StackStatusCreateInProgress, + cloudformation.StackStatusCreateFailed, + cloudformation.StackStatusCreateComplete, + cloudformation.StackStatusRollbackInProgress, + cloudformation.StackStatusRollbackFailed, + cloudformation.StackStatusRollbackComplete, + cloudformation.StackStatusDeleteInProgress, + cloudformation.StackStatusDeleteFailed, + cloudformation.StackStatusUpdateInProgress, + cloudformation.StackStatusUpdateCompleteCleanupInProgress, + cloudformation.StackStatusUpdateComplete, + cloudformation.StackStatusUpdateRollbackInProgress, + cloudformation.StackStatusUpdateRollbackFailed, + cloudformation.StackStatusUpdateRollbackCompleteCleanupInProgress, + cloudformation.StackStatusUpdateRollbackComplete, + cloudformation.StackStatusReviewInProgress, + }, + ) } // DeleteStack kills a stack by name without waiting for DELETED status diff --git a/pkg/eks/eks.go b/pkg/eks/eks.go index bff14047dd..031c41ea7b 100644 --- a/pkg/eks/eks.go +++ b/pkg/eks/eks.go @@ -226,7 +226,7 @@ func (c *ClusterProvider) doGetCluster(clusterName string, printer printers.Outp if logger.Level >= 4 { spec := &api.ClusterConfig{Metadata: &api.ClusterMeta{Name: clusterName}} - stacks, err := c.NewStackManager(spec).ListReadyStacks(fmt.Sprintf("^(eksclt|EKS)-%s-.*$", clusterName)) + stacks, err := c.NewStackManager(spec).ListStacks(fmt.Sprintf("^(eksclt|EKS)-%s-.*$", clusterName)) if err != nil { return errors.Wrapf(err, "listing CloudFormation stack for %q", clusterName) } diff --git a/pkg/eks/eks_test.go b/pkg/eks/eks_test.go index ca4fb35789..3064c4d481 100644 --- a/pkg/eks/eks_test.go +++ b/pkg/eks/eks_test.go @@ -68,22 +68,43 @@ var _ = Describe("Eks", func() { Expect(p.MockEKS().AssertNumberOfCalls(GinkgoT(), "DescribeCluster", 1)).To(BeTrue()) }) - It("should not call AWS CFN ListStackPages", func() { + It("should not call AWS CFN ListStacksPages", func() { Expect(p.MockCloudFormation().AssertNumberOfCalls(GinkgoT(), "ListStacksPages", 0)).To(BeTrue()) }) }) Context("and debug log level", func() { - var ( - expectedStatusFilter string - ) + BeforeEach(func() { - expectedStatusFilter = "CREATE_COMPLETE" + expectedStatusFilter := []string{ + "CREATE_IN_PROGRESS", + "CREATE_FAILED", + "CREATE_COMPLETE", + "ROLLBACK_IN_PROGRESS", + "ROLLBACK_FAILED", + "ROLLBACK_COMPLETE", + "DELETE_IN_PROGRESS", + "DELETE_FAILED", + "UPDATE_IN_PROGRESS", + "UPDATE_COMPLETE_CLEANUP_IN_PROGRESS", + "UPDATE_COMPLETE", + "UPDATE_ROLLBACK_IN_PROGRESS", + "UPDATE_ROLLBACK_FAILED", + "UPDATE_ROLLBACK_COMPLETE_CLEANUP_IN_PROGRESS", + "UPDATE_ROLLBACK_COMPLETE", + "REVIEW_IN_PROGRESS", + } logger.Level = 4 p.MockCloudFormation().On("ListStacksPages", mock.MatchedBy(func(input *cfn.ListStacksInput) bool { - return *input.StackStatusFilter[0] == expectedStatusFilter + matches := 0 + for i := range input.StackStatusFilter { + if *input.StackStatusFilter[i] == expectedStatusFilter[i] { + matches++ + } + } + return matches == len(expectedStatusFilter) }), mock.Anything).Return(nil) }) @@ -99,7 +120,7 @@ var _ = Describe("Eks", func() { Expect(p.MockEKS().AssertNumberOfCalls(GinkgoT(), "DescribeCluster", 1)).To(BeTrue()) }) - It("should have called AWS CFN ListStackPages", func() { + It("should have called AWS CFN ListStacksPages", func() { Expect(p.MockCloudFormation().AssertNumberOfCalls(GinkgoT(), "ListStacksPages", 1)).To(BeTrue()) }) }) @@ -151,7 +172,7 @@ var _ = Describe("Eks", func() { Expect(p.MockEKS().AssertNumberOfCalls(GinkgoT(), "DescribeCluster", 1)).To(BeTrue()) }) - It("should not call AWS CFN ListStackPages", func() { + It("should not call AWS CFN ListStacksPages", func() { Expect(p.MockCloudFormation().AssertNumberOfCalls(GinkgoT(), "ListStacksPages", 0)).To(BeTrue()) })