Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Retry all GCE's 403 per-minute quota exceeded errors #4223

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .changelog/5913.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
```release-note:enhancement
provider: modified request retry logic to retry all per-minute quota-exceeded errors returned with a 403 error code. Previously, only errors for read requests were retried. This will generally affect Google Compute Engine resources.
```
2 changes: 1 addition & 1 deletion google-beta/common_operation.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ func CommonRefreshFunc(w Waiter) resource.StateRefreshFunc {
op, err := w.QueryOp()
if err != nil {
// Retry 404 when getting operation (not resource state)
if isRetryableError(err, isNotFoundRetryableError("GET operation"), isOperationReadQuotaError) {
if isRetryableError(err, isNotFoundRetryableError("GET operation")) {
log.Printf("[DEBUG] Dismissed retryable error on GET operation %q: %s", w.OpName(), err)
return nil, "done: false", nil
}
Expand Down
27 changes: 10 additions & 17 deletions google-beta/error_retry_predicates.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"log"
"net"
"net/url"
"regexp"
"strings"

"google.golang.org/api/googleapi"
Expand Down Expand Up @@ -45,7 +46,7 @@ var defaultErrorRetryPredicates = []RetryErrorPredicateFunc{
// reads, causing significant failure for our CI and for large customers.
// GCE returns the wrong error code, as this should be a 429, which we retry
// already.
is403ReadRequestsForMinuteError,
is403QuotaExceededPerMinuteError,
}

/** END GLOBAL ERROR RETRY PREDICATES HERE **/
Expand Down Expand Up @@ -127,15 +128,18 @@ func isSubnetworkUnreadyError(err error) (bool, string) {

// GCE (and possibly other APIs) incorrectly return a 403 rather than a 429 on
// rate limits.
func is403ReadRequestsForMinuteError(err error) (bool, string) {
func is403QuotaExceededPerMinuteError(err error) (bool, string) {
gerr, ok := err.(*googleapi.Error)
if !ok {
return false, ""
}

if gerr.Code == 403 && strings.Contains(gerr.Body, "Quota exceeded for quota metric") && strings.Contains(gerr.Body, "Read requests per minute") {
log.Printf("[DEBUG] Dismissed an error as retryable based on error code 403 and error message 'Quota exceeded for quota metric' on metric `Read requests per minute`: %s", err)
return true, "Read requests per minute"
var QuotaRegex = regexp.MustCompile(`Quota exceeded for quota metric '(?P<Metric>.*)' and limit '(?P<Limit>.* per minute)' of service`)
if gerr.Code == 403 && QuotaRegex.MatchString(gerr.Body) {
matches := QuotaRegex.FindStringSubmatch(gerr.Body)
metric := matches[QuotaRegex.SubexpIndex("Metric")]
limit := matches[QuotaRegex.SubexpIndex("Limit")]
log.Printf("[DEBUG] Dismissed an error as retryable based on error code 403 and error message 'Quota exceeded for quota metric `%s`: %s", metric, err)
return true, fmt.Sprintf("Waiting for quota limit %s to refresh", limit)
}
return false, ""
}
Expand Down Expand Up @@ -260,17 +264,6 @@ func isBigqueryIAMQuotaError(err error) (bool, string) {
return false, ""
}

// isOperationReadQuotaError reports whether err is a *googleapi.Error with
// code 403 whose body contains "Quota exceeded for quota group". When it is,
// the error is treated as retryable and a short wait reason is returned;
// any other error yields (false, "").
func isOperationReadQuotaError(err error) (bool, string) {
	apiErr, isAPIErr := err.(*googleapi.Error)
	if !isAPIErr {
		return false, ""
	}
	// Both conditions must hold: a 403 status and the quota-group message.
	if apiErr.Code != 403 || !strings.Contains(apiErr.Body, "Quota exceeded for quota group") {
		return false, ""
	}
	return true, "Waiting for quota to refresh"
}

// Retry if Monitoring operation returns a 409 with a specific message for
// concurrent operations.
func isMonitoringConcurrentEditError(err error) (bool, string) {
Expand Down
26 changes: 24 additions & 2 deletions google-beta/error_retry_predicates_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,14 +84,36 @@ func TestIsCommonRetryableErrorCode_otherError(t *testing.T) {
func TestIsOperationReadQuotaError_quotaExceeded(t *testing.T) {
err := googleapi.Error{
Code: 403,
Body: "Quota exceeded for quota group 'OperationReadGroup' and limit 'Operation read requests per 100 seconds' of service 'compute.googleapis.com' for consumer 'project_number:11111111'.",
Body: "Quota exceeded for quota metric 'OperationReadGroup' and limit 'Operation read requests per minute' of service 'compute.googleapis.com' for consumer 'project_number:11111111'.",
}
isRetryable, _ := isOperationReadQuotaError(&err)
isRetryable, _ := is403QuotaExceededPerMinuteError(&err)
if !isRetryable {
t.Errorf("Error not detected as retryable")
}
}

// TestIs403QuotaExceededPerMinuteError_perMinuteQuotaExceeded verifies that a
// 403 error whose body names a "per minute" limit is reported as retryable.
func TestIs403QuotaExceededPerMinuteError_perMinuteQuotaExceeded(t *testing.T) {
	quotaErr := &googleapi.Error{
		Code: 403,
		Body: "Quota exceeded for quota metric 'Queries' and limit 'Queries per minute' of service 'compute.googleapis.com' for consumer 'project_number:11111111'.",
	}
	if retryable, _ := is403QuotaExceededPerMinuteError(quotaErr); !retryable {
		t.Errorf("Error not detected as retryable")
	}
}

// TestIs403QuotaExceededPerMinuteError_perDayQuotaExceededNotRetryable verifies
// that a 403 error whose body names a "per day" limit is NOT reported as
// retryable by the per-minute predicate.
func TestIs403QuotaExceededPerMinuteError_perDayQuotaExceededNotRetryable(t *testing.T) {
	quotaErr := &googleapi.Error{
		Code: 403,
		Body: "Quota exceeded for quota metric 'Queries' and limit 'Queries per day' of service 'compute.googleapis.com' for consumer 'project_number:11111111'.",
	}
	if retryable, _ := is403QuotaExceededPerMinuteError(quotaErr); retryable {
		t.Errorf("Error incorrectly detected as retryable")
	}
}

func TestGRPCRetryable(t *testing.T) {
code := codes.FailedPrecondition
err := status.Error(code, "is retryable")
Expand Down