Skip to content

Commit

Permalink
🐛 add retry loop to graphQL commit queries which timeout on large git…
Browse files Browse the repository at this point in the history
…hub repos (#3680)

* try to always paginate

in the event of timeouts, make our pagination smaller

Signed-off-by: Spencer Schrock <sschrock@google.com>

* add retry test

Signed-off-by: Spencer Schrock <sschrock@google.com>

---------

Signed-off-by: Spencer Schrock <sschrock@google.com>
  • Loading branch information
spencerschrock committed Nov 20, 2023
1 parent 76878e5 commit 1a17bb8
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 24 deletions.
48 changes: 24 additions & 24 deletions clients/githubrepo/graphql.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ const (
issueCommentsToAnalyze = 30
reviewsToAnalyze = 30
labelsToAnalyze = 30

// https://docs.github.com/en/graphql/overview/rate-limits-and-node-limits-for-the-graphql-api#node-limit
defaultPageLimit = 100
retryLimit = 3
)

//nolint:govet
Expand Down Expand Up @@ -155,28 +159,35 @@ func (handler *graphqlHandler) init(ctx context.Context, repourl *repoURL, commi
}

func populateCommits(handler *graphqlHandler, vars map[string]interface{}) ([]clients.Commit, error) {
var allCommits []clients.Commit
var commitsLeft githubv4.Int
var commits []clients.Commit
commitsLeft, ok := vars["commitsToAnalyze"].(githubv4.Int)
if !ok {
return nil, nil
return nil, sce.WithMessage(sce.ErrScorecardInternal, "unexpected type")
}
for vars["commitsToAnalyze"] = githubv4.Int(100); commitsLeft > 0; commitsLeft = commitsLeft - 100 {
if commitsLeft < 100 {
vars["commitsToAnalyze"] = commitsLeft
}
err := handler.client.Query(handler.ctx, handler.data, vars)
if err != nil {
return nil, fmt.Errorf("failed to populate commits: %w", err)
commitsRequested := min(defaultPageLimit, commitsLeft)
var retries int
for commitsLeft > 0 {
vars["commitsToAnalyze"] = commitsRequested
if err := handler.client.Query(handler.ctx, handler.data, vars); err != nil {
// 502 usually indicate timeouts, where we're requesting too much data
// so make our requests smaller and try again
if retries < retryLimit && strings.Contains(err.Error(), "502 Bad Gateway") {
retries++
commitsRequested /= 2
continue
}
return nil, sce.WithMessage(sce.ErrScorecardInternal, fmt.Sprintf("githubv4.Query: %v", err))
}
vars["historyCursor"] = handler.data.Repository.Object.Commit.History.PageInfo.EndCursor
tmp, err := commitsFrom(handler.data, handler.repourl.owner, handler.repourl.repo)
if err != nil {
return nil, fmt.Errorf("failed to populate commits: %w", err)
}
allCommits = append(allCommits, tmp...)
commits = append(commits, tmp...)
commitsLeft -= commitsRequested
commitsRequested = min(commitsRequested, commitsLeft)
}
return allCommits, nil
return commits, nil
}

func (handler *graphqlHandler) setup() error {
Expand All @@ -194,18 +205,7 @@ func (handler *graphqlHandler) setup() error {
"commitExpression": githubv4.String(commitExpression),
"historyCursor": (*githubv4.String)(nil),
}
// if NumberOfCommits set to < 99 we are required by the graphql to page by 100 commits.
if handler.commitDepth > 99 {
handler.commits, handler.errSetup = populateCommits(handler, vars)
handler.issues = issuesFrom(handler.data)
handler.archived = bool(handler.data.Repository.IsArchived)
return
}
if err := handler.client.Query(handler.ctx, handler.data, vars); err != nil {
handler.errSetup = sce.WithMessage(sce.ErrScorecardInternal, fmt.Sprintf("githubv4.Query: %v", err))
return
}
handler.commits, handler.errSetup = commitsFrom(handler.data, handler.repourl.owner, handler.repourl.repo)
handler.commits, handler.errSetup = populateCommits(handler, vars)
handler.issues = issuesFrom(handler.data)
handler.archived = bool(handler.data.Repository.IsArchived)
})
Expand Down
54 changes: 54 additions & 0 deletions clients/githubrepo/graphql_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// Copyright 2023 OpenSSF Scorecard Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package githubrepo

import (
"context"
"net/http"
"testing"

"github.com/shurcooL/githubv4"
)

type badGatewayRoundTripper struct {
requestCounter *int
}

func (b badGatewayRoundTripper) RoundTrip(*http.Request) (*http.Response, error) {
*b.requestCounter += 1
return &http.Response{
StatusCode: http.StatusBadGateway,
Status: "502 Bad Gateway",
}, nil
}

func Test_getCommits_retry(t *testing.T) {
var nRequests int
rt := badGatewayRoundTripper{requestCounter: &nRequests}
handler := graphqlHandler{
client: githubv4.NewClient(&http.Client{
Transport: rt,
}),
}
handler.init(context.Background(), &repoURL{}, 1)
_, err := handler.getCommits()
if err == nil {
t.Error("expected error")
}
want := retryLimit + 1 // 1 represents the initial request
if *rt.requestCounter != want {
t.Errorf("wanted %d retries, got %d", want, *rt.requestCounter)
}
}

0 comments on commit 1a17bb8

Please sign in to comment.