From e4aab7ea1118c7b3389468c2d1412489459eaab0 Mon Sep 17 00:00:00 2001 From: Alexis-Maurer Fortin Date: Mon, 9 Sep 2024 09:37:05 -0400 Subject: [PATCH] Add Repo Metadata (#193) --- analyze/analyze.go | 35 +++++++- models/github_actions.go | 4 +- models/package_insights.go | 16 ++++ providers/github/client.go | 89 +++++++++++++++++++- providers/github/round_tripper_rate_limit.go | 40 +++++++++ providers/gitlab/client.go | 76 ++++++++++++++++- providers/local/client.go | 56 ++++++++++++ 7 files changed, 309 insertions(+), 7 deletions(-) create mode 100644 providers/github/round_tripper_rate_limit.go diff --git a/analyze/analyze.go b/analyze/analyze.go index 6225eaf..540c662 100644 --- a/analyze/analyze.go +++ b/analyze/analyze.go @@ -26,6 +26,20 @@ type Repository interface { GetRepoIdentifier() string GetIsFork() bool BuildGitURL(baseURL string) string + GetHasIssues() bool + GetHasWiki() bool + GetHasDiscussion() bool + GetOpenIssuesCount() int + GetForksCount() int + GetStarsCount() int + GetPrimaryLanguage() string + GetSize() int + GetDefaultBranch() string + GetLicense() string + GetIsTemplate() bool + GetOrganizationID() int + GetRepositoryID() int + GetIsEmpty() bool } type RepoBatch struct { @@ -316,12 +330,27 @@ func (a *Analyzer) generatePackageInsights(ctx context.Context, tempDir string, } pkg := &models.PackageInsights{ - Purl: purl.String(), LastCommitedAt: commitDate.Format(time.RFC3339), - SourceGitCommitSha: commitSha, + Purl: purl.String(), SourceScmType: repo.GetProviderName(), SourceGitRepo: repo.GetRepoIdentifier(), SourceGitRef: ref, + SourceGitCommitSha: commitSha, + OrgID: repo.GetOrganizationID(), + RepoID: repo.GetRepositoryID(), + RepoSize: repo.GetSize(), + DefaultBranch: repo.GetDefaultBranch(), + IsFork: repo.GetIsFork(), + IsEmpty: repo.GetIsEmpty(), + ForksCount: repo.GetForksCount(), + StarsCount: repo.GetStarsCount(), + IsTemplate: repo.GetIsTemplate(), + HasIssues: repo.GetHasIssues(), + OpenIssuesCount: repo.GetOpenIssuesCount(), + HasWiki: repo.GetHasWiki(), + HasDiscussions: repo.GetHasDiscussion(), + PrimaryLanguage: repo.GetPrimaryLanguage(), + License: repo.GetLicense(), } err = pkg.NormalizePurl() if err != nil { @@ -339,7 +368,7 @@ func (a *Analyzer) cloneRepoToTemp(ctx context.Context, gitURL string, token str err = a.GitClient.Clone(ctx, tempDir, gitURL, token, ref) if err != nil { os.RemoveAll(tempDir) // Clean up if cloning fails - return "", fmt.Errorf("failed to clone repo: %s", err) + return "", fmt.Errorf("failed to clone repo: %w", err) } return tempDir, nil } diff --git a/models/github_actions.go b/models/github_actions.go index 09ff986..cfa0723 100644 --- a/models/github_actions.go +++ b/models/github_actions.go @@ -162,7 +162,7 @@ type GithubActionsJob struct { Uses string `json:"uses,omitempty"` Secrets GithubActionsJobSecrets `json:"secrets,omitempty"` With GithubActionsWith `json:"with,omitempty"` - Permissions GithubActionsPermissions `json:"permissions,omitempty"` + Permissions GithubActionsPermissions `json:"permissions"` Needs StringList `json:"needs,omitempty"` If string `json:"if,omitempty"` RunsOn GithubActionsJobRunsOn `json:"runs_on" yaml:"runs-on"` @@ -181,7 +181,7 @@ type GithubActionsWorkflow struct { Path string `json:"path" yaml:"-"` Name string `json:"name"` Events GithubActionsEvents `json:"events" yaml:"on"` - Permissions GithubActionsPermissions `json:"permissions,omitempty"` + Permissions GithubActionsPermissions `json:"permissions"` Env GithubActionsEnvs `json:"env,omitempty"` Jobs GithubActionsJobs `json:"jobs"` } diff --git a/models/package_insights.go b/models/package_insights.go index ad64f7b..5a7b7b0 100644 --- a/models/package_insights.go +++ b/models/package_insights.go @@ -22,6 +22,22 @@ type PackageInsights struct { SourceGitRef string `json:"source_git_ref"` SourceGitCommitSha string `json:"source_git_commit_sha"` + OrgID int `json:"org_id"` + RepoID int `json:"repo_id"` + RepoSize int `json:"repo_size"` + DefaultBranch string `json:"default_branch"` + IsFork bool `json:"is_fork"` + IsEmpty bool `json:"is_empty"` + ForksCount int `json:"forks_count"` + StarsCount int `json:"stars_count"` + IsTemplate bool `json:"is_template"` + HasIssues bool `json:"has_issues"` + OpenIssuesCount int `json:"open_issues_count"` + HasWiki bool `json:"has_wiki"` + HasDiscussions bool `json:"has_discussions"` + PrimaryLanguage string `json:"primary_language"` + License string `json:"license"` + PackageDependencies []string `json:"package_dependencies"` BuildDependencies []string `json:"build_dependencies"` diff --git a/providers/github/client.go b/providers/github/client.go index c9154c7..caf352d 100644 --- a/providers/github/client.go +++ b/providers/github/client.go @@ -78,8 +78,34 @@ type GithubRepository struct { IsDisabled bool `graphql:"isDisabled"` IsEmpty bool `graphql:"isEmpty"` IsTemplate bool `graphql:"isTemplate"` + IsArchived bool `graphql:"isArchived"` StargazerCount int `graphql:"stargazerCount"` ForkCount int `graphql:"forkCount"` + Owner struct { + Organization struct { + DatabaseId int `graphql:"databaseId"` + } `graphql:"... on Organization"` + User struct { + DatabaseId int `graphql:"databaseId"` + } `graphql:"... on User"` + } `graphql:"owner"` + DatabaseId int `graphql:"databaseId"` + RepoSize int `graphql:"diskUsage"` // kilobytes + DefaultBranchRef struct { + Name string `graphql:"name"` + } `graphql:"defaultBranchRef"` + HasIssues bool `graphql:"hasIssuesEnabled"` + HasWiki bool `graphql:"hasWikiEnabled"` + HasDiscussions bool `graphql:"hasDiscussionsEnabled"` + PrimaryLanguage struct { + Name string `graphql:"name"` + } `graphql:"primaryLanguage"` + License struct { + Name string `graphql:"name"` + } `graphql:"licenseInfo"` + Issues struct { + TotalCount int `graphql:"totalCount"` + } `graphql:"issues"` } func (gh GithubRepository) GetProviderName() string { @@ -127,6 +153,62 @@ func (gh GithubRepository) GetIsFork() bool { return gh.IsFork } +func (gh GithubRepository) GetHasIssues() bool { + return gh.HasIssues +} + +func (gh GithubRepository) GetHasWiki() bool { + return gh.HasWiki +} + +func (gh GithubRepository) GetHasDiscussion() bool { + return gh.HasDiscussions +} + +func (gh GithubRepository) GetPrimaryLanguage() string { + return gh.PrimaryLanguage.Name +} + +func (gh GithubRepository) GetSize() int { + return gh.RepoSize +} + +func (gh GithubRepository) GetDefaultBranch() string { + return gh.DefaultBranchRef.Name +} + +func (gh GithubRepository) GetLicense() string { + return gh.License.Name +} + +func (gh GithubRepository) GetIsTemplate() bool { + return gh.IsTemplate +} + +func (gh GithubRepository) GetOrganizationID() int { + return gh.Owner.Organization.DatabaseId // even if it's a user, the organization will be filled with the same id +} + +func (gh GithubRepository) GetRepositoryID() int { + return gh.DatabaseId +} + +func (gh GithubRepository) GetForksCount() int { + return gh.ForkCount +} + +func (gh GithubRepository) GetStarsCount() int { + return gh.StargazerCount +} + +func (gh GithubRepository) GetOpenIssuesCount() int { + return gh.Issues.TotalCount +} + +func (gh GithubRepository) GetIsEmpty() bool { + return gh.IsEmpty +} + type Client struct { restClient *github.Client graphQLClient *githubv4.Client @@ -139,6 +221,11 @@ func NewClient(ctx context.Context, token string, domain string) (*Client, error return nil, err } + oauth2Client := http.Client{ + Transport: &retryTransport{}, + } + oauth2Context := context.WithValue(ctx, oauth2.HTTPClient, &oauth2Client) + var ( // REST client restClient = github.NewClient(rateLimiter).WithAuthToken(token) @@ -146,7 +233,7 @@ func NewClient(ctx context.Context, token string, domain string) (*Client, error src = oauth2.StaticTokenSource( &oauth2.Token{AccessToken: token}, ) - httpClient = oauth2.NewClient(ctx, src) + httpClient = oauth2.NewClient(oauth2Context, src) graphQLClient *githubv4.Client ) diff --git a/providers/github/round_tripper_rate_limit.go b/providers/github/round_tripper_rate_limit.go new file mode 100644 index 0000000..2540c48 --- /dev/null +++ b/providers/github/round_tripper_rate_limit.go @@ -0,0 +1,40 @@ +package github + +import ( + "fmt" + "net/http" +) + +type retryTransport struct{} + +type RateLimitError struct { + RetryAfter string + Err error +} + +func (e *RateLimitError) Error() string { + return fmt.Sprintf("retry after %s: %v", e.RetryAfter, e.Err) +} + +func (e *RateLimitError) Unwrap() error { + return e.Err +} + +func (s *retryTransport) RoundTrip(r *http.Request) (*http.Response, error) { + resp, err := http.DefaultTransport.RoundTrip(r) + if err != nil { + return nil, err + } + + if resp != nil { + retryAfter := resp.Header.Get("Retry-After") + if retryAfter != "" { + return nil, &RateLimitError{ + RetryAfter: retryAfter, + Err: fmt.Errorf("github graphql rate limit"), + } + } + } + + return resp, err +} diff --git a/providers/gitlab/client.go b/providers/gitlab/client.go index 19326a3..3203c73 100644 --- a/providers/gitlab/client.go +++ b/providers/gitlab/client.go @@ -80,6 +80,13 @@ type GitLabRepo struct { IsArchived bool StarCount int ForksCount int + ID int + IsEmpty bool + IssuesCount int + HasIssues bool + HasWiki bool + License string + DefaultBranch string } func (gl GitLabRepo) GetProviderName() string { @@ -130,10 +137,66 @@ type Client struct { client *gitlab.Client } +func (gl GitLabRepo) GetHasIssues() bool { + return gl.HasIssues +} + +func (gl GitLabRepo) GetHasWiki() bool { + return gl.HasWiki +} + +func (gl GitLabRepo) GetHasDiscussion() bool { + return false +} + +func (gl GitLabRepo) GetPrimaryLanguage() string { + return "" +} + +func (gl GitLabRepo) GetSize() int { + return 1337 +} + +func (gl GitLabRepo) GetDefaultBranch() string { + return gl.DefaultBranch +} + +func (gl GitLabRepo) GetLicense() string { + return gl.License +} + +func (gl GitLabRepo) GetIsTemplate() bool { + return false +} + +func (gl GitLabRepo) GetOrganizationID() int { + return 1337 +} + +func (gl GitLabRepo) GetRepositoryID() int { + return gl.ID +} + +func (gl GitLabRepo) GetForksCount() int { + return gl.ForksCount +} + +func (gl GitLabRepo) GetStarsCount() int { + return gl.StarCount +} + +func (gl GitLabRepo) GetOpenIssuesCount() int { + return gl.IssuesCount +} + +func (gl GitLabRepo) GetIsEmpty() bool { + return gl.IsEmpty +} + func NewClient(ctx context.Context, baseUrl string, token string) (*Client, error) { gitlabClient, err := gitlab.NewClient(token, gitlab.WithBaseURL(fmt.Sprintf("https://%s", baseUrl))) if err != nil { - return nil, fmt.Errorf("failed to create gitlab client: %v", err) + return nil, fmt.Errorf("failed to create gitlab client: %w", err) } return &Client{ Token: token, @@ -199,6 +262,10 @@ func projectToRepo(project *gitlab.Project) *GitLabRepo { if project.ForkedFromProject != nil { isFork = true } + license := "" + if project.License != nil { + license = project.License.Name + } return &GitLabRepo{ NameWithNamespace: project.PathWithNamespace, IsPrivate: !(project.Visibility == gitlab.PublicVisibility), @@ -207,6 +274,13 @@ func projectToRepo(project *gitlab.Project) *GitLabRepo { StarCount: project.StarCount, ForksCount: project.ForksCount, IsFork: isFork, + IsEmpty: project.EmptyRepo, + HasIssues: project.IssuesEnabled, + ID: project.ID, + IssuesCount: project.OpenIssuesCount, + HasWiki: project.WikiEnabled, + License: license, + DefaultBranch: project.DefaultBranch, } } diff --git a/providers/local/client.go b/providers/local/client.go index aea5197..2fe0260 100644 --- a/providers/local/client.go +++ b/providers/local/client.go @@ -129,6 +129,62 @@ func (gl Repo) GetIsFork() bool { return false } +func (gl Repo) GetHasIssues() bool { + return false +} + +func (gl Repo) GetHasWiki() bool { + return false +} + +func (gl Repo) GetHasDiscussion() bool { + return false +} + +func (gl Repo) GetPrimaryLanguage() string { + return "" +} + +func (gl Repo) GetSize() int { + return 1337 +} + +func (gl Repo) GetDefaultBranch() string { + return "" +} + +func (gl Repo) GetLicense() string { + return "" +} + +func (gl Repo) GetIsTemplate() bool { + return false +} + +func (gl Repo) GetOrganizationID() int { + return 1337 +} + +func (gl Repo) GetRepositoryID() int { + return 1337 +} + +func (gl Repo) GetForksCount() int { + return 0 +} + +func (gl Repo) GetStarsCount() int { + return 0 +} + +func (gl Repo) GetOpenIssuesCount() int { + return 0 +} + +func (gl Repo) GetIsEmpty() bool { + return false +} + func extractHostnameFromSSHURL(sshURL string) string { parts := strings.Split(sshURL, "@") if len(parts) != 2 {