From 6a43a9173e8a7bfc1f5fa2ac155dcf5522e5fca9 Mon Sep 17 00:00:00 2001 From: Xueqin Cui <72771658+cuixq@users.noreply.github.com> Date: Thu, 2 May 2024 16:28:10 +1000 Subject: [PATCH] Automated Updates: support parents and dependency imports (#890) This PR adds support for parents and dependency imports: - fetching and parsing upstream pom.xml files - then merging the data into Maven project - add `RequirementsFromOtherPOMs` to system-specific data to store requirements that we don't have access Some of the Maven code should be provided by `deps.dev/util/maven` and `deps.dev/util/resolve` soon... --- .../resolution/datasource/maven_registry.go | 12 +- internal/resolution/datasource/maven_test.go | 4 +- .../manifest/__snapshots__/maven_test.snap | 8 + .../manifest/fixtures/parent/pom.xml | 37 +++ internal/resolution/manifest/fixtures/pom.xml | 8 + internal/resolution/manifest/manifest.go | 2 +- internal/resolution/manifest/maven.go | 217 ++++++++++-------- internal/resolution/manifest/maven_test.go | 132 ++++++++++- 8 files changed, 313 insertions(+), 107 deletions(-) create mode 100644 internal/resolution/manifest/fixtures/parent/pom.xml diff --git a/internal/resolution/datasource/maven_registry.go b/internal/resolution/datasource/maven_registry.go index d4e94f4eee..9990bd3b1a 100644 --- a/internal/resolution/datasource/maven_registry.go +++ b/internal/resolution/datasource/maven_registry.go @@ -15,22 +15,20 @@ import ( const MavenCentral = "https://repo.maven.apache.org/maven2" type MavenRegistryAPIClient struct { - Registry string // Base URL of the registry that we are making requests + registry string // Base URL of the registry that we are making requests } -func NewMavenRegistryAPIClient() (*MavenRegistryAPIClient, error) { - return &MavenRegistryAPIClient{ - Registry: MavenCentral, - }, nil +func NewMavenRegistryAPIClient(registry string) *MavenRegistryAPIClient { + return &MavenRegistryAPIClient{registry: registry} } func (m *MavenRegistryAPIClient) GetProject(ctx context.Context, groupID, artifactID, version string) (maven.Project, error) { - url, err := url.JoinPath(m.Registry, strings.ReplaceAll(groupID, ".", "/"), artifactID, version, fmt.Sprintf("%s-%s.pom", artifactID, version)) + u, err := url.JoinPath(m.registry, strings.ReplaceAll(groupID, ".", "/"), artifactID, version, fmt.Sprintf("%s-%s.pom", artifactID, version)) if err != nil { return maven.Project{}, fmt.Errorf("failed to join path: %w", err) } - req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil) if err != nil { return maven.Project{}, fmt.Errorf("failed to make new request: %w", err) } diff --git a/internal/resolution/datasource/maven_test.go b/internal/resolution/datasource/maven_test.go index 5c665dfbf7..6a25a14e0e 100644 --- a/internal/resolution/datasource/maven_test.go +++ b/internal/resolution/datasource/maven_test.go @@ -14,9 +14,8 @@ func TestGetProject(t *testing.T) { srv := testutility.NewMockHTTPServer(t) client := &MavenRegistryAPIClient{ - Registry: srv.URL, + registry: srv.URL, } - srv.SetResponse(t, "org/example/x.y.z/1.0.0/x.y.z-1.0.0.pom", []byte(` org.example @@ -24,6 +23,7 @@ func TestGetProject(t *testing.T) { 1.0.0 `)) + got, err := client.GetProject(context.Background(), "org.example", "x.y.z", "1.0.0") if err != nil { t.Fatalf("failed to get Maven project %s:%s verion %s: %v", "org.example", "x.y.z", "1.0.0", err) diff --git a/internal/resolution/manifest/__snapshots__/maven_test.snap b/internal/resolution/manifest/__snapshots__/maven_test.snap index d77a98d932..0c52d6b2b8 100755 --- a/internal/resolution/manifest/__snapshots__/maven_test.snap +++ b/internal/resolution/manifest/__snapshots__/maven_test.snap @@ -18,6 +18,7 @@ org.parent parent-pom 1.2.0 + ./parent/pom.xml @@ -48,6 +49,13 @@ xyz 2.0.1 + + org.import + import + 1.0.0 + pom + import + diff --git a/internal/resolution/manifest/fixtures/parent/pom.xml b/internal/resolution/manifest/fixtures/parent/pom.xml new file mode 100644 index 0000000000..0a10ee8ee7 --- /dev/null +++ b/internal/resolution/manifest/fixtures/parent/pom.xml @@ -0,0 +1,37 @@ + + + + 4.0.0 + + org.parent + parent-pom + 1.1.1 + + my-app + + http://www.example.com + + pom + + + org.upstream + parent-pom + 1.2.3 + + + + 1.1.1 + + + + + + org.example + aaa + ${aaa.version} + + + + + diff --git a/internal/resolution/manifest/fixtures/pom.xml b/internal/resolution/manifest/fixtures/pom.xml index 9dfaaa3d42..8d783ad1ad 100644 --- a/internal/resolution/manifest/fixtures/pom.xml +++ b/internal/resolution/manifest/fixtures/pom.xml @@ -16,6 +16,7 @@ org.parent parent-pom 1.1.1 + ./parent/pom.xml @@ -46,6 +47,13 @@ xyz 2.0.0 + + org.import + import + 1.0.0 + pom + import + diff --git a/internal/resolution/manifest/manifest.go b/internal/resolution/manifest/manifest.go index eb5b3e91da..6ef4eb94bf 100644 --- a/internal/resolution/manifest/manifest.go +++ b/internal/resolution/manifest/manifest.go @@ -94,7 +94,7 @@ func GetManifestIO(pathToManifest string) (ManifestIO, error) { base := filepath.Base(pathToManifest) switch { case base == "pom.xml": - return MavenManifestIO{}, nil + return NewMavenManifestIO(), nil case base == "package.json": return NpmManifestIO{}, nil default: diff --git a/internal/resolution/manifest/maven.go b/internal/resolution/manifest/maven.go index 0500339bc9..49d7ea96c8 100644 --- a/internal/resolution/manifest/maven.go +++ b/internal/resolution/manifest/maven.go @@ -2,30 +2,43 @@ package manifest import ( "bytes" + "context" "encoding/xml" "errors" "fmt" "io" + "os" + "path/filepath" "strings" "deps.dev/util/maven" "deps.dev/util/resolve" - "deps.dev/util/resolve/dep" + "github.com/google/osv-scanner/internal/resolution/datasource" "github.com/google/osv-scanner/pkg/lockfile" ) -type MavenManifestIO struct{} - const ( + OriginImport = "import" OriginManagement = "management" OriginParent = "parent" OriginPlugin = "plugin" OriginProfile = "profile" ) +type MavenManifestIO struct { + datasource.MavenRegistryAPIClient +} + +func NewMavenManifestIO() MavenManifestIO { + return MavenManifestIO{ + MavenRegistryAPIClient: *datasource.NewMavenRegistryAPIClient(datasource.MavenCentral), + } +} + type MavenManifestSpecific struct { Properties []PropertyWithOrigin RequirementsWithProperties []resolve.RequirementVersion + RequirementsFromOtherPOMs []resolve.RequirementVersion // Requirements that we cannot modify directly } type PropertyWithOrigin struct { @@ -33,53 +46,81 @@ type PropertyWithOrigin struct { Origin string // Origin indicates where the property comes from } -// TODO: fetch and merge parent data -// TODO: process dependencies (imports and dedupe) // TODO: handle profiles (activation and interpolation) func (m MavenManifestIO) Read(df lockfile.DepFile) (Manifest, error) { - var project maven.Project - if err := xml.NewDecoder(df).Decode(&project); err != nil { - return Manifest{}, fmt.Errorf("failed to unmarshal input: %w", err) - } - - count := len(project.Properties.Properties) - for _, prof := range project.Profiles { - count += len(prof.Properties.Properties) - } - properties := make([]PropertyWithOrigin, 0, count) - for _, prop := range project.Properties.Properties { - properties = append(properties, PropertyWithOrigin{Property: prop}) - } + ctx := context.Background() var reqsWithProps []resolve.RequirementVersion - addReqsWithProps := func(deps []maven.Dependency, origin string) { + requirementOrigins := make(map[maven.DependencyKey]string) + addRequirementOrigins := func(deps []maven.Dependency, origin string) { for _, dep := range deps { + key := dep.Key() + if _, ok := requirementOrigins[key]; !ok { + requirementOrigins[key] = origin + } if dep.Version.ContainsProperty() { // We only need the original import if the version contains any property. reqsWithProps = append(reqsWithProps, makeRequirementVersion(dep, origin)) } } } - addReqsWithProps(project.Dependencies, "") - addReqsWithProps(project.DependencyManagement.Dependencies, OriginManagement) - for _, profile := range project.Profiles { - addReqsWithProps(profile.Dependencies, mavenOrigin(OriginProfile, string(profile.ID))) - addReqsWithProps(profile.DependencyManagement.Dependencies, mavenOrigin(OriginProfile, string(profile.ID), OriginManagement)) + addAllRequirements := func(project maven.Project, origin string) { + addRequirementOrigins(project.Dependencies, origin) + addRequirementOrigins(project.DependencyManagement.Dependencies, mavenOrigin(origin, OriginManagement)) + for _, profile := range project.Profiles { + addRequirementOrigins(profile.Dependencies, mavenOrigin(origin, OriginProfile, string(profile.ID))) + addRequirementOrigins(profile.DependencyManagement.Dependencies, mavenOrigin(origin, OriginProfile, string(profile.ID), OriginManagement)) + } + for _, plugin := range project.Build.PluginManagement.Plugins { + addRequirementOrigins(plugin.Dependencies, mavenOrigin(origin, OriginPlugin, plugin.ProjectKey.Name())) + } } - for _, plugin := range project.Build.PluginManagement.Plugins { - addReqsWithProps(plugin.Dependencies, mavenOrigin(OriginPlugin, plugin.ProjectKey.Name())) + + var project maven.Project + if err := xml.NewDecoder(df).Decode(&project); err != nil { + return Manifest{}, fmt.Errorf("failed to unmarshal project: %w", err) } + addAllRequirements(project, "") - // Interpolate the project to resolve the properties. - if err := project.Interpolate(); err != nil { - return Manifest{}, fmt.Errorf("failed to interpolate project: %w", err) + // Merging parents data by parsing local parent pom.xml or fetching from upstream. + if err := m.MergeParents(ctx, &project, project.Parent, 1, df.Path(), addAllRequirements, OriginParent); err != nil { + return Manifest{}, fmt.Errorf("failed to merge parents: %w", err) + } + + // Process the dependencies: + // - dedupe dependencies and dependency management + // - import dependency management (not yet transitively) + // - fill in missing dependency version requirement + project.ProcessDependencies(func(groupID, artifactID, version maven.String) (maven.DependencyManagement, error) { + root := maven.Parent{ProjectKey: maven.ProjectKey{GroupID: groupID, ArtifactID: artifactID, Version: version}} + var result maven.Project + if err := m.MergeParents(ctx, &result, root, 0, df.Path(), addAllRequirements, OriginImport); err != nil { + return maven.DependencyManagement{}, err + } + + return result.DependencyManagement, nil + }) + + count := len(project.Properties.Properties) + for _, prof := range project.Profiles { + count += len(prof.Properties.Properties) + } + properties := make([]PropertyWithOrigin, 0, count) + for _, prop := range project.Properties.Properties { + properties = append(properties, PropertyWithOrigin{Property: prop}) } var requirements []resolve.RequirementVersion + var otherRequirements []resolve.RequirementVersion groups := make(map[resolve.PackageKey][]string) - addRequirements := func(deps []maven.Dependency, origin string) { + addRequirements := func(deps []maven.Dependency) { for _, dep := range deps { - requirements = append(requirements, makeRequirementVersion(dep, origin)) + origin := requirementOrigins[dep.Key()] + if strings.HasPrefix(origin, OriginParent+"@") || strings.HasPrefix(origin, OriginImport) { + otherRequirements = append(otherRequirements, makeRequirementVersion(dep, origin)) + } else { + requirements = append(requirements, makeRequirementVersion(dep, origin)) + } if dep.Scope != "" { pk := resolve.PackageKey{ System: resolve.Maven, @@ -100,14 +141,14 @@ func (m MavenManifestIO) Read(df lockfile.DepFile) (Manifest, error) { VersionType: resolve.Requirement, Version: string(project.Parent.Version), }, - Type: makeMavenDepType(maven.Dependency{}, OriginParent), + Type: resolve.MavenDepType(maven.Dependency{}, OriginParent), }) } - addRequirements(project.Dependencies, "") - addRequirements(project.DependencyManagement.Dependencies, OriginManagement) + addRequirements(project.Dependencies) + addRequirements(project.DependencyManagement.Dependencies) for _, profile := range project.Profiles { - addRequirements(profile.Dependencies, mavenOrigin(OriginProfile, string(profile.ID))) - addRequirements(profile.DependencyManagement.Dependencies, mavenOrigin(OriginProfile, string(profile.ID), OriginManagement)) + addRequirements(profile.Dependencies) + addRequirements(profile.DependencyManagement.Dependencies) for _, prop := range profile.Properties.Properties { properties = append(properties, PropertyWithOrigin{ Property: prop, @@ -116,7 +157,7 @@ func (m MavenManifestIO) Read(df lockfile.DepFile) (Manifest, error) { } } for _, plugin := range project.Build.PluginManagement.Plugins { - addRequirements(plugin.Dependencies, mavenOrigin(OriginPlugin, plugin.ProjectKey.Name())) + addRequirements(plugin.Dependencies) } return Manifest{ @@ -136,10 +177,55 @@ func (m MavenManifestIO) Read(df lockfile.DepFile) (Manifest, error) { EcosystemSpecific: MavenManifestSpecific{ Properties: properties, RequirementsWithProperties: reqsWithProps, + RequirementsFromOtherPOMs: otherRequirements, }, }, nil } +// To avoid indefinite loop when fetching parents, +// set a limit on the number of parents. +const MaxParent = 100 + +func (m MavenManifestIO) MergeParents(ctx context.Context, result *maven.Project, current maven.Parent, start int, path string, addRequirements func(maven.Project, string), prefix string) error { + visited := make(map[maven.ProjectKey]bool, MaxParent) + for n := start; n < MaxParent; n++ { + if current.GroupID == "" || current.ArtifactID == "" || current.Version == "" { + break + } + if visited[current.ProjectKey] { + // A cycle of parents is detected + return errors.New("a cycle of parents is detected") + } + visited[current.ProjectKey] = true + + var proj maven.Project + if current.RelativePath != "" { + f, err := os.Open(filepath.Join(filepath.Dir(path), string(current.RelativePath))) + if err != nil { + return fmt.Errorf("failed to open parent file %s: %w", current.RelativePath, err) + } + if err := xml.NewDecoder(f).Decode(&proj); err != nil { + return fmt.Errorf("failed to unmarshal project: %w", err) + } + } else { + var err error + proj, err = m.MavenRegistryAPIClient.GetProject(ctx, string(current.GroupID), string(current.ArtifactID), string(current.Version)) + if err != nil { + return fmt.Errorf("failed to get Maven project %s:%s:%s: %w", current.GroupID, current.ArtifactID, current.Version, err) + } + if n > 0 && proj.Packaging != "pom" { + // A parent project should only be of "pom" packaging type. + return fmt.Errorf("invalid packaging for parent project %s", proj.Packaging) + } + } + addRequirements(proj, mavenOrigin(prefix, current.ProjectKey.Name())) + result.MergeParent(proj) + current = proj.Parent + } + // Interpolate the project to resolve the properties. + return result.Interpolate() +} + // For dependencies in profiles and plugins, we use origin to indicate where they are from. // The origin is in the format prefix@identifier[@postfix] (where @ is the separator): // - prefix indicates it is from profile or plugin @@ -155,7 +241,7 @@ func makeRequirementVersion(dep maven.Dependency, origin string) resolve.Require VersionType: resolve.Requirement, Version: string(dep.Version), }, - Type: makeMavenDepType(dep, origin), + Type: resolve.MavenDepType(dep, origin), } } @@ -173,57 +259,6 @@ func mavenOrigin(list ...string) string { return result } -func makeMavenDepType(dependency maven.Dependency, origin string) dep.Type { - var dt dep.Type - if dependency.Optional == "true" { - dt.AddAttr(dep.Opt, "") - } - if dependency.Scope == "test" { - dt.AddAttr(dep.Test, "") - } else if dependency.Scope != "" && dependency.Scope != "compile" { - dt.AddAttr(dep.Scope, string(dependency.Scope)) - } - if dependency.Type != "" { - dt.AddAttr(dep.MavenArtifactType, string(dependency.Type)) - } - if dependency.Classifier != "" { - dt.AddAttr(dep.MavenClassifier, string(dependency.Classifier)) - } - // Only add Maven dependency origin when it is not direct dependency. - if origin != "" { - dt.AddAttr(dep.MavenDependencyOrigin, origin) - } - - return dt -} - -func depTypeToMavenDependency(typ dep.Type) (maven.Dependency, string, error) { - result := maven.Dependency{} - if _, ok := typ.GetAttr(dep.Opt); ok { - result.Optional = "true" - } - if _, ok := typ.GetAttr(dep.Test); ok { - result.Scope = "test" - } - if s, ok := typ.GetAttr(dep.Scope); ok { - if result.Scope != "" { - return maven.Dependency{}, "", errors.New("invalid Maven dep.Type") - } - result.Scope = maven.String(s) - } - if c, ok := typ.GetAttr(dep.MavenClassifier); ok { - result.Classifier = maven.String(c) - } - if t, ok := typ.GetAttr(dep.MavenArtifactType); ok { - result.Type = maven.String(t) - } - if o, ok := typ.GetAttr(dep.MavenDependencyOrigin); ok { - return result, o, nil - } - - return result, "", nil -} - func projectStartElement(raw string) string { start := strings.Index(raw, " + org.upstream + parent-pom + 1.2.3 + pom + + 2.2.2 + + + + + org.example + bbb + ${bbb.version} + + + + + `)) + srv.SetResponse(t, "org/import/import/1.0.0/import-1.0.0.pom", []byte(` + + org.import + import + 1.0.0 + pom + + 3.3.3 + + + + + org.example + ccc + ${ccc.version} + + + + + `)) + dir, err := os.Getwd() if err != nil { t.Fatalf("failed to get current directory: %v", err) @@ -44,7 +90,10 @@ func TestMavenRead(t *testing.T) { } defer df.Close() - mavenIO := manifest.MavenManifestIO{} + mavenIO := manifest.MavenManifestIO{ + MavenRegistryAPIClient: *datasource.NewMavenRegistryAPIClient(srv.URL), + } + got, err := mavenIO.Read(df) if err != nil { t.Fatalf("failed to read file: %v", err) @@ -163,6 +212,9 @@ func TestMavenRead(t *testing.T) { }, EcosystemSpecific: manifest.MavenManifestSpecific{ Properties: []manifest.PropertyWithOrigin{ + {Property: maven.Property{Name: "bbb.version", Value: "2.2.2"}}, + {Property: maven.Property{Name: "aaa.version", Value: "1.1.1"}}, + {Property: maven.Property{Name: "project.build.sourceEncoding", Value: "UTF-8"}}, {Property: maven.Property{Name: "maven.compiler.source", Value: "1.7"}}, {Property: maven.Property{Name: "maven.compiler.target", Value: "1.7"}}, @@ -192,6 +244,74 @@ func TestMavenRead(t *testing.T) { }, Type: depProfileOne, }, + { + VersionKey: resolve.VersionKey{ + PackageKey: resolve.PackageKey{ + System: resolve.Maven, + Name: "org.example:aaa", + }, + VersionType: resolve.Requirement, + Version: "${aaa.version}", + }, + Type: depParentMgmt, + }, + { + VersionKey: resolve.VersionKey{ + PackageKey: resolve.PackageKey{ + System: resolve.Maven, + Name: "org.example:bbb", + }, + VersionType: resolve.Requirement, + Version: "${bbb.version}", + }, + Type: depParentUpstreamMgmt, + }, + { + VersionKey: resolve.VersionKey{ + PackageKey: resolve.PackageKey{ + System: resolve.Maven, + Name: "org.example:ccc", + }, + VersionType: resolve.Requirement, + Version: "${ccc.version}", + }, + Type: depImport, + }, + }, + RequirementsFromOtherPOMs: []resolve.RequirementVersion{ + { + VersionKey: resolve.VersionKey{ + PackageKey: resolve.PackageKey{ + System: resolve.Maven, + Name: "org.example:aaa", + }, + VersionType: resolve.Requirement, + Version: "1.1.1", + }, + Type: depParentMgmt, + }, + { + VersionKey: resolve.VersionKey{ + PackageKey: resolve.PackageKey{ + System: resolve.Maven, + Name: "org.example:bbb", + }, + VersionType: resolve.Requirement, + Version: "2.2.2", + }, + Type: depParentUpstreamMgmt, + }, + { + VersionKey: resolve.VersionKey{ + PackageKey: resolve.PackageKey{ + System: resolve.Maven, + Name: "org.example:ccc", + }, + VersionType: resolve.Requirement, + Version: "3.3.3", + }, + Type: depImport, + }, }, }, }