From 06c0be44d5aee65bfd6d4af0d6985876fab4b1e2 Mon Sep 17 00:00:00 2001 From: Eleftheria Stein-Kousathana Date: Wed, 15 Nov 2023 17:02:58 +0100 Subject: [PATCH] Update package-lock.json parsing strategy - Start by finding any version changes - Then find the package name whose version was changed Fix #1631 --- internal/engine/eval/vulncheck/pkgdb.go | 7 +- internal/engine/eval/vulncheck/pkgdb_test.go | 48 +++++ internal/engine/eval/vulncheck/review_test.go | 4 +- internal/engine/ingester/diff/parse.go | 109 ++++++------ internal/engine/ingester/diff/parse_test.go | 168 ++++++++++++++++++ 5 files changed, 281 insertions(+), 55 deletions(-) diff --git a/internal/engine/eval/vulncheck/pkgdb.go b/internal/engine/eval/vulncheck/pkgdb.go index aa6f1f6a4d..91925a2bc9 100644 --- a/internal/engine/eval/vulncheck/pkgdb.go +++ b/internal/engine/eval/vulncheck/pkgdb.go @@ -100,12 +100,13 @@ type packageJson struct { } `json:"dist"` } -func (pj *packageJson) IndentedString(indent int, _ string, _ *pb.Dependency) string { +func (pj *packageJson) IndentedString(indent int, oldDepLine string, _ *pb.Dependency) string { padding := fmt.Sprintf("%*s", indent, "") innerPadding := padding + " " // Add 2 extra spaces + // use the old dependency to get the correct package path + data := fmt.Sprintf("%s\n", oldDepLine) // format each line with leadingWhitespace and 2 extra spaces - data := padding + fmt.Sprintf("\"%s\": {\n", pj.Name) data += innerPadding + fmt.Sprintf("\"version\": \"%s\",\n", pj.Version) data += innerPadding + fmt.Sprintf("\"resolved\": \"%s\",\n", pj.Dist.Tarball) data += innerPadding + fmt.Sprintf("\"integrity\": \"%s\",", pj.Dist.Integrity) @@ -114,7 +115,7 @@ func (pj *packageJson) IndentedString(indent int, _ string, _ *pb.Dependency) st } func (pj *packageJson) LineHasDependency(line string) bool { - pkgLine := fmt.Sprintf(`"%s": {`, pj.Name) + pkgLine := fmt.Sprintf(`%s": {`, pj.Name) return strings.Contains(line, pkgLine) } diff --git a/internal/engine/eval/vulncheck/pkgdb_test.go b/internal/engine/eval/vulncheck/pkgdb_test.go index 9627032993..9bed0af1cd 100644 --- a/internal/engine/eval/vulncheck/pkgdb_test.go +++ b/internal/engine/eval/vulncheck/pkgdb_test.go @@ -163,6 +163,54 @@ func TestNpmPkgDb(t *testing.T) { } } +func TestPackageJsonLineHasDependency(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + line string + pj *packageJson + expectRetval bool + }{ + { + name: "MatchWithScope", + line: "\"node_modules/@types/node\": {", + pj: &packageJson{ + Name: "@types/node", + Version: "20.9.0", + }, + expectRetval: true, + }, + { + name: "MatchNoScope", + line: "\"node_modules/lodash\": {", + pj: &packageJson{ + Name: "lodash", + Version: "4.17.21", + }, + expectRetval: true, + }, + { + name: "NoMatch", + line: "\"node_modules/other\": {", + pj: &packageJson{ + Name: "lodash", + Version: "4.17.21", + }, + expectRetval: false, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + }) + + require.Equal(t, tt.expectRetval, tt.pj.LineHasDependency(tt.line), "expected reply to match mock data") + } +} + func TestPyPiReplyLineHasDependency(t *testing.T) { t.Parallel() diff --git a/internal/engine/eval/vulncheck/review_test.go b/internal/engine/eval/vulncheck/review_test.go index ce34bd6db2..3c18728b6f 100644 --- a/internal/engine/eval/vulncheck/review_test.go +++ b/internal/engine/eval/vulncheck/review_test.go @@ -151,7 +151,7 @@ func TestReviewPrHandlerVulnerabilitiesDifferentIdentities(t *testing.T) { expBody, err := createReviewBody(vulnsFoundText) require.NoError(t, err) - expCommentBody := reviewBodyWithSuggestion(patchPackage.IndentedString(0, "", nil)) + expCommentBody := reviewBodyWithSuggestion(patchPackage.IndentedString(0, fmt.Sprintf(`"%s": {`, patchPackage.Name), nil)) mockClient.EXPECT(). ListReviews(gomock.Any(), pr.RepoOwner, pr.RepoName, int(pr.Number), nil). @@ -424,7 +424,7 @@ func TestCommitStatusPrHandlerWithVulnerabilities(t *testing.T) { expBody, err := createReviewBody(vulnsFoundText) require.NoError(t, err) - expCommentBody := reviewBodyWithSuggestion(patchPackage.IndentedString(0, "", nil)) + expCommentBody := reviewBodyWithSuggestion(patchPackage.IndentedString(0, fmt.Sprintf(`"%s": {`, patchPackage.Name), nil)) mockClient.EXPECT(). ListReviews(gomock.Any(), pr.RepoOwner, pr.RepoName, int(pr.Number), nil). diff --git a/internal/engine/ingester/diff/parse.go b/internal/engine/ingester/diff/parse.go index 04b09bf0d0..bff47eaaac 100644 --- a/internal/engine/ingester/diff/parse.go +++ b/internal/engine/ingester/diff/parse.go @@ -17,8 +17,7 @@ package diff import ( "bufio" - "encoding/json" - "fmt" + "path/filepath" "regexp" "strings" @@ -26,6 +25,11 @@ import ( pb "github.com/stacklok/minder/pkg/api/protobuf/go/minder/v1" ) +var ( + versionRegex = regexp.MustCompile(`^\+\s*"version"\s*:\s*"([^"\n]*)"\s*(?:,|$)`) + dependencyNameRegex = regexp.MustCompile(`\s*"([^"]+)"\s*:\s*{\s*`) +) + type ecosystemParser func(string) ([]*pb.Dependency, error) func newEcosystemParser(eco DependencyEcosystem) ecosystemParser { @@ -161,62 +165,67 @@ func goParse(patch string) ([]*pb.Dependency, error) { return deps, nil } -type npmDependency struct { - Version string `json:"version"` - Resolved string `json:"resolved"` - Integrity string `json:"integrity"` - Requires map[string]string `json:"requires,omitempty"` - Dependencies map[string]*npmDependency `json:"dependencies,omitempty"` - Optional bool `json:"optional,omitempty"` -} - -type npmRoot struct { - Deps map[string]*npmDependency -} - -// TODO(jakub): this is a hacky way to parse the npm patch file func npmParse(patch string) ([]*pb.Dependency, error) { lines := strings.Split(patch, "\n") - var output strings.Builder - - // Write the start of the JSON object to the output - output.WriteString("{\n") - - // Start of crazy code to parse the patch file - // What we do here is first grab all the lines that start with "+" - // Then we remove the "+" symbol and write the modified line to the output - // We then add a { to the start of the output and a } to the end, so we have a valid JSON object - // Then we convert the string builder to a string and unmarshal it into the Package struct - for _, line := range lines { - // Check if the line starts with "+" - if strings.HasPrefix(line, "+") { - // Remove the "+" symbol and write the modified line to the output - line = strings.TrimPrefix(line, "+") - output.WriteString(line + "\n") + + var deps []*pb.Dependency + + for i, line := range lines { + // Check if the line contains a version + matches := versionRegex.FindStringSubmatch(line) + if len(matches) > 1 { + version := matches[1] + name := findDependencyName(i, lines) + // The version is not always a dependency version. It may also be the version of the package in this repo, + // or the version of the root project. See https://docs.npmjs.com/cli/v10/configuring-npm/package-lock-json + if name != "" { + deps = append(deps, &pb.Dependency{ + Ecosystem: pb.DepEcosystem_DEP_ECOSYSTEM_NPM, + Name: name, + Version: version, + }) + } } } - //if string ends with a comma, remove it - outputString := strings.TrimSuffix(output.String(), ",\n") - outputString = outputString + "\n}" + return deps, nil +} - // Convert the string builder to a string and unmarshal it into the Package struct - root := &npmRoot{ - Deps: make(map[string]*npmDependency), +// findDependencyName iterates over all the previous lines to find the JSON key containing the parent dependency name. +// If the parent key does not correspond to a dependency (i.e. it could be the root project), then an empty string is +// returned. +func findDependencyName(i int, lines []string) string { + closingBraces := 0 + for j := i - 1; j >= 0; j-- { + if strings.Contains(lines[j], "}") { + closingBraces = closingBraces + 1 + } + if strings.Contains(lines[j], "{") { + if closingBraces == 0 { + matches := dependencyNameRegex.FindStringSubmatch(lines[j]) + if len(matches) > 1 { + // extract the dependency name from the key, which is the dependency path + dependencyPath := matches[1] + return getDependencyName(dependencyPath) + } + return "" + } + closingBraces = closingBraces - 1 + } } + return "" +} - err := json.Unmarshal([]byte(outputString), &root.Deps) - if err != nil { - return nil, fmt.Errorf("failed to unmarshal npm package: %w", err) - } +func getDependencyName(dependencyPath string) string { + dependencyName := filepath.Base(dependencyPath) + dir := filepath.Dir(dependencyPath) - deps := make([]*pb.Dependency, 0, len(root.Deps)) - for name, dep := range root.Deps { - deps = append(deps, &pb.Dependency{ - Ecosystem: pb.DepEcosystem_DEP_ECOSYSTEM_NPM, - Name: name, - Version: dep.Version, - }) + // Check if the parent directory starts with "@", meaning that the dependency has a scope. + // See https://docs.npmjs.com/cli/v10/using-npm/scope + if strings.HasPrefix(filepath.Base(dir), "@") { + // Prefix the dependency name with the scope + return filepath.Join(filepath.Base(dir), dependencyName) } - return deps, nil + + return dependencyName } diff --git a/internal/engine/ingester/diff/parse_test.go b/internal/engine/ingester/diff/parse_test.go index 8f923c84a5..5fcdeef614 100644 --- a/internal/engine/ingester/diff/parse_test.go +++ b/internal/engine/ingester/diff/parse_test.go @@ -337,3 +337,171 @@ func TestPyPiParse(t *testing.T) { }) } } + +func TestNpmParse(t *testing.T) { + t.Parallel() + + tests := []struct { + description string + content string + expectedCount int + expectedDependencies []*pb.Dependency + }{ + { + description: "New dependency addition", + content: ` + "version": "1.0.0", + "license": "ISC", + "dependencies": { ++ "chalk": "^5.3.0", + "lodash": "^3.7.0" + } + }, ++ "node_modules/chalk": { ++ "version": "5.3.0", ++ "resolved": "https://registry.npmjs.org/chalk/-/chalk-5.3.0.tgz", ++ "integrity": "sha512-dLitG79d+GV1Nb/VYcCDFivJeK1hiukt9QjRNVOsUtTy1rR1YJsmpGGTZ3qJos+uw7WmWF4wUwBd9jxjocFC2w==", ++ "engines": { ++ "node": "^12.17.0 || ^14.13 || >=16.0.0" ++ }, ++ "funding": { ++ "url": "https://github.com/chalk/chalk?sponsor=1" ++ } ++ }, + "node_modules/lodash": { + "version": "3.10.1", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-3.10.1.tgz", +`, + expectedCount: 1, + expectedDependencies: []*pb.Dependency{ + { + Ecosystem: pb.DepEcosystem_DEP_ECOSYSTEM_NPM, + Name: "chalk", + Version: "5.3.0", + }, + }, + }, + { + description: "Exising dependency version update", + content: ` + } + }, + "node_modules/lodash": { +- "version": "4.17.16", +- "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.16.tgz", +- "integrity": "sha512-mzxOTaU4AsJhnIujhngm+OnA6JX4fTI8D5H26wwGd+BJ57bW70oyRwTqo6EFJm1jTZ7hCo7yVzH1vB8TMFd2ww==" ++ "version": "4.17.21", ++ "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", ++ "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==" + } + } + } +`, + expectedCount: 1, + expectedDependencies: []*pb.Dependency{ + { + Ecosystem: pb.DepEcosystem_DEP_ECOSYSTEM_NPM, + Name: "lodash", + Version: "4.17.21", + }, + }, + }, + { + description: "Entirely new package-lock.json", + content: ` ++{ ++ "name": "public-repo", ++ "version": "1.0.0", ++ "lockfileVersion": 3, ++ "requires": true, ++ "packages": { ++ "": { ++ "name": "public-repo", ++ "version": "1.0.0", ++ "license": "ISC", ++ "dependencies": { ++ "lodash": "^4.17.21" ++ } ++ }, ++ "node_modules/lodash": { ++ "version": "4.17.21", ++ "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", ++ "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==" ++ } ++ } ++} +`, + expectedCount: 1, + expectedDependencies: []*pb.Dependency{ + { + Ecosystem: pb.DepEcosystem_DEP_ECOSYSTEM_NPM, + Name: "lodash", + Version: "4.17.21", + }, + }, + }, + { + description: "Multiple new dependencies", + content: ` + "version": "1.0.0", + "license": "ISC", + "dependencies": { ++ "@types/node": "^20.9.0", + "lodash": "^4.17.16" + } + }, ++ "node_modules/@types/node": { ++ "version": "20.9.0", ++ "resolved": "https://registry.npmjs.org/@types/node/-/node-20.9.0.tgz", ++ "integrity": "sha512-nekiGu2NDb1BcVofVcEKMIwzlx4NjHlcjhoxxKBNLtz15Y1z7MYf549DFvkHSId02Ax6kGwWntIBPC3l/JZcmw==", ++ "dependencies": { ++ "undici-types": "~5.26.4" ++ } ++ }, + "node_modules/lodash": { + "version": "4.17.16", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.16.tgz", + "integrity": "sha512-mzxOTaU4AsJhnIujhngm+OnA6JX4fTI8D5H26wwGd+BJ57bW70oyRwTqo6EFJm1jTZ7hCo7yVzH1vB8TMFd2ww==" ++ }, ++ "node_modules/undici-types": { ++ "version": "5.26.5", ++ "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", ++ "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==" + } + } + } +`, + expectedCount: 2, + expectedDependencies: []*pb.Dependency{ + { + Ecosystem: pb.DepEcosystem_DEP_ECOSYSTEM_NPM, + Name: "@types/node", + Version: "20.9.0", + }, + { + Ecosystem: pb.DepEcosystem_DEP_ECOSYSTEM_NPM, + Name: "undici-types", + Version: "5.26.5", + }, + }, + }, + } + for _, tt := range tests { + tt := tt + t.Run(tt.description, func(t *testing.T) { + t.Parallel() + got, err := npmParse(tt.content) + if err != nil { + t.Fatalf("goParse() returned error: %v", err) + } + + assert.Equal(t, tt.expectedCount, len(got), "mismatched dependency count") + + for i, expectedDep := range tt.expectedDependencies { + if !proto.Equal(expectedDep, got[i]) { + t.Errorf("mismatch at index %d: expected %v, got %v", i, expectedDep, got[i]) + } + } + }) + } +}