From a24e35a12d7041e8aaff82d71c060acc722d373f Mon Sep 17 00:00:00 2001 From: Jon McEwen Date: Wed, 13 Apr 2022 15:07:58 +0100 Subject: [PATCH 1/7] fix: #953 Derive language from pURL - https://github.com/anchore/syft/issues/953 Signed-off-by: Jon McEwen --- .../common/cyclonedxhelpers/component.go | 4 +++ .../common/cyclonedxhelpers/component_test.go | 28 +++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/internal/formats/common/cyclonedxhelpers/component.go b/internal/formats/common/cyclonedxhelpers/component.go index d4f6606aeaf..a0d185f5111 100644 --- a/internal/formats/common/cyclonedxhelpers/component.go +++ b/internal/formats/common/cyclonedxhelpers/component.go @@ -86,6 +86,10 @@ func decodeComponent(c *cyclonedx.Component) *pkg.Package { p.Type = pkg.TypeFromPURL(p.PURL) } + if p.Language == "" { + p.Language = pkg.LanguageFromPURL(p.PURL) + } + return p } diff --git a/internal/formats/common/cyclonedxhelpers/component_test.go b/internal/formats/common/cyclonedxhelpers/component_test.go index ab7f3b812bb..586d7c89d2a 100644 --- a/internal/formats/common/cyclonedxhelpers/component_test.go +++ b/internal/formats/common/cyclonedxhelpers/component_test.go @@ -191,3 +191,31 @@ func Test_deriveBomRef(t *testing.T) { }) } } + +func Test_decodeComponent(t *testing.T) { + javaComponentWithNoSyftProperties := cyclonedx.Component{ + Name: "ch.qos.logback/logback-classic", + Version: "1.2.3", + PackageURL: "pkg:maven/ch.qos.logback/logback-classic@1.2.3", + Type: "library", + BOMRef: "pkg:maven/ch.qos.logback/logback-classic@1.2.3", + } + + tests := []struct { + name string + component cyclonedx.Component + want pkg.Language + }{ + { + name: "derive language from pURL if missing", + component: javaComponentWithNoSyftProperties, + want: pkg.Java, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equal(t, tt.want, decodeComponent(&tt.component).Language) + }) + } +} From 903b1ad7354fbe94668d0375d74da71389185e1a Mon Sep 17 00:00:00 2001 From: Christopher Phillips Date: Mon, 25 Apr 2022 11:45:45 -0400 Subject: [PATCH 2/7] update cataloger path to encode more information Signed-off-by: Christopher Phillips --- syft/pkg/cataloger/catalog.go | 3 +++ test/integration/encode_decode_cycle_test.go | 28 +++++++++++--------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/syft/pkg/cataloger/catalog.go b/syft/pkg/cataloger/catalog.go index dfd242a5922..5780302d6f7 100644 --- a/syft/pkg/cataloger/catalog.go +++ b/syft/pkg/cataloger/catalog.go @@ -69,6 +69,9 @@ func Catalog(resolver source.FileResolver, release *linux.Release, catalogers .. // generate PURL (note: this is excluded from package ID, so is safe to mutate) p.PURL = pkg.URL(p, release) + if p.Language == "" { + p.Language = pkg.LanguageFromPURL(p.PURL) + } // create file-to-package relationships for files owned by the package owningRelationships, err := packageFileOwnershipRelationships(p, resolver) diff --git a/test/integration/encode_decode_cycle_test.go b/test/integration/encode_decode_cycle_test.go index 348a309d229..1c3b8b75770 100644 --- a/test/integration/encode_decode_cycle_test.go +++ b/test/integration/encode_decode_cycle_test.go @@ -7,6 +7,7 @@ import ( "github.com/anchore/syft/internal/formats/cyclonedxxml" "github.com/anchore/syft/internal/formats/syftjson" "github.com/anchore/syft/syft/source" + "github.com/google/go-cmp/cmp" "regexp" "testing" @@ -21,12 +22,14 @@ import ( ) // TestEncodeDecodeEncodeCycleComparison is testing for differences in how SBOM documents get encoded on multiple cycles. -// By encding and decoding the sbom we can compare the differences between the set of resulting objects. However, +// By encoding and decoding the sbom we can compare the differences between the set of resulting objects. However, // this requires specific comparisons being done, and select redactions/omissions being made. Additionally, there are // already unit tests on each format encoder-decoder for properly functioning comparisons in depth, so there is no need // to do an object-to-object comparison. For this reason this test focuses on a bytes-to-bytes comparison after an // encode-decode-encode loop which will detect lossy behavior in both directions. func TestEncodeDecodeEncodeCycleComparison(t *testing.T) { + // use second image for relationships + images := []string{"image-pkg-coverage", "image-owning-package"} tests := []struct { formatOption sbom.FormatID redactor func(in []byte) []byte @@ -37,6 +40,7 @@ func TestEncodeDecodeEncodeCycleComparison(t *testing.T) { json: true, }, { + formatOption: cyclonedxjson.ID, redactor: func(in []byte) []byte { in = regexp.MustCompile("\"(timestamp|serialNumber|bom-ref)\": \"[^\"]+\",").ReplaceAll(in, []byte{}) @@ -45,6 +49,7 @@ func TestEncodeDecodeEncodeCycleComparison(t *testing.T) { json: true, }, { + formatOption: cyclonedxxml.ID, redactor: func(in []byte) []byte { in = regexp.MustCompile("(serialNumber|bom-ref)=\"[^\"]+\"").ReplaceAll(in, []byte{}) @@ -55,9 +60,8 @@ func TestEncodeDecodeEncodeCycleComparison(t *testing.T) { } for _, test := range tests { - // use second image for relationships - for _, image := range []string{"image-pkg-coverage", "image-owning-package"} { - t.Run(fmt.Sprintf("%s/%s", test.formatOption, image), func(t *testing.T) { + t.Run(fmt.Sprintf("%s", test.formatOption), func(t *testing.T) { + for _, image := range images { originalSBOM, _ := catalogFixtureImage(t, image, source.SquashedScope) format := syft.FormatByID(test.formatOption) @@ -81,15 +85,15 @@ func TestEncodeDecodeEncodeCycleComparison(t *testing.T) { if test.json { s1 := string(by1) s2 := string(by2) - assert.JSONEq(t, s1, s2) - } else { - if !assert.True(t, bytes.Equal(by1, by2)) { - dmp := diffmatchpatch.New() - diffs := dmp.DiffMain(string(by1), string(by2), true) - t.Errorf("diff: %s", dmp.DiffPrettyText(diffs)) + if diff := cmp.Diff(s1, s2); diff != "" { + t.Errorf("Encode/Decode mismatch (-want +got):\n%s", diff) } + } else if !assert.True(t, bytes.Equal(by1, by2)) { + dmp := diffmatchpatch.New() + diffs := dmp.DiffMain(string(by1), string(by2), true) + t.Errorf("diff: %s", dmp.DiffPrettyText(diffs)) } - }) - } + } + }) } } From 9d26b47db7e1c43557bc91aa4117bd82d4d7ac32 Mon Sep 17 00:00:00 2001 From: Christopher Phillips Date: Mon, 25 Apr 2022 11:49:09 -0400 Subject: [PATCH 3/7] remove extra lines from test Signed-off-by: Christopher Phillips --- test/integration/encode_decode_cycle_test.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/integration/encode_decode_cycle_test.go b/test/integration/encode_decode_cycle_test.go index 1c3b8b75770..6fd79a8129b 100644 --- a/test/integration/encode_decode_cycle_test.go +++ b/test/integration/encode_decode_cycle_test.go @@ -40,7 +40,6 @@ func TestEncodeDecodeEncodeCycleComparison(t *testing.T) { json: true, }, { - formatOption: cyclonedxjson.ID, redactor: func(in []byte) []byte { in = regexp.MustCompile("\"(timestamp|serialNumber|bom-ref)\": \"[^\"]+\",").ReplaceAll(in, []byte{}) @@ -49,7 +48,6 @@ func TestEncodeDecodeEncodeCycleComparison(t *testing.T) { json: true, }, { - formatOption: cyclonedxxml.ID, redactor: func(in []byte) []byte { in = regexp.MustCompile("(serialNumber|bom-ref)=\"[^\"]+\"").ReplaceAll(in, []byte{}) From 780076e4a3db9d5f532d81dc3dbd16f50be19b42 Mon Sep 17 00:00:00 2001 From: Christopher Phillips Date: Tue, 26 Apr 2022 10:31:53 -0400 Subject: [PATCH 4/7] add unknown language as expected in distro test Signed-off-by: Christopher Phillips --- syft/pkg/cataloger/catalog.go | 1 + syft/pkg/package.go | 10 ++++----- .../catalog_packages_cases_test.go | 22 +++++++++++-------- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/syft/pkg/cataloger/catalog.go b/syft/pkg/cataloger/catalog.go index 5780302d6f7..7b309c4fa24 100644 --- a/syft/pkg/cataloger/catalog.go +++ b/syft/pkg/cataloger/catalog.go @@ -69,6 +69,7 @@ func Catalog(resolver source.FileResolver, release *linux.Release, catalogers .. // generate PURL (note: this is excluded from package ID, so is safe to mutate) p.PURL = pkg.URL(p, release) + if p.Language == "" { p.Language = pkg.LanguageFromPURL(p.PURL) } diff --git a/syft/pkg/package.go b/syft/pkg/package.go index 13b19ca37a2..91800e0aadc 100644 --- a/syft/pkg/package.go +++ b/syft/pkg/package.go @@ -20,11 +20,11 @@ type Package struct { FoundBy string `cyclonedx:"foundBy"` // the specific cataloger that discovered this package Locations source.LocationSet // the locations that lead to the discovery of this package (note: this is not necessarily the locations that make up this package) Licenses []string // licenses discovered with the package metadata - Language Language `cyclonedx:"language"` // the language ecosystem this package belongs to (e.g. JavaScript, Python, etc) - Type Type `cyclonedx:"type"` // the package type (e.g. Npm, Yarn, Python, Rpm, Deb, etc) - CPEs []CPE `hash:"ignore"` // all possible Common Platform Enumerators (note: this is NOT included in the definition of the ID since all fields on a CPE are derived from other fields) - PURL string `hash:"ignore"` // the Package URL (see https://github.com/package-url/purl-spec) - MetadataType MetadataType `cyclonedx:"metadataType"` // the shape of the additional data in the "metadata" field + Language Language `hash:"ignore" cyclonedx:"language"` // the language ecosystem this package belongs to (e.g. JavaScript, Python, etc) + Type Type `cyclonedx:"type"` // the package type (e.g. Npm, Yarn, Python, Rpm, Deb, etc) + CPEs []CPE `hash:"ignore"` // all possible Common Platform Enumerators (note: this is NOT included in the definition of the ID since all fields on a CPE are derived from other fields) + PURL string `hash:"ignore"` // the Package URL (see https://github.com/package-url/purl-spec) + MetadataType MetadataType `cyclonedx:"metadataType"` // the shape of the additional data in the "metadata" field Metadata interface{} // additional data found while parsing the package source } diff --git a/test/integration/catalog_packages_cases_test.go b/test/integration/catalog_packages_cases_test.go index 8d566afcb86..35b86e4b7e3 100644 --- a/test/integration/catalog_packages_cases_test.go +++ b/test/integration/catalog_packages_cases_test.go @@ -53,8 +53,9 @@ var imageOnlyTestCases = []testCase{ }, { // When the image is build lib overwrites pkgs/lib causing there to only be two packages - name: "find apkdb packages", - pkgType: pkg.ApkPkg, + name: "find apkdb packages", + pkgType: pkg.ApkPkg, + pkgLanguage: pkg.UnknownLanguage, pkgInfo: map[string]string{ "musl-utils": "1.1.24-r2", "libc-utils": "0.7.2-r0", @@ -169,9 +170,10 @@ var dirOnlyTestCases = []testCase{ }, }, { - name: "find apkdb packages", - pkgType: pkg.ApkPkg, - duplicates: 2, // when the directory is cataloged we have duplicates between lib/ and pkgs/lib + name: "find apkdb packages", + pkgType: pkg.ApkPkg, + pkgLanguage: pkg.UnknownLanguage, + duplicates: 2, // when the directory is cataloged we have duplicates between lib/ and pkgs/lib pkgInfo: map[string]string{ "musl-utils": "1.1.24-r2", "libc-utils": "0.7.2-r0", @@ -203,15 +205,17 @@ var dirOnlyTestCases = []testCase{ var commonTestCases = []testCase{ { - name: "find rpmdb packages", - pkgType: pkg.RpmPkg, + name: "find rpmdb packages", + pkgType: pkg.RpmPkg, + pkgLanguage: pkg.UnknownLanguage, pkgInfo: map[string]string{ "dive": "0.9.2-1", }, }, { - name: "find dpkg packages", - pkgType: pkg.DebPkg, + name: "find dpkg packages", + pkgType: pkg.DebPkg, + pkgLanguage: pkg.UnknownLanguage, pkgInfo: map[string]string{ "apt": "1.8.2", "dash": "0.5.8-2.4", From 1178bab85b65dc0f028cf9a6e92d7a8ef5ab6e5c Mon Sep 17 00:00:00 2001 From: Christopher Phillips Date: Tue, 26 Apr 2022 10:52:47 -0400 Subject: [PATCH 5/7] add comment Signed-off-by: Christopher Phillips --- syft/pkg/cataloger/catalog.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/syft/pkg/cataloger/catalog.go b/syft/pkg/cataloger/catalog.go index 7b309c4fa24..fa0e4d72da0 100644 --- a/syft/pkg/cataloger/catalog.go +++ b/syft/pkg/cataloger/catalog.go @@ -70,6 +70,9 @@ func Catalog(resolver source.FileResolver, release *linux.Release, catalogers .. // generate PURL (note: this is excluded from package ID, so is safe to mutate) p.PURL = pkg.URL(p, release) + // if we were not able to identify the language we have an opportunity + // to try and get this value from the PURL. Worst case we assert that + // we could not identify the language at either stage and set UnknownLanguage if p.Language == "" { p.Language = pkg.LanguageFromPURL(p.PURL) } From 36da8b03674cd651b962ffb2265b226abb19d552 Mon Sep 17 00:00:00 2001 From: Christopher Phillips Date: Tue, 26 Apr 2022 11:05:59 -0400 Subject: [PATCH 6/7] redact id/parent from test output Signed-off-by: Christopher Phillips --- syft/pkg/package.go | 10 +++++----- test/integration/encode_decode_cycle_test.go | 6 +++++- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/syft/pkg/package.go b/syft/pkg/package.go index 91800e0aadc..13b19ca37a2 100644 --- a/syft/pkg/package.go +++ b/syft/pkg/package.go @@ -20,11 +20,11 @@ type Package struct { FoundBy string `cyclonedx:"foundBy"` // the specific cataloger that discovered this package Locations source.LocationSet // the locations that lead to the discovery of this package (note: this is not necessarily the locations that make up this package) Licenses []string // licenses discovered with the package metadata - Language Language `hash:"ignore" cyclonedx:"language"` // the language ecosystem this package belongs to (e.g. JavaScript, Python, etc) - Type Type `cyclonedx:"type"` // the package type (e.g. Npm, Yarn, Python, Rpm, Deb, etc) - CPEs []CPE `hash:"ignore"` // all possible Common Platform Enumerators (note: this is NOT included in the definition of the ID since all fields on a CPE are derived from other fields) - PURL string `hash:"ignore"` // the Package URL (see https://github.com/package-url/purl-spec) - MetadataType MetadataType `cyclonedx:"metadataType"` // the shape of the additional data in the "metadata" field + Language Language `cyclonedx:"language"` // the language ecosystem this package belongs to (e.g. JavaScript, Python, etc) + Type Type `cyclonedx:"type"` // the package type (e.g. Npm, Yarn, Python, Rpm, Deb, etc) + CPEs []CPE `hash:"ignore"` // all possible Common Platform Enumerators (note: this is NOT included in the definition of the ID since all fields on a CPE are derived from other fields) + PURL string `hash:"ignore"` // the Package URL (see https://github.com/package-url/purl-spec) + MetadataType MetadataType `cyclonedx:"metadataType"` // the shape of the additional data in the "metadata" field Metadata interface{} // additional data found while parsing the package source } diff --git a/test/integration/encode_decode_cycle_test.go b/test/integration/encode_decode_cycle_test.go index 6fd79a8129b..012715dc3cd 100644 --- a/test/integration/encode_decode_cycle_test.go +++ b/test/integration/encode_decode_cycle_test.go @@ -37,7 +37,11 @@ func TestEncodeDecodeEncodeCycleComparison(t *testing.T) { }{ { formatOption: syftjson.ID, - json: true, + redactor: func(in []byte) []byte { + in = regexp.MustCompile("\"(id|parent)\": \"[^\"]+\",").ReplaceAll(in, []byte{}) + return in + }, + json: true, }, { formatOption: cyclonedxjson.ID, From 4d0a98fb507d4e13a133925bccb817137f565721 Mon Sep 17 00:00:00 2001 From: Christopher Phillips Date: Tue, 26 Apr 2022 11:15:00 -0400 Subject: [PATCH 7/7] update UnknownLanguage ==> "" Signed-off-by: Christopher Phillips --- syft/pkg/language.go | 2 +- .../catalog_packages_cases_test.go | 22 ++++++++----------- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/syft/pkg/language.go b/syft/pkg/language.go index b4b3734a363..d9c4905b1dd 100644 --- a/syft/pkg/language.go +++ b/syft/pkg/language.go @@ -11,7 +11,7 @@ type Language string const ( // the full set of supported programming languages - UnknownLanguage Language = "UnknownLanguage" + UnknownLanguage Language = "" Java Language = "java" JavaScript Language = "javascript" Python Language = "python" diff --git a/test/integration/catalog_packages_cases_test.go b/test/integration/catalog_packages_cases_test.go index 35b86e4b7e3..8d566afcb86 100644 --- a/test/integration/catalog_packages_cases_test.go +++ b/test/integration/catalog_packages_cases_test.go @@ -53,9 +53,8 @@ var imageOnlyTestCases = []testCase{ }, { // When the image is build lib overwrites pkgs/lib causing there to only be two packages - name: "find apkdb packages", - pkgType: pkg.ApkPkg, - pkgLanguage: pkg.UnknownLanguage, + name: "find apkdb packages", + pkgType: pkg.ApkPkg, pkgInfo: map[string]string{ "musl-utils": "1.1.24-r2", "libc-utils": "0.7.2-r0", @@ -170,10 +169,9 @@ var dirOnlyTestCases = []testCase{ }, }, { - name: "find apkdb packages", - pkgType: pkg.ApkPkg, - pkgLanguage: pkg.UnknownLanguage, - duplicates: 2, // when the directory is cataloged we have duplicates between lib/ and pkgs/lib + name: "find apkdb packages", + pkgType: pkg.ApkPkg, + duplicates: 2, // when the directory is cataloged we have duplicates between lib/ and pkgs/lib pkgInfo: map[string]string{ "musl-utils": "1.1.24-r2", "libc-utils": "0.7.2-r0", @@ -205,17 +203,15 @@ var dirOnlyTestCases = []testCase{ var commonTestCases = []testCase{ { - name: "find rpmdb packages", - pkgType: pkg.RpmPkg, - pkgLanguage: pkg.UnknownLanguage, + name: "find rpmdb packages", + pkgType: pkg.RpmPkg, pkgInfo: map[string]string{ "dive": "0.9.2-1", }, }, { - name: "find dpkg packages", - pkgType: pkg.DebPkg, - pkgLanguage: pkg.UnknownLanguage, + name: "find dpkg packages", + pkgType: pkg.DebPkg, pkgInfo: map[string]string{ "apt": "1.8.2", "dash": "0.5.8-2.4",