Skip to content

Commit

Permalink
Add PackageURL.Normalize
Browse files Browse the repository at this point in the history
For many integrations external systems use their own structured
representation of package identifiers rather than Package URLs. In
these cases it would be ideal to construct a Package URL with with
NewPackageURL a then normalize the resulting struct. It avoids the
need to render a temporary, non-canon Package URL and parse that to
achieve normalization.
  • Loading branch information
wetterjames4 authored and shibumi committed Sep 29, 2023
1 parent bca39a1 commit 936a4ac
Show file tree
Hide file tree
Showing 2 changed files with 295 additions and 12 deletions.
88 changes: 79 additions & 9 deletions packageurl.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,12 @@ var (
// '-' and '_' (period, dash and underscore).
// - A key cannot start with a number.
QualifierKeyPattern = regexp.MustCompile(`^[A-Za-z\.\-_][0-9A-Za-z\.\-_]*$`)
// TypePattern describes a valid type:
//
// - The type must be composed only of ASCII letters and numbers, '.',
// '+' and '-' (period, plus and dash).
// - A type cannot start with a number.
TypePattern = regexp.MustCompile(`^[A-Za-z\.\-\+][0-9A-Za-z\.\-\+]*$`)
)

// These are the known purl types as defined in the spec. Some of these require
Expand Down Expand Up @@ -307,6 +313,33 @@ func (qq Qualifiers) String() string {
return strings.Join(kvPairs, "&")
}

func (qq *Qualifiers) Normalize() error {
qs := *qq
normedQQ := make(Qualifiers, 0, len(qs))
for _, q := range qs {
if q.Key == "" {
return fmt.Errorf("key is missing from qualifier: %v", q)
}
if q.Value == "" {
// Empty values are equivalent to the key being omitted from the PackageURL.
continue
}
key := strings.ToLower(q.Key)
if !validQualifierKey(key) {
return fmt.Errorf("invalid qualifier key: %q", key)
}
normedQQ = append(normedQQ, Qualifier{key, q.Value})
}
sort.Slice(normedQQ, func(i, j int) bool { return normedQQ[i].Key < normedQQ[j].Key })
for i := 1; i < len(normedQQ); i++ {
if normedQQ[i-1].Key == normedQQ[i].Key {
return fmt.Errorf("duplicate qualifier key: %q", normedQQ[i].Key)
}
}
*qq = normedQQ
return nil
}

// PackageURL is the struct representation of the parts that make a package url
type PackageURL struct {
Type string
Expand Down Expand Up @@ -399,13 +432,45 @@ func FromString(purl string) (PackageURL, error) {
pURL := PackageURL{
Qualifiers: qualifiers,
Type: typ,
Namespace: typeAdjustNamespace(typ, namespace),
Name: typeAdjustName(typ, name, qualifiers),
Version: typeAdjustVersion(typ, version),
Subpath: strings.Trim(u.Fragment, "/"),
Namespace: namespace,
Name: name,
Version: version,
Subpath: u.Fragment,
}

return pURL, validCustomRules(pURL)
err = pURL.Normalize()
return pURL, err
}

// Normalize converts p to its canonical form, returning an error if p is invalid.
func (p *PackageURL) Normalize() error {
typ := strings.ToLower(p.Type)
if !validType(typ) {
return fmt.Errorf("invalid type %q", typ)
}
namespace := strings.Trim(p.Namespace, "/")
if err := p.Qualifiers.Normalize(); err != nil {
return fmt.Errorf("invalid qualifiers: %v", err)
}
if p.Name == "" {
return errors.New("purl is missing name")
}
subpath := strings.Trim(p.Subpath, "/")
segs := strings.Split(p.Subpath, "/")
for _, s := range segs {
if s == "." || s == ".." {
return fmt.Errorf("invalid Package URL subpath: %q", p.Subpath)
}
}
*p = PackageURL{
Type: typ,
Namespace: typeAdjustNamespace(typ, namespace),
Name: typeAdjustName(typ, p.Name, p.Qualifiers),
Version: typeAdjustVersion(typ, p.Version),
Qualifiers: p.Qualifiers,
Subpath: subpath,
}
return validCustomRules(*p)
}

// escape the given string in a purl-compatible way.
Expand Down Expand Up @@ -477,11 +542,11 @@ func parseQualifiers(rawQuery string) (Qualifiers, error) {
return nil, fmt.Errorf("invalid qualifier key: '%s'", key)
}

if len(value) > 0 {
// only the first character needs to be lowercase. Note that pURL is always UTF8, so we
// don't need to care about unicode here.
value = strings.ToLower(value[:1]) + value[1:]
value, err = url.QueryUnescape(value)
if err != nil {
return nil, fmt.Errorf("error unescaping qualifier value %q", value)
}

q = append(q, Qualifier{
Key: strings.ToLower(key),
Value: value,
Expand Down Expand Up @@ -566,6 +631,11 @@ func validQualifierKey(key string) bool {
return QualifierKeyPattern.MatchString(key)
}

// validType validates a type against our TypePattern.
func validType(typ string) bool {
return TypePattern.MatchString(typ)
}

// validCustomRules evaluates additional rules for each package url type, as specified in the package-url specification.
// On success, it returns nil. On failure, a descriptive error will be returned.
func validCustomRules(p PackageURL) error {
Expand Down
219 changes: 216 additions & 3 deletions packageurl_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"os"
"reflect"
"regexp"
"sort"
"strings"
"testing"

Expand Down Expand Up @@ -159,8 +160,16 @@ func TestFromStringExamples(t *testing.T) {
t.Logf("%s: incorrect version: wanted: '%s', got '%s'", tc.Description, tc.Version, p.Version)
t.Fail()
}
if !reflect.DeepEqual(p.Qualifiers, tc.Qualifiers()) {
t.Logf("%s: incorrect qualifiers: wanted: '%#v', got '%#v'", tc.Description, tc.Qualifiers(), p.Qualifiers)
want := tc.Qualifiers()
sort.Slice(want, func(i, j int) bool {
return want[i].Key < want[j].Key
})
got := p.Qualifiers
sort.Slice(got, func(i, j int) bool {
return got[i].Key < got[j].Key
})
if !reflect.DeepEqual(want, got) {
t.Logf("%s: incorrect qualifiers: wanted: '%#v', got '%#v'", tc.Description, want, p.Qualifiers)
t.Fail()
}

Expand Down Expand Up @@ -321,7 +330,7 @@ func TestQualifierMissingEqual(t *testing.T) {
want := packageurl.PackageURL{
Type: "npm",
Name: "test-pkg",
Qualifiers: packageurl.Qualifiers{{Key: "key"}},
Qualifiers: packageurl.Qualifiers{},
}
got, err := packageurl.FromString(input)
if err != nil {
Expand All @@ -331,3 +340,207 @@ func TestQualifierMissingEqual(t *testing.T) {
t.Fatalf("FromString(%s): want %q got %q", input, want, got)
}
}

func TestNormalize(t *testing.T) {
testCases := []struct {
name string
input packageurl.PackageURL
want packageurl.PackageURL
wantErr bool
}{{
name: "type is case insensitive",
input: packageurl.PackageURL{
Type: "NpM",
Name: "pkg",
},
want: packageurl.PackageURL{
Type: "npm",
Name: "pkg",
Qualifiers: packageurl.Qualifiers{},
},
}, {
name: "type is manditory",
input: packageurl.PackageURL{
Name: "pkg",
},
wantErr: true,
}, {
name: "leading and traling / on namespace are trimmed",
input: packageurl.PackageURL{
Type: "npm",
Namespace: "/namespace/org/",
Name: "pkg",
},
want: packageurl.PackageURL{
Type: "npm",
Namespace: "namespace/org",
Name: "pkg",
Qualifiers: packageurl.Qualifiers{},
},
}, {
name: "qualifiers with empty values are removed",
input: packageurl.PackageURL{
Type: "npm",
Name: "pkg",
Qualifiers: packageurl.Qualifiers{{
Key: "k1", Value: "v1",
}, {
Key: "k2", Value: "",
}, {
Key: "k3", Value: "v3",
}},
},
want: packageurl.PackageURL{
Type: "npm",
Name: "pkg",
Qualifiers: packageurl.Qualifiers{{
Key: "k1", Value: "v1",
}, {
Key: "k3", Value: "v3",
}},
},
}, {
name: "qualifiers are sorted by key",
input: packageurl.PackageURL{
Type: "npm",
Name: "pkg",
Qualifiers: packageurl.Qualifiers{{
Key: "k3", Value: "v3",
}, {
Key: "k2", Value: "v2",
}, {
Key: "k1", Value: "v1",
}},
},
want: packageurl.PackageURL{
Type: "npm",
Name: "pkg",
Qualifiers: packageurl.Qualifiers{{
Key: "k1", Value: "v1",
}, {
Key: "k2", Value: "v2",
}, {
Key: "k3", Value: "v3",
}},
},
}, {
name: "duplicate keys are invalid",
input: packageurl.PackageURL{
Type: "npm",
Name: "pkg",
Qualifiers: packageurl.Qualifiers{{
Key: "k1", Value: "v1",
}, {
Key: "k1", Value: "v2",
}},
},
wantErr: true,
}, {
name: "keys are made lower case",
input: packageurl.PackageURL{
Type: "npm",
Name: "pkg",
Qualifiers: packageurl.Qualifiers{{
Key: "KeY", Value: "v1",
}},
},
want: packageurl.PackageURL{
Type: "npm",
Name: "pkg",
Qualifiers: packageurl.Qualifiers{{
Key: "key", Value: "v1",
}},
},
}, {
name: "name is required",
input: packageurl.PackageURL{
Type: "npm",
},
wantErr: true,
}, {
name: "leading and traling / on subpath are trimmed",
input: packageurl.PackageURL{
Type: "npm",
Name: "pkg",
Subpath: "/sub/path/",
},
want: packageurl.PackageURL{
Type: "npm",
Name: "pkg",
Qualifiers: packageurl.Qualifiers{},
Subpath: "sub/path",
},
}, {
name: "'.' is an invalid subpath segment",
input: packageurl.PackageURL{
Type: "npm",
Name: "pkg",
Subpath: "/sub/./path/",
},
wantErr: true,
}, {
name: "'..' is an invalid subpath segment",
input: packageurl.PackageURL{
Type: "npm",
Name: "pkg",
Subpath: "/sub/../path/",
},
wantErr: true,
}, {
name: "known type namespace adjustments",
input: packageurl.PackageURL{
Type: "npm",
Namespace: "NaMeSpAcE",
Name: "pkg",
},
want: packageurl.PackageURL{
Type: "npm",
Namespace: "namespace",
Name: "pkg",
Qualifiers: packageurl.Qualifiers{},
},
}, {
name: "known type name adjustments",
input: packageurl.PackageURL{
Type: "npm",
Name: "nAmE",
},
want: packageurl.PackageURL{
Type: "npm",
Name: "name",
Qualifiers: packageurl.Qualifiers{},
},
}, {
name: "known type version adjustments",
input: packageurl.PackageURL{
Type: "huggingface",
Name: "name",
Version: "VeRsIoN",
},
want: packageurl.PackageURL{
Type: "huggingface",
Name: "name",
Version: "version",
Qualifiers: packageurl.Qualifiers{},
},
}}

for _, testCase := range testCases {
t.Run(testCase.name, func(t *testing.T) {
got := testCase.input
err := got.Normalize()
if err != nil && testCase.wantErr {
return
}
if err != nil && !testCase.wantErr {
t.Fatalf("Normalize(%s): unexpected error: %v", testCase.name, err)
}
if testCase.wantErr {
t.Fatalf("Normalize(%s): want error, got none", testCase.name)
}
if !reflect.DeepEqual(testCase.want, got) {
t.Fatalf("Normalize(%s):\nwant %#v\ngot %#v", testCase.name, testCase.want, got)
}
})
}
}

0 comments on commit 936a4ac

Please sign in to comment.