From 306b3cdae5771330d87c8d3e44cea5ad2b486854 Mon Sep 17 00:00:00 2001 From: Quim Muntal Date: Thu, 31 Dec 2020 11:28:07 +0100 Subject: [PATCH 1/5] improve part name normalization --- package.go | 6 +- part.go | 307 +++++++++++++++++++++++++++++++++++-------- part_test.go | 25 ++-- reader.go | 8 +- relationship.go | 36 +++-- relationship_test.go | 2 +- writer.go | 10 +- writer_test.go | 1 + 8 files changed, 310 insertions(+), 85 deletions(-) diff --git a/package.go b/package.go index 14667f9..aaab515 100644 --- a/package.go +++ b/package.go @@ -12,7 +12,7 @@ import ( "fmt" "io" "mime" - "path/filepath" + "path" "sort" "strings" ) @@ -152,7 +152,7 @@ func (c *contentTypes) add(partName, contentType string) error { t, params, _ := mime.ParseMediaType(contentType) contentType = mime.FormatMediaType(t, params) - ext := strings.ToLower(filepath.Ext(partName)) + ext := strings.ToLower(path.Ext(partName)) if len(ext) == 0 { c.addOverride(partName, contentType) return nil @@ -187,7 +187,7 @@ func (c *contentTypes) findType(name string) (string, error) { if t, ok := c.overrides[strings.ToUpper(name)]; ok { return t, nil } - ext := filepath.Ext(name) + ext := path.Ext(name) if ext != "" { if t, ok := c.defaults[strings.ToLower(ext[1:])]; ok { return t, nil diff --git a/part.go b/part.go index 181137c..f4f1cee 100644 --- a/part.go +++ b/part.go @@ -3,9 +3,9 @@ package opc import ( "fmt" "mime" - "net/url" - "path/filepath" + "path" "strings" + "unicode/utf8" ) // A Part is a stream of bytes defined in ISO/IEC 29500-2 §9.1.. @@ -28,62 +28,40 @@ func (p *Part) validate() error { return validateRelationships(p.Name, p.Relationships) } -var defaultRef, _ = url.Parse("http://defaultcontainer/") - // ResolveRelationship returns the absolute URI (from the package root) of the part pointed by a relationship of a source part. // This method should be used in places where we have a target relationship URI and we want to get the // name of the part it targets with respect to the source part. // The source can be a valid part URI, for part relationships, or "/", for package relationships. func ResolveRelationship(source string, rel string) string { - if source == "/" || source == "\\" { - if strings.HasPrefix(rel, "\\") { - rel = rel[1:] - } + source = strings.Replace(source, "\\", "/", -1) + rel = strings.Replace(rel, "\\", "/", -1) + if source == "/" { if !strings.HasPrefix(rel, "/") { rel = "/" + rel } } - if !strings.HasPrefix(rel, "/") && !strings.HasPrefix(rel, "\\") { - sourceDir := strings.Replace(filepath.Dir(source), "\\", "/", -1) - rel = fmt.Sprintf("%s/%s", sourceDir, rel) + if !strings.HasPrefix(rel, "/") { + rel = fmt.Sprintf("%s/%s", path.Dir(source), rel) } return rel } -// NormalizePartName transforms the input name so it follows the constrains specified in the ISO/IEC 29500-2 §9.1.1: -// part-URI = 1*( "/" segment ) -// segment = 1*( pchar ) -// pchar is defined in RFC 3986: -// pchar = unreserved / pct-encoded / sub-delims / ":" / "@" -// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" -// pct-encoded = "%" HEXDIG HEXDIG -// sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "=" +// NormalizePartName transforms the input name as an URI string +// so it follows the constrains specified in the ISO/IEC 29500-2 §9.1.1. // This method is recommended to be used before adding a new Part to a package to avoid errors. -// If, for whatever reason, the name can't be adapted to the specs, the return value will be the same as the original. -// Warning: This method can heavily modify the original if it differs a lot from the specs, which could led to duplicated part names. +// If, for whatever reason, the name can't be adapted to the specs, the return value is empty. +// Warning: This method can heavily modify the name if it differs a lot from the specs, which could led to duplicated part names. func NormalizePartName(name string) string { - if str := strings.TrimSpace(name); str == "" || str == "/" { - return name - } - - normalized := strings.Replace(name, "\\", "/", -1) - normalized = strings.Replace(normalized, "//", "/", -1) - normalized = strings.Replace(normalized, "%2e", ".", -1) - if strings.HasSuffix(normalized, "/") { - normalized = normalized[:len(normalized)-1] - } - - encodedURL, err := url.Parse(normalized) - if err != nil || encodedURL.IsAbs() { - return name - } - - // Normalize url, decode unnecessary escapes and encode necessary - p, err := url.Parse(defaultRef.ResolveReference(encodedURL).Path) - if err != nil { - return name + name = strings.TrimSpace(name) + if name == "" || name == "/" || name == "\\" || name == "." { + return "" } - return p.EscapedPath() + name, _ = split(name, '#', true) + name = strings.NewReplacer("\\", "/", "//", "/").Replace(name) + name = unescape(name) + name = escape(name) + name = cleanSegments(name) + return strings.TrimSuffix(name, "/") } func (p *Part) validateContentType() error { @@ -104,10 +82,17 @@ func (p *Part) validateContentType() error { } func validatePartName(name string) error { + if strings.EqualFold(name, contentTypesName) { + return nil + } if strings.TrimSpace(name) == "" { return newError(101, name) } + if name[0] != '/' { + return newError(104, name) + } + if err := validateChars(name); err != nil { return err } @@ -116,20 +101,7 @@ func validatePartName(name string) error { return err } - return validateURL(name) -} - -func validateURL(name string) error { - encodedURL, err := url.Parse(name) - if err != nil { - return fmt.Errorf("opc: %s: invalid url: %v", name, err) - } - - if name[0] != '/' || encodedURL.IsAbs() { - return newError(104, name) - } - - if name != encodedURL.EscapedPath() { + if !validEncoded(name, true) { return newError(106, name) } return nil @@ -167,3 +139,226 @@ func validateSegments(name string) error { } return nil } + +func cleanSegments(s string) string { + src := strings.Split(s, "/") + dst := make([]string, 0, len(src)) + for _, elem := range src { + switch elem { + case ".", "..": + // drop + default: + dst = append(dst, elem) + } + } + if last := src[len(src)-1]; last == "." || last == ".." { + // Add final slash to the joined path. + dst = append(dst, "") + } + return "/" + strings.TrimPrefix(strings.Join(dst, "/"), "/") +} + +func escape(s string) string { + hexCount := 0 + for i := 0; i < len(s); i++ { + switch s[i] { + case '%': + if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) { + hexCount++ + } + default: + if shouldEscape(s[i]) { + hexCount++ + } + } + } + if hexCount == 0 { + return s + } + var buf [64]byte + var t []byte + + required := len(s) + 2*hexCount + if required <= len(buf) { + t = buf[:required] + } else { + t = make([]byte, required) + } + + j := 0 + for i := 0; i < len(s); i++ { + switch s[i] { + case '%': + if i+2 >= len(s) || !ishex(s[i+1]) || !ishex(s[i+2]) { + t[j] = '%' + t[j+1] = '2' + t[j+2] = '5' + j += 3 + } else { + t[j] = '%' + t[j+1] = s[i+1] + t[j+2] = s[i+2] + j += 3 + } + default: + c := s[i] + if shouldEscape(c) { + t[j] = '%' + t[j+1] = upperhex[c>>4] + t[j+2] = upperhex[c&15] + j += 3 + } else { + t[j] = s[i] + j++ + } + } + } + return string(t) +} + +func unescape(s string) string { + n := 0 + for i := 0; i < len(s); { + if s[i] == '%' { + if i+2 < len(s) && ishex(s[i+1]) && ishex(s[i+2]) { + c := unpct(s[i+1], s[i+2]) + if c == '%' || isReserved(c) { + i++ + } else { + n++ + i += 3 + } + } else { + i++ + } + } else { + i++ + } + } + + if n == 0 { + return s + } + + var t strings.Builder + t.Grow(len(s) - 2*n) + for i := 0; i < len(s); i++ { + if s[i] == '%' { + if i+2 < len(s) && ishex(s[i+1]) && ishex(s[i+2]) { + c := unpct(s[i+1], s[i+2]) + if c == '%' || isReserved(c) { + t.WriteByte(s[i]) + } else { + t.WriteByte(unhex(s[i+1])<<4 | unhex(s[i+2])) + i += 2 + } + } else { + t.WriteByte(s[i]) + } + } else { + t.WriteByte(s[i]) + } + } + return t.String() +} + +func split(s string, sep byte, cutc bool) (string, string) { + i := strings.IndexByte(s, sep) + if i < 0 { + return s, "" + } + if cutc { + return s[:i], s[i+1:] + } + return s[:i], s[i:] +} + +const upperhex = "0123456789ABCDEF" + +func ishex(c byte) bool { + switch { + case '0' <= c && c <= '9': + return true + case 'a' <= c && c <= 'f': + return true + case 'A' <= c && c <= 'F': + return true + } + return false +} + +func unhex(c byte) byte { + switch { + case '0' <= c && c <= '9': + return c - '0' + case 'a' <= c && c <= 'f': + return c - 'a' + 10 + case 'A' <= c && c <= 'F': + return c - 'A' + 10 + } + return 0 +} + +func isAlpha(c byte) bool { + return 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z' +} + +func isDigit(c byte) bool { + return '0' <= c && c <= '9' +} + +func isUnreserved(c byte) bool { + return isAlpha(c) || isDigit(c) || c == '-' || c == '.' || c == '_' || c == '~' +} + +func isReserved(c byte) bool { + if c == '/' || c == ':' || c == '@' { + return true + } + if c == '!' || c == '$' || c == '&' || c == '\'' || c == '(' || c == ')' || + c == '*' || c == '+' || c == ',' || c == ';' || c == '=' { + return true + } + return false +} + +func isUcsChar(r rune) bool { + return 0xA0 <= r && r <= 0xD7FF || 0xF900 <= r && r <= 0xFDCF || 0xFDF0 <= r && r <= 0xFFEF || + 0x10000 <= r && r <= 0x1FFFD || 0x20000 <= r && r <= 0x2FFFD || 0x30000 <= r && r <= 0x3FFFD || + 0x40000 <= r && r <= 0x4FFFD || 0x50000 <= r && r <= 0x5FFFD || 0x60000 <= r && r <= 0x6FFFD || + 0x70000 <= r && r <= 0x7FFFD || 0x80000 <= r && r <= 0x8FFFD || 0x90000 <= r && r <= 0x9FFFD || + 0xA0000 <= r && r <= 0xAFFFD || 0xB0000 <= r && r <= 0xBFFFD || 0xC0000 <= r && r <= 0xCFFFD || + 0xD0000 <= r && r <= 0xDFFFD || 0xE1000 <= r && r <= 0xEFFFD +} + +func shouldEscape(c byte) bool { + return !isUnreserved(c) && !isReserved(c) +} + +func unpct(c1, c2 byte) byte { + return unhex(c1)<<4 | unhex(c2) +} + +func validEncoded(s string, allowIRI bool) bool { + for i := 0; i < len(s); i++ { + switch s[i] { + case '%': + if i+2 < len(s) && isUnreserved(unpct(s[i+1], s[i+2])) { + return false + } + // ok + default: + if shouldEscape(s[i]) { + if !allowIRI { + return false + } + r, wid := utf8.DecodeRuneInString(s[i:]) + if !isUcsChar(r) { + return false + } + i += wid + } + } + } + return true +} diff --git a/part_test.go b/part_test.go index 73e714e..b09919e 100644 --- a/part_test.go +++ b/part_test.go @@ -14,26 +14,32 @@ func TestNormalizePartName(t *testing.T) { want string }{ {"base", args{"/a.xml"}, "/a.xml"}, + {"empty", args{" "}, ""}, {"noslash", args{"a.xml"}, "/a.xml"}, + {"endDot", args{"/a/."}, "/a"}, {"folder", args{"/docs/a.xml"}, "/docs/a.xml"}, {"noext", args{"/docs"}, "/docs"}, {"win", args{"\\docs\\a.xml"}, "/docs/a.xml"}, {"winnoslash", args{"docs\\a.xml"}, "/docs/a.xml"}, {"fragment", args{"/docs/a.xml#a"}, "/docs/a.xml"}, {"twoslash", args{"//docs/a.xml"}, "/docs/a.xml"}, - {"necessaryEscaped", args{"//docs/!\".xml"}, "/docs/%21%22.xml"}, + {"necessaryEscaped", args{"//docs/!\".xml"}, "/docs/!%22.xml"}, {"unecessaryEscaped", args{"//docs/%41.xml"}, "/docs/A.xml"}, {"endslash", args{"/docs/a.xml/"}, "/docs/a.xml"}, {"empty", args{""}, ""}, - {"onlyslash", args{"/"}, "/"}, - {"invalidURL", args{"/docs%/a.xml"}, "/docs%/a.xml"}, - {"abs", args{"http://a.com/docs/a.xml"}, "http://a.com/docs/a.xml"}, + {"dot", args{"."}, ""}, + {"onlyslash", args{"/"}, ""}, + {"percentSign", args{"/docs%/a.xml"}, "/docs%25/a.xml"}, + {"percentSign2", args{"/docs%25/%41.xml"}, "/docs%25/A.xml"}, + {"percentSignEnd", args{"/docs/a.%"}, "/docs/a.%25"}, + {"pre-encoded", args{"/%3Aa.xml"}, "/%3Aa.xml"}, + {"chinese", args{"/传/傳.xml"}, "/%E4%BC%A0/%E5%82%B3.xml"}, {"fromSpec1", args{"/a/b.xml"}, "/a/b.xml"}, {"fromSpec2", args{"/a/ц.xml"}, "/a/%D1%86.xml"}, {"fromSpec3", args{"/%41/%61.xml"}, "/A/a.xml"}, {"fromSpec4", args{"/%25XY.xml"}, "/%25XY.xml"}, - {"fromSpec5", args{"/%XY.xml"}, "/%XY.xml"}, - {"fromSpec6", args{"/%2541.xml"}, "/%41.xml"}, + {"fromSpec5", args{"/%XY.xml"}, "/%25XY.xml"}, + {"fromSpec6", args{"/%2541.xml"}, "/%2541.xml"}, {"fromSpec7", args{"/../a.xml"}, "/a.xml"}, {"fromSpec8", args{"/./ц.xml"}, "/%D1%86.xml"}, {"fromSpec9", args{"/%2e/%2e/a.xml"}, "/a.xml"}, @@ -41,6 +47,7 @@ func TestNormalizePartName(t *testing.T) { {"fromSpec11", args{"\\%41.xml"}, "/A.xml"}, {"fromSpec12", args{"/%D1%86.xml"}, "/%D1%86.xml"}, {"fromSpec13", args{"\\%2e/a.xml"}, "/a.xml"}, + {"unicode1", args{"/\uFFFDa.xml"}, "/%EF%BF%BDa.xml"}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -59,22 +66,22 @@ func TestPart_validate(t *testing.T) { wantErr bool }{ {"base", &Part{"/docs/a.xml", "a/b", nil}, false}, - {"gen-delims", &Part{"/[docs]/a.xml", "a/b", nil}, false}, + {"percentChar", &Part{"/docs%/a.xml", "a/b", nil}, false}, + {"ucschar", &Part{"/€/a.xml", "a/b", nil}, false}, {"mediaEmpty", &Part{"/a.txt", "", nil}, true}, {"emptyName", &Part{"", "a/b", nil}, true}, {"onlyspaces", &Part{" ", "a/b", nil}, true}, {"onlyslash", &Part{"/", "a/b", nil}, true}, - {"invalidURL", &Part{"/docs%/a.xml", "a/b", nil}, true}, {"emptySegment", &Part{"/doc//a.xml", "a/b", nil}, true}, {"abs uri", &Part{"http://docs//a.xml", "a/b", nil}, true}, {"not rel uri", &Part{"docs/a.xml", "a/b", nil}, true}, + {"encoded unreserved", &Part{"/%41.xml", "a/b", nil}, true}, {"endSlash", &Part{"/docs/a.xml/", "a/b", nil}, true}, {"endDot", &Part{"/docs/a.xml.", "a/b", nil}, true}, {"dot", &Part{"/docs/./a.xml", "a/b", nil}, true}, {"twoDots", &Part{"/docs/../a.xml", "a/b", nil}, true}, {"reserved", &Part{"/docs/%7E/a.xml", "a/b", nil}, true}, {"withQuery", &Part{"/docs/a.xml?a=2", "a/b", nil}, true}, - {"notencodechar", &Part{"/€/a.xml", "a/b", nil}, true}, {"encodedBSlash", &Part{"/%5C/a.xml", "a/b", nil}, true}, {"encodedBSlash", &Part{"/%2F/a.xml", "a/b", nil}, true}, {"encodechar", &Part{"/%E2%82%AC/a.xml", "a/b", nil}, false}, diff --git a/reader.go b/reader.go index 7640f0d..6c599f7 100644 --- a/reader.go +++ b/reader.go @@ -6,7 +6,7 @@ import ( "fmt" "io" "os" - "path/filepath" + "path" "strings" ) @@ -179,9 +179,9 @@ func loadRelationships(file archiveFile, rels *relationshipsPart) error { return err } - // get part name from rels part - path := strings.Replace(filepath.Dir(filepath.Dir(file.Name())), `\`, "/", -1) - pname := "/" + path + "/" + strings.TrimSuffix(filepath.Base(file.Name()), filepath.Ext(file.Name())) + // get part name from rels parts + name := path.Dir(path.Dir(file.Name())) + pname := "/" + name + "/" + strings.TrimSuffix(path.Base(file.Name()), path.Ext(file.Name())) pname = NormalizePartName(pname) rels.addRelationship(pname, rls) return nil diff --git a/relationship.go b/relationship.go index 4b1735a..89eb815 100644 --- a/relationship.go +++ b/relationship.go @@ -4,7 +4,6 @@ import ( "encoding/xml" "fmt" "io" - "net/url" "strings" ) @@ -21,7 +20,6 @@ const ( ) const externalMode = "External" -const charBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ123456789" // Relationship is used to express a relationship between a source and a target part. // If the ID is not specified a unique ID will be generated following the pattern rIdN. @@ -116,18 +114,16 @@ func isRelationshipURI(uri string) bool { // validateRelationshipTarget checks that a relationship target follows the constrains specified in the ISO/IEC 29500-2 §9.3. func (r *Relationship) validateRelationshipTarget(sourceURI string) error { - uri, err := url.Parse(strings.TrimSpace(r.TargetURI)) - if err != nil || uri.String() == "" { + if !validEncoded(r.TargetURI, false) { return newErrorRelationship(128, sourceURI, r.ID) } - // ISO/IEC 29500-2 M1.29 if r.TargetMode == ModeInternal { - if uri.IsAbs() { + if !isInternal(sourceURI) { return newErrorRelationship(129, sourceURI, r.ID) } else { - source, err := url.Parse(strings.TrimSpace(sourceURI)) - if err != nil || source.String() == "" || isRelationshipURI(source.ResolveReference(uri).String()) { + source := strings.TrimSpace(sourceURI) + if source == "" || isRelationshipURI(ResolveRelationship(sourceURI, r.TargetURI)) { return newErrorRelationship(125, sourceURI, r.ID) } } @@ -198,3 +194,27 @@ func (rp *relationshipsPart) addRelationship(name string, r []*Relationship) { } rp.relation[strings.ToUpper(name)] = r } + +func isInternal(rawurl string) bool { + for i := 0; i < len(rawurl); i++ { + c := rawurl[i] + switch { + case 'a' <= c && c <= 'z' || 'A' <= c && c <= 'Z': + // do nothing + case '0' <= c && c <= '9' || c == '+' || c == '-' || c == '.': + if i == 0 { + return true + } + case c == ':': + if i == 0 { + return true + } + return false + default: + // we have encountered an invalid character, + // so there is no valid scheme + return true + } + } + return true +} diff --git a/relationship_test.go b/relationship_test.go index 3d6ef82..841d124 100644 --- a/relationship_test.go +++ b/relationship_test.go @@ -48,7 +48,7 @@ func TestRelationship_validate(t *testing.T) { {"internalRelRel", &Relationship{ID: "fakeId", Type: "fakeType", TargetURI: "/_rels/.rels", TargetMode: ModeInternal}, args{"/"}, true}, {"internalRelNoSource", &Relationship{ID: "fakeId", Type: "fakeType", TargetURI: "/fakeTarget", TargetMode: ModeInternal}, args{""}, true}, {"invalidTarget2", &Relationship{ID: "fakeId", Type: "fakeType", TargetURI: " ", TargetMode: ModeInternal}, args{""}, true}, - {"invalid", &Relationship{ID: "fakeId", Type: "fakeType", TargetURI: "://a.com/b", TargetMode: ModeExternal}, args{""}, true}, + {"invalid", &Relationship{ID: "fakeId", Type: "fakeType", TargetURI: "http//%61.com/b", TargetMode: ModeExternal}, args{""}, true}, {"invalidID", &Relationship{ID: " ", Type: "fakeType", TargetURI: "http://a.com/b", TargetMode: ModeInternal}, args{""}, true}, {"invalidAbsTarget", &Relationship{ID: "fakeId", Type: "fakeType", TargetURI: "http://a.com/b", TargetMode: ModeInternal}, args{""}, true}, {"invalidTarget", &Relationship{ID: "fakeId", Type: "fakeType", TargetURI: "", TargetMode: ModeInternal}, args{""}, true}, diff --git a/writer.go b/writer.go index 5595163..e5ead48 100644 --- a/writer.go +++ b/writer.go @@ -5,7 +5,7 @@ import ( "compress/flate" "fmt" "io" - "path/filepath" + "path" "strings" "time" ) @@ -51,7 +51,9 @@ func NewWriter(w io.Writer) *Writer { // NewWriterFromReader returns a new Writer writing an OPC package to w // and with its content initialized with r. -// Parts comming from r cannot be modified but new parts can be appended +// +// The original package is not modified. +// Parts coming from r cannot be modified but new parts can be appended // and package core properties and relationships can be updated. func NewWriterFromReader(w io.Writer, r *Reader) (*Writer, error) { ow := NewWriter(w) @@ -201,11 +203,11 @@ func (w *Writer) createLastPartRelationships() error { if err := validateRelationships(w.last.Name, w.last.Relationships); err != nil { return err } - dirName := strings.Replace(filepath.Dir(w.last.Name), "\\", "/", -1)[1:] + dirName := path.Dir(w.last.Name)[1:] if dirName != "" { dirName = "/" + dirName } - relName := fmt.Sprintf("%s/_rels/%s.rels", dirName, filepath.Base(w.last.Name)) + relName := fmt.Sprintf("%s/_rels/%s.rels", dirName, path.Base(w.last.Name)) rw, err := w.addToPackage(&Part{Name: relName, ContentType: relationshipContentType}, CompressionNormal) if err != nil { return err diff --git a/writer_test.go b/writer_test.go index df7a308..85c3ed8 100644 --- a/writer_test.go +++ b/writer_test.go @@ -147,6 +147,7 @@ func TestWriter_CreatePart(t *testing.T) { args args wantErr bool }{ + {"unicode", NewWriter(&bytes.Buffer{}), args{&Part{"/a/ц.xml", "a/b", nil}, CompressionNone}, false}, {"fhErr", NewWriter(&bytes.Buffer{}), args{&Part{"/a.xml", "a/b", nil}, -3}, true}, {"nameErr", NewWriter(&bytes.Buffer{}), args{&Part{"a.xml", "a/b", nil}, CompressionNone}, true}, {"failRel", &Writer{w: zip.NewWriter(nil), last: &Part{Name: "/b.xml", Relationships: []*Relationship{{}}}}, args{&Part{"/a.xml", "a/b", nil}, CompressionNone}, true}, From e7152090334ead29a2874df64cc346f36f02d0cf Mon Sep 17 00:00:00 2001 From: Quim Muntal Date: Sat, 2 Jan 2021 20:31:45 +0100 Subject: [PATCH 2/5] normalize part names --- package.go | 20 +++++++++------- package_test.go | 6 ++--- part.go | 6 ++--- reader.go | 4 ++-- reader_test.go | 57 +++++++++++++------------------------------- relationship.go | 13 +++++----- relationship_test.go | 4 +++- writer.go | 2 +- writer_test.go | 8 +++---- 9 files changed, 50 insertions(+), 70 deletions(-) diff --git a/package.go b/package.go index aaab515..6c693d4 100644 --- a/package.go +++ b/package.go @@ -27,18 +27,18 @@ const ( ) type pkg struct { - parts map[string]*Part + parts map[string]struct{} contentTypes contentTypes } func newPackage() *pkg { return &pkg{ - parts: make(map[string]*Part, 0), + parts: make(map[string]struct{}, 0), } } func (p *pkg) partExists(partName string) bool { - _, ok := p.parts[strings.ToUpper(partName)] + _, ok := p.parts[partName] return ok } @@ -46,15 +46,19 @@ func (p *pkg) add(part *Part) error { if err := part.validate(); err != nil { return err } - upperURI := strings.ToUpper(part.Name) - if p.partExists(upperURI) { + name := part.Name + if !strings.EqualFold(name, contentTypesName) { + name = NormalizePartName(part.Name) + } + name = strings.ToUpper(name) + if p.partExists(name) { return newError(112, part.Name) } - if p.checkPrefixCollision(upperURI) { + if p.checkPrefixCollision(name) { return newError(111, part.Name) } - p.contentTypes.add(part.Name, part.ContentType) - p.parts[upperURI] = part + p.contentTypes.add(name, part.ContentType) + p.parts[name] = struct{}{} return nil } diff --git a/package_test.go b/package_test.go index 1e2567d..347ed58 100644 --- a/package_test.go +++ b/package_test.go @@ -8,9 +8,9 @@ import ( ) func createFakePackage(m ...string) *pkg { - parts := make(map[string]*Part, len(m)) + parts := make(map[string]struct{}, len(m)) for _, s := range m { - parts[strings.ToUpper(s)] = new(Part) + parts[strings.ToUpper(s)] = struct{}{} } return &pkg{ parts: parts, @@ -23,7 +23,7 @@ func Test_newPackage(t *testing.T) { want *pkg }{ {"base", &pkg{ - parts: make(map[string]*Part, 0), + parts: make(map[string]struct{}, 0), }, }, } diff --git a/part.go b/part.go index f4f1cee..f0651fb 100644 --- a/part.go +++ b/part.go @@ -35,10 +35,8 @@ func (p *Part) validate() error { func ResolveRelationship(source string, rel string) string { source = strings.Replace(source, "\\", "/", -1) rel = strings.Replace(rel, "\\", "/", -1) - if source == "/" { - if !strings.HasPrefix(rel, "/") { - rel = "/" + rel - } + if source == "/" && !strings.HasPrefix(rel, "/") { + rel = "/" + rel } if !strings.HasPrefix(rel, "/") { rel = fmt.Sprintf("%s/%s", path.Dir(source), rel) diff --git a/reader.go b/reader.go index 6c599f7..8919094 100644 --- a/reader.go +++ b/reader.go @@ -112,7 +112,7 @@ func (r *Reader) loadPackage() error { return err } } else { - cType, err := ct.findType(fileName) + cType, err := ct.findType(NormalizePartName(fileName)) if err != nil { return err } @@ -252,7 +252,7 @@ func decodeContentTypes(r io.Reader) (*contentTypes, error) { } ct.addDefault(ext, cDefault.ContentType) } else if cOverride, ok := c.Value.(overrideContentTypeXML); ok { - partName := strings.ToUpper(cOverride.PartName) + partName := strings.ToUpper(NormalizePartName(cOverride.PartName)) if _, ok := ct.overrides[partName]; ok { return nil, newError(205, partName) } diff --git a/reader_test.go b/reader_test.go index 800ad82..c35e0c3 100644 --- a/reader_test.go +++ b/reader_test.go @@ -17,22 +17,17 @@ import ( func Test_newReader(t *testing.T) { p1 := newPackage() - p1.parts["/DOCPROPS/APP.XML"] = &Part{Name: "/docProps/app.xml", ContentType: "application/vnd.openxmlformats-officedocument.extended-properties+xml"} - p1.parts["/PICTURES/PHOTO.PNG"] = &Part{Name: "/pictures/photo.png", ContentType: "image/png"} - p1.parts["/FILES.XML"] = &Part{Name: "/files.xml", ContentType: "application/xml"} + p1.parts["/DOCPROPS/APP.XML"] = struct{}{} + p1.parts["/PICTURES/PHOTO.PNG"] = struct{}{} + p1.parts["/FILES.XML"] = struct{}{} p1.contentTypes.addOverride("/DOCPROPS/APP.XML", "application/vnd.openxmlformats-officedocument.extended-properties+xml") p1.contentTypes.addDefault("png", "image/png") p1.contentTypes.addDefault("xml", "application/xml") p2 := newPackage() - p2.parts["/DOCPROPS/APP.XML"] = &Part{Name: "/docProps/app.xml", ContentType: "application/vnd.openxmlformats-officedocument.extended-properties+xml", - Relationships: []*Relationship{ - {ID: "rel-1", Type: "text/txt", TargetURI: "/", TargetMode: ModeInternal}, - {ID: "rel-2", Type: "text/txt", TargetURI: "/", TargetMode: ModeExternal}, - }, - } - p2.parts["/PICTURES/PHOTO.PNG"] = &Part{Name: "/pictures/photo.png", ContentType: "image/png"} - p2.parts["/FILES.XML"] = &Part{Name: "/files.xml", ContentType: "application/xml"} + p2.parts["/DOCPROPS/APP.XML"] = struct{}{} + p2.parts["/PICTURES/PHOTO.PNG"] = struct{}{} + p2.parts["/FILES.XML"] = struct{}{} p2.contentTypes.addOverride("/DOCPROPS/APP.XML", "application/vnd.openxmlformats-officedocument.extended-properties+xml") p2.contentTypes.addDefault("xml", "application/xml") p2.contentTypes.addDefault("png", "image/png") @@ -127,7 +122,7 @@ func Test_newReader_File(t *testing.T) { ioutil.NopCloser(bytes.NewBufferString(new(cTypeBuilder).withDefault("image/png", "png").String())), nil, ), - newMockFile("pictures/../photo.png", ioutil.NopCloser(bytes.NewBufferString("")), nil), + newMockFile("pictures/./photo.png", ioutil.NopCloser(bytes.NewBufferString("")), nil), }, 110}, {"emptySegment", []archiveFile{ newMockFile( @@ -171,9 +166,9 @@ func Test_newReader_ContentType(t *testing.T) { ` p := newPackage() - p.parts["/DOCPROPS/APP.XML"] = &Part{Name: "/docProps/app.xml", ContentType: "application/vnd.openxmlformats-officedocument.extended-properties+xml"} - p.parts["/PICTURES/PHOTO.PNG"] = &Part{Name: "/pictures/photo.PNG", ContentType: "image/png"} - p.parts["/FILES.XML"] = &Part{Name: "/files.xml", ContentType: "application/xml"} + p.parts["/DOCPROPS/APP.XML"] = struct{}{} + p.parts["/PICTURES/PHOTO.PNG"] = struct{}{} + p.parts["/FILES.XML"] = struct{}{} p.contentTypes.addOverride("/DOCPROPS/APP.XML", "application/vnd.openxmlformats-officedocument.extended-properties+xml") p.contentTypes.addDefault("png", "image/png") p.contentTypes.addDefault("xml", "application/xml") @@ -280,36 +275,18 @@ func Test_newReader_ContentType(t *testing.T) { func Test_newReader_PartRelationships(t *testing.T) { p3 := newPackage() - p3.parts["/DOCPROPS/APP.XML"] = &Part{Name: "/docProps/app.xml", ContentType: "application/vnd.openxmlformats-officedocument.extended-properties+xml", - Relationships: []*Relationship{ - {ID: "rel-1", Type: "text/txt", TargetURI: "/", TargetMode: ModeInternal}, - {ID: "rel-2", Type: "text/txt", TargetURI: "/", TargetMode: ModeExternal}, - }, - } - p3.parts["/PICTURES/PHOTO.PNG"] = &Part{Name: "/pictures/photo.png", ContentType: "image/png"} - p3.parts["/FILES.XML"] = &Part{Name: "/files.xml", ContentType: "application/xml", Relationships: []*Relationship{ - {ID: "rel-1", Type: "text/txt", TargetURI: "/", TargetMode: ModeInternal}, - }} + p3.parts["/DOCPROPS/APP.XML"] = struct{}{} + p3.parts["/PICTURES/PHOTO.PNG"] = struct{}{} + p3.parts["/FILES.XML"] = struct{}{} p3.contentTypes.addOverride("/DOCPROPS/APP.XML", "application/vnd.openxmlformats-officedocument.extended-properties+xml") p3.contentTypes.addDefault("xml", "application/xml") p3.contentTypes.addDefault("png", "image/png") p4 := newPackage() - p4.parts["/DOCPROPS/APP.XML"] = &Part{Name: "/docProps/app.xml", ContentType: "application/vnd.openxmlformats-officedocument.extended-properties+xml", - Relationships: []*Relationship{ - {ID: "rel-1", Type: "text/txt", TargetURI: "/", TargetMode: ModeInternal}, - {ID: "rel-2", Type: "text/txt", TargetURI: "/", TargetMode: ModeExternal}, - }, - } - p4.parts["/PICTURES/SEASON/SUMMER/PHOTO.PNG"] = &Part{Name: "/pictures/season/summer/photo.png", ContentType: "image/png", - Relationships: []*Relationship{ - {ID: "rel-3", Type: "text/txt", TargetURI: "/", TargetMode: ModeInternal}, - {ID: "rel-4", Type: "text/txt", TargetURI: "/", TargetMode: ModeInternal}, - {ID: "rel-5", Type: "text/txt", TargetURI: "/", TargetMode: ModeInternal}, - }, - } - p4.parts["/PICTURES/SUMMER/PHOTO2.PNG"] = &Part{Name: "/pictures/summer/photo2.png", ContentType: "image/png"} - p4.parts["/FILES.XML"] = &Part{Name: "/files.xml", ContentType: "application/xml"} + p4.parts["/DOCPROPS/APP.XML"] = struct{}{} + p4.parts["/PICTURES/SEASON/SUMMER/PHOTO.PNG"] = struct{}{} + p4.parts["/PICTURES/SUMMER/PHOTO2.PNG"] = struct{}{} + p4.parts["/FILES.XML"] = struct{}{} p4.contentTypes.addOverride("/DOCPROPS/APP.XML", "application/vnd.openxmlformats-officedocument.extended-properties+xml") p4.contentTypes.addDefault("xml", "application/xml") p4.contentTypes.addDefault("png", "image/png") diff --git a/relationship.go b/relationship.go index 89eb815..d58c8ed 100644 --- a/relationship.go +++ b/relationship.go @@ -114,18 +114,17 @@ func isRelationshipURI(uri string) bool { // validateRelationshipTarget checks that a relationship target follows the constrains specified in the ISO/IEC 29500-2 §9.3. func (r *Relationship) validateRelationshipTarget(sourceURI string) error { - if !validEncoded(r.TargetURI, false) { + if !validEncoded(r.TargetURI, true) { return newErrorRelationship(128, sourceURI, r.ID) } // ISO/IEC 29500-2 M1.29 if r.TargetMode == ModeInternal { - if !isInternal(sourceURI) { + if !isInternal(r.TargetURI) { return newErrorRelationship(129, sourceURI, r.ID) - } else { - source := strings.TrimSpace(sourceURI) - if source == "" || isRelationshipURI(ResolveRelationship(sourceURI, r.TargetURI)) { - return newErrorRelationship(125, sourceURI, r.ID) - } + } + source := strings.TrimSpace(sourceURI) + if source == "" || isRelationshipURI(ResolveRelationship(sourceURI, r.TargetURI)) { + return newErrorRelationship(125, sourceURI, r.ID) } } diff --git a/relationship_test.go b/relationship_test.go index 841d124..55ba72e 100644 --- a/relationship_test.go +++ b/relationship_test.go @@ -44,7 +44,9 @@ func TestRelationship_validate(t *testing.T) { }{ {"relative", &Relationship{ID: "fakeId", Type: "fakeType", TargetURI: "./two/two.txt", TargetMode: ModeExternal}, args{"/one.txt"}, false}, {"new", &Relationship{ID: "fakeId", Type: "fakeType", TargetURI: "fakeTarget", TargetMode: ModeExternal}, args{""}, false}, - {"abs", &Relationship{ID: "fakeId", Type: "fakeType", TargetURI: "http://a.com/b", TargetMode: ModeExternal}, args{""}, false}, + {"external", &Relationship{ID: "fakeId", Type: "fakeType", TargetURI: "http://a.com/b", TargetMode: ModeExternal}, args{""}, false}, + {"external1", &Relationship{ID: "fakeId", Type: "fakeType", TargetURI: "://a.com/b", TargetMode: ModeInternal}, args{"/"}, false}, + {"externalErr", &Relationship{ID: "fakeId", Type: "fakeType", TargetURI: "http://a.com/b", TargetMode: ModeInternal}, args{"/"}, true}, {"internalRelRel", &Relationship{ID: "fakeId", Type: "fakeType", TargetURI: "/_rels/.rels", TargetMode: ModeInternal}, args{"/"}, true}, {"internalRelNoSource", &Relationship{ID: "fakeId", Type: "fakeType", TargetURI: "/fakeTarget", TargetMode: ModeInternal}, args{""}, true}, {"invalidTarget2", &Relationship{ID: "fakeId", Type: "fakeType", TargetURI: " ", TargetMode: ModeInternal}, args{""}, true}, diff --git a/writer.go b/writer.go index e5ead48..77dba07 100644 --- a/writer.go +++ b/writer.go @@ -38,7 +38,7 @@ type Writer struct { // NewWriter returns a new Writer writing an OPC package to w. func NewWriter(w io.Writer) *Writer { return &Writer{p: &pkg{ - parts: make(map[string]*Part, 0), + parts: make(map[string]struct{}, 0), contentTypes: contentTypes{ defaults: map[string]string{ "xml": "application/xml", diff --git a/writer_test.go b/writer_test.go index 85c3ed8..2191cea 100644 --- a/writer_test.go +++ b/writer_test.go @@ -28,11 +28,11 @@ func TestWriter_Close(t *testing.T) { p.contentTypes.add("/a.xml", "a/b") p.contentTypes.add("/b.xml", "c/d") pC := newPackage() - pC.parts["/[CONTENT_TYPES].XML"] = new(Part) + pC.parts["/[CONTENT_TYPES].XML"] = struct{}{} pCore := newPackage() - pCore.parts["/PROPS/CORE.XML"] = new(Part) + pCore.parts["/PROPS/CORE.XML"] = struct{}{} pRel := newPackage() - pRel.parts["/_RELS/.RELS"] = new(Part) + pRel.parts["/_RELS/.RELS"] = struct{}{} tests := []struct { name string w *Writer @@ -136,7 +136,7 @@ func TestWriter_CreatePart(t *testing.T) { rel := &Relationship{ID: "fakeId", Type: "asd", TargetURI: "/fakeTarget", TargetMode: ModeInternal} w := NewWriter(&bytes.Buffer{}) pRel := newPackage() - pRel.parts["/_RELS/A.XML.RELS"] = new(Part) + pRel.parts["/_RELS/A.XML.RELS"] = struct{}{} type args struct { part *Part compression CompressionOption From cc8a62722d6ff0e78fbcec270d6719b6e9b23393 Mon Sep 17 00:00:00 2001 From: Quim Muntal Date: Sat, 2 Jan 2021 21:11:30 +0100 Subject: [PATCH 3/5] remove unused param --- package_test.go | 3 +-- part.go | 7 ++----- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/package_test.go b/package_test.go index 347ed58..ca38f43 100644 --- a/package_test.go +++ b/package_test.go @@ -24,8 +24,7 @@ func Test_newPackage(t *testing.T) { }{ {"base", &pkg{ parts: make(map[string]struct{}, 0), - }, - }, + }}, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { diff --git a/part.go b/part.go index f0651fb..d1aa510 100644 --- a/part.go +++ b/part.go @@ -54,7 +54,7 @@ func NormalizePartName(name string) string { if name == "" || name == "/" || name == "\\" || name == "." { return "" } - name, _ = split(name, '#', true) + name, _ = split(name, '#') name = strings.NewReplacer("\\", "/", "//", "/").Replace(name) name = unescape(name) name = escape(name) @@ -260,14 +260,11 @@ func unescape(s string) string { return t.String() } -func split(s string, sep byte, cutc bool) (string, string) { +func split(s string, sep byte) (string, string) { i := strings.IndexByte(s, sep) if i < 0 { return s, "" } - if cutc { - return s[:i], s[i+1:] - } return s[:i], s[i:] } From 0ee977da28e287576b1233b626946bcf76bbf6d3 Mon Sep 17 00:00:00 2001 From: Quim Muntal Date: Sun, 3 Jan 2021 20:44:57 +0100 Subject: [PATCH 4/5] simplify code --- part.go | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/part.go b/part.go index d1aa510..0eb09d9 100644 --- a/part.go +++ b/part.go @@ -193,9 +193,7 @@ func escape(s string) string { t[j+2] = '5' j += 3 } else { - t[j] = '%' - t[j+1] = s[i+1] - t[j+2] = s[i+2] + t[j], t[j+1], t[j+3] = '%', s[i+1], s[i+2] j += 3 } default: From 06213f6841d3eb1e681f15ffa1cb232aa8291b04 Mon Sep 17 00:00:00 2001 From: Quim Muntal Date: Mon, 4 Jan 2021 08:54:10 +0100 Subject: [PATCH 5/5] remove allowIRI param --- part.go | 8 +++----- relationship.go | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/part.go b/part.go index 0eb09d9..b0a5ac6 100644 --- a/part.go +++ b/part.go @@ -99,7 +99,7 @@ func validatePartName(name string) error { return err } - if !validEncoded(name, true) { + if !validEncoded(name) { return newError(106, name) } return nil @@ -332,7 +332,7 @@ func unpct(c1, c2 byte) byte { return unhex(c1)<<4 | unhex(c2) } -func validEncoded(s string, allowIRI bool) bool { +func validEncoded(s string) bool { for i := 0; i < len(s); i++ { switch s[i] { case '%': @@ -342,9 +342,7 @@ func validEncoded(s string, allowIRI bool) bool { // ok default: if shouldEscape(s[i]) { - if !allowIRI { - return false - } + // Check if IRI supported shar r, wid := utf8.DecodeRuneInString(s[i:]) if !isUcsChar(r) { return false diff --git a/relationship.go b/relationship.go index d58c8ed..32cef96 100644 --- a/relationship.go +++ b/relationship.go @@ -114,7 +114,7 @@ func isRelationshipURI(uri string) bool { // validateRelationshipTarget checks that a relationship target follows the constrains specified in the ISO/IEC 29500-2 §9.3. func (r *Relationship) validateRelationshipTarget(sourceURI string) error { - if !validEncoded(r.TargetURI, true) { + if !validEncoded(r.TargetURI) { return newErrorRelationship(128, sourceURI, r.ID) } // ISO/IEC 29500-2 M1.29