From debf7d9f3c361ec367b9c80b380e17f96f496e70 Mon Sep 17 00:00:00 2001
From: Guilhem Fanton <8671905+gfanton@users.noreply.github.com>
Date: Mon, 28 Aug 2023 21:19:07 +0200
Subject: [PATCH] feat(stdlib): add `path` package (#1065)

Add the `path` package to the stdlib; it is needed by the `net/url` package (and probably by some other packages).
Contributors' checklist...

- [X] Added new tests, or not needed, or not feasible
- [ ] Provided an example (e.g. screenshot) to aid review or the PR is self-explanatory
- [ ] Updated the official documentation or not needed
- [X] No breaking changes were made, or a `BREAKING CHANGE: xxx` message was included in the description
- [ ] Added references to related issues and PRs
- [ ] Provided any useful hints for running manual tests
- [ ] Added new benchmarks to [generated graphs](https://gnoland.github.io/benchmarks), if any. More info [here](https://github.com/gnolang/gno/blob/master/.benchmarks/README.md).

Signed-off-by: gfanton <8671905+gfanton@users.noreply.github.com> --- gnovm/docs/go-gno-compatibility.md | 4 +- gnovm/stdlibs/path/match.gno | 231 ++++++++++++++++++++++++++++ gnovm/stdlibs/path/match_test.gno | 81 ++++++++++ gnovm/stdlibs/path/path.gno | 233 ++++++++++++++++++++++++++++ gnovm/stdlibs/path/path_test.gno | 236 +++++++++++++++++++++++++++++ 5 files changed, 783 insertions(+), 2 deletions(-) create mode 100644 gnovm/stdlibs/path/match.gno create mode 100644 gnovm/stdlibs/path/match_test.gno create mode 100644 gnovm/stdlibs/path/path.gno create mode 100644 gnovm/stdlibs/path/path_test.gno diff --git a/gnovm/docs/go-gno-compatibility.md b/gnovm/docs/go-gno-compatibility.md index e3616861ca1..d5448ab2779 100644 --- a/gnovm/docs/go-gno-compatibility.md +++ b/gnovm/docs/go-gno-compatibility.md @@ -259,7 +259,7 @@ Additional native types: | log/slog/internal | TBD | | log/syslog | TBD | | maps | TBD | -| math | partial | +| math | partial | | math/big | TBD | | math/bits | TBD | | math/cmplx | TBD | @@ -288,7 +288,7 @@ Additional native types: | os/exec | TBD | | os/signal | TBD | | os/user | TBD | -| path | TBD | +| path | full | | path/filepath | TBD | | plugin | TBD | | reflect | TBD | diff --git a/gnovm/stdlibs/path/match.gno b/gnovm/stdlibs/path/match.gno new file mode 100644 index 00000000000..59b615dc11e --- /dev/null +++ b/gnovm/stdlibs/path/match.gno @@ -0,0 +1,231 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package path + +import ( + "errors" + "unicode/utf8" + + "internal/bytealg" +) + +// ErrBadPattern indicates a pattern was malformed. +var ErrBadPattern = errors.New("syntax error in pattern") + +// Match reports whether name matches the shell pattern. +// The pattern syntax is: +// +// pattern: +// { term } +// term: +// '*' matches any sequence of non-/ characters +// '?' 
matches any single non-/ character +// '[' [ '^' ] { character-range } ']' +// character class (must be non-empty) +// c matches character c (c != '*', '?', '\\', '[') +// '\\' c matches character c +// +// character-range: +// c matches character c (c != '\\', '-', ']') +// '\\' c matches character c +// lo '-' hi matches character c for lo <= c <= hi +// +// Match requires pattern to match all of name, not just a substring. +// The only possible returned error is ErrBadPattern, when pattern +// is malformed. +func Match(pattern, name string) (matched bool, err error) { +Pattern: + for len(pattern) > 0 { + var star bool + var chunk string + star, chunk, pattern = scanChunk(pattern) + if star && chunk == "" { + // Trailing * matches rest of string unless it has a /. + return bytealg.IndexByteString(name, '/') < 0, nil + } + // Look for match at current position. + t, ok, err := matchChunk(chunk, name) + // if we're the last chunk, make sure we've exhausted the name + // otherwise we'll give a false result even if we could still match + // using the star + if ok && (len(t) == 0 || len(pattern) > 0) { + name = t + continue + } + if err != nil { + return false, err + } + if star { + // Look for match skipping i+1 bytes. + // Cannot skip /. + for i := 0; i < len(name) && name[i] != '/'; i++ { + t, ok, err := matchChunk(chunk, name[i+1:]) + if ok { + // if we're the last chunk, make sure we exhausted the name + if len(pattern) == 0 && len(t) > 0 { + continue + } + name = t + continue Pattern + } + if err != nil { + return false, err + } + } + } + // Before returning false with no error, + // check that the remainder of the pattern is syntactically valid. + for len(pattern) > 0 { + _, chunk, pattern = scanChunk(pattern) + if _, _, err := matchChunk(chunk, ""); err != nil { + return false, err + } + } + return false, nil + } + return len(name) == 0, nil +} + +// scanChunk gets the next segment of pattern, which is a non-star string +// possibly preceded by a star. 
+func scanChunk(pattern string) (star bool, chunk, rest string) { + for len(pattern) > 0 && pattern[0] == '*' { + pattern = pattern[1:] + star = true + } + inrange := false + var i int +Scan: + for i = 0; i < len(pattern); i++ { + switch pattern[i] { + case '\\': + // error check handled in matchChunk: bad pattern. + if i+1 < len(pattern) { + i++ + } + case '[': + inrange = true + case ']': + inrange = false + case '*': + if !inrange { + break Scan + } + } + } + return star, pattern[0:i], pattern[i:] +} + +// matchChunk checks whether chunk matches the beginning of s. +// If so, it returns the remainder of s (after the match). +// Chunk is all single-character operators: literals, char classes, and ?. +func matchChunk(chunk, s string) (rest string, ok bool, err error) { + // failed records whether the match has failed. + // After the match fails, the loop continues on processing chunk, + // checking that the pattern is well-formed but no longer reading s. + failed := false + for len(chunk) > 0 { + if !failed && len(s) == 0 { + failed = true + } + switch chunk[0] { + case '[': + // character class + var r rune + if !failed { + var n int + r, n = utf8.DecodeRuneInString(s) + s = s[n:] + } + chunk = chunk[1:] + // possibly negated + negated := false + if len(chunk) > 0 && chunk[0] == '^' { + negated = true + chunk = chunk[1:] + } + // parse all ranges + match := false + nrange := 0 + for { + if len(chunk) > 0 && chunk[0] == ']' && nrange > 0 { + chunk = chunk[1:] + break + } + var lo, hi rune + if lo, chunk, err = getEsc(chunk); err != nil { + return "", false, err + } + hi = lo + if chunk[0] == '-' { + if hi, chunk, err = getEsc(chunk[1:]); err != nil { + return "", false, err + } + } + if lo <= r && r <= hi { + match = true + } + nrange++ + } + if match == negated { + failed = true + } + + case '?': + if !failed { + if s[0] == '/' { + failed = true + } + _, n := utf8.DecodeRuneInString(s) + s = s[n:] + } + chunk = chunk[1:] + + case '\\': + chunk = chunk[1:] + if 
len(chunk) == 0 { + return "", false, ErrBadPattern + } + fallthrough + + default: + if !failed { + if chunk[0] != s[0] { + failed = true + } + s = s[1:] + } + chunk = chunk[1:] + } + } + if failed { + return "", false, nil + } + return s, true, nil +} + +// getEsc gets a possibly-escaped character from chunk, for a character class. +func getEsc(chunk string) (r rune, nchunk string, err error) { + if len(chunk) == 0 || chunk[0] == '-' || chunk[0] == ']' { + err = ErrBadPattern + return + } + if chunk[0] == '\\' { + chunk = chunk[1:] + if len(chunk) == 0 { + err = ErrBadPattern + return + } + } + r, n := utf8.DecodeRuneInString(chunk) + if r == utf8.RuneError && n == 1 { + err = ErrBadPattern + } + nchunk = chunk[n:] + if len(nchunk) == 0 { + err = ErrBadPattern + } + return +} diff --git a/gnovm/stdlibs/path/match_test.gno b/gnovm/stdlibs/path/match_test.gno new file mode 100644 index 00000000000..c33447888e4 --- /dev/null +++ b/gnovm/stdlibs/path/match_test.gno @@ -0,0 +1,81 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package path + +import "testing" + +type MatchTest struct { + pattern, s string + match bool + err error +} + +var matchTests = []MatchTest{ + {"abc", "abc", true, nil}, + {"*", "abc", true, nil}, + {"*c", "abc", true, nil}, + {"a*", "a", true, nil}, + {"a*", "abc", true, nil}, + {"a*", "ab/c", false, nil}, + {"a*/b", "abc/b", true, nil}, + {"a*/b", "a/c/b", false, nil}, + {"a*b*c*d*e*/f", "axbxcxdxe/f", true, nil}, + {"a*b*c*d*e*/f", "axbxcxdxexxx/f", true, nil}, + {"a*b*c*d*e*/f", "axbxcxdxe/xxx/f", false, nil}, + {"a*b*c*d*e*/f", "axbxcxdxexxx/fff", false, nil}, + {"a*b?c*x", "abxbbxdbxebxczzx", true, nil}, + {"a*b?c*x", "abxbbxdbxebxczzy", false, nil}, + {"ab[c]", "abc", true, nil}, + {"ab[b-d]", "abc", true, nil}, + {"ab[e-g]", "abc", false, nil}, + {"ab[^c]", "abc", false, nil}, + {"ab[^b-d]", "abc", false, nil}, + {"ab[^e-g]", "abc", true, nil}, + {"a\\*b", "a*b", true, nil}, + {"a\\*b", "ab", false, nil}, + {"a?b", "a☺b", true, nil}, + {"a[^a]b", "a☺b", true, nil}, + {"a???b", "a☺b", false, nil}, + {"a[^a][^a][^a]b", "a☺b", false, nil}, + {"[a-ζ]*", "α", true, nil}, + {"*[a-ζ]", "A", false, nil}, + {"a?b", "a/b", false, nil}, + {"a*b", "a/b", false, nil}, + {"[\\]a]", "]", true, nil}, + {"[\\-]", "-", true, nil}, + {"[x\\-]", "x", true, nil}, + {"[x\\-]", "-", true, nil}, + {"[x\\-]", "z", false, nil}, + {"[\\-x]", "x", true, nil}, + {"[\\-x]", "-", true, nil}, + {"[\\-x]", "a", false, nil}, + {"[]a]", "]", false, ErrBadPattern}, + {"[-]", "-", false, ErrBadPattern}, + {"[x-]", "x", false, ErrBadPattern}, + {"[x-]", "-", false, ErrBadPattern}, + {"[x-]", "z", false, ErrBadPattern}, + {"[-x]", "x", false, ErrBadPattern}, + {"[-x]", "-", false, ErrBadPattern}, + {"[-x]", "a", false, ErrBadPattern}, + {"\\", "a", false, ErrBadPattern}, + {"[a-b-c]", "a", false, ErrBadPattern}, + {"[", "a", false, ErrBadPattern}, + {"[^", "a", false, ErrBadPattern}, + {"[^bc", "a", false, ErrBadPattern}, + {"a[", "a", false, ErrBadPattern}, + {"a[", "ab", false, 
ErrBadPattern}, + {"a[", "x", false, ErrBadPattern}, + {"a/b[", "x", false, ErrBadPattern}, + {"*x", "xxx", true, nil}, +} + +func TestMatch(t *testing.T) { + for _, tt := range matchTests { + ok, err := Match(tt.pattern, tt.s) + if ok != tt.match || err != tt.err { + t.Errorf("Match(%#q, %#q) = %v, %v want %v, %v", tt.pattern, tt.s, ok, err, tt.match, tt.err) + } + } +} diff --git a/gnovm/stdlibs/path/path.gno b/gnovm/stdlibs/path/path.gno new file mode 100644 index 00000000000..547b9debce1 --- /dev/null +++ b/gnovm/stdlibs/path/path.gno @@ -0,0 +1,233 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package path implements utility routines for manipulating slash-separated +// paths. +// +// The path package should only be used for paths separated by forward +// slashes, such as the paths in URLs. This package does not deal with +// Windows paths with drive letters or backslashes; to manipulate +// operating system paths, use the path/filepath package. +package path + +// A lazybuf is a lazily constructed path buffer. +// It supports append, reading previously appended bytes, +// and retrieving the final string. It does not allocate a buffer +// to hold the output until that output diverges from s. +type lazybuf struct { + s string + buf []byte + w int +} + +func (b *lazybuf) index(i int) byte { + if b.buf != nil { + return b.buf[i] + } + return b.s[i] +} + +func (b *lazybuf) append(c byte) { + if b.buf == nil { + if b.w < len(b.s) && b.s[b.w] == c { + b.w++ + return + } + b.buf = make([]byte, len(b.s)) + copy(b.buf, b.s[:b.w]) + } + b.buf[b.w] = c + b.w++ +} + +func (b *lazybuf) string() string { + if b.buf == nil { + return b.s[:b.w] + } + return string(b.buf[:b.w]) +} + +// Clean returns the shortest path name equivalent to path +// by purely lexical processing. 
It applies the following rules +// iteratively until no further processing can be done: +// +// 1. Replace multiple slashes with a single slash. +// 2. Eliminate each . path name element (the current directory). +// 3. Eliminate each inner .. path name element (the parent directory) +// along with the non-.. element that precedes it. +// 4. Eliminate .. elements that begin a rooted path: +// that is, replace "/.." by "/" at the beginning of a path. +// +// The returned path ends in a slash only if it is the root "/". +// +// If the result of this process is an empty string, Clean +// returns the string ".". +// +// See also Rob Pike, “Lexical File Names in Plan 9 or +// Getting Dot-Dot Right,” +// https://9p.io/sys/doc/lexnames.html +func Clean(path string) string { + if path == "" { + return "." + } + + rooted := path[0] == '/' + n := len(path) + + // Invariants: + // reading from path; r is index of next byte to process. + // writing to buf; w is index of next byte to write. + // dotdot is index in buf where .. must stop, either because + // it is the leading slash or it is a leading ../../.. prefix. + out := lazybuf{s: path} + r, dotdot := 0, 0 + if rooted { + out.append('/') + r, dotdot = 1, 1 + } + + for r < n { + switch { + case path[r] == '/': + // empty path element + r++ + case path[r] == '.' && (r+1 == n || path[r+1] == '/'): + // . element + r++ + case path[r] == '.' && path[r+1] == '.' && (r+2 == n || path[r+2] == '/'): + // .. element: remove to last / + r += 2 + switch { + case out.w > dotdot: + // can backtrack + out.w-- + for out.w > dotdot && out.index(out.w) != '/' { + out.w-- + } + case !rooted: + // cannot backtrack, but not rooted, so append .. element. + if out.w > 0 { + out.append('/') + } + out.append('.') + out.append('.') + dotdot = out.w + } + default: + // real path element. 
+ // add slash if needed + if rooted && out.w != 1 || !rooted && out.w != 0 { + out.append('/') + } + // copy element + for ; r < n && path[r] != '/'; r++ { + out.append(path[r]) + } + } + } + + // Turn empty string into "." + if out.w == 0 { + return "." + } + + return out.string() +} + +// lastSlash(s) is strings.LastIndex(s, "/") but we can't import strings. +func lastSlash(s string) int { + i := len(s) - 1 + for i >= 0 && s[i] != '/' { + i-- + } + return i +} + +// Split splits path immediately following the final slash, +// separating it into a directory and file name component. +// If there is no slash in path, Split returns an empty dir and +// file set to path. +// The returned values have the property that path = dir+file. +func Split(path string) (dir, file string) { + i := lastSlash(path) + return path[:i+1], path[i+1:] +} + +// Join joins any number of path elements into a single path, +// separating them with slashes. Empty elements are ignored. +// The result is Cleaned. However, if the argument list is +// empty or all its elements are empty, Join returns +// an empty string. +func Join(elem ...string) string { + size := 0 + for _, e := range elem { + size += len(e) + } + if size == 0 { + return "" + } + buf := make([]byte, 0, size+len(elem)-1) + for _, e := range elem { + if len(buf) > 0 || e != "" { + if len(buf) > 0 { + buf = append(buf, '/') + } + buf = append(buf, e...) + } + } + return Clean(string(buf)) +} + +// Ext returns the file name extension used by path. +// The extension is the suffix beginning at the final dot +// in the final slash-separated element of path; +// it is empty if there is no dot. +func Ext(path string) string { + for i := len(path) - 1; i >= 0 && path[i] != '/'; i-- { + if path[i] == '.' { + return path[i:] + } + } + return "" +} + +// Base returns the last element of path. +// Trailing slashes are removed before extracting the last element. +// If the path is empty, Base returns ".". 
+// If the path consists entirely of slashes, Base returns "/". +func Base(path string) string { + if path == "" { + return "." + } + // Strip trailing slashes. + for len(path) > 0 && path[len(path)-1] == '/' { + path = path[0 : len(path)-1] + } + // Find the last element + if i := lastSlash(path); i >= 0 { + path = path[i+1:] + } + // If empty now, it had only slashes. + if path == "" { + return "/" + } + return path +} + +// IsAbs reports whether the path is absolute. +func IsAbs(path string) bool { + return len(path) > 0 && path[0] == '/' +} + +// Dir returns all but the last element of path, typically the path's directory. +// After dropping the final element using Split, the path is Cleaned and trailing +// slashes are removed. +// If the path is empty, Dir returns ".". +// If the path consists entirely of slashes followed by non-slash bytes, Dir +// returns a single slash. In any other case, the returned path does not end in a +// slash. +func Dir(path string) string { + dir, _ := Split(path) + return Clean(dir) +} diff --git a/gnovm/stdlibs/path/path_test.gno b/gnovm/stdlibs/path/path_test.gno new file mode 100644 index 00000000000..0b81de31e2c --- /dev/null +++ b/gnovm/stdlibs/path/path_test.gno @@ -0,0 +1,236 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package path + +import ( + // "runtime" + "testing" +) + +type PathTest struct { + path, result string +} + +var cleantests = []PathTest{ + // Already clean + {"", "."}, + {"abc", "abc"}, + {"abc/def", "abc/def"}, + {"a/b/c", "a/b/c"}, + {".", "."}, + {"..", ".."}, + {"../..", "../.."}, + {"../../abc", "../../abc"}, + {"/abc", "/abc"}, + {"/", "/"}, + + // Remove trailing slash + {"abc/", "abc"}, + {"abc/def/", "abc/def"}, + {"a/b/c/", "a/b/c"}, + {"./", "."}, + {"../", ".."}, + {"../../", "../.."}, + {"/abc/", "/abc"}, + + // Remove doubled slash + {"abc//def//ghi", "abc/def/ghi"}, + {"//abc", "/abc"}, + {"///abc", "/abc"}, + {"//abc//", "/abc"}, + {"abc//", "abc"}, + + // Remove . elements + {"abc/./def", "abc/def"}, + {"/./abc/def", "/abc/def"}, + {"abc/.", "abc"}, + + // Remove .. elements + {"abc/def/ghi/../jkl", "abc/def/jkl"}, + {"abc/def/../ghi/../jkl", "abc/jkl"}, + {"abc/def/..", "abc"}, + {"abc/def/../..", "."}, + {"/abc/def/../..", "/"}, + {"abc/def/../../..", ".."}, + {"/abc/def/../../..", "/"}, + {"abc/def/../../../ghi/jkl/../../../mno", "../../mno"}, + + // Combinations + {"abc/./../def", "def"}, + {"abc//./../def", "def"}, + {"abc/../../././../def", "../../def"}, +} + +func TestClean(t *testing.T) { + for _, test := range cleantests { + if s := Clean(test.path); s != test.result { + t.Errorf("Clean(%q) = %q, want %q", test.path, s, test.result) + } + if s := Clean(test.result); s != test.result { + t.Errorf("Clean(%q) = %q, want %q", test.result, s, test.result) + } + } +} + +// XXX: runtime is not an available package, 'AllocsPerRun' is not defined +// func TestCleanMallocs(t *testing.T) { +// if testing.Short() { +// t.Skip("skipping malloc count in short mode") +// } +// if runtime.GOMAXPROCS(0) > 1 { +// t.Log("skipping AllocsPerRun checks; GOMAXPROCS>1") +// return +// } + +// for _, test := range cleantests { +// allocs := testing.AllocsPerRun(100, func() { Clean(test.result) }) +// if allocs > 0 { +// t.Errorf("Clean(%q): %v allocs, want 
zero", test.result, allocs) +// } +// } +// } + +type SplitTest struct { + path, dir, file string +} + +var splittests = []SplitTest{ + {"a/b", "a/", "b"}, + {"a/b/", "a/b/", ""}, + {"a/", "a/", ""}, + {"a", "", "a"}, + {"/", "/", ""}, +} + +func TestSplit(t *testing.T) { + for _, test := range splittests { + if d, f := Split(test.path); d != test.dir || f != test.file { + t.Errorf("Split(%q) = %q, %q, want %q, %q", test.path, d, f, test.dir, test.file) + } + } +} + +type JoinTest struct { + elem []string + path string +} + +var jointests = []JoinTest{ + // zero parameters + {[]string{}, ""}, + + // one parameter + {[]string{""}, ""}, + {[]string{"a"}, "a"}, + + // two parameters + {[]string{"a", "b"}, "a/b"}, + {[]string{"a", ""}, "a"}, + {[]string{"", "b"}, "b"}, + {[]string{"/", "a"}, "/a"}, + {[]string{"/", ""}, "/"}, + {[]string{"a/", "b"}, "a/b"}, + {[]string{"a/", ""}, "a"}, + {[]string{"", ""}, ""}, +} + +func TestJoin(t *testing.T) { + for _, test := range jointests { + if p := Join(test.elem...); p != test.path { + t.Errorf("Join(%q) = %q, want %q", test.elem, p, test.path) + } + } +} + +type ExtTest struct { + path, ext string +} + +var exttests = []ExtTest{ + {"path.go", ".go"}, + {"path.pb.go", ".go"}, + {"a.dir/b", ""}, + {"a.dir/b.go", ".go"}, + {"a.dir/", ""}, +} + +func TestExt(t *testing.T) { + for _, test := range exttests { + if x := Ext(test.path); x != test.ext { + t.Errorf("Ext(%q) = %q, want %q", test.path, x, test.ext) + } + } +} + +var basetests = []PathTest{ + // Already clean + {"", "."}, + {".", "."}, + {"/.", "."}, + {"/", "/"}, + {"////", "/"}, + {"x/", "x"}, + {"abc", "abc"}, + {"abc/def", "def"}, + {"a/b/.x", ".x"}, + {"a/b/c.", "c."}, + {"a/b/c.x", "c.x"}, +} + +func TestBase(t *testing.T) { + for _, test := range basetests { + if s := Base(test.path); s != test.result { + t.Errorf("Base(%q) = %q, want %q", test.path, s, test.result) + } + } +} + +var dirtests = []PathTest{ + {"", "."}, + {".", "."}, + {"/.", "/"}, + {"/", "/"}, + 
{"////", "/"}, + {"/foo", "/"}, + {"x/", "x"}, + {"abc", "."}, + {"abc/def", "abc"}, + {"abc////def", "abc"}, + {"a/b/.x", "a/b"}, + {"a/b/c.", "a/b"}, + {"a/b/c.x", "a/b"}, +} + +func TestDir(t *testing.T) { + for _, test := range dirtests { + if s := Dir(test.path); s != test.result { + t.Errorf("Dir(%q) = %q, want %q", test.path, s, test.result) + } + } +} + +type IsAbsTest struct { + path string + isAbs bool +} + +var isAbsTests = []IsAbsTest{ + {"", false}, + {"/", true}, + {"/usr/bin/gcc", true}, + {"..", false}, + {"/a/../bb", true}, + {".", false}, + {"./", false}, + {"lala", false}, +} + +func TestIsAbs(t *testing.T) { + for _, test := range isAbsTests { + if r := IsAbs(test.path); r != test.isAbs { + t.Errorf("IsAbs(%q) = %v, want %v", test.path, r, test.isAbs) + } + } +}