diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 01803ea..fb43e97 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -13,7 +13,7 @@ jobs: - uses: actions/setup-go@v5 with: { go-version: '1.23' } - uses: actions/checkout@v4 - - run: make install generate-win fmt + - run: make install generate-win fmt_linux - run: git diff --exit-code lint: diff --git a/Makefile b/Makefile index a19dad1..c30a3c4 100644 --- a/Makefile +++ b/Makefile @@ -5,14 +5,24 @@ install: go install mvdan.cc/gofumpt@v0.6.0 go install github.com/daixiang0/gci@v0.13.4 -.PHONY: fmt -fmt: +.PHONY: fmt_darwin +fmt_darwin: @echo "Formatting..." go mod tidy go fmt ./... gci write -s standard -s default -s "prefix(github.com/forensicanalysis/artifactcollector)" . gofumpt -l -w . - find . -type f -name "*.go" -print0 | xargs -0 sed -i.bak -e 's/ 0o/ 0/g' + find . -type f -name "*.go" -print0 | xargs -0 sed -i '' -e 's/ 0o/ 0/g' + wsl -fix ./... || true + +.PHONY: fmt_linux +fmt_linux: + @echo "Formatting..." + go mod tidy + go fmt ./... + gci write -s standard -s default -s "prefix(github.com/forensicanalysis/artifactcollector)" . + gofumpt -l -w . + find . -type f -name "*.go" -print0 | xargs -0 sed -i -e 's/ 0o/ 0/g' wsl -fix ./... 
|| true .PHONY: vendor diff --git a/artifacts/expansion.go b/artifacts/expansion.go index 3f4112a..497a18a 100644 --- a/artifacts/expansion.go +++ b/artifacts/expansion.go @@ -29,7 +29,7 @@ import ( "runtime" "strings" - "github.com/forensicanalysis/fsdoublestar" + "github.com/forensicanalysis/artifactcollector/doublestar" ) const windows = "windows" @@ -231,7 +231,7 @@ func expandPath(fs fs.FS, syspath string, prefixes []string, collector ArtifactC expandedPath = strings.Replace(expandedPath, "{", `\{`, -1) expandedPath = strings.Replace(expandedPath, "}", `\}`, -1) - unglobedPaths, err := fsdoublestar.Glob(fs, expandedPath) + unglobedPaths, err := doublestar.Glob(fs, expandedPath) if err != nil { log.Println(err) diff --git a/doublestar/README.md b/doublestar/README.md new file mode 100644 index 0000000..4206504 --- /dev/null +++ b/doublestar/README.md @@ -0,0 +1,22 @@ +Recursive directory globbing via `**` for Go's [io/fs](https://golang.org/pkg/io/fs). + +## Example + +``` golang +func main() { + // get file system for this repository + wd, _ := os.Getwd() + fsys := os.DirFS(wd) + + // get all yml files + matches, _ := doublestar.Glob(fsys, "**/*.yml") + + // print matches + fmt.Println(matches) + // Output: [.github/workflows/ci.yml .github/.golangci.yml] +} +``` + +## Acknowledgement + +This package is based on [Bob Matcuk's](https://github.com/bmatcuk) great [doublestar](https://github.com/bmatcuk/doublestar) package. 
// Copyright (c) 2014-2019 Bob Matcuk
// Copyright (c) 2019-2020 Siemens AG
// Copyright (c) 2021 Jonas Plum
//
// Permission is hereby granted, free of charge, to any person obtaining a copy of
// this software and associated documentation files (the "Software"), to deal in
// the Software without restriction, including without limitation the rights to
// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
// the Software, and to permit persons to whom the Software is furnished to do so,
// subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
// IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
//
// Author(s): Bob Matcuk, Jonas Plum
//
// This code was adapted from
// https://github.com/bmatcuk/doublestar
// for use with forensic filesystems.

// Recursive "**" globbing and matching for io/fs file systems.

// defaultDepth is the depth limit applied to a "**" component that carries no
// numeric suffix.
const defaultDepth = 3

// doubleStarPattern recognises a doublestar component: "**" optionally
// followed by decimal digits that give its depth limit (for example "**2").
var doubleStarPattern = regexp.MustCompile(`^\*\*[0-9]*$`)

// ErrBadPattern indicates a pattern was malformed.
var ErrBadPattern = path.ErrBadPattern

// splitPathOnSeparator splits path on separator while leaving separators that
// are preceded by a backslash inside their component.
//
// When separator itself is '\\' no escaping is possible and the path is split
// on every occurrence. A trailing, unescaped separator produces a final empty
// component. The result always holds at least one element.
func splitPathOnSeparator(path string, separator rune) (ret []string) {
	sep := string(separator)

	if separator == '\\' {
		// a backslash separator cannot be escaped, so a plain split is enough
		return strings.Split(path, sep)
	}

	cnt := strings.Count(path, sep)
	if cnt == 0 {
		return []string{path}
	}

	// cnt+1 is an upper bound: escaped separators do not start a new component
	ret = make([]string, 0, cnt+1)
	sepLen := utf8.RuneLen(separator)
	trailingSeparator := false

	for start := 0; start < len(path); {
		end := indexRuneWithEscaping(path[start:], separator)
		if end == -1 {
			// no further unescaped separator: the remainder is the last component
			trailingSeparator = false
			ret = append(ret, path[start:])

			break
		}

		trailingSeparator = true
		end += start
		ret = append(ret, path[start:end])
		start = end + sepLen
	}

	// the path ended right after a separator: record the empty last component
	if trailingSeparator {
		ret = append(ret, "")
	}

	return ret
}

// indexRuneWithEscaping returns the byte index of the first occurrence of r in
// s that is not directly preceded by a backslash, or -1 if there is none.
func indexRuneWithEscaping(s string, r rune) int {
	runeLen := utf8.RuneLen(r)

	for offset := 0; ; {
		idx := strings.IndexRune(s[offset:], r)
		if idx == -1 {
			return -1
		}

		idx += offset
		if idx == 0 || s[idx-1] != '\\' {
			return idx
		}

		// this occurrence is escaped, keep searching behind it
		offset = idx + runeLen
	}
}

// Match returns true if name matches the shell file name pattern.
// The pattern syntax is:
//
//	pattern:
//	  { term }
//	term:
//	  '*'         matches any sequence of non-path-separators
//	  '**'        matches any sequence of characters, including
//	              path separators; an optional decimal suffix
//	              ('**2') limits how many path components it may
//	              span, the limit is 3 without a suffix
//	  '?'         matches any single non-path-separator character
//	  '[' [ '^' ] { character-range } ']'
//	              character class (must be non-empty)
//	  '{' { term } [ ',' { term } ... ] '}'
//	  c           matches character c (c != '*', '?', '\\', '[')
//	  '\\' c      matches character c
//
//	character-range:
//	  c           matches character c (c != '\\', '-', ']')
//	  '\\' c      matches character c
//	  lo '-' hi   matches character c for lo <= c <= hi
//
// Match requires pattern to match all of name, not just a substring.
// The path separator is always '/'. The only possible returned error is
// ErrBadPattern, when pattern is malformed.
func Match(pattern, name string) (bool, error) {
	return matchWithSeparator(pattern, name, '/')
}

// PathMatch behaves exactly like Match: it also splits pattern and name on
// '/', independent of the operating system, which is the separator used by
// io/fs paths. It is kept as a separate entry point for callers that used
// PathMatch before.
func PathMatch(pattern, name string) (bool, error) {
	return matchWithSeparator(pattern, name, '/')
}

// matchWithSeparator splits pattern and name on separator and matches them
// component by component; see Match for the pattern syntax. Backslash escaping
// of the separator is unavailable when separator is '\\'.
func matchWithSeparator(pattern, name string, separator rune) (bool, error) {
	patternComponents := splitPathOnSeparator(pattern, separator)
	nameComponents := splitPathOnSeparator(name, separator)

	return doMatching(patternComponents, nameComponents)
}

// doMatching reports whether the name components satisfy the pattern
// components. Two empty lists match; exactly one empty list does not.
func doMatching(patternComponents, nameComponents []string) (matched bool, err error) { //nolint:cyclop
	patternLen, nameLen := len(patternComponents), len(nameComponents)
	if patternLen == 0 && nameLen == 0 {
		return true, nil
	}

	if patternLen == 0 || nameLen == 0 {
		return false, nil
	}

	patIdx, nameIdx := 0, 0
	for patIdx < patternLen && nameIdx < nameLen {
		if doubleStarPattern.MatchString(patternComponents[patIdx]) {
			// -1 forces getDepth to read the limit from the component itself
			depth := getDepth(patternComponents, patIdx, -1)

			// if our last pattern component is a doublestar, we are done -
			// doublestar will match any remaining name components, if any.
			if patIdx++; patIdx >= patternLen {
				return true, nil
			}

			// otherwise, try matching the remaining pattern at every position
			// the doublestar is allowed to reach
			for ; nameIdx < nameLen; nameIdx++ {
				if nameIdx-patIdx == depth {
					break
				}

				// errors of failed attempts are dropped on purpose (inherited
				// behaviour): only a successful attempt decides the result
				if m, _ := doMatching(patternComponents[patIdx:], nameComponents[nameIdx:]); m {
					return true, nil
				}
			}

			return false, nil
		}

		matched, err = matchComponent(patternComponents[patIdx], nameComponents[nameIdx])
		if !matched || err != nil {
			return matched, err
		}

		patIdx++
		nameIdx++
	}

	return patIdx >= patternLen && nameIdx >= nameLen, nil
}

// Glob returns the names of all files in fsys matching pattern or nil
// if there is no matching file. The syntax of pattern is the same
// as in Match. The pattern may describe hierarchical names such as
// usr/*/bin/ed; components are always separated by '/'.
//
// Glob ignores file system errors such as I/O errors reading directories.
// The only possible returned error is ErrBadPattern, when pattern
// is malformed. Matches collected before the error was detected are
// returned alongside it.
func Glob(fsys fs.FS, pattern string) (matches []string, err error) {
	patternComponents := splitPathOnSeparator(pattern, '/')

	// -2: no doublestar depth limit is active yet
	return doGlob(fsys, ".", patternComponents, matches, -2)
}

// doGlob appends every path below basedir that matches components to matches.
//
// depth is the remaining budget of an active doublestar: the call returns
// immediately when it is -1, or when it is 0 and fewer than two components are
// left. Any negative value makes the next doublestar component read its own
// limit (see getDepth). File system errors are skipped silently; the only
// error returned is the one reported by matchComponent for a malformed pattern.
func doGlob(fsys fs.FS, basedir string, components, matches []string, depth int) ([]string, error) { //nolint:gocognit,funlen,cyclop
	if (depth == 0 && len(components) < 2) || depth == -1 {
		return matches, nil
	}

	patLen, patIdx := skipComponents(components)
	if patIdx > 0 {
		basedir = path.Join(basedir, path.Join(components[0:patIdx]...))
	}

	// Stat fails if the file/directory doesn't exist; that simply means
	// there is nothing to match here
	fi, err := fs.Stat(fsys, basedir)
	if err != nil {
		return matches, nil //nolint:nilerr
	}

	// if there are no more components, we've found a match
	if patIdx >= patLen {
		return append(matches, basedir), nil
	}

	// only directories can contain further matches
	if !fi.IsDir() {
		return matches, nil
	}

	entries, err := fs.ReadDir(fsys, basedir)
	if err != nil {
		// unreadable directories are skipped, as documented on Glob
		return matches, nil //nolint:nilerr
	}

	lastComponent := (patIdx + 1) >= patLen

	if doubleStarPattern.MatchString(components[patIdx]) { //nolint:nestif
		depth = getDepth(components, patIdx, depth)

		// if the current component is a doublestar, we'll try depth-first
		for _, entry := range entries {
			entryPath := path.Join(basedir, entry.Name())

			// a separate error variable keeps a failed Stat from leaking into
			// the result of this call
			info, statErr := fs.Stat(fsys, entryPath)
			if statErr != nil {
				continue
			}

			switch {
			case info.IsDir():
				if lastComponent {
					matches = append(matches, entryPath)
				}

				// recurse with the doublestar still in front
				matches, err = doGlob(fsys, entryPath, components[patIdx:], matches, depth-1)
				if err != nil {
					return matches, err
				}
			case lastComponent:
				// if the pattern's last component is a doublestar, we match filenames, too
				matches = append(matches, entryPath)
			}
		}

		if lastComponent {
			return matches, nil // we're done
		}

		// the doublestar may also match nothing: continue with the next component
		patIdx++
		lastComponent = (patIdx + 1) >= patLen
	}

	// check items in current directory and recurse
	for _, entry := range entries {
		match, err := matchComponent(components[patIdx], entry.Name())
		if err != nil {
			return matches, err
		}

		if !match {
			continue
		}

		entryPath := path.Join(basedir, entry.Name())
		if lastComponent {
			matches = append(matches, entryPath)

			continue
		}

		matches, err = doGlob(fsys, entryPath, components[patIdx+1:], matches, depth-1)
		if err != nil {
			// a malformed later component must not be masked by the next entry
			return matches, err
		}
	}

	return matches, nil
}

// skipComponents returns the number of components and the index of the first
// component that contains a pattern character. The components before that
// index are plain names, so the caller can join them and check the resulting
// path with a single Stat instead of globbing.
func skipComponents(components []string) (patLen, patIdx int) {
	patLen = len(components)
	for ; patIdx < patLen; patIdx++ {
		if strings.ContainsAny(components[patIdx], "*?[{\\") {
			break
		}
	}

	return patLen, patIdx
}

// getDepth returns the depth limit for the doublestar at components[patIdx].
// A non-negative depth is an already running budget and is returned unchanged.
// Otherwise the limit is the component's numeric suffix, or defaultDepth if it
// has none.
func getDepth(components []string, patIdx int, depth int) int {
	if depth >= 0 {
		return depth
	}

	suffix := strings.TrimLeft(components[patIdx], "/*")
	if suffix == "" {
		return defaultDepth
	}

	// callers only pass components accepted by doubleStarPattern, so the
	// suffix consists of digits; the conversion error is dropped as before
	limit, _ := strconv.Atoi(suffix)

	return limit
}

// matchComponent attempts to match a single pattern component with a single
// path component. It returns ErrBadPattern when it runs into a malformed part
// of the pattern.
func matchComponent(pattern, name string) (bool, error) { //nolint:funlen,cyclop
	// check some base cases
	patternLen, nameLen := len(pattern), len(name)
	if patternLen == 0 && nameLen == 0 {
		return true, nil
	}

	if patternLen == 0 {
		return false, nil
	}

	if nameLen == 0 && pattern != "*" {
		return false, nil
	}

	// check for matches one rune at a time
	patIdx, nameIdx := 0, 0
	for patIdx < patternLen && nameIdx < nameLen {
		patRune, patAdj := utf8.DecodeRuneInString(pattern[patIdx:])
		nameRune, nameAdj := utf8.DecodeRuneInString(name[nameIdx:])

		switch patRune {
		case '\\':
			// handle escaped runes
			patIdx += patAdj
			patRune, patAdj = utf8.DecodeRuneInString(pattern[patIdx:])

			switch patRune {
			case nameRune:
				patIdx += patAdj
				nameIdx += nameAdj
			case utf8.RuneError:
				return false, ErrBadPattern
			default:
				return false, nil
			}
		case '*':
			return handleStars(pattern[patIdx+patAdj:], name[nameIdx:])
		case '[':
			// handle character sets
			patIdx += patAdj

			endClass, err, done := handleCharacterSet(pattern, patIdx, nameRune)
			if done {
				return false, err
			}

			patIdx = endClass + 1
			nameIdx += nameAdj
		case '{':
			return handleAlternatives(patIdx, patAdj, pattern, name, nameIdx)
		case '?', nameRune:
			// handle single-rune wildcard and literal runes
			patIdx += patAdj
			nameIdx += nameAdj
		default:
			return false, nil
		}
	}

	if patIdx >= patternLen && nameIdx >= nameLen {
		return true, nil
	}

	// a leftover star can still match the empty remainder of the name
	rest := pattern[patIdx:]
	if (nameIdx >= nameLen && rest == "*") || rest == "**" {
		return true, nil
	}

	return false, nil
}

// handleStars resolves a '*' inside a component. rest is the pattern behind
// the star and name is the part of the name that has not been consumed yet.
//
// The name is advanced by the width of the rune at the current position, so
// names that mix single-byte and multi-byte runes are scanned at every rune
// boundary.
func handleStars(rest, name string) (bool, error) {
	if rest == "" {
		// a star at the end of a pattern will always
		// match the rest of the path component
		return true, nil
	}

	// let the star swallow zero, one, two, ... runes and try the rest each time
	for name != "" {
		// errors of failed attempts are dropped on purpose (inherited
		// behaviour): only a successful attempt decides the result
		if m, _ := matchComponent(rest, name); m {
			return true, nil
		}

		_, width := utf8.DecodeRuneInString(name)
		name = name[width:]
	}

	return false, nil
}

// handleCharacterSet evaluates the character class that starts at patIdx (the
// position right behind '[') against nameRune.
//
// When done is true the caller must stop and report "no match" together with
// the returned error: ErrBadPattern for a malformed class, nil when the class
// is well-formed but rejects nameRune. When done is false the first result is
// the index of the closing ']'.
func handleCharacterSet(pattern string, patIdx int, nameRune rune) (int, error, bool) { //nolint:golint,gocognit,funlen,revive,cyclop
	endClass := indexRuneWithEscaping(pattern[patIdx:], ']')
	if endClass == -1 {
		return 0, ErrBadPattern, true
	}

	endClass += patIdx
	classRunes := []rune(pattern[patIdx:endClass])

	classRunesLen := len(classRunes)
	if classRunesLen == 0 {
		// a class must not be empty
		return 0, ErrBadPattern, true
	}

	negated := classRunes[0] == '^'
	classIdx := 0
	matchClass := false

	if negated {
		classIdx++
	}

	for classIdx < classRunesLen {
		low := classRunes[classIdx]
		if low == '-' {
			return 0, ErrBadPattern, true
		}

		classIdx++
		if low == '\\' {
			if classIdx >= classRunesLen {
				return 0, ErrBadPattern, true
			}

			low = classRunes[classIdx]
			classIdx++
		}

		high := low

		if classIdx < classRunesLen && classRunes[classIdx] == '-' {
			// we have a range of runes
			if classIdx++; classIdx >= classRunesLen {
				return 0, ErrBadPattern, true
			}

			high = classRunes[classIdx]
			if high == '-' {
				return 0, ErrBadPattern, true
			}

			classIdx++
			if high == '\\' {
				if classIdx >= classRunesLen {
					return 0, ErrBadPattern, true
				}

				high = classRunes[classIdx]
				classIdx++
			}
		}

		if low <= nameRune && nameRune <= high {
			matchClass = true
		}
	}

	// a hit on a negated class, or a miss on a regular one, rejects the rune
	if matchClass == negated {
		return 0, nil, true
	}

	return endClass, nil, false
}

// handleAlternatives resolves an alternative list such as {alt1,alt2,...}.
// patIdx points at the '{' and patAdj is its width. Every alternative is
// prepended to the pattern behind the closing '}' and matched against the
// unconsumed part of name; the first success wins and the first error aborts.
func handleAlternatives(patIdx int, patAdj int, pattern string, name string, nameIdx int) (bool, error) {
	patIdx += patAdj

	endOptions := indexRuneWithEscaping(pattern[patIdx:], '}')
	if endOptions == -1 {
		return false, ErrBadPattern
	}

	endOptions += patIdx
	options := splitPathOnSeparator(pattern[patIdx:endOptions], ',')
	tail := pattern[endOptions+1:]

	for _, option := range options {
		m, e := matchComponent(option+tail, name[nameIdx:])
		if e != nil {
			return false, e
		}

		if m {
			return true, nil
		}
	}

	return false, nil
}
or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +// IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +// +// Author(s): Bob Matcuk, Jonas Plum +// +// This code was adapted from +// https://github.com/bmatcuk/doublestar +// for use with forensic filesystems. + +package doublestar + +import ( + "io/fs" + "log" + "path" + "reflect" + "sort" + "strings" + "testing" + "testing/fstest" +) + +type MatchTest struct { + pattern, testPath string // a pattern and path to test the pattern on + shouldMatch bool // true if the pattern should match the path + expectedErr error // an expected error + testOnDisk bool // true: test pattern against files in "test" directory +} + +var matchTests = []MatchTest{ + {"*", "", true, nil, false}, + {"\\*", "", false, nil, false}, + // {"*", ".", false, nil, false}, + // {"*", "/", true, nil, false}, + {"*", "debug/", false, nil, false}, + // {"*", "//", false, nil, false}, + {"abc", "abc", true, nil, true}, + {"*", "abc", true, nil, true}, + {"*c", "abc", true, nil, true}, + {"a*", "a", true, nil, true}, + {"a*", "abc", true, nil, true}, + {"a*", "ab/c", false, nil, true}, + {"a*/b", "abc/b", true, nil, true}, + {"a*/b", "a/c/b", false, nil, true}, + {"a*b*c*d*e*/f", "axbxcxdxe/f", true, nil, true}, + {"a*b*c*d*e*/f", "axbxcxdxexxx/f", true, nil, true}, + {"a*b*c*d*e*/f", "axbxcxdxe/xxx/f", false, nil, true}, + {"a*b*c*d*e*/f", "axbxcxdxexxx/fff", false, nil, true}, + {"a*b?c*x", "abxbbxdbxebxczzx", true, nil, true}, + {"a*b?c*x", "abxbbxdbxebxczzy", false, nil, true}, + {"ab[c]", "abc", true, nil, true}, + {"ab[b-d]", 
"abc", true, nil, true}, + {"ab[e-g]", "abc", false, nil, true}, + {"ab[^c]", "abc", false, nil, true}, + {"ab[^b-d]", "abc", false, nil, true}, + {"ab[^e-g]", "abc", true, nil, true}, + {"a\\*b", "ab", false, nil, true}, + {"a?b", "a☺b", true, nil, true}, + {"a[^a]b", "a☺b", true, nil, true}, + {"a???b", "a☺b", false, nil, true}, + {"a[^a][^a][^a]b", "a☺b", false, nil, true}, + {"[a-ζ]*", "α", true, nil, true}, + {"*[a-ζ]", "A", false, nil, true}, + {"a?b", "a/b", false, nil, true}, + {"a*b", "a/b", false, nil, true}, + {"[]a]", "]", false, ErrBadPattern, true}, + {"[-]", "-", false, ErrBadPattern, true}, + {"[x-]", "x", false, ErrBadPattern, true}, + {"[x-]", "-", false, ErrBadPattern, true}, + {"[x-]", "z", false, ErrBadPattern, true}, + {"[-x]", "x", false, ErrBadPattern, true}, + {"[-x]", "-", false, ErrBadPattern, true}, + {"[-x]", "a", false, ErrBadPattern, true}, + {"[a-b-c]", "a", false, ErrBadPattern, true}, + {"[", "a", false, ErrBadPattern, true}, + {"[^", "a", false, ErrBadPattern, true}, + {"[^bc", "a", false, ErrBadPattern, true}, + // {"a[", "a", false, nil, false}, + {"a[", "ab", false, ErrBadPattern, true}, + {"*x", "xxx", true, nil, true}, + {"[abc]", "b", true, nil, true}, + {"a/**", "a", false, nil, true}, + {"a/**", "a/b", true, nil, true}, + {"a/**", "a/b/c", true, nil, true}, + {"**/c", "c", true, nil, true}, + {"**/c", "b/c", true, nil, true}, + {"**/c", "a/b/c", true, nil, true}, + {"**/c", "a/b", false, nil, true}, + {"**/c", "abcd", false, nil, true}, + {"**/c", "a/abc", false, nil, true}, + {"a/**/b", "a/b", true, nil, true}, + {"a/**/c", "a/b/c", true, nil, true}, + {"a/**/d", "a/b/c/d", true, nil, true}, + // {"a//b/c", "a/b/c", true, nil, true}, + // {"a/b/c", "a/b//c", true, nil, true}, + {"ab{c,d}", "abc", true, nil, true}, + {"ab{c,d,*}", "abcde", true, nil, true}, + {"ab{c,d}[", "abcd", false, ErrBadPattern, true}, + {"abc/**", "abc/b", true, nil, true}, + {"**/abc", "abc", true, nil, true}, + {"abc**", "abc/b", false, nil, 
true}, + {"abc**", "abc/b", false, nil, true}, + {"**2/d", "a/b/c/d", false, nil, true}, + {"a/**2/d", "a/b/c/d", true, nil, true}, + {"**3/d", "a/b/c/d", true, nil, true}, + {"**5/d", "a/b/c/d", true, nil, true}, + {"**/d", "f/g/h/i/j/k/d", false, nil, true}, + {"**5/d", "f/g/h/i/j/k/d", false, nil, true}, + {"**6/d", "f/g/h/i/j/k/d", true, nil, true}, + {"**7/d", "f/g/h/i/j/k/d", true, nil, true}, +} + +func TestMatch(t *testing.T) { + for idx, tt := range matchTests { + // Since Match() always uses "" as the separator, we + // don't need to worry about the tt.testOnDisk flag + testMatchWith(t, idx, tt) + } +} + +func testMatchWith(t *testing.T, idx int, tt MatchTest) { + defer func() { + if r := recover(); r != nil { + t.Errorf("#%v. Match(%#q, %#q) panicked: %#v", idx, tt.pattern, tt.testPath, r) + } + }() + + // Match() always uses "" as the separator + ok, err := Match(tt.pattern, tt.testPath) + if ok != tt.shouldMatch || err != tt.expectedErr { + t.Errorf("#%v. Match(%#q, %#q) = %v, %v want %v, %v", idx, tt.pattern, tt.testPath, ok, err, tt.shouldMatch, tt.expectedErr) + } + + if isStandardPattern(tt.pattern) { + stdOk, stdErr := path.Match(tt.pattern, tt.testPath) + if ok != stdOk || !compareErrors(err, stdErr) { + t.Errorf("#%v. Match(%#q, %#q) != path.Match(...). Got %v, %v want %v, %v", idx, tt.pattern, tt.testPath, ok, err, stdOk, stdErr) + } + } +} + +func TestPathMatch(t *testing.T) { + for idx, tt := range matchTests { + // Even though we aren't actually matching paths on disk, we are using + // PathMatch() which will use the system's separator. As a result, any + // patterns that might cause problems on-disk need to also be avoided + // here in this test. + if tt.testOnDisk { + testPathMatchWith(t, idx, tt) + } + } +} + +func testPathMatchWith(t *testing.T, idx int, tt MatchTest) { + defer func() { + if r := recover(); r != nil { + t.Errorf("#%v. 
Match(%#q, %#q) panicked: %#v", idx, tt.pattern, tt.testPath, r) + } + }() + + ok, err := PathMatch(tt.pattern, tt.testPath) + if ok != tt.shouldMatch || err != tt.expectedErr { + t.Errorf("#%v. Match(%#q, %#q) = %v, %v want %v, %v", idx, tt.pattern, tt.testPath, ok, err, tt.shouldMatch, tt.expectedErr) + } + + if isStandardPattern(tt.pattern) { + stdOk, stdErr := path.Match(tt.pattern, tt.testPath) + if ok != stdOk || !compareErrors(err, stdErr) { + t.Errorf("#%v. PathMatch(%#q, %#q) != path.Match(...). Got %v, %v want %v, %v", idx, tt.pattern, tt.testPath, ok, err, stdOk, stdErr) + } + } +} + +func TestGlob(t *testing.T) { + for idx, tt := range matchTests { + if tt.testOnDisk { + // test both relative paths and absolute paths + testGlobWith(t, idx, tt) + } + } +} + +func testGlobWith(t *testing.T, idx int, tt MatchTest) { + defer func() { + if r := recover(); r != nil { + t.Errorf("#%v. Glob(%#q) panicked: %#v", idx, tt.pattern, r) + } + }() + + tfs := getTestFS() + pattern := path.Join(tt.pattern) + testPath := path.Join(tt.testPath) + matches, err := Glob(tfs, pattern) + + if inSlice(testPath, matches) != tt.shouldMatch { + if tt.shouldMatch { + t.Errorf("#%v. Glob(%#q) = %#v - doesn't contain %v, but should", idx, pattern, matches, tt.testPath) + } else { + t.Errorf("#%v. Glob(%#q) = %#v - contains %v, but shouldn't", idx, pattern, matches, tt.testPath) + } + } + + if err != tt.expectedErr { + t.Errorf("#%v. 
Glob(%#q) has error %v, but should be %v", idx, pattern, err, tt.expectedErr) + } +} + +func isStandardPattern(pattern string) bool { + return !strings.Contains(pattern, "**") && indexRuneWithEscaping(pattern, '{') == -1 +} + +func compareErrors(a, b error) bool { + if a == nil { + return b == nil + } + + return b != nil +} + +func inSlice(s string, a []string) bool { + for _, i := range a { + if i == s { + return true + } + } + + return false +} + +func getTestFS() *fstest.MapFS { + infs := fstest.MapFS{} + + // create test files + files := []string{ + "a/abc", "a/b/c/d", "a/c/b", "abc/b", "abcd", "abcde", "abxbbxdbxebxczzx", + "abxbbxdbxebxczzy", "axbxcxdxe/f", "axbxcxdxe/xxx/f", "axbxcxdxexxx/f", + "axbxcxdxexxx/fff", "a☺b", "b/c", "c", "x", "xxx", "z", + "α", "f/g/h/i/j/k/l", "f/g/h/i/j/k/d", "f/g/h/i/j/k/u.bin", "f/g/h/i/j/k/v.bin", + } + + for _, file := range files { + if !fs.ValidPath(file) { + log.Fatal(file) + } + + infs[file] = &fstest.MapFile{Data: []byte("")} + } + + return &infs +} + +func getInFS() fs.FS { + infs := fstest.MapFS{} + + files := []string{"foo.bin", "dir/bar.bin", "dir/baz.bin", "dir/a/a/foo.bin", "dir/a/b/foo.bin", "dir/b/a/foo.bin", "dir/b/b/foo.bin"} + for _, file := range files { + if !fs.ValidPath(file) { + log.Fatal(file) + } + + infs[file] = &fstest.MapFile{Data: []byte("")} + } + + return infs +} + +func Test_expandPath(t *testing.T) { + type args struct { + fs fs.FS + in string + } + + tests := []struct { + name string + args args + want []string + }{ + {"Expand path 1", args{getInFS(), "*/bar.bin"}, []string{"dir/bar.bin"}}, + {"Expand path 2", args{getInFS(), "dir/*.bin"}, []string{"dir/bar.bin", "dir/baz.bin"}}, + {"Expand path 3", args{getInFS(), "dir/*/*/foo.bin"}, []string{"dir/a/a/foo.bin", "dir/a/b/foo.bin", "dir/b/a/foo.bin", "dir/b/b/foo.bin"}}, + {"Expand path 4", args{getInFS(), "**"}, []string{"dir", "dir/a", "dir/a/a", "dir/a/b", "dir/b", "dir/b/a", "dir/b/b", "dir/bar.bin", "dir/baz.bin", "foo.bin"}}, + {"Expand 
path 5", args{getInFS(), "dir/**2/foo.bin"}, []string{"dir/a/a/foo.bin", "dir/a/b/foo.bin", "dir/b/a/foo.bin", "dir/b/b/foo.bin"}}, + {"Expand path 6", args{getInFS(), "dir/**1"}, []string{"dir/a", "dir/b", "dir/bar.bin", "dir/baz.bin"}}, + {"Expand path 7", args{getInFS(), "dir/**10"}, []string{"dir/a", "dir/a/a", "dir/a/a/foo.bin", "dir/a/b", "dir/a/b/foo.bin", "dir/b", "dir/b/a", "dir/b/a/foo.bin", "dir/b/b", "dir/b/b/foo.bin", "dir/bar.bin", "dir/baz.bin"}}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := Glob(tt.args.fs, tt.args.in) + if err != nil { + t.Fatal(err) + } + + sort.Strings(tt.want) + sort.Strings(got) + + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("expandPath(%s) = %v, want %v", tt.args.in, got, tt.want) + } + }) + } +} + +func Test_splitPathOnSeparator(t *testing.T) { + type args struct { + path string + separator rune + } + + tests := []struct { + name string + args args + wantRet []string + }{ + {"backslash", args{"foo\\bar", '\\'}, []string{"foo", "bar"}}, + {"slash", args{"foo", '/'}, []string{"foo"}}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if gotRet := splitPathOnSeparator(tt.args.path, tt.args.separator); !reflect.DeepEqual(gotRet, tt.wantRet) { + t.Errorf("splitPathOnSeparator() = %v, want %v", gotRet, tt.wantRet) + } + }) + } +} + +func Test_indexRuneWithEscaping(t *testing.T) { + type args struct { + s string + r rune + } + + tests := []struct { + name string + args args + want int + }{ + {"normal y", args{"xxxy", 'y'}, 3}, + {"escaped y", args{"xxx\\y", 'y'}, -1}, + {"escaped x", args{"xxx\\xy", 'y'}, 5}, + {"escaped x 2", args{"xxx\\yy", 'y'}, 5}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := indexRuneWithEscaping(tt.args.s, tt.args.r); got != tt.want { + t.Errorf("indexRuneWithEscaping() = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_doMatching(t *testing.T) { + type args struct { + patternComponents []string + 
nameComponents []string + } + + tests := []struct { + name string + args args + wantMatched bool + wantErr bool + }{ + {"early return 1", args{nil, nil}, true, false}, + {"early return 2", args{nil, []string{"a"}}, false, false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotMatched, err := doMatching(tt.args.patternComponents, tt.args.nameComponents) + if (err != nil) != tt.wantErr { + t.Errorf("doMatching() error = %v, wantErr %v", err, tt.wantErr) + return + } + + if gotMatched != tt.wantMatched { + t.Errorf("doMatching() gotMatched = %v, want %v", gotMatched, tt.wantMatched) + } + }) + } +} + +func Test_matchComponent(t *testing.T) { + type args struct { + pattern string + name string + } + + tests := []struct { + name string + args args + want bool + wantErr bool + }{ + {"early return 2", args{"", "x"}, false, false}, + {"early return 3", args{"x", ""}, false, false}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := matchComponent(tt.args.pattern, tt.args.name) + if (err != nil) != tt.wantErr { + t.Errorf("matchComponent() error = %v, wantErr %v", err, tt.wantErr) + return + } + + if got != tt.want { + t.Errorf("matchComponent() got = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_readDir(t *testing.T) { + type args struct { + fs fs.FS + basedir string + } + + tests := []struct { + name string + args args + want []fs.DirEntry + wantErr bool + }{ + {"read dir", args{&fstest.MapFS{}, "."}, []fs.DirEntry{}, false}, + {"read dir error", args{&fstest.MapFS{}, "x"}, nil, true}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := fs.ReadDir(tt.args.fs, tt.args.basedir) + if (err != nil) != tt.wantErr { + t.Errorf("readDir() error = %v, wantErr %v", err, tt.wantErr) + return + } + + if !reflect.DeepEqual(got, tt.want) { + t.Errorf("readDir() got = %#v, want %#v", got, tt.want) + } + }) + } +} diff --git a/doublestar/example_test.go b/doublestar/example_test.go 
new file mode 100644 index 0000000..e3028cf --- /dev/null +++ b/doublestar/example_test.go @@ -0,0 +1,21 @@ +package doublestar_test + +import ( + "fmt" + "os" + + "github.com/forensicanalysis/artifactcollector/doublestar" +) + +func Example() { + // get file system for this repository + wd, _ := os.Getwd() + fsys := os.DirFS(wd) + + // get all yml files + matches, _ := doublestar.Glob(fsys, "**/*.md") + + // print matches + fmt.Println(matches) + // Output: [README.md] +} diff --git a/go.mod b/go.mod index 1bc0c65..5184bfa 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,6 @@ module github.com/forensicanalysis/artifactcollector go 1.16 require ( - github.com/forensicanalysis/fsdoublestar v0.1.0 github.com/forensicanalysis/fslib v0.15.1 github.com/go-ole/go-ole v1.2.4 github.com/google/uuid v1.3.0 diff --git a/go.sum b/go.sum index dd8d380..922f602 100644 --- a/go.sum +++ b/go.sum @@ -11,8 +11,6 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/djherbis/times v1.5.0 h1:79myA211VwPhFTqUk8xehWrsEO+zcIZj0zT8mXPVARU= github.com/djherbis/times v1.5.0/go.mod h1:5q7FDLvbNg1L/KaBmPcWlVR9NmoKo3+ucqUA3ijQhA0= -github.com/forensicanalysis/fsdoublestar v0.1.0 h1:t0SuwumE5FQtrw1ZkkNhxQbaWn8LywqdnCpEBX0n/Fc= -github.com/forensicanalysis/fsdoublestar v0.1.0/go.mod h1:UpG3oKY5UiFTnbxqp4/1h6WuaDtlPZcL6Hu5yI3DsL0= github.com/forensicanalysis/fslib v0.15.1 h1:mB30Tn2xwW6+nvV6FLbKqz1BYtHJpt3j3oyHpXGrJCQ= github.com/forensicanalysis/fslib v0.15.1/go.mod h1:cFsrRmnHRJkAE4Gbshk8N1myWx1BB7zT/u9+RnPH22M= github.com/go-ole/go-ole v1.2.4 h1:nNBDSCOigTSiarFpYE9J/KtEA1IOW4CNeqT9TQDqCxI=