Skip to content

Commit

Permalink
Merge pull request #50 from hashicorp/brandonc/maybe_skip_pack_descent
Browse files Browse the repository at this point in the history
Optimize Pack for deep, ignored directories
  • Loading branch information
brandonc authored Dec 4, 2023
2 parents bc4aefe + 101bfff commit 4f09527
Show file tree
Hide file tree
Showing 14 changed files with 115 additions and 33 deletions.
31 changes: 23 additions & 8 deletions internal/ignorefiles/ignorerules.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,16 @@ type Ruleset struct {
rules []rule
}

// ExcludesResult is the result of matching a path against a Ruleset. A result
// is Excluded if it matches a set of paths that are excluded by the rules in a
// Ruleset. A matching result is Dominating if none of the rules that follow it
// contain a negation, implying that if the rule excludes a directory,
// everything below that directory may be ignored.
type ExcludesResult struct {
Excluded bool
Dominating bool
}

// ParseIgnoreFileContent takes a reader over the content of a .terraformignore
// file and returns the Ruleset described by that file, or an error if the
// file is invalid.
Expand Down Expand Up @@ -57,19 +67,20 @@ func LoadPackageIgnoreRules(packageDir string) (*Ruleset, error) {
// excluded by the rules in the ruleset.
//
// If any of the rules in the ruleset have invalid syntax then Excludes will
// return an error, but it will also still return a boolean result which
// return an error, but it will also still return a result which
// considers all of the remaining valid rules, to support callers that want to
// just ignore invalid exclusions. Such callers can safely ignore the error
// result:
//
// exc, _ = ruleset.Excludes(path)
func (r *Ruleset) Excludes(path string) (bool, error) {
// exc, matching, _ = ruleset.Excludes(path)
func (r *Ruleset) Excludes(path string) (ExcludesResult, error) {
if r == nil {
return false, nil
return ExcludesResult{}, nil
}

var retErr error
foundMatch := false
dominating := false
for _, rule := range r.rules {
match, err := rule.match(path)
if err != nil {
Expand All @@ -81,16 +92,20 @@ func (r *Ruleset) Excludes(path string) (bool, error) {
}
}
if match {
foundMatch = !rule.excluded
foundMatch = !rule.negated
dominating = foundMatch && !rule.negationsAfter
}
}
return foundMatch, retErr
return ExcludesResult{
Excluded: foundMatch,
Dominating: dominating,
}, retErr
}

// Includes is the inverse of [Ruleset.Excludes].
func (r *Ruleset) Includes(path string) (bool, error) {
notRet, err := r.Excludes(path)
return !notRet, err
result, err := r.Excludes(path)
return !result.Excluded, err
}

var DefaultRuleset *Ruleset
Expand Down
32 changes: 21 additions & 11 deletions internal/ignorefiles/terraformignore.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ func readRules(input io.Reader) ([]rule, error) {
rules := defaultExclusions
scanner := bufio.NewScanner(input)
scanner.Split(bufio.ScanLines)
currentRuleIndex := len(defaultExclusions) - 1

for scanner.Scan() {
pattern := scanner.Text()
Expand All @@ -32,8 +33,15 @@ func readRules(input io.Reader) ([]rule, error) {
rule := rule{}
// Exclusions
if pattern[0] == '!' {
rule.excluded = true
rule.negated = true
pattern = pattern[1:]
// Mark all previous rules as having negations after it
for i := currentRuleIndex; i >= 0; i-- {
if rules[i].negationsAfter {
break
}
rules[i].negationsAfter = true
}
}
// If it is a directory, add ** so we catch descendants
if pattern[len(pattern)-1] == os.PathSeparator {
Expand All @@ -49,6 +57,7 @@ func readRules(input io.Reader) ([]rule, error) {
rule.val = pattern
rule.dirs = strings.Split(pattern, string(os.PathSeparator))
rules = append(rules, rule)
currentRuleIndex += 1
}

if err := scanner.Err(); err != nil {
Expand All @@ -58,10 +67,11 @@ func readRules(input io.Reader) ([]rule, error) {
}

type rule struct {
val string // the value of the rule itself
excluded bool // ! is present, an exclusion rule
dirs []string // directories of the rule
regex *regexp.Regexp // regular expression to match for the rule
val string // the value of the rule itself
negated bool // prefixed by !, a negated rule
negationsAfter bool // negatied rules appear after this rule
dirs []string // directories of the rule
regex *regexp.Regexp // regular expression to match for the rule
}

func (r *rule) match(path string) (bool, error) {
Expand Down Expand Up @@ -160,16 +170,16 @@ func (r *rule) compile() error {

var defaultExclusions = []rule{
{
val: strings.Join([]string{"**", ".git", "**"}, string(os.PathSeparator)),
excluded: false,
val: strings.Join([]string{"**", ".git", "**"}, string(os.PathSeparator)),
negated: false,
},
{
val: strings.Join([]string{"**", ".terraform", "**"}, string(os.PathSeparator)),
excluded: false,
val: strings.Join([]string{"**", ".terraform", "**"}, string(os.PathSeparator)),
negated: false,
},
{
val: strings.Join([]string{"**", ".terraform", "modules", "**"}, string(os.PathSeparator)),
excluded: true,
val: strings.Join([]string{"**", ".terraform", "modules", "**"}, string(os.PathSeparator)),
negated: true,
},
}

Expand Down
43 changes: 41 additions & 2 deletions internal/ignorefiles/terraformignore_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ func TestTerraformIgnore(t *testing.T) {
if err != nil {
t.Fatal(err)
}

type file struct {
// the actual path, should be file path format /dir/subdir/file.extension
path string
Expand Down Expand Up @@ -112,13 +113,51 @@ func TestTerraformIgnore(t *testing.T) {
},
}
for i, p := range paths {
match, err := rs.Excludes(p.path)
result, err := rs.Excludes(p.path)
if err != nil {
t.Errorf("invalid rule syntax when checking %s at index %d", p.path, i)
continue
}
if match != p.match {
if result.Excluded != p.match {
t.Fatalf("%s at index %d should be %t", p.path, i, p.match)
}
}
}

func TestTerraformIgnoreNoExclusionOptimization(t *testing.T) {
rs, err := LoadPackageIgnoreRules("testdata/with-exclusion")
if err != nil {
t.Fatal(err)
}
if len(rs.rules) != 7 {
t.Fatalf("Expected 7 rules, got %d", len(rs.rules))
}

// reflects that no negations follow the last rule
afterValue := false
for i := len(rs.rules) - 1; i >= 0; i-- {
r := rs.rules[i]
if r.negationsAfter != afterValue {
t.Errorf("Expected exclusionsAfter to be %v at index %d", afterValue, i)
}
if r.negated {
afterValue = true
}
}

// last two will be dominating
for _, r := range []string{"logs/", "tmp/"} {
result, err := rs.Excludes(r)
if err != nil {
t.Fatal(err)
}
if !result.Dominating {
t.Errorf("Expected %q to be a dominating rule", r)
}
}

if actual, _ := rs.Excludes("src/baz/ignored"); !actual.Excluded {
t.Errorf("Expected %q to be excluded, but it was included", "src/baz/ignored")
}

}
4 changes: 2 additions & 2 deletions internal/ignorefiles/testdata/archive-dir/.terraformignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

# ignore a directory
terraform.d/
# exclude ignoring a directory at the root
# negate ignoring a directory at the root
!/terraform.d/
# ignore a file at a subpath
**/foo/bar.tf
Expand All @@ -17,4 +17,4 @@ bar/something-[a-z].txt
# ignore a file
boop.txt
# but not one at the current directory
!/boop.txt
!/boop.txt
5 changes: 5 additions & 0 deletions internal/ignorefiles/testdata/with-exclusion/.terraformignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
src/**/*
# except at one directory
!src/foo/bar.txt
logs/
tmp/
1 change: 1 addition & 0 deletions internal/ignorefiles/testdata/with-exclusion/logs/foo.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
foo
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ignored
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
bar
1 change: 1 addition & 0 deletions internal/ignorefiles/testdata/with-exclusion/tmp/tmp.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
tmp
10 changes: 7 additions & 3 deletions slug.go
Original file line number Diff line number Diff line change
Expand Up @@ -199,15 +199,19 @@ func (p *Packer) packWalkFn(root, src, dst string, tarW *tar.Writer, meta *Meta,
return nil
}

if m := matchIgnoreRules(subpath, ignoreRules); m {
if r := matchIgnoreRules(subpath, ignoreRules); r.Excluded {
return nil
}

// Catch directories so we don't end up with empty directories,
// the files are ignored correctly
if info.IsDir() {
if m := matchIgnoreRules(subpath+string(os.PathSeparator), ignoreRules); m {
return nil
if r := matchIgnoreRules(subpath+string(os.PathSeparator), ignoreRules); r.Excluded {
if r.Dominating {
return filepath.SkipDir
} else {
return nil
}
}
}

Expand Down
9 changes: 7 additions & 2 deletions sourcebundle/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -632,7 +632,7 @@ func packagePrepareWalkFn(root string, ignoreRules *ignorefiles.Ruleset) filepat
if err != nil {
return fmt.Errorf("invalid .terraformignore rules: %#w", err)
}
if ignored {
if ignored.Excluded {
err := os.RemoveAll(absPath)
if err != nil {
return fmt.Errorf("failed to remove ignored file %s: %s", relPath, err)
Expand All @@ -642,12 +642,17 @@ func packagePrepareWalkFn(root string, ignoreRules *ignorefiles.Ruleset) filepat

// For directories we also need to check with a path separator on the
// end, which ignores entire subtrees.
//
// TODO: What about exclusion rules that follow a matching directory?
// Example:
// /logs
// !/logs/production/*
if info.IsDir() {
ignored, err := ignoreRules.Excludes(relPath + string(os.PathSeparator))
if err != nil {
return fmt.Errorf("invalid .terraformignore rules: %#w", err)
}
if ignored {
if ignored.Excluded {
err := os.RemoveAll(absPath)
if err != nil {
return fmt.Errorf("failed to remove ignored file %s: %s", relPath, err)
Expand Down
2 changes: 1 addition & 1 deletion terraformignore.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ func parseIgnoreFile(rootPath string) *ignorefiles.Ruleset {
return ret
}

func matchIgnoreRules(path string, ruleset *ignorefiles.Ruleset) bool {
func matchIgnoreRules(path string, ruleset *ignorefiles.Ruleset) ignorefiles.ExcludesResult {
// Ruleset.Excludes explicitly allows ignoring its error, in which
// case we are ignoring any individual invalid rules in the set
// but still taking all of the others into account.
Expand Down
4 changes: 2 additions & 2 deletions testdata/archive-dir-no-external/.terraformignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

# ignore a directory
terraform.d/
# exclude ignoring a directory at the root
# negate ignoring a directory at the root
!/terraform.d/
# ignore a file at a subpath
**/foo/bar.tf
Expand All @@ -17,4 +17,4 @@ bar/something-[a-z].txt
# ignore a file
boop.txt
# but not one at the current directory
!/boop.txt
!/boop.txt
4 changes: 2 additions & 2 deletions testdata/archive-dir/.terraformignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

# ignore a directory
terraform.d/
# exclude ignoring a directory at the root
# negate ignoring a directory at the root
!/terraform.d/
# ignore a file at a subpath
**/foo/bar.tf
Expand All @@ -17,4 +17,4 @@ bar/something-[a-z].txt
# ignore a file
boop.txt
# but not one at the current directory
!/boop.txt
!/boop.txt

0 comments on commit 4f09527

Please sign in to comment.