From 0099c238bf20519b78c241efccde7fdf42b7c776 Mon Sep 17 00:00:00 2001 From: Louis Garman <75728+leg100@users.noreply.github.com> Date: Tue, 6 Aug 2024 11:12:18 +0100 Subject: [PATCH] chore: speed up module discovery (#112) --- internal/module/module.go | 132 ++++++++++++++++++++------------- internal/module/module_test.go | 17 ++++- internal/module/service.go | 63 +++++++++------- internal/workspace/service.go | 3 + 4 files changed, 132 insertions(+), 83 deletions(-) diff --git a/internal/module/module.go b/internal/module/module.go index 3b203d9b..cbb83e0d 100644 --- a/internal/module/module.go +++ b/internal/module/module.go @@ -1,15 +1,16 @@ package module import ( + "context" "io/fs" "log/slog" "path/filepath" + "sync" "github.com/hashicorp/hcl/v2" "github.com/hashicorp/hcl/v2/gohcl" "github.com/hashicorp/hcl/v2/hclparse" "github.com/leg100/pug/internal" - "github.com/leg100/pug/internal/logging" "github.com/leg100/pug/internal/resource" ) @@ -63,68 +64,88 @@ func (m *Module) LogValue() slog.Value { ) } -// findModules finds root modules that are descendents of the workdir and +// find finds root modules that are descendents of the workdir and // returns options for creating equivalent pug modules. // // A root module is deemed to be a directory that contains a .tf file that // contains a backend or cloud block, or in the case of terragrunt, a // terragrunt.hcl file. -func findModules(logger logging.Interface, workdir internal.Workdir) (modules []Options, err error) { - walkfn := func(path string, d fs.DirEntry, walkerr error) error { - if walkerr != nil { - return err - } - if d.IsDir() { - switch d.Name() { - case ".terraform", ".terragrunt-cache": - return filepath.SkipDir - } - return nil - } - - var isTerragrunt bool - switch { - case d.Name() == "terragrunt.hcl": - isTerragrunt = true - fallthrough - case filepath.Ext(path) == ".tf": - backend, found, err := detectBackend(path) +// +// find returns two channels: the first streams discovered modules (in the form +// of Options structs for creating the module in pug); the second streams any +// errors encountered. +// +// When finished, both channels are closed. +func find(ctx context.Context, workdir internal.Workdir) (<-chan Options, <-chan error) { + modules := make(chan Options) + errc := make(chan error, 1) + + go func() { + var wg sync.WaitGroup + err := filepath.WalkDir(workdir.String(), func(path string, d fs.DirEntry, err error) error { if err != nil { - logger.Error("reloading modules: parsing hcl", "path", path, "error", err) - return nil + errc <- err + return err } - if !isTerragrunt && !found { - // Not a terragrunt module, nor a vanilla terraform module with a - // backend config, so skip. + if d.IsDir() { + switch d.Name() { + case ".terraform", ".terragrunt-cache": + return filepath.SkipDir + } return nil } - if isTerragrunt && backend == "" { - // Unless terragrunt.hcl directly contains a `remote_state` - // block then Pug doesn't have a way of determining the backend - // type (not unless it evaluates terragrunt's language and - // follows `find_in_parent` etc. to locate the effective - // remote_state, which is perhaps a future exercise...). - logger.Warn("reloading modules: could not determine backend type", "path", path) + + var isTerragrunt bool + switch { + case d.Name() == "terragrunt.hcl": + isTerragrunt = true + fallthrough + case filepath.Ext(path) == ".tf": + wg.Add(1) + go func() { + defer wg.Done() + backend, found, err := detectBackend(path) + if err != nil { + errc <- err + return + } + if !isTerragrunt && !found { + // Not a terragrunt module, nor a vanilla terraform module with a + // backend config, so skip. + return + } + // Strip workdir from module path + stripped, err := filepath.Rel(workdir.String(), filepath.Dir(path)) + if err != nil { + errc <- err + return + } + modules <- Options{ + Path: stripped, + Backend: backend, + } + }() + // Skip walking remainder of parent directory + return fs.SkipDir } - // Strip workdir from module path - stripped, err := filepath.Rel(workdir.String(), filepath.Dir(path)) - if err != nil { - return err + // Abort walk if context canceled + select { + case <-ctx.Done(): + return ctx.Err() + default: + return nil } - modules = append(modules, Options{ - Path: stripped, - Backend: backend, - }) - // skip walking remainder of parent directory - return fs.SkipDir + }) + if err != nil { + errc <- err } - - return nil - } - if err := filepath.WalkDir(workdir.String(), walkfn); err != nil { - return nil, err - } - return + go func() { + wg.Wait() + close(modules) + close(errc) + }() + }() + return modules, errc } type terragrunt struct { @@ -177,7 +198,14 @@ func detectBackend(path string) (string, bool, error) { return "cloud", true, nil } } - // Detect terragrunt remote state configuration + // Detect terragrunt remote state configuration. + // + // Unless terragrunt.hcl directly contains a `remote_state` block then Pug + // doesn't have a way of determining the backend type (not unless it + // evaluates terragrunt's language and follows `find_in_parent` etc. to + // locate the effective remote_state, which is perhaps a future + // exercise...). If it doesn't contain such a block then the backend is + // simply an empty string. var remoteStateBlock terragrunt if diags := gohcl.DecodeBody(f.Body, nil, &remoteStateBlock); diags != nil { return "", false, diags diff --git a/internal/module/module_test.go b/internal/module/module_test.go index abc4d9ea..aaa4aee5 100644 --- a/internal/module/module_test.go +++ b/internal/module/module_test.go @@ -1,13 +1,12 @@ package module import ( + "context" "os" "testing" "github.com/leg100/pug/internal" - "github.com/leg100/pug/internal/logging" "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" ) func TestNew(t *testing.T) { @@ -21,8 +20,12 @@ func TestNew(t *testing.T) { func TestFindModules(t *testing.T) { workdir, _ := internal.NewWorkdir("./testdata/modules") - got, err := findModules(logging.Discard, workdir) - require.NoError(t, err) + modules, errch := find(context.Background(), workdir) + + var got []Options + for opts := range modules { + got = append(got, opts) + } assert.Equal(t, 5, len(got), got) assert.Contains(t, got, Options{Path: "with_local_backend", Backend: "local"}) @@ -31,4 +34,10 @@ func TestFindModules(t *testing.T) { assert.Contains(t, got, Options{Path: "terragrunt_with_local", Backend: "local"}) assert.Contains(t, got, Options{Path: "terragrunt_without_backend", Backend: ""}) assert.NotContains(t, got, "broken") + + // Expect one error from broken module then error channel should close + goterr := <-errch + assert.Contains(t, goterr.Error(), "Unclosed configuration block") + _, closed := <-errch + assert.False(t, closed) } diff --git a/internal/module/service.go b/internal/module/service.go index 660ebffd..94358d28 100644 --- a/internal/module/service.go +++ b/internal/module/service.go @@ -1,10 +1,12 @@ package module import ( + "context" "errors" "fmt" "io" "path/filepath" + "slices" "github.com/leg100/pug/internal" "github.com/leg100/pug/internal/logging" @@ -65,37 +67,44 @@ func NewService(opts ServiceOptions) *Service { // to the store before pruning those that are currently stored but can no longer // be found. func (s *Service) Reload() (added []string, removed []string, err error) { - found, err := findModules(s.logger, s.workdir) - if err != nil { - return nil, nil, err - } - for _, opts := range found { - // Add module if it isn't in pug already, otherwise update in-place - if mod, err := s.GetByPath(opts.Path); errors.Is(err, resource.ErrNotFound) { - mod := New(s.workdir, opts) - s.table.Add(mod.ID, mod) - added = append(added, opts.Path) - } else if err != nil { - return nil, nil, fmt.Errorf("retrieving module: %w", err) - } else { - // Update in-place; the backend may have changed. - s.table.Update(mod.ID, func(existing *Module) error { - existing.Backend = opts.Backend - return nil - }) + ch, errc := find(context.TODO(), s.workdir) + var found []string + for ch != nil || errc != nil { + select { + case opts, ok := <-ch: + if !ok { + ch = nil + break + } + found = append(found, opts.Path) + // handle found module + if mod, err := s.GetByPath(opts.Path); errors.Is(err, resource.ErrNotFound) { + // Not found, so add to pug + mod := New(s.workdir, opts) + s.table.Add(mod.ID, mod) + added = append(added, opts.Path) + } else if err != nil { + s.logger.Error("reloading modules", "error", err) + } else { + // Update in-place; the backend may have changed. + s.table.Update(mod.ID, func(existing *Module) error { + existing.Backend = opts.Backend + return nil + }) + } + case err, ok := <-errc: + if !ok { + errc = nil + break + } + if err != nil { + s.logger.Error("reloading modules", "error", err) + } } } - // Cleanup existing modules, removing those that are no longer to be found for _, existing := range s.table.List() { - var keep bool - for _, opts := range found { - if opts.Path == existing.Path { - keep = true - break - } - } - if !keep { + if !slices.Contains(found, existing.Path) { s.table.Delete(existing.ID) removed = append(removed, existing.Path) } diff --git a/internal/workspace/service.go b/internal/workspace/service.go index 4b34145e..08fe3192 100644 --- a/internal/workspace/service.go +++ b/internal/workspace/service.go @@ -69,6 +69,9 @@ func NewService(opts ServiceOptions) *Service { // whenever: // * a new module is loaded into pug for the first time // * an existing module is updated and does not yet have a current workspace. +// +// TODO: "load" is ambiguous, it often means the opposite of save, i.e. read +// from a system, whereas what is intended is to save or add workspaces to pug. func (s *Service) LoadWorkspacesUponModuleLoad(modules moduleSubscription) { sub := modules.Subscribe()