From d5542ed286746e89fb13a1f821d4955ace371773 Mon Sep 17 00:00:00 2001 From: Dietrich Epp Date: Tue, 18 Jun 2024 14:26:08 -0400 Subject: [PATCH] deploy: Add stripIndexHtml target option This new configuration parameter causes paths matching "<dir>/index.html" to be stored as "<dir>/" remotely. This simplifies the cloud configuration needed for some use cases, such as CloudFront distributions with S3 bucket origins. Before this change, users must configure their S3 buckets as public websites (which is incompatible with certain authentication / authorization schemes), or users must add a CloudFront function to add index.html to the end of incoming requests. After this change, users can simply use an ordinary CloudFront distribution (no additional code) with an ordinary S3 bucket origin (and not an S3 website). This adds tests to ensure that functionality like matchers is unaffected by this change. I have also tested that the functionality works as expected when deploying to a real S3 / CloudFront website. Closes #12607 --- deploy/deploy.go | 22 +++++- deploy/deploy_test.go | 69 ++++++++++++++++++- deploy/deployconfig/deployConfig.go | 5 ++ .../en/hosting-and-deployment/hugo-deploy.md | 10 +++ 4 files changed, 101 insertions(+), 5 deletions(-) diff --git a/deploy/deploy.go b/deploy/deploy.go index b2a8a88138b..a69e974b78c 100644 --- a/deploy/deploy.go +++ b/deploy/deploy.go @@ -133,10 +133,14 @@ func (d *Deployer) Deploy(ctx context.Context) error { // Load local files from the source directory. 
var include, exclude glob.Glob + var mappath func(string) string if d.target != nil { include, exclude = d.target.IncludeGlob, d.target.ExcludeGlob + if d.target.StripIndexHTML { + mappath = stripIndexHTML + } } - local, err := d.walkLocal(d.localFs, d.cfg.Matchers, include, exclude, d.mediaTypes) + local, err := d.walkLocal(d.localFs, d.cfg.Matchers, include, exclude, d.mediaTypes, mappath) if err != nil { return err } @@ -483,7 +487,7 @@ func knownHiddenDirectory(name string) bool { // walkLocal walks the source directory and returns a flat list of files, // using localFile.SlashPath as the map keys. -func (d *Deployer) walkLocal(fs afero.Fs, matchers []*deployconfig.Matcher, include, exclude glob.Glob, mediaTypes media.Types) (map[string]*localFile, error) { +func (d *Deployer) walkLocal(fs afero.Fs, matchers []*deployconfig.Matcher, include, exclude glob.Glob, mediaTypes media.Types, mappath func(string) string) (map[string]*localFile, error) { retval := map[string]*localFile{} err := afero.Walk(fs, "", func(path string, info os.FileInfo, err error) error { if err != nil { @@ -529,6 +533,11 @@ func (d *Deployer) walkLocal(fs afero.Fs, matchers []*deployconfig.Matcher, incl break } } + // Apply any additional modifications to the local path, to map it to + // the remote path. + if mappath != nil { + slashpath = mappath(slashpath) + } lf, err := newLocalFile(fs, path, slashpath, m, mediaTypes) if err != nil { return err @@ -542,6 +551,15 @@ func (d *Deployer) walkLocal(fs afero.Fs, matchers []*deployconfig.Matcher, incl return retval, nil } +// stripIndexHTML remaps keys matching "<dir>/index.html" to "<dir>/". +func stripIndexHTML(slashpath string) string { + const suffix = "/index.html" + if strings.HasSuffix(slashpath, suffix) { + return slashpath[:len(slashpath)-len(suffix)+1] + } + return slashpath +} + // walkRemote walks the target bucket and returns a flat list. 
func (d *Deployer) walkRemote(ctx context.Context, bucket *blob.Bucket, include, exclude glob.Glob) (map[string]*blob.ListObject, error) { retval := map[string]*blob.ListObject{} diff --git a/deploy/deploy_test.go b/deploy/deploy_test.go index 12967fba071..17dffc25af4 100644 --- a/deploy/deploy_test.go +++ b/deploy/deploy_test.go @@ -216,8 +216,9 @@ func TestFindDiffs(t *testing.T) { func TestWalkLocal(t *testing.T) { tests := map[string]struct { - Given []string - Expect []string + Given []string + Expect []string + MapPath func(string) string }{ "Empty": { Given: []string{}, @@ -235,6 +236,11 @@ func TestWalkLocal(t *testing.T) { Given: []string{"file.txt", ".hidden_dir/file.txt", ".well-known/file.txt"}, Expect: []string{"file.txt", ".well-known/file.txt"}, }, + "StripIndexHTML": { + Given: []string{"index.html", "file.txt", "dir/index.html", "dir/file.txt"}, + Expect: []string{"index.html", "file.txt", "dir/", "dir/file.txt"}, + MapPath: stripIndexHTML, + }, } for desc, tc := range tests { @@ -254,7 +260,7 @@ func TestWalkLocal(t *testing.T) { } } d := newDeployer() - if got, err := d.walkLocal(fs, nil, nil, nil, media.DefaultTypes); err != nil { + if got, err := d.walkLocal(fs, nil, nil, nil, media.DefaultTypes, tc.MapPath); err != nil { t.Fatal(err) } else { expect := map[string]any{} @@ -274,6 +280,63 @@ func TestWalkLocal(t *testing.T) { } } +func TestStripIndexHTML(t *testing.T) { + tests := map[string]struct { + Input string + Output string + }{ + "Unmapped": {Input: "normal_file.txt", Output: "normal_file.txt"}, + "Stripped": {Input: "directory/index.html", Output: "directory/"}, + "NoSlash": {Input: "prefix_index.html", Output: "prefix_index.html"}, + "Root": {Input: "index.html", Output: "index.html"}, + } + for desc, tc := range tests { + t.Run(desc, func(t *testing.T) { + got := stripIndexHTML(tc.Input) + if got != tc.Output { + t.Errorf("got %q, expect %q", got, tc.Output) + } + }) + } +} + +func TestStripIndexHTMLMatcher(t *testing.T) { + // 
StripIndexHTML should not affect matchers. + fs := afero.NewMemMapFs() + if err := fs.Mkdir("dir", 0o755); err != nil { + t.Fatal(err) + } + for _, name := range []string{"index.html", "dir/index.html", "file.txt"} { + if fd, err := fs.Create(name); err != nil { + t.Fatal(err) + } else { + fd.Close() + } + } + d := newDeployer() + const pattern = `\.html$` + matcher := &deployconfig.Matcher{Pattern: pattern, Gzip: true, Re: regexp.MustCompile(pattern)} + if got, err := d.walkLocal(fs, []*deployconfig.Matcher{matcher}, nil, nil, media.DefaultTypes, stripIndexHTML); err != nil { + t.Fatal(err) + } else { + for _, name := range []string{"index.html", "dir/"} { + lf := got[name] + if lf == nil { + t.Errorf("missing file %q", name) + } else if lf.matcher == nil { + t.Errorf("file %q has nil matcher, expect %q", name, pattern) + } + } + const name = "file.txt" + lf := got[name] + if lf == nil { + t.Errorf("missing file %q", name) + } else if lf.matcher != nil { + t.Errorf("file %q has matcher %q, expect nil", name, lf.matcher.Pattern) + } + } +} + func TestLocalFile(t *testing.T) { const ( content = "hello world!" diff --git a/deploy/deployconfig/deployConfig.go b/deploy/deployconfig/deployConfig.go index b5b8973b2d8..b16b7c627a2 100644 --- a/deploy/deployconfig/deployConfig.go +++ b/deploy/deployconfig/deployConfig.go @@ -69,6 +69,11 @@ type Target struct { // Parsed versions of Include/Exclude. IncludeGlob glob.Glob `json:"-"` ExcludeGlob glob.Glob `json:"-"` + + // If true, any local path matching <dir>/index.html will be mapped to the + // remote path <dir>/. This does not affect the top-level index.html file, + // since that would result in an empty path. 
+ StripIndexHTML bool } func (tgt *Target) ParseIncludeExclude() error { diff --git a/docs/content/en/hosting-and-deployment/hugo-deploy.md b/docs/content/en/hosting-and-deployment/hugo-deploy.md index 45b917f1e28..db2448ee7c6 100644 --- a/docs/content/en/hosting-and-deployment/hugo-deploy.md +++ b/docs/content/en/hosting-and-deployment/hugo-deploy.md @@ -186,6 +186,15 @@ URL = "" #include = "**.html" # would only include files with ".html" suffix #exclude = "**.{jpg, png}" # would exclude files with ".jpg" or ".png" suffix +# Map any file named "<dir>/index.html" to the remote file "<dir>/". This does +# not affect the root "index.html" file, and it does not affect matchers below. +# This works when deploying to key-value cloud storage systems, such as Amazon +# S3 (general purpose buckets, not directory buckets), Google Cloud Storage, and +# Azure Blob Storage. This makes it so the canonical URL will match the object +# key in cloud storage, except for the root index.html file. +# +#stripIndexHTML = true + ####################### [[deployment.matchers]] @@ -195,6 +204,7 @@ URL = "" # See https://golang.org/pkg/regexp/syntax/ for pattern syntax. # Pattern searching is stopped on first match. +# This is not affected by stripIndexHTML, above. pattern = "" # If true, Hugo will gzip the file before uploading it to the bucket.