From a7e7135db5ba026ab2cd9bd824e8fd31779e3cb4 Mon Sep 17 00:00:00 2001
From: Paul Gottschling <paul.gottschling@gmail.com>
Date: Sun, 14 Nov 2021 16:29:44 -0500
Subject: [PATCH] Avoid sticky errors in subsequent *Init.Do calls

When *Init.Do executes the *Init's f function, it stores the error
in the *Init's err field and returns ini.err. If we call an *Init's
Do method again, and there is no error in the subsequent call,
ini.err remains unchanged, and *Init.Do returns it again.

This becomes an issue for the "hugo server" command, which calls
h.init.data.Do within *HugoSites.Data every time the site is
rebuilt. If "hugo server" is started with a valid data file, and
that data file is edited to make it invalid (e.g., with poorly
formatted JSON), then any attempt to fix the data file only causes
Hugo to return the same error.

The fix is to ensure that *Init.Do resets ini.err to nil with each
call. The err field of lazy.Init is not used outside of *Init.Do,
so I removed it in order to simplify the code slightly and ensure
that errors would not be part of an *Init's state. Instead, this
change declares an error variable at the top of *Init.Do and
assigns it in place of ini.err.

After making this change, the go race detector began reporting
a race in TestImageOps between one goroutine created in
*Site.renderPages:

```go
go pageRenderer(ctx, s, pages, results, wg)
```

...and another goroutine created in *Init.withTimeout:

```go
	go func(cv chan verr) {
		v, err := f(ctx)
		c <- verr{v: v, err: err}
	}(c)
```

The race affected *pageContentOutput.content. To
prevent this race, I added getter and setter methods for
pageContentOutput.content that make use of a sync.RWMutex.
Otherwise, this field is assigned/read directly by multiple
goroutines.

I made other changes in order to refactor the code and prevent
unintentional memory sharing:

- Removed lazy.AddWithTimeout and its tests. This function is not
  called anywhere, and removing it makes it slightly easier to
  make sense of the lazy package.

- Edited the *Init.withTimeout to remove a redundant select
  statement.

- Edited *onceMore.Do to return an error, making it a bit
  more straightforward to share errors between its function
  argument and its caller without risking unintentional memory
  sharing.

Fixes #7043
---
 hugolib/page__per_output.go | 27 ++++++++++++---
 lazy/init.go                | 46 ++++++++++---------------
 lazy/init_test.go           | 69 +++++++++++--------------------------
 lazy/once.go                | 10 +++---
 4 files changed, 68 insertions(+), 84 deletions(-)

diff --git a/hugolib/page__per_output.go b/hugolib/page__per_output.go
index f59b5f9b545..c4285d4aedf 100644
--- a/hugolib/page__per_output.go
+++ b/hugolib/page__per_output.go
@@ -73,6 +73,7 @@ func newPageContentOutput(p *pageState, po *pageOutput) (*pageContentOutput, err
 	}
 
 	cp := &pageContentOutput{
+		mu:                &sync.RWMutex{},
 		dependencyTracker: dependencyTracker,
 		p:                 p,
 		f:                 po.f,
@@ -191,7 +192,7 @@ func newPageContentOutput(p *pageState, po *pageOutput) (*pageContentOutput, err
 			cp.summary = helpers.BytesToHTML(html)
 		}
 
-		cp.content = helpers.BytesToHTML(cp.workContent)
+		cp.setContent(helpers.BytesToHTML(cp.workContent))
 
 		return nil
 	}
@@ -211,7 +212,7 @@ func newPageContentOutput(p *pageState, po *pageOutput) (*pageContentOutput, err
 	}
 
 	cp.initPlain = cp.initMain.Branch(func() (interface{}, error) {
-		cp.plain = helpers.StripHTML(string(cp.content))
+		cp.plain = helpers.StripHTML(string(cp.getContent()))
 		cp.plainWords = strings.Fields(cp.plain)
 		cp.setWordCounts(p.m.isCJKLanguage)
 
@@ -263,6 +264,9 @@ type pageContentOutput struct {
 	contentPlaceholders map[string]string
 
 	// Content sections
+	//
+	// content is only goroutine safe if retrieved via getContent and set
+	// via setContent.
 	content         template.HTML
 	summary         template.HTML
 	tableOfContents template.HTML
@@ -274,6 +278,7 @@ type pageContentOutput struct {
 	fuzzyWordCount int
 	wordCount      int
 	readingTime    int
+	mu             *sync.RWMutex
 }
 
 func (p *pageContentOutput) trackDependency(id identity.Provider) {
@@ -282,6 +287,20 @@ func (p *pageContentOutput) trackDependency(id identity.Provider) {
 	}
 }
 
+// getContent enables safe concurrent read access to p.content
+func (p *pageContentOutput) getContent() template.HTML {
+	p.mu.RLock()
+	defer p.mu.RUnlock()
+	return p.content
+}
+
+// setContent enables safe concurrent write access to p.content
+func (p *pageContentOutput) setContent(c template.HTML) {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+	p.content = c
+}
+
 func (p *pageContentOutput) Reset() {
 	if p.dependencyTracker != nil {
 		p.dependencyTracker.Reset()
@@ -293,7 +312,7 @@ func (p *pageContentOutput) Reset() {
 
 func (p *pageContentOutput) Content() (interface{}, error) {
 	if p.p.s.initInit(p.initMain, p.p) {
-		return p.content, nil
+		return p.getContent(), nil
 	}
 	return nil, nil
 }
@@ -305,7 +324,7 @@ func (p *pageContentOutput) FuzzyWordCount() int {
 
 func (p *pageContentOutput) Len() int {
 	p.p.s.initInit(p.initMain, p.p)
-	return len(p.content)
+	return len(p.getContent())
 }
 
 func (p *pageContentOutput) Plain() string {
diff --git a/lazy/init.go b/lazy/init.go
index e2e70072ea4..575f11d94ff 100644
--- a/lazy/init.go
+++ b/lazy/init.go
@@ -35,7 +35,6 @@ type Init struct {
 
 	init onceMore
 	out  interface{}
-	err  error
 	f    func() (interface{}, error)
 }
 
@@ -47,13 +46,6 @@ func (ini *Init) Add(initFn func() (interface{}, error)) *Init {
 	return ini.add(false, initFn)
 }
 
-// AddWithTimeout is same as Add, but with a timeout that aborts initialization.
-func (ini *Init) AddWithTimeout(timeout time.Duration, f func(ctx context.Context) (interface{}, error)) *Init {
-	return ini.Add(func() (interface{}, error) {
-		return ini.withTimeout(timeout, f)
-	})
-}
-
 // Branch creates a new dependency branch based on an existing and adds
 // the given dependency as a child.
 func (ini *Init) Branch(initFn func() (interface{}, error)) *Init {
@@ -76,15 +68,17 @@ func (ini *Init) Do() (interface{}, error) {
 		panic("init is nil")
 	}
 
-	ini.init.Do(func() {
+	var err error
+
+	err = ini.init.Do(func() error {
 		prev := ini.prev
+		var e error
 		if prev != nil {
 			// A branch. Initialize the ancestors.
 			if prev.shouldInitialize() {
-				_, err := prev.Do()
-				if err != nil {
-					ini.err = err
-					return
+				_, e = prev.Do()
+				if e != nil {
+					return e
 				}
 			} else if prev.inProgress() {
 				// Concurrent initialization. The following init func
@@ -94,23 +88,26 @@ func (ini *Init) Do() (interface{}, error) {
 		}
 
 		if ini.f != nil {
-			ini.out, ini.err = ini.f()
+			ini.out, e = ini.f()
+			if e != nil {
+				return e
+			}
 		}
 
 		for _, child := range ini.children {
 			if child.shouldInitialize() {
-				_, err := child.Do()
-				if err != nil {
-					ini.err = err
-					return
+				_, e = child.Do()
+				if e != nil {
+					return e
 				}
 			}
 		}
+		return nil
 	})
 
 	ini.wait()
 
-	return ini.out, ini.err
+	return ini.out, err
 }
 
 // TODO(bep) investigate if we can use sync.Cond for this.
@@ -172,15 +169,10 @@ func (ini *Init) withTimeout(timeout time.Duration, f func(ctx context.Context)
 	defer cancel()
 	c := make(chan verr, 1)
 
-	go func() {
+	go func(cv chan verr) {
 		v, err := f(ctx)
-		select {
-		case <-ctx.Done():
-			return
-		default:
-			c <- verr{v: v, err: err}
-		}
-	}()
+		c <- verr{v: v, err: err}
+	}(c)
 
 	select {
 	case <-ctx.Done():
diff --git a/lazy/init_test.go b/lazy/init_test.go
index 2051f6b1a5f..71a2f3f432f 100644
--- a/lazy/init_test.go
+++ b/lazy/init_test.go
@@ -14,7 +14,6 @@
 package lazy
 
 import (
-	"context"
 	"errors"
 	"math/rand"
 	"strings"
@@ -107,53 +106,6 @@ func TestInit(t *testing.T) {
 	}
 }
 
-func TestInitAddWithTimeout(t *testing.T) {
-	c := qt.New(t)
-
-	init := New().AddWithTimeout(100*time.Millisecond, func(ctx context.Context) (interface{}, error) {
-		return nil, nil
-	})
-
-	_, err := init.Do()
-
-	c.Assert(err, qt.IsNil)
-}
-
-func TestInitAddWithTimeoutTimeout(t *testing.T) {
-	c := qt.New(t)
-
-	init := New().AddWithTimeout(100*time.Millisecond, func(ctx context.Context) (interface{}, error) {
-		time.Sleep(500 * time.Millisecond)
-		select {
-		case <-ctx.Done():
-			return nil, nil
-		default:
-		}
-		t.Fatal("slept")
-		return nil, nil
-	})
-
-	_, err := init.Do()
-
-	c.Assert(err, qt.Not(qt.IsNil))
-
-	c.Assert(err.Error(), qt.Contains, "timed out")
-
-	time.Sleep(1 * time.Second)
-}
-
-func TestInitAddWithTimeoutError(t *testing.T) {
-	c := qt.New(t)
-
-	init := New().AddWithTimeout(100*time.Millisecond, func(ctx context.Context) (interface{}, error) {
-		return nil, errors.New("failed")
-	})
-
-	_, err := init.Do()
-
-	c.Assert(err, qt.Not(qt.IsNil))
-}
-
 type T struct {
 	sync.Mutex
 	V1 string
@@ -220,3 +172,24 @@ func TestInitBranchOrder(t *testing.T) {
 
 	c.Assert(state.V2, qt.Equals, "ABAB")
 }
+
+// Sometimes need to call the same *Init's Do method multiple times, e.g., when
+// hugo server is running. In these cases, we need to make sure that an error
+// returned by an earlier call does not persist in memory for later calls.
+// See issue 7043
+func TestAvoidRepeatDoError(t *testing.T) {
+	r := false
+	i := New().Add(func() (interface{}, error) {
+		if r {
+			return nil, nil
+		}
+		return nil, errors.New("r is false")
+	})
+	_, err := i.Do()
+	r = true
+	_, err = i.Do()
+	if err != nil {
+		t.Errorf("expected a nil error but got: %v", err)
+	}
+
+}
diff --git a/lazy/once.go b/lazy/once.go
index bdce12c330b..13fdf5a4e0a 100644
--- a/lazy/once.go
+++ b/lazy/once.go
@@ -30,15 +30,15 @@ type onceMore struct {
 	done uint32
 }
 
-func (t *onceMore) Do(f func()) {
+func (t *onceMore) Do(f func() error) error {
 	if atomic.LoadUint32(&t.done) == 1 {
-		return
+		return nil
 	}
 
 	// f may call this Do and we would get a deadlock.
 	locked := atomic.CompareAndSwapUint32(&t.lock, 0, 1)
 	if !locked {
-		return
+		return nil
 	}
 	defer atomic.StoreUint32(&t.lock, 0)
 
@@ -47,10 +47,10 @@ func (t *onceMore) Do(f func()) {
 
 	// Double check
 	if t.done == 1 {
-		return
+		return nil
 	}
 	defer atomic.StoreUint32(&t.done, 1)
-	f()
+	return f()
 }
 
 func (t *onceMore) InProgress() bool {