From a7e7135db5ba026ab2cd9bd824e8fd31779e3cb4 Mon Sep 17 00:00:00 2001 From: Paul Gottschling Date: Sun, 14 Nov 2021 16:29:44 -0500 Subject: [PATCH] Avoid sticky errors in subsequent *Init.Do calls When *Init.Do executes the *Init's f function, it stores the error in the *Init's err field and returns ini.err. If we call an *Init's Do method again, and there is no error in the subsequent call, ini.err remains unchanged, and *Init.Do returns it again. This becomes an issue for the "hugo server" command, which calls h.init.data.Do within *HugoSites.Data every time the site is rebuilt. If "hugo server" is started with a valid data file, and that data file is edited to make it invalid (e.g., with poorly formatted JSON), then any attempt to fix the data file only causes Hugo to return the same error. The fix is to ensure that *Init.Do resets ini.err to nil with each call. The err field of lazy.Init is not used outside of *Init.Do, so I removed it in order to simplify the code slightly and ensure that errors would not be part of an *Init's state. Instead, this change declares an error variable at the top of *Init.Do and assigns it in place of ini.err. After making this change, the go race detector began reporting a race in TestImageOps between one goroutine created in *Site.renderPages: ```go go pageRenderer(ctx, s, pages, results, wg) ``` ...and another goroutine created in *Init.withTimeout: ```go go func(cv chan verr) { v, err := f(ctx) c <- verr{v: v, err: err} }(c) ``` The race affected *pageContentOutput.content. To prevent this race, I added getter and setter methods for pageContentOutput.content that make use of a sync.RWMutex. Otherwise, this field is assigned/read directly by multiple goroutines. I made other changes in order to refactor the code and prevent unintentional memory sharing: - Removed lazy.AddWithTimeout and its tests. This function is not called anywhere, and removing it makes it slightly easier to make sense of the lazy package. - Edited the *Init.withTimeout to remove a redundant select statement. - Edited *onceMore.Do to return an error, making it a bit more straightforward to share errors between its function argument and its caller without risking unintentional memory sharing. Fixes #7043 --- hugolib/page__per_output.go | 27 ++++++++++++--- lazy/init.go | 46 ++++++++++--------------- lazy/init_test.go | 69 +++++++++++-------------------------- lazy/once.go | 10 +++--- 4 files changed, 68 insertions(+), 84 deletions(-) diff --git a/hugolib/page__per_output.go b/hugolib/page__per_output.go index f59b5f9b545..c4285d4aedf 100644 --- a/hugolib/page__per_output.go +++ b/hugolib/page__per_output.go @@ -73,6 +73,7 @@ func newPageContentOutput(p *pageState, po *pageOutput) (*pageContentOutput, err } cp := &pageContentOutput{ + mu: &sync.RWMutex{}, dependencyTracker: dependencyTracker, p: p, f: po.f, @@ -191,7 +192,7 @@ func newPageContentOutput(p *pageState, po *pageOutput) (*pageContentOutput, err cp.summary = helpers.BytesToHTML(html) } - cp.content = helpers.BytesToHTML(cp.workContent) + cp.setContent(helpers.BytesToHTML(cp.workContent)) return nil } @@ -211,7 +212,7 @@ func newPageContentOutput(p *pageState, po *pageOutput) (*pageContentOutput, err } cp.initPlain = cp.initMain.Branch(func() (interface{}, error) { - cp.plain = helpers.StripHTML(string(cp.content)) + cp.plain = helpers.StripHTML(string(cp.getContent())) cp.plainWords = strings.Fields(cp.plain) cp.setWordCounts(p.m.isCJKLanguage) @@ -263,6 +264,9 @@ type pageContentOutput struct { contentPlaceholders map[string]string // Content sections + // + // content is only goroutine safe if retrieved via getContent and set + // via setContent. content template.HTML summary template.HTML tableOfContents template.HTML @@ -274,6 +278,7 @@ type pageContentOutput struct { fuzzyWordCount int wordCount int readingTime int + mu *sync.RWMutex } func (p *pageContentOutput) trackDependency(id identity.Provider) { @@ -282,6 +287,20 @@ func (p *pageContentOutput) trackDependency(id identity.Provider) { } } +// getContent enables safe concurrent read access to p.content +func (p *pageContentOutput) getContent() template.HTML { + p.mu.RLock() + defer p.mu.RUnlock() + return p.content +} + +// setContent enables safe concurrent write access to p.content +func (p *pageContentOutput) setContent(c template.HTML) { + p.mu.Lock() + defer p.mu.Unlock() + p.content = c +} + func (p *pageContentOutput) Reset() { if p.dependencyTracker != nil { p.dependencyTracker.Reset() @@ -293,7 +312,7 @@ func (p *pageContentOutput) Reset() { func (p *pageContentOutput) Content() (interface{}, error) { if p.p.s.initInit(p.initMain, p.p) { - return p.content, nil + return p.getContent(), nil } return nil, nil } @@ -305,7 +324,7 @@ func (p *pageContentOutput) FuzzyWordCount() int { func (p *pageContentOutput) Len() int { p.p.s.initInit(p.initMain, p.p) - return len(p.content) + return len(p.getContent()) } func (p *pageContentOutput) Plain() string { diff --git a/lazy/init.go b/lazy/init.go index e2e70072ea4..575f11d94ff 100644 --- a/lazy/init.go +++ b/lazy/init.go @@ -35,7 +35,6 @@ type Init struct { init onceMore out interface{} - err error f func() (interface{}, error) } @@ -47,13 +46,6 @@ func (ini *Init) Add(initFn func() (interface{}, error)) *Init { return ini.add(false, initFn) } -// AddWithTimeout is same as Add, but with a timeout that aborts initialization. -func (ini *Init) AddWithTimeout(timeout time.Duration, f func(ctx context.Context) (interface{}, error)) *Init { - return ini.Add(func() (interface{}, error) { - return ini.withTimeout(timeout, f) - }) -} - // Branch creates a new dependency branch based on an existing and adds // the given dependency as a child. func (ini *Init) Branch(initFn func() (interface{}, error)) *Init { @@ -76,15 +68,17 @@ func (ini *Init) Do() (interface{}, error) { panic("init is nil") } - ini.init.Do(func() { + var err error + + err = ini.init.Do(func() error { prev := ini.prev + var e error if prev != nil { // A branch. Initialize the ancestors. if prev.shouldInitialize() { - _, err := prev.Do() - if err != nil { - ini.err = err - return + _, e = prev.Do() + if e != nil { + return e } } else if prev.inProgress() { // Concurrent initialization. The following init func @@ -94,23 +88,26 @@ func (ini *Init) Do() (interface{}, error) { } if ini.f != nil { - ini.out, ini.err = ini.f() + ini.out, e = ini.f() + if e != nil { + return e + } } for _, child := range ini.children { if child.shouldInitialize() { - _, err := child.Do() - if err != nil { - ini.err = err - return + _, e = child.Do() + if e != nil { + return e } } } + return nil }) ini.wait() - return ini.out, ini.err + return ini.out, err } // TODO(bep) investigate if we can use sync.Cond for this. @@ -172,15 +169,10 @@ func (ini *Init) withTimeout(timeout time.Duration, f func(ctx context.Context) defer cancel() c := make(chan verr, 1) - go func() { + go func(cv chan verr) { v, err := f(ctx) - select { - case <-ctx.Done(): - return - default: - c <- verr{v: v, err: err} - } - }() + c <- verr{v: v, err: err} + }(c) select { case <-ctx.Done(): diff --git a/lazy/init_test.go b/lazy/init_test.go index 2051f6b1a5f..71a2f3f432f 100644 --- a/lazy/init_test.go +++ b/lazy/init_test.go @@ -14,7 +14,6 @@ package lazy import ( - "context" "errors" "math/rand" "strings" @@ -107,53 +106,6 @@ func TestInit(t *testing.T) { } } -func TestInitAddWithTimeout(t *testing.T) { - c := qt.New(t) - - init := New().AddWithTimeout(100*time.Millisecond, func(ctx context.Context) (interface{}, error) { - return nil, nil - }) - - _, err := init.Do() - - c.Assert(err, qt.IsNil) -} - -func TestInitAddWithTimeoutTimeout(t *testing.T) { - c := qt.New(t) - - init := New().AddWithTimeout(100*time.Millisecond, func(ctx context.Context) (interface{}, error) { - time.Sleep(500 * time.Millisecond) - select { - case <-ctx.Done(): - return nil, nil - default: - } - t.Fatal("slept") - return nil, nil - }) - - _, err := init.Do() - - c.Assert(err, qt.Not(qt.IsNil)) - - c.Assert(err.Error(), qt.Contains, "timed out") - - time.Sleep(1 * time.Second) -} - -func TestInitAddWithTimeoutError(t *testing.T) { - c := qt.New(t) - - init := New().AddWithTimeout(100*time.Millisecond, func(ctx context.Context) (interface{}, error) { - return nil, errors.New("failed") - }) - - _, err := init.Do() - - c.Assert(err, qt.Not(qt.IsNil)) -} - type T struct { sync.Mutex V1 string @@ -220,3 +172,24 @@ func TestInitBranchOrder(t *testing.T) { c.Assert(state.V2, qt.Equals, "ABAB") } + +// Sometimes need to call the same *Init's Do method multiple times, e.g., when +// hugo server is running. In these cases, we need to make sure that an error +// returned by an earlier call does not persist in memory for later calls. +// See issue 7043 +func TestAvoidRepeatDoError(t *testing.T) { + r := false + i := New().Add(func() (interface{}, error) { + if r { + return nil, nil + } + return nil, errors.New("r is false") + }) + _, err := i.Do() + r = true + _, err = i.Do() + if err != nil { + t.Errorf("expected a nil error but got: %v", err) + } + +} diff --git a/lazy/once.go b/lazy/once.go index bdce12c330b..13fdf5a4e0a 100644 --- a/lazy/once.go +++ b/lazy/once.go @@ -30,15 +30,15 @@ type onceMore struct { done uint32 } -func (t *onceMore) Do(f func()) { +func (t *onceMore) Do(f func() error) error { if atomic.LoadUint32(&t.done) == 1 { - return + return nil } // f may call this Do and we would get a deadlock. locked := atomic.CompareAndSwapUint32(&t.lock, 0, 1) if !locked { - return + return nil } defer atomic.StoreUint32(&t.lock, 0) @@ -47,10 +47,10 @@ func (t *onceMore) Do(f func()) { // Double check if t.done == 1 { - return + return nil } defer atomic.StoreUint32(&t.done, 1) - f() + return f() } func (t *onceMore) InProgress() bool {