From 57c1d80d6b0a24226918f9c4c80170bf44a2cd53 Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Fri, 20 Oct 2023 22:49:02 -0700 Subject: [PATCH 1/6] internal/stack: Use control flow for state In anticipation of parsing more information from stack traces, make the stack trace parsing logic more manageable by moving it from a state machine into a layout closer to a recursive descent parser. That is, instead of a central loop that reads input line-by-line and needs to manage its various states: current, result := ... for { input := read() if cond(input) { result.append(current) current = startNew(input) } else { current = accumulate(input) } } result = flush(current) Break it down so that parsing of individual results is its own function, representing the state machine via control flow. result := ... for { input := read() if cond(input) { result.append(parseOne()) } } // where func parseOne(input) { value := ... for ; !cond(input); input = read() { value = accumulate(input) } return value } The net effect of this is to make the parsing logic more maintainable once it gets more complex -- adds more states. For example, to parse more information for individual stacks with a state machine, we'd have to make the main loop more complex. State for an individual stack (e.g. "all the functions in the stack") will leak into the state management for the whole state machine. On the other hand, with this method, we'll only modify parseStack, keeping its responsibility encapsulated to parsing a single stack trace. This idea was also demonstrated recently in the first section of [Storing Data in Control Flow by Russ Cox][1]. [1]: https://research.swtch.com/pcdata#step --- To make it easy to write this parser, we switch from bufio.Reader to bufio.Scanner, and wrap it with the ability to "Unscan": basically "don't move forward on next Scan()". 
--- internal/stack/scan.go | 55 ++++++++++++++ internal/stack/scan_test.go | 46 ++++++++++++ internal/stack/stacks.go | 133 +++++++++++++++++++++++----------- internal/stack/stacks_test.go | 42 ++++++++++- 4 files changed, 233 insertions(+), 43 deletions(-) create mode 100644 internal/stack/scan.go create mode 100644 internal/stack/scan_test.go diff --git a/internal/stack/scan.go b/internal/stack/scan.go new file mode 100644 index 0000000..ab531ad --- /dev/null +++ b/internal/stack/scan.go @@ -0,0 +1,55 @@ +// Copyright (c) 2023 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package stack + +import ( + "bufio" + "io" +) + +// scanner provides a bufio.Scanner the ability to Unscan, +// which allows the current token to be read again +// after the next Scan. 
+type scanner struct { + *bufio.Scanner + + unscanned bool +} + +func newScanner(r io.Reader) *scanner { + return &scanner{Scanner: bufio.NewScanner(r)} +} + +func (s *scanner) Scan() bool { + if s.unscanned { + s.unscanned = false + return true + } + return s.Scanner.Scan() +} + +// Unscan moves the scanner back one token. +// +// Bytes and Text will return the same token after next Scan +// that they do right now. +func (s *scanner) Unscan() { + s.unscanned = true +} diff --git a/internal/stack/scan_test.go b/internal/stack/scan_test.go new file mode 100644 index 0000000..4355983 --- /dev/null +++ b/internal/stack/scan_test.go @@ -0,0 +1,46 @@ +// Copyright (c) 2023 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +package stack + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestScanner(t *testing.T) { + scanner := newScanner(strings.NewReader("foo\nbar\nbaz\n")) + + require.True(t, scanner.Scan()) + assert.Equal(t, "foo", scanner.Text()) + + require.True(t, scanner.Scan()) + assert.Equal(t, "bar", scanner.Text()) + + scanner.Unscan() + require.True(t, scanner.Scan()) + assert.Equal(t, "bar", scanner.Text()) + + require.True(t, scanner.Scan()) + assert.Equal(t, "baz", scanner.Text()) +} diff --git a/internal/stack/stacks.go b/internal/stack/stacks.go index 94f82e4..7b263db 100644 --- a/internal/stack/stacks.go +++ b/internal/stack/stacks.go @@ -1,4 +1,4 @@ -// Copyright (c) 2017 Uber Technologies, Inc. +// Copyright (c) 2017-2023 Uber Technologies, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -21,8 +21,8 @@ package stack import ( - "bufio" "bytes" + "errors" "fmt" "io" "runtime" @@ -37,7 +37,9 @@ type Stack struct { id int state string firstFunction string - fullStack *bytes.Buffer + + // Full, raw stack trace. + fullStack string } // ID returns the goroutine ID. @@ -52,7 +54,7 @@ func (s Stack) State() string { // Full returns the full stack trace for this goroutine. func (s Stack) Full() string { - return s.fullStack.String() + return s.fullStack } // FirstFunction returns the name of the first function on the stack. 
@@ -67,45 +69,88 @@ func (s Stack) String() string { } func getStacks(all bool) []Stack { - var stacks []Stack + stacks, err := newStackParser(bytes.NewReader(getStackBuffer(all))).Parse() + if err != nil { + panic(err) + } + return stacks +} - var curStack *Stack - stackReader := bufio.NewReader(bytes.NewReader(getStackBuffer(all))) - for { - line, err := stackReader.ReadString('\n') - if err == io.EOF { - break - } - if err != nil { - // We're reading using bytes.NewReader which should never fail. - panic("bufio.NewReader failed on a fixed string") - } +type stackParser struct { + scan *scanner + stacks []Stack + errors []error +} + +func newStackParser(r io.Reader) *stackParser { + return &stackParser{ + scan: newScanner(r), + } +} + +func (p *stackParser) Parse() ([]Stack, error) { + for p.scan.Scan() { + line := p.scan.Text() // If we see the goroutine header, start a new stack. - isFirstLine := false if strings.HasPrefix(line, "goroutine ") { - // flush any previous stack - if curStack != nil { - stacks = append(stacks, *curStack) - } - id, goState := parseGoStackHeader(line) - curStack = &Stack{ - id: id, - state: goState, - fullStack: &bytes.Buffer{}, + stack, err := p.parseStack(line) + if err != nil { + p.errors = append(p.errors, err) + } else { + p.stacks = append(p.stacks, stack) } - isFirstLine = true - } - curStack.fullStack.WriteString(line) - if !isFirstLine && curStack.firstFunction == "" { - curStack.firstFunction = parseFirstFunc(line) } } - if curStack != nil { - stacks = append(stacks, *curStack) + p.errors = append(p.errors, p.scan.Err()) + return p.stacks, errors.Join(p.errors...) +} + +// parseStack parses a single stack trace from the given scanner. 
+// line is the first line of the stack trace, which should look like: +// +// goroutine 123 [runnable]: +func (p *stackParser) parseStack(line string) (Stack, error) { + id, state, err := parseGoStackHeader(line) + if err != nil { + return Stack{}, fmt.Errorf("parse header: %w", err) } - return stacks + + // Read the rest of the stack trace. + var ( + firstFunction string + fullStack bytes.Buffer + ) + for p.scan.Scan() { + line := p.scan.Text() + + if strings.HasPrefix(line, "goroutine ") { + // If we see the goroutine header, + // it's the end of this stack. + // Unscan so the next Scan sees the same line. + p.scan.Unscan() + break + } + + fullStack.WriteString(line) + fullStack.WriteByte('\n') // scanner trims the newline + + // The first line after the header is the top of the stack. + if firstFunction == "" { + firstFunction, err = parseFirstFunc(line) + if err != nil { + return Stack{}, fmt.Errorf("extract function: %w", err) + } + } + } + + return Stack{ + id: id, + state: state, + firstFunction: firstFunction, + fullStack: fullStack.String(), + }, nil } // All returns the stacks for all running goroutines. @@ -127,29 +172,33 @@ func getStackBuffer(all bool) []byte { } } -func parseFirstFunc(line string) string { +func parseFirstFunc(line string) (string, error) { line = strings.TrimSpace(line) if idx := strings.LastIndex(line, "("); idx > 0 { - return line[:idx] + return line[:idx], nil } - panic(fmt.Sprintf("function calls missing parents: %q", line)) + return "", fmt.Errorf("no function found: %q", line) } // parseGoStackHeader parses a stack header that looks like: // goroutine 643 [runnable]:\n // And returns the goroutine ID, and the state. -func parseGoStackHeader(line string) (goroutineID int, state string) { - line = strings.TrimSuffix(line, ":\n") +func parseGoStackHeader(line string) (goroutineID int, state string, err error) { + // The scanner will have already trimmed the "\n", + // but we'll guard against it just in case. 
+ // + // Trimming them separately makes them both optional. + line = strings.TrimSuffix(strings.TrimSuffix(line, ":"), "\n") parts := strings.SplitN(line, " ", 3) if len(parts) != 3 { - panic(fmt.Sprintf("unexpected stack header format: %q", line)) + return 0, "", fmt.Errorf("unexpected format: %q", line) } id, err := strconv.Atoi(parts[1]) if err != nil { - panic(fmt.Sprintf("failed to parse goroutine ID: %v in line %q", parts[1], line)) + return 0, "", fmt.Errorf("bad goroutine ID %q in line %q", parts[1], line) } state = strings.TrimSuffix(strings.TrimPrefix(parts[2], "["), "]") - return id, state + return id, state, nil } diff --git a/internal/stack/stacks_test.go b/internal/stack/stacks_test.go index 646dd2a..c324334 100644 --- a/internal/stack/stacks_test.go +++ b/internal/stack/stacks_test.go @@ -1,4 +1,4 @@ -// Copyright (c) 2017 Uber Technologies, Inc. +// Copyright (c) 2017-2023 Uber Technologies, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -134,6 +134,46 @@ func TestAllLargeStack(t *testing.T) { close(done) } +func TestParseStackErrors(t *testing.T) { + tests := []struct { + name string + give string + wantErr string + }{ + { + name: "bad goroutine ID", + give: "goroutine no-number [running]:", + wantErr: `bad goroutine ID "no-number"`, + }, + { + name: "not enough parts", + give: "goroutine [running]:", + wantErr: `unexpected format`, + }, + { + name: "bad function name", + give: joinLines( + "goroutine 1 [running]:", + "example.com/foo/bar.baz", // no arguments + " example.com/foo/bar.go:123", + ), + wantErr: `no function found`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, err := newStackParser(strings.NewReader(tt.give)).Parse() + require.Error(t, err) + assert.ErrorContains(t, err, tt.wantErr) + }) + } +} + +func joinLines(lines ...string) string { + return strings.Join(lines, "\n") + 
"\n" +} + type byGoroutineID []Stack func (ss byGoroutineID) Len() int { return len(ss) } From 2f2c1d521dba507437c429c7e475e2345642592d Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Sat, 21 Oct 2023 13:27:58 -0700 Subject: [PATCH 2/6] go.mod: Bump to Go 1.20 This is needed for use of `errors.Join`. --- go.mod | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 926ce59..af70901 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module go.uber.org/goleak -go 1.18 +go 1.20 require github.com/stretchr/testify v1.8.0 From 7de2b92e5660865c8769faf119efca0ac191cb8c Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Sun, 22 Oct 2023 09:16:37 -0700 Subject: [PATCH 3/6] test: Verify Unscan returns the same token --- internal/stack/scan_test.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/internal/stack/scan_test.go b/internal/stack/scan_test.go index 4355983..32a7c54 100644 --- a/internal/stack/scan_test.go +++ b/internal/stack/scan_test.go @@ -38,6 +38,8 @@ func TestScanner(t *testing.T) { assert.Equal(t, "bar", scanner.Text()) scanner.Unscan() + assert.Equal(t, "bar", scanner.Text()) + require.True(t, scanner.Scan()) assert.Equal(t, "bar", scanner.Text()) From 7973cbbab521302486c788d1a7f5b5004d0c0a4c Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Sun, 22 Oct 2023 10:44:09 -0700 Subject: [PATCH 4/6] getStacks: Explain the panic, include the stack trace --- internal/stack/stacks.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/internal/stack/stacks.go b/internal/stack/stacks.go index 7b263db..67fc988 100644 --- a/internal/stack/stacks.go +++ b/internal/stack/stacks.go @@ -69,9 +69,13 @@ func (s Stack) String() string { } func getStacks(all bool) []Stack { - stacks, err := newStackParser(bytes.NewReader(getStackBuffer(all))).Parse() + trace := getStackBuffer(all) + stacks, err := newStackParser(bytes.NewReader(trace)).Parse() if err != nil { - panic(err) + // Well-formed stack traces should never 
fail to parse. + // If they do, it's a bug in this package. + // Panic so we can fix it. + panic(fmt.Sprintf("Failed to parse stack trace: %v\n%s", err, trace)) } return stacks } From 6396c7f4eeb25036a4d9c852b27cca09d819a2c2 Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Sun, 22 Oct 2023 10:44:59 -0700 Subject: [PATCH 5/6] stackParser.Parse: Don't nest success case --- internal/stack/stacks.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/stack/stacks.go b/internal/stack/stacks.go index 67fc988..5c4b9c4 100644 --- a/internal/stack/stacks.go +++ b/internal/stack/stacks.go @@ -101,9 +101,9 @@ func (p *stackParser) Parse() ([]Stack, error) { stack, err := p.parseStack(line) if err != nil { p.errors = append(p.errors, err) - } else { - p.stacks = append(p.stacks, stack) + continue } + p.stacks = append(p.stacks, stack) } } From aee45b60768ece8d3b8b3ed27204b903ef2aa9fa Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Sun, 22 Oct 2023 10:48:23 -0700 Subject: [PATCH 6/6] doc(Unscan): Clarify that it doesn't move the token --- internal/stack/scan.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/internal/stack/scan.go b/internal/stack/scan.go index ab531ad..4b7ac84 100644 --- a/internal/stack/scan.go +++ b/internal/stack/scan.go @@ -46,7 +46,8 @@ func (s *scanner) Scan() bool { return s.Scanner.Scan() } -// Unscan moves the scanner back one token. +// Unscan stops the scanner from advancing its position +// for the next Scan. // // Bytes and Text will return the same token after next Scan // that they do right now.