From 25cbb67949a29168fe22878d215ad49bce416fb1 Mon Sep 17 00:00:00 2001 From: Abhinav Gupta Date: Sun, 22 Oct 2023 11:15:24 -0700 Subject: [PATCH] internal/stack: Use control flow for state (#110) In anticipation of parsing more information from stack traces, make the stack trace parsing logic more manageable by moving it from a state machine into a layout closer to a recursive descent parser. That is, instead of a central loop that reads input line-by-line and needs to manage its various states: current, result := ... for { input := read() if cond(input) { result.append(current) current = startNew(input) } else { current = accumulate(input) } } result = flush(current) Break it down so that parsing of individual results is its own function, representing the state machine via control flow. result := ... for { input := read() if cond(input) { result.append(parseOne()) } } // where func parseOne(input) { value := ... for ; !cond(input); input = read() { value = accumulate(input) } return value } The net effect of this is to make the parsing logic more maintainable once it gets more complex -- adds more states. For example, to parse more information for individual stacks with a state machine, we'd have to make the main loop more complex. State for an individual stack (e.g. "all the functions in the stack") will leak into the state management for the whole state machine. On the other hand, with this method, we'll only modify parseStack, keeping its responsibility encapsulated to parsing a single stack trace. This idea was also demonstrated recently in the first section of [Storing Data in Control flow by Russ Cox][1]. [1]: https://research.swtch.com/pcdata#step --- To make it easy to write this parser, we switch from bufio.Reader to bufio.Scanner, and wrap it with the ability to "Unscan": basically "don't move forward on next Scan()". Lastly, we need to bump the `go` directive in go.mod to Go 1.20 to allow use of errors.Join.
--- go.mod | 2 +- internal/stack/scan.go | 56 ++++++++++++++ internal/stack/scan_test.go | 48 ++++++++++++ internal/stack/stacks.go | 137 +++++++++++++++++++++++----------- internal/stack/stacks_test.go | 42 ++++++++++- 5 files changed, 241 insertions(+), 44 deletions(-) create mode 100644 internal/stack/scan.go create mode 100644 internal/stack/scan_test.go diff --git a/go.mod b/go.mod index 926ce59..af70901 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module go.uber.org/goleak -go 1.18 +go 1.20 require github.com/stretchr/testify v1.8.0 diff --git a/internal/stack/scan.go b/internal/stack/scan.go new file mode 100644 index 0000000..4b7ac84 --- /dev/null +++ b/internal/stack/scan.go @@ -0,0 +1,56 @@ +// Copyright (c) 2023 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. 
+ +package stack + +import ( + "bufio" + "io" +) + +// scanner provides a bufio.Scanner the ability to Unscan, +// which allows the current token to be read again +// after the next Scan. +type scanner struct { + *bufio.Scanner + + unscanned bool +} + +func newScanner(r io.Reader) *scanner { + return &scanner{Scanner: bufio.NewScanner(r)} +} + +func (s *scanner) Scan() bool { + if s.unscanned { + s.unscanned = false + return true + } + return s.Scanner.Scan() +} + +// Unscan stops the scanner from advancing its position +// for the next Scan. +// +// Bytes and Text will return the same token after next Scan +// that they do right now. +func (s *scanner) Unscan() { + s.unscanned = true +} diff --git a/internal/stack/scan_test.go b/internal/stack/scan_test.go new file mode 100644 index 0000000..32a7c54 --- /dev/null +++ b/internal/stack/scan_test.go @@ -0,0 +1,48 @@ +// Copyright (c) 2023 Uber Technologies, Inc. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +// THE SOFTWARE. + +package stack + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestScanner(t *testing.T) { + scanner := newScanner(strings.NewReader("foo\nbar\nbaz\n")) + + require.True(t, scanner.Scan()) + assert.Equal(t, "foo", scanner.Text()) + + require.True(t, scanner.Scan()) + assert.Equal(t, "bar", scanner.Text()) + + scanner.Unscan() + assert.Equal(t, "bar", scanner.Text()) + + require.True(t, scanner.Scan()) + assert.Equal(t, "bar", scanner.Text()) + + require.True(t, scanner.Scan()) + assert.Equal(t, "baz", scanner.Text()) +} diff --git a/internal/stack/stacks.go b/internal/stack/stacks.go index 94f82e4..5c4b9c4 100644 --- a/internal/stack/stacks.go +++ b/internal/stack/stacks.go @@ -1,4 +1,4 @@ -// Copyright (c) 2017 Uber Technologies, Inc. +// Copyright (c) 2017-2023 Uber Technologies, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -21,8 +21,8 @@ package stack import ( - "bufio" "bytes" + "errors" "fmt" "io" "runtime" @@ -37,7 +37,9 @@ type Stack struct { id int state string firstFunction string - fullStack *bytes.Buffer + + // Full, raw stack trace. + fullStack string } // ID returns the goroutine ID. @@ -52,7 +54,7 @@ func (s Stack) State() string { // Full returns the full stack trace for this goroutine. func (s Stack) Full() string { - return s.fullStack.String() + return s.fullStack } // FirstFunction returns the name of the first function on the stack. 
@@ -67,45 +69,92 @@ func (s Stack) String() string { } func getStacks(all bool) []Stack { - var stacks []Stack + trace := getStackBuffer(all) + stacks, err := newStackParser(bytes.NewReader(trace)).Parse() + if err != nil { + // Well-formed stack traces should never fail to parse. + // If they do, it's a bug in this package. + // Panic so we can fix it. + panic(fmt.Sprintf("Failed to parse stack trace: %v\n%s", err, trace)) + } + return stacks +} - var curStack *Stack - stackReader := bufio.NewReader(bytes.NewReader(getStackBuffer(all))) - for { - line, err := stackReader.ReadString('\n') - if err == io.EOF { - break - } - if err != nil { - // We're reading using bytes.NewReader which should never fail. - panic("bufio.NewReader failed on a fixed string") - } +type stackParser struct { + scan *scanner + stacks []Stack + errors []error +} + +func newStackParser(r io.Reader) *stackParser { + return &stackParser{ + scan: newScanner(r), + } +} + +func (p *stackParser) Parse() ([]Stack, error) { + for p.scan.Scan() { + line := p.scan.Text() // If we see the goroutine header, start a new stack. - isFirstLine := false if strings.HasPrefix(line, "goroutine ") { - // flush any previous stack - if curStack != nil { - stacks = append(stacks, *curStack) - } - id, goState := parseGoStackHeader(line) - curStack = &Stack{ - id: id, - state: goState, - fullStack: &bytes.Buffer{}, + stack, err := p.parseStack(line) + if err != nil { + p.errors = append(p.errors, err) + continue } - isFirstLine = true - } - curStack.fullStack.WriteString(line) - if !isFirstLine && curStack.firstFunction == "" { - curStack.firstFunction = parseFirstFunc(line) + p.stacks = append(p.stacks, stack) } } - if curStack != nil { - stacks = append(stacks, *curStack) + p.errors = append(p.errors, p.scan.Err()) + return p.stacks, errors.Join(p.errors...) +} + +// parseStack parses a single stack trace from the given scanner. 
+// line is the first line of the stack trace, which should look like: +// +// goroutine 123 [runnable]: +func (p *stackParser) parseStack(line string) (Stack, error) { + id, state, err := parseGoStackHeader(line) + if err != nil { + return Stack{}, fmt.Errorf("parse header: %w", err) } - return stacks + + // Read the rest of the stack trace. + var ( + firstFunction string + fullStack bytes.Buffer + ) + for p.scan.Scan() { + line := p.scan.Text() + + if strings.HasPrefix(line, "goroutine ") { + // If we see the goroutine header, + // it's the end of this stack. + // Unscan so the next Scan sees the same line. + p.scan.Unscan() + break + } + + fullStack.WriteString(line) + fullStack.WriteByte('\n') // scanner trims the newline + + // The first line after the header is the top of the stack. + if firstFunction == "" { + firstFunction, err = parseFirstFunc(line) + if err != nil { + return Stack{}, fmt.Errorf("extract function: %w", err) + } + } + } + + return Stack{ + id: id, + state: state, + firstFunction: firstFunction, + fullStack: fullStack.String(), + }, nil } // All returns the stacks for all running goroutines. @@ -127,29 +176,33 @@ func getStackBuffer(all bool) []byte { } } -func parseFirstFunc(line string) string { +func parseFirstFunc(line string) (string, error) { line = strings.TrimSpace(line) if idx := strings.LastIndex(line, "("); idx > 0 { - return line[:idx] + return line[:idx], nil } - panic(fmt.Sprintf("function calls missing parents: %q", line)) + return "", fmt.Errorf("no function found: %q", line) } // parseGoStackHeader parses a stack header that looks like: // goroutine 643 [runnable]:\n // And returns the goroutine ID, and the state. -func parseGoStackHeader(line string) (goroutineID int, state string) { - line = strings.TrimSuffix(line, ":\n") +func parseGoStackHeader(line string) (goroutineID int, state string, err error) { + // The scanner will have already trimmed the "\n", + // but we'll guard against it just in case. 
+ // + // Trimming them separately makes them both optional. + line = strings.TrimSuffix(strings.TrimSuffix(line, ":"), "\n") parts := strings.SplitN(line, " ", 3) if len(parts) != 3 { - panic(fmt.Sprintf("unexpected stack header format: %q", line)) + return 0, "", fmt.Errorf("unexpected format: %q", line) } id, err := strconv.Atoi(parts[1]) if err != nil { - panic(fmt.Sprintf("failed to parse goroutine ID: %v in line %q", parts[1], line)) + return 0, "", fmt.Errorf("bad goroutine ID %q in line %q", parts[1], line) } state = strings.TrimSuffix(strings.TrimPrefix(parts[2], "["), "]") - return id, state + return id, state, nil } diff --git a/internal/stack/stacks_test.go b/internal/stack/stacks_test.go index 646dd2a..c324334 100644 --- a/internal/stack/stacks_test.go +++ b/internal/stack/stacks_test.go @@ -1,4 +1,4 @@ -// Copyright (c) 2017 Uber Technologies, Inc. +// Copyright (c) 2017-2023 Uber Technologies, Inc. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -134,6 +134,46 @@ func TestAllLargeStack(t *testing.T) { close(done) } +func TestParseStackErrors(t *testing.T) { + tests := []struct { + name string + give string + wantErr string + }{ + { + name: "bad goroutine ID", + give: "goroutine no-number [running]:", + wantErr: `bad goroutine ID "no-number"`, + }, + { + name: "not enough parts", + give: "goroutine [running]:", + wantErr: `unexpected format`, + }, + { + name: "bad function name", + give: joinLines( + "goroutine 1 [running]:", + "example.com/foo/bar.baz", // no arguments + " example.com/foo/bar.go:123", + ), + wantErr: `no function found`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, err := newStackParser(strings.NewReader(tt.give)).Parse() + require.Error(t, err) + assert.ErrorContains(t, err, tt.wantErr) + }) + } +} + +func joinLines(lines ...string) string { + return strings.Join(lines, "\n") + 
"\n" +} + type byGoroutineID []Stack func (ss byGoroutineID) Len() int { return len(ss) }