From c538b4e9949aa032ec14c82465b7d70fa34a97fd Mon Sep 17 00:00:00 2001 From: Alan Donovan Date: Mon, 6 Nov 2023 19:14:32 -0500 Subject: [PATCH] internal/cmd/deadcode: add -whylive=function flag The -whylive flag explains why a function is live (not dead code), by displaying an arbitrary shortest path to the function from one of the main entrypoints. Call paths from main packages (not tests), from main functions (not init functions), and involving only static calls are preferred. The output logic is unified so that the usual mode and -whylive both compute a list of JSON objects, which are then formatted by -json or -format=template. Also, a test. Also, fix the test framework to correctly handle a sequence of commands(!). Fixes golang/go#61263 Change-Id: I6aafc851c9c57e27a0a8f0d723e20fb2f8b57ad7 Reviewed-on: https://go-review.googlesource.com/c/tools/+/540219 Reviewed-by: Robert Findley LUCI-TryBot-Result: Go LUCI --- internal/cmd/deadcode/deadcode.go | 313 ++++++++++++++---- internal/cmd/deadcode/deadcode_test.go | 118 ++++--- internal/cmd/deadcode/doc.go | 68 +++- internal/cmd/deadcode/testdata/jsonflag.txtar | 5 +- internal/cmd/deadcode/testdata/whylive.txtar | 56 ++++ 5 files changed, 441 insertions(+), 119 deletions(-) create mode 100644 internal/cmd/deadcode/testdata/whylive.txtar diff --git a/internal/cmd/deadcode/deadcode.go b/internal/cmd/deadcode/deadcode.go index e01c3aaf00b..890fcc14f19 100644 --- a/internal/cmd/deadcode/deadcode.go +++ b/internal/cmd/deadcode/deadcode.go @@ -14,7 +14,7 @@ import ( "fmt" "go/ast" "go/token" - "html/template" + "go/types" "io" "log" "os" @@ -23,7 +23,9 @@ import ( "runtime/pprof" "sort" "strings" + "text/template" + "golang.org/x/tools/go/callgraph" "golang.org/x/tools/go/callgraph/rta" "golang.org/x/tools/go/packages" "golang.org/x/tools/go/ssa" @@ -40,6 +42,7 @@ var ( filterFlag = flag.String("filter", "", "report only packages matching this regular expression (default: module of first package)") generatedFlag = flag.Bool("generated", false, "include dead functions in generated Go files") + whyLiveFlag = flag.String("whylive", "", "show a path from main to the named function") formatFlag = flag.String("format", "", "format output records using template") jsonFlag = flag.Bool("json", false, "output JSON records") cpuProfile = flag.String("cpuprofile", "", "write CPU profile to this file") @@ -95,14 +98,12 @@ func main() { }() } - var tmpl *template.Template + // Reject bad output options early. if *formatFlag != "" { if *jsonFlag { log.Fatalf("you cannot specify both -format=template and -json") } - var err error - tmpl, err = template.New("deadcode").Parse(*formatFlag) - if err != nil { + if _, err := template.New("deadcode").Parse(*formatFlag); err != nil { log.Fatalf("invalid -format: %v", err) } } @@ -162,8 +163,8 @@ func main() { } // Compute the reachabilty from main. - // (We don't actually build a call graph.) - res := rta.Analyze(roots, false) + // (Build a call graph only for -whylive.) + res := rta.Analyze(roots, *whyLiveFlag != "") // Subtle: the -test flag causes us to analyze test variants // such as "package p as compiled for p.test" or even "for q.test". @@ -178,11 +179,77 @@ func main() { // to packages "p" and "p [p.test]" were parsed only once.) reachablePosn := make(map[token.Position]bool) for fn := range res.Reachable { - if fn.Pos().IsValid() { + if fn.Pos().IsValid() || fn.Name() == "init" { reachablePosn[prog.Fset.Position(fn.Pos())] = true } } + // The -whylive=fn flag causes deadcode to explain why a function + // is not dead, by showing a path to it from some root. + if *whyLiveFlag != "" { + targets := make(map[*ssa.Function]bool) + for fn := range ssautil.AllFunctions(prog) { + if fn.String() == *whyLiveFlag { + targets[fn] = true + } + } + if len(targets) == 0 { + // Function is not part of the program. + // + // TODO(adonovan): improve the UX here in case + // of spelling or syntax mistakes. Some ideas: + // - a cmd/callgraph command to enumerate + // available functions. + // - a deadcode -live flag to compute the complement. + // - a syntax hint: example.com/pkg.Func or (example.com/pkg.Type).Method + // - report the element of AllFunctions with the smallest + // Levenshtein distance from *whyLiveFlag. + // - permit -whylive=regexp. But beware of spurious + // matches (e.g. fmt.Print matches fmt.Println) + // and the annoyance of having to quote parens (*T).f. + log.Fatalf("function %q not found in program", *whyLiveFlag) + } + + // Opt: remove the unreachable ones. + for fn := range targets { + if !reachablePosn[prog.Fset.Position(fn.Pos())] { + delete(targets, fn) + } + } + if len(targets) == 0 { + log.Fatalf("function %s is dead code", *whyLiveFlag) + } + + root, path := pathSearch(roots, res, targets) + if root == nil { + // RTA doesn't add callgraph edges for reflective calls. + log.Fatalf("%s is reachable only through reflection", *whyLiveFlag) + } + if len(path) == 0 { + // No edges => one of the targets is a root. + // Rather than (confusingly) print nothing, make this an error. + log.Fatalf("%s is a root", root.Func) + } + + // Build a list of jsonEdge records + // to print as -json or -format=template. + var edges []any + for _, edge := range path { + edges = append(edges, jsonEdge{ + Initial: cond(len(edges) == 0, edge.Caller.Func.String(), ""), + Kind: cond(isStaticCall(edge), "static", "dynamic"), + Posn: toJSONPosition(prog.Fset.Position(edge.Site.Pos())), + Callee: edge.Callee.Func.String(), + }) + } + format := `{{if .Initial}}{{printf "%19s%s\n" "" .Initial}}{{end}}{{printf "%8s@L%.4d --> %s" .Kind .Posn.Line .Callee}}` + if *formatFlag != "" { + format = *formatFlag + } + printObjects(format, edges) + return + } + // Group unreachable functions by package path. byPkgPath := make(map[string]map[*ssa.Function]bool) for fn := range ssautil.AllFunctions(prog) { @@ -220,14 +287,9 @@ func main() { } } - var packages []jsonPackage - - // Report dead functions grouped by packages. - // TODO(adonovan): use maps.Keys, twice. - pkgpaths := make([]string, 0, len(byPkgPath)) - for pkgpath := range byPkgPath { - pkgpaths = append(pkgpaths, pkgpath) - } + // Build array of jsonPackage objects. + var packages []any + pkgpaths := keys(byPkgPath) sort.Strings(pkgpaths) for _, pkgpath := range pkgpaths { if !filter.MatchString(pkgpath) { @@ -240,10 +302,7 @@ func main() { // declaration order. This tends to keep related // methods such as (T).Marshal and (*T).Unmarshal // together better than sorting. - fns := make([]*ssa.Function, 0, len(m)) - for fn := range m { - fns = append(fns, fn) - } + fns := keys(m) sort.Slice(fns, func(i, j int) bool { xposn := prog.Fset.Position(fns[i].Pos()) yposn := prog.Fset.Position(fns[j].Pos()) @@ -268,7 +327,7 @@ func main() { functions = append(functions, jsonFunction{ Name: fn.String(), RelName: fn.RelString(fn.Pkg.Pkg), - Posn: posn.String(), + Posn: toJSONPosition(posn), Generated: gen, }) } @@ -278,44 +337,37 @@ func main() { }) } - // Format the output, in the manner of 'go list (-json|-f=template)'. - switch { - case *jsonFlag: - // -json - out, err := json.MarshalIndent(packages, "", "\t") + // Default format: functions grouped by package. + format := `{{println .Path}}{{range .Funcs}}{{printf "\t%s\n" .RelName}}{{end}}{{println}}` + if *formatFlag != "" { + format = *formatFlag + } + printObjects(format, packages) +} + +// printObjects formats an array of objects, either as JSON or using a +// template, following the manner of 'go list (-json|-f=template)'. +func printObjects(format string, objects []any) { + if *jsonFlag { + out, err := json.MarshalIndent(objects, "", "\t") if err != nil { log.Fatalf("internal error: %v", err) } os.Stdout.Write(out) + return + } - case tmpl != nil: - // -format=template - for _, p := range packages { - var buf bytes.Buffer - if err := tmpl.Execute(&buf, p); err != nil { - log.Fatal(err) - } - if n := buf.Len(); n == 0 || buf.Bytes()[n-1] != '\n' { - buf.WriteByte('\n') - } - os.Stdout.Write(buf.Bytes()) + // -format=template. Parse can't fail: we checked it earlier. + tmpl := template.Must(template.New("deadcode").Parse(format)) + for _, object := range objects { + var buf bytes.Buffer + if err := tmpl.Execute(&buf, object); err != nil { + log.Fatal(err) } - - default: - // functions grouped by package - for _, pkg := range packages { - seen := false - for _, fn := range pkg.Funcs { - if !seen { - seen = true - fmt.Println(pkg.Path) - } - fmt.Printf("\t%s\n", fn.RelName) - } - if seen { - fmt.Println() - } + if n := buf.Len(); n == 0 || buf.Bytes()[n-1] != '\n' { + buf.WriteByte('\n') } + os.Stdout.Write(buf.Bytes()) } } @@ -358,15 +410,117 @@ func generator(file *ast.File) (string, bool) { return "", false } +// pathSearch returns the shortest path from one of the roots to one +// of the targets (along with the root itself), or zero if no path was found. +func pathSearch(roots []*ssa.Function, res *rta.Result, targets map[*ssa.Function]bool) (*callgraph.Node, []*callgraph.Edge) { + // Search breadth-first (for shortest path) from the root. + // + // We don't use the virtual CallGraph.Root node as we wish to + // choose the order in which we search entrypoints: + // non-test packages before test packages, + // main functions before init functions. + + // Sort roots into preferred order. + importsTesting := func(fn *ssa.Function) bool { + isTesting := func(p *types.Package) bool { return p.Path() == "testing" } + return containsFunc(fn.Pkg.Pkg.Imports(), isTesting) + } + sort.Slice(roots, func(i, j int) bool { + x, y := roots[i], roots[j] + xtest := importsTesting(x) + ytest := importsTesting(y) + if xtest != ytest { + return !xtest // non-tests before tests + } + xinit := x.Name() == "init" + yinit := y.Name() == "init" + if xinit != yinit { + return !xinit // mains before inits + } + return false + }) + + search := func(allowDynamic bool) (*callgraph.Node, []*callgraph.Edge) { + // seen maps each encountered node to its predecessor on the + // path to a root node, or to nil for root itself. + seen := make(map[*callgraph.Node]*callgraph.Edge) + bfs := func(root *callgraph.Node) []*callgraph.Edge { + queue := []*callgraph.Node{root} + seen[root] = nil + for len(queue) > 0 { + node := queue[0] + queue = queue[1:] + + // found a path? + if targets[node.Func] { + path := []*callgraph.Edge{} // non-nil in case len(path)=0 + for { + edge := seen[node] + if edge == nil { + reverse(path) + return path + } + path = append(path, edge) + node = edge.Caller + } + } + + for _, edge := range node.Out { + if allowDynamic || isStaticCall(edge) { + if _, ok := seen[edge.Callee]; !ok { + seen[edge.Callee] = edge + queue = append(queue, edge.Callee) + } + } + } + } + return nil + } + for _, rootFn := range roots { + root := res.CallGraph.Nodes[rootFn] + if path := bfs(root); path != nil { + return root, path + } + } + return nil, nil + } + + for _, allowDynamic := range []bool{false, true} { + if root, path := search(allowDynamic); path != nil { + return root, path + } + } + + return nil, nil +} + +// -- utilities -- + +func isStaticCall(edge *callgraph.Edge) bool { + return edge.Site != nil && edge.Site.Common().StaticCallee() != nil +} + +func toJSONPosition(posn token.Position) jsonPosition { + return jsonPosition{posn.Filename, posn.Line, posn.Column} +} + +func cond[T any](cond bool, t, f T) T { + if cond { + return t + } else { + return f + } +} + // -- output protocol (for JSON or text/template) -- // Keep in sync with doc comment! type jsonFunction struct { - Name string // name (with package qualifier) - RelName string // name (sans package qualifier) - Posn string // position in form "filename:line:col" - Generated bool // function is declared in a generated .go file + Name string // name (with package qualifier) + RelName string // name (sans package qualifier) + Posn jsonPosition // file/line/column of declaration + Generated bool // function is declared in a generated .go file } func (f jsonFunction) String() string { return f.Name } @@ -377,3 +531,50 @@ type jsonPackage struct { } func (p jsonPackage) String() string { return p.Path } + +type jsonEdge struct { + Initial string `json:",omitempty"` // initial entrypoint (main or init); first edge only + Kind string // = static | dynamic + Posn jsonPosition + Callee string +} + +type jsonPosition struct { + File string + Line, Col int +} + +func (p jsonPosition) String() string { + return fmt.Sprintf("%s:%d:%d", p.File, p.Line, p.Col) +} + +// -- from the future -- + +// TODO(adonovan): use go1.22's slices and maps packages. + +func containsFunc[S ~[]E, E any](s S, f func(E) bool) bool { + return indexFunc(s, f) >= 0 +} + +func indexFunc[S ~[]E, E any](s S, f func(E) bool) int { + for i := range s { + if f(s[i]) { + return i + } + } + return -1 +} + +func reverse[S ~[]E, E any](s S) { + for i, j := 0, len(s)-1; i < j; i, j = i+1, j-1 { + s[i], s[j] = s[j], s[i] + } +} + +func keys[M ~map[K]V, K comparable, V any](m M) []K { + r := make([]K, 0, len(m)) + for k := range m { + r = append(r, k) + } + return r +} diff --git a/internal/cmd/deadcode/deadcode_test.go b/internal/cmd/deadcode/deadcode_test.go index 17c588b134e..f17a1227362 100644 --- a/internal/cmd/deadcode/deadcode_test.go +++ b/internal/cmd/deadcode/deadcode_test.go @@ -45,15 +45,33 @@ func Test(t *testing.T) { t.Fatal(err) } + // Write the archive files to the temp directory. + tmpdir := t.TempDir() + for _, f := range ar.Files { + filename := filepath.Join(tmpdir, f.Name) + if err := os.MkdirAll(filepath.Dir(filename), 0777); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filename, f.Data, 0666); err != nil { + t.Fatal(err) + } + } + // Parse archive comment as directives of these forms: // - // deadcode args... command-line arguments - // [!]want arg expected/unwanted string in output + // [!]deadcode args... command-line arguments + // [!]want arg expected/unwanted string in output (or stderr) // // Args may be Go-quoted strings. - var args []string - want := make(map[string]bool) // string -> sense - for _, line := range strings.Split(string(ar.Comment), "\n") { + type testcase struct { + linenum int + args []string + wantErr bool + want map[string]bool // string -> sense + } + var cases []*testcase + var current *testcase + for i, line := range strings.Split(string(ar.Comment), "\n") { line = strings.TrimSpace(line) if line == "" || line[0] == '#' { continue // skip blanks and comments @@ -64,55 +82,64 @@ func Test(t *testing.T) { t.Fatalf("cannot break line into words: %v (%s)", err, line) } switch kind := words[0]; kind { - case "deadcode": - args = words[1:] + case "deadcode", "!deadcode": + current = &testcase{ + linenum: i + 1, + want: make(map[string]bool), + args: words[1:], + wantErr: kind[0] == '!', + } + cases = append(cases, current) case "want", "!want": + if current == nil { + t.Fatalf("'want' directive must be after 'deadcode'") + } if len(words) != 2 { t.Fatalf("'want' directive needs argument <<%s>>", line) } - want[words[1]] = kind[0] != '!' + current.want[words[1]] = kind[0] != '!' default: t.Fatalf("%s: invalid directive %q", filename, kind) } } - // Write the archive files to the temp directory. - tmpdir := t.TempDir() - for _, f := range ar.Files { - filename := filepath.Join(tmpdir, f.Name) - if err := os.MkdirAll(filepath.Dir(filename), 0777); err != nil { - t.Fatal(err) - } - if err := os.WriteFile(filename, f.Data, 0666); err != nil { - t.Fatal(err) - } - } - - // Run the command. - cmd := exec.Command(exe, args...) - cmd.Stdout = new(bytes.Buffer) - cmd.Stderr = new(bytes.Buffer) - cmd.Dir = tmpdir - cmd.Env = append(os.Environ(), "GOPROXY=", "GO111MODULE=on") - if err := cmd.Run(); err != nil { - t.Fatalf("deadcode failed: %v (stderr=%s)", err, cmd.Stderr) - } - - // Check each want directive. - got := fmt.Sprint(cmd.Stdout) - for str, sense := range want { - ok := true - if strings.Contains(got, str) != sense { - if sense { - t.Errorf("missing %q", str) + for _, tc := range cases { + t.Run(fmt.Sprintf("L%d", tc.linenum), func(t *testing.T) { + // Run the command. + cmd := exec.Command(exe, tc.args...) + cmd.Stdout = new(bytes.Buffer) + cmd.Stderr = new(bytes.Buffer) + cmd.Dir = tmpdir + cmd.Env = append(os.Environ(), "GOPROXY=", "GO111MODULE=on") + var got string + if err := cmd.Run(); err != nil { + if !tc.wantErr { + t.Fatalf("deadcode failed: %v (stderr=%s)", err, cmd.Stderr) + } + got = fmt.Sprint(cmd.Stderr) } else { - t.Errorf("unwanted %q", str) + if tc.wantErr { + t.Fatalf("deadcode succeeded unexpectedly (stdout=%s)", cmd.Stdout) + } + got = fmt.Sprint(cmd.Stdout) } - ok = false - } - if !ok { - t.Errorf("got: <<%s>>", got) - } + + // Check each want directive. + for str, sense := range tc.want { + ok := true + if strings.Contains(got, str) != sense { + if sense { + t.Errorf("missing %q", str) + } else { + t.Errorf("unwanted %q", str) + } + ok = false + } + if !ok { + t.Errorf("got: <<%s>>", got) + } + } + }) } }) } @@ -137,10 +164,7 @@ func buildDeadcode(t *testing.T) string { func words(s string) ([]string, error) { var words []string for s != "" { - if s[0] == ' ' { - s = s[1:] - continue - } + s = strings.TrimSpace(s) var word string if s[0] == '"' || s[0] == '`' { prefix, err := strconv.QuotedPrefix(s) diff --git a/internal/cmd/deadcode/doc.go b/internal/cmd/deadcode/doc.go index c4874d6e262..3c0e7119200 100644 --- a/internal/cmd/deadcode/doc.go +++ b/internal/cmd/deadcode/doc.go @@ -61,20 +61,8 @@ The command supports three output formats. With no flags, the command prints dead functions grouped by package. -With the -json flag, the command prints an array of JSON Package -objects, as defined by this schema: - - type Package struct { - Path string - Funcs []Function - } - - type Function struct { - Name string // name (with package qualifier) - RelName string // name (sans package qualifier) - Posn string // position in form "filename:line:col" - Generated bool // function is declared in a generated .go file - } +With the -json flag, the command prints an array of Package +objects, as defined by the JSON schema (see below). With the -format=template flag, the command executes the specified template on each Package record. So, this template produces a result similar to the @@ -86,6 +74,58 @@ And this template shows only the list of source positions of dead functions: -format='{{range .Funcs}}{{println .Posn}}{{end}}' +# Why is a function not dead? + +The -whylive=function flag explain why the named function is not dead +by showing an arbitrary shortest path to it from one of the main functions. +(To enumerate the functions in a program, or for more sophisticated +call graph queries, use golang.org/x/tools/cmd/callgraph.) + +Fully static call paths are preferred over paths involving dynamic +edges, even if longer. Paths starting from a non-test package are +preferred over those from tests. Paths from main functions are +preferred over paths from init functions. + +The result is a list of Edge objects (see JSON schema below). +Again, the -json and -format=template flags may be used to control +the formatting of the list of Edge objects. +The default format shows, for each edge in the path, whether the call +is static or dynamic, and its source line number. For example: + + $ deadcode -whylive="(*bytes.Buffer).String" -test ./internal/cmd/deadcode/... + golang.org/x/tools/internal/cmd/deadcode.main + static@L0321 --> (*golang.org/x/tools/go/ssa.Function).RelString + static@L0428 --> (*golang.org/x/tools/go/ssa.Function).relMethod + static@L0452 --> golang.org/x/tools/go/ssa.relType + static@L0047 --> go/types.TypeString + static@L0051 --> (*bytes.Buffer).String + +# JSON schema + + type Package struct { + Path string // import path of package + Funcs []Function // list of dead functions within it + } + + type Function struct { + Name string // name (with package qualifier) + RelName string // name (sans package qualifier) + Posn Position // file/line/column of function declaration + Generated bool // function is declared in a generated .go file + } + + type Edge struct { + Initial string // initial entrypoint (main or init); first edge only + Kind string // = static | dynamic + Posn Position // file/line/column of call site + Callee string // target of the call + } + + type Position struct { + File string // name of file + Line, Col int // line and byte index, both 1-based + } + THIS TOOL IS EXPERIMENTAL and its interface may change. At some point it may be published at cmd/deadcode. In the meantime, please give us feedback at github.com/golang/go/issues. diff --git a/internal/cmd/deadcode/testdata/jsonflag.txtar b/internal/cmd/deadcode/testdata/jsonflag.txtar index cbf80ec1322..f0f3ab21bd0 100644 --- a/internal/cmd/deadcode/testdata/jsonflag.txtar +++ b/internal/cmd/deadcode/testdata/jsonflag.txtar @@ -6,6 +6,8 @@ deadcode -json example.com/p want `"Name": "example.com/p.Dead",` want `"RelName": "Dead",` want `"Generated": false` + want `"Line": 5,` + want `"Col": 6` -- go.mod -- module example.com @@ -14,7 +16,6 @@ go 1.18 -- p/p.go -- package main -func Dead() {} - func main() {} +func Dead() {} diff --git a/internal/cmd/deadcode/testdata/whylive.txtar b/internal/cmd/deadcode/testdata/whylive.txtar new file mode 100644 index 00000000000..9e7b0e6e4af --- /dev/null +++ b/internal/cmd/deadcode/testdata/whylive.txtar @@ -0,0 +1,56 @@ +# Test of -whylive flag. + +# The -whylive argument must be live. + +!deadcode -whylive=example.com.d example.com + want "function example.com.d is dead code" + +# A fully static path is preferred, even if longer. + + deadcode -whylive=example.com.c example.com + want " example.com.main" + want " static@L0004 --> example.com.a" + want " static@L0009 --> example.com.b" + want " static@L0012 --> example.com.c" + +# Dynamic edges are followed if necessary. +# (Note that main is preferred over init.) + + deadcode -whylive=example.com.f example.com + want " example.com.main" + want "dynamic@L0006 --> example.com.e" + want " static@L0017 --> example.com.f" + +# Degenerate case where target is itself a root. + +!deadcode -whylive=example.com.main example.com + want "example.com.main is a root" + +-- go.mod -- +module example.com +go 1.18 + +-- main.go -- +package main + +func main() { + a() + println(c, e) // c, e are address-taken + (func ())(nil)() // potential dynamic call to c, e +} +func a() { + b() +} +func b() { + c() +} +func c() +func d() +func e() { + f() +} +func f() + +func init() { + (func ())(nil)() // potential dynamic call to c, e +} \ No newline at end of file