Skip to content

Commit

Permalink
cmd/go: detect when package or binary is stale due to removed source file
Browse files Browse the repository at this point in the history

The go command uses file modification times to decide when a
package is out of date: if the .a file is older than a source file,
the .a file needs to be rebuilt. This scheme breaks down when
multiple source files compile into a single .a file: if one source file
is removed but no other changes are made, there is no indication
that the .a file is out of date.

The fix is to store a value called a build ID in the package archive itself.
The build ID is a hash of the names of all source files compiled into the package.
A later go command can read the build ID out of the package archive
and compare to the build ID derived from the list of source files it now
sees in the directory. If the build IDs differ, the file list has changed,
and the package must be rebuilt.

There is a cost here: when scanning a package directory, in addition
to reading the beginning of every source file for build tags and imports,
the go command now also reads the beginning of the associated
package archive, for the build ID. This is at most a doubling in the
number of files read. On my 2012 MacBook Pro, the time for
'go list std' increases from about 0.215 seconds to about 0.23 seconds.

For executable binaries, the approach is the same except that the
build ID information is stored in a trailer at the end of the executable file.
It remains to be seen if anything objects to the trailer.
I don't expect problems except maybe on Plan 9.

Fixes #3895.

Change-Id: I21b4ebf5890c1a39e4a013eabe1ddbb5f3510c04
Reviewed-on: https://go-review.googlesource.com/9154
Reviewed-by: Ian Lance Taylor <iant@golang.org>
  • Loading branch information
rsc committed Jun 4, 2015
1 parent 81d5810 commit 7b87631
Show file tree
Hide file tree
Showing 4 changed files with 306 additions and 59 deletions.
61 changes: 4 additions & 57 deletions src/cmd/dist/build.go
Original file line number Diff line number Diff line change
Expand Up @@ -888,72 +888,19 @@ var buildorder = []string{
"text/template",
"go/doc",
"go/build",
"hash",
"crypto",
"crypto/sha1",
"cmd/go",
}

// cleantab records the directories to clean in 'go clean'.
// It is bigger than the buildorder because we clean all the
// compilers but build only the $GOARCH ones.
//
// Each entry is a directory name relative to the src directory under
// goroot: clean joins it as "<goroot>/src/<entry>" before scanning the
// directory for generated files to remove.
var cleantab = []string{
// Commands and C libraries.
"cmd/compile",
"cmd/go",
"cmd/link",
"cmd/old5a",
"cmd/old6a",
"cmd/old8a",
"cmd/old9a",

// Go packages.
"bufio",
"bytes",
"container/heap",
"encoding",
"encoding/base64",
"encoding/json",
"errors",
"flag",
"fmt",
"go/ast",
"go/build",
"go/doc",
"go/parser",
"go/scanner",
"go/token",
"io",
"io/ioutil",
"log",
"math",
"net/url",
"os",
"os/exec",
"path",
"path/filepath",
"reflect",
"regexp",
"regexp/syntax",
"runtime",
"sort",
"strconv",
"strings",
"sync",
"sync/atomic",
"syscall",
"text/template",
"text/template/parse",
"time",
"unicode",
"unicode/utf16",
"unicode/utf8",
}

// runtimegen lists two file names, a C header and a Go source file.
// NOTE(review): judging by the name and the "z" prefix these are presumably
// files generated for the runtime package; the code that consumes this list
// is not visible here, so confirm against its users.
var runtimegen = []string{
"zaexperiment.h",
"zversion.go",
}

func clean() {
for _, name := range cleantab {
for _, name := range buildorder {
path := pathf("%s/src/%s", goroot, name)
// Remove generated files.
for _, elem := range xreaddir(path) {
Expand Down
23 changes: 23 additions & 0 deletions src/cmd/go/build.go
Original file line number Diff line number Diff line change
Expand Up @@ -1406,6 +1406,26 @@ func (b *builder) build(a *action) (err error) {
if err := buildToolchain.ld(b, a.p, a.target, all, a.objpkg, objects); err != nil {
return err
}

// Write build ID to end of binary.
// We could try to put it in a custom section or some such,
// but then we'd need different code for ELF, Mach-O, PE, and Plan 9.
// Instead, just append to the binary. No one should care.
// Issue #11048 is to fix this for ELF and Mach-O at least.
if buildToolchain == (gcToolchain{}) && a.p.buildID != "" {
f, err := os.OpenFile(a.target, os.O_WRONLY|os.O_APPEND, 0)
if err != nil {
return err
}
defer f.Close()
// Note: This string must match readBuildIDFromBinary in pkg.go.
if _, err := fmt.Fprintf(f, "\x00\n\ngo binary\nbuild id %q\nend go binary\n", a.p.buildID); err != nil {
return err
}
if err := f.Close(); err != nil {
return err
}
}
}

return nil
Expand Down Expand Up @@ -2131,6 +2151,9 @@ func (gcToolchain) gc(b *builder, p *Package, archive, obj string, asmhdr bool,
if buildContext.InstallSuffix != "" {
gcargs = append(gcargs, "-installsuffix", buildContext.InstallSuffix)
}
if p.buildID != "" {
gcargs = append(gcargs, "-buildid", p.buildID)
}

args := []interface{}{buildToolExec, tool("compile"), "-o", ofile, "-trimpath", b.work, buildGcflags, gcargs, "-D", p.localPrefix, importArgs}
if ofile == archive {
Expand Down
213 changes: 211 additions & 2 deletions src/cmd/go/pkg.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,20 @@ package main

import (
"bytes"
"crypto/sha1"
"errors"
"fmt"
"go/build"
"go/scanner"
"go/token"
"io"
"io/ioutil"
"os"
pathpkg "path"
"path/filepath"
"runtime"
"sort"
"strconv"
"strings"
"time"
"unicode"
Expand Down Expand Up @@ -95,6 +98,7 @@ type Package struct {
coverMode string // preprocess Go source files with the coverage tool in this mode
coverVars map[string]*CoverVar // variables created by coverage analysis
omitDWARF bool // tell linker not to write DWARF information
buildID string // expected build ID for generated package
}

// CoverVar holds the name of the generated coverage variables targeting the named file.
Expand Down Expand Up @@ -687,6 +691,36 @@ func (p *Package) load(stk *importStack, bp *build.Package, err error) *Package
}
}

// Compute build ID for this package.
// Build ID is hash of information we want to detect changes in.
// The mtime-based checks in computeStale take care of most
// of that information, but they cannot detect the removal of a
// source file from a directory (with no changes to files that remain
// and no new files in that directory). We hash the list of source
// files (without full path, to allow moving the entire tree)
// so that if one is removed, we detect it via the build IDs.
// In the future we might include other relevant information,
// like build tags or whether we're using the race detector or
// (if it becomes cheap enough) file contents.
h := sha1.New()
inputFiles := stringList(
p.GoFiles,
p.CgoFiles,
p.CFiles,
p.CXXFiles,
p.MFiles,
p.HFiles,
p.SFiles,
p.SysoFiles,
p.SwigFiles,
p.SwigCXXFiles,
)
fmt.Fprintf(h, "%d files\n", len(inputFiles))
for _, file := range inputFiles {
fmt.Fprintf(h, "%s\n", file)
}
p.buildID = fmt.Sprintf("%x", h.Sum(nil))

return p
}

Expand Down Expand Up @@ -795,6 +829,14 @@ func isStale(p *Package, topRoot map[string]bool) bool {
}
}

// Package is stale if the expected build ID differs from the
// recorded build ID. This catches changes like a source file
// being removed from a package directory. See issue 3895.
targetBuildID, err := readBuildID(p)
if err == nil && targetBuildID != p.buildID {
return true
}

// As a courtesy to developers installing new versions of the compiler
// frequently, define that packages are stale if they are
// older than the compiler, and commands if they are older than
Expand All @@ -814,9 +856,10 @@ func isStale(p *Package, topRoot map[string]bool) bool {
}

// Have installed copy, probably built using current compilers,
// and built after its imported packages. The only reason now
// built with the right set of source files,
// and built after its imported packages. The only reason now
// that we'd have to rebuild it is if the sources were newer than
// the package. If a package p is not in the same tree as any
// the package. If a package p is not in the same tree as any
// package named on the command-line, assume it is up-to-date
// no matter what the modification times on the source files indicate.
// This avoids rebuilding $GOROOT packages when people are
Expand Down Expand Up @@ -994,3 +1037,169 @@ func hasSubdir(root, dir string) (rel string, ok bool) {
}
return filepath.ToSlash(dir[len(root):]), true
}

// Sentinel errors reported while reading a build ID from an installed target.
var (
	// errBuildIDToolchain is returned by readBuildID when the active
	// toolchain is not the gc toolchain.
	errBuildIDToolchain = errors.New("build ID only supported in gc toolchain")

	// errBuildIDMalformed is wrapped in an *os.PathError when the target's
	// header or trailer does not have the expected layout.
	errBuildIDMalformed = errors.New("malformed object file")

	// errBuildIDUnknown is wrapped in an *os.PathError when there is no
	// usable target to read a build ID from (empty target path, or a
	// non-command target that is not a .a archive).
	errBuildIDUnknown = errors.New("lost build ID")
)

// Byte sequences that readBuildID expects, in this order, on the first
// four lines of a gc package archive.
var (
bangArch = []byte("!<arch>") // must equal the entire first line
pkgdef = []byte("__.PKGDEF") // required prefix of the second line
goobject = []byte("go object ") // required prefix of the third line
buildid = []byte("build id ") // prefix of the fourth line when a build ID is recorded; the quoted ID follows
)

// readBuildID reads the build ID from an archive or binary.
// It only supports the gc toolchain; for any other toolchain it returns
// errBuildIDToolchain. Other toolchain maintainers should adjust this function.
//
// For a command (package main) the ID is read from the binary by
// readBuildIDFromBinary. For any other package p.Target must name a .a
// archive; otherwise an *os.PathError wrapping errBuildIDUnknown is returned.
//
// A well-formed archive header that simply has no build ID line yields an
// empty ID and a nil error. A header that does not match the expected layout
// yields an *os.PathError wrapping errBuildIDMalformed. Errors from opening
// or reading the file are returned as is.
func readBuildID(p *Package) (id string, err error) {
	if buildToolchain != (gcToolchain{}) {
		return "", errBuildIDToolchain
	}

	// For commands, read build ID directly from binary.
	if p.Name == "main" {
		return readBuildIDFromBinary(p)
	}

	// Otherwise, we expect to have an archive (.a) file,
	// and we can read the build ID from the Go export data.
	if !strings.HasSuffix(p.Target, ".a") {
		return "", &os.PathError{Op: "parse", Path: p.Target, Err: errBuildIDUnknown}
	}

	// Read just enough of the target to fetch the build ID.
	// The archive is expected to look like:
	//
	//	!<arch>
	//	__.PKGDEF       0           0     0     644     7955      `
	//	go object darwin amd64 devel X:none
	//	build id "b41e5c45250e25c9fd5e9f9a1de7857ea0d41224"
	//
	// The variable-sized strings are GOOS, GOARCH, and the experiment list (X:none).
	// Reading the first 1024 bytes should be plenty.
	f, err := os.Open(p.Target)
	if err != nil {
		return "", err
	}
	data := make([]byte, 1024)
	n, err := io.ReadFull(f, data)
	f.Close()

	// A file shorter than the buffer is fine (io.ErrUnexpectedEOF); an
	// empty file (io.EOF) or a genuine read failure is not, and must not
	// be parsed as though it were a short archive.
	if err != nil && err != io.ErrUnexpectedEOF {
		return "", err
	}
	// Only look at the bytes that were actually read.
	data = data[:n]

	bad := func() (string, error) {
		return "", &os.PathError{Op: "parse", Path: p.Target, Err: errBuildIDMalformed}
	}

	// nextLine splits the next newline-terminated line off the front of data.
	// It reports false if no complete line remains.
	nextLine := func() (line []byte, ok bool) {
		j := bytes.IndexByte(data, '\n')
		if j < 0 {
			return nil, false
		}
		line, data = data[:j], data[j+1:]
		return line, true
	}

	// Archive magic.
	line, ok := nextLine()
	if !ok || !bytes.Equal(line, bangArch) {
		return bad()
	}
	// Header of the __.PKGDEF archive member.
	line, ok = nextLine()
	if !ok || !bytes.HasPrefix(line, pkgdef) {
		return bad()
	}
	// Go object header.
	line, ok = nextLine()
	if !ok || !bytes.HasPrefix(line, goobject) {
		return bad()
	}
	// Optional build ID line.
	line, ok = nextLine()
	if !ok {
		return bad()
	}
	if !bytes.HasPrefix(line, buildid) {
		// Found the object header, just doesn't have a build id line.
		// Treat as successful, with empty build id.
		return "", nil
	}
	id, err = strconv.Unquote(string(line[len(buildid):]))
	if err != nil {
		return bad()
	}
	return id, nil
}

// Markers of the build ID trailer that readBuildIDFromBinary looks for in
// the final bytes of a binary.
var (
goBinary = []byte("\x00\n\ngo binary\n") // start of the trailer
endGoBinary = []byte("\nend go binary\n") // end of the trailer; must be the last bytes of the file
newlineAndBuildid = []byte("\nbuild id ") // introduces the quoted build ID inside the trailer
)

// readBuildIDFromBinary reads the build ID from a binary.
// Instead of trying to be good citizens and store the build ID in a
// custom section of the binary, which would be different for each
// of the four binary types we support (ELF, Mach-O, Plan 9, PE),
// we write a few lines to the end of the binary.
//
// At the very end of the binary we expect to find:
//
//	<NUL>
//
//	go binary
//	build id "XXX"
//	end go binary
//
// A binary without the trailer, or with a trailer that has no build id
// line, yields an empty ID and a nil error. An empty p.Target yields an
// *os.PathError wrapping errBuildIDUnknown, and a build id line that is
// not a valid quoted string yields one wrapping errBuildIDMalformed.
// Errors from opening, seeking in, or reading the file are returned as is.
func readBuildIDFromBinary(p *Package) (id string, err error) {
	if p.Target == "" {
		return "", &os.PathError{Op: "parse", Path: p.Target, Err: errBuildIDUnknown}
	}

	f, err := os.Open(p.Target)
	if err != nil {
		return "", err
	}
	defer f.Close()

	// Only the tail of the file is examined: the last 1024 bytes, or the
	// whole file if it is smaller than that.
	size, err := f.Seek(0, os.SEEK_END)
	if err != nil {
		return "", err
	}
	n := 1024
	if size < int64(n) {
		n = int(size)
	}
	if _, err := f.Seek(size-int64(n), os.SEEK_SET); err != nil {
		return "", err
	}
	data := make([]byte, n)
	if _, err := io.ReadFull(f, data); err != nil {
		return "", err
	}

	if !bytes.HasSuffix(data, endGoBinary) {
		// Trailer missing. Treat as successful but build ID empty.
		return "", nil
	}
	i := bytes.LastIndex(data, goBinary)
	if i < 0 {
		// Trailer missing. Treat as successful but build ID empty.
		return "", nil
	}

	// Have trailer. Find build id line.
	data = data[i:]
	i = bytes.Index(data, newlineAndBuildid)
	if i < 0 {
		// Trailer present; build ID missing. Treat as successful but empty.
		return "", nil
	}
	line := data[i+len(newlineAndBuildid):]
	// IndexByte must succeed: data ends with endGoBinary, whose final byte
	// is a newline, and newlineAndBuildid ends with a space, so line is a
	// non-empty suffix of data that ends in '\n'.
	j := bytes.IndexByte(line, '\n')
	id, err = strconv.Unquote(string(line[:j]))
	if err != nil {
		return "", &os.PathError{Op: "parse", Path: p.Target, Err: errBuildIDMalformed}
	}
	return id, nil
}
Loading

0 comments on commit 7b87631

Please sign in to comment.