Skip to content

Commit

Permalink
internal/lsp/cache: use [256]byte Hash instead of hex digit string
Browse files Browse the repository at this point in the history
I had hoped to see a reduction in total allocation, but it does not
appear to be significant according to the included crude benchmark.
Nonetheless this is a slight code clarity improvement.

Change-Id: I94a503b377dd1146eb371ff11222a351cb5a43b7
Reviewed-on: https://go-review.googlesource.com/c/tools/+/411655
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Robert Findley <rfindley@google.com>
gopls-CI: kokoro <noreply+kokoro@google.com>
Reviewed-by: Robert Findley <rfindley@google.com>
  • Loading branch information
adonovan committed Jun 13, 2022
1 parent c41ddce commit a41fc98
Show file tree
Hide file tree
Showing 13 changed files with 98 additions and 81 deletions.
30 changes: 30 additions & 0 deletions gopls/internal/regtest/bench/bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"flag"
"fmt"
"os"
"runtime"
"runtime/pprof"
"testing"

Expand Down Expand Up @@ -66,6 +67,7 @@ func TestBenchmarkIWL(t *testing.T) {
results := testing.Benchmark(func(b *testing.B) {
for i := 0; i < b.N; i++ {
WithOptions(opts...).Run(t, "", func(t *testing.T, env *Env) {})

}
})

Expand Down Expand Up @@ -192,3 +194,31 @@ func TestBenchmarkDidChange(t *testing.T) {
printBenchmarkResults(result)
})
}

// TestPrintMemStats measures the memory usage of loading a project.
// It uses the same -didchange_dir flag as above.
// Always run it in isolation since it measures global heap usage.
//
// Kubernetes example:
// $ go test -run=TestPrintMemStats -didchange_dir=$HOME/w/kubernetes
// TotalAlloc: 5766 MB
// HeapAlloc: 1984 MB
//
// Both figures exhibit variance of less than 1%.
func TestPrintMemStats(t *testing.T) {
if *benchDir == "" {
t.Skip("-didchange_dir is not set")
}

// Load the program...
opts := benchmarkOptions(*benchDir)
WithOptions(opts...).Run(t, "", func(_ *testing.T, env *Env) {
// ...and print the memory usage.
runtime.GC()
runtime.GC()
var mem runtime.MemStats
runtime.ReadMemStats(&mem)
t.Logf("TotalAlloc:\t%d MB", mem.TotalAlloc/1e6)
t.Logf("HeapAlloc:\t%d MB", mem.HeapAlloc/1e6)
})
}
4 changes: 2 additions & 2 deletions internal/lsp/cache/analysis.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ func (s *snapshot) Analyze(ctx context.Context, id string, analyzers []*source.A
return results, nil
}

type actionHandleKey string
type actionHandleKey source.Hash

// An action represents one unit of analysis work: the application of
// one analysis to one package. Actions form a DAG, both within a
Expand Down Expand Up @@ -170,7 +170,7 @@ func (act *actionHandle) analyze(ctx context.Context, snapshot *snapshot) ([]*so
}

func buildActionKey(a *analysis.Analyzer, ph *packageHandle) actionHandleKey {
return actionHandleKey(hashContents([]byte(fmt.Sprintf("%p %s", a, string(ph.key)))))
return actionHandleKey(source.Hashf("%p%s", a, ph.key[:]))
}

func (act *actionHandle) String() string {
Expand Down
19 changes: 2 additions & 17 deletions internal/lsp/cache/cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ package cache

import (
"context"
"crypto/sha256"
"fmt"
"go/ast"
"go/token"
Expand Down Expand Up @@ -55,7 +54,7 @@ type fileHandle struct {
modTime time.Time
uri span.URI
bytes []byte
hash string
hash source.Hash
err error

// size is the file length as reported by Stat, for the purpose of
Expand Down Expand Up @@ -139,7 +138,7 @@ func readFile(ctx context.Context, uri span.URI, fi os.FileInfo) (*fileHandle, e
size: fi.Size(),
uri: uri,
bytes: data,
hash: hashContents(data),
hash: source.HashOf(data),
}, nil
}

Expand Down Expand Up @@ -168,10 +167,6 @@ func (h *fileHandle) URI() span.URI {
return h.uri
}

func (h *fileHandle) Hash() string {
return h.hash
}

func (h *fileHandle) FileIdentity() source.FileIdentity {
return source.FileIdentity{
URI: h.uri,
Expand All @@ -183,16 +178,6 @@ func (h *fileHandle) Read() ([]byte, error) {
return h.bytes, h.err
}

// hashContents returns a string of hex digits denoting the hash of contents.
//
// TODO(adonovan): opt: use [32]byte array as a value more widely and convert
// to hex digits on demand (rare). The array is larger when it appears as a
// struct field (32B vs 16B) but smaller overall (string data is 64B), has
// better locality, and is more efficiently hashed by runtime maps.
func hashContents(contents []byte) string {
return fmt.Sprintf("%64x", sha256.Sum256(contents))
}

var cacheIndex, sessionIndex, viewIndex int64

func (c *Cache) ID() string { return c.id }
Expand Down
31 changes: 16 additions & 15 deletions internal/lsp/cache/check.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ import (
"golang.org/x/tools/internal/typesinternal"
)

type packageHandleKey string
type packageHandleKey source.Hash

type packageHandle struct {
handle *memoize.Handle
Expand Down Expand Up @@ -187,7 +187,7 @@ func (s *snapshot) buildKey(ctx context.Context, id PackageID, mode source.Parse
}
// One bad dependency should not prevent us from checking the entire package.
// Add a special key to mark a bad dependency.
depKeys = append(depKeys, packageHandleKey(fmt.Sprintf("%s import not found", depID)))
depKeys = append(depKeys, packageHandleKey(source.Hashf("%s import not found", depID)))
continue
}
deps[depHandle.m.PkgPath] = depHandle
Expand Down Expand Up @@ -215,6 +215,8 @@ func (s *snapshot) workspaceParseMode(id PackageID) source.ParseMode {
}

func checkPackageKey(id PackageID, pghs []*parseGoHandle, m *KnownMetadata, deps []packageHandleKey, mode source.ParseMode, experimentalKey bool) packageHandleKey {
// TODO(adonovan): opt: no need to materalize the bytes; hash them directly.
// Also, use field separators to avoid spurious collisions.
b := bytes.NewBuffer(nil)
b.WriteString(string(id))
if m.Module != nil {
Expand All @@ -225,46 +227,45 @@ func checkPackageKey(id PackageID, pghs []*parseGoHandle, m *KnownMetadata, deps
// files, and deps). It should not otherwise affect the inputs to the type
// checker, so this experiment omits it. This should increase cache hits on
// the daemon as cfg contains the environment and working directory.
b.WriteString(hashConfig(m.Config))
hc := hashConfig(m.Config)
b.Write(hc[:])
}
b.WriteByte(byte(mode))
for _, dep := range deps {
b.WriteString(string(dep))
b.Write(dep[:])
}
for _, cgf := range pghs {
b.WriteString(cgf.file.FileIdentity().String())
}
return packageHandleKey(hashContents(b.Bytes()))
return packageHandleKey(source.HashOf(b.Bytes()))
}

// hashEnv returns a hash of the snapshot's configuration.
func hashEnv(s *snapshot) string {
func hashEnv(s *snapshot) source.Hash {
s.view.optionsMu.Lock()
env := s.view.options.EnvSlice()
s.view.optionsMu.Unlock()

b := &bytes.Buffer{}
for _, e := range env {
b.WriteString(e)
}
return hashContents(b.Bytes())
return source.Hashf("%s", env)
}

// hashConfig returns the hash for the *packages.Config.
func hashConfig(config *packages.Config) string {
b := bytes.NewBuffer(nil)
func hashConfig(config *packages.Config) source.Hash {
// TODO(adonovan): opt: don't materialize the bytes; hash them directly.
// Also, use sound field separators to avoid collisions.
var b bytes.Buffer

// Dir, Mode, Env, BuildFlags are the parts of the config that can change.
b.WriteString(config.Dir)
b.WriteString(string(rune(config.Mode)))
b.WriteRune(rune(config.Mode))

for _, e := range config.Env {
b.WriteString(e)
}
for _, f := range config.BuildFlags {
b.WriteString(f)
}
return hashContents(b.Bytes())
return source.HashOf(b.Bytes())
}

func (ph *packageHandle) Check(ctx context.Context, s source.Snapshot) (source.Package, error) {
Expand Down
6 changes: 3 additions & 3 deletions internal/lsp/cache/imports.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ type importsState struct {
cleanupProcessEnv func()
cacheRefreshDuration time.Duration
cacheRefreshTimer *time.Timer
cachedModFileHash string
cachedModFileHash source.Hash
cachedBuildFlags []string
}

Expand All @@ -38,7 +38,7 @@ func (s *importsState) runProcessEnvFunc(ctx context.Context, snapshot *snapshot
// Find the hash of the active mod file, if any. Using the unsaved content
// is slightly wasteful, since we'll drop caches a little too often, but
// the mod file shouldn't be changing while people are autocompleting.
var modFileHash string
var modFileHash source.Hash
// If we are using 'legacyWorkspace' mode, we can just read the modfile from
// the snapshot. Otherwise, we need to get the synthetic workspace mod file.
//
Expand All @@ -61,7 +61,7 @@ func (s *importsState) runProcessEnvFunc(ctx context.Context, snapshot *snapshot
if err != nil {
return err
}
modFileHash = hashContents(modBytes)
modFileHash = source.HashOf(modBytes)
}

// view.goEnv is immutable -- changes make a new view. Options can change.
Expand Down
8 changes: 5 additions & 3 deletions internal/lsp/cache/mod.go
Original file line number Diff line number Diff line change
Expand Up @@ -201,9 +201,11 @@ func sumFilename(modURI span.URI) string {
// modKey is uniquely identifies cached data for `go mod why` or dependencies
// to upgrade.
type modKey struct {
sessionID, env, view string
mod source.FileIdentity
verb modAction
sessionID string
env source.Hash
view string
mod source.FileIdentity
verb modAction
}

type modAction int
Expand Down
17 changes: 6 additions & 11 deletions internal/lsp/cache/mod_tidy.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@ import (

type modTidyKey struct {
sessionID string
env string
env source.Hash
gomod source.FileIdentity
imports string
unsavedOverlays string
imports source.Hash
unsavedOverlays source.Hash
view string
}

Expand Down Expand Up @@ -81,10 +81,6 @@ func (s *snapshot) ModTidy(ctx context.Context, pm *source.ParsedModule) (*sourc
if err != nil {
return nil, err
}
importHash, err := s.hashImports(ctx, workspacePkgs)
if err != nil {
return nil, err
}

s.mu.Lock()
overlayHash := hashUnsavedOverlays(s.files)
Expand All @@ -93,7 +89,7 @@ func (s *snapshot) ModTidy(ctx context.Context, pm *source.ParsedModule) (*sourc
key := modTidyKey{
sessionID: s.view.session.id,
view: s.view.folder.Filename(),
imports: importHash,
imports: s.hashImports(ctx, workspacePkgs),
unsavedOverlays: overlayHash,
gomod: fh.FileIdentity(),
env: hashEnv(s),
Expand Down Expand Up @@ -152,7 +148,7 @@ func (s *snapshot) ModTidy(ctx context.Context, pm *source.ParsedModule) (*sourc
return mth.tidy(ctx, s)
}

func (s *snapshot) hashImports(ctx context.Context, wsPackages []*packageHandle) (string, error) {
func (s *snapshot) hashImports(ctx context.Context, wsPackages []*packageHandle) source.Hash {
seen := map[string]struct{}{}
var imports []string
for _, ph := range wsPackages {
Expand All @@ -164,8 +160,7 @@ func (s *snapshot) hashImports(ctx context.Context, wsPackages []*packageHandle)
}
}
sort.Strings(imports)
hashed := strings.Join(imports, ",")
return hashContents([]byte(hashed)), nil
return source.Hashf("%s", imports)
}

// modTidyDiagnostics computes the differences between the original and tidied
Expand Down
4 changes: 2 additions & 2 deletions internal/lsp/cache/session.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ type overlay struct {
session *Session
uri span.URI
text []byte
hash string
hash source.Hash
version int32
kind source.FileKind

Expand Down Expand Up @@ -637,7 +637,7 @@ func (s *Session) updateOverlays(ctx context.Context, changes []source.FileModif
if c.OnDisk || c.Action == source.Save {
version = o.version
}
hash := hashContents(text)
hash := source.HashOf(text)
var sameContentOnDisk bool
switch c.Action {
case source.Delete:
Expand Down
24 changes: 3 additions & 21 deletions internal/lsp/cache/snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -466,15 +466,15 @@ func (s *snapshot) buildOverlay() map[string][]byte {
return overlays
}

func hashUnsavedOverlays(files map[span.URI]source.VersionedFileHandle) string {
func hashUnsavedOverlays(files map[span.URI]source.VersionedFileHandle) source.Hash {
var unsaved []string
for uri, fh := range files {
if overlay, ok := fh.(*overlay); ok && !overlay.saved {
unsaved = append(unsaved, uri.Filename())
}
}
sort.Strings(unsaved)
return hashContents([]byte(strings.Join(unsaved, "")))
return source.Hashf("%s", unsaved)
}

func (s *snapshot) PackagesForFile(ctx context.Context, uri span.URI, mode source.TypecheckMode, includeTestVariants bool) ([]source.Package, error) {
Expand Down Expand Up @@ -2652,25 +2652,7 @@ func (m *goFileMap) forEachConcurrent(f func(parseKey, *parseGoHandle)) {
// -- internal--

// hash returns 8 bits from the key's file digest.
func (m *goFileMap) hash(k parseKey) int {
h := k.file.Hash
if h == "" {
// Sadly the Hash isn't always a hash because cache.GetFile may
// successfully return a *fileHandle containing an error and no hash.
// Lump the duds together for now.
// TODO(adonovan): fix the underlying bug.
return 0
}
return unhex(h[0])<<4 | unhex(h[1])
}

// unhex returns the value of a valid hex digit.
func unhex(b byte) int {
if '0' <= b && b <= '9' {
return int(b - '0')
}
return int(b) & ^0x20 - 'A' + 0xA // [a-fA-F]
}
func (*goFileMap) hash(k parseKey) byte { return k.file.Hash[0] }

// unshare makes k's stripe exclusive, allocating a copy if needed, and returns it.
func (m *goFileMap) unshare(k parseKey) map[parseKey]*parseGoHandle {
Expand Down
2 changes: 1 addition & 1 deletion internal/lsp/cache/symbols.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ type symbolData struct {
err error
}

type symbolHandleKey string
type symbolHandleKey source.Hash

func (s *snapshot) buildSymbolHandle(ctx context.Context, fh source.FileHandle) *symbolHandle {
if h := s.getSymbolHandle(fh.URI()); h != nil {
Expand Down
2 changes: 1 addition & 1 deletion internal/lsp/cache/view.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ func (v *View) ID() string { return v.id }
// given go.mod file. It is the caller's responsibility to clean up the files
// when they are done using them.
func tempModFile(modFh source.FileHandle, gosum []byte) (tmpURI span.URI, cleanup func(), err error) {
filenameHash := hashContents([]byte(modFh.URI().Filename()))
filenameHash := source.Hashf("%s", modFh.URI().Filename())
tmpMod, err := ioutil.TempFile("", fmt.Sprintf("go.%s.*.mod", filenameHash))
if err != nil {
return "", nil, err
Expand Down
3 changes: 2 additions & 1 deletion internal/lsp/debug/serve.go
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,8 @@ func (i *Instance) getFile(r *http.Request) interface{} {
return nil
}
for _, o := range s.Overlays() {
if o.FileIdentity().Hash == identifier {
// TODO(adonovan): understand and document this comparison.
if o.FileIdentity().Hash.String() == identifier {
return o
}
}
Expand Down
Loading

0 comments on commit a41fc98

Please sign in to comment.