internal/lsp/lsppos: add helpers for mapping token positions

For use-cases that work only with token.Pos and protocol.Position, the span package is unnecessarily indirect, and inefficient. It also loses information about newline termination, and handles positions within CRLF line endings incorrectly. The lsppos package was written to bypass this complexity, but had limited use and lacked tests. Add tests, and an wrapper API that operates on token.Pos. Also fix source.TestTokenOffset to not panic, and add a temporary exemption of the new token.Offset usage. This change also fixes position calculation in the case of empty file content. The mapper now finds position (0, 0) at offset 0 of an empty file. Change-Id: I639bd3fac78a127b1c8eddad60b890449901c68c Reviewed-on: https://go-review.googlesource.com/c/tools/+/403678 Reviewed-by: Alan Donovan <adonovan@google.com> Run-TryBot: Robert Findley <rfindley@google.com> gopls-CI: kokoro <noreply+kokoro@google.com> TryBot-Result: Gopher Robot <gobot@golang.org>
golang · May 9, 2022 · cde25b3 · cde25b3
1 parent 22b7096
commit cde25b3
Show file tree

Hide file tree

Showing 6 changed files with 309 additions and 50 deletions.
diff --git a/internal/lsp/lsppos/lsppos.go b/internal/lsp/lsppos/lsppos.go
@@ -2,7 +2,10 @@
 // Use of this source code is governed by a BSD-style
 // license that can be found in the LICENSE file.
 
-// Package lsppos provides utilities for working with LSP positions.
+// Package lsppos provides utilities for working with LSP positions. Much of
+// this functionality is duplicated from the internal/span package, but this
+// package is simpler and more accurate with respect to newline terminated
+// content.
 //
 // See https://microsoft.github.io/language-server-protocol/specification#textDocuments
 // for a description of LSP positions. Notably:
@@ -14,25 +17,30 @@
 package lsppos
 
 import (
+	"errors"
 	"sort"
 	"unicode/utf8"
+
+	"golang.org/x/tools/internal/lsp/protocol"
 )
 
+// Mapper maps utf-8 byte offsets to LSP positions for a single file.
 type Mapper struct {
 	nonASCII bool
-	src      []byte
+	content  []byte
 
-	// Start-of-line positions. If src is newline-terminated, the final entry will be empty.
+	// Start-of-line positions. If src is newline-terminated, the final entry
+	// will be len(content).
 	lines []int
 }
 
-func NewMapper(src []byte) *Mapper {
-	m := &Mapper{src: src}
-	if len(src) == 0 {
-		return m
+// NewMapper creates a new Mapper for the given content.
+func NewMapper(content []byte) *Mapper {
+	m := &Mapper{
+		content: content,
+		lines:   []int{0},
 	}
-	m.lines = []int{0}
-	for offset, b := range src {
+	for offset, b := range content {
 		if b == '\n' {
 			m.lines = append(m.lines, offset+1)
 		}
@@ -43,8 +51,11 @@ func NewMapper(src []byte) *Mapper {
 	return m
 }
 
-func (m *Mapper) Position(offset int) (line, char int) {
-	if offset < 0 || offset > len(m.src) {
+// LineColUTF16 returns the 0-based UTF-16 line and character index for the
+// given offset. It returns -1, -1 if offset is out of bounds for the file
+// being mapped.
+func (m *Mapper) LineColUTF16(offset int) (line, char int) {
+	if offset < 0 || offset > len(m.content) {
 		return -1, -1
 	}
 	nextLine := sort.Search(len(m.lines), func(i int) bool {
@@ -57,27 +68,59 @@ func (m *Mapper) Position(offset int) (line, char int) {
 	start := m.lines[line]
 	var charOffset int
 	if m.nonASCII {
-		charOffset = UTF16len(m.src[start:offset])
+		charOffset = UTF16len(m.content[start:offset])
 	} else {
 		charOffset = offset - start
 	}
 
 	var eol int
 	if line == len(m.lines)-1 {
-		eol = len(m.src)
+		eol = len(m.content)
 	} else {
 		eol = m.lines[line+1] - 1
 	}
 
 	// Adjustment for line-endings: \r|\n is the same as |\r\n.
-	if offset == eol && offset > 0 && m.src[offset-1] == '\r' {
+	if offset == eol && offset > 0 && m.content[offset-1] == '\r' {
 		charOffset--
 	}
 
 	return line, charOffset
 }
 
+// Position returns the protocol position corresponding to the given offset. It
+// returns false if offset is out of bounds for the file being mapped.
+func (m *Mapper) Position(offset int) (protocol.Position, bool) {
+	l, c := m.LineColUTF16(offset)
+	if l < 0 {
+		return protocol.Position{}, false
+	}
+	return protocol.Position{
+		Line:      uint32(l),
+		Character: uint32(c),
+	}, true
+}
+
+// Range returns the protocol range corresponding to the given start and end
+// offsets.
+func (m *Mapper) Range(start, end int) (protocol.Range, error) {
+	startPos, ok := m.Position(start)
+	if !ok {
+		return protocol.Range{}, errors.New("invalid start position")
+	}
+	endPos, ok := m.Position(end)
+	if !ok {
+		return protocol.Range{}, errors.New("invalid end position")
+	}
+
+	return protocol.Range{Start: startPos, End: endPos}, nil
+}
+
+// UTF16Len returns the UTF-16 length of the UTF-8 encoded content, were it to
+// be re-encoded as UTF-16.
 func UTF16len(buf []byte) int {
+	// This function copies buf, but microbenchmarks showed it to be faster than
+	// using utf8.DecodeRune due to inlining and avoiding bounds checks.
 	cnt := 0
 	for _, r := range string(buf) {
 		cnt++

diff --git a/internal/lsp/lsppos/lsppos_test.go b/internal/lsp/lsppos/lsppos_test.go
@@ -0,0 +1,107 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lsppos_test
+
+import (
+	"fmt"
+	"strings"
+	"testing"
+
+	. "golang.org/x/tools/internal/lsp/lsppos"
+	"golang.org/x/tools/internal/lsp/protocol"
+)
+
+type testCase struct {
+	content            string      // input text
+	substrOrOffset     interface{} // explicit integer offset, or a substring
+	wantLine, wantChar int         // expected LSP position information
+}
+
+// offset returns the test case byte offset
+func (c testCase) offset() int {
+	switch x := c.substrOrOffset.(type) {
+	case int:
+		return x
+	case string:
+		i := strings.Index(c.content, x)
+		if i < 0 {
+			panic(fmt.Sprintf("%q does not contain substring %q", c.content, x))
+		}
+		return i
+	}
+	panic("substrOrIndex must be an integer or string")
+}
+
+var tests = []testCase{
+	{"a𐐀b", "a", 0, 0},
+	{"a𐐀b", "𐐀", 0, 1},
+	{"a𐐀b", "b", 0, 3},
+	{"a𐐀b\n", "\n", 0, 4},
+	{"a𐐀b\r\n", "\n", 0, 4}, // \r|\n is not a valid position, so we move back to the end of the first line.
+	{"a𐐀b\r\nx", "x", 1, 0},
+	{"a𐐀b\r\nx\ny", "y", 2, 0},
+
+	// Testing EOL and EOF positions
+	{"", 0, 0, 0}, // 0th position of an empty buffer is (0, 0)
+	{"abc", "c", 0, 2},
+	{"abc", 3, 0, 3},
+	{"abc\n", "\n", 0, 3},
+	{"abc\n", 4, 1, 0}, // position after a newline is on the next line
+}
+
+func TestLineChar(t *testing.T) {
+	for _, test := range tests {
+		m := NewMapper([]byte(test.content))
+		offset := test.offset()
+		gotLine, gotChar := m.LineColUTF16(offset)
+		if gotLine != test.wantLine || gotChar != test.wantChar {
+			t.Errorf("LineChar(%d) = (%d,%d), want (%d,%d)", offset, gotLine, gotChar, test.wantLine, test.wantChar)
+		}
+	}
+}
+
+func TestInvalidOffset(t *testing.T) {
+	content := []byte("a𐐀b\r\nx\ny")
+	m := NewMapper(content)
+	for _, offset := range []int{-1, 100} {
+		gotLine, gotChar := m.LineColUTF16(offset)
+		if gotLine != -1 {
+			t.Errorf("LineChar(%d) = (%d,%d), want (-1,-1)", offset, gotLine, gotChar)
+		}
+	}
+}
+
+func TestPosition(t *testing.T) {
+	for _, test := range tests {
+		m := NewMapper([]byte(test.content))
+		offset := test.offset()
+		got, ok := m.Position(offset)
+		if !ok {
+			t.Error("invalid position for", test.substrOrOffset)
+			continue
+		}
+		want := protocol.Position{Line: uint32(test.wantLine), Character: uint32(test.wantChar)}
+		if got != want {
+			t.Errorf("Position(%d) = %v, want %v", offset, got, want)
+		}
+	}
+}
+
+func TestRange(t *testing.T) {
+	for _, test := range tests {
+		m := NewMapper([]byte(test.content))
+		offset := test.offset()
+		got, err := m.Range(0, offset)
+		if err != nil {
+			t.Fatal(err)
+		}
+		want := protocol.Range{
+			End: protocol.Position{Line: uint32(test.wantLine), Character: uint32(test.wantChar)},
+		}
+		if got != want {
+			t.Errorf("Range(%d) = %v, want %v", offset, got, want)
+		}
+	}
+}
diff --git a/internal/lsp/lsppos/token.go b/internal/lsp/lsppos/token.go
@@ -0,0 +1,59 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lsppos
+
+import (
+	"errors"
+	"go/token"
+
+	"golang.org/x/tools/internal/lsp/protocol"
+)
+
+// TokenMapper maps token.Pos to LSP positions for a single file.
+type TokenMapper struct {
+	// file is used for computing offsets.
+	file *token.File
+
+	// For now, just delegate to a Mapper for position calculation. As an
+	// optimization we could avoid building the mapper and just use the file, but
+	// then have to correctly adjust for newline-terminated files. It is easier
+	// to just delegate unless performance becomes a concern.
+	mapper *Mapper
+}
+
+// NewMapper creates a new TokenMapper for the given content, using the
+// provided file to compute offsets.
+func NewTokenMapper(content []byte, file *token.File) *TokenMapper {
+	return &TokenMapper{
+		file:   file,
+		mapper: NewMapper(content),
+	}
+}
+
+// Position returns the protocol position corresponding to the given pos. It
+// returns false if pos is out of bounds for the file being mapped.
+func (m *TokenMapper) Position(pos token.Pos) (protocol.Position, bool) {
+	if int(pos) < m.file.Base() || int(pos) > m.file.Base()+m.file.Size() {
+		return protocol.Position{}, false
+	}
+	offset := m.file.Offset(pos) // usage of token.File.Offset is temporarily exempted
+	return m.mapper.Position(offset)
+}
+
+// Range returns the protocol range corresponding to the given start and end
+// positions. It returns an error if start or end is out of bounds for the file
+// being mapped.
+func (m *TokenMapper) Range(start, end token.Pos) (protocol.Range, error) {
+	startPos, ok := m.Position(start)
+	if !ok {
+		return protocol.Range{}, errors.New("invalid start position")
+	}
+	endPos, ok := m.Position(end)
+	if !ok {
+		return protocol.Range{}, errors.New("invalid end position")
+	}
+
+	return protocol.Range{Start: startPos, End: endPos}, nil
+}
diff --git a/internal/lsp/lsppos/token_test.go b/internal/lsp/lsppos/token_test.go
@@ -0,0 +1,57 @@
+// Copyright 2022 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package lsppos_test
+
+import (
+	"go/token"
+	"testing"
+
+	. "golang.org/x/tools/internal/lsp/lsppos"
+	"golang.org/x/tools/internal/lsp/protocol"
+)
+
+func makeTokenMapper(content []byte) (*TokenMapper, *token.File) {
+	file := token.NewFileSet().AddFile("p.go", -1, len(content))
+	file.SetLinesForContent(content)
+	return NewTokenMapper(content, file), file
+}
+
+func TestInvalidPosition(t *testing.T) {
+	content := []byte("a𐐀b\r\nx\ny")
+	m, _ := makeTokenMapper(content)
+
+	for _, pos := range []token.Pos{-1, 100} {
+		posn, ok := m.Position(pos)
+		if ok {
+			t.Errorf("Position(%d) = %v, want error", pos, posn)
+		}
+	}
+}
+
+func TestTokenPosition(t *testing.T) {
+	for _, test := range tests {
+		m, f := makeTokenMapper([]byte(test.content))
+		pos := token.Pos(f.Base() + test.offset())
+		got, ok := m.Position(pos)
+		if !ok {
+			t.Error("invalid position for", test.substrOrOffset)
+			continue
+		}
+		want := protocol.Position{Line: uint32(test.wantLine), Character: uint32(test.wantChar)}
+		if got != want {
+			t.Errorf("Position(%d) = %v, want %v", pos, got, want)
+		}
+		gotRange, err := m.Range(token.Pos(f.Base()), pos)
+		if err != nil {
+			t.Fatal(err)
+		}
+		wantRange := protocol.Range{
+			End: want,
+		}
+		if gotRange != wantRange {
+			t.Errorf("Range(%d) = %v, want %v", pos, got, want)
+		}
+	}
+}
diff --git a/internal/lsp/source/format.go b/internal/lsp/source/format.go
@@ -329,21 +329,18 @@ func ProtocolEditsFromSource(src []byte, edits []diff.TextEdit, converter span.C
 		if err != nil {
 			return nil, fmt.Errorf("computing offsets: %v", err)
 		}
-		startLine, startChar := m.Position(spn.Start().Offset())
-		endLine, endChar := m.Position(spn.End().Offset())
-		if startLine < 0 || endLine < 0 {
-			return nil, fmt.Errorf("out of bound span: %v", spn)
+		rng, err := m.Range(spn.Start().Offset(), spn.End().Offset())
+		if err != nil {
+			return nil, err
 		}
 
-		pstart := protocol.Position{Line: uint32(startLine), Character: uint32(startChar)}
-		pend := protocol.Position{Line: uint32(endLine), Character: uint32(endChar)}
-		if pstart == pend && edit.NewText == "" {
+		if rng.Start == rng.End && edit.NewText == "" {
 			// Degenerate case, which may result from a diff tool wanting to delete
 			// '\r' in line endings. Filter it out.
 			continue
 		}
 		result = append(result, protocol.TextEdit{
-			Range:   protocol.Range{Start: pstart, End: pend},
+			Range:   rng,
 			NewText: edit.NewText,
 		})
 	}