Skip to content

Commit

Permalink
internal/lsp/lsppos: add helpers for mapping token positions
Browse files Browse the repository at this point in the history
For use-cases that work only with token.Pos and protocol.Position, the
span package is unnecessarily indirect, and inefficient. It also loses
information about newline termination, and handles positions within CRLF
line endings incorrectly.

The lsppos package was written to bypass this complexity, but had
limited use and lacked tests.

Add tests, and a wrapper API that operates on token.Pos. Also fix
source.TestTokenOffset to not panic, and add a temporary exemption of
the new token.Offset usage.

This change also fixes position calculation in the case of empty file
content. The mapper now finds position (0, 0) at offset 0 of an empty
file.

Change-Id: I639bd3fac78a127b1c8eddad60b890449901c68c
Reviewed-on: https://go-review.googlesource.com/c/tools/+/403678
Reviewed-by: Alan Donovan <adonovan@google.com>
Run-TryBot: Robert Findley <rfindley@google.com>
gopls-CI: kokoro <noreply+kokoro@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
  • Loading branch information
findleyr committed May 9, 2022
1 parent 22b7096 commit cde25b3
Show file tree
Hide file tree
Showing 6 changed files with 309 additions and 50 deletions.
71 changes: 57 additions & 14 deletions internal/lsp/lsppos/lsppos.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,10 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package lsppos provides utilities for working with LSP positions.
// Package lsppos provides utilities for working with LSP positions. Much of
// this functionality is duplicated from the internal/span package, but this
// package is simpler and more accurate with respect to newline terminated
// content.
//
// See https://microsoft.github.io/language-server-protocol/specification#textDocuments
// for a description of LSP positions. Notably:
Expand All @@ -14,25 +17,30 @@
package lsppos

import (
"errors"
"sort"
"unicode/utf8"

"golang.org/x/tools/internal/lsp/protocol"
)

// Mapper maps utf-8 byte offsets to LSP positions for a single file.
type Mapper struct {
nonASCII bool
src []byte
content []byte

// Start-of-line positions. If src is newline-terminated, the final entry will be empty.
// Start-of-line positions. If src is newline-terminated, the final entry
// will be len(content).
lines []int
}

func NewMapper(src []byte) *Mapper {
m := &Mapper{src: src}
if len(src) == 0 {
return m
// NewMapper creates a new Mapper for the given content.
func NewMapper(content []byte) *Mapper {
m := &Mapper{
content: content,
lines: []int{0},
}
m.lines = []int{0}
for offset, b := range src {
for offset, b := range content {
if b == '\n' {
m.lines = append(m.lines, offset+1)
}
Expand All @@ -43,8 +51,11 @@ func NewMapper(src []byte) *Mapper {
return m
}

func (m *Mapper) Position(offset int) (line, char int) {
if offset < 0 || offset > len(m.src) {
// LineColUTF16 returns the 0-based UTF-16 line and character index for the
// given offset. It returns -1, -1 if offset is out of bounds for the file
// being mapped.
func (m *Mapper) LineColUTF16(offset int) (line, char int) {
if offset < 0 || offset > len(m.content) {
return -1, -1
}
nextLine := sort.Search(len(m.lines), func(i int) bool {
Expand All @@ -57,27 +68,59 @@ func (m *Mapper) Position(offset int) (line, char int) {
start := m.lines[line]
var charOffset int
if m.nonASCII {
charOffset = UTF16len(m.src[start:offset])
charOffset = UTF16len(m.content[start:offset])
} else {
charOffset = offset - start
}

var eol int
if line == len(m.lines)-1 {
eol = len(m.src)
eol = len(m.content)
} else {
eol = m.lines[line+1] - 1
}

// Adjustment for line-endings: \r|\n is the same as |\r\n.
if offset == eol && offset > 0 && m.src[offset-1] == '\r' {
if offset == eol && offset > 0 && m.content[offset-1] == '\r' {
charOffset--
}

return line, charOffset
}

// Position converts a byte offset into the equivalent protocol.Position. The
// boolean result is false when offset is out of bounds for the mapped content.
func (m *Mapper) Position(offset int) (protocol.Position, bool) {
	line, char := m.LineColUTF16(offset)
	if line >= 0 {
		pos := protocol.Position{Line: uint32(line), Character: uint32(char)}
		return pos, true
	}
	// LineColUTF16 signals an out-of-bounds offset with a negative line.
	return protocol.Position{}, false
}

// Range converts the byte offsets start and end into a protocol.Range. It
// returns an error if either offset cannot be mapped to a position.
func (m *Mapper) Range(start, end int) (protocol.Range, error) {
	var (
		rng protocol.Range
		ok  bool
	)
	if rng.Start, ok = m.Position(start); !ok {
		return protocol.Range{}, errors.New("invalid start position")
	}
	if rng.End, ok = m.Position(end); !ok {
		return protocol.Range{}, errors.New("invalid end position")
	}
	return rng, nil
}

// UTF16len returns the UTF-16 length of the UTF-8 encoded content, were it to
// be re-encoded as UTF-16.
func UTF16len(buf []byte) int {
// This function copies buf, but microbenchmarks showed it to be faster than
// using utf8.DecodeRune due to inlining and avoiding bounds checks.
cnt := 0
for _, r := range string(buf) {
cnt++
Expand Down
107 changes: 107 additions & 0 deletions internal/lsp/lsppos/lsppos_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package lsppos_test

import (
"fmt"
"strings"
"testing"

. "golang.org/x/tools/internal/lsp/lsppos"
"golang.org/x/tools/internal/lsp/protocol"
)

// testCase describes a single offset-to-position expectation: the content to
// map, the location within it, and the LSP line/character expected there.
type testCase struct {
	content            string      // input text
	substrOrOffset     interface{} // explicit integer offset, or a substring
	wantLine, wantChar int         // expected LSP position information
}

// offset returns the byte offset within c.content that the test case refers
// to. An int substrOrOffset is returned unchanged; a string is resolved to
// the byte index of its first occurrence in c.content.
//
// It panics if the substring does not occur in the content, or if
// substrOrOffset holds any type other than int or string.
func (c testCase) offset() int {
	switch x := c.substrOrOffset.(type) {
	case int:
		return x
	case string:
		i := strings.Index(c.content, x)
		if i < 0 {
			panic(fmt.Sprintf("%q does not contain substring %q", c.content, x))
		}
		return i
	}
	// Name the actual field and report the offending type so that a
	// malformed table entry is easy to locate.
	panic(fmt.Sprintf("substrOrOffset must be an integer or string, got %T", c.substrOrOffset))
}

// tests is the table of offset-to-position cases shared by the tests in this
// package. Each entry gives the content, the offset (or a substring whose
// first occurrence marks the offset), and the expected 0-based line and
// UTF-16 character.
//
// 𐐀 (U+10400) is encoded as four bytes in UTF-8 and as two code units in
// UTF-16, which is why "b" in "a𐐀b" is expected at character 3.
var tests = []testCase{
{"a𐐀b", "a", 0, 0},
{"a𐐀b", "𐐀", 0, 1},
{"a𐐀b", "b", 0, 3},
{"a𐐀b\n", "\n", 0, 4},
{"a𐐀b\r\n", "\n", 0, 4}, // \r|\n is not a valid position, so we move back to the end of the first line.
{"a𐐀b\r\nx", "x", 1, 0},
{"a𐐀b\r\nx\ny", "y", 2, 0},

// Testing EOL and EOF positions
{"", 0, 0, 0}, // 0th position of an empty buffer is (0, 0)
{"abc", "c", 0, 2},
{"abc", 3, 0, 3},
{"abc\n", "\n", 0, 3},
{"abc\n", 4, 1, 0}, // position after a newline is on the next line
}

// TestLineChar checks that Mapper.LineColUTF16 maps the offset of every entry
// in the shared tests table to the expected line and UTF-16 character.
func TestLineChar(t *testing.T) {
	for _, test := range tests {
		m := NewMapper([]byte(test.content))
		offset := test.offset()
		gotLine, gotChar := m.LineColUTF16(offset)
		if gotLine != test.wantLine || gotChar != test.wantChar {
			// Report the method actually under test, and the content, since
			// several table entries share the same offset.
			t.Errorf("LineColUTF16(%d) in %q = (%d,%d), want (%d,%d)",
				offset, test.content, gotLine, gotChar, test.wantLine, test.wantChar)
		}
	}
}

// TestInvalidOffset checks that Mapper.LineColUTF16 reports (-1, -1) for
// offsets that fall before the start or past the end of the content.
func TestInvalidOffset(t *testing.T) {
	content := []byte("a𐐀b\r\nx\ny")
	m := NewMapper(content)
	for _, offset := range []int{-1, 100} {
		gotLine, gotChar := m.LineColUTF16(offset)
		// Both results must be -1; checking only the line would let a stray
		// character value slip through unnoticed.
		if gotLine != -1 || gotChar != -1 {
			t.Errorf("LineColUTF16(%d) = (%d,%d), want (-1,-1)", offset, gotLine, gotChar)
		}
	}
}

// TestPosition checks that Mapper.Position yields the expected
// protocol.Position for every entry in the shared tests table.
func TestPosition(t *testing.T) {
	for _, tc := range tests {
		mapper := NewMapper([]byte(tc.content))
		off := tc.offset()
		want := protocol.Position{
			Line:      uint32(tc.wantLine),
			Character: uint32(tc.wantChar),
		}
		if got, ok := mapper.Position(off); !ok {
			t.Error("invalid position for", tc.substrOrOffset)
		} else if got != want {
			t.Errorf("Position(%d) = %v, want %v", off, got, want)
		}
	}
}

// TestRange checks that Mapper.Range, called with a start offset of 0 and each
// table entry's offset as the end, yields a range ending at the expected
// position.
func TestRange(t *testing.T) {
	for _, tc := range tests {
		mapper := NewMapper([]byte(tc.content))
		end := tc.offset()
		got, err := mapper.Range(0, end)
		if err != nil {
			t.Fatal(err)
		}
		// Only End is populated: Start is left as the zero Position, which is
		// what the original expectation used for start offset 0.
		var want protocol.Range
		want.End.Line = uint32(tc.wantLine)
		want.End.Character = uint32(tc.wantChar)
		if got != want {
			t.Errorf("Range(%d) = %v, want %v", end, got, want)
		}
	}
}
59 changes: 59 additions & 0 deletions internal/lsp/lsppos/token.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package lsppos

import (
"errors"
"go/token"

"golang.org/x/tools/internal/lsp/protocol"
)

// TokenMapper maps token.Pos to LSP positions for a single file. It pairs a
// token.File, used to turn a token.Pos into a byte offset, with a Mapper,
// used to turn that byte offset into a protocol.Position.
type TokenMapper struct {
// file is used for bounds-checking positions and computing offsets.
file *token.File

// mapper converts byte offsets to LSP positions.
//
// For now, just delegate to a Mapper for position calculation. As an
// optimization we could avoid building the mapper and just use the file, but
// then have to correctly adjust for newline-terminated files. It is easier
// to just delegate unless performance becomes a concern.
mapper *Mapper
}

// NewTokenMapper returns a TokenMapper for the given content. The provided
// file is used to translate token.Pos values into byte offsets, and a Mapper
// built from content translates those offsets into LSP positions.
func NewTokenMapper(content []byte, file *token.File) *TokenMapper {
	tm := new(TokenMapper)
	tm.file = file
	tm.mapper = NewMapper(content)
	return tm
}

// Position converts pos into the equivalent protocol.Position. The boolean
// result is false when pos is out of bounds for the file being mapped.
func (m *TokenMapper) Position(pos token.Pos) (protocol.Position, bool) {
	base := m.file.Base()
	// Valid positions run from base through base+size inclusive.
	if p := int(pos); p >= base && p <= base+m.file.Size() {
		offset := m.file.Offset(pos) // usage of token.File.Offset is temporarily exempted
		return m.mapper.Position(offset)
	}
	return protocol.Position{}, false
}

// Range converts the start and end positions into a protocol.Range. It
// returns an error if either position is out of bounds for the file being
// mapped.
func (m *TokenMapper) Range(start, end token.Pos) (protocol.Range, error) {
	var (
		rng protocol.Range
		ok  bool
	)
	if rng.Start, ok = m.Position(start); !ok {
		return protocol.Range{}, errors.New("invalid start position")
	}
	if rng.End, ok = m.Position(end); !ok {
		return protocol.Range{}, errors.New("invalid end position")
	}
	return rng, nil
}
57 changes: 57 additions & 0 deletions internal/lsp/lsppos/token_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package lsppos_test

import (
"go/token"
"testing"

. "golang.org/x/tools/internal/lsp/lsppos"
"golang.org/x/tools/internal/lsp/protocol"
)

// makeTokenMapper registers a file named "p.go", sized to content, in a fresh
// token.FileSet, sets its line table from content, and returns a TokenMapper
// over that content together with the file.
func makeTokenMapper(content []byte) (*TokenMapper, *token.File) {
	fset := token.NewFileSet()
	f := fset.AddFile("p.go", -1, len(content))
	f.SetLinesForContent(content)
	return NewTokenMapper(content, f), f
}

// TestInvalidPosition checks that TokenMapper.Position rejects positions that
// lie outside the mapped file.
func TestInvalidPosition(t *testing.T) {
	m, _ := makeTokenMapper([]byte("a𐐀b\r\nx\ny"))

	invalid := [...]token.Pos{-1, 100}
	for i := range invalid {
		pos := invalid[i]
		if posn, ok := m.Position(pos); ok {
			t.Errorf("Position(%d) = %v, want error", pos, posn)
		}
	}
}

// TestTokenPosition checks TokenMapper.Position and TokenMapper.Range against
// every entry in the shared tests table. Offsets from the table are converted
// to token.Pos values by adding the file's base.
func TestTokenPosition(t *testing.T) {
	for _, test := range tests {
		m, f := makeTokenMapper([]byte(test.content))
		pos := token.Pos(f.Base() + test.offset())
		got, ok := m.Position(pos)
		if !ok {
			t.Error("invalid position for", test.substrOrOffset)
			continue
		}
		want := protocol.Position{Line: uint32(test.wantLine), Character: uint32(test.wantChar)}
		if got != want {
			t.Errorf("Position(%d) = %v, want %v", pos, got, want)
		}
		gotRange, err := m.Range(token.Pos(f.Base()), pos)
		if err != nil {
			t.Fatal(err)
		}
		// The range starts at the file base, so Start is the zero Position.
		wantRange := protocol.Range{
			End: want,
		}
		if gotRange != wantRange {
			// Report the ranges that were compared, not the positions from
			// the earlier Position check.
			t.Errorf("Range(%d, %d) = %v, want %v", f.Base(), pos, gotRange, wantRange)
		}
	}
}
13 changes: 5 additions & 8 deletions internal/lsp/source/format.go
Original file line number Diff line number Diff line change
Expand Up @@ -329,21 +329,18 @@ func ProtocolEditsFromSource(src []byte, edits []diff.TextEdit, converter span.C
if err != nil {
return nil, fmt.Errorf("computing offsets: %v", err)
}
startLine, startChar := m.Position(spn.Start().Offset())
endLine, endChar := m.Position(spn.End().Offset())
if startLine < 0 || endLine < 0 {
return nil, fmt.Errorf("out of bound span: %v", spn)
rng, err := m.Range(spn.Start().Offset(), spn.End().Offset())
if err != nil {
return nil, err
}

pstart := protocol.Position{Line: uint32(startLine), Character: uint32(startChar)}
pend := protocol.Position{Line: uint32(endLine), Character: uint32(endChar)}
if pstart == pend && edit.NewText == "" {
if rng.Start == rng.End && edit.NewText == "" {
// Degenerate case, which may result from a diff tool wanting to delete
// '\r' in line endings. Filter it out.
continue
}
result = append(result, protocol.TextEdit{
Range: protocol.Range{Start: pstart, End: pend},
Range: rng,
NewText: edit.NewText,
})
}
Expand Down
Loading

0 comments on commit cde25b3

Please sign in to comment.