Skip to content

Commit

Permalink
refactor: move comment processing to tree-sitter
Browse files Browse the repository at this point in the history
  • Loading branch information
jdkato committed May 5, 2024
1 parent f065051 commit 31b5975
Show file tree
Hide file tree
Showing 20 changed files with 321 additions and 438 deletions.
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ require (
github.com/pierrec/lz4/v4 v4.1.2 // indirect
github.com/rivo/uniseg v0.4.4 // indirect
github.com/shopspring/decimal v1.2.0 // indirect
github.com/smacker/go-tree-sitter v0.0.0-20240423010953-8ba036550382 // indirect
github.com/spf13/cast v1.3.1 // indirect
github.com/ulikunitz/xz v0.5.10 // indirect
github.com/xi2/xz v0.0.0-20171230120015-48954b6210f8 // indirect
Expand Down
3 changes: 3 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@ github.com/sergi/go-diff v1.2.0 h1:XU+rvMAioB0UC3q1MFrIQy4Vo5/4VsRDQQXHsEya6xQ=
github.com/sergi/go-diff v1.2.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=
github.com/shopspring/decimal v1.2.0 h1:abSATXmQEYyShuxI4/vyW3tV1MrKAJzCZ/0zLUXYbsQ=
github.com/shopspring/decimal v1.2.0/go.mod h1:DKyhrW/HYNuLGql+MJL6WCR6knT2jwCFRcu2hWCYk4o=
github.com/smacker/go-tree-sitter v0.0.0-20240423010953-8ba036550382 h1:Cb8njhEbNgGk5lQMM/r1FWvrKT+ysH8H0WV9NAIKAu8=
github.com/smacker/go-tree-sitter v0.0.0-20240423010953-8ba036550382/go.mod h1:q99oHDsbP0xRwmn7Vmob8gbSMNyvJ83OauXPSuHQuKE=
github.com/spf13/cast v1.3.1 h1:nFm6S0SMdyzrzcmThSipiEubIDy8WEXKNZ0UOgiRpng=
github.com/spf13/cast v1.3.1/go.mod h1:Qx5cxh0v+4UWYiBimWS+eyWzqEqokIECu5etghLkUJE=
github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA=
Expand All @@ -153,6 +155,7 @@ github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.4/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
Expand Down
104 changes: 104 additions & 0 deletions internal/lint/code/comments.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
package code

Check failure on line 1 in internal/lint/code/comments.go

View workflow job for this annotation

GitHub Actions / lint

: # github.com/errata-ai/vale/v3/internal/lint/code [github.com/errata-ai/vale/v3/internal/lint/code.test]

import (
"context"
"fmt"
"strings"

sitter "github.com/smacker/go-tree-sitter"
"github.com/smacker/go-tree-sitter/golang"
"github.com/smacker/go-tree-sitter/python"
"github.com/smacker/go-tree-sitter/rust"
)

// Language represents a supported programming language.
//
// It bundles everything getComments needs to extract comments from a source
// file: the tree-sitter grammar, the query that selects comment nodes, and the
// delimiter strings to strip from the captured text.
//
// NOTE: What about haskell, less, perl, php, powershell, r, sass, swift?
type Language struct {
// Delimiters lists the comment markers (for example "//" or "#") that
// getComments trims from the text of each captured node.
Delimiters []string
// Sitter is the tree-sitter grammar handed to the parser and used to
// compile Query.
Sitter *sitter.Language
// Query is the tree-sitter query source; every node it captures is
// reported as a comment.
Query string
}

// Comment represents an in-code comment (line or block).
type Comment struct {
// Text is the captured node's content after the language's delimiters
// have been trimmed from it.
Text string
// Line is the 1-based row on which the comment node starts.
Line int
// Offset is the column of the comment node's start point, exactly as
// reported by tree-sitter (no adjustment is applied, unlike Line).
Offset int
// Scope is "text.comment.block" when Text contains more than one newline
// and "text.comment.line" otherwise.
Scope string
}

// getLanguageFromExt returns the comment-extraction settings for the file
// extension ext (including the leading dot, e.g. ".go").
//
// Supported extensions are ".go", ".rs", and ".py". Any other value yields a
// nil Language and an "unsupported extension" error.
func getLanguageFromExt(ext string) (*Language, error) {
	found := new(Language)

	switch ext {
	case ".go":
		found.Delimiters = []string{"//", "/*", "*/"}
		found.Sitter = golang.GetLanguage()
		found.Query = "(comment)+ @comment"
	case ".rs":
		found.Delimiters = []string{"///", "//"}
		found.Sitter = rust.GetLanguage()
		found.Query = "(line_comment)+ @comment"
	case ".py":
		found.Delimiters = []string{"#"}
		found.Sitter = python.GetLanguage()
		found.Query = "(comment) @comment"
	default:
		return nil, fmt.Errorf("unsupported extension: '%s'", ext)
	}

	return found, nil
}

// getComments parses source with the tree-sitter grammar in lang and returns
// one Comment for every node captured by lang.Query, in the order the query
// cursor yields them.
//
// For each captured node:
//   - Text is the node's content with every entry of lang.Delimiters trimmed
//     from both ends,
//   - Line is the node's start row plus one (1-based),
//   - Offset is the node's start column as reported by tree-sitter,
//   - Scope is "text.comment.block" when Text contains more than one newline
//     and "text.comment.line" otherwise.
//
// An error is returned when the source cannot be parsed or lang.Query does not
// compile; the returned slice is nil in that case.
func getComments(source []byte, lang *Language) ([]Comment, error) {
	// The parser, tree, query, and cursor all expose Close for freeing the
	// memory they hold. Release each one as soon as this call is done instead
	// of leaving it to the garbage collector, which matters when many files
	// are processed in one run.
	parser := sitter.NewParser()
	defer parser.Close()
	parser.SetLanguage(lang.Sitter)

	tree, err := parser.ParseCtx(context.Background(), nil, source)
	if err != nil {
		return nil, fmt.Errorf("parsing source: %w", err)
	}
	defer tree.Close()

	q, err := sitter.NewQuery([]byte(lang.Query), lang.Sitter)
	if err != nil {
		return nil, fmt.Errorf("compiling query %q: %w", lang.Query, err)
	}
	defer q.Close()

	qc := sitter.NewQueryCursor()
	defer qc.Close()
	qc.Exec(q, tree.RootNode())

	var comments []Comment
	for {
		m, ok := qc.NextMatch()
		if !ok {
			break
		}

		for _, c := range m.Captures {
			text := c.Node.Content(source)

			// NOTE(review): strings.Trim treats d as a set of characters, not
			// as a literal prefix/suffix, so delimiter characters are removed
			// from both ends of the text (e.g. a trailing "/" or "*" that is
			// part of the comment's content is dropped too). Kept as-is to
			// preserve existing output; confirm whether that is intended.
			for _, d := range lang.Delimiters {
				text = strings.Trim(text, d)
			}

			// NOTE(review): the threshold is "more than one newline", so text
			// with a single newline is still classified as a line comment —
			// presumably to tolerate a trailing newline inside the node;
			// confirm against the grammars in use.
			scope := "text.comment.line"
			if strings.Count(text, "\n") > 1 {
				scope = "text.comment.block"
			}

			start := c.Node.StartPoint()
			comments = append(comments, Comment{
				Line:   int(start.Row) + 1,
				Offset: int(start.Column),
				Scope:  scope,
				Text:   text,
			})
		}
	}

	return comments, nil
}
61 changes: 61 additions & 0 deletions internal/lint/code/comments_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
package code

import (
"encoding/json"
"fmt"
"io/fs"
"os"
"path/filepath"
"testing"
)

// toJSON renders comments as indented JSON so the result can be compared
// byte-for-byte with the expected fixture files.
//
// The marshalling error is discarded; on failure the returned string is empty.
func toJSON(comments []Comment) string {
	encoded, _ := json.MarshalIndent(comments, "", " ")

	rendered := string(encoded)
	return rendered
}

func TestComments(t *testing.T) {
var cleaned []fs.DirEntry

cases, err := os.ReadDir("../../testdata/comments/in")
if err != nil {
t.Error(err)
}

for _, f := range cases {
if f.Name() == ".DS_Store" {
continue
}
cleaned = append(cleaned, f)
}

for i, f := range cleaned {
b, err1 := os.ReadFile(fmt.Sprintf("../../testdata/comments/in/%s", f.Name()))
if err1 != nil {
t.Error(err1)
}

lang, err2 := getLanguageFromExt(filepath.Ext(f.Name()))
if err2 != nil {
t.Fatal(err2)
}

comments, err3 := getComments(b, lang)
if err3 != nil {
t.Fatal(err3)
}
comments = coalesce(comments)

Check failure on line 47 in internal/lint/code/comments_test.go

View workflow job for this annotation

GitHub Actions / lint

undefined: coalesce (typecheck)

b2, err4 := os.ReadFile(fmt.Sprintf("../../testdata/comments/out/%d.json", i))
if err4 != nil {
t.Fatal(err4)
}

markup := toJSON(comments)
if markup != string(b2) {
bin := filepath.Join("..", "..", "bin", fmt.Sprintf("%d.json", i))
_ = os.WriteFile(bin, []byte(markup), os.ModePerm)
t.Errorf("%s", markup)
}
}
}
Loading

0 comments on commit 31b5975

Please sign in to comment.