Conformance tests for the runtime and generated lexers. (#270)
The goal is to have a single lexer definition that exercises all the
functionality of both the stateful lexer and its generated equivalent.

Backrefs are still not working, but support for them can come a bit later.
alecthomas authored Oct 22, 2022
1 parent 3c918a1 commit d4035d3
Showing 12 changed files with 271 additions and 33 deletions.
File renamed without changes.
2 changes: 1 addition & 1 deletion bin/go
2 changes: 1 addition & 1 deletion bin/gofmt
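
In outline, the new tests work as follows: the rules are defined once in lexer/internal/conformance, a shared table-driven helper asserts the token stream for a set of inputs against any lexer.Definition, and a wrapper test JSON-encodes the same definition, generates a lexer from it with the participle CLI, and re-runs the identical assertions behind a generated build tag. A condensed sketch of that flow (imports and test-flag propagation as in the full conformance_test.go below):

// Condensed from conformance_test.go below: the same expectations run
// against the interpreted stateful lexer and the generated one.
func TestLexerConformance(t *testing.T) {
    testLexer(t, conformanceLexer) // interpreted, stateful lexer
}

func TestLexerConformanceGenerated(t *testing.T) {
    genLexer(t) // writes conformance_lexer_gen.go via the participle CLI
    // Re-run `go test` with -tags generated so that the generated lexer,
    // and the internal test exercising it, are compiled in.
    cmd := exec.Command("go", "test",
        "-run", "TestLexerConformanceGeneratedInternal", "-tags", "generated")
    cmd.Stdout, cmd.Stderr = os.Stdout, os.Stderr
    assert.NoError(t, cmd.Run())
}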
cmd/participle/codegen.go.tmpl
@@ -1,9 +1,13 @@
// Code generated by Participle. DO NOT EDIT.
+{{if .Tags}}//go:build {{.Tags}}
+{{end -}}
package {{.Package}}

import (
+    "fmt"
    "io"
    "strings"
+    "sync"
    "unicode/utf8"
    "regexp/syntax"

@@ -12,7 +16,10 @@ import (
)

var _ syntax.Op
+var _ fmt.State
+const _ = utf8.RuneError

+var {{.Name}}BackRefCache sync.Map
var {{.Name}}Lexer lexer.Definition = lexer{{.Name}}DefinitionImpl{}

type lexer{{.Name}}DefinitionImpl struct {}
@@ -77,7 +84,7 @@ func (l *lexer{{.Name}}Impl) Next() (lexer.Token, error) {
{{- range $i, $rule := $state.Rules}}
{{- if $i}} else {{end -}}
{{- if .Pattern -}}
-if match := match{{$.Name}}{{.Name}}(l.s, l.p); match[1] != 0 {
+if match := match{{$.Name}}{{.Name}}(l.s, l.p, l.states[len(l.states)-1].groups); match[1] != 0 {
sym = {{index $.Def.Symbols .Name}}
groups = match[:]
{{- else if .|IsReturn -}}
@@ -103,7 +110,7 @@ func (l *lexer{{.Name}}Impl) Next() (lexer.Token, error) {
if len(sample) > 16 {
    sample = append(sample[:16], []rune("...")...)
}
-return lexer.Token{}, participle.Errorf(l.pos, "invalid input text %q", sample)
+return lexer.Token{}, participle.Errorf(l.pos, "invalid input text %q", string(sample))
}
pos := l.pos
span := l.s[groups[0]:groups[1]]
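
The template changes above are the runtime half of backref support: each generated lexer now declares a package-level sync.Map to cache compiled regexes, and every matcher receives the capture groups of the state on top of the lexer's stack. lexer.BackrefRegex is known here only from its call site in gen_lexer_cmd.go below; the following is a hypothetical sketch of what such a helper could do, assuming it splices the captured text into \1, \2, ... references and caches the compiled result (the real signature, including how the cache is passed, may differ):

// Hypothetical sketch only; participle's real BackrefRegex may differ.
// It is invoked as lexer.BackrefRegex(cache, pattern, backrefs) and
// returns (*regexp.Regexp, error).
package lexer

import (
    "regexp"
    "strings"
    "sync"
)

func BackrefRegex(cache *sync.Map, pattern string, backrefs []string) (*regexp.Regexp, error) {
    key := pattern + "\x00" + strings.Join(backrefs, "\x00")
    if cached, ok := cache.Load(key); ok {
        return cached.(*regexp.Regexp), nil
    }
    // Substitute each backreference with the literal text it captured.
    // Simplified: ignores escaped backslashes such as `\\1`.
    expanded := regexp.MustCompile(`\\(\d)`).ReplaceAllStringFunc(pattern, func(m string) string {
        n := int(m[1] - '0')
        if n == 0 || n > len(backrefs) {
            return m
        }
        return regexp.QuoteMeta(backrefs[n-1])
    })
    re, err := regexp.Compile(expanded)
    if err == nil {
        cache.Store(key, re)
    }
    return re, err
}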
34 changes: 26 additions & 8 deletions cmd/participle/gen_lexer_cmd.go
@@ -18,8 +18,9 @@ import (
type genLexerCmd struct {
    Name    string `help:"Name of the lexer."`
    Output  string `short:"o" help:"Output file."`
+   Tags    string `help:"Build tags to include in the generated file."`
    Package string `arg:"" required:"" help:"Go package for generated code."`
-   Lexer   string `arg:"" required:"" default:"-" type:"existingfile" help:"JSON representation of a Participle lexer."`
+   Lexer   string `arg:"" default:"-" type:"existingfile" help:"JSON representation of a Participle lexer (read from stdin if omitted)."`
}

func (c *genLexerCmd) Help() string {
@@ -52,18 +53,26 @@ func (c *genLexerCmd) Run() error {
    if err != nil {
        return err
    }
-   err = generateLexer(os.Stdout, c.Package, def, c.Name)
+   out := os.Stdout
+   if c.Output != "" {
+       out, err = os.Create(c.Output)
+       if err != nil {
+           return err
+       }
+       defer out.Close()
+   }
+   err = generateLexer(out, c.Package, def, c.Name, c.Tags)
    if err != nil {
        return err
    }
    return nil
}

var (
-   //go:embed files/codegen.go.tmpl
+   //go:embed codegen.go.tmpl
    codegenTemplateSource string
-   codegenBackrefRe = regexp.MustCompile(`(\\+)(\d)`)
-   codegenTemplate  *template.Template = template.Must(template.New("lexgen").Funcs(template.FuncMap{
+   codegenBackrefRe = regexp.MustCompile(`(\\+)(\d)`)
+   codegenTemplate  = template.Must(template.New("lexgen").Funcs(template.FuncMap{
        "IsPush": func(r lexer.Rule) string {
            if p, ok := r.Action.(lexer.ActionPush); ok {
                return p.State
@@ -89,14 +98,15 @@ var (
    }).Parse(codegenTemplateSource))
)

-func generateLexer(w io.Writer, pkg string, def *lexer.StatefulDefinition, name string) error {
+func generateLexer(w io.Writer, pkg string, def *lexer.StatefulDefinition, name, tags string) error {
    type ctx struct {
        Package string
        Name    string
+       Tags    string
        Def     *lexer.StatefulDefinition
    }
    rules := def.Rules()
-   err := codegenTemplate.Execute(w, ctx{pkg, name, def})
+   err := codegenTemplate.Execute(w, ctx{pkg, name, tags, def})
    if err != nil {
        return err
    }
@@ -140,6 +150,14 @@ func orderRules(rules lexer.Rules) []orderedRule {
}

func generateRegexMatch(w io.Writer, lexerName, name, pattern string) error {
+   if codegenBackrefRe.FindStringIndex(pattern) != nil {
+       fmt.Fprintf(w, "func match%s%s(s string, p int, backrefs []string) (groups []int) {\n", lexerName, name)
+       fmt.Fprintf(w, " re, err := lexer.BackrefRegex(%sBackRefCache, %q, backrefs)\n", lexerName, pattern)
+       fmt.Fprintf(w, " if err != nil { panic(fmt.Sprintf(\"%%s: %%s\", err, backrefs)) }\n")
+       fmt.Fprintf(w, " return re.FindStringSubmatchIndex(s[p:])\n")
+       fmt.Fprintf(w, "}\n")
+       return nil
+   }
    re, err := syntax.Parse(pattern, syntax.Perl)
    if err != nil {
        return err
@@ -164,7 +182,7 @@ func generateRegexMatch(w io.Writer, lexerName, name, pattern string) error {
    }
    re = re.Simplify()
    fmt.Fprintf(w, "// %s\n", re)
-   fmt.Fprintf(w, "func match%s%s(s string, p int) (groups [%d]int) {\n", lexerName, name, 2*re.MaxCap()+2)
+   fmt.Fprintf(w, "func match%s%s(s string, p int, backrefs []string) (groups [%d]int) {\n", lexerName, name, 2*re.MaxCap()+2)
    flattened := flatten(re)

    // Fast-path a single literal.
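
After this change every generated matcher accepts the current backreference captures, whether or not its pattern uses them, which keeps the call sites emitted by the template uniform. As a hand-written illustration (not actual generator output), a rule String matching `"` in a lexer named Conformance would come out roughly as:

// Rough shape of a generated matcher after this commit (illustrative only).
// backrefs is unused for plain patterns but keeps all matchers
// call-compatible with the backref-based ones.
func matchConformanceString(s string, p int, backrefs []string) (groups [2]int) {
    // Fast-path a single literal, as the generator does for one-rune patterns.
    // No match leaves groups as [0, 0], which callers test via match[1] != 0.
    if p < len(s) && s[p] == '"' {
        groups[0] = p
        groups[1] = p + 1
    }
    return
}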
8 changes: 5 additions & 3 deletions cmd/participle/main.go
@@ -4,10 +4,12 @@ import "github.com/alecthomas/kong"

var (
    version string = "dev"
-   cli     struct {
+
+   cli struct {
        Version kong.VersionFlag
-       Gen     struct {
-           Lexer genLexerCmd `cmd:""`
+
+       Gen struct {
+           Lexer genLexerCmd `cmd:"" help:"Generate a lexer."`
        } `cmd:"" help:"Generate code to accelerate Participle."`
    }
)
6 changes: 3 additions & 3 deletions cmd/railroad/main.go
@@ -5,7 +5,6 @@ import (
"embed"
"flag"
"fmt"
"io/ioutil"
"os"

"github.com/alecthomas/repr"
@@ -26,6 +25,7 @@ type production struct {

// Embed the railroad-diagrams css and js files for later output.
// From here: https://github.com/tabatkins/railroad-diagrams
+//
//go:embed assets/*
var assets embed.FS

@@ -200,7 +200,7 @@ func main() {
    str := generate(productions, ast)

    if *outputFile != "" {
-       err := ioutil.WriteFile(*outputFile, []byte(str), 0644) // nolint
+       err := os.WriteFile(*outputFile, []byte(str), 0644) // nolint
        if err != nil {
            panic(err)
        }
@@ -233,7 +233,7 @@ func writeAssetFiles() (err error) {
        if err != nil {
            return err
        }
-       err = ioutil.WriteFile(fileName, data, 0644) // nolint
+       err = os.WriteFile(fileName, data, 0644) // nolint
        if err != nil {
            return err
        }
14 changes: 14 additions & 0 deletions lexer/internal/conformance/conformance_codegen_test.go
@@ -0,0 +1,14 @@
//go:build generated

package conformance_test

import (
    "testing"

    "github.com/alecthomas/participle/v2/lexer/internal/conformance"
)

// This should only be run by TestLexerConformanceGenerated.
func TestLexerConformanceGeneratedInternal(t *testing.T) {
    testLexer(t, conformance.GeneratedConformanceLexer)
}
164 changes: 164 additions & 0 deletions lexer/internal/conformance/conformance_test.go
@@ -0,0 +1,164 @@
package conformance_test

import (
    "encoding/json"
    "flag"
    "fmt"
    "os"
    "os/exec"
    "path/filepath"
    "strings"
    "testing"

    "github.com/alecthomas/assert/v2"
    "github.com/alecthomas/participle/v2/lexer"
)

var conformanceLexer = lexer.MustStateful(lexer.Rules{
    "Root": {
        {"String", `"`, lexer.Push("String")},
        // {"Heredoc", `<<(\w+)`, lexer.Push("Heredoc")},
    },
    "String": {
        {"Escaped", `\\.`, nil},
        {"StringEnd", `"`, lexer.Pop()},
        {"Expr", `\${`, lexer.Push("Expr")},
        {"Char", `[^$"\\]+`, nil},
    },
    "Expr": {
        lexer.Include("Root"),
        {`Whitespace`, `\s+`, nil},
        {`Oper`, `[-+/*%]`, nil},
        {"Ident", `\w+`, lexer.Push("Reference")},
        {"ExprEnd", `}`, lexer.Pop()},
    },
    "Reference": {
        {"Dot", `\.`, nil},
        {"Ident", `\w+`, nil},
        lexer.Return(),
    },
    // "Heredoc": {
    //     {"End", `\1`, lexer.Pop()},
    //     lexer.Include("Expr"),
    // },
})

type token struct {
    Type  string
    Value string
}

func testLexer(t *testing.T, lex lexer.Definition) {
    t.Helper()
    tests := []struct {
        name     string
        input    string
        expected []token
    }{
        {"Push", `"${"Hello ${name + "!"}"}"`, []token{
            {"String", "\""},
            {"Expr", "${"},
            {"String", "\""},
            {"Char", "Hello "},
            {"Expr", "${"},
            {"Ident", "name"},
            {"Whitespace", " "},
            {"Oper", "+"},
            {"Whitespace", " "},
            {"String", "\""},
            {"Char", "!"},
            {"StringEnd", "\""},
            {"ExprEnd", "}"},
            {"StringEnd", "\""},
            {"ExprEnd", "}"},
            {"StringEnd", "\""},
        }},
        {"Reference", `"${user.name}"`, []token{
            {"String", "\""},
            {"Expr", "${"},
            {"Ident", "user"},
            {"Dot", "."},
            {"Ident", "name"},
            {"ExprEnd", "}"},
            {"StringEnd", "\""},
        }},
        // TODO(alecthomas): Once backreferences are supported, this will work.
        // {"Backref", `<<EOF
        // heredoc
        // EOF`, []token{
        //     {"Heredoc", "<<EOF"},
        //     {"Whitespace", "\n"},
        //     {"Ident", "heredoc"},
        //     {"Whitespace", "\n"},
        //     {"End", "EOF"},
        // }},
    }
    symbols := lexer.SymbolsByRune(lex)
    for _, test := range tests {
        t.Run(test.name, func(t *testing.T) {
            l, err := lex.Lex(test.name, strings.NewReader(test.input))
            assert.NoError(t, err)
            tokens, err := lexer.ConsumeAll(l)
            assert.NoError(t, err)
            // ConsumeAll ends with an EOF token; drop it from the comparison.
            actual := make([]token, len(tokens)-1)
            for i, t := range tokens {
                if t.Type == lexer.EOF {
                    continue
                }
                actual[i] = token{Type: symbols[t.Type], Value: t.Value}
            }
            assert.Equal(t, test.expected, actual)
        })
    }
}

func TestLexerConformanceGenerated(t *testing.T) {
    genLexer(t)
    args := []string{"test", "-run", "TestLexerConformanceGeneratedInternal", "-tags", "generated"}
    // Propagate test flags.
    flag.CommandLine.VisitAll(func(f *flag.Flag) {
        if f.Value.String() != f.DefValue {
            args = append(args, fmt.Sprintf("-%s=%s", f.Name, f.Value.String()))
        }
    })
    cmd := exec.Command("go", args...)
    cmd.Stdout = os.Stdout
    cmd.Stderr = os.Stderr
    err := cmd.Run()
    assert.NoError(t, err)
}

func TestLexerConformance(t *testing.T) {
    testLexer(t, conformanceLexer)
}

func genLexer(t *testing.T) {
    t.Helper()
    lexerJSON, err := json.Marshal(conformanceLexer)
    assert.NoError(t, err)
    cwd, err := os.Getwd()
    assert.NoError(t, err)
    generatedConformanceLexer := filepath.Join(cwd, "conformance_lexer_gen.go")
    t.Cleanup(func() {
        _ = os.Remove(generatedConformanceLexer)
    })
    cmd := exec.Command(
        "../../../scripts/participle",
        "gen", "lexer", "conformance",
        "--tags", "generated",
        "--name", "GeneratedConformance",
        "--output", generatedConformanceLexer)
    cmd.Stdout = os.Stdout
    cmd.Stderr = os.Stderr
    w, err := cmd.StdinPipe()
    assert.NoError(t, err)
    defer w.Close()
    err = cmd.Start()
    assert.NoError(t, err)
    _, err = w.Write(lexerJSON)
    assert.NoError(t, err)
    err = w.Close()
    assert.NoError(t, err)
    err = cmd.Wait()
    assert.NoError(t, err)
}