From fa71ac8850695cff16d172d83353d5a121588551 Mon Sep 17 00:00:00 2001
From: Alec Thomas
Date: Tue, 27 Sep 2022 19:18:12 +1000
Subject: [PATCH] Conformance tests for the runtime and generated lexers.

The goal is to have a single lexer definition that exercises all the
functionality of the stateful lexer and generated equivalent.

See #264
---
 cmd/participle/{files => }/codegen.go.tmpl      |  18 +-
 cmd/participle/gen_lexer_cmd.go                 |  24 ++-
 cmd/participle/main.go                          |   8 +-
 .../conformance/conformance_codegen_test.go     |  14 ++
 .../internal/conformance/conformance_test.go    | 154 ++++++++++++++++++
 lexer/stateful.go                               |  55 +++++--
 lexer/stateful_test.go                          |  21 ++-
 scripts/participle                              |   2 +-
 8 files changed, 254 insertions(+), 42 deletions(-)
 rename cmd/participle/{files => }/codegen.go.tmpl (88%)
 create mode 100644 lexer/internal/conformance/conformance_codegen_test.go
 create mode 100644 lexer/internal/conformance/conformance_test.go

diff --git a/cmd/participle/files/codegen.go.tmpl b/cmd/participle/codegen.go.tmpl
similarity index 88%
rename from cmd/participle/files/codegen.go.tmpl
rename to cmd/participle/codegen.go.tmpl
index 9a601da0..b410c191 100644
--- a/cmd/participle/files/codegen.go.tmpl
+++ b/cmd/participle/codegen.go.tmpl
@@ -1,9 +1,12 @@
 // Code generated by Participle. DO NOT EDIT.
+{{if .Tags}}//go:build {{.Tags}}
+{{end -}}
 package {{.Package}}
 
 import (
 	"io"
 	"strings"
+	"sync"
 	"unicode/utf8"
 
 	"regexp/syntax"
@@ -12,7 +15,9 @@ import (
 )
 
 var _ syntax.Op
+const _ = utf8.RuneError
 
+var {{.Name}}BackRefCache sync.Map
 var {{.Name}}Lexer lexer.Definition = lexer{{.Name}}DefinitionImpl{}
 
 type lexer{{.Name}}DefinitionImpl struct {}
@@ -33,7 +38,7 @@ func (lexer{{.Name}}DefinitionImpl) LexString(filename string, s string) (lexer.
 			Line:   1,
 			Column: 1,
 		},
-		states: []lexer{{.Name}}State{ {name: "Root"} },
+		states: []lexer.State{ {Name: "Root"} },
 	}, nil
 }
 
@@ -50,16 +55,11 @@ func (d lexer{{.Name}}DefinitionImpl) Lex(filename string, r io.Reader) (lexer.L
 	return d.LexString(filename, s.String())
 }
 
-type lexer{{.Name}}State struct {
-	name   string
-	groups []string
-}
-
 type lexer{{.Name}}Impl struct {
 	s      string
 	p      int
 	pos    lexer.Position
-	states []lexer{{.Name}}State
+	states []lexer.State
 }
 
 func (l *lexer{{.Name}}Impl) Next() (lexer.Token, error) {
@@ -71,7 +71,7 @@ func (l *lexer{{.Name}}Impl) Next() (lexer.Token, error) {
 		groups []int
 		sym    lexer.TokenType
 	)
-	switch state.name {
+	switch state.Name {
 {{- range $state := .Def.Rules|OrderRules}}
 	case "{{$state.Name}}":
 {{- range $i, $rule := $state.Rules}}
@@ -84,7 +84,7 @@ func (l *lexer{{.Name}}Impl) Next() (lexer.Token, error) {
 		if true {
 {{- end}}
 {{- if .|IsPush}}
-			l.states = append(l.states, lexer{{$.Name}}State{name: "{{.|IsPush}}"{{if HaveBackrefs $.Def $state.Name}}, groups: l.sgroups(groups){{end}}})
+			l.states = append(l.states, lexer.State{Name: "{{.|IsPush}}"{{if HaveBackrefs $.Def $state.Name}}, Groups: l.sgroups(groups){{end}}})
 {{- else if (or (.|IsPop) (.|IsReturn))}}
 			l.states = l.states[:len(l.states)-1]
 {{- if .|IsReturn}}
diff --git a/cmd/participle/gen_lexer_cmd.go b/cmd/participle/gen_lexer_cmd.go
index d84e51fc..dc3efa0b 100644
--- a/cmd/participle/gen_lexer_cmd.go
+++ b/cmd/participle/gen_lexer_cmd.go
@@ -18,8 +18,9 @@ import (
 type genLexerCmd struct {
 	Name    string `help:"Name of the lexer."`
 	Output  string `short:"o" help:"Output file."`
+	Tags    string `help:"Build tags to include in the generated file."`
 	Package string `arg:"" required:"" help:"Go package for generated code."`
-	Lexer   string `arg:"" required:"" default:"-" type:"existingfile" help:"JSON representation of a Participle lexer."`
type:"existingfile" help:"JSON representation of a Participle lexer."` + Lexer string `arg:"" default:"-" type:"existingfile" help:"JSON representation of a Participle lexer (read from stdin if omitted)."` } func (c *genLexerCmd) Help() string { @@ -52,7 +53,15 @@ func (c *genLexerCmd) Run() error { if err != nil { return err } - err = generateLexer(os.Stdout, c.Package, def, c.Name) + out := os.Stdout + if c.Output != "" { + out, err = os.Create(c.Output) + if err != nil { + return err + } + defer out.Close() + } + err = generateLexer(out, c.Package, def, c.Name, c.Tags) if err != nil { return err } @@ -60,10 +69,10 @@ func (c *genLexerCmd) Run() error { } var ( - //go:embed files/codegen.go.tmpl + //go:embed codegen.go.tmpl codegenTemplateSource string - codegenBackrefRe = regexp.MustCompile(`(\\+)(\d)`) - codegenTemplate *template.Template = template.Must(template.New("lexgen").Funcs(template.FuncMap{ + codegenBackrefRe = regexp.MustCompile(`(\\+)(\d)`) + codegenTemplate = template.Must(template.New("lexgen").Funcs(template.FuncMap{ "IsPush": func(r lexer.Rule) string { if p, ok := r.Action.(lexer.ActionPush); ok { return p.State @@ -89,14 +98,15 @@ var ( }).Parse(codegenTemplateSource)) ) -func generateLexer(w io.Writer, pkg string, def *lexer.StatefulDefinition, name string) error { +func generateLexer(w io.Writer, pkg string, def *lexer.StatefulDefinition, name, tags string) error { type ctx struct { Package string Name string + Tags string Def *lexer.StatefulDefinition } rules := def.Rules() - err := codegenTemplate.Execute(w, ctx{pkg, name, def}) + err := codegenTemplate.Execute(w, ctx{pkg, name, tags, def}) if err != nil { return err } diff --git a/cmd/participle/main.go b/cmd/participle/main.go index 3a215317..4f048ae3 100644 --- a/cmd/participle/main.go +++ b/cmd/participle/main.go @@ -4,10 +4,12 @@ import "github.com/alecthomas/kong" var ( version string = "dev" - cli struct { + + cli struct { Version kong.VersionFlag - Gen struct { - Lexer genLexerCmd `cmd:""` + + Gen struct { + Lexer genLexerCmd `cmd:"" help:"Generate a lexer."` } `cmd:"" help:"Generate code to accelerate Participle."` } ) diff --git a/lexer/internal/conformance/conformance_codegen_test.go b/lexer/internal/conformance/conformance_codegen_test.go new file mode 100644 index 00000000..6fde099f --- /dev/null +++ b/lexer/internal/conformance/conformance_codegen_test.go @@ -0,0 +1,14 @@ +//go:build generated + +package conformance_test + +import ( + "testing" + + "github.com/alecthomas/participle/v2/lexer/internal/conformance" +) + +// This should only be run by TestLexerConformanceGenerated. 
+func TestLexerConformanceGeneratedInternal(t *testing.T) {
+	testLexer(t, conformance.GeneratedConformanceLexer)
+}
diff --git a/lexer/internal/conformance/conformance_test.go b/lexer/internal/conformance/conformance_test.go
new file mode 100644
index 00000000..f72d4683
--- /dev/null
+++ b/lexer/internal/conformance/conformance_test.go
@@ -0,0 +1,154 @@
+package conformance_test
+
+import (
+	"encoding/json"
+	"flag"
+	"fmt"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"github.com/alecthomas/assert/v2"
+	"github.com/alecthomas/participle/v2/lexer"
+)
+
+var conformanceLexer = lexer.MustStateful(lexer.Rules{
+	"Root": {
+		{"String", `"`, lexer.Push("String")},
+		// {"Heredoc", `<<(\w+\b)`, lexer.Push("Heredoc")},
+	},
+	"String": {
+		{"Escaped", `\\.`, nil},
+		{"StringEnd", `"`, lexer.Pop()},
+		{"Expr", `\${`, lexer.Push("Expr")},
+		{"Char", `[^$"\\]+`, nil},
+	},
+	"Expr": {
+		lexer.Include("Root"),
+		{`Whitespace`, `\s+`, nil},
+		{`Oper`, `[-+/*%]`, nil},
+		{"Ident", `\w+`, lexer.Push("Reference")},
+		{"ExprEnd", `}`, lexer.Pop()},
+	},
+	"Reference": {
+		{"Dot", `\.`, nil},
+		{"Ident", `\w+`, nil},
+		lexer.Return(),
+	},
+	// "Heredoc": {
+	// 	{"End", `\b\1\b`, lexer.Pop()},
+	// 	lexer.Include("Expr"),
+	// },
+})
+
+type token struct {
+	Type  string
+	Value string
+}
+
+func testLexer(t *testing.T, lex lexer.Definition) {
+	t.Helper()
+	tests := []struct {
+		name     string
+		input    string
+		expected []token
+	}{
+		{"Push", `"${"Hello ${name + "!"}"}"`, []token{
+			{"String", "\""},
+			{"Expr", "${"},
+			{"String", "\""},
+			{"Char", "Hello "},
+			{"Expr", "${"},
+			{"Ident", "name"},
+			{"Whitespace", " "},
+			{"Oper", "+"},
+			{"Whitespace", " "},
+			{"String", "\""},
+			{"Char", "!"},
+			{"StringEnd", "\""},
+			{"ExprEnd", "}"},
+			{"StringEnd", "\""},
+			{"ExprEnd", "}"},
+			{"StringEnd", "\""},
+		}},
+		{"Reference", `"${user.name}"`, []token{
+			{"String", "\""},
+			{"Expr", "${"},
+			{"Ident", "user"},
+			{"Dot", "."},
+			{"Ident", "name"},
+			{"ExprEnd", "}"},
+			{"StringEnd", "\""},
+		}},
+	}
+	symbols := lexer.SymbolsByRune(lex)
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			l, err := lex.Lex(test.name, strings.NewReader(test.input))
+			assert.NoError(t, err)
+			tokens, err := lexer.ConsumeAll(l)
+			assert.NoError(t, err)
+			actual := make([]token, len(tokens)-1)
+			for i, t := range tokens {
+				if t.Type == lexer.EOF {
+					continue
+				}
+				actual[i] = token{Type: symbols[t.Type], Value: t.Value}
+			}
+			assert.Equal(t, test.expected, actual)
+		})
+	}
+}
+
+func TestLexerConformanceGenerated(t *testing.T) {
+	genLexer(t)
+	args := []string{"test", "-run", "TestLexerConformanceGeneratedInternal", "-tags", "generated"}
+	// Propagate test flags.
+	flag.CommandLine.VisitAll(func(f *flag.Flag) {
+		if f.Value.String() != f.DefValue {
+			args = append(args, fmt.Sprintf("-%s=%s", f.Name, f.Value.String()))
+		}
+	})
+	cmd := exec.Command("go", args...)
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+	err := cmd.Run()
+	assert.NoError(t, err)
+}
+
+func TestLexerConformance(t *testing.T) {
+	testLexer(t, conformanceLexer)
+}
+
+func genLexer(t *testing.T) {
+	t.Helper()
+	lexerJSON, err := json.Marshal(conformanceLexer)
+	assert.NoError(t, err)
+	cwd, err := os.Getwd()
+	assert.NoError(t, err)
+	generatedConformanceLexer := filepath.Join(cwd, "conformance_lexer_gen.go")
+	t.Cleanup(func() {
+		_ = os.Remove(generatedConformanceLexer)
+	})
+	cmd := exec.Command(
+		"../../../scripts/participle",
+		"gen", "lexer", "conformance",
+		"--tags", "generated",
+		"--name", "GeneratedConformance",
+		"--output", generatedConformanceLexer)
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+	w, err := cmd.StdinPipe()
+	assert.NoError(t, err)
+	defer w.Close()
+	err = cmd.Start()
+	assert.NoError(t, err)
+	_, err = w.Write(lexerJSON)
+	assert.NoError(t, err)
+	err = w.Close()
+	assert.NoError(t, err)
+	err = cmd.Wait()
+	assert.NoError(t, err)
+}
diff --git a/lexer/stateful.go b/lexer/stateful.go
index 1f6f6828..0dbc0f1a 100644
--- a/lexer/stateful.go
+++ b/lexer/stateful.go
@@ -140,6 +140,10 @@ type RulesAction interface {
 	applyRules(state string, rule int, rules compiledRules) error
 }
 
+type validatingRule interface {
+	validate(rules Rules) error
+}
+
 // ActionPop pops to the previous state when the Rule matches.
 type ActionPop struct{}
 
@@ -173,7 +177,14 @@ func (p ActionPush) applyAction(lexer *StatefulLexer, groups []string) error {
 	if groups[0] == "" {
 		return errors.New("did not consume any input")
 	}
-	lexer.stack = append(lexer.stack, lexerState{name: p.State, groups: groups})
+	lexer.stack = append(lexer.stack, State{Name: p.State, Groups: groups})
+	return nil
+}
+
+func (p ActionPush) validate(rules Rules) error {
+	if _, ok := rules[p.State]; !ok {
+		return fmt.Errorf("push to unknown state %q", p.State)
+	}
 	return nil
 }
 
@@ -232,6 +243,11 @@ func New(rules Rules) (*StatefulDefinition, error) {
 	compiled := compiledRules{}
 	for key, set := range rules {
 		for i, rule := range set {
+			if validate, ok := rule.Action.(validatingRule); ok {
+				if err := validate.validate(rules); err != nil {
+					return nil, fmt.Errorf("invalid action for rule %q: %w", rule.Name, err)
+				}
+			}
 			pattern := "^(?:" + rule.Pattern + ")"
 			var (
 				re  *regexp.Regexp
@@ -310,7 +326,7 @@ func (d *StatefulDefinition) LexString(filename string, s string) (Lexer, error)
 	return &StatefulLexer{
 		def:   d,
 		data:  s,
-		stack: []lexerState{{name: "Root"}},
+		stack: []State{{Name: "Root"}},
 		pos: Position{
 			Filename: filename,
 			Line:     1,
@@ -332,14 +348,15 @@ func (d *StatefulDefinition) Symbols() map[string]TokenType { // nolint: golint
 	return d.symbols
 }
 
-type lexerState struct {
-	name   string
-	groups []string
+// State stored when switching states in the lexer.
+type State struct {
+	Name   string
+	Groups []string
 }
 
 // StatefulLexer implementation.
 type StatefulLexer struct {
-	stack []lexerState
+	stack []State
 	def   *StatefulDefinition
 	data  string
 	pos   Position
@@ -346,8 +363,8 @@ type StatefulLexer struct {
 }
 
 func (l *StatefulLexer) Next() (Token, error) { // nolint: golint
 	parent := l.stack[len(l.stack)-1]
-	rules := l.def.rules[parent.name]
+	rules := l.def.rules[parent.Name]
 next:
 	for len(l.data) > 0 {
 		var (
@@ -360,7 +377,7 @@
 			if candidate.Rule == ReturnRule {
 				l.stack = l.stack[:len(l.stack)-1]
 				parent = l.stack[len(l.stack)-1]
-				rules = l.def.rules[parent.name]
+				rules = l.def.rules[parent.Name]
 				continue next
 			}
 			re, err := l.getPattern(candidate)
@@ -405,7 +422,7 @@ next:
 			l.pos.Advance(span)
 			if rule.ignore {
 				parent = l.stack[len(l.stack)-1]
-				rules = l.def.rules[parent.name]
+				rules = l.def.rules[parent.Name]
 				continue
 			}
 			return Token{
@@ -421,12 +438,16 @@ func (l *StatefulLexer) getPattern(candidate compiledRule) (*regexp.Regexp, erro
 	if candidate.RE != nil {
 		return candidate.RE, nil
 	}
+	return BackrefRegex(&l.def.backrefCache, candidate.Pattern, l.stack)
+}
 
+// BackrefRegex compiles a pattern with backreferences substituted from the parent state's groups.
+func BackrefRegex(backrefCache *sync.Map, input string, stack []State) (*regexp.Regexp, error) {
 	// We don't have a compiled RE. This means there are back-references
 	// that need to be substituted first.
-	parent := l.stack[len(l.stack)-1]
-	key := candidate.Pattern + "\000" + strings.Join(parent.groups, "\000")
-	cached, ok := l.def.backrefCache.Load(key)
+	parent := stack[len(stack)-1]
+	key := input + "\000" + strings.Join(parent.Groups, "\000")
+	cached, ok := backrefCache.Load(key)
 	if ok {
 		return cached.(*regexp.Regexp), nil
 	}
@@ -435,19 +456,19 @@ func (l *StatefulLexer) getPattern(candidate compiledRule) (*regexp.Regexp, erro
 		re  *regexp.Regexp
 		err error
 	)
-	pattern := backrefReplace.ReplaceAllStringFunc(candidate.Pattern, func(s string) string {
+	pattern := backrefReplace.ReplaceAllStringFunc(input, func(s string) string {
 		var rematch = backrefReplace.FindStringSubmatch(s)
 		n, nerr := strconv.ParseInt(rematch[2], 10, 64)
 		if nerr != nil {
 			err = nerr
 			return s
 		}
-		if len(parent.groups) == 0 || int(n) >= len(parent.groups) {
-			err = fmt.Errorf("invalid group %d from parent with %d groups", n, len(parent.groups))
+		if len(parent.Groups) == 0 || int(n) >= len(parent.Groups) {
+			err = fmt.Errorf("invalid group %d from parent with %d groups", n, len(parent.Groups))
 			return s
 		}
 		// concatenate the leading \\\\ which are already escaped to the quoted match.
-		return rematch[1][:len(rematch[1])-1] + regexp.QuoteMeta(parent.groups[n])
+		return rematch[1][:len(rematch[1])-1] + regexp.QuoteMeta(parent.Groups[n])
 	})
 	if err == nil {
 		re, err = regexp.Compile("^(?:" + pattern + ")")
@@ -455,6 +476,6 @@ func (l *StatefulLexer) getPattern(candidate compiledRule) (*regexp.Regexp, erro
 	if err != nil {
 		return nil, fmt.Errorf("invalid backref expansion: %q: %s", pattern, err)
 	}
-	l.def.backrefCache.Store(key, re)
+	backrefCache.Store(key, re)
 	return re, nil
 }
diff --git a/lexer/stateful_test.go b/lexer/stateful_test.go
index 3e672bd6..31ef6128 100644
--- a/lexer/stateful_test.go
+++ b/lexer/stateful_test.go
@@ -43,12 +43,17 @@ func TestMarshalUnmarshal(t *testing.T) {
 
 func TestStatefulLexer(t *testing.T) {
 	tests := []struct {
-		name   string
-		rules  lexer.Rules
-		input  string
-		tokens []string
-		err    string
+		name     string
+		rules    lexer.Rules
+		input    string
+		tokens   []string
+		err      string
+		buildErr string
 	}{
+		{name: "InvalidPushTarget",
+			buildErr: `invalid action for rule "foo": push to unknown state "Invalid"`,
+			rules:    lexer.Rules{"Root": {{`foo`, ``, lexer.Push("Invalid")}}},
+		},
 		{name: "BackrefNoGroups",
 			input: `hello`,
 			err:   `1:1: rule "Backref": invalid backref expansion: "\\1": invalid group 1 from parent with 0 groups`,
@@ -174,6 +179,12 @@ func TestStatefulLexer(t *testing.T) {
 	for _, test := range tests {
 		t.Run(test.name, func(t *testing.T) {
 			def, err := lexer.New(test.rules)
+			if test.buildErr != "" {
+				require.EqualError(t, err, test.buildErr)
+				return
+			} else {
+				require.NoError(t, err)
+			}
 			require.NoError(t, err)
 			lex, err := def.Lex("", strings.NewReader(test.input))
 			require.NoError(t, err)
diff --git a/scripts/participle b/scripts/participle
index eccdee5d..d1744c9a 100755
--- a/scripts/participle
+++ b/scripts/participle
@@ -1,4 +1,4 @@
 #!/bin/bash
 set -euo pipefail
-(cd "$(dirname $0)/../cmd/participle" && go install github.com/alecthomas/participle/v2/cmd/participle)
+(cd "$(dirname "$0")/../cmd/participle" && go install github.com/alecthomas/participle/v2/cmd/participle)
 exec "$(go env GOBIN)/participle" "$@"
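Reviewer note (not part of the patch): the conformance suite above drives the public stateful-lexer API that both implementations must agree on — lexer.MustStateful, lexer.Rules, lexer.Push/lexer.Pop, lexer.SymbolsByRune and lexer.ConsumeAll. The following is a minimal standalone sketch of that push/pop flow using a cut-down version of the conformance rules; the rule names, the def variable and the sample input are illustrative only, not part of the change.

package main

import (
	"fmt"
	"strings"

	"github.com/alecthomas/participle/v2/lexer"
)

// Cut-down version of the conformance rules: a string literal containing ${...} expressions.
// Push("String") enters the String state on an opening quote; Pop() leaves it again.
var def = lexer.MustStateful(lexer.Rules{
	"Root": {
		{"String", `"`, lexer.Push("String")},
	},
	"String": {
		{"StringEnd", `"`, lexer.Pop()},
		{"Expr", `\${`, lexer.Push("Expr")},
		{"Char", `[^$"\\]+`, nil},
	},
	"Expr": {
		{"Ident", `\w+`, nil},
		{"ExprEnd", `}`, lexer.Pop()},
	},
})

func main() {
	symbols := lexer.SymbolsByRune(def)
	lex, err := def.Lex("example", strings.NewReader(`"Hello ${name}"`))
	if err != nil {
		panic(err)
	}
	tokens, err := lexer.ConsumeAll(lex)
	if err != nil {
		panic(err)
	}
	for _, tok := range tokens {
		// Prints the token stream, e.g. String "\"", Char "Hello ", Expr "${", Ident "name", ...
		fmt.Printf("%-10s %q\n", symbols[tok.Type], tok.Value)
	}
}

The conformance tests run exactly this kind of token-stream comparison twice — once against the interpreted StatefulDefinition and once against the code generated by `participle gen lexer` — which is why the generated template now shares lexer.State instead of defining its own private state struct.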