diff --git a/bin/.go-1.18.2.pkg b/bin/.go-1.19.2.pkg similarity index 100% rename from bin/.go-1.18.2.pkg rename to bin/.go-1.19.2.pkg diff --git a/bin/go b/bin/go index f4ae4f00..50baf175 120000 --- a/bin/go +++ b/bin/go @@ -1 +1 @@ -.go-1.18.2.pkg \ No newline at end of file +.go-1.19.2.pkg \ No newline at end of file diff --git a/bin/gofmt b/bin/gofmt index f4ae4f00..50baf175 120000 --- a/bin/gofmt +++ b/bin/gofmt @@ -1 +1 @@ -.go-1.18.2.pkg \ No newline at end of file +.go-1.19.2.pkg \ No newline at end of file diff --git a/cmd/participle/files/codegen.go.tmpl b/cmd/participle/codegen.go.tmpl similarity index 91% rename from cmd/participle/files/codegen.go.tmpl rename to cmd/participle/codegen.go.tmpl index 9a601da0..84acbfab 100644 --- a/cmd/participle/files/codegen.go.tmpl +++ b/cmd/participle/codegen.go.tmpl @@ -1,9 +1,13 @@ // Code generated by Participle. DO NOT EDIT. +{{if .Tags}}//go:build {{.Tags}} +{{end -}} package {{.Package}} import ( + "fmt" "io" "strings" + "sync" "unicode/utf8" "regexp/syntax" @@ -12,7 +16,10 @@ import ( ) var _ syntax.Op +var _ fmt.State +const _ = utf8.RuneError +var {{.Name}}BackRefCache sync.Map var {{.Name}}Lexer lexer.Definition = lexer{{.Name}}DefinitionImpl{} type lexer{{.Name}}DefinitionImpl struct {} @@ -77,7 +84,7 @@ func (l *lexer{{.Name}}Impl) Next() (lexer.Token, error) { {{- range $i, $rule := $state.Rules}} {{- if $i}} else {{end -}} {{- if .Pattern -}} - if match := match{{$.Name}}{{.Name}}(l.s, l.p); match[1] != 0 { + if match := match{{$.Name}}{{.Name}}(l.s, l.p, l.states[len(l.states)-1].groups); match[1] != 0 { sym = {{index $.Def.Symbols .Name}} groups = match[:] {{- else if .|IsReturn -}} @@ -103,7 +110,7 @@ func (l *lexer{{.Name}}Impl) Next() (lexer.Token, error) { if len(sample) > 16 { sample = append(sample[:16], []rune("...")...) } - return lexer.Token{}, participle.Errorf(l.pos, "invalid input text %q", sample) + return lexer.Token{}, participle.Errorf(l.pos, "invalid input text %q", string(sample)) } pos := l.pos span := l.s[groups[0]:groups[1]] diff --git a/cmd/participle/gen_lexer_cmd.go b/cmd/participle/gen_lexer_cmd.go index d84e51fc..368b3e2c 100644 --- a/cmd/participle/gen_lexer_cmd.go +++ b/cmd/participle/gen_lexer_cmd.go @@ -18,8 +18,9 @@ import ( type genLexerCmd struct { Name string `help:"Name of the lexer."` Output string `short:"o" help:"Output file."` + Tags string `help:"Build tags to include in the generated file."` Package string `arg:"" required:"" help:"Go package for generated code."` - Lexer string `arg:"" required:"" default:"-" type:"existingfile" help:"JSON representation of a Participle lexer."` + Lexer string `arg:"" default:"-" type:"existingfile" help:"JSON representation of a Participle lexer (read from stdin if omitted)."` } func (c *genLexerCmd) Help() string { @@ -52,7 +53,15 @@ func (c *genLexerCmd) Run() error { if err != nil { return err } - err = generateLexer(os.Stdout, c.Package, def, c.Name) + out := os.Stdout + if c.Output != "" { + out, err = os.Create(c.Output) + if err != nil { + return err + } + defer out.Close() + } + err = generateLexer(out, c.Package, def, c.Name, c.Tags) if err != nil { return err } @@ -60,10 +69,10 @@ func (c *genLexerCmd) Run() error { } var ( - //go:embed files/codegen.go.tmpl + //go:embed codegen.go.tmpl codegenTemplateSource string - codegenBackrefRe = regexp.MustCompile(`(\\+)(\d)`) - codegenTemplate *template.Template = template.Must(template.New("lexgen").Funcs(template.FuncMap{ + codegenBackrefRe = regexp.MustCompile(`(\\+)(\d)`) + codegenTemplate = template.Must(template.New("lexgen").Funcs(template.FuncMap{ "IsPush": func(r lexer.Rule) string { if p, ok := r.Action.(lexer.ActionPush); ok { return p.State @@ -89,14 +98,15 @@ var ( }).Parse(codegenTemplateSource)) ) -func generateLexer(w io.Writer, pkg string, def *lexer.StatefulDefinition, name string) error { +func generateLexer(w io.Writer, pkg string, def *lexer.StatefulDefinition, name, tags string) error { type ctx struct { Package string Name string + Tags string Def *lexer.StatefulDefinition } rules := def.Rules() - err := codegenTemplate.Execute(w, ctx{pkg, name, def}) + err := codegenTemplate.Execute(w, ctx{pkg, name, tags, def}) if err != nil { return err } @@ -140,6 +150,14 @@ func orderRules(rules lexer.Rules) []orderedRule { } func generateRegexMatch(w io.Writer, lexerName, name, pattern string) error { + if codegenBackrefRe.FindStringIndex(pattern) != nil { + fmt.Fprintf(w, "func match%s%s(s string, p int, backrefs []string) (groups []int) {\n", lexerName, name) + fmt.Fprintf(w, " re, err := lexer.BackrefRegex(%sBackRefCache, %q, backrefs)\n", lexerName, pattern) + fmt.Fprintf(w, " if err != nil { panic(fmt.Sprintf(\"%%s: %%s\", err, backrefs)) }\n") + fmt.Fprintf(w, " return re.FindStringSubmatchIndex(s[p:])\n") + fmt.Fprintf(w, "}\n") + return nil + } re, err := syntax.Parse(pattern, syntax.Perl) if err != nil { return err @@ -164,7 +182,7 @@ func generateRegexMatch(w io.Writer, lexerName, name, pattern string) error { } re = re.Simplify() fmt.Fprintf(w, "// %s\n", re) - fmt.Fprintf(w, "func match%s%s(s string, p int) (groups [%d]int) {\n", lexerName, name, 2*re.MaxCap()+2) + fmt.Fprintf(w, "func match%s%s(s string, p int, backrefs []string) (groups [%d]int) {\n", lexerName, name, 2*re.MaxCap()+2) flattened := flatten(re) // Fast-path a single literal. diff --git a/cmd/participle/main.go b/cmd/participle/main.go index 3a215317..4f048ae3 100644 --- a/cmd/participle/main.go +++ b/cmd/participle/main.go @@ -4,10 +4,12 @@ import "github.com/alecthomas/kong" var ( version string = "dev" - cli struct { + + cli struct { Version kong.VersionFlag - Gen struct { - Lexer genLexerCmd `cmd:""` + + Gen struct { + Lexer genLexerCmd `cmd:"" help:"Generate a lexer."` } `cmd:"" help:"Generate code to accelerate Participle."` } ) diff --git a/cmd/railroad/main.go b/cmd/railroad/main.go index 71c1819b..976a296b 100644 --- a/cmd/railroad/main.go +++ b/cmd/railroad/main.go @@ -5,7 +5,6 @@ import ( "embed" "flag" "fmt" - "io/ioutil" "os" "github.com/alecthomas/repr" @@ -26,6 +25,7 @@ type production struct { // Embed the railroad-diagrams css and js files for later output. // From here: https://github.com/tabatkins/railroad-diagrams +// //go:embed assets/* var assets embed.FS @@ -200,7 +200,7 @@ func main() { str := generate(productions, ast) if *outputFile != "" { - err := ioutil.WriteFile(*outputFile, []byte(str), 0644) // nolint + err := os.WriteFile(*outputFile, []byte(str), 0644) // nolint if err != nil { panic(err) } @@ -233,7 +233,7 @@ func writeAssetFiles() (err error) { if err != nil { return err } - err = ioutil.WriteFile(fileName, data, 0644) // nolint + err = os.WriteFile(fileName, data, 0644) // nolint if err != nil { return err } diff --git a/lexer/internal/conformance/conformance_codegen_test.go b/lexer/internal/conformance/conformance_codegen_test.go new file mode 100644 index 00000000..6fde099f --- /dev/null +++ b/lexer/internal/conformance/conformance_codegen_test.go @@ -0,0 +1,14 @@ +//go:build generated + +package conformance_test + +import ( + "testing" + + "github.com/alecthomas/participle/v2/lexer/internal/conformance" +) + +// This should only be run by TestLexerConformanceGenerated. +func TestLexerConformanceGeneratedInternal(t *testing.T) { + testLexer(t, conformance.GeneratedConformanceLexer) +} diff --git a/lexer/internal/conformance/conformance_test.go b/lexer/internal/conformance/conformance_test.go new file mode 100644 index 00000000..aa8dc66c --- /dev/null +++ b/lexer/internal/conformance/conformance_test.go @@ -0,0 +1,164 @@ +package conformance_test + +import ( + "encoding/json" + "flag" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + + "github.com/alecthomas/assert/v2" + "github.com/alecthomas/participle/v2/lexer" +) + +var conformanceLexer = lexer.MustStateful(lexer.Rules{ + "Root": { + {"String", `"`, lexer.Push("String")}, + // {"Heredoc", `<<(\w+)`, lexer.Push("Heredoc")}, + }, + "String": { + {"Escaped", `\\.`, nil}, + {"StringEnd", `"`, lexer.Pop()}, + {"Expr", `\${`, lexer.Push("Expr")}, + {"Char", `[^$"\\]+`, nil}, + }, + "Expr": { + lexer.Include("Root"), + {`Whitespace`, `\s+`, nil}, + {`Oper`, `[-+/*%]`, nil}, + {"Ident", `\w+`, lexer.Push("Reference")}, + {"ExprEnd", `}`, lexer.Pop()}, + }, + "Reference": { + {"Dot", `\.`, nil}, + {"Ident", `\w+`, nil}, + lexer.Return(), + }, + // "Heredoc": { + // {"End", `\1`, lexer.Pop()}, + // lexer.Include("Expr"), + // }, +}) + +type token struct { + Type string + Value string +} + +func testLexer(t *testing.T, lex lexer.Definition) { + t.Helper() + tests := []struct { + name string + input string + expected []token + }{ + {"Push", `"${"Hello ${name + "!"}"}"`, []token{ + {"String", "\""}, + {"Expr", "${"}, + {"String", "\""}, + {"Char", "Hello "}, + {"Expr", "${"}, + {"Ident", "name"}, + {"Whitespace", " "}, + {"Oper", "+"}, + {"Whitespace", " "}, + {"String", "\""}, + {"Char", "!"}, + {"StringEnd", "\""}, + {"ExprEnd", "}"}, + {"StringEnd", "\""}, + {"ExprEnd", "}"}, + {"StringEnd", "\""}, + }}, + {"Reference", `"${user.name}"`, []token{ + {"String", "\""}, + {"Expr", "${"}, + {"Ident", "user"}, + {"Dot", "."}, + {"Ident", "name"}, + {"ExprEnd", "}"}, + {"StringEnd", "\""}, + }}, + // TODO(alecthomas): Once backreferences are supported, this will work. + // {"Backref", `<= len(parent.groups) { - err = fmt.Errorf("invalid group %d from parent with %d groups", n, len(parent.groups)) + if len(groups) == 0 || int(n) >= len(groups) { + err = fmt.Errorf("invalid group %d from parent with %d groups", n, len(groups)) return s } // concatenate the leading \\\\ which are already escaped to the quoted match. - return rematch[1][:len(rematch[1])-1] + regexp.QuoteMeta(parent.groups[n]) + return rematch[1][:len(rematch[1])-1] + regexp.QuoteMeta(groups[n]) }) + fmt.Fprintln(os.Stderr, pattern) if err == nil { re, err = regexp.Compile("^(?:" + pattern + ")") } if err != nil { return nil, fmt.Errorf("invalid backref expansion: %q: %s", pattern, err) } - l.def.backrefCache.Store(key, re) + backrefCache.Store(key, re) return re, nil } diff --git a/lexer/stateful_test.go b/lexer/stateful_test.go index 3e672bd6..31ef6128 100644 --- a/lexer/stateful_test.go +++ b/lexer/stateful_test.go @@ -43,12 +43,17 @@ func TestMarshalUnmarshal(t *testing.T) { func TestStatefulLexer(t *testing.T) { tests := []struct { - name string - rules lexer.Rules - input string - tokens []string - err string + name string + rules lexer.Rules + input string + tokens []string + err string + buildErr string }{ + {name: "InvalidPushTarget", + buildErr: `invalid action for rule "foo": push to unknown state "Invalid"`, + rules: lexer.Rules{"Root": {{`foo`, ``, lexer.Push("Invalid")}}}, + }, {name: "BackrefNoGroups", input: `hello`, err: `1:1: rule "Backref": invalid backref expansion: "\\1": invalid group 1 from parent with 0 groups`, @@ -174,6 +179,12 @@ func TestStatefulLexer(t *testing.T) { for _, test := range tests { t.Run(test.name, func(t *testing.T) { def, err := lexer.New(test.rules) + if test.buildErr != "" { + require.EqualError(t, err, test.buildErr) + return + } else { + require.NoError(t, err) + } require.NoError(t, err) lex, err := def.Lex("", strings.NewReader(test.input)) require.NoError(t, err) diff --git a/scripts/participle b/scripts/participle index eccdee5d..d1744c9a 100755 --- a/scripts/participle +++ b/scripts/participle @@ -1,4 +1,4 @@ #!/bin/bash set -euo pipefail -(cd "$(dirname $0)/../cmd/participle" && go install github.com/alecthomas/participle/v2/cmd/participle) +(cd "$(dirname "$0")/../cmd/participle" && go install github.com/alecthomas/participle/v2/cmd/participle) exec "$(go env GOBIN)/participle" "$@"