From 6057f1c0427f9086d055a456d51cc886d2b5327f Mon Sep 17 00:00:00 2001 From: Chloe Date: Fri, 1 May 2020 11:58:18 +1000 Subject: [PATCH] feat: move arraiParsers to a separated file --- Makefile | 5 +- main.go | 7 +++ syntax/arrai.wbnf | 70 +++++++++++++++++++++++++++ syntax/parse.go | 79 ------------------------------- syntax/parser.go | 84 +++++++++++++++++++++++++++++++++ tools/parser/generate_parser.go | 26 ++++++++++ 6 files changed, 191 insertions(+), 80 deletions(-) create mode 100644 main.go create mode 100644 syntax/arrai.wbnf create mode 100644 syntax/parser.go create mode 100644 tools/parser/generate_parser.go diff --git a/Makefile b/Makefile index 2b5eddbb..9c2afb68 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -all: test lint wasm +all: parser test lint wasm test: go test $(GOTESTFLAGS) -tags timingsensitive ./... @@ -8,3 +8,6 @@ lint: wasm: GOOS=js GOARCH=wasm go build -o /tmp/arrai.wasm ./cmd/arrai + +parser: + go generate main.go diff --git a/main.go b/main.go new file mode 100644 index 00000000..ff10854d --- /dev/null +++ b/main.go @@ -0,0 +1,7 @@ +package main + +//go:generate go run tools/parser/generate_parser.go syntax/arrai.wbnf syntax/parser.go +//go:generate goimports -w syntax/parser.go + +func main() { +} diff --git a/syntax/arrai.wbnf b/syntax/arrai.wbnf new file mode 100644 index 00000000..2734f716 --- /dev/null +++ b/syntax/arrai.wbnf @@ -0,0 +1,70 @@ +expr -> C* amp="&"* @ C* arrow=( + nest | + unnest | + ARROW @ | + binding="->" C* "\\" C* IDENT C* %%bind C* @ | + binding="->" C* %%bind @ + )* C* + > C* @:binop=("with" | "without") C* + > C* @:binop="||" C* + > C* @:binop="&&" C* + > C* @:compare=/{!?(?:<:|<>?=?|>=?|=)} C* + > C* @ if=("if" t=expr ("else" f=expr)?)* C* + > C* @:binop=/{\+\+|[+|]|-%?} C* + > C* @:binop=/{&~|&|~~?|[-<][-&][->]} C* + > C* @:binop=/{//|[*/%]|\\} C* + > C* @:rbinop="^" C* + > C* unop=/{:>|=>|>>|[-+!*^]}* @ C* + > C* @:binop=">>>" C* + > C* @ count="count"? C* touch?
C* + > C* (get | @) tail=( + get + | call=("(" + arg=( + expr (":" end=expr? (":" step=expr)?)? + | ":" end=expr (":" step=expr)? + ):",", + ")") + )* C* + > C* "{" C* rel=(names tuple=("(" v=@:",", ")"):",",?) "}" C* + | C* "{" C* set=(elt=@:",",?) "}" C* + | C* "{" C* dict=((key=@ ":" value=@):",",?) "}" C* + | C* cond=("cond" "(" (key=@ ":" value=@):",",? ("*" ":" f=expr)? ")") C* + | C* cond=(("(" control_var=expr ")" | IDENT)? C* "cond" "(" (key=@ ":" value=@):",",? ("*" ":" f=expr)? ")") C* + | C* "[" C* array=(item=@:",",?) "]" C* + | C* "{:" C* embed=(grammar=@ ":" subgrammar=%%ast) ":}" C* + | C* op="\\\\" @ C* + | C* fn="\\" IDENT @ C* + | C* "//" pkg=( "{" dot="."? PKGPATH "}" | std=IDENT?) + | C* "(" tuple=(pairs=(name? ":" v=@):",",?) ")" C* + | C* "(" @ ")" C* + | C* let=("let" C* IDENT C* "=" C* @ %%bind C* ";" C* @) C* + | C* xstr C* + | C* IDENT C* + | C* STR C* + | C* NUM C*; +nest -> C* "nest" names IDENT C*; +unnest -> C* "unnest" IDENT C*; +touch -> C* ("->*" ("&"? IDENT | STR))+ "(" expr:"," ","? ")" C*; +get -> C* dot="." ("&"? IDENT | STR | "*") C*; +names -> C* "|" C* IDENT:"," C* "|" C*; +name -> C* IDENT C* | C* STR C*; +xstr -> C* quote=/{\$"\s*} part=( sexpr | fragment=/{(?: \\. | \$[^{"] | [^\\"$] )+} )* '"' C* + | C* quote=/{\$'\s*} part=( sexpr | fragment=/{(?: \\. | \$[^{'] | [^\\'$] )+} )* "'" C* + | C* quote=/{\$‵\s*} part=( sexpr | fragment=/{(?: ‵‵ | \$[^{‵] | [^‵ $] )+} )* "‵" C*; +sexpr -> "${" + C* expr C* + control=/{ (?: : [-+#*\.\_0-9a-z]* (?: : (?: \\. | [^\\:}] )* ){0,2} )? } + close=/{\}\s*}; + +ARROW -> /{:>|=>|>>|orderby|order|where|sum|max|mean|median|min}; +IDENT -> /{ \. | [$@A-Za-z_][0-9$@A-Za-z_]* }; +PKGPATH -> /{ (?: \\ | [^\\}] )* }; +STR -> /{ " (?: \\. | [^\\"] )* " + | ' (?: \\. | [^\\'] )* ' + | ‵ (?: ‵‵ | [^‵ ] )* ‵ + }; +NUM -> /{ (?: \d+(?:\.\d*)? | \.\d+ ) (?: [Ee][-+]?\d+ )?
}; +C -> /{ # .* $ }; + +.wrapRE -> /{\s*()\s*}; \ No newline at end of file diff --git a/syntax/parse.go b/syntax/parse.go index 5b6d00f0..e9f06256 100644 --- a/syntax/parse.go +++ b/syntax/parse.go @@ -3,7 +3,6 @@ package syntax import ( "fmt" "log" - "strings" "github.com/arr-ai/wbnf/ast" "github.com/arr-ai/wbnf/wbnf" @@ -22,84 +21,6 @@ import ( // var noParse = &noParseType{} -func unfakeBackquote(s string) string { - return strings.ReplaceAll(s, "‵", "`") -} - -var arraiParsers = wbnf.MustCompile(unfakeBackquote(` -expr -> C* amp="&"* @ C* arrow=( - nest | - unnest | - ARROW @ | - binding="->" C* "\\" C* IDENT C* %%bind C* @ | - binding="->" C* %%bind @ - )* C* - > C* @:binop=("with" | "without") C* - > C* @:binop="||" C* - > C* @:binop="&&" C* - > C* @:compare=/{!?(?:<:|<>?=?|>=?|=)} C* - > C* @ if=("if" t=expr ("else" f=expr)?)* C* - > C* @:binop=/{\+\+|[+|]|-%?} C* - > C* @:binop=/{&~|&|~~?|[-<][-&][->]} C* - > C* @:binop=/{//|[*/%]|\\} C* - > C* @:rbinop="^" C* - > C* unop=/{:>|=>|>>|[-+!*^]}* @ C* - > C* @:binop=">>>" C* - > C* @ count="count"? C* touch? C* - > C* (get | @) tail=( - get - | call=("(" - arg=( - expr (":" end=expr? (":" step=expr)?)? - | ":" end=expr (":" step=expr)? - ):",", - ")") - )* C* - > C* "{" C* rel=(names tuple=("(" v=@:",", ")"):",",?) "}" C* - | C* "{" C* set=(elt=@:",",?) "}" C* - | C* "{" C* dict=((key=@ ":" value=@):",",?) "}" C* - | C* cond=("cond" "(" (key=@ ":" value=@):",",? ("*" ":" f=expr)? ")") C* - | C* cond=(("(" control_var=expr ")" | IDENT)? C* "cond" "(" (key=@ ":" value=@):",",? ("*" ":" f=expr)? ")") C* - | C* "[" C* array=(item=@:",",?) "]" C* - | C* "{:" C* embed=(grammar=@ ":" subgrammar=%%ast) ":}" C* - | C* op="\\\\" @ C* - | C* fn="\\" IDENT @ C* - | C* "//" pkg=( "{" dot="."? PKGPATH "}" | std=IDENT? - ) - | C* "(" tuple=(pairs=(name? ":" v=@):",",?)
")" C* - | C* "(" @ ")" C* - | C* let=("let" C* IDENT C* "=" C* @ %%bind C* ";" C* @) C* - | C* xstr C* - | C* IDENT C* - | C* STR C* - | C* NUM C*; -nest -> C* "nest" names IDENT C*; -unnest -> C* "unnest" IDENT C*; -touch -> C* ("->*" ("&"? IDENT | STR))+ "(" expr:"," ","? ")" C*; -get -> C* dot="." ("&"? IDENT | STR | "*") C*; -names -> C* "|" C* IDENT:"," C* "|" C*; -name -> C* IDENT C* | C* STR C*; -xstr -> C* quote=/{\$"\s*} part=( sexpr | fragment=/{(?: \\. | \$[^{"] | [^\\"$] )+} )* '"' C* - | C* quote=/{\$'\s*} part=( sexpr | fragment=/{(?: \\. | \$[^{'] | [^\\'$] )+} )* "'" C* - | C* quote=/{\$‵\s*} part=( sexpr | fragment=/{(?: ‵‵ | \$[^{‵] | [^‵ $] )+} )* "‵" C*; -sexpr -> "${" - C* expr C* - control=/{ (?: : [-+#*\.\_0-9a-z]* (?: : (?: \\. | [^\\:}] )* ){0,2} )? } - close=/{\}\s*}; - -ARROW -> /{:>|=>|>>|orderby|order|where|sum|max|mean|median|min}; -IDENT -> /{ \. | [$@A-Za-z_][0-9$@A-Za-z_]* }; -PKGPATH -> /{ (?: \\ | [^\\}] )* }; -STR -> /{ " (?: \\. | [^\\"] )* " - | ' (?: \\. | [^\\'] )* ' - | ‵ (?: ‵‵ | [^‵ ] )* ‵ - }; -NUM -> /{ (?: \d+(?:\.\d*)? | \.\d+ ) (?: [Ee][-+]?\d+ )?
}; -C -> /{ # .* $ }; - -.wrapRE -> /{\s*()\s*}; -`), nil) - type ParseContext struct { SourceDir string } diff --git a/syntax/parser.go b/syntax/parser.go new file mode 100644 index 00000000..731af44c --- /dev/null +++ b/syntax/parser.go @@ -0,0 +1,84 @@ +package syntax + +import ( + "strings" + + "github.com/arr-ai/wbnf/wbnf" +) + +func unfakeBackquote(s string) string { + return strings.ReplaceAll(s, "`", "`") +} + +var arraiParsers = wbnf.MustCompile(unfakeBackquote(` +expr -> C* amp="&"* @ C* arrow=( + nest | + unnest | + ARROW @ | + binding="->" C* "\\" C* IDENT C* %%bind C* @ | + binding="->" C* %%bind @ + )* C* + > C* @:binop=("with" | "without") C* + > C* @:binop="||" C* + > C* @:binop="&&" C* + > C* @:compare=/{!?(?:<:|<>?=?|>=?|=)} C* + > C* @ if=("if" t=expr ("else" f=expr)?)* C* + > C* @:binop=/{\+\+|[+|]|-%?} C* + > C* @:binop=/{&~|&|~~?|[-<][-&][->]} C* + > C* @:binop=/{//|[*/%]|\\} C* + > C* @:rbinop="^" C* + > C* unop=/{:>|=>|>>|[-+!*^]}* @ C* + > C* @:binop=">>>" C* + > C* @ count="count"? C* touch? C* + > C* (get | @) tail=( + get + | call=("(" + arg=( + expr (":" end=expr? (":" step=expr)?)? + | ":" end=expr (":" step=expr)? + ):",", + ")") + )* C* + > C* "{" C* rel=(names tuple=("(" v=@:",", ")"):",",?) "}" C* + | C* "{" C* set=(elt=@:",",?) "}" C* + | C* "{" C* dict=((key=@ ":" value=@):",",?) "}" C* + | C* cond=("cond" "(" (key=@ ":" value=@):",",? ("*" ":" f=expr)? ")") C* + | C* cond=(("(" control_var=expr ")" | IDENT)? C* "cond" "(" (key=@ ":" value=@):",",? ("*" ":" f=expr)? ")") C* + | C* "[" C* array=(item=@:",",?) "]" C* + | C* "{:" C* embed=(grammar=@ ":" subgrammar=%%ast) ":}" C* + | C* op="\\\\" @ C* + | C* fn="\\" IDENT @ C* + | C* "//" pkg=( "{" dot="."? PKGPATH "}" | std=IDENT?) + | C* "(" tuple=(pairs=(name? ":" v=@):",",?)
")" C* + | C* "(" @ ")" C* + | C* let=("let" C* IDENT C* "=" C* @ %%bind C* ";" C* @) C* + | C* xstr C* + | C* IDENT C* + | C* STR C* + | C* NUM C*; +nest -> C* "nest" names IDENT C*; +unnest -> C* "unnest" IDENT C*; +touch -> C* ("->*" ("&"? IDENT | STR))+ "(" expr:"," ","? ")" C*; +get -> C* dot="." ("&"? IDENT | STR | "*") C*; +names -> C* "|" C* IDENT:"," C* "|" C*; +name -> C* IDENT C* | C* STR C*; +xstr -> C* quote=/{\$"\s*} part=( sexpr | fragment=/{(?: \\. | \$[^{"] | [^\\"$] )+} )* '"' C* + | C* quote=/{\$'\s*} part=( sexpr | fragment=/{(?: \\. | \$[^{'] | [^\\'$] )+} )* "'" C* + | C* quote=/{\$‵\s*} part=( sexpr | fragment=/{(?: ‵‵ | \$[^{‵] | [^‵ $] )+} )* "‵" C*; +sexpr -> "${" + C* expr C* + control=/{ (?: : [-+#*\.\_0-9a-z]* (?: : (?: \\. | [^\\:}] )* ){0,2} )? } + close=/{\}\s*}; + +ARROW -> /{:>|=>|>>|orderby|order|where|sum|max|mean|median|min}; +IDENT -> /{ \. | [$@A-Za-z_][0-9$@A-Za-z_]* }; +PKGPATH -> /{ (?: \\ | [^\\}] )* }; +STR -> /{ " (?: \\. | [^\\"] )* " + | ' (?: \\. | [^\\'] )* ' + | ‵ (?: ‵‵ | [^‵ ] )* ‵ + }; +NUM -> /{ (?: \d+(?:\.\d*)? | \.\d+ ) (?: [Ee][-+]?\d+ )? }; +C -> /{ # .* $ }; + +.wrapRE -> /{\s*()\s*}; +`), nil) diff --git a/tools/parser/generate_parser.go b/tools/parser/generate_parser.go new file mode 100644 index 00000000..cab09bcb --- /dev/null +++ b/tools/parser/generate_parser.go @@ -0,0 +1,26 @@ +package main + +import ( + "fmt" + "io/ioutil" + "os" +) + +// Reads the grammar file named by os.Args[1] and writes os.Args[2]: package syntax with that grammar as a raw string. +// NOTE(review): the emitted unfakeBackquote maps a backtick to itself; the one removed from parse.go mapped ‵ - confirm. +func main() { + data, err := ioutil.ReadFile(os.Args[1]) + if err != nil { + panic(err) + } + + content := append( + []byte("package syntax\n\nfunc unfakeBackquote(s string) string {\n return strings.ReplaceAll(s, \"`\", \"`\")\n}\n\nvar arraiParsers = wbnf.MustCompile(unfakeBackquote(`\n"), //nolint:lll + data...) + content = append(content, []byte("\n`), nil)")...)
+ err = ioutil.WriteFile(os.Args[2], content, 0644) + if err != nil { + panic(err) + } + fmt.Println("arrai parser generated") +}