[WIP] Implement (possibly?) the entire Rust grammar. #13

eddyb · 2018-11-01T21:33:06Z

Tested at rust-lang/rust@f6e9a6e, after git submodule update --recursive --init:

Out of 13485 Rust files tested:
* 1078 parsed fully and unambiguously
* 12166 parsed fully (but ambiguously)
* 188 parsed partially (only a prefix)
* 50 didn't parse at all (lexer error?)

The "partial / prefix parse" bit pretty much means there's a syntax error somewhere within the file.

(click to see the exhaustive list of files that didn't parse)

src/test/ui/similar-tokens.rs
src/test/ui/parser/issue-1655.rs
src/test/ui/parser/duplicate-visibility.rs
src/test/ui/parser/removed-syntax-fixed-vec.rs
src/test/ui/parser/no-unsafe-self.rs
src/test/ui/parser/lifetime-semicolon.rs
src/test/ui/parser/issue-30318.rs
src/test/ui/parser/virtual-structs.rs
src/test/ui/parser/brace-after-qualified-path-in-match.rs
src/test/ui/parser/issue-33413.rs
src/test/ui/parser/issue-21153.rs
src/test/ui/parser/default.rs
src/test/ui/parser/paamayim-nekudotayim.rs
src/test/ui/parser/removed-syntax-with-2.rs
src/test/ui/parser/type-parameters-in-field-exprs.rs
src/test/ui/parser/doc-before-identifier.rs
src/test/ui/parser/macro-bad-delimiter-ident.rs
src/test/ui/parser/pat-tuple-4.rs
src/test/ui/parser/bad-name.rs
src/test/ui/parser/extern-crate-unexpected-token.rs
src/test/ui/parser/removed-syntax-record.rs
src/test/ui/parser/pat-tuple-5.rs
src/test/ui/parser/issue-20711.rs
src/test/ui/parser/removed-syntax-field-let.rs
src/test/ui/parser/raw-byte-string-literals.rs
src/test/ui/parser/removed-syntax-enum-newtype.rs
src/test/ui/parser/issue-27255.rs
src/test/ui/parser/attr.rs
src/test/ui/parser/issue-41155.rs
src/test/ui/parser/bounds-lifetime-where-1.rs
src/test/ui/parser/attr-bad-meta.rs
src/test/ui/parser/doc-before-rbrace.rs
src/test/ui/parser/raw-byte-string-eof.rs
src/test/ui/parser/macros-no-semicolon-items.rs
src/test/ui/parser/issue-32505.rs
src/test/ui/parser/pat-tuple-1.rs
src/test/ui/parser/bounds-type-where.rs
src/test/ui/parser/doc-before-fn-rbrace.rs
src/test/ui/parser/bound-single-question-mark.rs
src/test/ui/parser/use-ends-with-mod-sep.rs
src/test/ui/parser/issue-20711-2.rs
src/test/ui/parser/impl-parsing.rs
src/test/ui/parser/multiline-comment-line-tracking.rs
src/test/ui/parser/raw-str-unbalanced.rs
src/test/ui/parser/bind-struct-early-modifiers.rs
src/test/ui/parser/attr-before-eof.rs
src/test/ui/parser/removed-syntax-extern-const.rs
src/test/ui/parser/import-from-path.rs
src/test/ui/parser/inner-attr-after-doc-comment.rs
src/test/ui/parser/bad-pointer-type.rs
src/test/ui/parser/issue-6610.rs
src/test/ui/parser/removed-syntax-mode.rs
src/test/ui/parser/trait-pub-method.rs
src/test/ui/parser/removed-syntax-mut-vec-expr.rs
src/test/ui/parser/removed-syntax-uniq-mut-ty.rs
src/test/ui/parser/struct-field-numeric-shorthand.rs
src/test/ui/parser/doc-before-attr.rs
src/test/ui/parser/where-clauses-no-bounds-or-predicates.rs
src/test/ui/parser/column-offset-1-based.rs
src/test/ui/parser/inverted-parameters.rs
src/test/ui/parser/doc-before-struct-rbrace-2.rs
src/test/ui/parser/trait-pub-assoc-ty.rs
src/test/ui/parser/issue-24375.rs
src/test/ui/parser/issue-15980.rs
src/test/ui/parser/associated-types-project-from-hrtb-explicit.rs
src/test/ui/parser/extern-no-fn.rs
src/test/ui/parser/trait-pub-assoc-const.rs
src/test/ui/parser/bounds-lifetime-2.rs
src/test/ui/parser/better-expected.rs
src/test/ui/parser/removed-syntax-fn-sigil.rs
src/test/ui/parser/not-a-pred.rs
src/test/ui/parser/doc-before-mod-rbrace.rs
src/test/ui/parser/pat-lt-bracket-7.rs
src/test/ui/parser/doc-before-struct-rbrace-1.rs
src/test/ui/parser/removed-syntax-closure-lifetime.rs
src/test/ui/parser/attrs-after-extern-mod.rs
src/test/ui/parser/issue-17904-2.rs
src/test/ui/parser/issue-24780.rs
src/test/ui/parser/removed-syntax-mut-vec-ty.rs
src/test/ui/parser/paren-after-qualified-path-in-match.rs
src/test/ui/parser/issue-32214.rs
src/test/ui/parser/recover-enum2.rs
src/test/ui/parser/doc-before-eof.rs
src/test/ui/parser/bounds-lifetime-1.rs
src/test/ui/parser/doc-after-struct-field.rs
src/test/ui/parser/attr-dangling-in-mod.rs
src/test/ui/parser/doc-before-semi.rs
src/test/ui/parser/issue-33455.rs
src/test/ui/parser/import-glob-rename.rs
src/test/ui/parser/import-from-rename.rs
src/test/ui/parser/removed-syntax-field-semicolon.rs
src/test/ui/parser/extern-foreign-crate.rs
src/test/ui/parser/import-glob-path.rs
src/test/ui/parser/use-as-where-use-ends-with-mod-sep.rs
src/test/ui/parser/class-implements-bad-trait.rs
src/test/ui/parser/pat-lt-bracket-2.rs
src/test/ui/parser/attr-bad-meta-3.rs
src/test/ui/parser/issue-10392-2.rs
src/test/ui/parser/attr-dangling-in-fn.rs
src/test/ui/parser/regions-out-of-scope-slice.rs
src/test/ui/parser/removed-syntax-static-fn.rs
src/test/ui/parser/trait-bounds-not-on-impl.rs
src/test/ui/parser/unsized.rs
src/test/ui/parser/empty-impl-semicolon.rs
src/test/ui/parser/attr-bad-meta-2.rs
src/test/ui/parser/inner-attr.rs
src/test/ui/parser/doc-before-extern-rbrace.rs
src/test/ui/parser/issue-19096.rs
src/test/ui/parser/issue-19398.rs
src/test/ui/parser/pat-ref-enum.rs
src/test/ui/parser/issue-32446.rs
src/test/ui/parser/trait-object-lifetime-parens.rs
src/test/ui/parser/bounds-type.rs
src/test/ui/parser/issue-17904.rs
src/test/ui/parser/pat-lt-bracket-1.rs
src/test/ui/parser/issue-17718-const-mut.rs
src/test/ui/parser/removed-syntax-ptr-lifetime.rs
src/test/ui/parser/trait-object-bad-parens.rs
src/test/ui/parser/multitrait.rs
src/test/ui/parser/extern-expected-fn-or-brace.rs
src/test/ui/structs/struct-field-init-syntax.rs
src/test/ui/structs/struct-duplicate-comma.rs
src/test/ui/structs/struct-missing-comma.rs
src/test/ui/tuple/tuple-struct-fields/test.rs
src/test/ui/invalid/invalid-variadic-function.rs
src/test/ui/issues/issue-20616-5.rs
src/test/ui/issues/issue-49257.rs
src/test/ui/issues/issue-34222-1.rs
src/test/ui/issues/issue-20616-4.rs
src/test/ui/issues/issue-20616-6.rs
src/test/ui/issues/issue-46186.rs
src/test/ui/issues/issue-20616-7.rs
src/test/ui/issues/issue-20616-3.rs
src/test/ui/issues/issue-39616.rs
src/test/ui/issues/issue-20616-2.rs
src/test/ui/issues/issue-20616-1.rs
src/test/ui/issues/issue-48636.rs
src/test/ui/issues/issue-20616-8.rs
src/test/ui/issues/issue-20616-9.rs
src/test/ui/issues/issue-44021.rs
src/test/ui/issues/issue-45296.rs
src/test/ui/issues/issue-28433.rs
src/test/ui/issues/issue-43196.rs
src/test/ui/issues/issue-49040.rs
src/test/ui/issues/auxiliary/issue-21146-inc.rs
src/test/ui/self/self-vs-path-ambiguity.rs
src/test/ui/traits/trait-alias.rs
src/test/ui/parse-error-correct.rs
src/test/ui/resolve/issue-54379.rs
src/test/ui/missing/missing-block-hint.rs
src/test/ui/pub/pub-restricted-non-path.rs
src/test/ui/pub/pub-ident-struct.rs
src/test/ui/pub/pub-ident-fn-or-struct.rs
src/test/ui/pub/pub-ident-fn-or-struct-2.rs
src/test/ui/pub/pub-restricted-error-fn.rs
src/test/ui/pub/pub-restricted-error.rs
src/test/ui/pub/pub-ident-fn.rs
src/test/ui/pub/pub-ident-fn-2.rs
src/test/ui/pub/pub-restricted.rs
src/test/ui/attrs-with-no-formal-in-generics/attrs-with-no-formal-in-generics-1.rs
src/test/ui/attrs-with-no-formal-in-generics/attrs-with-no-formal-in-generics-3.rs
src/test/ui/attrs-with-no-formal-in-generics/attrs-with-no-formal-in-generics-2.rs
src/test/ui/macros/macro-attribute.rs
src/test/ui/exclusive-range/exclusive_range_pattern_syntax_collision3.rs
src/test/ui/error-codes/E0585.rs
src/test/ui/error-codes/E0586.rs
src/test/ui/extern/extern-const.rs
src/test/ui/impossible_range.rs
src/test/ui/associated-types/issue-36499.rs
src/test/ui/type/type-ascription-instead-of-statement-end.rs
src/test/ui/did_you_mean/issue-48492-tuple-destructure-missing-parens.rs
src/test/ui/did_you_mean/issue-41679-tilde-bitwise-negation-attempt.rs
src/test/ui/did_you_mean/issue-54109-and_instead_of_ampersands.rs
src/test/ui/did_you_mean/bad-assoc-pat.rs
src/test/ui/did_you_mean/multiple-pattern-typo.rs
src/test/ui/did_you_mean/bad-assoc-ty.rs
src/test/ui/did_you_mean/issue-40006.rs
src/test/ui/did_you_mean/issue-46718-struct-pattern-dotdotdot.rs
src/test/ui/bad/bad-crate-name.rs
src/test/ui/trait-alias-fail.rs
src/test/ui/obsolete-syntax-impl-for-dotdot.rs
src/test/ui/feature-gates/feature-gate-extern_prelude.rs
src/test/ui/syntax-trait-polarity.rs
src/test/run-pass/macros/auxiliary/macro-include-items-expr.rs
src/test/run-pass/macros/auxiliary/macro-comma-support.rs
src/test/run-pass/shebang.rs
src/tools/clang/test/Sema/renderscript.rs
src/tools/rustfmt/tests/coverage/target/comments.rs

AFAICT, none of those should parse, but I haven't double-checked against libsyntax yet.
Also, it might be useful to do the reverse (to make sure we don't parse more than rustc).

In order to get as much as possible to parse, I did include unstable syntax. However, we might be able to separate it out, or even just comment it out for now.

There are 3 blacklisted files, because they're unfeasibly large to parse currently:

I might end up using something similar to them to benchmark improvements to lykenware/gll.

eddyb · 2018-11-02T07:37:57Z

I may have come up with a way to separate stable and unstable features: adjoining grammars - that is, making gll::Grammar::extend, when it sees rules with the same name, | them together, instead of panicking (or, worse, overwriting one of the definitions with the other).

eddyb · 2018-11-02T09:36:48Z

cc @petrochenkov @jseyfried @Manishearth @nikomatsakis @nrc @pnkfelix
AFAIK all of you also have some level of interest in Rust's grammar, so it's be great if you could look over and point anything out, either where I've been too specific, too general, or just comments that should be added to the grammar, about various quirks, peculiarities, etc.

I should probably also spend some time reading previous attempts at a Rust grammar, to see if they hold some information I might be missing here.

Note, however, that this particular PR is not an at attempt at disambiguating and fully restricting the grammar, to one specific parse tree (even if it does that 8% of the time already).

(there's also the option of removing some of the restrictions, in unambiguous situations; I mean, it's almost 2019 and we still haven't standardized on fully general CFG parsing everywhere?!)

matklad · 2018-11-02T09:49:39Z

grammar/item.g

+    Variadic:"..." |
+    RegularAndVariadic:{ args:FnArg+ % "," "," "..." };
+FnArg =
+    SelfValue:{ mutable:"mut"? "self" } |


This misses type ascription I think (which is stable in limited form)

struct S; impl S { fn f(self: S) {} } fn main() {}

Note that that parses, but self there is just a binding pattern.

@Centril do we yet have a proposal for "self" "@" Pat { ":" Type }?, or were there previous attempts that died out? IMO it would be a natural extension.

@eddyb that would just fall out from having self be a valid pattern + intersection patterns; i.e.

pat ::= "self" | pat "@" pat | pat ":" type | ... ;

I've mentioned the possibility of making self a valid pattern in https://github.com/Centril/rfcs/blob/rfc/generalized-type-ascription/text/0000-generalized-type-ascription.md#self-as-a-pattern but it isn't part of the proposal right now.

matklad · 2018-11-02T09:51:38Z

Am I correct that this grammar does not reflect peculiarities around "restrictions" ? That, that some expressions must be terminated by ; in statments and ',' in match arms, and that conditions can't contain struct literals? What's the plan with dealing with them?

eddyb · 2018-11-02T10:00:34Z

@matklad I think that's a long-term question, that this WG should figure out, eventually.
I wanted to get something out the door ASAP, so we can start experimenting with checking for consistency with libsyntax and syn, start writing tests for all the nonterminals, etc.

@eternaleye suggested using boolean grammars to indicate such things, but even though it would be possible to extend the GLL implementation to handle conjunction and negation, natively, I would prefer if we waited for something based on rewriting the grammar to a CFG (similar to what you'd do with propagating the restriction down through grammar templates, but automated).

We might introduce templates into the notation, but I'd rather not make a mess of things with it.

If we end up with two grammars (because this initial one is pretty small and could be kept around anyway), I would want to have some sort of naive "inclusion check", that would ensure that the refined grammar parses strictly less than the simple, more general, one.

petrochenkov · 2018-11-02T10:24:09Z

I usually assign myself to PRs/issues I need to do something with, to not forget about them.
What GitHub team I need to be included into to be able to do it in this repo?
Or perhaps this repo should be included to the list of repos accessible to rust-lang-nursery/compiler team?

eddyb · 2018-11-02T11:22:00Z

@petrochenkov Oh that's annoying, this is a different org than rust-lang, so I can't share teams between them. I'll try to figure something out though.
EDIT: done, @rust-lang-nursery/wg-grammar should now be a team.

matklad · 2018-11-05T08:31:44Z

grammar/generics.g

+WhereClause = "where" bounds:WhereBound* % "," ","?;
+WhereBound =
+    Lifetime:{ lt:LIFETIME ":" bounds:LifetimeBound* % "+" "+"? } |
+    Type:{ binder:ForAllBinder? ty:Type ":" bounds:TypeBound* % "+" "+"? } |


Disturbing fact: the following is valid Rust syntax:

trait X {} fn foo<T>() where for<'a> for<'b> X: Clone {}

however, syntactically these for's are different: the first one is a part of where predicate, the second one is a part of the type. Our ambiguity testing should ideally expose this problem.

Yupp, we basically have a precedence rule that eagerly takes the first encountered for<'a> and uses it for the whole T: A + B +....

Even more fun is the optional but not nestable parens around a bound.
where i32: (for<> Copy) looks weird but is perfectly valid today!

@Centril suggested we should make it a proper expression grammar with parens and + as a binop between bounds, which I feel pretty sympathetic towards.

Even more fun is the optional but not nestable parens around a bound.

Sorry, I tried to prevent that, but lang team forced the decision.

we should make it a proper expression grammar with parens and + as a binop between bounds

Why?
Bound + Bound + Bound + ... is a list with separator, not multiple binary operators, it could use Bound, Bound, Bound, ... or something equally well.

Why?

Well, it would explain the parens. And they do feel maybe like entities that can be arbitrarily composed.

ehuss

The rules with the optional trailing comma don't seem to handle the case for zero elements. For example (,) seems to parse.

I added notes about things that are unstable. libsyntax often parses them without complaint, but I think they should probably be noted somehow.

Apologies if I'm wrong about anything. Also, sometimes the line between syntactical and semantic rules seem to be a little blurry to me. My rule of thumb was whether or not it's accepted as a macro fragment, which may not be the best way to look at it, but allows you to easily say what the fragment matchers accept.

ehuss · 2018-11-05T15:52:52Z

grammar/attr.g

-    "(" TOKEN_TREE* ")" |
-    "[" TOKEN_TREE* "]" |
-    "{" TOKEN_TREE* "}";
+    "=" TOKEN_TREE |


Using token_tree (and macroinput) here seems very broad. The grammar at https://github.com/rust-lang-nursery/reference/blob/master/src/attributes.md I think is correct and more specific. I'm not sure what would be a good way to express that literals cannot have suffixes, though.

@ehuss This is intended afaik. rust-lang/rust#55208.

Ah, indeed, I had forgotten about that. Regardless, I think it would be good if the grammar eventually covered the stable syntax.

AFAIK it has been stabilized for attribute proc macros, just not built-in ones?
@petrochenkov can you confirm?

As for specific built-in attributes, sure, we could provide grammars for each of them, but it's similar to having a grammar for e.g. format_args!.

Syntax with delimiters is stable and is rejected/gated in non-macro attributes with a semantic check.

PATH = TOKEN_TREE is mostly unstable, I think.
Key-value attributes are not allowed in macros, and gated for non-literals in non-macros, but gating happens after expansion, so something like #[doc = $doc] where $doc: expr is also on stable.

ehuss · 2018-11-05T16:07:38Z

grammar/expr.g

+    Range:{ start:Expr? ".." end:Expr? } |
+    RangeInclusive:{ start:Expr? "..=" end:Expr } |
+    Type:{ expr:Expr ":" ty:Type } |
+    Cast:{ expr:Expr "as" ty:Type } |


Type without bounds.

Type:{ expr:Expr ":" ty:Type } | is also unstable (so leave a note).

grammar/expr.g

ehuss · 2018-11-05T17:08:35Z

grammar/macro.g

@@ -0,0 +1,13 @@
+MacroCall = path:Path "!" ident_input:IDENT? input:MacroInput;


When I was studying the parser code, this IDENT here had me stumped. Is this some old vestigial thing? I can't find a way to get it to actually parse, though.

Also, the type of path depends on where it is used. For the reference grammar, I just said it was a simple ("mod"-style) path, because in practice that's the only way it can be used. But from a parsing standpoint, I guess it could be more accurate. For example, foo<T>!{} does not parse as an expression, but it does parse as a type.

macro_rules! name { // .... }

It depends on whether macro_rules is considered a MacroCall, I suppose. As far as I'm aware, only macro_rules! has this syntax.

Hm, interesting.

What I was thinking of is that foo!bar{} is considered a valid statement in libsyntax (https://github.com/rust-lang/rust/blob/65e485d8f1d28102b426c9d6d82f835cd6470d3e/src/libsyntax/parse/parser.rs#L4711-L4714). I assumed that was what it was referring to. macro_rules is parsed through a separate mechanism (eat_macro_def). This "special identifier" was added 6 years ago, and I couldn't tell from the commit what it means. All other instances of parsing macro invocations do not check for that identifier.

Parsing macro_rules is not special, only when expanding it, does it allow the extra identifier.
A "macro definition" is the new macro keyword

I think I'm understanding it a little better. However, macro calls are parsed differently in various situations, and some don't allow the extra ident (type, pattern, trait/impl item, foreign item). Eventually these differences will need to be captured in the grammar, right?

Also, I think the syntax for macro_rules should be captured somewhere, since it is useful for certain rust tooling, syntax highlighting, etc. It is now in the reference, but I'm hoping the wg-grammar and the reference can be closely tied if not identical. Maybe the reference will just have extra stuff beyond the wg-grammar?

Also, I think the syntax for macro_rules should be captured somewhere

We could do this, especially since macro definitions are not just macro invocations, and we could even have a converter from macro_rules input patterns to CFGs.

Eventually these differences will need to be captured in the grammar, right?

Ideally we would find a way to keep adding rules without making the grammar unreadable and without losing the ability to encode into (E)BNF (automatically).

ehuss · 2018-11-05T21:52:58Z

grammar/generics.g

@@ -0,0 +1,35 @@
+Generics = "<" params:GenericParam* % "," ","? ">";
+GenericParam = attrs:OuterAttr* kind:GenericParamKind;


Lifetimes can't be listed after types.

That's more of a semantic restriction, to be really honest.
The introduction of const generics has made the old rules seem quite silly other thsn from a stylistic point of view. Most of the compiler has been fixed to not assume lifetimes come first.

If it actually were a semantic restriction then this would not be an error:

macro_rules! foo { ($x: item) => {} } foo! { fn stuff<T, 'a>() {} }

(Semantic vs. syntactic restrictions have semver consequences due to macros...)

And yeah... grammar wise this is super annoying, especially since const generic variables can be interspersed with type variables (afaik, or we haven't made any decision here...).

I guess we currently have the check duplicated (since the AST type loses the ordering requirement). @petrochenkov knows more.

I don't know anything anymore since @varkor rewrote everything for const generics.
Before that different kinds of parameters were stored separately in AST and the ordering was enforced solely in the parser.

ehuss · 2018-11-05T21:58:13Z

grammar/generics.g

+
+ForAllBinder = "for" generics:Generics;
+
+WhereClause = "where" bounds:WhereBound* % "," ","?;


This allows a bare , which I don't think is valid.

Wow, it's not valid! Hmm and neither is (,). I think I need to make a general note about this.

grammar/generics.g

ehuss · 2018-11-05T22:00:10Z

grammar/generics.g

+WhereClause = "where" bounds:WhereBound* % "," ","?;
+WhereBound =
+    Lifetime:{ lt:LIFETIME ":" bounds:LifetimeBound* % "+" "+"? } |
+    Type:{ binder:ForAllBinder? ty:Type ":" bounds:TypeBound* % "+" "+"? } |


I think this allows a bare + which I don't think is allowed.

ehuss · 2018-11-05T22:00:27Z

grammar/generics.g

+
+WhereClause = "where" bounds:WhereBound* % "," ","?;
+WhereBound =
+    Lifetime:{ lt:LIFETIME ":" bounds:LifetimeBound* % "+" "+"? } |


I think this allows a bare + which is not allowed?

eddyb · 2018-11-06T04:16:40Z

@ehuss General note: this grammar doesn't include "restrictions" or any disambiguations in general.
It has all the structure of Rust, but not all the constraints.

Practical declaractive constraints and disambiguations are an open question that this WG will have to come up with a solution for.

The official implementation has often picked arbitrary constraints, in the name of being able to make decisions early, while complicating the formalism.
Encoding those ad-hoc rules in a more formal setting may very well be needed, but I would prefer to start from and maintain a "clean slate".

eddyb · 2018-11-06T04:27:38Z

General note about unstable features: I plan to actually split them out into a subdirectory and have a system for merging rules (I mean, |-ing them together is enough).
The comments are still really helpful though!

We could even have one file per feature-gate, instead of duplicating the file structure of the stable grammar.

Centril · 2018-11-06T04:40:44Z

@eddyb From the POV of language design (i.e. changing the grammar later) it seems more readable to me to have the feature gates integrated in the grammar where they naturally change things because feature gates sometimes affect multiple different parts of the grammar at the same time... I think a sort of pragma system might work well here, but for now we should just add comments saying that some alternation is unstable...

eddyb · 2018-11-06T04:46:30Z

@Centril I don't see how having them separate would hamper that? You just put them where they belong, and maybe simplify the definition to not include old special-cases.

eddyb · 2018-11-06T04:55:18Z

General note: I assumed optionally-trailing-separator separated lists were X* % Sep Sep?.
And we planned to make that more ergonomic by adopting the X* %% Sep syntax (also from Perl6).

But now I'm not sure whether X* %% Sep is X* % Sep Sep? or (X+ %% Sep)? in Perl6.
We appear to use the latter in Rust, but I don't want to make %% do that unless it's consistent with where we're taking the syntax from (i.e. Perl6 regexes/grammars).

EDIT: @solson tried it out, and it looks like Rust is different than Perl6's %% syntax (which parses lone commas)! I wonder what languages also have trailing commas everywhere and what they chose.
The sad thing is that even if we add (X+ %% Sep)? syntax, that's no longer a list, it's an optional list.

EDIT2: I've been informed that the language Perl6 doesn't have lone commas (and some are speculating a bug might be letting X* %% "," parse ",").
So we could make a pragmatic decision about X* %% Sep and say it doesn't allow lone Sep.

Centril · 2018-11-06T04:57:13Z

@eddyb Maybe I'm not entirely understanding the suggestion... but I think having something like:

ExprKind
    = Literal:LITERAL
    | Try:{ expr:Expr "?" }
    | #[feature = "async"]   -- invent concrete syntax corresponding to this...
      AsyncBlock:{ "async" block:Block }
    | #[feature = "try_block"]
      TryBlock:{ "try" block:Block }
    | #[feature = "generators"]
      Yield:{ "yield" value:Expr? }
    | Closure:{
        #[feature = "generators"]
        generator_static:"static"?
        #[feature = "async"]
        asyncness:"async"?
        by_val:"move"? "|" args:ClosureArg* % "," ","? "|" { "->" ret_ty:Type }? body:Expr
      }
    ;

with the semantics that #[feature = ...] will strip out the next alternation or the next thing in a "concatenation"... would be nice.

It's nice because you can see the context to which the gated alternation applies and the other things so you can understand this better.

Centril · 2018-11-06T04:59:37Z

General note: I assumed optionally-trailing-separator separated lists were X* % Sep Sep?.
And we planned to make that more ergonomic by adopting the X* %% Sep syntax (also from Perl6).

But now I'm not sure whether X* %% Sep is X* % Sep Sep? or (X+ %% Sep)? in Perl6.
We appear to use the latter in Rust, but I don't want to make %% do that unless it's consistent with where we're taking the syntax from (i.e. Perl6 regexes/grammars).

I would personally advocate higher order grammars and just writing List(X, Sep)" and such things; it does make things more readable and you have to remember the concrete syntax of the GLL notation less.

eddyb · 2018-11-06T05:35:42Z

What I mean is you'd have something like this in an async.g file:

ExprKind |=
    AsyncBlock:{ "async" block:Block };

(not an imperative |=, but @Centril mentioned on Discord a plain = could be confusing)

eddyb · 2018-11-06T05:48:48Z

Oops, I forgot to mark this as WIP (so let's not land it for now).
I still want to work on the GLL side to improve the experience here.

petrochenkov · 2018-12-01T21:44:04Z

grammar/expr.g

+    Index:{ base:Expr "[" index:Expr "]" } |
+    Array:{ "[" attrs:InnerAttr* exprs:Expr* % "," ","? "]" } |
+    Repeat:{ "[" attrs:InnerAttr* elem:Expr ";" count:Expr "]" } |
+    Tuple:{ "(" attrs:InnerAttr* exprs:Expr* % "," ","? ")" } |


(EXPR) is accepted as a tuple here (introducing false ambiguity).

(Same with types and patterns.)

petrochenkov · 2018-12-01T21:50:02Z

grammar/expr.g

+If = "if" cond:Cond then:Block { "else" else_expr:ElseExpr }?;
+Cond =
+    Bool:Expr |
+    Let:{ "let" pat:Pat "=" expr:Expr };


if let PAT | PAT = EXPR { ... } is not covered.

That might've been accepted/implemented after I've already opened this PR, heh.

petrochenkov · 2018-12-01T21:59:23Z

So, I started reading this at last (now at generics.g).

Two issues are immediately noticeable:

The grammar is written in terms of multi-character operators, so things requiring token splitting like double borrows &&EXPR cannot be expressed.
Unless the parser generator automatically desugars things like "EXPR" "&&" "EXPR" into EXPR "&join" ("&join" | "&nojoin") "EXPR", and "&" EXPR into ("&join" | "&nojoin") "EXPR"?
The list notation with trailing separators. All the lists currently use DELIM_OPEN ELEM* % SEP SEP? DELIM_CLOSE, but that should accept undesired single separator DELIM_OPEN SEP DELIM_CLOSE?

CAD97 · 2018-12-02T00:52:09Z

The generated parser accepts a TokenStream and handles processing the Punct tokens and enforcing their Spacing to not accept [Punct { ch: '&', spacing: Alone, .. }, Punct { ch: '&', spacing: Alone, .. }] as "&&" and to accept [Punct { ch: '&', spacing: Joint, .. }, Punct { ch: '&', spacing: Alone }] as "&" "&".

(lykenware/gll side note: how are unbalanced groups handled (error reported) in the TokenStream generator? Not relevant for us though.)

List notation has been noted before, intent is to add a notation ELEM* %% SEP that behaves like Rust lists do. (Though, do we want to prohibit a lone trailing comma? Something to consider sometime down the line, I suppose.)

petrochenkov · 2018-12-04T00:48:19Z

grammar/type.g

+        binder:ForAllBinder? unsafety:"unsafe"? { "extern" abi:Abi }?
+        "fn" "(" inputs:FnSigInputs? ","? ")" { "->" ret_ty:Type }?
+    } |
+    ImplTrait:{ "impl" bounds:TypeBound+ % "+" "+"? } |


Ha, it's TypeBound*!

#[cfg(FALSE)] fn f() -> impl { 10 } // OK

Also true for DynTrait if dyn is present (at least on 2018 edition when dyn is a proper keyword).

petrochenkov · 2018-12-04T00:55:37Z

One thing I noticed is that the grammar is somewhat relaxed compared to what accepted by libsyntax parser (and libsyntax parser in general accept semantically invalid constructions if they can fit into AST - that forms the "language accepted under cfg(FALSE)"), but relaxed to different extent, so it's hard to decide what exactly to report as an issue and what not.
(So I didn't report anything that's more relaxed than the real parser.)

petrochenkov · 2018-12-04T01:06:59Z

grammar/stmt.g

+    Item:Item |
+    Local:{ attrs:OuterAttr* "let" pat:Pat { ":" ty:Type }? { "=" init:Expr }? ";" } |
+    Expr:Expr |
+    Semi:";";


If nonterminals in this grammar are supposed to match fragments in macro_rules, then Stmt needs to be organized slightly differently, more like

Stmt = Item:Item | Local:{ attrs:OuterAttr* "let" pat:Pat { ":" ty:Type }? { "=" init:Expr }? } | Expr:Expr; StmtX = Item | Local ";" | Expr | ";"; Block = "{" attrs:InnerAttr* StmtX* "}";

Fix formatting + unstable notes due to ehuss

adopt .lyg as lykenware/gll's new file extension

qmx · 2019-01-08T04:10:46Z

in the spirit of improving visibility and making progress I'm going to fix the conflicts now and merge this PR.

qmx · 2019-01-09T19:23:01Z

closing this as #17 is merged

eddyb added 4 commits November 1, 2018 10:26

Polyfill LIFETIME builtin.

c248bf1

Make the output from the tester more useful.

7e8dfb6

Implement a lot more of the grammar.

4c7e6e9

Add a blacklist to the tester to avoid self-DoS-ing.

6ad653f

matklad reviewed Nov 2, 2018

View reviewed changes

petrochenkov self-assigned this Nov 2, 2018

matklad reviewed Nov 5, 2018

View reviewed changes

ehuss reviewed Nov 5, 2018

View reviewed changes

eddyb changed the title ~~Implement (possibly?) the entire Rust grammar.~~ [WIP] Implement (possibly?) the entire Rust grammar. Nov 6, 2018

eddyb mentioned this pull request Nov 6, 2018

Make the turbofish syntax redundant rust-lang/rfcs#2544

Closed

petrochenkov reviewed Dec 1, 2018

View reviewed changes

petrochenkov reviewed Dec 4, 2018

View reviewed changes

petrochenkov removed their assignment Dec 4, 2018

petrochenkov reviewed Dec 4, 2018

View reviewed changes

qmx and others added 16 commits December 5, 2018 03:20

adopt .lyg as lykenware/gll's new file extension

9926a37

adopt .lyg as lykenware/gll's new file extension

b267067

add unstable notes where ehuss made notes.

3ff30f8

format abi.lyg

1bf260f

format attr.lyg

7609557

format expr.lyg

e990067

format generics.lyg

b94e6be

format item.lyg

178801e

format macro.lyg

91655dd

format pat.lyg

149f083

format path.lyg

024e799

format stmt.lyg

78f9302

format type.lyg

b63b6e0

format vis.lyg

ebbc976

Merge pull request #1 from Centril/fix-formatting

2bebbb7

Fix formatting + unstable notes due to ehuss

Merge pull request #1 from qmx/lyg-here-too

686d5ff

adopt .lyg as lykenware/gll's new file extension

Centril mentioned this pull request Jan 5, 2019

RFC: Accept semicolons as item-like rust-lang/rfcs#2479

Closed

CAD97 mentioned this pull request Jan 7, 2019

Stabilize unrestricted_attribute_tokens rust-lang/rust#57367

Merged

qmx mentioned this pull request Jan 8, 2019

Implement (possibly?) the full Rust Grammar #17

Merged

qmx closed this Jan 9, 2019

eddyb deleted the full branch May 5, 2019 16:51

ehuss mentioned this pull request Jun 28, 2019

How to handle brace expression restrictions? #53

Open

ehuss mentioned this pull request Jul 10, 2019

Arbitrary enum discriminants are nightly only #63

Open

		@@ -0,0 +1,13 @@
		MacroCall = path:Path "!" ident_input:IDENT? input:MacroInput;

		@@ -0,0 +1,35 @@
		Generics = "<" params:GenericParam* % "," ","? ">";
		GenericParam = attrs:OuterAttr* kind:GenericParamKind;


		ForAllBinder = "for" generics:Generics;

		WhereClause = "where" bounds:WhereBound* % "," ","?;

[WIP] Implement (possibly?) the entire Rust grammar. #13

[WIP] Implement (possibly?) the entire Rust grammar. #13

Conversation

eddyb commented Nov 1, 2018

eddyb commented Nov 2, 2018 • edited Loading

eddyb commented Nov 2, 2018 • edited Loading

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

matklad commented Nov 2, 2018

eddyb commented Nov 2, 2018

petrochenkov commented Nov 2, 2018 • edited Loading

eddyb commented Nov 2, 2018 • edited Loading

Choose a reason for hiding this comment

eddyb Nov 5, 2018 • edited Loading

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

ehuss left a comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

eddyb commented Nov 6, 2018

eddyb commented Nov 6, 2018

Centril commented Nov 6, 2018

eddyb commented Nov 6, 2018

eddyb commented Nov 6, 2018 • edited Loading

Centril commented Nov 6, 2018

Centril commented Nov 6, 2018

eddyb commented Nov 6, 2018

eddyb commented Nov 6, 2018

petrochenkov Dec 1, 2018 • edited Loading

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

petrochenkov commented Dec 1, 2018

CAD97 commented Dec 2, 2018 • edited Loading

petrochenkov Dec 4, 2018 • edited Loading

Choose a reason for hiding this comment

petrochenkov commented Dec 4, 2018 • edited Loading

petrochenkov Dec 4, 2018 • edited Loading

Choose a reason for hiding this comment

qmx commented Jan 8, 2019

qmx commented Jan 9, 2019

eddyb commented Nov 2, 2018 •

edited

Loading

eddyb commented Nov 2, 2018 •

edited

Loading

petrochenkov commented Nov 2, 2018 •

edited

Loading

eddyb commented Nov 2, 2018 •

edited

Loading

eddyb Nov 5, 2018 •

edited

Loading

eddyb commented Nov 6, 2018 •

edited

Loading

petrochenkov Dec 1, 2018 •

edited

Loading

CAD97 commented Dec 2, 2018 •

edited

Loading

petrochenkov Dec 4, 2018 •

edited

Loading

petrochenkov commented Dec 4, 2018 •

edited

Loading

petrochenkov Dec 4, 2018 •

edited

Loading