Skip to content

Commit

Permalink
Merge pull request #40 from tomblachut/feature/rework-lexer
Browse files Browse the repository at this point in the history
Rework lexer
  • Loading branch information
tomblachut authored Aug 6, 2019
2 parents fd76295 + 1fa22a9 commit e7fcdac
Show file tree
Hide file tree
Showing 28 changed files with 1,290 additions and 532 deletions.
2 changes: 1 addition & 1 deletion .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ insert_final_newline = true
[*.md]
trim_trailing_whitespace = false

[*.bnf]
[{*.bnf, *.flex, _SvelteLexer.java}]
indent_size = 2
633 changes: 330 additions & 303 deletions src/main/gen/dev/blachut/svelte/lang/_SvelteLexer.java

Large diffs are not rendered by default.

176 changes: 113 additions & 63 deletions src/main/java/dev/blachut/svelte/lang/Svelte.flex
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,6 @@ import static dev.blachut.svelte.lang.psi.SvelteTypes.*;

%%

%{
private char quote;
private int leftBraceCount;
private int leftParenCount;
%}

//%debug
%public
%class _SvelteLexer
Expand All @@ -24,106 +18,162 @@ import static dev.blachut.svelte.lang.psi.SvelteTypes.*;
%type IElementType
%unicode

%{
  // Nesting depth of "{", "(" and "[" seen inside the Svelte tag or interpolation being lexed.
  // The ")" and "]" rules decrement unconditionally, so parens and brackets can become
  // negative on malformed input.
  private int braces = 0;
  private int parens = 0;
  private int brackets = 0;

  // When true, an "as" / "then" found at nesting depth zero is lexed as a Svelte keyword
  // instead of as part of the JS expression. Turned on by enableRootKeywords().
  private boolean rootKeywordsEnabled = false;

  // Token type reported for every piece of the quoted string currently being lexed
  // (set by beginQuote(), read by the *_QUOTE state rules).
  private IElementType quotedToken;

  // Lexical states saved by pushState() and restored by popState().
  private Stack<Integer> stack = new Stack<>();

  /**
   * Saves the current lexical state on the stack and switches to {@code newState}.
   */
  private void pushState(int newState) {
    stack.push(yystate());
    yybegin(newState);
  }

  /**
   * Switches back to the state saved by the matching {@link #pushState(int)}.
   *
   * If nothing was saved, falls back to YYINITIAL instead of letting
   * {@code EmptyStackException} escape from the lexer.
   */
  private void popState() {
    if (stack.isEmpty()) {
      yybegin(YYINITIAL);
      return;
    }
    yybegin(stack.pop());
  }

  /**
   * Consumes any following whitespace as WHITE_SPACE tokens and then continues in
   * {@code nextState}.
   *
   * Implemented by making {@code nextState} current, then pushing it and entering
   * ONLY_WHITESPACE; that state pops back on the first non-whitespace character.
   */
  private void eatWsThenBegin(int nextState) {
    yybegin(nextState);
    pushState(ONLY_WHITESPACE);
  }

  /**
   * Enters {@code quoteState} for the body of a quoted string, remembering the state
   * to return to.
   *
   * @param quoteState one of the quote states (SINGLE_QUOTE, DOUBLE_QUOTE, TICKED_QUOTE)
   * @param token      token type to report for the opening quote and for everything up
   *                   to and including the closing quote
   * @return {@code token}, to be returned by the rule that matched the opening quote
   */
  private IElementType beginQuote(int quoteState, IElementType token) {
    quotedToken = token;
    pushState(quoteState);
    return quotedToken;
  }

  /**
   * Allows "as" / "then" to be recognised as keywords from now on, but only if the
   * lexer is currently at nesting depth zero.
   */
  private void enableRootKeywords() {
    if (notNestedCode()) rootKeywordsEnabled = true;
  }

  /**
   * Tells whether the lexer is outside every brace, paren and bracket pair.
   *
   * Each counter is checked on its own rather than summed: parens and brackets can go
   * negative on a stray ")" or "]", and a sum would let such a closer cancel an
   * unrelated open "{" and wrongly report top-level code.
   */
  private boolean notNestedCode() {
    return braces == 0 && parens == 0 && brackets == 0;
  }

  /**
   * Resets the nesting counters and disables root keywords. Does not touch the state
   * stack or the quoted token type.
   */
  private void resetCounters() {
    rootKeywordsEnabled = false;
    braces = 0;
    parens = 0;
    brackets = 0;
  }
%}

%eof{
leftBraceCount = 0;
leftParenCount = 0;
resetCounters();
%eof}

WHITE_SPACE=\s+
ID=[$_a-zA-Z0-9]+
SINGLE_QUOTE="'"
DOUBLE_QUOTE="\""
TICKED_QUOTE="`"

%state SVELTE_TAG_START
%state SVELTE_TAG
%state SVELTE_TAG_PARAMETER
%state SVELTE_INTERPOLATION_START
%state SVELTE_INTERPOLATION
%state VERBATIM_COMMENT
%state VERBATIM_HTML
%state HTML_TAG
%state TAG_STRING
%state SVELTE_INTERPOLATION_PRE
%state SVELTE_INTERPOLATION
%state SVELTE_TAG_PRE
%state SVELTE_TAG
%state SVELTE_ELSE_TAG
%state SVELTE_TAG_PAREN_AWARE
%xstate ONLY_WHITESPACE
%xstate SINGLE_QUOTE
%xstate DOUBLE_QUOTE
%xstate TICKED_QUOTE

%%
<YYINITIAL> {
"<!--" { yybegin(VERBATIM_COMMENT); return HTML_FRAGMENT; }
"<script" | "<style" { yybegin(VERBATIM_HTML); return HTML_FRAGMENT; }
"<" { yybegin(HTML_TAG); return HTML_FRAGMENT; }
"{"\s*"#" { yybegin(SVELTE_TAG_PRE); return START_OPENING_MUSTACHE; }
"{"\s*":" { yybegin(SVELTE_TAG_PRE); return START_INNER_MUSTACHE; }
"{"\s*"/" { yybegin(SVELTE_TAG_PRE); return START_CLOSING_MUSTACHE; }
"{" { yybegin(SVELTE_INTERPOLATION_PRE); return START_MUSTACHE; }
{WHITE_SPACE} { return WHITE_SPACE; }
"{"\s*"#" { resetCounters(); yybegin(SVELTE_TAG_START); return START_OPENING_MUSTACHE; }
"{"\s*":" { resetCounters(); yybegin(SVELTE_TAG_START); return START_INNER_MUSTACHE; }
"{"\s*"/" { resetCounters(); yybegin(SVELTE_TAG_START); return START_CLOSING_MUSTACHE; }
"{" { yybegin(SVELTE_INTERPOLATION_START); return START_MUSTACHE; }
}

<SVELTE_TAG_PRE> {
<SVELTE_TAG_START> {
"if" { yybegin(SVELTE_TAG); return IF; }
"else" { eatWsThenBegin(SVELTE_TAG); return ELSE; }
"each" { yybegin(SVELTE_TAG); return EACH; }
"await" { yybegin(SVELTE_TAG); return AWAIT; }
"then" { yybegin(SVELTE_TAG); return THEN; }
"catch" { yybegin(SVELTE_TAG); return CATCH; }
"else" { yybegin(SVELTE_ELSE_TAG); return ELSE; }
"then" { yybegin(SVELTE_TAG_PARAMETER); return THEN; }
"catch" { yybegin(SVELTE_TAG_PARAMETER); return CATCH; }
{ID} { yybegin(SVELTE_TAG); return BAD_CHARACTER; }
{WHITE_SPACE} { return BAD_CHARACTER; }
}

<SVELTE_ELSE_TAG> {
"if" { yybegin(SVELTE_TAG); return IF; }
{ID} { yybegin(SVELTE_TAG); return CODE_FRAGMENT; }
{WHITE_SPACE} { return WHITE_SPACE; }
<SVELTE_TAG> {
"if" { if (notNestedCode()) { return IF; } else { return CODE_FRAGMENT; } } // That could as well be always lexed as IF because if is an invalid token in JS expression
"as" { if (notNestedCode() && rootKeywordsEnabled) { yybegin(SVELTE_TAG_PARAMETER); return AS; } else { enableRootKeywords(); return CODE_FRAGMENT; } }
"then" { if (notNestedCode() && rootKeywordsEnabled) { yybegin(SVELTE_TAG_PARAMETER); return THEN; } else { enableRootKeywords(); return CODE_FRAGMENT; } }
"(" { enableRootKeywords(); parens++; return CODE_FRAGMENT; }
")" { parens--; return CODE_FRAGMENT; }
}

<SVELTE_TAG, SVELTE_TAG_PAREN_AWARE> {
"then" { return THEN; }
"as" { yybegin(SVELTE_TAG_PAREN_AWARE); return AS; }
"," { if (leftBraceCount == 0) { return COMMA; } else { return CODE_FRAGMENT; } }

{WHITE_SPACE} { if (leftBraceCount == 0) { return WHITE_SPACE; } else { return CODE_FRAGMENT; } }
{ID}("then"|"as"){ID} { return CODE_FRAGMENT; }
("then"|"as"){ID} { return CODE_FRAGMENT; }
{ID}("then"|"as") { return CODE_FRAGMENT; }
// Key expressions are wrapped in parens and can contain any number of paren pairs. Outermost parens need to be distinguished
<SVELTE_TAG_PARAMETER> {
"(" { parens++; if (parens == 1) { return START_PAREN; } else { return CODE_FRAGMENT; } }
")" { parens--; if (parens == 0) { return END_PAREN; } else { return CODE_FRAGMENT; } }
}

/*
Key expressions are wrapped in parens and can contain any number of paren pairs. Wrapping parens need to be distinguished.
*/
<SVELTE_TAG_PAREN_AWARE> {
"(" { leftParenCount += 1; if (leftParenCount == 1) { return START_PAREN; } else { return CODE_FRAGMENT; } }
")" { leftParenCount -= 1; if (leftParenCount == 0) { return END_PAREN; } else { return CODE_FRAGMENT; } }
<SVELTE_TAG, SVELTE_TAG_PARAMETER> {
{SINGLE_QUOTE} { enableRootKeywords(); return beginQuote(SINGLE_QUOTE, CODE_FRAGMENT); }
{DOUBLE_QUOTE} { enableRootKeywords(); return beginQuote(DOUBLE_QUOTE, CODE_FRAGMENT); }
{TICKED_QUOTE} { enableRootKeywords(); return beginQuote(TICKED_QUOTE, CODE_FRAGMENT); }
"," { if (notNestedCode()) { return COMMA; } else { return CODE_FRAGMENT; } }
"[" { enableRootKeywords(); brackets++; return CODE_FRAGMENT; }
"]" { brackets--; return CODE_FRAGMENT; }
"{" { enableRootKeywords(); braces++; return CODE_FRAGMENT; }
// Following eatWsThenBegin is a hack around formatter bugs
"}" { if (braces == 0) { eatWsThenBegin(YYINITIAL); return END_MUSTACHE; } else { braces--; return CODE_FRAGMENT; } }
{ID} { enableRootKeywords(); return CODE_FRAGMENT; }
{ID}"." { enableRootKeywords(); return CODE_FRAGMENT; } // Fixes weird highlighting for incomplete JS
{WHITE_SPACE} { return CODE_FRAGMENT; }
[^] { if (notNestedCode()) rootKeywordsEnabled = false; return CODE_FRAGMENT; }
}

<SVELTE_INTERPOLATION_PRE> {
{WHITE_SPACE} { return WHITE_SPACE; }
<SVELTE_INTERPOLATION_START> {
{WHITE_SPACE}/"@" { return WHITE_SPACE; }
"@html" { yybegin(SVELTE_INTERPOLATION); return HTML_PREFIX; }
"@debug" { yybegin(SVELTE_INTERPOLATION); return DEBUG_PREFIX; }
"@" | "@"{ID} { yybegin(SVELTE_INTERPOLATION); return BAD_CHARACTER; }
"{" { yybegin(SVELTE_INTERPOLATION); leftBraceCount += 1; return CODE_FRAGMENT; }
[^] { yybegin(SVELTE_INTERPOLATION); return CODE_FRAGMENT; }
[^] { yybegin(SVELTE_INTERPOLATION); yypushback(yylength()); }
}

<SVELTE_INTERPOLATION, SVELTE_TAG, SVELTE_ELSE_TAG, SVELTE_TAG_PAREN_AWARE> {
"{" { leftBraceCount += 1; return CODE_FRAGMENT; }
"}" { if (leftBraceCount == 0) { yybegin(YYINITIAL); return END_MUSTACHE; } else { leftBraceCount -= 1; return CODE_FRAGMENT; } }

<SVELTE_INTERPOLATION> {
"{" { braces++; return CODE_FRAGMENT; }
"}" { if (braces == 0) { eatWsThenBegin(YYINITIAL); return END_MUSTACHE; } else { braces--; return CODE_FRAGMENT; } }
[^] { return CODE_FRAGMENT; }
}

<VERBATIM_COMMENT> {
"-->" { yybegin(YYINITIAL); return HTML_FRAGMENT; }
}

<VERBATIM_HTML> {
"</script>" | "</style>" { yybegin(YYINITIAL); return HTML_FRAGMENT; }
}
<VERBATIM_COMMENT> "-->" { yybegin(YYINITIAL); return HTML_FRAGMENT; }
<VERBATIM_HTML> "</script>" | "</style>" { yybegin(YYINITIAL); return HTML_FRAGMENT; }

<HTML_TAG> {
"'" { yybegin(TAG_STRING); quote = '\''; return HTML_FRAGMENT; }
"\"" { yybegin(TAG_STRING); quote = '"'; return HTML_FRAGMENT; }
{SINGLE_QUOTE} { return beginQuote(SINGLE_QUOTE, HTML_FRAGMENT); }
{DOUBLE_QUOTE} { return beginQuote(DOUBLE_QUOTE, HTML_FRAGMENT); }
">" { yybegin(YYINITIAL); return HTML_FRAGMENT; }
}

<TAG_STRING> {
"'" { if (quote == '\'') yybegin(HTML_TAG); return HTML_FRAGMENT; }
"\"" { if (quote == '"') yybegin(HTML_TAG); return HTML_FRAGMENT; }
<SINGLE_QUOTE> {SINGLE_QUOTE} { popState(); return quotedToken; }
<DOUBLE_QUOTE> {DOUBLE_QUOTE} { popState(); return quotedToken; }
<TICKED_QUOTE> {TICKED_QUOTE} { popState(); return quotedToken; }

<SINGLE_QUOTE, DOUBLE_QUOTE, TICKED_QUOTE> {
\\[^] { return quotedToken; }
[^] { return quotedToken; }
}

<ONLY_WHITESPACE> {
{WHITE_SPACE} { return WHITE_SPACE; }
[^] { popState(); yypushback(1); }
}

[^] { return HTML_FRAGMENT; }
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,11 @@ import dev.blachut.svelte.lang.psi.SvelteOpeningTag
class SvelteEnterHandler : EnterHandlerDelegateAdapter() {
/**
* If the caret is between an open and a close tag, we ensure it ends up in the "logical" place on Enter.
* i.e. "{{#foo}}<caret>{{/foo}}" becomes the following on Enter:
* i.e. "{#if x}<caret>{/if}" becomes the following on Enter:
*
* {{#foo}}
* {#if x}
* <caret>
* {{/foo}}
* {/if}
*
* (Note: <caret> may be indented depending on formatter settings.)
*/
Expand All @@ -48,9 +48,7 @@ class SvelteEnterHandler : EnterHandlerDelegateAdapter() {
}

/**
* Checks to see if `Enter` has been typed while the caret is between an open and close tag pair.
*
* @return true if between open and close tags, false otherwise
* Checks to see if `Enter` has been typed while the caret is between an open and close tag pair
*/
private fun isBetweenSvelteTags(editor: Editor, file: PsiFile, offset: Int): Boolean {
if (offset == 0) return false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,28 +61,28 @@ private class SvelteBlock(node: ASTNode,
* SVELTE_BLOCKS
* MARKUP_FRAGMENT
* SVELTE_BLOCKS
* {#if condition}
* {/if}
* ```
*
* Formatting seems easy. Simply apply an indent (represented here by `----`) to the SCOPE and call it a day:
* ```
* {{#foo}}
* {#if condition}
* ----SVELTE_BLOCKS
* ----MARKUP_FRAGMENT
* ----SVELTE_BLOCKS
* {{/foo}}
* {/if}
* ```
*
* However, if we're contained in a markup block, it's going to provide some indents of its own
* (call them `::::`) which quickly leads to undesirable double-indenting:
*
* ```
* <div>
* ::::{{#foo}}
* ::::{#if condition}
* ----SVELTE_BLOCKS
* ::::----MARKUP_FRAGMENT
* ----SVELTE_BLOCKS
* ::::{{/foo}}
* ::::{/if}
* </div>
* ```
* So to behave correctly in both situations, we indent SCOPE from the "outside" anytime we're not wrapped
Expand Down Expand Up @@ -179,4 +179,4 @@ private class SvelteBlock(node: ASTNode,

return foreignBlockParent
}
}
}
6 changes: 3 additions & 3 deletions src/test/java/dev/blachut/svelte/lang/SvelteLexerTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ class SvelteLexerTest : LexerTestCase() {
fun testIncompleteExpression() = doTest()
fun testWhitespace() = doTest()

// TODO Improve lexer and enable following tests
// fun testEachAsAs() = doTest(true)
// fun testAwaitThenThenThen() = doTest(true)
fun testEachAsAsAsAs() = doTest()
fun testAwaitThenThenThen() = doTest()
fun testEachAmbiguousAs() = doTest()

private fun doTest() = doFileTest("svelte")
}
Expand Down
20 changes: 11 additions & 9 deletions src/test/java/dev/blachut/svelte/lang/SvelteParsingTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,17 @@ import com.intellij.testFramework.ParsingTestCase
class SvelteParsingTest : ParsingTestCase("dev/blachut/svelte/lang", "svelte", SvelteParserDefinition()) {
override fun getTestDataPath(): String = "src/test/resources"

fun testIfElseIf() = doTest(true)
fun testEachAssets() = doTest(true)
fun testExpression() = doTest(true)
fun testIncompleteExpression() = doTest(true)
fun testWhitespace() = doTest(true)
fun testNestedBlocks() = doTest(true)
fun testIfElseIf() = doTest()
fun testEachAssets() = doTest()
fun testExpression() = doTest()
fun testIncompleteExpression() = doTest()
fun testWhitespace() = doTest()
fun testNestedBlocks() = doTest()

// TODO Improve lexer and enable following tests
// fun testEachAsAs() = doTest(true)
// fun testAwaitThenThenThen() = doTest(true)
fun testEachAsAsAsAs() = doTest()
fun testAwaitThenThenThen() = doTest()
fun testEachAmbiguousAs() = doTest()

private fun doTest() = doTest(true)
}

Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,6 @@ class SvelteFormatterTest : FormatterTestCase() {
fun testScriptContents() = doTest()
fun testNestedBlocks() = doTest()
fun testNestedBlocksFlat() = doTest()
fun testIndentedExpressions() = doTest()
// fun testOneLineBlock() = doTest() // TODO Fix formatter and enable test
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{# ('{#')
await ('await')
CODE_FRAGMENT (' then ')
then ('then')
CODE_FRAGMENT (' then')
} ('}')
{/ ('{/')
await ('await')
} ('}')
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,15 @@ Svelte Component
Svelte: AWAIT_THEN_BLOCK_OPENING_TAG
PsiElement({#)('{#')
PsiElement(await)('await')
PsiWhiteSpace(' ')
Svelte: EXPRESSION
PsiElement(CODE_FRAGMENT)('then')
PsiWhiteSpace(' ')
PsiElement(CODE_FRAGMENT)(' then ')
PsiElement(then)('then')
PsiWhiteSpace(' ')
Svelte: PARAMETER
PsiElement(CODE_FRAGMENT)('then')
PsiElement(CODE_FRAGMENT)(' then')
PsiElement(})('}')
Svelte: SCOPE
<empty list>
Svelte: AWAIT_BLOCK_CLOSING_TAG
PsiElement({/)('{/')
PsiElement(await)('await')
PsiElement(})('}')
PsiElement(})('}')
Loading

0 comments on commit e7fcdac

Please sign in to comment.