Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HTML: introduce a specialized tokenizer for script areas #3598

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Units/parser-html.r/comment-starter-in-script.d/args.ctags
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
--sort=no
--extras=+g
--fields=+Kl
3 changes: 3 additions & 0 deletions Units/parser-html.r/comment-starter-in-script.d/expected.tags
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Foo input.html /^<h1>Foo<\/h1>$/;" heading1 language:HTML
BAR input.html /^<h1>BAR<\/h1>$/;" heading1 language:HTML
x input.html /^ var x/;" variable language:JavaScript
6 changes: 6 additions & 0 deletions Units/parser-html.r/comment-starter-in-script.d/input.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<h1>Foo</h1>
<script>
// <!--
var x
</script>
<h1>BAR</h1>
3 changes: 3 additions & 0 deletions Units/parser-html.r/string-in-script.d/args.ctags
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
--sort=no
--extras=+g
--fields=+Kl
6 changes: 6 additions & 0 deletions Units/parser-html.r/string-in-script.d/expected.tags
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
Foo input.html /^<h1>Foo<\/h1>$/;" heading1 language:HTML
BAR input.html /^<h1>BAR<\/h1>$/;" heading1 language:HTML
bar input.html /^ const bar = 123$/;" constant language:JavaScript
baz input.html /^ function baz () {$/;" function language:JavaScript
bar2 input.html /^ const bar2 = 123$/;" constant language:JavaScript
baz2 input.html /^ function baz2 () {$/;" function language:JavaScript
24 changes: 24 additions & 0 deletions Units/parser-html.r/string-in-script.d/input.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<!-- Taken from #3581 submitted by @polyscone -->
<h1>Foo</h1>

<script>
const bar = 123

// I don't know why, but an apostrophe breaks
// the JavaScript guest language
function baz () {
return 'abc'
}
</script>

<script>
const bar2 = 123

// I don"t know why, but an apostrophe breaks
// the JavaScript guest language
function baz2 () {
return 'abc'
}
</script>

<h1>BAR</h1>
54 changes: 53 additions & 1 deletion parsers/html.c
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,58 @@ static void readTokenText (tokenInfo *const token, bool collectText)
}
}

/*
 * Read the next token from the input file using the reduced rule set
 * applied while skipping over script content.
 *
 * token->string is cleared first, then leading whitespace is discarded.
 * The token type is decided from the first non-space character:
 *
 *   EOF         -> TOKEN_EOF
 *   "</"        -> TOKEN_CLOSE_TAG_START
 *   "<" + other -> TOKEN_OTHER; the character after '<' is pushed back
 *   a run of characters that are not whitespace and not one of
 *   < > / = ' "  -> TOKEN_NAME, with the run stored lower-cased in
 *                   token->string and the terminating character
 *                   pushed back (unless it is EOF)
 *   a lone > / = ' or " -> TOKEN_OTHER; the character is consumed and
 *                   token->string stays empty
 *
 * Quotes get no special handling here: they merely end a name or
 * produce TOKEN_OTHER, and '<' is only significant when followed by '/'.
 */
static void readTokenInScript (tokenInfo *const token)
{
	int ch;

	vStringClear (token->string);

	/* Discard leading whitespace; ch ends up as the first non-space
	 * character (or EOF). */
	do
	{
		ch = getcFromInputFile ();
	}
	while (isspace (ch));

	if (ch == EOF)
	{
		token->type = TOKEN_EOF;
	}
	else if (ch == '<')
	{
		int next = getcFromInputFile ();

		if (next != '/')
		{
			/* Not a closing tag: give the look-ahead character back
			 * so that it is tokenized on the next call. */
			ungetcToInputFile (next);
			token->type = TOKEN_OTHER;
		}
		else
			token->type = TOKEN_CLOSE_TAG_START;
	}
	else
	{
		/* Accumulate a lower-cased name until a delimiter shows up. */
		while (!(isspace (ch) || ch == '<' || ch == '>' || ch == '/' ||
				 ch == '=' || ch == '\'' || ch == '"' || ch == EOF))
		{
			vStringPut (token->string, tolower (ch));
			ch = getcFromInputFile ();
		}

		if (vStringLength (token->string) != 0)
		{
			token->type = TOKEN_NAME;
			/* The delimiter that ended the name belongs to the next
			 * token. */
			if (ch != EOF)
				ungetcToInputFile (ch);
		}
		else
		{
			/* The very first character was itself a delimiter; it is
			 * consumed and reported as an anonymous token. */
			token->type = TOKEN_OTHER;
		}
	}

	TRACE_PRINT("token (in script): %s (%s)", tokenTypes[token->type], vStringValue (token->string));
}

static void readToken (tokenInfo *const token, bool skipComments)
{
int c;
Expand Down Expand Up @@ -414,7 +466,7 @@ static bool skipScriptContent (tokenInfo *token, long *line, long *lineOffset)
line_tmp[0] = getInputLineNumber ();
lineOffset_tmp[0] = getInputLineOffset ();

readToken (token, false);
readTokenInScript (token);
type = token->type;

if (type == TOKEN_CLOSE_TAG_START)
Expand Down