Scanner: skip @ in backticks or with bad whitespace

Outside backticks, @ only starts a new tag after whitespace and before non-whitespace. The scanner now checks for this instead of the parser.
microsoft · Mar 4, 2023 · 9f4daf5 · 9f4daf5
1 parent 3d96fa9
commit 9f4daf5
Show file tree

Hide file tree

Showing 4 changed files with 25 additions and 39 deletions.
diff --git a/src/compiler/parser.ts b/src/compiler/parser.ts
@@ -2165,8 +2165,8 @@ namespace Parser {
         return currentToken = scanner.scanJsDocToken();
     }
 
-    function nextTokenJSDocBig(): JSDocSyntaxKind { // TODO: nextTokenJSDocCommentText
-        return currentToken = scanner.scanBigJsDocToken();
+    function nextTokenJSDocBig(inBackticks: boolean): JSDocSyntaxKind { // TODO: nextTokenJSDocCommentText
+        return currentToken = scanner.scanBigJsDocToken(inBackticks);
     }
 
     function reScanGreaterToken(): SyntaxKind {
@@ -8680,7 +8680,7 @@ namespace Parser {
                             break;
                     }
                     if (state === JSDocState.SavingComments) {
-                        nextTokenJSDocBig();
+                        nextTokenJSDocBig(/*inBackticks*/ false);
                     }
                     else {
                         nextTokenJSDoc();
@@ -8869,7 +8869,6 @@ namespace Parser {
                 const parts: JSDocComment[] = [];
                 let linkEnd;
                 let state = JSDocState.BeginningOfLine;
-                let previousWhitespace = true;
                 let margin: number | undefined;
                 function pushComment(text: string) {
                     if (!margin) {
@@ -8901,14 +8900,8 @@ namespace Parser {
                             indent = 0;
                             break;
                         case SyntaxKind.AtToken:
-                            if (state === JSDocState.SavingBackticks // TODO: nextTokenJSDocBig should be able to skip @ inside backticks
-                                || state === JSDocState.SavingComments && (!previousWhitespace || lookAhead(isNextJSDocTokenWhitespace))) {
-                                // @ doesn't start a new tag inside ``, and inside a comment, only after whitespace or not before whitespace
-                                comments.push(scanner.getTokenText());
-                                break;
-                            }
                             scanner.setTextPos(scanner.getTextPos() - 1);
-                            // falls through
+                            break loop;
                         case SyntaxKind.EndOfFileToken:
                             // Done
                             break loop;
@@ -8965,11 +8958,8 @@ namespace Parser {
                             pushComment(scanner.getTokenText());
                             break;
                     }
-                    // TODO: nextTokenJSDocBig always returns Identifier, even when that token ends with some whitespace.
-                    // Make this hack less hacky: call a isWhitespace function, and importantly, the state *currently* being SavingComments doesn't mean that the previous call was for a big token
-                    previousWhitespace = token() === SyntaxKind.WhitespaceTrivia || ((state === JSDocState.SavingComments || state === JSDocState.SavingBackticks) && tok === SyntaxKind.Identifier && scanner.getTokenValue().at(-1) === " ");
                     if (state === JSDocState.SavingComments || state === JSDocState.SavingBackticks) { // TODO: Add another scanner method for scanning over the introductory " *" after BeginningOfLine
-                        tok = nextTokenJSDocBig(); // TODO: Maybe SawAsterisk could also call nextTokenJSDocBig?
+                        tok = nextTokenJSDocBig(state === JSDocState.SavingBackticks); // TODO: Maybe SawAsterisk could also call nextTokenJSDocBig?
                     } // TODO: Maybe nextTokenJSDocBig is backward-compatible enough to just call all the time
                     else {
                         tok = nextTokenJSDoc();
@@ -8989,11 +8979,6 @@ namespace Parser {
                 }
             }
 
-            function isNextJSDocTokenWhitespace() {
-                const next = nextTokenJSDoc();
-                return next === SyntaxKind.WhitespaceTrivia || next === SyntaxKind.NewLineTrivia;
-            }
-
             function parseJSDocLink(start: number) {
                 const linkType = tryParse(parseJSDocLinkPrefix);
                 if (!linkType) {

diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts
@@ -75,7 +75,7 @@ export interface Scanner {
     reScanInvalidIdentifier(): SyntaxKind;
     scanJsxToken(): JsxTokenSyntaxKind;
     scanJsDocToken(): JSDocSyntaxKind;
-    scanBigJsDocToken(): JSDocSyntaxKind; // TODO: Should only be the Big Token kinds
+    scanBigJsDocToken(inBackticks: boolean): JSDocSyntaxKind;
     scan(): SyntaxKind;
 
     getText(): string;
@@ -2457,32 +2457,33 @@ export function createScanner(languageVersion: ScriptTarget,
         return scanJsxAttributeValue();
     }
 
-    /** TODO: might need to return WhitespaceTrivia if only whitespace was encountered? */
-    function scanBigJsDocToken(): JSDocSyntaxKind { // can be configurable to skip almost everything (except newline and backtick) if backticks is true
+    function scanBigJsDocToken(inBackticks: boolean): JSDocSyntaxKind {
         startPos = tokenPos = pos;
         tokenFlags = TokenFlags.None;
         if (pos >= end) {
             return token = SyntaxKind.EndOfFileToken;
         }
-
-        let ch = codePointAt(text, pos);
-        while (pos < end) {
-            if (ch !== CharacterCodes.lineFeed && ch !== CharacterCodes.at && ch !== CharacterCodes.backtick && ch !== CharacterCodes.openBrace) {
-                // TODO: We can also be smarter about openBrace, backtick and at by looking at a tiny amount of context
-                pos++;
-            }
-            else {
-                break;
+        // TODO: Probably need to increment pos in the initial part to avoid a double read
+        // TODO: Need to increment `pos += charSize(ch)`,
+        for (let ch = codePointAt(text, pos);
+             pos < end && (ch !== CharacterCodes.lineFeed && ch !== CharacterCodes.carriageReturn && ch !== CharacterCodes.backtick);
+             ch = codePointAt(text, ++pos)) {
+            if (!inBackticks) {
+                if (ch === CharacterCodes.openBrace) {
+                    break;
+                }
+                else if (ch === CharacterCodes.at
+                    && pos - 1 >= 0 && isWhiteSpaceSingleLine(codePointAt(text, pos - 1))
+                    && !(pos + 1 < end && isWhiteSpaceLike(codePointAt(text, pos + 1)))) {
+                    // @ doesn't start a new tag inside ``, and elsewhere, only after whitespace and before non-whitespace
+                    break;
+                }
             }
-            ch = codePointAt(text, pos);
         }
         if (pos === tokenPos) {
             return scanJsDocToken();
         }
-        else {
-            // TODO: Make sure this is right (and in the right place)
-            tokenValue = text.substring(tokenPos, pos);
-        }
+        tokenValue = text.substring(tokenPos, pos);
         return token = SyntaxKind.Identifier;
     }
 

diff --git a/tests/baselines/reference/api/tsserverlibrary.d.ts b/tests/baselines/reference/api/tsserverlibrary.d.ts
@@ -8350,7 +8350,7 @@ declare namespace ts {
         reScanInvalidIdentifier(): SyntaxKind;
         scanJsxToken(): JsxTokenSyntaxKind;
         scanJsDocToken(): JSDocSyntaxKind;
-        scanBigJsDocToken(): JSDocSyntaxKind;
+        scanBigJsDocToken(inBackticks: boolean): JSDocSyntaxKind;
         scan(): SyntaxKind;
         getText(): string;
         setText(text: string | undefined, start?: number, length?: number): void;

diff --git a/tests/baselines/reference/api/typescript.d.ts b/tests/baselines/reference/api/typescript.d.ts
@@ -4407,7 +4407,7 @@ declare namespace ts {
         reScanInvalidIdentifier(): SyntaxKind;
         scanJsxToken(): JsxTokenSyntaxKind;
         scanJsDocToken(): JSDocSyntaxKind;
-        scanBigJsDocToken(): JSDocSyntaxKind;
+        scanBigJsDocToken(inBackticks: boolean): JSDocSyntaxKind;
         scan(): SyntaxKind;
         getText(): string;
         setText(text: string | undefined, start?: number, length?: number): void;