Skip to content

Commit

Permalink
gh-100445: Improve error message for unterminated strings with escapes
Browse files Browse the repository at this point in the history
  • Loading branch information
hauntsaninja authored Oct 18, 2023
1 parent baefbb2 commit 3156d19
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 4 deletions.
10 changes: 8 additions & 2 deletions Lib/test/test_syntax.py
Original file line number Diff line number Diff line change
Expand Up @@ -2298,8 +2298,14 @@ def test_error_parenthesis(self):

def test_error_string_literal(self):

self._check_error("'blech", "unterminated string literal")
self._check_error('"blech', "unterminated string literal")
self._check_error("'blech", r"unterminated string literal \(.*\)$")
self._check_error('"blech', r"unterminated string literal \(.*\)$")
self._check_error(
r'"blech\"', r"unterminated string literal \(.*\); perhaps you escaped the end quote"
)
self._check_error(
r'r"blech\"', r"unterminated string literal \(.*\); perhaps you escaped the end quote"
)
self._check_error("'''blech", "unterminated triple-quoted string literal")
self._check_error('"""blech', "unterminated triple-quoted string literal")

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Improve error message for unterminated strings with escapes.
18 changes: 16 additions & 2 deletions Parser/lexer/lexer.c
Original file line number Diff line number Diff line change
Expand Up @@ -972,6 +972,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
int quote = c;
int quote_size = 1; /* 1 or 3 */
int end_quote_size = 0;
int has_escaped_quote = 0;

/* Nodes of type STRING, especially multi line strings
must be handled differently in order to get both
Expand Down Expand Up @@ -1037,8 +1038,18 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
return MAKE_TOKEN(ERRORTOKEN);
}
else {
_PyTokenizer_syntaxerror(tok, "unterminated string literal (detected at"
" line %d)", start);
if (has_escaped_quote) {
_PyTokenizer_syntaxerror(
tok,
"unterminated string literal (detected at line %d); "
"perhaps you escaped the end quote?",
start
);
} else {
_PyTokenizer_syntaxerror(
tok, "unterminated string literal (detected at line %d)", start
);
}
if (c != '\n') {
tok->done = E_EOLS;
}
Expand All @@ -1052,6 +1063,9 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t
end_quote_size = 0;
if (c == '\\') {
c = tok_nextc(tok); /* skip escaped char */
if (c == quote) { /* but record whether the escaped char was a quote */
has_escaped_quote = 1;
}
if (c == '\r') {
c = tok_nextc(tok);
}
Expand Down

0 comments on commit 3156d19

Please sign in to comment.