From 87e78b517b22bd5a7e2d55313a67585924c1d80b Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 13 May 2020 09:56:47 -0700 Subject: [PATCH 1/7] bpo-40612: Fix SyntaxError edge cases in traceback module --- Lib/test/test_traceback.py | 10 +++++----- Lib/traceback.py | 14 +++++++++----- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py index 7361d091cfbbef..c703cedb051ef1 100644 --- a/Lib/test/test_traceback.py +++ b/Lib/test/test_traceback.py @@ -58,13 +58,13 @@ def test_caret(self): SyntaxError) self.assertIn("^", err[2]) # third line has caret self.assertEqual(err[2].count('\n'), 1) # and no additional newline - self.assertEqual(err[1].find("+"), err[2].find("^")) # in the right place + self.assertEqual(err[1].find("+") + 1, err[2].find("^")) # in the right place err = self.get_exception_format(self.syntax_error_with_caret_non_ascii, SyntaxError) self.assertIn("^", err[2]) # third line has caret self.assertEqual(err[2].count('\n'), 1) # and no additional newline - self.assertEqual(err[1].find("+"), err[2].find("^")) # in the right place + self.assertEqual(err[1].find("+") + 1, err[2].find("^")) # in the right place def test_nocaret(self): exc = SyntaxError("error", ("x.py", 23, None, "bad syntax")) @@ -78,7 +78,7 @@ def test_bad_indentation(self): self.assertEqual(len(err), 4) self.assertEqual(err[1].strip(), "print(2)") self.assertIn("^", err[2]) - self.assertEqual(err[1].find(")"), err[2].find("^")) + self.assertEqual(err[1].find(")") + 1, err[2].find("^")) err = self.get_exception_format(self.syntax_error_bad_indentation2, IndentationError) @@ -656,7 +656,7 @@ def outer_raise(): self.assertIn('inner_raise() # Marker', blocks[2]) self.check_zero_div(blocks[2]) - @support.skip_if_new_parser("Pegen is arguably better here, so no need to fix this") + @unittest.skipIf(support.use_old_parser(), "Pegen is arguably better here, so no need to fix this") def test_syntax_error_offset_at_eol(self): # See #10186. def e(): @@ -666,7 +666,7 @@ def e(): def e(): exec("x = 5 | 4 |") msg = self.get_report(e).splitlines() - self.assertEqual(msg[-2], ' ^') + self.assertEqual(msg[-2], ' ^') def test_message_none(self): # A message that looks like "None" should not be treated specially diff --git a/Lib/traceback.py b/Lib/traceback.py index bf34bbab8a1629..7d5561769f541d 100644 --- a/Lib/traceback.py +++ b/Lib/traceback.py @@ -569,9 +569,12 @@ def format_exception_only(self): if not issubclass(self.exc_type, SyntaxError): yield _format_final_exc_line(stype, self._str) - return + else: + yield from self._format_syntax_error(stype) - # It was a syntax error; show exactly where the problem was found. + def _format_syntax_error(self, stype): + """Format SyntaxError exceptions (internal helper).""" + # Show exactly where the problem was found. filename = self.filename or "" lineno = str(self.lineno) or '?' yield ' File "{}", line {}\n'.format(filename, lineno) @@ -580,12 +583,13 @@ def format_exception_only(self): offset = self.offset if badline is not None: yield ' {}\n'.format(badline.strip()) - if offset is not None: + if offset is not None and offset >= 1: caretspace = badline.rstrip('\n') - offset = min(len(caretspace), offset) - 1 + # Convert to 0-based offset, and clip to text length + offset = min(len(caretspace), offset - 1) caretspace = caretspace[:offset].lstrip() # non-space whitespace (likes tabs) must be kept for alignment - caretspace = ((c.isspace() and c or ' ') for c in caretspace) + caretspace = ((c if c.isspace() else ' ') for c in caretspace) yield ' {}^\n'.format(''.join(caretspace)) msg = self.msg or "" yield "{}: {}\n".format(stype, msg) From b951b0f15b2497f3e254fff5fd1f4980750a237b Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 13 May 2020 10:47:32 -0700 Subject: [PATCH 2/7] Misc/NEWS blurb --- .../next/Library/2020-05-13-10-23-29.bpo-40612.gOIreM.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2020-05-13-10-23-29.bpo-40612.gOIreM.rst diff --git a/Misc/NEWS.d/next/Library/2020-05-13-10-23-29.bpo-40612.gOIreM.rst b/Misc/NEWS.d/next/Library/2020-05-13-10-23-29.bpo-40612.gOIreM.rst new file mode 100644 index 00000000000000..32cc8073d3f79c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-05-13-10-23-29.bpo-40612.gOIreM.rst @@ -0,0 +1,2 @@ +Fix edge cases in SyntaxError formatting. If the offset is <= 0, no caret is printed. +If the offset is > line length, the caret is printed pointing just after the last character. From ea1c6f23492ea2baad8cefe7a47d07c0c7807579 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 13 May 2020 11:41:59 -0700 Subject: [PATCH 3/7] Refactor print_error_text() --- Python/pythonrun.c | 64 +++++++++++++++++++++++++++++++--------------- 1 file changed, 43 insertions(+), 21 deletions(-) diff --git a/Python/pythonrun.c b/Python/pythonrun.c index 45f08b707eb999..0828e3cf3ade04 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -554,36 +554,58 @@ parse_syntax_error(PyObject *err, PyObject **message, PyObject **filename, static void print_error_text(PyObject *f, int offset, PyObject *text_obj) { - const char *text; - const char *nl; - - text = PyUnicode_AsUTF8(text_obj); + /* Convert text to a char pointer; return if error */ + const char *text = PyUnicode_AsUTF8(text_obj); if (text == NULL) return; - if (offset >= 0) { - if (offset > 0 && (size_t)offset == strlen(text) && text[offset - 1] == '\n') - offset--; - for (;;) { - nl = strchr(text, '\n'); - if (nl == NULL || nl-text >= offset) - break; - offset -= (int)(nl+1-text); - text = nl+1; - } - while (*text == ' ' || *text == '\t' || *text == '\f') { - text++; - offset--; - } + /* Convert offset from 1-based to 0-based */ + offset--; + + /* Strip leading whitespace from text, adjusting offset as we go */ + while (*text == ' ' || *text == '\t' || *text == '\f') { + text++; + offset--; + } + + /* Calculate text length excluding trailing newline */ + Py_ssize_t len = strlen(text); + if (len > 0 && text[len-1] == '\n') + len--; + + /* Clip offset to at most len */ + if (offset > len) + offset = len; + + /* Skip past newlines embedded in text */ + for (;;) { + const char *nl = strchr(text, '\n'); + if (nl == NULL) + break; + Py_ssize_t inl = nl - text; + if (inl >= (Py_ssize_t)offset) + break; + inl += 1; + text += inl; + len -= inl; + offset -= (int)inl; } + + /* Print text */ PyFile_WriteString(" ", f); PyFile_WriteString(text, f); - if (*text == '\0' || text[strlen(text)-1] != '\n') + + /* Make sure there's a newline at the end */ + if (text[len] != '\n') PyFile_WriteString("\n", f); - if (offset == -1) + + /* Don't print caret if it points to the left of the text */ + if (offset < 0) return; + + /* Write caret line */ PyFile_WriteString(" ", f); - while (--offset > 0) + while (--offset >= 0) PyFile_WriteString(" ", f); PyFile_WriteString("^\n", f); } From 72e5b93e2447a3f6ccf1ac0a6797156e7cb61579 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 13 May 2020 12:45:47 -0700 Subject: [PATCH 4/7] Fix test (and rebase) --- Lib/test/test_cmd_line_script.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_cmd_line_script.py b/Lib/test/test_cmd_line_script.py index 171340581af228..15fca7b8a5191e 100644 --- a/Lib/test/test_cmd_line_script.py +++ b/Lib/test/test_cmd_line_script.py @@ -633,7 +633,7 @@ def test_syntaxerror_multi_line_fstring(self): stderr.splitlines()[-3:], [ b' foo"""', - b' ^', + b' ^', b'SyntaxError: f-string: empty expression not allowed', ], ) From 6df7662ca5f0fa63d31986f56b3d7dd33889be0e Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Wed, 13 May 2020 19:28:20 -0700 Subject: [PATCH 5/7] Add thorough test, and fix found issues in traceback.py --- Lib/test/test_traceback.py | 28 +++++++++++++++++++++++++--- Lib/traceback.py | 23 +++++++++++++---------- 2 files changed, 38 insertions(+), 13 deletions(-) diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py index c703cedb051ef1..cd6b99928c5546 100644 --- a/Lib/test/test_traceback.py +++ b/Lib/test/test_traceback.py @@ -80,12 +80,11 @@ def test_bad_indentation(self): self.assertIn("^", err[2]) self.assertEqual(err[1].find(")") + 1, err[2].find("^")) + # No caret for "unexpected indent" err = self.get_exception_format(self.syntax_error_bad_indentation2, IndentationError) - self.assertEqual(len(err), 4) + self.assertEqual(len(err), 3) self.assertEqual(err[1].strip(), "print(2)") - self.assertIn("^", err[2]) - self.assertEqual(err[1].find("p"), err[2].find("^")) def test_base_exception(self): # Test that exceptions derived from BaseException are formatted right @@ -679,6 +678,29 @@ def test_message_none(self): err = self.get_report(Exception('')) self.assertIn('Exception\n', err) + def test_syntax_error_various_offsets(self): + print() + for offset in range(-5, 10): + for add in [0, 2]: + text = " "*add + "text%d" % offset + expected = [' File "file.py", line 1'] + if offset < 1: + expected.append(" %s" % text.lstrip()) + elif offset <= 6: + expected.append(" %s" % text.lstrip()) + expected.append(" %s^" % (" "*(offset-1))) + else: + expected.append(" %s" % text.lstrip()) + expected.append(" %s^" % (" "*5)) + expected.append("SyntaxError: msg") + expected.append("") + err = self.get_report(SyntaxError("msg", ("file.py", 1, offset+add, text))) + exp = "\n".join(expected) + if exp != err: + print(f">>> offset={offset}; add={add}; text={text!r}") + print(err) + self.assertEqual(exp, err) + class PyExcReportingTests(BaseExceptionReportingTests, unittest.TestCase): # diff --git a/Lib/traceback.py b/Lib/traceback.py index 7d5561769f541d..a19e38718b1205 100644 --- a/Lib/traceback.py +++ b/Lib/traceback.py @@ -579,17 +579,20 @@ def _format_syntax_error(self, stype): lineno = str(self.lineno) or '?' yield ' File "{}", line {}\n'.format(filename, lineno) - badline = self.text - offset = self.offset - if badline is not None: - yield ' {}\n'.format(badline.strip()) - if offset is not None and offset >= 1: - caretspace = badline.rstrip('\n') - # Convert to 0-based offset, and clip to text length - offset = min(len(caretspace), offset - 1) - caretspace = caretspace[:offset].lstrip() + text = self.text + if text is not None: + # text = " foo\n" + # rtext = " foo" + # ltext = "foo" + rtext = text.rstrip('\n') + ltext = rtext.lstrip(' \n\f') + spaces = len(rtext) - len(ltext) + yield ' {}\n'.format(ltext) + # Convert 1-based column offset to 0-based index into stripped text + caret = (self.offset or 0) - 1 - spaces + if caret >= 0: # non-space whitespace (likes tabs) must be kept for alignment - caretspace = ((c if c.isspace() else ' ') for c in caretspace) + caretspace = ((c if c.isspace() else ' ') for c in ltext[:caret]) yield ' {}^\n'.format(''.join(caretspace)) msg = self.msg or "" yield "{}: {}\n".format(stype, msg) From 1c7e6bb160e3f25117ee3f13f9b751b46b477233 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 14 May 2020 11:15:45 -0700 Subject: [PATCH 6/7] Remove debug print() calls --- Lib/test/test_traceback.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py index cd6b99928c5546..f9a5f2fc53e1e9 100644 --- a/Lib/test/test_traceback.py +++ b/Lib/test/test_traceback.py @@ -679,7 +679,6 @@ def test_message_none(self): self.assertIn('Exception\n', err) def test_syntax_error_various_offsets(self): - print() for offset in range(-5, 10): for add in [0, 2]: text = " "*add + "text%d" % offset @@ -696,9 +695,6 @@ def test_syntax_error_various_offsets(self): expected.append("") err = self.get_report(SyntaxError("msg", ("file.py", 1, offset+add, text))) exp = "\n".join(expected) - if exp != err: - print(f">>> offset={offset}; add={add}; text={text!r}") - print(err) self.assertEqual(exp, err) From 31641ff0e4b18c8d002d019f4506f0e8fb446983 Mon Sep 17 00:00:00 2001 From: Guido van Rossum Date: Thu, 14 May 2020 18:57:40 -0700 Subject: [PATCH 7/7] Apply PEP 7 suggestions from code review Co-authored-by: Pablo Galindo --- Python/pythonrun.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/Python/pythonrun.c b/Python/pythonrun.c index 0828e3cf3ade04..160f44d38e2e19 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -570,21 +570,25 @@ print_error_text(PyObject *f, int offset, PyObject *text_obj) /* Calculate text length excluding trailing newline */ Py_ssize_t len = strlen(text); - if (len > 0 && text[len-1] == '\n') + if (len > 0 && text[len-1] == '\n') { len--; + } /* Clip offset to at most len */ - if (offset > len) + if (offset > len) { offset = len; + } /* Skip past newlines embedded in text */ for (;;) { const char *nl = strchr(text, '\n'); - if (nl == NULL) + if (nl == NULL) { break; + } Py_ssize_t inl = nl - text; - if (inl >= (Py_ssize_t)offset) + if (inl >= (Py_ssize_t)offset) { break; + } inl += 1; text += inl; len -= inl; @@ -596,8 +600,9 @@ print_error_text(PyObject *f, int offset, PyObject *text_obj) PyFile_WriteString(text, f); /* Make sure there's a newline at the end */ - if (text[len] != '\n') + if (text[len] != '\n') { PyFile_WriteString("\n", f); + } /* Don't print caret if it points to the left of the text */ if (offset < 0) @@ -605,8 +610,9 @@ print_error_text(PyObject *f, int offset, PyObject *text_obj) /* Write caret line */ PyFile_WriteString(" ", f); - while (--offset >= 0) + while (--offset >= 0) { PyFile_WriteString(" ", f); + } PyFile_WriteString("^\n", f); }