python · miss-islington · May 15, 2020 · May 13, 2020 · May 13, 2020 · May 13, 2020
diff --git a/Lib/test/test_cmd_line_script.py b/Lib/test/test_cmd_line_script.py
@@ -633,7 +633,7 @@ def test_syntaxerror_multi_line_fstring(self):
                 stderr.splitlines()[-3:],
                 [
                     b'    foo"""',
-                    b'         ^',
+                    b'          ^',
                     b'SyntaxError: f-string: empty expression not allowed',
                 ],
             )

diff --git a/Lib/test/test_traceback.py b/Lib/test/test_traceback.py
@@ -58,13 +58,13 @@ def test_caret(self):
                                         SyntaxError)
         self.assertIn("^", err[2]) # third line has caret
         self.assertEqual(err[2].count('\n'), 1)   # and no additional newline
-        self.assertEqual(err[1].find("+"), err[2].find("^"))  # in the right place
+        self.assertEqual(err[1].find("+") + 1, err[2].find("^"))  # in the right place
 
         err = self.get_exception_format(self.syntax_error_with_caret_non_ascii,
                                         SyntaxError)
         self.assertIn("^", err[2]) # third line has caret
         self.assertEqual(err[2].count('\n'), 1)   # and no additional newline
-        self.assertEqual(err[1].find("+"), err[2].find("^"))  # in the right place
+        self.assertEqual(err[1].find("+") + 1, err[2].find("^"))  # in the right place
 
     def test_nocaret(self):
         exc = SyntaxError("error", ("x.py", 23, None, "bad syntax"))
@@ -78,14 +78,13 @@ def test_bad_indentation(self):
         self.assertEqual(len(err), 4)
         self.assertEqual(err[1].strip(), "print(2)")
         self.assertIn("^", err[2])
-        self.assertEqual(err[1].find(")"), err[2].find("^"))
+        self.assertEqual(err[1].find(")") + 1, err[2].find("^"))
 
+        # No caret for "unexpected indent"
         err = self.get_exception_format(self.syntax_error_bad_indentation2,
                                         IndentationError)
-        self.assertEqual(len(err), 4)
+        self.assertEqual(len(err), 3)
         self.assertEqual(err[1].strip(), "print(2)")
-        self.assertIn("^", err[2])
-        self.assertEqual(err[1].find("p"), err[2].find("^"))
 
     def test_base_exception(self):
         # Test that exceptions derived from BaseException are formatted right
@@ -656,7 +655,7 @@ def outer_raise():
         self.assertIn('inner_raise() # Marker', blocks[2])
         self.check_zero_div(blocks[2])
 
-    @support.skip_if_new_parser("Pegen is arguably better here, so no need to fix this")
+    @unittest.skipIf(support.use_old_parser(), "Pegen is arguably better here, so no need to fix this")
     def test_syntax_error_offset_at_eol(self):
         # See #10186.
         def e():
@@ -666,7 +665,7 @@ def e():
         def e():
             exec("x = 5 | 4 |")
         msg = self.get_report(e).splitlines()
-        self.assertEqual(msg[-2], '              ^')
+        self.assertEqual(msg[-2], '               ^')
 
     def test_message_none(self):
         # A message that looks like "None" should not be treated specially
@@ -679,6 +678,29 @@ def test_message_none(self):
         err = self.get_report(Exception(''))
         self.assertIn('Exception\n', err)
 
+    def test_syntax_error_various_offsets(self):
+        # `add` leading spaces are stripped from the output, so the caret
+        # column depends only on `offset`: none below 1, and never further
+        # right than just past the last character of the text.
+        for offset in range(-5, 10):
+            for add in [0, 2]:
+                with self.subTest(offset=offset, add=add):
+                    text = " "*add + "text%d" % offset
+                    expected = ['  File "file.py", line 1']
+                    if offset < 1:
+                        expected.append("    %s" % text.lstrip())
+                    elif offset <= 6:
+                        expected.append("    %s" % text.lstrip())
+                        expected.append("    %s^" % (" "*(offset-1)))
+                    else:
+                        expected.append("    %s" % text.lstrip())
+                        expected.append("    %s^" % (" "*5))
+                    expected.append("SyntaxError: msg")
+                    expected.append("")
+                    err = self.get_report(SyntaxError("msg", ("file.py", 1, offset+add, text)))
+                    exp = "\n".join(expected)
+                    self.assertEqual(exp, err)
+
 
 class PyExcReportingTests(BaseExceptionReportingTests, unittest.TestCase):
     #

diff --git a/Lib/traceback.py b/Lib/traceback.py
@@ -569,23 +569,30 @@ def format_exception_only(self):
 
         if not issubclass(self.exc_type, SyntaxError):
             yield _format_final_exc_line(stype, self._str)
-            return
+        else:
+            yield from self._format_syntax_error(stype)
 
-        # It was a syntax error; show exactly where the problem was found.
+    def _format_syntax_error(self, stype):
+        """Format SyntaxError exceptions (internal helper)."""
+        # Show exactly where the problem was found.
         filename = self.filename or "<string>"
         lineno = str(self.lineno) or '?'
         yield '  File "{}", line {}\n'.format(filename, lineno)
 
-        badline = self.text
-        offset = self.offset
-        if badline is not None:
-            yield '    {}\n'.format(badline.strip())
-            if offset is not None:
-                caretspace = badline.rstrip('\n')
-                offset = min(len(caretspace), offset) - 1
-                caretspace = caretspace[:offset].lstrip()
+        text = self.text
+        if text is not None:
+            # text  = "   foo\n"
+            # rtext = "   foo"
+            # ltext =    "foo"
+            rtext = text.rstrip('\n')
+            ltext = rtext.lstrip(' \n\f')
+            spaces = len(rtext) - len(ltext)
+            yield '    {}\n'.format(ltext)
+            # Convert 1-based column offset to 0-based index into stripped text
+            caret = (self.offset or 0) - 1 - spaces
+            if caret >= 0:
                 # non-space whitespace (likes tabs) must be kept for alignment
-                caretspace = ((c.isspace() and c or ' ') for c in caretspace)
+                caretspace = ((c if c.isspace() else ' ') for c in ltext[:caret])
                 yield '    {}^\n'.format(''.join(caretspace))
         msg = self.msg or "<no detail available>"
         yield "{}: {}\n".format(stype, msg)

diff --git a/Misc/NEWS.d/next/Library/2020-05-13-10-23-29.bpo-40612.gOIreM.rst b/Misc/NEWS.d/next/Library/2020-05-13-10-23-29.bpo-40612.gOIreM.rst
@@ -0,0 +1,2 @@
+Fix edge cases in SyntaxError formatting. If the offset is <= 0, no caret is printed.
+If the offset is > line length, the caret is printed pointing just after the last character.
diff --git a/Python/pythonrun.c b/Python/pythonrun.c
@@ -554,36 +554,58 @@ parse_syntax_error(PyObject *err, PyObject **message, PyObject **filename,
 static void
 print_error_text(PyObject *f, int offset, PyObject *text_obj)
 {
-    const char *text;
-    const char *nl;
-
-    text = PyUnicode_AsUTF8(text_obj);
+    /* Convert text to a char pointer; return if error */
+    const char *text = PyUnicode_AsUTF8(text_obj);
     if (text == NULL)
         return;
 
-    if (offset >= 0) {
-        if (offset > 0 && (size_t)offset == strlen(text) && text[offset - 1] == '\n')
-            offset--;
-        for (;;) {
-            nl = strchr(text, '\n');
-            if (nl == NULL || nl-text >= offset)
-                break;
-            offset -= (int)(nl+1-text);
 err_ret->offset = col_offset != -1 ? col_offset + 1 : ((int)(tok->cur - tok->buf)); 
 len = tok->inp - tok->buf; 
 err_ret->text = (char *) PyObject_MALLOC(len + 1); 
 if (err_ret->text != NULL) { 
     if (len > 0) 
         strncpy(err_ret->text, tok->buf, len); 
     err_ret->text[len] = '\0'; 
 err_ret->offset = col_offset != -1 ? col_offset + 1 : ((int)(tok->cur - tok->buf)); 
 len = tok->inp - tok->buf; 
 err_ret->text = (char *) PyObject_MALLOC(len + 1); 
 if (err_ret->text != NULL) { 
     if (len > 0) 
         strncpy(err_ret->text, tok->buf, len); 
     err_ret->text[len] = '\0'; 
-            text = nl+1;
-        }
-        while (*text == ' ' || *text == '\t' || *text == '\f') {
-            text++;
-            offset--;
-        }
+    /* Convert offset from 1-based to 0-based */
+    offset--;
+
+    /* Strip leading whitespace from text, adjusting offset as we go */
+    while (*text == ' ' || *text == '\t' || *text == '\f') {
+        text++;
+        offset--;
+    }
+
+    /* Calculate text length excluding trailing newline */
+    Py_ssize_t len = strlen(text);
+    if (len > 0 && text[len-1] == '\n')
+        len--;
+
+    /* Clip offset to at most len */
+    if (offset > len)
+        offset = len;
+
+    /* Skip past newlines embedded in text */
+    for (;;) {
+        const char *nl = strchr(text, '\n');
+        if (nl == NULL)
+            break;
+        Py_ssize_t inl = nl - text;
+        if (inl >= (Py_ssize_t)offset)
+            break;
+        inl += 1;
+        text += inl;
+        len -= inl;
+        offset -= (int)inl;
     }
+
+    /* Print text */
     PyFile_WriteString("    ", f);
     PyFile_WriteString(text, f);
-    if (*text == '\0' || text[strlen(text)-1] != '\n')
+
+    /* Make sure there's a newline at the end */
+    if (text[len] != '\n')
         PyFile_WriteString("\n", f);
-    if (offset == -1)
+
+    /* Don't print caret if it points to the left of the text */
+    if (offset < 0)
         return;
+
+    /* Write caret line */
     PyFile_WriteString("    ", f);
-    while (--offset > 0)
+    while (--offset >= 0)
         PyFile_WriteString(" ", f);
     PyFile_WriteString("^\n", f);
 }
-Original file line number
+Diff line change
@@ Expand Up / @@ -633,7 +633,7 @@ def test_syntaxerror_multi_line_fstring(self): @@
                     stderr.splitlines()[-3:],
                     [
                         b'    foo"""',
-                        b'         ^',
+                        b'          ^',
                         b'SyntaxError: f-string: empty expression not allowed',
                     ],
                 )
@@ Expand Down @@
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		Fix edge cases in SyntaxError formatting. If the offset is <= 0, no caret is printed.
		If the offset is > line length, the caret is printed pointing just after the last character.