From e81810d83d5c9ace84905d36064d1e35ba5188cf Mon Sep 17 00:00:00 2001
From: Pablo Galindo <pablogsal@gmail.com>
Date: Wed, 24 May 2023 10:00:20 +0100
Subject: [PATCH] gh-104825: Remove implicit newline from the line attribute
 of tokens emitted by the tokenize module

---
 Lib/test/test_tokenize.py                                     | 4 ++--
 Lib/tokenize.py                                               | 4 ++--
 .../2023-05-24-09-59-56.gh-issue-104825.mQesie.rst            | 2 ++
 Python/Python-tokenize.c                                      | 4 ++++
 4 files changed, 10 insertions(+), 4 deletions(-)
 create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-05-24-09-59-56.gh-issue-104825.mQesie.rst

diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
index 8e7ab3d4b7b578..fd9c919ce6a0d1 100644
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -103,7 +103,7 @@ def k(x):
             e.exception.msg,
             'unindent does not match any outer indentation level')
         self.assertEqual(e.exception.offset, 9)
-        self.assertEqual(e.exception.text, '  x += 5\n')
+        self.assertEqual(e.exception.text, '  x += 5')
 
     def test_int(self):
         # Ordinary integers and binary operators
@@ -1157,7 +1157,7 @@ def readline():
 
         # skip the initial encoding token and the end tokens
         tokens = list(_tokenize(readline(), encoding='utf-8'))[:-2]
-        expected_tokens = [TokenInfo(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"\n')]
+        expected_tokens = [TokenInfo(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
         self.assertEqual(tokens, expected_tokens,
                          "bytes not decoded with encoding")
 
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
index 911f0f12f9bb7e..d2cf1d5b038277 100644
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -518,8 +518,8 @@ def error(message, filename=None, location=None):
             if args.exact:
                 token_type = token.exact_type
             token_range = "%d,%d-%d,%d:" % (token.start + token.end)
-            print("%-20s%-15s%-15r" %
-                  (token_range, tok_name[token_type], token.string))
+            print("%-20s%-15s%-15r%-15r" %
+                  (token_range, tok_name[token_type], token.string, token.line))
     except IndentationError as err:
         line, column = err.args[1][1:3]
         error(err.args[0], filename, (line, column))
diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-05-24-09-59-56.gh-issue-104825.mQesie.rst b/Misc/NEWS.d/next/Core and Builtins/2023-05-24-09-59-56.gh-issue-104825.mQesie.rst
new file mode 100644
index 00000000000000..caf5d3527085f3
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2023-05-24-09-59-56.gh-issue-104825.mQesie.rst	
@@ -0,0 +1,2 @@
+Tokens emitted by the :mod:`tokenize` module no longer include an implicit
+``\n`` character in the ``line`` attribute. Patch by Pablo Galindo.
diff --git a/Python/Python-tokenize.c b/Python/Python-tokenize.c
index f7e32d3af9a9f7..0023e303b96e83 100644
--- a/Python/Python-tokenize.c
+++ b/Python/Python-tokenize.c
@@ -123,6 +123,8 @@ _tokenizer_error(struct tok_state *tok)
     int result = 0;
 
     Py_ssize_t size = tok->inp - tok->buf;
+    assert(tok->buf[size-1] == '\n');
+    size -= 1; // Remove the newline character from the end of the line
     error_line = PyUnicode_DecodeUTF8(tok->buf, size, "replace");
     if (!error_line) {
         result = -1;
@@ -193,6 +195,8 @@ tokenizeriter_next(tokenizeriterobject *it)
     }
 
     Py_ssize_t size = it->tok->inp - it->tok->buf;
+    assert(it->tok->buf[size-1] == '\n');
+    size -= 1; // Remove the newline character from the end of the line
     PyObject *line = PyUnicode_DecodeUTF8(it->tok->buf, size, "replace");
     if (line == NULL) {
         Py_DECREF(str);