From 2b176bc90269b400226d0219a05e9b4c49604c51 Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Sat, 27 May 2023 10:24:30 -0700 Subject: [PATCH] [3.12] gh-105017: Fix including additional NL token when using CRLF (GH-105022) (#105023) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Marta Gómez Macías Co-authored-by: Pablo Galindo Salgado --- Lib/test/test_tokenize.py | 8 ++++++++ .../2023-05-27-16-23-16.gh-issue-105017.KQrsC0.rst | 1 + Parser/tokenizer.c | 2 +- 3 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2023-05-27-16-23-16.gh-issue-105017.KQrsC0.rst diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index abb68859be944c..293592b3fd13db 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -84,6 +84,14 @@ def test_basic(self): NEWLINE '\\n' (4, 26) (4, 27) DEDENT '' (5, 0) (5, 0) """) + + self.check_tokenize("foo='bar'\r\n", """\ + NAME 'foo' (1, 0) (1, 3) + OP '=' (1, 3) (1, 4) + STRING "'bar'" (1, 4) (1, 9) + NEWLINE '\\n' (1, 9) (1, 10) + """) + indent_error_file = b"""\ def k(x): x += 2 diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-05-27-16-23-16.gh-issue-105017.KQrsC0.rst b/Misc/NEWS.d/next/Core and Builtins/2023-05-27-16-23-16.gh-issue-105017.KQrsC0.rst new file mode 100644 index 00000000000000..d41a2169ccb3de --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-05-27-16-23-16.gh-issue-105017.KQrsC0.rst @@ -0,0 +1 @@ +Do not include an additional final ``NL`` token when parsing files having CRLF lines. Patch by Marta Gómez. diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c index 1e8f785a331ac5..b8c1c110b546fd 100644 --- a/Parser/tokenizer.c +++ b/Parser/tokenizer.c @@ -800,7 +800,7 @@ translate_newlines(const char *s, int exec_input, struct tok_state *tok) { } /* If this is exec input, add a newline to the end of the string if there isn't one already. */ - if (exec_input && c != '\n') { + if (exec_input && c != '\n' && c != '\0') { *current = '\n'; current++; }