-
-
Notifications
You must be signed in to change notification settings - Fork 30.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[3.10] gh-99612: Fix PyUnicode_DecodeUTF8Stateful() for ASCII-only data (GH-99613) (GH-107224) (#107230)
Previously *consumed was not set in this case. (cherry picked from commit b8b3e6a) (cherry picked from commit f08e52c) Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
- Loading branch information
1 parent
37d7180
commit 1c937e5
Showing
4 changed files
with
95 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
import unittest | ||
from test.support import import_helper | ||
|
||
_testcapi = import_helper.import_module('_testcapi') | ||
|
||
|
||
class CAPITest(unittest.TestCase):
    """Checks for the UTF-8 decoding C API, driven through ``_testcapi``."""

    def test_decodeutf8(self):
        """Test PyUnicode_DecodeUTF8()"""
        decode = _testcapi.unicode_decodeutf8

        # Valid text built from 1-, 2-, 3- and 4-byte UTF-8 sequences must
        # round-trip, with and without an explicit 'strict' handler.
        samples = ('abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600')
        for text in samples:
            encoded = text.encode('utf-8')
            self.assertEqual(decode(encoded), text)
            self.assertEqual(decode(encoded, 'strict'), text)

        # Malformed input is rejected under the default error handler;
        # this includes a sequence that is cut off at the end of the data.
        for malformed in (b'\x80', b'\xc0', b'\xff', b'a\xf0\x9f'):
            with self.assertRaises(UnicodeDecodeError):
                decode(malformed)
        self.assertEqual(decode(b'a\xf0\x9f', 'replace'), 'a\ufffd')
        self.assertEqual(decode(b'a\xf0\x9fb', 'replace'), 'a\ufffdb')

        # An unknown error handler name surfaces as LookupError.
        with self.assertRaises(LookupError):
            decode(b'a\x80', 'foo')
        # TODO: Test PyUnicode_DecodeUTF8() with NULL as data and
        # negative size.

    def test_decodeutf8stateful(self):
        """Test PyUnicode_DecodeUTF8Stateful()"""
        decode = _testcapi.unicode_decodeutf8stateful

        # Fully valid input: the wrapper returns the decoded text together
        # with the number of bytes consumed, which must be the whole input.
        samples = ('abc', '\xa1\xa2', '\u4f60\u597d', 'a\U0001f600')
        for text in samples:
            encoded = text.encode('utf-8')
            expected = (text, len(encoded))
            self.assertEqual(decode(encoded), expected)
            self.assertEqual(decode(encoded, 'strict'), expected)

        for malformed in (b'\x80', b'\xc0', b'\xff'):
            with self.assertRaises(UnicodeDecodeError):
                decode(malformed)
        # A sequence truncated at the end of the data is not an error here:
        # it is left unconsumed, whichever error handler is given.
        self.assertEqual(decode(b'a\xf0\x9f'), ('a', 1))
        self.assertEqual(decode(b'a\xf0\x9f', 'replace'), ('a', 1))
        # The same bytes followed by more data are a genuine error.
        with self.assertRaises(UnicodeDecodeError):
            decode(b'a\xf0\x9fb')
        self.assertEqual(decode(b'a\xf0\x9fb', 'replace'), ('a\ufffdb', 4))

        # An unknown error handler name surfaces as LookupError.
        with self.assertRaises(LookupError):
            decode(b'a\x80', 'foo')
        # TODO: Test PyUnicode_DecodeUTF8Stateful() with NULL as data and
        # negative size.
        # TODO: Test PyUnicode_DecodeUTF8Stateful() with NULL as the address of
        # "consumed".
|
||
|
||
if __name__ == "__main__": | ||
unittest.main() |
2 changes: 2 additions & 0 deletions
2
Misc/NEWS.d/next/C API/2022-11-20-09-52-50.gh-issue-99612.eBHksg.rst
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
Fix :c:func:`PyUnicode_DecodeUTF8Stateful` for ASCII-only data: | ||
``*consumed`` was not set. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters