Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bpo-36346: Prepare for removing the legacy Unicode C API. #12409

Closed
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
699e616
bpo-36346: Prepare for removing the legacy Unicode C API.
serhiy-storchaka Oct 26, 2018
60b89c9
Fix winreg.SetValue().
serhiy-storchaka Mar 19, 2019
f386b63
Clean up some ifdefs in _testcapimodule.
serhiy-storchaka Mar 19, 2019
11e0e0c
Make path_cleanup() paranoidally safer.
serhiy-storchaka Mar 19, 2019
236f608
Merge branch 'master' into disable-wchar-cache
serhiy-storchaka Mar 19, 2019
8750d48
Fix os.scandir().
serhiy-storchaka Mar 20, 2019
431e71f
Merge branch 'master' into disable-wchar-cache
serhiy-storchaka Mar 20, 2019
545c7a9
Merge branch 'master' into disable-wchar-cache
serhiy-storchaka Mar 28, 2019
3c1ab31
Merge branch 'master' into disable-wchar-cache
serhiy-storchaka Mar 13, 2020
54b0561
Merge branch 'master' into disable-wchar-cache
serhiy-storchaka Jun 15, 2020
2c62d96
Merge branch 'master' into disable-wchar-cache
serhiy-storchaka Jun 29, 2020
6d89775
Merge branch 'master' into disable-wchar-cache
serhiy-storchaka Jun 30, 2020
3b5294a
Silence compiler warnings.
serhiy-storchaka Jun 30, 2020
e7898fa
Fix PyUnicode_IsIdentifier for the cache-less build.
serhiy-storchaka Jun 30, 2020
fd641c6
Silence compiler warnings on Windows.
serhiy-storchaka Jun 30, 2020
a79d935
Merge branch 'master' into disable-wchar-cache
serhiy-storchaka Jun 30, 2020
ed14aa9
Fix compiler warning in _testcapi.
serhiy-storchaka Jun 30, 2020
c5eb102
Merge branch 'master' into disable-wchar-cache
serhiy-storchaka Jul 1, 2020
d529224
Merge branch 'master' into disable-wchar-cache
serhiy-storchaka Jul 1, 2020
6102b4b
Merge branch 'master' into disable-wchar-cache
serhiy-storchaka Jul 5, 2020
74695b3
Merge branch 'master' into disable-wchar-cache
serhiy-storchaka Jul 5, 2020
97b5228
Use HAVE_UNICODE_WCHAR_CACHE instead of USE_UNICODE_WCHAR_CACHE in _P…
serhiy-storchaka Jul 5, 2020
d6ba6b7
Set HAVE_UNICODE_WCHAR_CACHE and USE_UNICODE_WCHAR_CACHE only if they…
serhiy-storchaka Jul 9, 2020
0da4146
Remove Py_UNICODE_MATCH.
serhiy-storchaka Jul 9, 2020
51365fb
Merge branch 'master' into disable-wchar-cache
serhiy-storchaka Jul 10, 2020
e425908
Fix unterminated #if.
serhiy-storchaka Jul 10, 2020
8a9259b
Reset arraymodule.c.
serhiy-storchaka Jul 10, 2020
76ba4b6
Merge branch 'master' into disable-wchar-cache
serhiy-storchaka Jul 10, 2020
16ac7fd
Temporary disable the wchar_t cache by default.
serhiy-storchaka Jul 10, 2020
5e90bf8
Fix unicode_result().
serhiy-storchaka Jul 10, 2020
09675ab
Merge branch 'master' into disable-wchar-cache
serhiy-storchaka Jul 10, 2020
6915ce3
Merge branch 'master' into disable-wchar-cache
serhiy-storchaka Apr 25, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions Include/cpython/unicodeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ typedef wchar_t Py_UNICODE /* Py_DEPRECATED(3.3) */;

/* --- Internal Unicode Operations ---------------------------------------- */

#define HAVE_UNICODE_WCHAR_CACHE 1
#define USE_UNICODE_WCHAR_CACHE 1

/* Since splitting on whitespace is an important use case, and
whitespace in most situations is solely ASCII whitespace, we
optimize for the common case by using a quick look-up table
Expand Down Expand Up @@ -71,13 +74,15 @@ typedef wchar_t Py_UNICODE /* Py_DEPRECATED(3.3) */;
/* low surrogate = bottom 10 bits added to DC00 */
#define Py_UNICODE_LOW_SURROGATE(ch) (0xDC00 + ((ch) & 0x3FF))

#if HAVE_UNICODE_WCHAR_CACHE
/* Check if substring matches at given offset. The offset must be
valid, and the substring must not be empty (the expansion indexes
element wstr_length-1 of the substring).

Evaluates to non-zero when the wchar_t data of `substring` equals the
wchar_t data of `string` starting at `offset`, and to zero otherwise.
The first and the last elements are compared before memcmp() is
called over wstr_length * sizeof(Py_UNICODE) bytes.

The expansion reads the `wstr` and `wstr_length` members directly, so
the macro is only defined when HAVE_UNICODE_WCHAR_CACHE is non-zero.
Every argument appears several times in the expansion; pass
expressions without side effects. */

#define Py_UNICODE_MATCH(string, offset, substring) \
((*((string)->wstr + (offset)) == *((substring)->wstr)) && \
((*((string)->wstr + (offset) + (substring)->wstr_length-1) == *((substring)->wstr + (substring)->wstr_length-1))) && \
!memcmp((string)->wstr + (offset), (substring)->wstr, (substring)->wstr_length*sizeof(Py_UNICODE)))
#endif /* HAVE_UNICODE_WCHAR_CACHE */

/* --- Unicode Type ------------------------------------------------------- */

Expand Down Expand Up @@ -218,7 +223,9 @@ typedef struct {
4 bytes (see issue #19537 on m68k). */
unsigned int :24;
} state;
#if HAVE_UNICODE_WCHAR_CACHE
wchar_t *wstr; /* wchar_t representation (null-terminated) */
#endif /* HAVE_UNICODE_WCHAR_CACHE */
} PyASCIIObject;

/* Non-ASCII strings allocated through PyUnicode_New use the
Expand All @@ -229,8 +236,10 @@ typedef struct {
Py_ssize_t utf8_length; /* Number of bytes in utf8, excluding the
* terminating \0. */
char *utf8; /* UTF-8 representation (null-terminated) */
#if HAVE_UNICODE_WCHAR_CACHE
Py_ssize_t wstr_length; /* Number of code points in wstr, possible
* surrogates count as two code points. */
#endif /* HAVE_UNICODE_WCHAR_CACHE */
} PyCompactUnicodeObject;

/* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the
Expand All @@ -247,6 +256,8 @@ typedef struct {
} PyUnicodeObject;

/* Fast access macros */
#if HAVE_UNICODE_WCHAR_CACHE

#define PyUnicode_WSTR_LENGTH(op) \
(PyUnicode_IS_COMPACT_ASCII(op) ? \
((PyASCIIObject*)op)->length : \
Expand Down Expand Up @@ -285,6 +296,7 @@ typedef struct {
((const char *)(PyUnicode_AS_UNICODE(op)))
/* Py_DEPRECATED(3.3) */

#endif /* HAVE_UNICODE_WCHAR_CACHE */

/* --- Flexible String Representation Helper Macros (PEP 393) -------------- */

Expand Down Expand Up @@ -1240,6 +1252,9 @@ PyAPI_FUNC(void) _PyUnicode_ClearStaticStrings(void);
and where the hash values are equal (i.e. a very probable match) */
PyAPI_FUNC(int) _PyUnicode_EQ(PyObject *, PyObject *);

PyAPI_FUNC(int) _PyUnicode_WideCharString_Converter(PyObject *, void *);
PyAPI_FUNC(int) _PyUnicode_WideCharString_Opt_Converter(PyObject *, void *);

#ifdef __cplusplus
}
#endif
19 changes: 16 additions & 3 deletions Lib/test/clinic.test
Original file line number Diff line number Diff line change
Expand Up @@ -1871,13 +1871,26 @@ test_Py_UNICODE_converter(PyObject *module, PyObject *const *args, Py_ssize_t na
const Py_UNICODE *e;
Py_ssize_clean_t e_length;

if (!_PyArg_ParseStack(args, nargs, "uuZu#Z#:test_Py_UNICODE_converter",
&a, &b, &c, &d, &d_length, &e, &e_length)) {
if (!_PyArg_ParseStack(args, nargs, "O&O&O&u#Z#:test_Py_UNICODE_converter",
_PyUnicode_WideCharString_Converter, &a, _PyUnicode_WideCharString_Converter, &b, _PyUnicode_WideCharString_Opt_Converter, &c, &d, &d_length, &e, &e_length)) {
goto exit;
}
return_value = test_Py_UNICODE_converter_impl(module, a, b, c, d, d_length, e, e_length);

exit:
/* Cleanup for a */
#if !USE_UNICODE_WCHAR_CACHE
PyMem_Free((void *)a);
#endif /* USE_UNICODE_WCHAR_CACHE */
/* Cleanup for b */
#if !USE_UNICODE_WCHAR_CACHE
PyMem_Free((void *)b);
#endif /* USE_UNICODE_WCHAR_CACHE */
/* Cleanup for c */
#if !USE_UNICODE_WCHAR_CACHE
PyMem_Free((void *)c);
#endif /* USE_UNICODE_WCHAR_CACHE */

return return_value;
}

Expand All @@ -1888,7 +1901,7 @@ test_Py_UNICODE_converter_impl(PyObject *module, const Py_UNICODE *a,
Py_ssize_clean_t d_length,
const Py_UNICODE *e,
Py_ssize_clean_t e_length)
/*[clinic end generated code: output=dd0a09a1b772e57b input=064a3b68ad7f04b0]*/
/*[clinic end generated code: output=ef45e982fedf0b3d input=064a3b68ad7f04b0]*/


/*[clinic input]
Expand Down
8 changes: 8 additions & 0 deletions Lib/test/support/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,11 @@
except ImportError:
resource = None

try:
from _testcapi import unicode_legacy_string
except ImportError:
unicode_legacy_string = None

__all__ = [
# globals
"PIPE_MAX_SIZE", "verbose", "max_memuse", "use_resources", "failfast",
Expand Down Expand Up @@ -814,6 +819,9 @@ def dec(*args, **kwargs):

requires_lzma = unittest.skipUnless(lzma, 'requires lzma')

requires_legacy_unicode_capi = unittest.skipUnless(unicode_legacy_string,
'requires legacy Unicode C API')

is_jython = sys.platform.startswith('java')

is_android = hasattr(sys, 'getandroidapilevel')
Expand Down
2 changes: 1 addition & 1 deletion Lib/test/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,9 +231,9 @@ def test_writerows_with_none(self):
self.assertEqual(fileobj.read(), 'a\r\n""\r\n')

@support.cpython_only
@support.requires_legacy_unicode_capi
def test_writerows_legacy_strings(self):
import _testcapi

c = _testcapi.unicode_legacy_string('a')
with TemporaryFile("w+", newline='') as fileobj:
writer = csv.writer(fileobj)
Expand Down
7 changes: 5 additions & 2 deletions Lib/test/test_decimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,10 @@
import numbers
import locale
from test.support import (run_unittest, run_doctest, is_resource_enabled,
requires_IEEE_754, requires_docstrings)
requires_IEEE_754, requires_docstrings,
requires_legacy_unicode_capi)
from test.support import (import_fresh_module, TestFailed,
run_with_locale, cpython_only)
run_with_locale, cpython_only, get_attribute)
import random
import inspect
import threading
Expand Down Expand Up @@ -581,6 +582,7 @@ def test_explicit_from_string(self):
self.assertRaises(InvalidOperation, Decimal, "1_2_\u00003")

@cpython_only
@requires_legacy_unicode_capi
def test_from_legacy_strings(self):
import _testcapi
Decimal = self.decimal.Decimal
Expand Down Expand Up @@ -2816,6 +2818,7 @@ def test_none_args(self):
Overflow])

@cpython_only
@requires_legacy_unicode_capi
def test_from_legacy_strings(self):
import _testcapi
c = self.decimal.Context()
Expand Down
4 changes: 4 additions & 0 deletions Lib/test/test_getargs2.py
Original file line number Diff line number Diff line change
Expand Up @@ -985,6 +985,7 @@ def test_et_hash(self):
buf = bytearray()
self.assertRaises(ValueError, getargs_et_hash, 'abc\xe9', 'latin1', buf)

@support.requires_legacy_unicode_capi
def test_u(self):
from _testcapi import getargs_u
self.assertEqual(getargs_u('abc\xe9'), 'abc\xe9')
Expand All @@ -994,6 +995,7 @@ def test_u(self):
self.assertRaises(TypeError, getargs_u, memoryview(b'memoryview'))
self.assertRaises(TypeError, getargs_u, None)

@support.requires_legacy_unicode_capi
def test_u_hash(self):
from _testcapi import getargs_u_hash
self.assertEqual(getargs_u_hash('abc\xe9'), 'abc\xe9')
Expand All @@ -1003,6 +1005,7 @@ def test_u_hash(self):
self.assertRaises(TypeError, getargs_u_hash, memoryview(b'memoryview'))
self.assertRaises(TypeError, getargs_u_hash, None)

@support.requires_legacy_unicode_capi
def test_Z(self):
from _testcapi import getargs_Z
self.assertEqual(getargs_Z('abc\xe9'), 'abc\xe9')
Expand All @@ -1012,6 +1015,7 @@ def test_Z(self):
self.assertRaises(TypeError, getargs_Z, memoryview(b'memoryview'))
self.assertIsNone(getargs_Z(None))

@support.requires_legacy_unicode_capi
def test_Z_hash(self):
from _testcapi import getargs_Z_hash
self.assertEqual(getargs_Z_hash('abc\xe9'), 'abc\xe9')
Expand Down
2 changes: 2 additions & 0 deletions Lib/test/test_unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -2832,6 +2832,7 @@ def test_copycharacters(self):
self.assertRaises(SystemError, unicode_copycharacters, s, 0, b'', 0, 0)

@support.cpython_only
@support.requires_legacy_unicode_capi
def test_encode_decimal(self):
from _testcapi import unicode_encodedecimal
self.assertEqual(unicode_encodedecimal('123'),
Expand All @@ -2848,6 +2849,7 @@ def test_encode_decimal(self):
unicode_encodedecimal, "123\u20ac", "replace")

@support.cpython_only
@support.requires_legacy_unicode_capi
def test_transform_decimal(self):
from _testcapi import unicode_transformdecimaltoascii as transform_decimal
self.assertEqual(transform_decimal('123'),
Expand Down
22 changes: 20 additions & 2 deletions Modules/_codecsmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -704,20 +704,38 @@ _codecs_unicode_internal_encode_impl(PyObject *module, PyObject *obj,
return NULL;

if (PyUnicode_Check(obj)) {
Py_UNICODE *u;
Py_ssize_t len, size;

if (PyUnicode_READY(obj) < 0)
return NULL;

u = PyUnicode_AsUnicodeAndSize(obj, &len);
#if USE_UNICODE_WCHAR_CACHE
Py_UNICODE *u = PyUnicode_AsUnicodeAndSize(obj, &len);
if (u == NULL)
return NULL;
if ((size_t)len > (size_t)PY_SSIZE_T_MAX / sizeof(Py_UNICODE))
return PyErr_NoMemory();
size = len * sizeof(Py_UNICODE);
return codec_tuple(PyBytes_FromStringAndSize((const char*)u, size),
PyUnicode_GET_LENGTH(obj));
#else /* USE_UNICODE_WCHAR_CACHE */
len = PyUnicode_AsWideChar(obj, NULL, 0);
if (len < 0) {
return NULL;
}
assert(len > 0);
len--;
if ((size_t)len > (size_t)PY_SSIZE_T_MAX / sizeof(wchar_t)) {
return PyErr_NoMemory();
}
size = len * sizeof(wchar_t);
PyObject *bytes = PyBytes_FromStringAndSize(NULL, size);
if (bytes == NULL) {
return NULL;
}
PyUnicode_AsWideChar(obj, (wchar_t *)PyBytes_AS_STRING(bytes), len);
return codec_tuple(bytes, PyUnicode_GET_LENGTH(obj));
#endif /* USE_UNICODE_WCHAR_CACHE */
}
else {
Py_buffer view;
Expand Down
28 changes: 20 additions & 8 deletions Modules/_ctypes/_ctypes.c
Original file line number Diff line number Diff line change
Expand Up @@ -1293,7 +1293,6 @@ static int
WCharArray_set_value(CDataObject *self, PyObject *value, void *Py_UNUSED(ignored))
{
Py_ssize_t result = 0;
Py_UNICODE *wstr;
Py_ssize_t len;

if (value == NULL) {
Expand All @@ -1309,21 +1308,32 @@ WCharArray_set_value(CDataObject *self, PyObject *value, void *Py_UNUSED(ignored
} else
Py_INCREF(value);

wstr = PyUnicode_AsUnicodeAndSize(value, &len);
if (wstr == NULL)
#if USE_UNICODE_WCHAR_CACHE
len = PyUnicode_GetSize(value);
if (len < 0) {
Py_DECREF(value);
return -1;
}
#else /* USE_UNICODE_WCHAR_CACHE */
len = PyUnicode_AsWideChar(value, NULL, 0);
if (len < 0) {
Py_DECREF(value);
return -1;
}
assert(len > 0);
len--;
#endif /* USE_UNICODE_WCHAR_CACHE */
if ((size_t)len > self->b_size/sizeof(wchar_t)) {
PyErr_SetString(PyExc_ValueError,
"string too long");
result = -1;
goto done;
Py_DECREF(value);
return -1;
}
result = PyUnicode_AsWideChar(value,
(wchar_t *)self->b_ptr,
self->b_size/sizeof(wchar_t));
if (result >= 0 && (size_t)result < self->b_size/sizeof(wchar_t))
((wchar_t *)self->b_ptr)[result] = (wchar_t)0;
done:
Py_DECREF(value);

return result >= 0 ? 0 : -1;
Expand Down Expand Up @@ -3358,10 +3368,12 @@ _validate_paramflags(PyTypeObject *type, PyObject *paramflags)
for (i = 0; i < len; ++i) {
PyObject *item = PyTuple_GET_ITEM(paramflags, i);
int flag;
char *name;
PyObject *name = Py_None;
PyObject *defval;
PyObject *typ;
if (!PyArg_ParseTuple(item, "i|ZO", &flag, &name, &defval)) {
if (!PyArg_ParseTuple(item, "i|OO", &flag, &name, &defval) ||
!(name == Py_None || PyUnicode_Check(name)))
{
PyErr_SetString(PyExc_TypeError,
"paramflags must be a sequence of (int [,string [,value]]) tuples");
return 0;
Expand Down
10 changes: 8 additions & 2 deletions Modules/_ctypes/callproc.c
Original file line number Diff line number Diff line change
Expand Up @@ -1274,22 +1274,28 @@ The handle may be used to locate exported functions in this\n\
module.\n";
static PyObject *load_library(PyObject *self, PyObject *args)
{
const WCHAR *name;
PyObject *nameobj;
PyObject *ignored;
HMODULE hMod;

if (!PyArg_ParseTuple(args, "U|O:LoadLibrary", &nameobj, &ignored))
return NULL;

name = _PyUnicode_AsUnicode(nameobj);
#if USE_UNICODE_WCHAR_CACHE
const WCHAR *name = _PyUnicode_AsUnicode(nameobj);
#else /* USE_UNICODE_WCHAR_CACHE */
WCHAR *name = PyUnicode_AsWideCharString(nameobj, NULL);
serhiy-storchaka marked this conversation as resolved.
Show resolved Hide resolved
#endif /* USE_UNICODE_WCHAR_CACHE */
if (!name)
return NULL;

Py_BEGIN_ALLOW_THREADS
hMod = LoadLibraryW(name);
Py_END_ALLOW_THREADS

#if !USE_UNICODE_WCHAR_CACHE
PyMem_Free(name);
#endif /* USE_UNICODE_WCHAR_CACHE */
if (!hMod)
return PyErr_SetFromWindowsErr(GetLastError());
#ifdef _WIN64
Expand Down
Loading