Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bpo-40521: Disable Unicode caches in isolated subinterpreters #19933

Merged
merged 1 commit into from
May 5, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions Objects/typeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,11 @@ static size_t method_cache_misses = 0;
static size_t method_cache_collisions = 0;
#endif

/* bpo-40521: Interned strings are shared by all subinterpreters */
#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
# define INTERN_NAME_STRINGS
#endif

/* alphabetical order */
_Py_IDENTIFIER(__abstractmethods__);
_Py_IDENTIFIER(__class__);
Expand Down Expand Up @@ -3418,6 +3423,7 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value)
if (name == NULL)
return -1;
}
#ifdef INTERN_NAME_STRINGS
if (!PyUnicode_CHECK_INTERNED(name)) {
PyUnicode_InternInPlace(&name);
if (!PyUnicode_CHECK_INTERNED(name)) {
Expand All @@ -3427,6 +3433,7 @@ type_setattro(PyTypeObject *type, PyObject *name, PyObject *value)
return -1;
}
}
#endif
}
else {
/* Will fail in _PyObject_GenericSetAttrWithDict. */
Expand Down Expand Up @@ -7531,10 +7538,17 @@ _PyTypes_InitSlotDefs(void)
for (slotdef *p = slotdefs; p->name; p++) {
/* Slots must be ordered by their offset in the PyHeapTypeObject. */
assert(!p[1].name || p->offset <= p[1].offset);
#ifdef INTERN_NAME_STRINGS
p->name_strobj = PyUnicode_InternFromString(p->name);
if (!p->name_strobj || !PyUnicode_CHECK_INTERNED(p->name_strobj)) {
return _PyStatus_NO_MEMORY();
}
#else
p->name_strobj = PyUnicode_FromString(p->name);
if (!p->name_strobj) {
return _PyStatus_NO_MEMORY();
}
#endif
}
slotdefs_initialized = 1;
return _PyStatus_OK();
Expand All @@ -7559,7 +7573,9 @@ update_slot(PyTypeObject *type, PyObject *name)
int offset;

assert(PyUnicode_CheckExact(name));
#ifdef INTERN_NAME_STRINGS
assert(PyUnicode_CHECK_INTERNED(name));
#endif

assert(slotdefs_initialized);
pp = ptrs;
Expand Down
78 changes: 63 additions & 15 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,11 @@ extern "C" {
# define OVERALLOCATE_FACTOR 4
#endif

/* bpo-40521: Interned strings are shared by all interpreters. */
#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
# define INTERNED_STRINGS
#endif

/* This dictionary holds all interned unicode strings. Note that references
to strings in this dictionary are *not* counted in the string's ob_refcnt.
When the interned string reaches a refcnt of 0 the string deallocation
Expand All @@ -206,7 +211,9 @@ extern "C" {
Another way to look at this is that to say that the actual reference
count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
*/
#ifdef INTERNED_STRINGS
static PyObject *interned = NULL;
#endif

/* The empty Unicode object is shared to improve performance. */
static PyObject *unicode_empty = NULL;
Expand Down Expand Up @@ -281,9 +288,16 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
/* List of static strings. */
static _Py_Identifier *static_strings = NULL;

/* bpo-40521: Latin1 singletons are shared by all interpreters. */
#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
# define LATIN1_SINGLETONS
#endif

#ifdef LATIN1_SINGLETONS
/* Single character Unicode strings in the Latin-1 range are being
shared as well. */
static PyObject *unicode_latin1[256] = {NULL};
#endif

/* Fast detection of the most frequent whitespace characters */
const unsigned char _Py_ascii_whitespace[] = {
Expand Down Expand Up @@ -662,6 +676,7 @@ unicode_result_ready(PyObject *unicode)
return unicode_empty;
}

#ifdef LATIN1_SINGLETONS
if (length == 1) {
const void *data = PyUnicode_DATA(unicode);
int kind = PyUnicode_KIND(unicode);
Expand All @@ -683,6 +698,7 @@ unicode_result_ready(PyObject *unicode)
}
}
}
#endif

assert(_PyUnicode_CheckConsistency(unicode, 1));
return unicode;
Expand Down Expand Up @@ -1913,10 +1929,12 @@ unicode_dealloc(PyObject *unicode)
case SSTATE_INTERNED_MORTAL:
/* revive dead object temporarily for DelItem */
Py_SET_REFCNT(unicode, 3);
#ifdef INTERNED_STRINGS
if (PyDict_DelItem(interned, unicode) != 0) {
_PyErr_WriteUnraisableMsg("deletion of interned string failed",
NULL);
}
#endif
break;

case SSTATE_INTERNED_IMMORTAL:
Expand Down Expand Up @@ -1944,15 +1962,18 @@ unicode_dealloc(PyObject *unicode)
static int
unicode_is_singleton(PyObject *unicode)
{
PyASCIIObject *ascii = (PyASCIIObject *)unicode;
if (unicode == unicode_empty)
if (unicode == unicode_empty) {
return 1;
}
#ifdef LATIN1_SINGLETONS
PyASCIIObject *ascii = (PyASCIIObject *)unicode;
if (ascii->state.kind != PyUnicode_WCHAR_KIND && ascii->length == 1)
{
Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, 0);
if (ch < 256 && unicode_latin1[ch] == unicode)
return 1;
}
#endif
return 0;
}
#endif
Expand Down Expand Up @@ -2094,16 +2115,28 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
static PyObject*
get_latin1_char(unsigned char ch)
{
PyObject *unicode = unicode_latin1[ch];
PyObject *unicode;

#ifdef LATIN1_SINGLETONS
unicode = unicode_latin1[ch];
if (unicode) {
Py_INCREF(unicode);
return unicode;
}
#endif

unicode = PyUnicode_New(1, ch);
if (!unicode) {
unicode = PyUnicode_New(1, ch);
if (!unicode)
return NULL;
PyUnicode_1BYTE_DATA(unicode)[0] = ch;
assert(_PyUnicode_CheckConsistency(unicode, 1));
unicode_latin1[ch] = unicode;
return NULL;
}

PyUnicode_1BYTE_DATA(unicode)[0] = ch;
assert(_PyUnicode_CheckConsistency(unicode, 1));

#ifdef LATIN1_SINGLETONS
Py_INCREF(unicode);
unicode_latin1[ch] = unicode;
#endif
return unicode;
}

Expand Down Expand Up @@ -11270,7 +11303,6 @@ int
_PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
{
PyObject *right_uni;
Py_hash_t hash;

assert(_PyUnicode_CHECK(left));
assert(right->string);
Expand Down Expand Up @@ -11302,10 +11334,12 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
if (PyUnicode_CHECK_INTERNED(left))
return 0;

#ifdef INTERNED_STRINGS
assert(_PyUnicode_HASH(right_uni) != -1);
hash = _PyUnicode_HASH(left);
Py_hash_t hash = _PyUnicode_HASH(left);
if (hash != -1 && hash != _PyUnicode_HASH(right_uni))
return 0;
#endif

return unicode_compare_eq(left, right_uni);
}
Expand Down Expand Up @@ -15487,43 +15521,55 @@ void
PyUnicode_InternInPlace(PyObject **p)
{
PyObject *s = *p;
PyObject *t;
#ifdef Py_DEBUG
assert(s != NULL);
assert(_PyUnicode_CHECK(s));
#else
if (s == NULL || !PyUnicode_Check(s))
if (s == NULL || !PyUnicode_Check(s)) {
return;
}
#endif

/* If it's a subclass, we don't really know what putting
it in the interned dict might do. */
if (!PyUnicode_CheckExact(s))
if (!PyUnicode_CheckExact(s)) {
return;
if (PyUnicode_CHECK_INTERNED(s))
}

if (PyUnicode_CHECK_INTERNED(s)) {
return;
}

#ifdef INTERNED_STRINGS
if (interned == NULL) {
interned = PyDict_New();
if (interned == NULL) {
PyErr_Clear(); /* Don't leave an exception */
return;
}
}

PyObject *t;
Py_ALLOW_RECURSION
t = PyDict_SetDefault(interned, s, s);
Py_END_ALLOW_RECURSION

if (t == NULL) {
PyErr_Clear();
return;
}

if (t != s) {
Py_INCREF(t);
Py_SETREF(*p, t);
return;
}

/* The two references in interned are not counted by refcnt.
The deallocator will take care of this */
Py_SET_REFCNT(s, Py_REFCNT(s) - 2);
_PyUnicode_STATE(s).interned = SSTATE_INTERNED_MORTAL;
#endif
}

void
Expand Down Expand Up @@ -16109,9 +16155,11 @@ _PyUnicode_Fini(PyThreadState *tstate)

Py_CLEAR(unicode_empty);

#ifdef LATIN1_SINGLETONS
for (Py_ssize_t i = 0; i < 256; i++) {
Py_CLEAR(unicode_latin1[i]);
}
#endif
_PyUnicode_ClearStaticStrings();
}

Expand Down