Skip to content

Commit

Permalink
gh-108724: Add PyMutex and _PyParkingLot APIs (gh-109344)
Browse files Browse the repository at this point in the history
PyMutex is a one byte lock with fast, inlineable lock and unlock functions for the common uncontended case.  The design is based on WebKit's WTF::Lock.

PyMutex is built using the _PyParkingLot APIs, which provides a cross-platform futex-like API (based on WebKit's WTF::ParkingLot).  This internal API will be used for building other synchronization primitives used to implement PEP 703, such as one-time initialization and events.

This also includes tests and a mini benchmark in Tools/lockbench/lockbench.py to compare with the existing PyThread_type_lock.

Uncontended acquisition + release:
* Linux (x86-64): PyMutex: 11 ns, PyThread_type_lock: 44 ns
* macOS (arm64): PyMutex: 13 ns, PyThread_type_lock: 18 ns
* Windows (x86-64): PyMutex: 13 ns, PyThread_type_lock: 38 ns

PR Overview:

The primary purpose of this PR is to implement PyMutex, but there are a number of support pieces (described below).

* PyMutex:  A 1-byte lock that doesn't require memory allocation to initialize and is generally faster than the existing PyThread_type_lock.  The API is internal only for now.
* _PyParking_Lot:  A futex-like API based on the API of the same name in WebKit.  Used to implement PyMutex.
* _PyRawMutex:  A word sized lock used to implement _PyParking_Lot.
* PyEvent:  A one time event.  This was used a bunch in the "nogil" fork and is useful for testing the PyMutex implementation, so I've included it as part of the PR.
* pycore_llist.h:  Defines common operations on doubly-linked list.  Not strictly necessary (could do the list operations manually), but they come up frequently in the "nogil" fork. ( Similar to https://man.freebsd.org/cgi/man.cgi?queue)

---------

Co-authored-by: Eric Snow <ericsnowcurrently@gmail.com>
  • Loading branch information
colesbury and ericsnowcurrently authored Sep 19, 2023
1 parent 0a31ff0 commit 0c89056
Show file tree
Hide file tree
Showing 29 changed files with 1,665 additions and 21 deletions.
1 change: 1 addition & 0 deletions Include/Python.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
#include "pytypedefs.h"
#include "pybuffer.h"
#include "pystats.h"
#include "pyatomic.h"
#include "object.h"
#include "objimpl.h"
#include "typeslots.h"
Expand Down
9 changes: 3 additions & 6 deletions Include/cpython/pyatomic.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,9 @@
// # release
// ...

#ifndef Py_ATOMIC_H
#define Py_ATOMIC_H

#ifndef Py_CPYTHON_ATOMIC_H
# error "this header file must not be included directly"
#endif

// --- _Py_atomic_add --------------------------------------------------------
// Atomically adds `value` to `obj` and returns the previous value
Expand Down Expand Up @@ -501,6 +501,3 @@ static inline void _Py_atomic_fence_release(void);
#else
# error "no available pyatomic implementation for this platform/compiler"
#endif

#endif /* Py_ATOMIC_H */

2 changes: 1 addition & 1 deletion Include/cpython/pyatomic_msc.h
Original file line number Diff line number Diff line change
Expand Up @@ -906,7 +906,7 @@ _Py_atomic_store_ptr_release(void *obj, void *value)
#if defined(_M_X64) || defined(_M_IX86)
*(void * volatile *)obj = value;
#elif defined(_M_ARM64)
__stlr64(obj, (uintptr_t)value);
__stlr64((unsigned __int64 volatile *)obj, (uintptr_t)value);
#else
# error "no implementation of _Py_atomic_store_ptr_release"
#endif
Expand Down
107 changes: 107 additions & 0 deletions Include/internal/pycore_llist.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
// A doubly-linked list that can be embedded in a struct.
//
// Usage:
// struct llist_node head = LLIST_INIT(head);
// typedef struct {
// ...
// struct llist_node node;
// ...
// } MyObj;
//
// llist_insert_tail(&head, &obj->node);
// llist_remove(&obj->node);
//
// struct llist_node *node;
// llist_for_each(node, &head) {
// MyObj *obj = llist_data(node, MyObj, node);
// ...
// }
//

#ifndef Py_INTERNAL_LLIST_H
#define Py_INTERNAL_LLIST_H

#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
# error "Py_BUILD_CORE must be defined to include this header"
#endif

struct llist_node {
struct llist_node *next;
struct llist_node *prev;
};

// Get the struct containing a node.
#define llist_data(node, type, member) \
(type*)((char*)node - offsetof(type, member))

// Iterate over a list.
#define llist_for_each(node, head) \
for (node = (head)->next; node != (head); node = node->next)

// Iterate over a list, but allow removal of the current node.
#define llist_for_each_safe(node, head) \
for (struct llist_node *_next = (node = (head)->next, node->next); \
node != (head); node = _next, _next = node->next)

#define LLIST_INIT(head) { &head, &head }

static inline void
llist_init(struct llist_node *head)
{
head->next = head;
head->prev = head;
}

// Returns 1 if the list is empty, 0 otherwise.
static inline int
llist_empty(struct llist_node *head)
{
return head->next == head;
}

// Appends to the tail of the list.
static inline void
llist_insert_tail(struct llist_node *head, struct llist_node *node)
{
node->prev = head->prev;
node->next = head;
head->prev->next = node;
head->prev = node;
}

// Remove a node from the list.
static inline void
llist_remove(struct llist_node *node)
{
struct llist_node *prev = node->prev;
struct llist_node *next = node->next;
prev->next = next;
next->prev = prev;
node->prev = NULL;
node->next = NULL;
}

// Append all nodes from head2 onto head1. head2 is left empty.
static inline void
llist_concat(struct llist_node *head1, struct llist_node *head2)
{
if (!llist_empty(head2)) {
head1->prev->next = head2->next;
head2->next->prev = head1->prev;

head1->prev = head2->prev;
head2->prev->next = head1;
llist_init(head2);
}
}

#ifdef __cplusplus
}
#endif
#endif /* !Py_INTERNAL_LLIST_H */
158 changes: 158 additions & 0 deletions Include/internal/pycore_lock.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
// Lightweight locks and other synchronization mechanisms.
//
// These implementations are based on WebKit's WTF::Lock. See
// https://webkit.org/blog/6161/locking-in-webkit/ for a description of the
// design.
#ifndef Py_INTERNAL_LOCK_H
#define Py_INTERNAL_LOCK_H
#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
# error "this header requires Py_BUILD_CORE define"
#endif

#include "pycore_time.h" // _PyTime_t


// A mutex that occupies one byte. The lock can be zero initialized.
//
// Only the two least significant bits are used. The remaining bits should be
// zero:
// 0b00: unlocked
// 0b01: locked
// 0b10: unlocked and has parked threads
// 0b11: locked and has parked threads
//
// Typical initialization:
// PyMutex m = (PyMutex){0};
//
// Typical usage:
// PyMutex_Lock(&m);
// ...
// PyMutex_Unlock(&m);
typedef struct _PyMutex {
uint8_t v;
} PyMutex;

#define _Py_UNLOCKED 0
#define _Py_LOCKED 1
#define _Py_HAS_PARKED 2

// (private) slow path for locking the mutex
PyAPI_FUNC(void) _PyMutex_LockSlow(PyMutex *m);

// (private) slow path for unlocking the mutex
PyAPI_FUNC(void) _PyMutex_UnlockSlow(PyMutex *m);

// Locks the mutex.
//
// If the mutex is currently locked, the calling thread will be parked until
// the mutex is unlocked. If the current thread holds the GIL, then the GIL
// will be released while the thread is parked.
static inline void
PyMutex_Lock(PyMutex *m)
{
uint8_t expected = _Py_UNLOCKED;
if (!_Py_atomic_compare_exchange_uint8(&m->v, &expected, _Py_LOCKED)) {
_PyMutex_LockSlow(m);
}
}

// Unlocks the mutex.
static inline void
PyMutex_Unlock(PyMutex *m)
{
uint8_t expected = _Py_LOCKED;
if (!_Py_atomic_compare_exchange_uint8(&m->v, &expected, _Py_UNLOCKED)) {
_PyMutex_UnlockSlow(m);
}
}

// Checks if the mutex is currently locked.
static inline int
PyMutex_IsLocked(PyMutex *m)
{
return (_Py_atomic_load_uint8(&m->v) & _Py_LOCKED) != 0;
}

typedef enum _PyLockFlags {
// Do not detach/release the GIL when waiting on the lock.
_Py_LOCK_DONT_DETACH = 0,

// Detach/release the GIL while waiting on the lock.
_PY_LOCK_DETACH = 1,

// Handle signals if interrupted while waiting on the lock.
_PY_LOCK_HANDLE_SIGNALS = 2,
} _PyLockFlags;

// Lock a mutex with an optional timeout and additional options. See
// _PyLockFlags for details.
extern PyLockStatus
_PyMutex_LockTimed(PyMutex *m, _PyTime_t timeout_ns, _PyLockFlags flags);

// Unlock a mutex, returns 0 if the mutex is not locked (used for improved
// error messages).
extern int _PyMutex_TryUnlock(PyMutex *m);


// PyEvent is a one-time event notification
typedef struct {
uint8_t v;
} PyEvent;

// Set the event and notify any waiting threads.
// Export for '_testinternalcapi' shared extension
PyAPI_FUNC(void) _PyEvent_Notify(PyEvent *evt);

// Wait for the event to be set. If the event is already set, then this returns
// immediately.
PyAPI_FUNC(void) PyEvent_Wait(PyEvent *evt);

// Wait for the event to be set, or until the timeout expires. If the event is
// already set, then this returns immediately. Returns 1 if the event was set,
// and 0 if the timeout expired or thread was interrupted.
PyAPI_FUNC(int) PyEvent_WaitTimed(PyEvent *evt, _PyTime_t timeout_ns);


// _PyRawMutex implements a word-sized mutex that that does not depend on the
// parking lot API, and therefore can be used in the parking lot
// implementation.
//
// The mutex uses a packed representation: the least significant bit is used to
// indicate whether the mutex is locked or not. The remaining bits are either
// zero or a pointer to a `struct raw_mutex_entry` (see lock.c).
typedef struct {
uintptr_t v;
} _PyRawMutex;

// Slow paths for lock/unlock
extern void _PyRawMutex_LockSlow(_PyRawMutex *m);
extern void _PyRawMutex_UnlockSlow(_PyRawMutex *m);

static inline void
_PyRawMutex_Lock(_PyRawMutex *m)
{
uintptr_t unlocked = _Py_UNLOCKED;
if (_Py_atomic_compare_exchange_uintptr(&m->v, &unlocked, _Py_LOCKED)) {
return;
}
_PyRawMutex_LockSlow(m);
}

static inline void
_PyRawMutex_Unlock(_PyRawMutex *m)
{
uintptr_t locked = _Py_LOCKED;
if (_Py_atomic_compare_exchange_uintptr(&m->v, &locked, _Py_UNLOCKED)) {
return;
}
_PyRawMutex_UnlockSlow(m);
}

#ifdef __cplusplus
}
#endif
#endif /* !Py_INTERNAL_LOCK_H */
Loading

0 comments on commit 0c89056

Please sign in to comment.