Skip to content

Commit

Permalink
bpo-40602: Optimize _Py_hashtable for pointer keys (GH-20051)
Browse files Browse the repository at this point in the history
Optimize _Py_hashtable_get() and _Py_hashtable_get_entry() for
pointer keys:

* key_size == sizeof(void*)
* hash_func == _Py_hashtable_hash_ptr
* compare_func == _Py_hashtable_compare_direct

Changes:

* Add get_func and get_entry_func members to _Py_hashtable_t
* Convert _Py_hashtable_get() and _Py_hashtable_get_entry() functions
  to static inline functions.
* Add specialized get and get entry for pointer keys.
  • Loading branch information
vstinner authored May 12, 2020
1 parent 74ea6b5 commit 7c6e970
Show file tree
Hide file tree
Showing 2 changed files with 153 additions and 94 deletions.
40 changes: 25 additions & 15 deletions Include/internal/pycore_hashtable.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,17 @@ typedef struct {

/* Forward declaration */
struct _Py_hashtable_t;
typedef struct _Py_hashtable_t _Py_hashtable_t;

/* Hash callback: return the hash of the key pointed to by pkey. */
typedef Py_uhash_t (*_Py_hashtable_hash_func) (_Py_hashtable_t *ht,
                                               const void *pkey);

/* Comparison callback: return non-zero if the key pointed to by pkey
   matches the key of entry he, zero otherwise. */
typedef int (*_Py_hashtable_compare_func) (_Py_hashtable_t *ht,
                                           const void *pkey,
                                           const _Py_hashtable_entry_t *he);

/* Lookup callback: return the entry matching the key pointed to by pkey,
   or NULL if there is none. */
typedef _Py_hashtable_entry_t* (*_Py_hashtable_get_entry_func)(_Py_hashtable_t *ht,
                                                               const void *pkey);

/* Lookup callback: copy the data of the entry matching the key pointed to
   by pkey into data and return 1, or return 0 if there is no such entry. */
typedef int (*_Py_hashtable_get_func) (_Py_hashtable_t *ht,
                                       const void *pkey, void *data);

typedef struct {
/* allocate a memory block */
Expand All @@ -93,18 +98,19 @@ typedef struct {


/* _Py_hashtable: table */

/* Hash table object. The _Py_hashtable_t typedef name is introduced by the
   forward declaration earlier in this header, so only the structure body
   is defined here. */
struct _Py_hashtable_t {
    size_t num_buckets;   /* Number of slots in the buckets array. */
    size_t entries;       /* Total number of entries in the table. */
    _Py_slist_t *buckets;
    size_t key_size;      /* Size in bytes of a key. */
    size_t data_size;     /* Size in bytes of the data stored per entry. */

    /* Lookup implementations which _Py_hashtable_get() and
       _Py_hashtable_get_entry() dispatch to. */
    _Py_hashtable_get_func get_func;
    _Py_hashtable_get_entry_func get_entry_func;
    /* Callbacks used by the generic lookup to hash and compare keys. */
    _Py_hashtable_hash_func hash_func;
    _Py_hashtable_compare_func compare_func;
    /* Memory allocator (malloc/free pair) used for the table. */
    _Py_hashtable_allocator_t alloc;
};

/* hash a pointer (void*) */
PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_ptr(
Expand Down Expand Up @@ -176,10 +182,12 @@ PyAPI_FUNC(int) _Py_hashtable_set(
Don't call this function directly: use the _Py_HASHTABLE_GET_ENTRY()
macro */
static inline _Py_hashtable_entry_t *
_Py_hashtable_get_entry(_Py_hashtable_t *ht, size_t key_size, const void *pkey)
{
    /* key_size is only used to check that the caller's key type matches
       the table; the lookup itself does not need it. */
    assert(key_size == ht->key_size);
    /* Dispatch to the lookup implementation stored in the table. */
    return ht->get_entry_func(ht, pkey);
}

#define _Py_HASHTABLE_GET_ENTRY(TABLE, KEY) \
_Py_hashtable_get_entry(TABLE, sizeof(KEY), &(KEY))
Expand All @@ -189,12 +197,14 @@ PyAPI_FUNC(_Py_hashtable_entry_t*) _Py_hashtable_get_entry(
exists, return 0 if the entry does not exist.
Don't call this function directly: use the _Py_HASHTABLE_GET() macro */
static inline int
_Py_hashtable_get(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
                  size_t data_size, void *data)
{
    /* key_size and data_size are only used to check that the caller's key
       and data types match the table; the lookup does not need them. */
    assert(key_size == ht->key_size);
    assert(data_size == ht->data_size);
    /* Dispatch to the lookup implementation stored in the table. */
    return ht->get_func(ht, pkey, data);
}

#define _Py_HASHTABLE_GET(TABLE, KEY, DATA) \
_Py_hashtable_get(TABLE, sizeof(KEY), &(KEY), sizeof(DATA), &(DATA))
Expand Down
207 changes: 128 additions & 79 deletions Python/hashtable.c
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,6 @@ Py_uhash_t
_Py_hashtable_hash_ptr(struct _Py_hashtable_t *ht, const void *pkey)
{
/* The key is itself a pointer: load it from pkey into a local, then hash
   the pointer value with _Py_HashPointer(). */
void *key;

_Py_HASHTABLE_READ_KEY(ht, pkey, key);
return (Py_uhash_t)_Py_HashPointer(key);
}
Expand Down Expand Up @@ -137,61 +136,6 @@ round_size(size_t s)
}


/* Create a hash table with an explicit initial size and allocator.

   If allocator is NULL, PyMem_Malloc() and PyMem_Free() are used.
   Return NULL if a memory allocation fails. */
_Py_hashtable_t *
_Py_hashtable_new_full(size_t key_size, size_t data_size,
                       size_t init_size,
                       _Py_hashtable_hash_func hash_func,
                       _Py_hashtable_compare_func compare_func,
                       _Py_hashtable_allocator_t *allocator)
{
    _Py_hashtable_allocator_t alloc;
    if (allocator != NULL) {
        alloc = *allocator;
    }
    else {
        alloc.malloc = PyMem_Malloc;
        alloc.free = PyMem_Free;
    }

    _Py_hashtable_t *table = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
    if (table == NULL) {
        return NULL;
    }

    table->num_buckets = round_size(init_size);
    table->entries = 0;
    table->key_size = key_size;
    table->data_size = data_size;

    /* Allocate the bucket array and start with every bucket zeroed. */
    size_t nbytes = table->num_buckets * sizeof(table->buckets[0]);
    table->buckets = alloc.malloc(nbytes);
    if (table->buckets == NULL) {
        alloc.free(table);
        return NULL;
    }
    memset(table->buckets, 0, nbytes);

    table->hash_func = hash_func;
    table->compare_func = compare_func;
    table->alloc = alloc;
    return table;
}


/* Create a hash table of HASHTABLE_MIN_SIZE initial size, without a custom
   allocator (a NULL allocator is passed to _Py_hashtable_new_full()). */
_Py_hashtable_t *
_Py_hashtable_new(size_t key_size, size_t data_size,
                  _Py_hashtable_hash_func hash_func,
                  _Py_hashtable_compare_func compare_func)
{
    _Py_hashtable_allocator_t *no_allocator = NULL;
    size_t init_size = HASHTABLE_MIN_SIZE;

    return _Py_hashtable_new_full(key_size, data_size, init_size,
                                  hash_func, compare_func, no_allocator);
}


size_t
_Py_hashtable_size(_Py_hashtable_t *ht)
{
Expand Down Expand Up @@ -251,23 +195,20 @@ _Py_hashtable_print_stats(_Py_hashtable_t *ht)


/* Generic lookup: return the entry matching the key pointed to by pkey,
   or NULL if the table has no such entry.

   The key is hashed with ht->hash_func and candidate entries are compared
   with ht->compare_func. */
_Py_hashtable_entry_t *
_Py_hashtable_get_entry_generic(_Py_hashtable_t *ht, const void *pkey)
{
    Py_uhash_t key_hash = ht->hash_func(ht, pkey);
    /* num_buckets - 1 is used as a bit mask to select the bucket. */
    size_t index = key_hash & (ht->num_buckets - 1);
    _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index);
    while (1) {
        if (entry == NULL) {
            return NULL;
        }
        /* Compare the cached hash first: it avoids calling compare_func
           on entries which cannot match. */
        if (entry->key_hash == key_hash && ht->compare_func(ht, pkey, entry)) {
            break;
        }
        entry = ENTRY_NEXT(entry);
    }
    return entry;
}

Expand Down Expand Up @@ -324,7 +265,7 @@ _Py_hashtable_set(_Py_hashtable_t *ht, size_t key_size, const void *pkey,
/* Don't write the assertion on a single line because it is interesting
to know the duplicated entry if the assertion failed. The entry can
be read using a debugger. */
entry = _Py_hashtable_get_entry(ht, key_size, pkey);
entry = ht->get_entry_func(ht, pkey);
assert(entry == NULL);
#endif

Expand Down Expand Up @@ -352,18 +293,62 @@ _Py_hashtable_set(_Py_hashtable_t *ht, size_t key_size, const void *pkey,


/* Generic lookup: if the table has an entry for the key pointed to by pkey,
   copy its data (ht->data_size bytes) into data and return 1.
   Otherwise, return 0 and leave data untouched.

   data must not be NULL. */
int
_Py_hashtable_get_generic(_Py_hashtable_t *ht, const void *pkey, void *data)
{
    assert(data != NULL);
    _Py_hashtable_entry_t *entry = ht->get_entry_func(ht, pkey);
    if (entry == NULL) {
        return 0;
    }
    ENTRY_READ_PDATA(ht, entry, ht->data_size, data);
    return 1;
}

// Return the entry matching the key pointed to by pkey, or NULL if the
// table has no such entry.
//
// Specialized for:
//   key_size == sizeof(void*)
//   hash_func == _Py_hashtable_hash_ptr
//   compare_func == _Py_hashtable_compare_direct
//
// The hash function is called directly and the sizeof(void*) key bytes are
// compared with memcmp(), instead of going through the ht->hash_func and
// ht->compare_func function pointers.
_Py_hashtable_entry_t *
_Py_hashtable_get_entry_ptr(_Py_hashtable_t *ht, const void *pkey)
{
    Py_uhash_t key_hash = _Py_hashtable_hash_ptr(ht, pkey);
    // num_buckets - 1 is used as a bit mask to select the bucket.
    size_t index = key_hash & (ht->num_buckets - 1);
    _Py_hashtable_entry_t *entry = TABLE_HEAD(ht, index);
    while (1) {
        if (entry == NULL) {
            return NULL;
        }
        if (entry->key_hash == key_hash) {
            const void *pkey2 = _Py_HASHTABLE_ENTRY_PKEY(entry);
            if (memcmp(pkey, pkey2, sizeof(void*)) == 0) {
                break;
            }
        }
        entry = ENTRY_NEXT(entry);
    }
    return entry;
}


// If the table has an entry for the key pointed to by pkey, copy its data
// (ht->data_size bytes) into data and return 1. Otherwise, return 0 and
// leave data untouched. data must not be NULL.
//
// Specialized for:
//   key_size == sizeof(void*)
//   hash_func == _Py_hashtable_hash_ptr
//   compare_func == _Py_hashtable_compare_direct
int
_Py_hashtable_get_ptr(_Py_hashtable_t *ht, const void *pkey, void *data)
{
    assert(data != NULL);
    // Call the pointer-specialized lookup directly rather than through
    // ht->get_entry_func.
    _Py_hashtable_entry_t *entry = _Py_hashtable_get_entry_ptr(ht, pkey);
    if (entry == NULL) {
        return 0;
    }
    ENTRY_READ_PDATA(ht, entry, ht->data_size, data);
    return 1;
}


Expand Down Expand Up @@ -454,6 +439,70 @@ hashtable_rehash(_Py_hashtable_t *ht)
}


/* Create a hash table with an explicit initial size and allocator.

   If allocator is NULL, PyMem_Malloc() and PyMem_Free() are used.
   The pointer-specialized lookup functions are selected when keys have the
   size of a pointer and are hashed with _Py_hashtable_hash_ptr and compared
   with _Py_hashtable_compare_direct; otherwise the generic ones are used.

   Return NULL if a memory allocation fails. */
_Py_hashtable_t *
_Py_hashtable_new_full(size_t key_size, size_t data_size,
                       size_t init_size,
                       _Py_hashtable_hash_func hash_func,
                       _Py_hashtable_compare_func compare_func,
                       _Py_hashtable_allocator_t *allocator)
{
    _Py_hashtable_allocator_t alloc;
    if (allocator != NULL) {
        alloc = *allocator;
    }
    else {
        alloc.malloc = PyMem_Malloc;
        alloc.free = PyMem_Free;
    }

    _Py_hashtable_t *table = (_Py_hashtable_t *)alloc.malloc(sizeof(_Py_hashtable_t));
    if (table == NULL) {
        return NULL;
    }

    table->num_buckets = round_size(init_size);
    table->entries = 0;
    table->key_size = key_size;
    table->data_size = data_size;

    /* Allocate the bucket array and start with every bucket zeroed. */
    size_t nbytes = table->num_buckets * sizeof(table->buckets[0]);
    table->buckets = alloc.malloc(nbytes);
    if (table->buckets == NULL) {
        alloc.free(table);
        return NULL;
    }
    memset(table->buckets, 0, nbytes);

    table->hash_func = hash_func;
    table->compare_func = compare_func;
    table->alloc = alloc;

    /* Pick the lookup implementation once, at creation time. */
    int pointer_keys = (key_size == sizeof(void*)
                        && hash_func == _Py_hashtable_hash_ptr
                        && compare_func == _Py_hashtable_compare_direct);
    if (pointer_keys) {
        table->get_func = _Py_hashtable_get_ptr;
        table->get_entry_func = _Py_hashtable_get_entry_ptr;
    }
    else {
        table->get_func = _Py_hashtable_get_generic;
        table->get_entry_func = _Py_hashtable_get_entry_generic;
    }
    return table;
}


/* Create a hash table of HASHTABLE_MIN_SIZE initial size, without a custom
   allocator (a NULL allocator is passed to _Py_hashtable_new_full()). */
_Py_hashtable_t *
_Py_hashtable_new(size_t key_size, size_t data_size,
                  _Py_hashtable_hash_func hash_func,
                  _Py_hashtable_compare_func compare_func)
{
    _Py_hashtable_allocator_t *no_allocator = NULL;
    size_t init_size = HASHTABLE_MIN_SIZE;

    return _Py_hashtable_new_full(key_size, data_size, init_size,
                                  hash_func, compare_func, no_allocator);
}


void
_Py_hashtable_clear(_Py_hashtable_t *ht)
{
Expand Down

0 comments on commit 7c6e970

Please sign in to comment.