From 992c39ea475fed8a999c56a434336f02b7776625 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 14 Nov 2023 19:06:23 +0100 Subject: [PATCH] gh-111545: Add PyHash_Pointer() function * Keep _Py_HashPointer() function as an alias to PyHash_Pointer(). * Add PyHash_Pointer() tests to test_capi.test_hash. * Remove _Py_HashPointerRaw() function: inline code in _Py_hashtable_hash_ptr(). --- Doc/c-api/hash.rst | 9 ++++ Doc/whatsnew/3.13.rst | 3 ++ Include/cpython/pyhash.h | 2 + Include/internal/pycore_pyhash.h | 7 +-- Lib/test/test_capi/test_hash.py | 48 ++++++++++++++++++- ...-11-15-01-26-59.gh-issue-111545.iAoFtA.rst | 2 + Modules/_testcapi/hash.c | 16 +++++++ PC/pyconfig.h | 2 + Python/hashtable.c | 16 +++++-- Python/pyhash.c | 27 +++++------ 10 files changed, 109 insertions(+), 23 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2023-11-15-01-26-59.gh-issue-111545.iAoFtA.rst diff --git a/Doc/c-api/hash.rst b/Doc/c-api/hash.rst index 4dc121d7fbaa9b4..71ab845e5f1df92 100644 --- a/Doc/c-api/hash.rst +++ b/Doc/c-api/hash.rst @@ -46,3 +46,12 @@ See also the :c:member:`PyTypeObject.tp_hash` member. Get the hash function definition. .. versionadded:: 3.4 + + +.. c:function:: Py_hash_t PyHash_Pointer(const void *ptr) + + Hash a pointer. + + The function cannot fail: it cannot return ``-1``. + + .. versionadded:: 3.13 diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 136fe901ce39fb8..5b277dea10a22a1 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -1181,6 +1181,9 @@ New Features :exc:`KeyError` if the key missing. (Contributed by Stefan Behnel and Victor Stinner in :gh:`111262`.) +* Add :c:func:`PyHash_Pointer` function to hash a pointer. + (Contributed by Victor Stinner in :gh:`111545`.) + Porting to Python 3.13 ---------------------- diff --git a/Include/cpython/pyhash.h b/Include/cpython/pyhash.h index 62ae6084bbcf533..27b93282e873afa 100644 --- a/Include/cpython/pyhash.h +++ b/Include/cpython/pyhash.h @@ -11,3 +11,5 @@ typedef struct { } PyHash_FuncDef; PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void); + +PyAPI_FUNC(Py_hash_t) PyHash_Pointer(const void *ptr); diff --git a/Include/internal/pycore_pyhash.h b/Include/internal/pycore_pyhash.h index 78bf0c7d07eb105..0f789aca160273b 100644 --- a/Include/internal/pycore_pyhash.h +++ b/Include/internal/pycore_pyhash.h @@ -8,11 +8,8 @@ /* Helpers for hash functions */ extern Py_hash_t _Py_HashDouble(PyObject *, double); -// Export for '_decimal' shared extension -PyAPI_FUNC(Py_hash_t) _Py_HashPointer(const void*); - -// Similar to _Py_HashPointer(), but don't replace -1 with -2 -extern Py_hash_t _Py_HashPointerRaw(const void*); +// Kept for backward compatibility +#define _Py_HashPointer PyHash_Pointer // Export for '_datetime' shared extension PyAPI_FUNC(Py_hash_t) _Py_HashBytes(const void*, Py_ssize_t); diff --git a/Lib/test/test_capi/test_hash.py b/Lib/test/test_capi/test_hash.py index 59dec15bc21445f..7f4d688aea72ae4 100644 --- a/Lib/test/test_capi/test_hash.py +++ b/Lib/test/test_capi/test_hash.py @@ -4,7 +4,8 @@ _testcapi = import_helper.import_module('_testcapi') -SIZEOF_PY_HASH_T = _testcapi.SIZEOF_VOID_P +SIZEOF_VOID_P = _testcapi.SIZEOF_VOID_P +SIZEOF_PY_HASH_T = SIZEOF_VOID_P class CAPITest(unittest.TestCase): @@ -31,3 +32,48 @@ def test_hash_getfuncdef(self): self.assertEqual(func_def.name, hash_info.algorithm) self.assertEqual(func_def.hash_bits, hash_info.hash_bits) self.assertEqual(func_def.seed_bits, hash_info.seed_bits) + + def test_hash_pointer(self): + # Test PyHash_Pointer() + hash_pointer = _testcapi.hash_pointer + + UHASH_T_MASK = ((2 ** (8 * SIZEOF_PY_HASH_T)) - 1) + HASH_T_MAX = (2 ** (8 * SIZEOF_PY_HASH_T - 1) - 1) + + def python_hash_pointer(x): + # PyHash_Pointer() rotates the pointer bits by 4 bits to the right + x = (x >> 4) | ((x & 15) << (8 * SIZEOF_VOID_P - 4)) + + # Convert unsigned uintptr_t to signed Py_hash_t + if HASH_T_MAX < x: + x = (~x) + 1 + x &= UHASH_T_MASK + x = (~x) + 1 + return x + + if SIZEOF_VOID_P == 8: + values = ( + 0xABCDEF1234567890, + 0x1234567890ABCDEF, + 0xFEE4ABEDD1CECA5E, + ) + else: + values = ( + 0x12345678, + 0x1234ABCD, + 0xDEADCAFE, + ) + + for value in values: + expected = python_hash_pointer(value) + with self.subTest(value=value): + self.assertEqual(hash_pointer(value), expected, + f"hash_pointer({value:x}) = " + f"{hash_pointer(value):x} != {expected:x}") + + # PyHash_Pointer(NULL) returns 0 + self.assertEqual(hash_pointer(0), 0) + + # PyHash_Pointer((void*)(uintptr_t)-1) doesn't return -1 but -2 + VOID_P_MAX = -1 & (2 ** (8 * SIZEOF_VOID_P) - 1) + self.assertEqual(hash_pointer(VOID_P_MAX), -2) diff --git a/Misc/NEWS.d/next/C API/2023-11-15-01-26-59.gh-issue-111545.iAoFtA.rst b/Misc/NEWS.d/next/C API/2023-11-15-01-26-59.gh-issue-111545.iAoFtA.rst new file mode 100644 index 000000000000000..b22aa1c4d5eb4ef --- /dev/null +++ b/Misc/NEWS.d/next/C API/2023-11-15-01-26-59.gh-issue-111545.iAoFtA.rst @@ -0,0 +1,2 @@ +Add :c:func:`PyHash_Pointer` function to hash a pointer. Patch by Victor +Stinner. diff --git a/Modules/_testcapi/hash.c b/Modules/_testcapi/hash.c index d0b8127020c5c14..706b3cd9a5ef7c1 100644 --- a/Modules/_testcapi/hash.c +++ b/Modules/_testcapi/hash.c @@ -44,8 +44,24 @@ hash_getfuncdef(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) return result; } + +static PyObject * +hash_pointer(PyObject *Py_UNUSED(module), PyObject *arg) +{ + void *ptr = PyLong_AsVoidPtr(arg); + if (ptr == NULL && PyErr_Occurred()) { + return NULL; + } + + Py_hash_t hash = PyHash_Pointer(ptr); + Py_BUILD_ASSERT(sizeof(long long) >= sizeof(hash)); + return PyLong_FromLongLong(hash); +} + + static PyMethodDef test_methods[] = { {"hash_getfuncdef", hash_getfuncdef, METH_NOARGS}, + {"hash_pointer", hash_pointer, METH_O}, {NULL}, }; diff --git a/PC/pyconfig.h b/PC/pyconfig.h index e6b368caffe2801..36e3e1db0476dfb 100644 --- a/PC/pyconfig.h +++ b/PC/pyconfig.h @@ -355,6 +355,8 @@ Py_NO_ENABLE_SHARED to find out. Also support MS_NO_COREDLL for b/w compat */ # define ALIGNOF_MAX_ALIGN_T 8 #endif +#define SIZEOF_UINTPTR_T SIZEOF_VOID_P + #ifdef _DEBUG # define Py_DEBUG #endif diff --git a/Python/hashtable.c b/Python/hashtable.c index 8f5e8168ba1339e..69c493e7d3145ac 100644 --- a/Python/hashtable.c +++ b/Python/hashtable.c @@ -45,8 +45,7 @@ */ #include "Python.h" -#include "pycore_hashtable.h" -#include "pycore_pyhash.h" // _Py_HashPointerRaw() +#include "pycore_hashtable.h" // export _Py_hashtable_new() #define HASHTABLE_MIN_SIZE 16 #define HASHTABLE_HIGH 0.50 @@ -89,10 +88,21 @@ _Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous, } +// Similar to PyHash_Pointer() but avoid "if (x == -1) x = -2;" for best +// performance. The value (Py_uhash_t)-1 is not special for +// _Py_hashtable_t.hash_func function, there is no need to replace it with -2. Py_uhash_t _Py_hashtable_hash_ptr(const void *key) { - return (Py_uhash_t)_Py_HashPointerRaw(key); + uintptr_t x = (uintptr_t)key; + Py_BUILD_ASSERT(sizeof(x) == sizeof(key)); + + // Bottom 3 or 4 bits are likely to be 0; rotate x by 4 to the right + // to avoid excessive hash collisions. + x = (x >> 4) | (x << (8 * SIZEOF_UINTPTR_T - 4)); + + Py_BUILD_ASSERT(sizeof(x) == sizeof(Py_hash_t)); + return (Py_uhash_t)x; } diff --git a/Python/pyhash.c b/Python/pyhash.c index f9060b8003a0a7d..469f02926dbe6e5 100644 --- a/Python/pyhash.c +++ b/Python/pyhash.c @@ -131,24 +131,23 @@ _Py_HashDouble(PyObject *inst, double v) return (Py_hash_t)x; } +// See also _Py_hashtable_hash_ptr() which is similar but can return -1. Py_hash_t -_Py_HashPointerRaw(const void *p) +PyHash_Pointer(const void *ptr) { - size_t y = (size_t)p; - /* bottom 3 or 4 bits are likely to be 0; rotate y by 4 to avoid - excessive hash collisions for dicts and sets */ - y = (y >> 4) | (y << (8 * SIZEOF_VOID_P - 4)); - return (Py_hash_t)y; -} + uintptr_t x = (uintptr_t)ptr; + Py_BUILD_ASSERT(sizeof(x) == sizeof(ptr)); -Py_hash_t -_Py_HashPointer(const void *p) -{ - Py_hash_t x = _Py_HashPointerRaw(p); - if (x == -1) { - x = -2; + // Bottom 3 or 4 bits are likely to be 0; rotate x by 4 to the right + // to avoid excessive hash collisions for dicts and sets. + x = (x >> 4) | (x << (8 * SIZEOF_UINTPTR_T - 4)); + + Py_BUILD_ASSERT(sizeof(x) == sizeof(Py_hash_t)); + Py_hash_t result = (Py_hash_t)x; + if (result == -1) { + result = -2; } - return x; + return result; } Py_hash_t