From 22531144e236e9696881cb36bb8173cea2d5537f Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 14 Nov 2023 18:07:09 +0100 Subject: [PATCH] gh-111545: Add Py_HashDouble() function * Add again the private _PyHASH_NAN constant. * Add tests: Modules/_testcapi/hash.c and Lib/test/test_capi/test_hash.py. --- Doc/c-api/hash.rst | 29 ++++++++++++ Doc/library/sys.rst | 8 +++- Doc/whatsnew/3.13.rst | 3 ++ Include/cpython/pyhash.h | 3 ++ Lib/test/test_capi/test_hash.py | 44 +++++++++++++++++++ ...-11-15-01-26-59.gh-issue-111545.iAoFtA.rst | 2 + Modules/_testcapi/hash.c | 20 +++++++++ Python/pyhash.c | 32 +++++++++++--- Python/sysmodule.c | 2 +- 9 files changed, 134 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/C API/2023-11-15-01-26-59.gh-issue-111545.iAoFtA.rst diff --git a/Doc/c-api/hash.rst b/Doc/c-api/hash.rst index 4dc121d7fbaa9b4..190a726958438d9 100644 --- a/Doc/c-api/hash.rst +++ b/Doc/c-api/hash.rst @@ -5,12 +5,16 @@ PyHash API See also the :c:member:`PyTypeObject.tp_hash` member. +Types +^^^^^ + .. c:type:: Py_hash_t Hash value type: signed integer. .. versionadded:: 3.2 + .. c:type:: Py_uhash_t Hash value type: unsigned integer. @@ -41,8 +45,33 @@ See also the :c:member:`PyTypeObject.tp_hash` member. .. versionadded:: 3.4 +Functions +^^^^^^^^^ + +.. c:function:: int Py_HashDouble(double value, Py_hash_t *result) + + Hash a C double number. + + * Set *\*result* to the hash and return ``1`` if *value* is finite or is + infinity. + * Set *\*result* to :data:`sys.hash_info.nan <sys.hash_info>` (``0``) and + return ``0`` if *value* is not-a-number (NaN). + + *result* must not be ``NULL``. + + .. note:: + Only rely on the function return value to distinguish the "not-a-number" + case. *\*result* can be ``0`` if *value* is finite. For example, + ``Py_HashDouble(0.0, &result)`` sets *\*result* to 0. + + .. versionadded:: 3.13 + + .. c:function:: PyHash_FuncDef* PyHash_GetFuncDef(void) Get the hash function definition. + .. 
seealso:: + :pep:`456` "Secure and interchangeable hash algorithm". + .. versionadded:: 3.4 diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index bf9aaca2a696ded..c6e1bef7aa1032b 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -1034,7 +1034,13 @@ always available. .. attribute:: hash_info.nan - (This attribute is no longer used) + The hash value returned for not-a-number (NaN). + + This hash value is only used by the :c:func:`Py_HashDouble` C function if + the argument is not-a-number (NaN). + + .. versionchanged:: 3.10 + This hash value is no longer used to hash numbers in Python. .. attribute:: hash_info.imag diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 372e4a45468e68b..a50321fc9ed8ef7 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -1230,6 +1230,9 @@ New Features :exc:`KeyError` if the key missing. (Contributed by Stefan Behnel and Victor Stinner in :gh:`111262`.) +* Add :c:func:`Py_HashDouble` function to hash a C double number. + (Contributed by Victor Stinner in :gh:`111545`.) 
+ Porting to Python 3.13 ---------------------- diff --git a/Include/cpython/pyhash.h b/Include/cpython/pyhash.h index 6f7113daa5fe4de..16bb702598fd800 100644 --- a/Include/cpython/pyhash.h +++ b/Include/cpython/pyhash.h @@ -17,6 +17,7 @@ #define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1) #define _PyHASH_INF 314159 +#define _PyHASH_NAN 0 #define _PyHASH_IMAG _PyHASH_MULTIPLIER /* Helpers for hash functions */ @@ -33,3 +34,5 @@ typedef struct { } PyHash_FuncDef; PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void); + +PyAPI_FUNC(int) Py_HashDouble(double value, Py_hash_t *result); diff --git a/Lib/test/test_capi/test_hash.py b/Lib/test/test_capi/test_hash.py index 59dec15bc21445f..68bdcd8ccf0469e 100644 --- a/Lib/test/test_capi/test_hash.py +++ b/Lib/test/test_capi/test_hash.py @@ -1,3 +1,4 @@ +import math import sys import unittest from test.support import import_helper @@ -31,3 +32,46 @@ def test_hash_getfuncdef(self): self.assertEqual(func_def.name, hash_info.algorithm) self.assertEqual(func_def.hash_bits, hash_info.hash_bits) self.assertEqual(func_def.seed_bits, hash_info.seed_bits) + + def test_hash_double(self): + # Test Py_HashDouble() + hash_double = _testcapi.hash_double + + def check_number(value, expected): + self.assertEqual(hash_double(value), (1, expected)) + + # test some integers + integers = [ + *range(1, 30), + 2**30 - 1, + 2 ** 233, + int(sys.float_info.max), + ] + for x in integers: + with self.subTest(x=x): + check_number(float(x), hash(x)) + check_number(float(-x), hash(-x)) + + # test positive and negative zeros + check_number(float(0.0), 0) + check_number(float(-0.0), 0) + + # test +inf and -inf + inf = float("inf") + check_number(inf, sys.hash_info.inf) + check_number(-inf, -sys.hash_info.inf) + + # special float values: compare with Python hash() function + special_values = ( + math.nextafter(0.0, 1.0), # smallest positive subnormal number + sys.float_info.min, # smallest positive normal number + sys.float_info.epsilon, + 
sys.float_info.max, # largest positive finite number + ) + for x in special_values: + with self.subTest(x=x): + check_number(x, hash(x)) + check_number(-x, hash(-x)) + + # test not-a-number (NaN) + self.assertEqual(hash_double(float('nan')), (0, sys.hash_info.nan)) diff --git a/Misc/NEWS.d/next/C API/2023-11-15-01-26-59.gh-issue-111545.iAoFtA.rst b/Misc/NEWS.d/next/C API/2023-11-15-01-26-59.gh-issue-111545.iAoFtA.rst new file mode 100644 index 000000000000000..b6f1db895a3523d --- /dev/null +++ b/Misc/NEWS.d/next/C API/2023-11-15-01-26-59.gh-issue-111545.iAoFtA.rst @@ -0,0 +1,2 @@ +Add :c:func:`Py_HashDouble` function to hash a C double number. Patch by +Victor Stinner. diff --git a/Modules/_testcapi/hash.c b/Modules/_testcapi/hash.c index d0b8127020c5c14..7b8e6d6b1fcb5fb 100644 --- a/Modules/_testcapi/hash.c +++ b/Modules/_testcapi/hash.c @@ -1,6 +1,7 @@ #include "parts.h" #include "util.h" + static PyObject * hash_getfuncdef(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) { @@ -44,8 +45,27 @@ hash_getfuncdef(PyObject *Py_UNUSED(module), PyObject *Py_UNUSED(args)) return result; } + +static PyObject * +hash_double(PyObject *Py_UNUSED(module), PyObject *args) +{ + double value; + if (!PyArg_ParseTuple(args, "d", &value)) { + return NULL; + } + + Py_hash_t hash; + int res = Py_HashDouble(value, &hash); + assert(hash != -1); + + Py_BUILD_ASSERT(sizeof(long long) >= sizeof(hash)); + return Py_BuildValue("iN", res, PyLong_FromLongLong(hash)); +} + + static PyMethodDef test_methods[] = { {"hash_getfuncdef", hash_getfuncdef, METH_NOARGS}, + {"hash_double", hash_double, METH_VARARGS}, {NULL}, }; diff --git a/Python/pyhash.c b/Python/pyhash.c index f9060b8003a0a7d..6fa79d08c288213 100644 --- a/Python/pyhash.c +++ b/Python/pyhash.c @@ -85,18 +85,23 @@ static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0}; Py_hash_t _Py_HashPointer(const void *); -Py_hash_t -_Py_HashDouble(PyObject *inst, double v) +int +Py_HashDouble(double v, Py_hash_t *result) { int e, sign; 
double m; Py_uhash_t x, y; if (!Py_IS_FINITE(v)) { - if (Py_IS_INFINITY(v)) - return v > 0 ? _PyHASH_INF : -_PyHASH_INF; - else - return _Py_HashPointer(inst); + if (Py_IS_INFINITY(v)) { + *result = (v > 0 ? _PyHASH_INF : -_PyHASH_INF); + return 1; + } + else { + assert(Py_IS_NAN(v)); + *result = _PyHASH_NAN; + return 0; + } } m = frexp(v, &e); @@ -128,7 +133,20 @@ _Py_HashDouble(PyObject *inst, double v) x = x * sign; if (x == (Py_uhash_t)-1) x = (Py_uhash_t)-2; - return (Py_hash_t)x; + *result = (Py_hash_t)x; + return 1; +} + +Py_hash_t +_Py_HashDouble(PyObject *obj, double v) +{ + assert(obj != NULL); + + Py_hash_t hash; + if (Py_HashDouble(v, &hash) == 0) { + hash = _Py_HashPointer(obj); + } + return hash; } Py_hash_t diff --git a/Python/sysmodule.c b/Python/sysmodule.c index c17de44731b7030..11113e21bfb6b7f 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -1497,7 +1497,7 @@ get_hash_info(PyThreadState *tstate) PyStructSequence_SET_ITEM(hash_info, field++, PyLong_FromLong(_PyHASH_INF)); PyStructSequence_SET_ITEM(hash_info, field++, - PyLong_FromLong(0)); // This is no longer used + PyLong_FromLong(_PyHASH_NAN)); PyStructSequence_SET_ITEM(hash_info, field++, PyLong_FromLong(_PyHASH_IMAG)); PyStructSequence_SET_ITEM(hash_info, field++,