fix numpy scalar hash for bigendian (#285)
* Revert "KeyBuilder: use system byteorder to match numpy"

This reverts commit cee5027.

* fix numpy hash for bigendian

* speed up endianness determination

* cleanups, add mock test

* broaden test slightly

Co-authored-by: Michał Górny <[email protected]>

* rename _IS_BIGENDIAN

* add comments

---------

Co-authored-by: Michał Górny <[email protected]>
matthiasdiener and mgorny authored Jan 17, 2025
1 parent 99d91be commit c0364fd
Showing 2 changed files with 27 additions and 11 deletions.
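
Before the file diffs, a minimal standalone sketch (not part of the commit) of the problem the title describes: the raw bytes of a numpy scalar depend on the host's byte order, so feeding tobytes() output straight into a hash produces different keys on little- and big-endian machines. The explicit "<i8"/">i8" dtypes below merely simulate the two byte orders on a single host; the commit normalizes by byteswapping on big-endian systems.

import hashlib

import numpy as np

# Same value, two simulated byte orders.
little = np.array(1, dtype="<i8")
big = np.array(1, dtype=">i8")

# Identical values, different raw bytes -> different hashes if used directly.
assert little.tobytes() != big.tobytes()
assert (hashlib.sha256(little.tobytes()).hexdigest()
        != hashlib.sha256(big.tobytes()).hexdigest())

# Byteswapping the big-endian buffer recovers the little-endian byte string,
# which is the normalization applied on big-endian hosts in this commit.
assert big.byteswap().tobytes() == little.tobytes()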
27 changes: 16 additions & 11 deletions pytools/persistent_dict.py
@@ -68,6 +68,10 @@ class RecommendedHashNotFoundWarning(UserWarning):
 _HAS_ATTRS = True


+# Used to speed up the comparison in update_for_numpy_scalar and for mock testing
+_IS_BIGENDIAN = sys.byteorder == "big"
+
+
 logger = logging.getLogger(__name__)

 # NOTE: not always available so they get hardcoded here
@@ -163,12 +167,6 @@ class KeyBuilder:
         may stop working as early as 2022.

     .. versionadded:: 2021.2
-
-    .. note::
-
-        Some key-building uses system byte order, so the built keys may not match
-        across different systems. It would be desirable to fix this, but this is
-        not yet done.
     """

     # this exists so that we can (conceivably) switch algorithms at some point
@@ -225,6 +223,8 @@ def rec(self, key_hash: Hash, key: Any) -> Hash:
                 # Non-numpy scalars are handled above in the try block.
                 method = self.update_for_numpy_scalar

+            # numpy arrays are mutable so they are not handled in KeyBuilder
+
         if method is None:
             if issubclass(tp, Enum):
                 method = self.update_for_enum
@@ -275,12 +275,11 @@ def update_for_type(self, key_hash: Hash, key: type) -> None:

     def update_for_int(self, key_hash: Hash, key: int) -> None:
         sz = 8
+        # Note: this must match the hash for numpy integers, since
+        # np.int64(1) == 1
         while True:
             try:
-                # Must match system byte order so that numpy and this
-                # generate the same string of bytes.
-                # https://github.com/inducer/pytools/issues/259
-                key_hash.update(key.to_bytes(sz, byteorder=sys.byteorder, signed=True))
+                key_hash.update(key.to_bytes(sz, byteorder="little", signed=True))
                 return
             except OverflowError:
                 sz *= 2
@@ -333,12 +332,18 @@ def update_for_specific_dtype(self, key_hash: Hash, key: Any) -> None:

     def update_for_numpy_scalar(self, key_hash: Hash, key: Any) -> None:
         import numpy as np
+
+        # tobytes() of np.complex256 and np.float128 are non-deterministic,
+        # so we need to use their repr() instead.
         if hasattr(np, "complex256") and key.dtype == np.dtype("complex256"):
             key_hash.update(repr(complex(key)).encode("utf8"))
         elif hasattr(np, "float128") and key.dtype == np.dtype("float128"):
             key_hash.update(repr(float(key)).encode("utf8"))
         else:
-            key_hash.update(np.array(key).tobytes())
+            if _IS_BIGENDIAN:
+                key_hash.update(np.array(key).byteswap().tobytes())
+            else:
+                key_hash.update(np.array(key).tobytes())

     def update_for_dataclass(self, key_hash: Hash, key: Any) -> None:
         self.rec(key_hash, f"{type(key).__qualname__}.{type(key).__name__}")
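
In summary, the change above fixes keys in two places: plain Python ints are now always serialized little-endian, and numpy scalars are byteswapped on big-endian hosts, so both endiannesses feed the same bytes to the hash. The sketch below restates that logic outside of pytools; the helper names int_bytes and numpy_scalar_bytes are illustrative only, not pytools API.

import sys

import numpy as np

_IS_BIGENDIAN = sys.byteorder == "big"


def int_bytes(key: int, sz: int = 8) -> bytes:
    # Mirrors update_for_int: fixed little-endian serialization.
    return key.to_bytes(sz, byteorder="little", signed=True)


def numpy_scalar_bytes(key) -> bytes:
    # Mirrors the else-branch of update_for_numpy_scalar: normalize to
    # little-endian bytes by byteswapping on big-endian hosts.
    arr = np.array(key)
    return arr.byteswap().tobytes() if _IS_BIGENDIAN else arr.tobytes()


# On either endianness, an int and the equal numpy integer now serialize
# identically, which is the invariant behind np.int64(1) == 1.
assert int_bytes(1) == numpy_scalar_bytes(np.int64(1))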
11 changes: 11 additions & 0 deletions pytools/test/test_persistent_dict.py
@@ -479,6 +479,17 @@ def test_scalar_hashing() -> None:
     assert keyb(np.complex256(1.1+2.2j)) == keyb(np.complex256(1.1+2.2j))
     assert keyb(np.clongdouble(1.1+2.2j)) == keyb(np.clongdouble(1.1+2.2j))

+    h_int64 = keyb(np.int64(1))
+
+    import unittest.mock
+
+    from pytools.persistent_dict import _IS_BIGENDIAN
+    with unittest.mock.patch("pytools.persistent_dict._IS_BIGENDIAN",
+                             not _IS_BIGENDIAN):
+        # Very crude test that the other endianness works as well.
+        be_val = np.int64(1).byteswap()
+        assert h_int64 == keyb(be_val)
+

 @pytest.mark.parametrize("dict_impl", ("immutabledict", "frozendict",
                                        "constantdict",
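
A possible way to broaden the new test to another dtype (a sketch, not part of the commit): it assumes keyb is a plain KeyBuilder instance, as used elsewhere in test_persistent_dict.py, and that np.float64 scalars are routed through update_for_numpy_scalar just like np.int64.

import unittest.mock

import numpy as np

from pytools.persistent_dict import _IS_BIGENDIAN, KeyBuilder

keyb = KeyBuilder()
h_float64 = keyb(np.float64(1.5))

with unittest.mock.patch("pytools.persistent_dict._IS_BIGENDIAN",
                         not _IS_BIGENDIAN):
    # byteswap() flips the scalar's byte content; the patched flag makes
    # update_for_numpy_scalar swap it back, so the key should be unchanged.
    assert h_float64 == keyb(np.float64(1.5).byteswap())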
