From b57ee476e2f9b99d59dc128105873ffb5b97f1e6 Mon Sep 17 00:00:00 2001 From: Remco de Boer <29308176+redeboer@users.noreply.github.com> Date: Tue, 24 Dec 2024 12:11:28 +0100 Subject: [PATCH] ENH: compute hash without `PYTHONHASHSEED` (#146) * ENH: reuse `get_readable_hash()` from AmpForm v0.15.5 * MAINT: remove `PYTHONHASHSEED` explanations --- .cspell.json | 1 - .envrc | 2 - .github/workflows/ci.yml | 3 -- .pre-commit-config.yaml | 1 - pyproject.toml | 2 - src/ampform_dpd/_cache.py | 91 ---------------------------------- src/ampform_dpd/io/__init__.py | 9 ++-- tests/test_io.py | 50 +------------------ 8 files changed, 5 insertions(+), 154 deletions(-) delete mode 100644 src/ampform_dpd/_cache.py diff --git a/.cspell.json b/.cspell.json index 37053ac..f564c2f 100644 --- a/.cspell.json +++ b/.cspell.json @@ -167,7 +167,6 @@ "pyplot", "pyright", "pytest", - "PYTHONHASHSEED", "QRules", "recoupling", "recouplings", diff --git a/.envrc b/.envrc index 4f10d2f..41fd2ca 100644 --- a/.envrc +++ b/.envrc @@ -1,4 +1,2 @@ uv sync --all-extras --quiet source .venv/bin/activate - -export PYTHONHASHSEED=0 diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index edb299e..ed094dd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -5,9 +5,6 @@ concurrency: cancel-in-progress: |- ${{ github.ref != format('refs/heads/{0}', github.event.repository.default_branch) }} -env: - PYTHONHASHSEED: "0" - on: push: branches: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0f12b2a..1c1c69b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,7 +18,6 @@ repos: - id: check-dev-files args: - --doc-apt-packages=graphviz - - --environment-variables=PYTHONHASHSEED=0 - --github-pages - --repo-name=ampform-dpd - --repo-title=AmpForm-DPD diff --git a/pyproject.toml b/pyproject.toml index 05df3c1..4c8d08f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -374,8 +374,6 @@ commands = pytest {posargs} description = Run all unit 
tests passenv = * -setenv = - PYTHONHASHSEED = 0 [testenv:cov] allowlist_externals = diff --git a/src/ampform_dpd/_cache.py b/src/ampform_dpd/_cache.py deleted file mode 100644 index d19552e..0000000 --- a/src/ampform_dpd/_cache.py +++ /dev/null @@ -1,91 +0,0 @@ -"""Helper functions for :func:`.perform_cached_doit`. - -Implementation taken from -https://github.com/ComPWA/ampform/blob/40a898f/src/ampform/sympy/_cache.py -""" - -from __future__ import annotations - -import functools -import hashlib -import logging -import os -import pickle # noqa: S403 -import sys -from textwrap import dedent - -import sympy as sp - -_LOGGER = logging.getLogger(__name__) - - -def get_system_cache_directory() -> str: - r"""Return the system cache directory for the current platform. - - >>> import sys - >>> if sys.platform.startswith("darwin"): - ... assert get_system_cache_directory().endswith("/Library/Caches") - >>> if sys.platform.startswith("linux"): - ... assert get_system_cache_directory().endswith("/.cache") - >>> if sys.platform.startswith("win"): - ... assert get_system_cache_directory().endswith(R"\AppData\Local") - """ - if sys.platform.startswith("linux"): - cache_directory = os.getenv("XDG_CACHE_HOME") - if cache_directory is not None: - return cache_directory - if sys.platform.startswith("darwin"): # macos - return os.path.expanduser("~/Library/Caches") - if sys.platform.startswith("win"): - cache_directory = os.getenv("LocalAppData") # noqa: SIM112 - if cache_directory is not None: - return cache_directory - return os.path.expanduser("~/AppData/Local") - return os.path.expanduser("~/.cache") - - -def get_readable_hash(obj, ignore_hash_seed: bool = False) -> str: - """Get a human-readable hash of any hashable Python object. - - The algorithm is fastest if `PYTHONHASHSEED - <https://docs.python.org/3/using/cmdline.html#envvar-PYTHONHASHSEED>`_ is set. - Otherwise, it falls back to computing the hash with :func:`hashlib.sha256()`. - - Args: - obj: Any hashable object, mutable or immutable, to be hashed. 
- ignore_hash_seed: Ignore the :code:`PYTHONHASHSEED` environment variable. If - :code:`True`, the hash seed is ignored and the hash is computed with - :func:`hashlib.sha256`. - """ - python_hash_seed = _get_python_hash_seed() - if ignore_hash_seed or python_hash_seed is None: - b = _to_bytes(obj) - return hashlib.sha256(b).hexdigest() - return f"pythonhashseed-{python_hash_seed}{hash(obj):+}" - - -def _to_bytes(obj) -> bytes: - if isinstance(obj, sp.Expr): - # Using the str printer is slower and not necessarily unique, - # but pickle.dumps() does not always result in the same bytes stream. - _warn_about_unsafe_hash() - return str(obj).encode() - return pickle.dumps(obj) - - -def _get_python_hash_seed() -> int | None: - python_hash_seed = os.environ.get("PYTHONHASHSEED") - if python_hash_seed is not None and python_hash_seed.isdigit(): - return int(python_hash_seed) - return None - - -@functools.cache # warn once -def _warn_about_unsafe_hash() -> None: - message = """ - PYTHONHASHSEED has not been set. For faster and safer hashing of SymPy expressions, - set the PYTHONHASHSEED environment variable to a fixed value and rerun the program. 
- See https://docs.python.org/3/using/cmdline.html#envvar-PYTHONHASHSEED - """ - message = dedent(message).replace("\n", " ").strip() - _LOGGER.warning(message) diff --git a/src/ampform_dpd/io/__init__.py b/src/ampform_dpd/io/__init__.py index d2a176d..2c6ab13 100644 --- a/src/ampform_dpd/io/__init__.py +++ b/src/ampform_dpd/io/__init__.py @@ -28,10 +28,13 @@ import cloudpickle import sympy as sp from ampform.io import aslatex +from ampform.sympy._cache import ( + get_readable_hash, # noqa: PLC2701 + get_system_cache_directory, # noqa: PLC2701 +) from tensorwaves.function.sympy import create_function, create_parametrized_function from ampform_dpd import DefinedExpression -from ampform_dpd._cache import get_readable_hash, get_system_cache_directory from ampform_dpd.decay import ( IsobarNode, Particle, @@ -269,10 +272,6 @@ def perform_cached_lambdify( # type:ignore[misc] # pyright:ignore[reportIncons directory will be put under the home directory, or to the path specified by the environment variable :code:`SYMPY_CACHE_DIR`. - .. tip:: For a faster cache, set `PYTHONHASHSEED - <https://docs.python.org/3/using/cmdline.html#envvar-PYTHONHASHSEED>`_ to a - fixed value. - .. 
seealso:: :func:`ampform.sympy.perform_cached_doit` """ if cache_directory is None: diff --git a/tests/test_io.py b/tests/test_io.py index bc05b0b..4fa0bdd 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -1,23 +1,8 @@ # pyright: reportPrivateUsage=false from __future__ import annotations -import logging -import os -import sys -from os.path import abspath, dirname -from typing import TYPE_CHECKING - -import pytest -import sympy as sp - -from ampform_dpd._cache import _warn_about_unsafe_hash from ampform_dpd.decay import IsobarNode, Particle -from ampform_dpd.io import aslatex, get_readable_hash - -if TYPE_CHECKING: - from _pytest.logging import LogCaptureFixture - -THIS_DIR = dirname(abspath(__file__)) +from ampform_dpd.io import aslatex # https://compwa-org--129.org.readthedocs.build/report/018.html#resonances-and-ls-scheme dummy_args = {"mass": 0, "width": 0} @@ -50,36 +35,3 @@ def test_aslatex_isobar_node(): node = IsobarNode(Λ1520, p, K, interaction=(2, 1)) latex = aslatex(node) assert latex == R"\left(\Lambda(1520) \xrightarrow[S=1]{L=2} p K^-\right)" - - -@pytest.mark.parametrize( - ("assumptions", "expected_hash"), - [ - (dict(), (+7459658071388516764, +8778804591879682108)), - (dict(real=True), (+3665410414623666716, -7967572625470457155)), - (dict(rational=True), (-7926839224244779605, -8321323707982755013)), - ], -) -def test_get_readable_hash( - assumptions, expected_hash: tuple[int, int], caplog: LogCaptureFixture -): - caplog.set_level(logging.WARNING) - x, y = sp.symbols("x y", **assumptions) - expr = x**2 + y - h = get_readable_hash(expr) - python_hash_seed = os.environ.get("PYTHONHASHSEED") - if python_hash_seed is None or not python_hash_seed.isdigit(): - assert h[:7] == "bbc9833" - if _warn_about_unsafe_hash.cache_info().hits == 0: - assert "PYTHONHASHSEED has not been set." 
in caplog.text - caplog.clear() - elif python_hash_seed == "0": - if sys.version_info >= (3, 11): - expected_hash = expected_hash[1] # type:ignore[assignment] - else: - expected_hash = expected_hash[0] # type:ignore[assignment] - expected = f"pythonhashseed-0{expected_hash:+d}" - assert h == expected - else: - pytest.skip("PYTHONHASHSEED has been set, but is not 0") - assert not caplog.text