From 779246d2f040643fa985c56eb7b93fa6c3bacd28 Mon Sep 17 00:00:00 2001
From: Nicolas Tessore
Date: Sat, 23 Nov 2024 11:48:24 +0000
Subject: [PATCH] gh-213: better stringification of keys in FITS (#214)

---
 heracles/io.py   | 39 +++++++++++++++++++++++++--------------
 tests/test_io.py | 35 ++++++++++++++++++++++-------------
 2 files changed, 47 insertions(+), 27 deletions(-)

diff --git a/heracles/io.py b/heracles/io.py
index 8dd9639..2d92638 100644
--- a/heracles/io.py
+++ b/heracles/io.py
@@ -21,7 +21,7 @@
 import logging
 import os
 import re
-from collections.abc import MutableMapping
+from collections.abc import MutableMapping, Sequence
 from functools import partial
 from pathlib import Path
 from types import MappingProxyType
@@ -76,25 +76,36 @@ def _string_from_key(key: _DictKey) -> str:
     """
     Return string representation for a given key.
     """
-    if isinstance(key, tuple):
-        names = list(map(_string_from_key, key))
-        c = ";" if any("," in name for name in names) else ","
-        return c.join(names)
-    return re.sub(r"\W+", "_", str(key))
+    # recursive expansion for sequences
+    if isinstance(key, Sequence) and not isinstance(key, str):
+        return "-".join(map(_string_from_key, key))
+
+    # get string representation of key
+    s = str(key)
+
+    # escape literal "\"
+    s = s.replace("\\", "\\\\")
+
+    # escape literal "-"
+    s = s.replace("-", "\\-")
+
+    # substitute non-FITS characters by tilde
+    s = re.sub(r"[^ -~]+", "~", s, flags=re.ASCII)
+
+    return s
 
 
 def _key_from_string(s: str) -> _DictKey:
     """
     Return key for a given string representation.
     """
-    keys = s.split(";")
-    if len(keys) > 1:
-        return tuple(map(_key_from_string, keys))
-    keys = keys[0].split(",")
-    if len(keys) > 1:
-        return tuple(map(_key_from_string, keys))
-    key = keys[0]
-    return int(key) if key.isdigit() else key
+    parts = re.split(r"(?<!\\)-", s.replace("\\\\", "\0"))
+    if len(parts) > 1:
+        return tuple(map(_key_from_string, parts))
+    key = parts[0]
+    key = key.replace("\\-", "-")
+    key = key.replace("\0", "\\")
+    return int(key) if key.removeprefix("-").isdigit() else key
 
 
 def _get_next_extname(fits, prefix):
diff --git a/tests/test_io.py b/tests/test_io.py
index 32b15be..97e6b65 100644
--- a/tests/test_io.py
+++ b/tests/test_io.py
@@ -157,14 +157,19 @@ def test_string_from_key():
     assert _string_from_key(1) == "1"
     assert _string_from_key(("a",)) == "a"
     assert _string_from_key((1,)) == "1"
-    assert _string_from_key(("a", 1)) == "a,1"
-    assert _string_from_key(("a", "b", 1, 2)) == "a,b,1,2"
-    assert _string_from_key((("a", 1), "b")) == "a,1;b"
-    assert _string_from_key((("a", 1), ("b", 2))) == "a,1;b,2"
+    assert _string_from_key(("a", 1)) == "a-1"
+    assert _string_from_key(("a", "b", 1, 2)) == "a-b-1-2"
+
+    # flatten nested sequences
+    assert _string_from_key([("a", 1), "b"]) == "a-1-b"
+    assert _string_from_key([("a", 1), ("b", (2,))]) == "a-1-b-2"
 
     # test special chars
-    assert _string_from_key("a,b,c") == "a_b_c"
-    assert _string_from_key("!@#$%^&*()[]{};,.") == "_"
+    assert _string_from_key("a-b-c") == r"a\-b\-c"
+    assert _string_from_key(("a\\", 1)) == r"a\\-1"
+    assert _string_from_key(("a\\-", 1)) == r"a\\\--1"
+    assert _string_from_key(("a\\", -1)) == r"a\\-\-1"
+    assert _string_from_key("a€£") == "a~"
 
 
 def test_key_from_string():
@@ -172,10 +177,12 @@ def test_key_from_string():
 
     assert _key_from_string("a") == "a"
     assert _key_from_string("1") == 1
-    assert _key_from_string("a,1") == ("a", 1)
-    assert _key_from_string("a,b,1,2") == ("a", "b", 1, 2)
-    assert _key_from_string("a,1;b") == (("a", 1), "b")
-    assert _key_from_string("a,1;b,2") == (("a", 1), ("b", 2))
+    assert _key_from_string("a-1") == ("a", 1)
+    assert _key_from_string("a-b-1-2") == ("a", "b", 1, 2)
+    assert _key_from_string(r"a\-b\-c") == "a-b-c"
+    assert _key_from_string(r"a\\-1") == ("a\\", 1)
+    assert _key_from_string(r"a\\\-1") == "a\\-1"
+    assert _key_from_string(r"a\\-\-1") == ("a\\", -1)
 
 
 def test_write_read_maps(rng, tmp_path):
@@ -293,9 +300,11 @@ def test_write_read_cov(mock_cls, tmp_path):
     workdir = str(tmp_path)
 
     cov = {}
-    for k1, k2 in combinations_with_replacement(mock_cls, 2):
-        cl1, cl2 = mock_cls[k1], mock_cls[k2]
-        cov[k1, k2] = np.outer(cl1, cl2)
+    for (a1, b1, i1, j1), (a2, b2, i2, j2) in combinations_with_replacement(
+        mock_cls, 2
+    ):
+        cl1, cl2 = mock_cls[a1, b1, i1, j1], mock_cls[a2, b2, i2, j2]
+        cov[a1, b1, a2, b2, i1, j1, i2, j2] = np.outer(cl1, cl2)
 
     filename = "cov.fits"
 