From d60dd6ed351d165de489724ac72342349553e84e Mon Sep 17 00:00:00 2001 From: George Marshall Date: Mon, 25 Nov 2024 14:24:08 +0000 Subject: [PATCH 1/6] update validity files to new format --- src/legendmeta/catalog.py | 32 +++++++++++++++++++++++++------- src/legendmeta/textdb.py | 12 ++++++------ tests/test_jsondb.py | 21 ++++++++++++--------- tests/testdb/dir1/file3.json | 3 --- tests/testdb/dir1/file3.yml | 1 + tests/testdb/dir1/file5.json | 3 --- tests/testdb/dir1/file5.yml | 1 + tests/testdb/dir1/file6.yml | 1 + tests/testdb/dir1/validity.jsonl | 2 -- tests/testdb/dir1/validity.yml | 22 ++++++++++++++++++++++ tests/testdb/file2.yaml | 2 +- 11 files changed, 69 insertions(+), 31 deletions(-) delete mode 100644 tests/testdb/dir1/file3.json create mode 100644 tests/testdb/dir1/file3.yml delete mode 100644 tests/testdb/dir1/file5.json create mode 100644 tests/testdb/dir1/file5.yml create mode 100644 tests/testdb/dir1/file6.yml delete mode 100644 tests/testdb/dir1/validity.jsonl create mode 100644 tests/testdb/dir1/validity.yml diff --git a/src/legendmeta/catalog.py b/src/legendmeta/catalog.py index 6299310..4846f80 100644 --- a/src/legendmeta/catalog.py +++ b/src/legendmeta/catalog.py @@ -17,13 +17,14 @@ import bisect import collections import copy -import json import types from collections import namedtuple from datetime import datetime from pathlib import Path from string import Template +import yaml + from . import utils @@ -57,13 +58,14 @@ def get(value): @staticmethod def read_from(file_name): - with Path(file_name).open() as file: - for json_str in file: - yield json.loads(json_str) + with Path(file_name).open() as r: + file = yaml.safe_load(r) + file = sorted(file, key=lambda item: unix_time(item["valid_from"])) + yield from file class Catalog(namedtuple("Catalog", ["entries"])): - """Implementation of the `JSONL metadata validity specification `_.""" + """Implementation of the `YAML metadata validity specification `_.""" __slots__ = () @@ -84,14 +86,30 @@ def get(value): @staticmethod def read_from(file_name): entries = {} - for props in PropsStream.get(file_name): timestamp = props["valid_from"] system = "all" if props.get("category") is None else props["category"] file_key = props["apply"] if system not in entries: entries[system] = [] - entries[system].append(Catalog.Entry(unix_time(timestamp), file_key)) + mode = "append" if props.get("mode") is None else props["mode"] + mode = "reset" if len(entries[system]) == 0 else mode + if mode == "reset": + entries[system].append(Catalog.Entry(unix_time(timestamp), file_key)) + elif mode == "append": + entries[system].append( + Catalog.Entry( + unix_time(timestamp), entries[system][-1].file.copy() + file_key + ) + ) + elif mode == "remove": + previous = entries[system][-1].file.copy() + for file in file_key: + previous.remove(file) + entries[system].append(Catalog.Entry(unix_time(timestamp), previous)) + else: + msg = f"Unknown mode for {timestamp}" + raise ValueError(msg) for system in entries: entries[system] = sorted( diff --git a/src/legendmeta/textdb.py b/src/legendmeta/textdb.py index e68a6d6..40db67c 100644 --- a/src/legendmeta/textdb.py +++ b/src/legendmeta/textdb.py @@ -381,11 +381,11 @@ def on( ) -> AttrsDict | list: """Query database in `time[, file pattern, system]`. - A (only one) valid ``validity.jsonl`` file must exist in the directory + A (only one) valid ``validity.yml`` file must exist in the directory to specify a validity mapping. This functionality relies on the :class:`.catalog.Catalog` class. - The JSONL specification is documented at `this link + The YAML specification is documented at `this link `_. The special ``$_`` string is expanded to the directory containing the @@ -401,12 +401,12 @@ def on( system: 'all', 'phy', 'cal', 'lar', ... query only a data taking "system". """ - jsonl = self.__path__ / "validity.jsonl" - if not jsonl.is_file(): - msg = f"no validity.jsonl file found in {self.__path__!s}" + yml = self.__path__ / "validity.yml" + if not yml.is_file(): + msg = f"no validity.yml file found in {self.__path__!s}" raise RuntimeError(msg) - file_list = Catalog.get_files(str(jsonl), timestamp, system) + file_list = Catalog.get_files(str(yml), timestamp, system) # select only files matching pattern if specified if pattern is not None: c = re.compile(pattern) diff --git a/tests/test_jsondb.py b/tests/test_jsondb.py index 7b5ef9e..cf5f9e2 100644 --- a/tests/test_jsondb.py +++ b/tests/test_jsondb.py @@ -162,21 +162,24 @@ def test_scan(): def test_time_validity(): jdb = TextDB(testdb) - assert isinstance(jdb["dir1"].on("20220628T221955Z"), AttrsDict) + assert isinstance(jdb["dir1"].on("20220101T000001Z"), AttrsDict) - assert jdb["dir1"].on("20220628T221955Z")["data"] == 1 - assert jdb.dir1.on("20220629T221955Z").data == 2 + assert jdb["dir1"].on("20220101T000000Z")["data"] == 1 + assert jdb.dir1.on("20220102T000000Z").data == 2 # time point in between - assert jdb["dir1"].on("20220628T233500Z")["data"] == 1 + assert jdb["dir1"].on("20220102T120000Z")["data"] == 1 # time point after - assert jdb["dir1"].on("20220630T233500Z")["data"] == 2 + assert jdb["dir1"].on("20220102T120000Z")["data"] == 2 # time point before with pytest.raises(RuntimeError): - jdb["dir1"].on("20220627T233500Z")["data"] - - # directory with no .jsonl + jdb["dir1"].on("20210101T000000Z")["data"] + # test remove functionality + assert jdb["dir1"].on("20220103T120000Z")["data"] == 1 + # test reset functionality + assert jdb["dir1"].on("20220104T120000Z")["data"] == 3 + # directory with no .yml with pytest.raises(RuntimeError): - jdb["dir1"]["dir2"].on("20220627T233500Z") + jdb["dir1"]["dir2"].on("20220101T000001Z") # invalid timestamp with pytest.raises(ValueError): diff --git a/tests/testdb/dir1/file3.json b/tests/testdb/dir1/file3.json deleted file mode 100644 index 858a13c..0000000 --- a/tests/testdb/dir1/file3.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "data": 1 -} diff --git a/tests/testdb/dir1/file3.yml b/tests/testdb/dir1/file3.yml new file mode 100644 index 0000000..21bfb3c --- /dev/null +++ b/tests/testdb/dir1/file3.yml @@ -0,0 +1 @@ +- data: 1 diff --git a/tests/testdb/dir1/file5.json b/tests/testdb/dir1/file5.json deleted file mode 100644 index 087e30f..0000000 --- a/tests/testdb/dir1/file5.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "data": 2 -} diff --git a/tests/testdb/dir1/file5.yml b/tests/testdb/dir1/file5.yml new file mode 100644 index 0000000..e84811c --- /dev/null +++ b/tests/testdb/dir1/file5.yml @@ -0,0 +1 @@ +- data: 2 diff --git a/tests/testdb/dir1/file6.yml b/tests/testdb/dir1/file6.yml new file mode 100644 index 0000000..0a63124 --- /dev/null +++ b/tests/testdb/dir1/file6.yml @@ -0,0 +1 @@ +- data: 3 diff --git a/tests/testdb/dir1/validity.jsonl b/tests/testdb/dir1/validity.jsonl deleted file mode 100644 index 30c101f..0000000 --- a/tests/testdb/dir1/validity.jsonl +++ /dev/null @@ -1,2 +0,0 @@ -{"valid_from":"20220628T221955Z","select":"all","apply":["file3.json"]} -{"valid_from":"20220629T221955Z","select":"all","apply":["file5.json"]} diff --git a/tests/testdb/dir1/validity.yml b/tests/testdb/dir1/validity.yml new file mode 100644 index 0000000..ced9053 --- /dev/null +++ b/tests/testdb/dir1/validity.yml @@ -0,0 +1,22 @@ +- valid_from: 20220101T000000Z + category: all + apply: + - file3.json + +- valid_from: 20230102T000000Z + category: all + mode: append + apply: + - file5.json + +- valid_from: 20230103T000000Z + category: all + mode: remove + apply: + - file5.json + +- valid_from: 20230104T000000Z + category: all + mode: reset + apply: + - file5.json diff --git a/tests/testdb/file2.yaml b/tests/testdb/file2.yaml index 70e68b8..640c8ba 100644 --- a/tests/testdb/file2.yaml +++ b/tests/testdb/file2.yaml @@ -1,5 +1,5 @@ data: 2 -filepath: $_/dir1/file3.json +filepath: $_/dir1/file3.yml key: label: 3 label: 3 From bc850f4a3ad215dbbdd9c1299bab0888319c5031 Mon Sep 17 00:00:00 2001 From: George Marshall Date: Mon, 25 Nov 2024 14:31:43 +0000 Subject: [PATCH 2/6] yml -> yaml, update tests --- src/legendmeta/textdb.py | 6 +++--- tests/test_jsondb.py | 20 +++++++++---------- tests/testdb/dir1/{file3.yml => file3.yaml} | 0 tests/testdb/dir1/{file5.yml => file5.yaml} | 0 tests/testdb/dir1/file6.yaml | 1 + tests/testdb/dir1/validity.yaml | 22 +++++++++++++++++++++ 6 files changed, 36 insertions(+), 13 deletions(-) rename tests/testdb/dir1/{file3.yml => file3.yaml} (100%) rename tests/testdb/dir1/{file5.yml => file5.yaml} (100%) create mode 100644 tests/testdb/dir1/file6.yaml create mode 100644 tests/testdb/dir1/validity.yaml diff --git a/src/legendmeta/textdb.py b/src/legendmeta/textdb.py index 40db67c..d14daf2 100644 --- a/src/legendmeta/textdb.py +++ b/src/legendmeta/textdb.py @@ -381,7 +381,7 @@ def on( ) -> AttrsDict | list: """Query database in `time[, file pattern, system]`. - A (only one) valid ``validity.yml`` file must exist in the directory + A (only one) valid ``validity.yaml`` file must exist in the directory to specify a validity mapping. This functionality relies on the :class:`.catalog.Catalog` class. @@ -401,9 +401,9 @@ def on( system: 'all', 'phy', 'cal', 'lar', ... query only a data taking "system". """ - yml = self.__path__ / "validity.yml" + yml = self.__path__ / "validity.yaml" if not yml.is_file(): - msg = f"no validity.yml file found in {self.__path__!s}" + msg = f"no validity.yaml file found in {self.__path__!s}" raise RuntimeError(msg) file_list = Catalog.get_files(str(yml), timestamp, system) diff --git a/tests/test_jsondb.py b/tests/test_jsondb.py index cf5f9e2..03dff5d 100644 --- a/tests/test_jsondb.py +++ b/tests/test_jsondb.py @@ -19,10 +19,10 @@ def test_props(): # test subst_vars Props.subst_vars(test_dict, var_values={"_": str(Path(__file__).parent / "testdb")}) assert test_dict["filepath"] == str( - Path(__file__).parent / "testdb/dir1/file3.json" + Path(__file__).parent / "testdb/dir1/file3.yaml" ) - test_dict2 = Props.read_from(str(Path(__file__).parent / "testdb/file3.json")) + test_dict2 = Props.read_from(str(Path(__file__).parent / "testdb/file3.yaml")) # test add_to Props.add_to(test_dict, test_dict2) @@ -36,14 +36,14 @@ def test_props(): test_dict = Props.read_from( [ str(Path(__file__).parent / "testdb/file2.yaml"), - str(Path(__file__).parent / "testdb/file3.json"), + str(Path(__file__).parent / "testdb/file3.yaml"), ], subst_pathvar=True, trim_null=True, ) assert test_dict["data"] == 3 assert test_dict["filepath"] == str( - Path(__file__).parent / "testdb/dir1/file3.json" + Path(__file__).parent / "testdb/dir1/file3.yaml" ) with pytest.raises(KeyError): test_dict["null_key"] @@ -55,12 +55,12 @@ def test_access(): assert isinstance(jdb["file2.yaml"], AttrsDict) assert isinstance(jdb["file1"], AttrsDict) assert isinstance(jdb["dir1"], TextDB) - assert isinstance(jdb["dir1"]["file3.json"], AttrsDict) + assert isinstance(jdb["dir1"]["file3.yaml"], AttrsDict) assert isinstance(jdb["dir1"]["file3"], AttrsDict) - assert isinstance(jdb["dir1/file3.json"], AttrsDict) + assert isinstance(jdb["dir1/file3.yaml"], AttrsDict) assert isinstance(jdb["dir1"]["dir2"], TextDB) - assert isinstance(jdb["dir1"]["dir2"]["file4.json"], AttrsDict) - assert isinstance(jdb["dir1/dir2/file4.json"], AttrsDict) + assert isinstance(jdb["dir1"]["dir2"]["file4.yaml"], AttrsDict) + assert isinstance(jdb["dir1/dir2/file4.yaml"], AttrsDict) assert jdb["file1.json"]["data"] == 1 assert isinstance(jdb["file1"]["group"], AttrsDict) @@ -82,7 +82,7 @@ def test_access(): assert jdb.arrays[1].array[0] == 1 assert jdb.arrays[1].array[1].data == 2 - assert jdb.file2.filepath == str(Path(__file__).parent / "testdb/dir1/file3.json") + assert jdb.file2.filepath == str(Path(__file__).parent / "testdb/dir1/file3.yaml") with pytest.raises(ValueError): TextDB("non-existent-db") @@ -98,7 +98,7 @@ def test_access(): def test_keys(): jdb = TextDB(testdb, lazy=False) assert sorted(jdb.keys()) == ["arrays", "dir1", "dir2", "file1", "file2", "file3"] - assert sorted(jdb.dir1.keys()) == ["dir2", "file3", "file5"] + assert sorted(jdb.dir1.keys()) == ["dir2", "file3", "file5", "file6"] assert "arrays" in jdb diff --git a/tests/testdb/dir1/file3.yml b/tests/testdb/dir1/file3.yaml similarity index 100% rename from tests/testdb/dir1/file3.yml rename to tests/testdb/dir1/file3.yaml diff --git a/tests/testdb/dir1/file5.yml b/tests/testdb/dir1/file5.yaml similarity index 100% rename from tests/testdb/dir1/file5.yml rename to tests/testdb/dir1/file5.yaml diff --git a/tests/testdb/dir1/file6.yaml b/tests/testdb/dir1/file6.yaml new file mode 100644 index 0000000..0a63124 --- /dev/null +++ b/tests/testdb/dir1/file6.yaml @@ -0,0 +1 @@ +- data: 3 diff --git a/tests/testdb/dir1/validity.yaml b/tests/testdb/dir1/validity.yaml new file mode 100644 index 0000000..ced9053 --- /dev/null +++ b/tests/testdb/dir1/validity.yaml @@ -0,0 +1,22 @@ +- valid_from: 20220101T000000Z + category: all + apply: + - file3.json + +- valid_from: 20230102T000000Z + category: all + mode: append + apply: + - file5.json + +- valid_from: 20230103T000000Z + category: all + mode: remove + apply: + - file5.json + +- valid_from: 20230104T000000Z + category: all + mode: reset + apply: + - file5.json From e7ad60e556f8850c994159775f9d6d99ddb244ff Mon Sep 17 00:00:00 2001 From: George Marshall Date: Mon, 25 Nov 2024 14:42:05 +0000 Subject: [PATCH 3/6] test fixing --- src/legendmeta/textdb.py | 7 +++++-- tests/test_jsondb.py | 4 ++-- tests/testdb/dir1/file6.yml | 1 - tests/testdb/dir1/validity.yml | 22 ---------------------- 4 files changed, 7 insertions(+), 27 deletions(-) delete mode 100644 tests/testdb/dir1/file6.yml delete mode 100644 tests/testdb/dir1/validity.yml diff --git a/src/legendmeta/textdb.py b/src/legendmeta/textdb.py index d14daf2..e6e44c9 100644 --- a/src/legendmeta/textdb.py +++ b/src/legendmeta/textdb.py @@ -401,9 +401,12 @@ def on( system: 'all', 'phy', 'cal', 'lar', ... query only a data taking "system". """ - yml = self.__path__ / "validity.yaml" + for ext in utils.___file_extensions__["yaml"]: + yml = self.__path__ / f"validity{ext}" + if yml.is_file(): + break if not yml.is_file(): - msg = f"no validity.yaml file found in {self.__path__!s}" + msg = f"no validity.yaml / validity.yml file found in {self.__path__!s}" raise RuntimeError(msg) file_list = Catalog.get_files(str(yml), timestamp, system) diff --git a/tests/test_jsondb.py b/tests/test_jsondb.py index 03dff5d..500a24d 100644 --- a/tests/test_jsondb.py +++ b/tests/test_jsondb.py @@ -22,7 +22,7 @@ def test_props(): Path(__file__).parent / "testdb/dir1/file3.yaml" ) - test_dict2 = Props.read_from(str(Path(__file__).parent / "testdb/file3.yaml")) + test_dict2 = Props.read_from(str(Path(__file__).parent / "testdb/file3.json")) # test add_to Props.add_to(test_dict, test_dict2) @@ -36,7 +36,7 @@ def test_props(): test_dict = Props.read_from( [ str(Path(__file__).parent / "testdb/file2.yaml"), - str(Path(__file__).parent / "testdb/file3.yaml"), + str(Path(__file__).parent / "testdb/file3.json"), ], subst_pathvar=True, trim_null=True, diff --git a/tests/testdb/dir1/file6.yml b/tests/testdb/dir1/file6.yml deleted file mode 100644 index 0a63124..0000000 --- a/tests/testdb/dir1/file6.yml +++ /dev/null @@ -1 +0,0 @@ -- data: 3 diff --git a/tests/testdb/dir1/validity.yml b/tests/testdb/dir1/validity.yml deleted file mode 100644 index ced9053..0000000 --- a/tests/testdb/dir1/validity.yml +++ /dev/null @@ -1,22 +0,0 @@ -- valid_from: 20220101T000000Z - category: all - apply: - - file3.json - -- valid_from: 20230102T000000Z - category: all - mode: append - apply: - - file5.json - -- valid_from: 20230103T000000Z - category: all - mode: remove - apply: - - file5.json - -- valid_from: 20230104T000000Z - category: all - mode: reset - apply: - - file5.json From b238f89059b97812d8a2a03aed4035ff585f923a Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 25 Nov 2024 17:30:30 +0100 Subject: [PATCH 4/6] more test fixes, add replace functionality also --- src/legendmeta/catalog.py | 27 ++++++++++++++++++--------- src/legendmeta/textdb.py | 2 +- tests/test_jsondb.py | 27 +++++++++++++++------------ tests/testdb/dir1/file3.yaml | 2 +- tests/testdb/dir1/file5.yaml | 2 +- tests/testdb/dir1/file6.yaml | 2 +- tests/testdb/dir1/validity.yaml | 17 ++++++++++++----- tests/testdb/file2.yaml | 2 +- 8 files changed, 50 insertions(+), 31 deletions(-) diff --git a/src/legendmeta/catalog.py b/src/legendmeta/catalog.py index 4846f80..06ba3d6 100644 --- a/src/legendmeta/catalog.py +++ b/src/legendmeta/catalog.py @@ -95,21 +95,30 @@ def read_from(file_name): mode = "append" if props.get("mode") is None else props["mode"] mode = "reset" if len(entries[system]) == 0 else mode if mode == "reset": - entries[system].append(Catalog.Entry(unix_time(timestamp), file_key)) + new = file_key elif mode == "append": - entries[system].append( - Catalog.Entry( - unix_time(timestamp), entries[system][-1].file.copy() + file_key - ) - ) + new = entries[system][-1].file.copy() + file_key elif mode == "remove": - previous = entries[system][-1].file.copy() + new = entries[system][-1].file.copy() for file in file_key: - previous.remove(file) - entries[system].append(Catalog.Entry(unix_time(timestamp), previous)) + new.remove(file) + elif mode == "replace": + new = entries[system][-1].file.copy() + if len(file_key) != 2: + msg = f"Invalid number of elements in replace mode: {len(file_key)}" + raise ValueError(msg) + new.remove(file_key[0]) + new += [file_key[1]] + else: msg = f"Unknown mode for {timestamp}" raise ValueError(msg) + + if timestamp in [entry.valid_from for entry in entries[system]]: + msg = f"Duplicate timestamp: {timestamp}, use reset mode instead with a single entry" + raise ValueError(msg) + else: + entries[system].append(Catalog.Entry(unix_time(timestamp), new)) for system in entries: entries[system] = sorted( diff --git a/src/legendmeta/textdb.py b/src/legendmeta/textdb.py index e6e44c9..bc2b20b 100644 --- a/src/legendmeta/textdb.py +++ b/src/legendmeta/textdb.py @@ -401,7 +401,7 @@ def on( system: 'all', 'phy', 'cal', 'lar', ... query only a data taking "system". """ - for ext in utils.___file_extensions__["yaml"]: + for ext in utils.__file_extensions__["yaml"]: yml = self.__path__ / f"validity{ext}" if yml.is_file(): break diff --git a/tests/test_jsondb.py b/tests/test_jsondb.py index 500a24d..d3936fa 100644 --- a/tests/test_jsondb.py +++ b/tests/test_jsondb.py @@ -98,7 +98,7 @@ def test_access(): def test_keys(): jdb = TextDB(testdb, lazy=False) assert sorted(jdb.keys()) == ["arrays", "dir1", "dir2", "file1", "file2", "file3"] - assert sorted(jdb.dir1.keys()) == ["dir2", "file3", "file5", "file6"] + assert sorted(jdb.dir1.keys()) == ["dir2", "file3", "file5", "file6", "validity"] assert "arrays" in jdb @@ -162,31 +162,34 @@ def test_scan(): def test_time_validity(): jdb = TextDB(testdb) - assert isinstance(jdb["dir1"].on("20220101T000001Z"), AttrsDict) + assert isinstance(jdb["dir1"].on("20230101T000001Z"), AttrsDict) - assert jdb["dir1"].on("20220101T000000Z")["data"] == 1 - assert jdb.dir1.on("20220102T000000Z").data == 2 + assert jdb["dir1"].on("20230101T000000Z")["data"] == 1 + assert jdb.dir1.on("20230102T000000Z").data == 2 # time point in between - assert jdb["dir1"].on("20220102T120000Z")["data"] == 1 + assert jdb["dir1"].on("20230101T120000Z")["data"] == 1 # time point after - assert jdb["dir1"].on("20220102T120000Z")["data"] == 2 + assert jdb["dir1"].on("20230102T120000Z")["data"] == 2 # time point before with pytest.raises(RuntimeError): jdb["dir1"].on("20210101T000000Z")["data"] # test remove functionality - assert jdb["dir1"].on("20220103T120000Z")["data"] == 1 + assert jdb["dir1"].on("20230103T120000Z")["data"] == 1 # test reset functionality - assert jdb["dir1"].on("20220104T120000Z")["data"] == 3 + assert jdb["dir1"].on("20230104T120000Z")["data"] == 3 + # test replace functionality + assert jdb["dir1"].on("20230105T120000Z")["data"] == 1 # directory with no .yml with pytest.raises(RuntimeError): - jdb["dir1"]["dir2"].on("20220101T000001Z") + jdb["dir1"]["dir2"].on("20230101T000001Z") + # invalid timestamp with pytest.raises(ValueError): - jdb.dir1.on("20220627T2335002Z") + jdb.dir1.on("20230627T2335002Z") # test usage of datetime object - tstamp = datetime(2022, 6, 28, 23, 35, 00, tzinfo=timezone.utc) + tstamp = datetime(2023, 6, 28, 23, 35, 00, tzinfo=timezone.utc) assert jdb.dir1.on(tstamp).data == 1 assert jdb.dir1.on(tstamp, r"^file3.*", "all").data == 1 @@ -244,7 +247,7 @@ def test_merging(): jdb = TextDB(testdb, lazy=False) j = jdb.dir1 | jdb.dir2 assert isinstance(j, AttrsDict) - assert sorted(j.keys()) == ["dir2", "file3", "file5", "file7", "file8"] + assert sorted(j.keys()) == ["dir2", "file3", "file5", "file6", "file7", "file8", "validity"] assert hasattr(j, "dir2") assert hasattr(j, "file8") diff --git a/tests/testdb/dir1/file3.yaml b/tests/testdb/dir1/file3.yaml index 21bfb3c..937ac32 100644 --- a/tests/testdb/dir1/file3.yaml +++ b/tests/testdb/dir1/file3.yaml @@ -1 +1 @@ -- data: 1 +data: 1 diff --git a/tests/testdb/dir1/file5.yaml b/tests/testdb/dir1/file5.yaml index e84811c..4182ac4 100644 --- a/tests/testdb/dir1/file5.yaml +++ b/tests/testdb/dir1/file5.yaml @@ -1 +1 @@ -- data: 2 +data: 2 diff --git a/tests/testdb/dir1/file6.yaml b/tests/testdb/dir1/file6.yaml index 0a63124..ebf3e8d 100644 --- a/tests/testdb/dir1/file6.yaml +++ b/tests/testdb/dir1/file6.yaml @@ -1 +1 @@ -- data: 3 +data: 3 diff --git a/tests/testdb/dir1/validity.yaml b/tests/testdb/dir1/validity.yaml index ced9053..d1af97e 100644 --- a/tests/testdb/dir1/validity.yaml +++ b/tests/testdb/dir1/validity.yaml @@ -1,22 +1,29 @@ -- valid_from: 20220101T000000Z +- valid_from: 20230101T000000Z category: all apply: - - file3.json + - file3.yaml - valid_from: 20230102T000000Z category: all mode: append apply: - - file5.json + - file5.yaml - valid_from: 20230103T000000Z category: all mode: remove apply: - - file5.json + - file5.yaml - valid_from: 20230104T000000Z category: all mode: reset apply: - - file5.json + - file6.yaml + +- valid_from: 20230105T000000Z + category: all + mode: replace + apply: + - file6.yaml + - file3.yaml \ No newline at end of file diff --git a/tests/testdb/file2.yaml b/tests/testdb/file2.yaml index 640c8ba..e7c6d8a 100644 --- a/tests/testdb/file2.yaml +++ b/tests/testdb/file2.yaml @@ -1,5 +1,5 @@ data: 2 -filepath: $_/dir1/file3.yml +filepath: $_/dir1/file3.yaml key: label: 3 label: 3 From ec8aae0305a5337ed53b02ccc979df97d15279da Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 26 Nov 2024 15:01:55 +0100 Subject: [PATCH 5/6] add docs --- .pre-commit-hooks.yaml | 4 +-- docs/source/tutorial.rst | 44 +++++++++++++++++++++++++++++---- src/legendmeta/catalog.py | 15 ++++++++--- src/legendmeta/police.py | 12 +++++---- tests/test_jsondb.py | 11 +++++++-- tests/testdb/dir1/validity.yaml | 2 +- 6 files changed, 69 insertions(+), 19 deletions(-) diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index 3c82fc9..964de5d 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -2,10 +2,10 @@ name: check LEGEND channel maps format entry: validate-legend-chmaps language: python - types: [json] + types: [yaml] - id: validate-legend-detdb name: check LEGEND detector database format entry: validate-legend-detdb language: python - types: [json] + types: [yaml] diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst index f937009..f043349 100644 --- a/docs/source/tutorial.rst +++ b/docs/source/tutorial.rst @@ -80,9 +80,9 @@ Metadata validity ----------------- Mappings of metadata to time periods, data taking systems etc. are specified -through JSONL files (`specification +through YAML files (`specification `_). -If a ``.jsonl`` file is present in a directory, ``TextDB`` +If a ``validity.yaml`` file is present in a directory, ``TextDB`` exposes the :meth:`~.textdb.textdb.on` interface to perform a query. Let's assume the ``legend-metadata`` directory from the example above contains @@ -90,10 +90,44 @@ the following file: .. code-block:: :linenos: - :caption: ``validity.jsonl`` + :caption: ``validity.yaml`` + + - valid_from: 20230101T000000Z + category: all + apply: + - file3.yaml + + - valid_from: 20230102T000000Z + category: all + mode: append + apply: + - file2.yaml + + - valid_from: 20230103T000000Z + category: all + mode: remove + apply: + - file2.yaml + + - valid_from: 20230104T000000Z + category: all + mode: reset + apply: + - file2.yaml + + - valid_from: 20230105T000000Z + category: all + mode: replace + apply: + - file2.yaml + - file3.yaml + +Here the modes define how each block is applied to the database. +Reset means that the database is reset to the files in the apply block. +Append adds the apply files to the current state. +Remove removes the apply files from the current state. +Replace replaces the first apply file with the second apply file. - {"valid_from": "20220628T000000Z", "select": "all", "apply": ["file2.json"]} - {"valid_from": "20220629T000000Z", "select": "all", "apply": ["file3.yaml"]} From code, it's possible to obtain the metadata valid for a certain time point: diff --git a/src/legendmeta/catalog.py b/src/legendmeta/catalog.py index 06ba3d6..fa610fa 100644 --- a/src/legendmeta/catalog.py +++ b/src/legendmeta/catalog.py @@ -34,6 +34,7 @@ def to_datetime(value): def unix_time(value): + """Convert a LEGEND timestamp or datetime object to Unix time value""" if isinstance(value, str): return datetime.timestamp(datetime.strptime(value, "%Y%m%dT%H%M%SZ")) @@ -45,6 +46,8 @@ def unix_time(value): class PropsStream: + """Simple class to control loading of validity.yaml files""" + @staticmethod def get(value): if isinstance(value, str): @@ -85,6 +88,7 @@ def get(value): @staticmethod def read_from(file_name): + """Read from a valdiity YAML file and build a Catalog object""" entries = {} for props in PropsStream.get(file_name): timestamp = props["valid_from"] @@ -109,16 +113,15 @@ def read_from(file_name): raise ValueError(msg) new.remove(file_key[0]) new += [file_key[1]] - + else: msg = f"Unknown mode for {timestamp}" raise ValueError(msg) - + if timestamp in [entry.valid_from for entry in entries[system]]: msg = f"Duplicate timestamp: {timestamp}, use reset mode instead with a single entry" raise ValueError(msg) - else: - entries[system].append(Catalog.Entry(unix_time(timestamp), new)) + entries[system].append(Catalog.Entry(unix_time(timestamp), new)) for system in entries: entries[system] = sorted( @@ -127,6 +130,7 @@ def read_from(file_name): return Catalog(entries) def valid_for(self, timestamp, system="all", allow_none=False): + """Get the valid entries for a given timestamp and system""" if system in self.entries: valid_from = [entry.valid_from for entry in self.entries[system]] pos = bisect.bisect_right(valid_from, unix_time(timestamp)) @@ -153,11 +157,14 @@ def valid_for(self, timestamp, system="all", allow_none=False): @staticmethod def get_files(catalog_file, timestamp, category="all"): + """Helper function to get the files for a given timestamp and category""" catalog = Catalog.read_from(catalog_file) return Catalog.valid_for(catalog, timestamp, category) class Props: + """Class to handle overwriting of dictionaries in cascade order""" + @staticmethod def read_from(sources, subst_pathvar=False, trim_null=False): def read_impl(sources): diff --git a/src/legendmeta/police.py b/src/legendmeta/police.py index 57b10a0..9becbc6 100644 --- a/src/legendmeta/police.py +++ b/src/legendmeta/police.py @@ -16,12 +16,13 @@ from __future__ import annotations import argparse -import json import re import sys from importlib import resources from pathlib import Path +import yaml + from . import utils from .textdb import TextDB @@ -96,10 +97,11 @@ def validate_legend_channel_map() -> bool: db = TextDB(d) valid = True - with Path(f"{d}/validity.jsonl").open() as f: - for line in f.readlines(): - ts = json.loads(line)["valid_from"] - sy = json.loads(line)["select"] + with Path(f"{d}/validity.yaml").open() as f: + validity = yaml.safe_load(f) + for line in validity(): + ts = line["valid_from"] + sy = line["apply"] chmap = db.on(ts, system=sy) for k, v in chmap.items(): diff --git a/tests/test_jsondb.py b/tests/test_jsondb.py index d3936fa..484e007 100644 --- a/tests/test_jsondb.py +++ b/tests/test_jsondb.py @@ -183,7 +183,6 @@ def test_time_validity(): with pytest.raises(RuntimeError): jdb["dir1"]["dir2"].on("20230101T000001Z") - # invalid timestamp with pytest.raises(ValueError): jdb.dir1.on("20230627T2335002Z") @@ -247,7 +246,15 @@ def test_merging(): jdb = TextDB(testdb, lazy=False) j = jdb.dir1 | jdb.dir2 assert isinstance(j, AttrsDict) - assert sorted(j.keys()) == ["dir2", "file3", "file5", "file6", "file7", "file8", "validity"] + assert sorted(j.keys()) == [ + "dir2", + "file3", + "file5", + "file6", + "file7", + "file8", + "validity", + ] assert hasattr(j, "dir2") assert hasattr(j, "file8") diff --git a/tests/testdb/dir1/validity.yaml b/tests/testdb/dir1/validity.yaml index d1af97e..3604dc7 100644 --- a/tests/testdb/dir1/validity.yaml +++ b/tests/testdb/dir1/validity.yaml @@ -26,4 +26,4 @@ mode: replace apply: - file6.yaml - - file3.yaml \ No newline at end of file + - file3.yaml From b46092180db1eca245ea54e94a4b23a007b30985 Mon Sep 17 00:00:00 2001 From: Luigi Pertoldi Date: Tue, 26 Nov 2024 15:43:24 +0100 Subject: [PATCH 6/6] More docs fixes --- docs/source/tutorial.rst | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst index f043349..08a5ed4 100644 --- a/docs/source/tutorial.rst +++ b/docs/source/tutorial.rst @@ -38,7 +38,7 @@ Let's consider the following database: │   └── file1.json ├── file2.json ├── file3.yaml - └── validity.jsonl + └── validity.yaml With: @@ -88,7 +88,7 @@ exposes the :meth:`~.textdb.textdb.on` interface to perform a query. Let's assume the ``legend-metadata`` directory from the example above contains the following file: -.. code-block:: +.. code-block:: yaml :linenos: :caption: ``validity.yaml`` @@ -122,13 +122,6 @@ the following file: - file2.yaml - file3.yaml -Here the modes define how each block is applied to the database. -Reset means that the database is reset to the files in the apply block. -Append adds the apply files to the current state. -Remove removes the apply files from the current state. -Replace replaces the first apply file with the second apply file. - - From code, it's possible to obtain the metadata valid for a certain time point: >>> from datetime import datetime, timezone