From 4a6dcf66acb36c6e47ee40c78043fbb2da5edf66 Mon Sep 17 00:00:00 2001 From: thorbjoernl <51087536+thorbjoernl@users.noreply.github.com> Date: Wed, 31 Jul 2024 09:57:54 +0200 Subject: [PATCH 01/28] WIP --- setup.cfg | 1 + src/aerovaldb/plugins.py | 15 ++-- src/aerovaldb/sqlitedb/__init__.py | 1 + src/aerovaldb/sqlitedb/sqlitedb.py | 113 +++++++++++++++++++++++++++++ src/aerovaldb/sqlitedb/utils.py | 10 +++ tests/sqlitedb/test_sqlitedb.py | 30 ++++++++ tests/sqlitedb/test_utils.py | 16 ++++ tests/test_plugins.py | 27 ++++++- 8 files changed, 205 insertions(+), 8 deletions(-) create mode 100644 src/aerovaldb/sqlitedb/__init__.py create mode 100644 src/aerovaldb/sqlitedb/sqlitedb.py create mode 100644 src/aerovaldb/sqlitedb/utils.py create mode 100644 tests/sqlitedb/test_sqlitedb.py create mode 100644 tests/sqlitedb/test_utils.py diff --git a/setup.cfg b/setup.cfg index 9f0dc7b..fa006d2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -43,6 +43,7 @@ where=src [options.entry_points] aerovaldb = json_files = aerovaldb.jsondb:AerovalJsonFileDB + sqlitedb = aerovaldb.sqlitedb:AerovalSqliteDB [tox:tox] labels = diff --git a/src/aerovaldb/plugins.py b/src/aerovaldb/plugins.py index c43f530..156e45a 100644 --- a/src/aerovaldb/plugins.py +++ b/src/aerovaldb/plugins.py @@ -1,4 +1,5 @@ import functools +import os import sys import warnings @@ -68,11 +69,15 @@ def open(resource, /, use_async: bool = False) -> AerovalDB: name = "json_files" path = resource else: - # Assume directory and json. - # TODO: In the future this should differentiate based on file path, eg. folder->json_files - # .sqlite-> SqliteDB, etc. - name = "json_files" - path = resource + fileextension = os.path.splitext(resource)[1] + if fileextension in [".db", ".sqlite"]: + # Sqlite file. + name = "sqlitedb" + path = resource + else: + # Assume directory and json. + name = "json_files" + path = resource aerodb = list_engines()[name] diff --git a/src/aerovaldb/sqlitedb/__init__.py b/src/aerovaldb/sqlitedb/__init__.py new file mode 100644 index 0000000..4ea507e --- /dev/null +++ b/src/aerovaldb/sqlitedb/__init__.py @@ -0,0 +1 @@ +from .sqlitedb import AerovalSqliteDB diff --git a/src/aerovaldb/sqlitedb/sqlitedb.py b/src/aerovaldb/sqlitedb/sqlitedb.py new file mode 100644 index 0000000..68b25b6 --- /dev/null +++ b/src/aerovaldb/sqlitedb/sqlitedb.py @@ -0,0 +1,113 @@ +import sqlite3 +import aerovaldb +from ..aerovaldb import AerovalDB +from ..routes import * +from .utils import extract_substitutions +import os + + +class AerovalSqliteDB(AerovalDB): + """ + Allows reading and writing from sqlite3 database files. + """ + + TABLE_NAME_LOOKUP = { + ROUTE_GLOB_STATS: "glob_stats", + ROUTE_REG_STATS: "glob_stats", + ROUTE_HEATMAP: "glob_stats", + ROUTE_CONTOUR: "contour", + ROUTE_TIMESERIES: "timeseries", + ROUTE_TIMESERIES_WEEKLY: "timeseries_weekly", + ROUTE_EXPERIMENTS: "experiments", + ROUTE_CONFIG: "config", + ROUTE_MENU: "menu", + ROUTE_STATISTICS: "statistics", + ROUTE_RANGES: "ranges", + ROUTE_REGIONS: "regions", + ROUTE_MODELS_STYLE: "models_style", + ROUTE_MAP: "map", + ROUTE_SCATTER: "scatter", + ROUTE_PROFILES: "profiles", + ROUTE_HEATMAP_TIMESERIES: "heatmap_timeseries", + ROUTE_FORECAST: "forecast", + ROUTE_GRIDDED_MAP: "gridded_map", + ROUTE_REPORT: "report", + } + + def __init__(self, database: str, /, **kwargs): + self._dbfile = database + + if not os.path.exists(database): + self._con = sqlite3.connect(database) + self._initialize_db() + else: + self._con = sqlite3.connect(database) + if not self._get_metadata_by_key("created_by") == "aerovaldb": + ValueError(f"Database {database} is not a valid aerovaldb database.") + + def _get_metadata_by_key(self, key: str) -> str: + """ + Returns the value associated with a key from the metadata + table. + """ + cur = self._con.cursor() + + cur.execute( + """ + SELECT value FROM metadata + WHERE key = ? + """, + (key,), + ) + return cur.fetchone()[0] + + def _set_metadata_by_key(self, key: str, value: str): + """ """ + cur = self._con.cursor() + + cur.execute( + """ + INSERT OR REPLACE INTO metadata(key, value) + VALUES(?, ?) + """, + (key, value), + ) + + def _initialize_db(self): + """Given an existing sqlite connection or sqlite3 database + identifier string, initializes the database so it has the + necessary tables. + """ + cur = self._con.cursor() + + # Metadata table for information used internally by aerovaldb. + cur.execute( + """ + CREATE TABLE metadata(key, value, + UNIQUE(key)) + """ + ) + self._set_metadata_by_key("created_by", f"aerovaldb_{aerovaldb.__version__}") + self._set_metadata_by_key( + "last_modified_by", f"aerovaldb_{aerovaldb.__version__}" + ) + + # Data tables. Currently one table is used per type of asset + # stored and json blobs are stored in the json column. + for route, table_name in self.TABLE_NAME_LOOKUP.items(): + route_args = extract_substitutions(route) + + column_names = ",".join(route_args) + cur.execute( + f""" + CREATE TABLE IF NOT EXISTS {table_name}({column_names},json, + + UNIQUE({column_names})) + """ + ) + + async def _get(self, route, route_args, *args, **kwargs): + pass + + async def _put(self, obj, route, route_args, *args, **kwargs): + pass diff --git a/src/aerovaldb/sqlitedb/utils.py b/src/aerovaldb/sqlitedb/utils.py new file mode 100644 index 0000000..2ea4f36 --- /dev/null +++ b/src/aerovaldb/sqlitedb/utils.py @@ -0,0 +1,10 @@ +import re + + +def extract_substitutions(template: str): + """ + For a python template string, extracts the names between curly brackets: + + For example 'blah blah {test} blah {test2}' returns [test, test2] + """ + return re.findall(r"\{(.*?)\}", template) diff --git a/tests/sqlitedb/test_sqlitedb.py b/tests/sqlitedb/test_sqlitedb.py new file mode 100644 index 0000000..85445d1 --- /dev/null +++ b/tests/sqlitedb/test_sqlitedb.py @@ -0,0 +1,30 @@ +import aerovaldb +import pytest +import os +from aerovaldb.sqlitedb import AerovalSqliteDB + + +def test_db_initialization(tmp_path): + file = os.path.join(tmp_path, "test.sqlite") + with aerovaldb.open(file) as db: + db: AerovalSqliteDB + + assert db._dbfile == file + assert ( + db._get_metadata_by_key("created_by") + == f"aerovaldb_{aerovaldb.__version__}" + ) + assert ( + db._get_metadata_by_key("last_modified_by") + == f"aerovaldb_{aerovaldb.__version__}" + ) + + # Check that all tables are properly initialized. + cur = db._con.cursor() + for table in AerovalSqliteDB.TABLE_NAME_LOOKUP.values(): + cur.execute( + f""" + PRAGMA table_info({table}) + """ + ) + assert cur.fetchone() is not None diff --git a/tests/sqlitedb/test_utils.py b/tests/sqlitedb/test_utils.py new file mode 100644 index 0000000..e8e8b94 --- /dev/null +++ b/tests/sqlitedb/test_utils.py @@ -0,0 +1,16 @@ +import pytest +from aerovaldb.sqlitedb.utils import extract_substitutions + + +@pytest.mark.parametrize( + "template,result", + ( + ("{A}{B}{C}", {"A", "B", "C"}), + ("{A}hello world{B} test {C}", {"A", "B", "C"}), + ("", set()), + ), +) +def test_extract_substitutions(template: str, result: set[str]): + l = extract_substitutions(template) + + assert set(l) == result diff --git a/tests/test_plugins.py b/tests/test_plugins.py index 87bd625..ec9a8e2 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -3,21 +3,42 @@ import aerovaldb from aerovaldb.jsondb.jsonfiledb import AerovalJsonFileDB +from aerovaldb.sqlitedb.sqlitedb import AerovalSqliteDB def test_plugins(): engines = aerovaldb.list_engines() print(engines) - assert len(engines) >= 1 + assert len(engines) == 2 -def test_open_1(): +def test_open_json_1(): with aerovaldb.open("json_files:.") as db: assert isinstance(db, AerovalJsonFileDB) assert os.path.realpath(db._basedir) == os.path.realpath(".") -def test_open_2(): +def test_open_json_2(): with aerovaldb.open(".") as db: assert isinstance(db, AerovalJsonFileDB) assert os.path.realpath(db._basedir) == os.path.realpath(".") + + +@pytest.mark.parametrize( + "fext", (pytest.param(".sqlite", id="sqlite"), pytest.param(".db", id="db")) +) +def test_open_sqlite_1(tmp_path, fext): + path = os.path.join(tmp_path, f"test{fext}") + with aerovaldb.open(f"sqlitedb:{path}") as db: + assert isinstance(db, AerovalSqliteDB) + assert db._dbfile == path + + +@pytest.mark.parametrize( + "fext", (pytest.param(".sqlite", id="sqlite"), pytest.param(".db", id="db")) +) +def test_open_sqlite_2(tmp_path, fext): + path = os.path.join(tmp_path, f"test{fext}") + with aerovaldb.open(path) as db: + assert isinstance(db, AerovalSqliteDB) + assert db._dbfile == path From 9bf4395d94d7a0f05c0dbfe5996eb2d40566df81 Mon Sep 17 00:00:00 2001 From: thorbjoernl <51087536+thorbjoernl@users.noreply.github.com> Date: Wed, 31 Jul 2024 14:04:03 +0200 Subject: [PATCH 02/28] WIP --- src/aerovaldb/aerovaldb.py | 28 ++++++++++++ src/aerovaldb/jsondb/jsonfiledb.py | 28 ------------ src/aerovaldb/plugins.py | 6 ++- src/aerovaldb/sqlitedb/sqlitedb.py | 68 +++++++++++++++++++++++++++++- tests/jsondb/test_jsonfiledb.py | 26 ++++++++++-- tests/test_plugins.py | 6 +++ 6 files changed, 127 insertions(+), 35 deletions(-) diff --git a/src/aerovaldb/aerovaldb.py b/src/aerovaldb/aerovaldb.py index ec4f4fc..ad2a714 100644 --- a/src/aerovaldb/aerovaldb.py +++ b/src/aerovaldb/aerovaldb.py @@ -1170,3 +1170,31 @@ def lock(self): See also: https://aerovaldb.readthedocs.io/en/latest/locking.html """ raise NotImplementedError + + def _normalize_access_type( + self, access_type: AccessType | str | None, default: AccessType = AccessType.OBJ + ) -> AccessType: + """Normalizes the access_type to an instance of AccessType enum. + + :param access_type: AccessType instance or string convertible to AccessType + :param default: The type to return if access_type is None. Defaults to AccessType.OBJ + :raises ValueError: If str access_type can't be converted to AccessType. + :raises ValueError: If access_type is not str or AccessType + :return: The normalized AccessType. + """ + if isinstance(access_type, AccessType): + return access_type + + if isinstance(access_type, str): + try: + return AccessType[access_type] + except: + raise ValueError( + f"String '{access_type}' can not be converted to AccessType." + ) + if access_type is None: + return default + + raise ValueError( + f"Access_type, {access_type}, could not be normalized. This is probably due to input that is not a str or AccessType instance." + ) diff --git a/src/aerovaldb/jsondb/jsonfiledb.py b/src/aerovaldb/jsondb/jsonfiledb.py index d97d52d..69e4f9d 100644 --- a/src/aerovaldb/jsondb/jsonfiledb.py +++ b/src/aerovaldb/jsondb/jsonfiledb.py @@ -252,34 +252,6 @@ async def _get_version(self, project: str, experiment: str) -> Version: return version - def _normalize_access_type( - self, access_type: AccessType | str | None, default: AccessType = AccessType.OBJ - ) -> AccessType: - """Normalizes the access_type to an instance of AccessType enum. - - :param access_type: AccessType instance or string convertible to AccessType - :param default: The type to return if access_type is None. Defaults to AccessType.OBJ - :raises ValueError: If str access_type can't be converted to AccessType. - :raises ValueError: If access_type is not str or AccessType - :return: The normalized AccessType. - """ - if isinstance(access_type, AccessType): - return access_type - - if isinstance(access_type, str): - try: - return AccessType[access_type] - except: - raise ValueError( - f"String '{access_type}' can not be converted to AccessType." - ) - if access_type is None: - return default - - raise ValueError( - f"Access_type, {access_type}, could not be normalized. This is probably due to input that is not a str or AccessType instance." - ) - @async_and_sync async def _get_template(self, route: str, substitutions: dict) -> str: """ diff --git a/src/aerovaldb/plugins.py b/src/aerovaldb/plugins.py index 156e45a..e1ca368 100644 --- a/src/aerovaldb/plugins.py +++ b/src/aerovaldb/plugins.py @@ -58,8 +58,12 @@ def open(resource, /, use_async: bool = False) -> AerovalDB: synchronously. :return: an implementation-object of AerovalDB openend to a location """ + if resource == ":memory:": + # Special case for sqlite in memory database. + name = "sqlitedb" + path = ":memory:" - if ":" in resource: + elif ":" in resource: parts = resource.split(":") if len(parts) > 1: name = parts[0] diff --git a/src/aerovaldb/sqlitedb/sqlitedb.py b/src/aerovaldb/sqlitedb/sqlitedb.py index 68b25b6..058df99 100644 --- a/src/aerovaldb/sqlitedb/sqlitedb.py +++ b/src/aerovaldb/sqlitedb/sqlitedb.py @@ -1,8 +1,12 @@ import sqlite3 + +import simplejson import aerovaldb +from aerovaldb.exceptions import UnsupportedOperation from ..aerovaldb import AerovalDB from ..routes import * from .utils import extract_substitutions +from ..types import AccessType import os @@ -106,8 +110,68 @@ def _initialize_db(self): """ ) + def _get_column_list_and_substitution_list(self, kwargs: dict) -> tuple[str, str]: + keys = list(kwargs.keys()) + + columnlist = ", ".join(keys) + substitutionlist = ", ".join([f":{k}" for k in keys]) + + return (columnlist, substitutionlist) + async def _get(self, route, route_args, *args, **kwargs): - pass + assert len(args) == 0 + access_type = self._normalize_access_type(kwargs.pop("access_type", None)) + + if access_type in [AccessType.FILE_PATH]: + raise UnsupportedOperation( + f"sqlitedb does not support access_mode FILE_PATH." + ) + + cur = self._con.cursor() + + table_name = AerovalSqliteDB.TABLE_NAME_LOOKUP[route] + + columnlist, substitutionlist = self._get_column_list_and_substitution_list( + route_args + ) + + cur.execute( + f""" + SELECT json FROM {table_name} + WHERE + ({columnlist}) = ({substitutionlist}) + """, + route_args, + ) + fetched = cur.fetchone()[0] + if access_type == AccessType.JSON_STR: + return fetched + + if access_type == AccessType.OBJ: + return simplejson.loads(fetched) + + assert False # Should never happen. async def _put(self, obj, route, route_args, *args, **kwargs): - pass + assert len(args) == 0 + + cur = self._con.cursor() + + table_name = AerovalSqliteDB.TABLE_NAME_LOOKUP[route] + + columnlist, substitutionlist = self._get_column_list_and_substitution_list( + route_args + ) + + json = obj + if not isinstance(json, str): + json = simplejson.dumps(json) + + route_args.update(json=json) + cur.execute( + f""" + INSERT OR REPLACE INTO {table_name}({columnlist}, json) + VALUES({substitutionlist}, :json) + """, + route_args, + ) diff --git a/tests/jsondb/test_jsonfiledb.py b/tests/jsondb/test_jsonfiledb.py index 6d42413..cc972c6 100644 --- a/tests/jsondb/test_jsonfiledb.py +++ b/tests/jsondb/test_jsonfiledb.py @@ -301,14 +301,29 @@ async def test_file_does_not_exist(): ) +@pytest.fixture +def tmpdb(tmp_path, dbtype: str) -> aerovaldb.AerovalDB: + """Fixture encapsulating logic for each tested database connection to create + a temporary database and connect to it.""" + if dbtype == "json_files": + return aerovaldb.open(f"json_files:{str(tmp_path)}") + elif dbtype == "sqlitedb": + return aerovaldb.open(":memory:") + + assert False + + @pytest.mark.asyncio +@pytest.mark.parametrize( + "dbtype", (pytest.param("json_files"), pytest.param("sqlitedb")) +) @set_parametrization -async def test_setters(fun: str, args: list, kwargs: dict, tmp_path): +async def test_setters(dbtype: str, fun: str, args: list, kwargs: dict, tmpdb): """ This test tests that you read back the expected data, once you have written to a fresh db, assuming the same arguments. """ - with aerovaldb.open(f"json_files:{os.path.join(tmp_path, fun)}") as db: + with tmpdb as db: get = getattr(db, f"get_{fun}") put = getattr(db, f"put_{fun}") @@ -325,13 +340,16 @@ async def test_setters(fun: str, args: list, kwargs: dict, tmp_path): assert data["data"] == expected +@pytest.mark.parametrize( + "dbtype", (pytest.param("json_files"), pytest.param("sqlitedb")) +) @set_parametrization -def test_setters_sync(fun: str, args: list, kwargs: dict, tmp_path): +def test_setters_sync(fun: str, args: list, kwargs: dict, tmpdb): """ This test tests that you read back the expected data, once you have written to a fresh db, assuming the same arguments. """ - with aerovaldb.open(f"json_files:{os.path.join(tmp_path, fun)}") as db: + with tmpdb as db: get = getattr(db, f"get_{fun}") put = getattr(db, f"put_{fun}") diff --git a/tests/test_plugins.py b/tests/test_plugins.py index ec9a8e2..d27a8b9 100644 --- a/tests/test_plugins.py +++ b/tests/test_plugins.py @@ -42,3 +42,9 @@ def test_open_sqlite_2(tmp_path, fext): with aerovaldb.open(path) as db: assert isinstance(db, AerovalSqliteDB) assert db._dbfile == path + + +def test_open_sqlite_3(): + with aerovaldb.open(":memory:") as db: + assert isinstance(db, AerovalSqliteDB) + assert db._dbfile == ":memory:" From 37d4ec51f73a9668d6a4d43c928a2854a2ddf041 Mon Sep 17 00:00:00 2001 From: thorbjoernl <51087536+thorbjoernl@users.noreply.github.com> Date: Wed, 31 Jul 2024 14:58:55 +0200 Subject: [PATCH 03/28] All main tests work --- src/aerovaldb/sqlitedb/sqlitedb.py | 34 ++- tests/conftest.py | 2 + tests/jsondb/test_jsonfiledb.py | 347 ---------------------------- tests/test_aerovaldb.py | 355 +++++++++++++++++++++++++++++ 4 files changed, 387 insertions(+), 351 deletions(-) create mode 100644 tests/test_aerovaldb.py diff --git a/src/aerovaldb/sqlitedb/sqlitedb.py b/src/aerovaldb/sqlitedb/sqlitedb.py index 058df99..ce285d4 100644 --- a/src/aerovaldb/sqlitedb/sqlitedb.py +++ b/src/aerovaldb/sqlitedb/sqlitedb.py @@ -1,6 +1,6 @@ import sqlite3 -import simplejson +import simplejson # type: ignore import aerovaldb from aerovaldb.exceptions import UnsupportedOperation from ..aerovaldb import AerovalDB @@ -15,6 +15,26 @@ class AerovalSqliteDB(AerovalDB): Allows reading and writing from sqlite3 database files. """ + # When creating a table it works to extract the substitution template + # names from the route, as this constitutes all arguments. For the ones + # which have extra arguments (currently only time) the following table + # defines the override. Currently this only applies to map which has + # an extra time argument. + ROUTE_COLUMN_NAME_OVERRIDE = { + ROUTE_MAP: ( + "project", + "experiment", + "network", + "obsvar", + "layer", + "model", + "modvar", + "time", + ) + } + + # This lookup table stores the name of the table in which json + # for a specific route is stored. TABLE_NAME_LOOKUP = { ROUTE_GLOB_STATS: "glob_stats", ROUTE_REG_STATS: "glob_stats", @@ -82,6 +102,7 @@ def _initialize_db(self): identifier string, initializes the database so it has the necessary tables. """ + print("test") cur = self._con.cursor() # Metadata table for information used internally by aerovaldb. @@ -98,10 +119,13 @@ def _initialize_db(self): # Data tables. Currently one table is used per type of asset # stored and json blobs are stored in the json column. - for route, table_name in self.TABLE_NAME_LOOKUP.items(): - route_args = extract_substitutions(route) + for route, table_name in AerovalSqliteDB.TABLE_NAME_LOOKUP.items(): + args = AerovalSqliteDB.ROUTE_COLUMN_NAME_OVERRIDE.get( + route, extract_substitutions(route) + ) + + column_names = ",".join(args) - column_names = ",".join(route_args) cur.execute( f""" CREATE TABLE IF NOT EXISTS {table_name}({column_names},json, @@ -110,6 +134,8 @@ def _initialize_db(self): """ ) + self._con.commit() + def _get_column_list_and_substitution_list(self, kwargs: dict) -> tuple[str, str]: keys = list(kwargs.keys()) diff --git a/tests/conftest.py b/tests/conftest.py index 8db14f7..2d43226 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,3 +1,5 @@ import os os.environ["AVDB_USE_LOCKING"] = "1" + +pytest_plugins = ("pytest_asyncio",) diff --git a/tests/jsondb/test_jsonfiledb.py b/tests/jsondb/test_jsonfiledb.py index cc972c6..c5a9636 100644 --- a/tests/jsondb/test_jsonfiledb.py +++ b/tests/jsondb/test_jsonfiledb.py @@ -7,193 +7,6 @@ import simplejson # type: ignore import aerovaldb -pytest_plugins = ("pytest_asyncio",) - -get_parameters = [ - "fun,args,kwargs,expected", - ( - ( - "get_glob_stats", - ["project", "experiment", "frequency"], - None, - "./project/experiment/hm/", - ), - ( - "get_regional_stats", - ["project", "experiment", "frequency", "network", "variable", "layer"], - None, - "./project/experiment/hm/regional_stats", - ), - pytest.param( - "get_heatmap", - ["project", "experiment", "frequency", "region", "time"], - None, - "./project/experiment/hm/regional_stats", - marks=pytest.mark.xfail(reason="missing test file in json testdb"), - ), - ( - "get_contour", - ["project", "experiment", "modvar", "model"], - None, - "./project/experiment/contour/", - ), - ( - "get_timeseries", - ["project", "experiment", "location", "network", "obsvar", "layer"], - None, - "./project/experiment/ts/", - ), - ( - "get_timeseries_weekly", - ["project", "experiment", "location", "network", "obsvar", "layer"], - None, - "./project/experiment/ts/dirunal/", - ), - ("get_config", ["project", "experiment"], None, "./project/experiment/"), - ("get_menu", ["project", "experiment"], None, "./project/experiment/"), - ("get_statistics", ["project", "experiment"], None, "./project/experiment/"), - ("get_ranges", ["project", "experiment"], None, "./project/experiment/"), - ("get_regions", ["project", "experiment"], None, "./project/experiment/"), - ("get_models_style", ["project"], None, "./project/"), - ("get_experiments", ["project"], None, "./project/"), - ( - "get_models_style", - ["project"], - {"experiment": "experiment"}, - "./project/experiment/", - ), - ( - "get_map", - [ - "project", - "experiment-old", - "network", - "obsvar", - "layer", - "model", - "modvar", - "time", - ], - None, - "./project/experiment/map/", - ), - ( - "get_map", - [ - "project", - "experiment", - "network", - "obsvar", - "layer", - "model", - "modvar", - "time", - ], - None, - "./project/experiment/map/with_time", - ), - ( - "get_scatter", - [ - "project", - "experiment", - "network", - "obsvar", - "layer", - "model", - "modvar", - "time", - ], - None, - "./project/experiment/scat/time", - ), - ( - "get_scatter", - [ - "project", - "experiment-old", - "network", - "obsvar", - "layer", - "model", - "modvar", - "test", - ], - None, - "./project/experiment/scat/", - ), - ( - "get_profiles", - ["project", "experiment", "region", "network", "obsvar"], - None, - "./project/experiment/profiles/", - ), - ( - "get_heatmap_timeseries", - ["project", "experiment", "region", "network", "obsvar", "layer"], - None, - "./project/experiment/hm/ts/region-network-obsvar-layer", - ), - ( - "get_heatmap_timeseries", - ["project", "experiment-old", "region", "network", "obsvar", "layer"], - None, - "project/experiment/hm/ts/stats_ts.json", - ), - # TODO: Missing test case for heatmap_ts with the middle version format. - ( - "get_forecast", - ["project", "experiment", "region", "network", "obsvar", "layer"], - None, - "./project/experiment/forecast/", - ), - ( - "get_gridded_map", - ["project", "experiment", "obsvar", "model"], - None, - "./project/experiment/contour/", - ), - ( - "get_report", - ["project", "experiment", "title"], - None, - "./reports/project/experiment/", - ), - ), -] - - -@pytest.mark.asyncio -@pytest.mark.parametrize("resource", (("json_files:./tests/test-db/json",))) -@pytest.mark.parametrize(*get_parameters) -async def test_getter(resource: str, fun: str, args: list, kwargs: dict, expected): - """ - This test tests that data is read as expected from a static, fixed database. - """ - with aerovaldb.open(resource, use_async=True) as db: - f = getattr(db, fun) - - if kwargs is not None: - data = await f(*args, **kwargs) - else: - data = await f(*args) - - assert data["path"] == expected - - -@pytest.mark.parametrize("resource", (("json_files:./tests/test-db/json",))) -@pytest.mark.parametrize(*get_parameters) -def test_getter_sync(resource: str, fun: str, args: list, kwargs: dict, expected): - with aerovaldb.open(resource, use_async=False) as db: - f = getattr(db, fun) - - if kwargs is not None: - data = f(*args, **kwargs) - else: - data = f(*args) - - assert data["path"] == expected - @pytest.mark.asyncio async def test_file_does_not_exist(): @@ -206,166 +19,6 @@ async def test_file_does_not_exist(): ) -set_parametrization = pytest.mark.parametrize( - "fun,args,kwargs", - ( - ("glob_stats", ["project", "experiment", "frequency"], None), - ("contour", ["project", "experiment", "obsvar", "model"], None), - ( - "timeseries", - ["project", "experiment", "location", "network", "obsvar", "layer"], - None, - ), - ( - "timeseries_weekly", - ["project", "experiment", "location", "network", "obsvar", "layer"], - None, - ), - ("config", ["project", "experiment"], None), - ("menu", ["project", "experiment"], None), - ("statistics", ["project", "experiment"], None), - ("ranges", ["project", "experiment"], None), - ("regions", ["project", "experiment"], None), - ("models_style", ["project"], None), - ("models_style", ["project"], {"experiment": "experiment"}), - ( - "map", - [ - "project", - "experiment", - "network", - "obsvar", - "layer", - "model", - "modvar", - "time", - ], - None, - ), - ( - "map", - [ - "project", - "experiment", - "network", - "obsvar", - "layer", - "model", - "modvar", - "time", - ], - None, - ), - ( - "scatter", - [ - "project", - "experiment", - "network", - "obsvar", - "layer", - "model", - "modvar", - "time", - ], - None, - ), - ( - "scatter", - [ - "project", - "experiment", - "network", - "obsvar", - "layer", - "model", - "modvar", - "time", - ], - None, - ), - ("profiles", ["project", "experiment", "station", "network", "obsvar"], None), - ( - "heatmap_timeseries", - ["project", "experiment", "region", "network", "obsvar", "layer"], - None, - ), - ( - "forecast", - ["project", "experiment", "station", "network", "obsvar", "layer"], - None, - ), - ("gridded_map", ["project", "experiment", "obsvar", "model"], None), - ("report", ["project", "experiment", "title"], None), - ), -) - - -@pytest.fixture -def tmpdb(tmp_path, dbtype: str) -> aerovaldb.AerovalDB: - """Fixture encapsulating logic for each tested database connection to create - a temporary database and connect to it.""" - if dbtype == "json_files": - return aerovaldb.open(f"json_files:{str(tmp_path)}") - elif dbtype == "sqlitedb": - return aerovaldb.open(":memory:") - - assert False - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "dbtype", (pytest.param("json_files"), pytest.param("sqlitedb")) -) -@set_parametrization -async def test_setters(dbtype: str, fun: str, args: list, kwargs: dict, tmpdb): - """ - This test tests that you read back the expected data, once you have written - to a fresh db, assuming the same arguments. - """ - with tmpdb as db: - get = getattr(db, f"get_{fun}") - put = getattr(db, f"put_{fun}") - - expected = fun + str(random.randint(0, 100000)) - if kwargs is not None: - await put({"data": expected}, *args, **kwargs) - - data = await get(*args, **kwargs) - else: - await put({"data": expected}, *args) - - data = await get(*args) - - assert data["data"] == expected - - -@pytest.mark.parametrize( - "dbtype", (pytest.param("json_files"), pytest.param("sqlitedb")) -) -@set_parametrization -def test_setters_sync(fun: str, args: list, kwargs: dict, tmpdb): - """ - This test tests that you read back the expected data, once you have written - to a fresh db, assuming the same arguments. - """ - with tmpdb as db: - get = getattr(db, f"get_{fun}") - put = getattr(db, f"put_{fun}") - - expected = fun + str(random.randint(0, 100000)) - if kwargs is not None: - put({"data": expected}, *args, **kwargs) - - data = get(*args, **kwargs) - else: - put({"data": expected}, *args) - - data = get(*args) - - assert data["data"] == expected - - def test_write_and_read_of_nan(tmp_path): with aerovaldb.open(f"json_files:{tmp_path}") as db: data = dict(value=float("nan")) diff --git a/tests/test_aerovaldb.py b/tests/test_aerovaldb.py new file mode 100644 index 0000000..3f13a64 --- /dev/null +++ b/tests/test_aerovaldb.py @@ -0,0 +1,355 @@ +# This test file applies tests to each implementation of database connector +# through the interface defined in src/aerovaldb/aerovaldb.py +# In addition each implementation has implementation specific tests in its +# respective test_*.py file +# - json_files: tests/jsondb/test_jsonfiledb.py +# - sqlitedb: tests/sqlitedb/test_sqlitedb.py + +import aerovaldb +import pytest +import random + + +@pytest.fixture +def tmpdb(tmp_path, dbtype: str) -> aerovaldb.AerovalDB: + """Fixture encapsulating logic for each tested database connection to create + a temporary database and connect to it.""" + if dbtype == "json_files": + return aerovaldb.open(f"json_files:{str(tmp_path)}") + elif dbtype == "sqlitedb": + return aerovaldb.open(":memory:") + + assert False + + +GET_PARAMETRIZATION = pytest.mark.parametrize( + "fun,args,kwargs,expected", + ( + ( + "get_glob_stats", + ["project", "experiment", "frequency"], + None, + "./project/experiment/hm/", + ), + ( + "get_regional_stats", + ["project", "experiment", "frequency", "network", "variable", "layer"], + None, + "./project/experiment/hm/regional_stats", + ), + pytest.param( + "get_heatmap", + ["project", "experiment", "frequency", "region", "time"], + None, + "./project/experiment/hm/regional_stats", + marks=pytest.mark.xfail(reason="missing test file in json testdb"), + ), + ( + "get_contour", + ["project", "experiment", "modvar", "model"], + None, + "./project/experiment/contour/", + ), + ( + "get_timeseries", + ["project", "experiment", "location", "network", "obsvar", "layer"], + None, + "./project/experiment/ts/", + ), + ( + "get_timeseries_weekly", + ["project", "experiment", "location", "network", "obsvar", "layer"], + None, + "./project/experiment/ts/dirunal/", + ), + ("get_config", ["project", "experiment"], None, "./project/experiment/"), + ("get_menu", ["project", "experiment"], None, "./project/experiment/"), + ("get_statistics", ["project", "experiment"], None, "./project/experiment/"), + ("get_ranges", ["project", "experiment"], None, "./project/experiment/"), + ("get_regions", ["project", "experiment"], None, "./project/experiment/"), + ("get_models_style", ["project"], None, "./project/"), + ("get_experiments", ["project"], None, "./project/"), + ( + "get_models_style", + ["project"], + {"experiment": "experiment"}, + "./project/experiment/", + ), + ( + "get_map", + [ + "project", + "experiment-old", + "network", + "obsvar", + "layer", + "model", + "modvar", + "time", + ], + None, + "./project/experiment/map/", + ), + ( + "get_map", + [ + "project", + "experiment", + "network", + "obsvar", + "layer", + "model", + "modvar", + "time", + ], + None, + "./project/experiment/map/with_time", + ), + ( + "get_scatter", + [ + "project", + "experiment", + "network", + "obsvar", + "layer", + "model", + "modvar", + "time", + ], + None, + "./project/experiment/scat/time", + ), + ( + "get_scatter", + [ + "project", + "experiment-old", + "network", + "obsvar", + "layer", + "model", + "modvar", + "test", + ], + None, + "./project/experiment/scat/", + ), + ( + "get_profiles", + ["project", "experiment", "region", "network", "obsvar"], + None, + "./project/experiment/profiles/", + ), + ( + "get_heatmap_timeseries", + ["project", "experiment", "region", "network", "obsvar", "layer"], + None, + "./project/experiment/hm/ts/region-network-obsvar-layer", + ), + ( + "get_heatmap_timeseries", + ["project", "experiment-old", "region", "network", "obsvar", "layer"], + None, + "project/experiment/hm/ts/stats_ts.json", + ), + # TODO: Missing test case for heatmap_ts with the middle version format. + ( + "get_forecast", + ["project", "experiment", "region", "network", "obsvar", "layer"], + None, + "./project/experiment/forecast/", + ), + ( + "get_gridded_map", + ["project", "experiment", "obsvar", "model"], + None, + "./project/experiment/contour/", + ), + ( + "get_report", + ["project", "experiment", "title"], + None, + "./reports/project/experiment/", + ), + ), +) + +PUT_PARAMETRIZATION = pytest.mark.parametrize( + "fun,args,kwargs", + ( + ("glob_stats", ["project", "experiment", "frequency"], None), + ("contour", ["project", "experiment", "obsvar", "model"], None), + ( + "timeseries", + ["project", "experiment", "location", "network", "obsvar", "layer"], + None, + ), + ( + "timeseries_weekly", + ["project", "experiment", "location", "network", "obsvar", "layer"], + None, + ), + ("config", ["project", "experiment"], None), + ("menu", ["project", "experiment"], None), + ("statistics", ["project", "experiment"], None), + ("ranges", ["project", "experiment"], None), + ("regions", ["project", "experiment"], None), + ("models_style", ["project"], None), + ("models_style", ["project"], {"experiment": "experiment"}), + ( + "map", + [ + "project", + "experiment", + "network", + "obsvar", + "layer", + "model", + "modvar", + "time", + ], + None, + ), + ( + "map", + [ + "project", + "experiment", + "network", + "obsvar", + "layer", + "model", + "modvar", + "time", + ], + None, + ), + ( + "scatter", + [ + "project", + "experiment", + "network", + "obsvar", + "layer", + "model", + "modvar", + "time", + ], + None, + ), + ( + "scatter", + [ + "project", + "experiment", + "network", + "obsvar", + "layer", + "model", + "modvar", + "time", + ], + None, + ), + ("profiles", ["project", "experiment", "station", "network", "obsvar"], None), + ( + "heatmap_timeseries", + ["project", "experiment", "region", "network", "obsvar", "layer"], + None, + ), + ( + "forecast", + ["project", "experiment", "station", "network", "obsvar", "layer"], + None, + ), + ("gridded_map", ["project", "experiment", "obsvar", "model"], None), + ("report", ["project", "experiment", "title"], None), + ), +) + + +@pytest.mark.asyncio +@pytest.mark.parametrize("resource", (("json_files:./tests/test-db/json",))) +@GET_PARAMETRIZATION +async def test_getter(resource: str, fun: str, args: list, kwargs: dict, expected): + """ + This test tests that data is read as expected from a static, fixed database. + """ + with aerovaldb.open(resource, use_async=True) as db: + f = getattr(db, fun) + + if kwargs is not None: + data = await f(*args, **kwargs) + else: + data = await f(*args) + + assert data["path"] == expected + + +@pytest.mark.parametrize("resource", (("json_files:./tests/test-db/json",))) +@GET_PARAMETRIZATION +def test_getter_sync(resource: str, fun: str, args: list, kwargs: dict, expected): + with aerovaldb.open(resource, use_async=False) as db: + f = getattr(db, fun) + + if kwargs is not None: + data = f(*args, **kwargs) + else: + data = f(*args) + + assert data["path"] == expected + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "dbtype", (pytest.param("json_files"), pytest.param("sqlitedb")) +) +@PUT_PARAMETRIZATION +async def test_setters(dbtype: str, fun: str, args: list, kwargs: dict, tmpdb): + """ + This test tests that you read back the expected data, once you have written + to a fresh db, assuming the same arguments. + """ + with tmpdb as db: + get = getattr(db, f"get_{fun}") + put = getattr(db, f"put_{fun}") + + expected = fun + str(random.randint(0, 100000)) + if kwargs is not None: + await put({"data": expected}, *args, **kwargs) + + data = await get(*args, **kwargs) + else: + await put({"data": expected}, *args) + + data = await get(*args) + + assert data["data"] == expected + + +@pytest.mark.parametrize( + "dbtype", (pytest.param("json_files"), pytest.param("sqlitedb")) +) +@PUT_PARAMETRIZATION +def test_setters_sync(fun: str, args: list, kwargs: dict, tmpdb): + """ + This test tests that you read back the expected data, once you have written + to a fresh db, assuming the same arguments. + """ + with tmpdb as db: + get = getattr(db, f"get_{fun}") + put = getattr(db, f"put_{fun}") + + expected = fun + str(random.randint(0, 100000)) + if kwargs is not None: + put({"data": expected}, *args, **kwargs) + + data = get(*args, **kwargs) + else: + put({"data": expected}, *args) + + data = get(*args) + + assert data["data"] == expected From 9bd7895970ee0a92258fce87d65de21f54f38626 Mon Sep 17 00:00:00 2001 From: thorbjoernl <51087536+thorbjoernl@users.noreply.github.com> Date: Thu, 1 Aug 2024 09:33:50 +0200 Subject: [PATCH 04/28] WIP --- src/aerovaldb/jsondb/jsonfiledb.py | 12 +----------- src/aerovaldb/sqlitedb/sqlitedb.py | 6 +++--- src/aerovaldb/utils.py | 12 ++++++++++++ tests/jsondb/test_jsonfiledb.py | 13 ------------- tests/test_aerovaldb.py | 24 ++++++++++++++++++++++++ 5 files changed, 40 insertions(+), 27 deletions(-) diff --git a/src/aerovaldb/jsondb/jsonfiledb.py b/src/aerovaldb/jsondb/jsonfiledb.py index 69e4f9d..b1672c0 100644 --- a/src/aerovaldb/jsondb/jsonfiledb.py +++ b/src/aerovaldb/jsondb/jsonfiledb.py @@ -14,7 +14,7 @@ from aerovaldb.serialization.default_serialization import default_serialization from aerovaldb.types import AccessType -from ..utils import async_and_sync +from ..utils import async_and_sync, json_dumps_wrapper from .uri import get_uri from .templatemapper import ( TemplateMapper, @@ -33,16 +33,6 @@ logger = logging.getLogger(__name__) -def json_dumps_wrapper(obj, **kwargs) -> str: - """ - Wrapper which calls simplejson.dumps with the correct options, known to work for objects - returned by Pyaerocom. - - This ensures that nan values are serialized as null to be compliant with the json standard. - """ - return simplejson.dumps(obj, ignore_nan=True, **kwargs) - - class AerovalJsonFileDB(AerovalDB): def __init__(self, basedir: str | Path, /, use_async: bool = False): """ diff --git a/src/aerovaldb/sqlitedb/sqlitedb.py b/src/aerovaldb/sqlitedb/sqlitedb.py index ce285d4..947523a 100644 --- a/src/aerovaldb/sqlitedb/sqlitedb.py +++ b/src/aerovaldb/sqlitedb/sqlitedb.py @@ -7,6 +7,7 @@ from ..routes import * from .utils import extract_substitutions from ..types import AccessType +from ..utils import json_dumps_wrapper import os @@ -102,7 +103,6 @@ def _initialize_db(self): identifier string, initializes the database so it has the necessary tables. """ - print("test") cur = self._con.cursor() # Metadata table for information used internally by aerovaldb. @@ -174,7 +174,7 @@ async def _get(self, route, route_args, *args, **kwargs): return fetched if access_type == AccessType.OBJ: - return simplejson.loads(fetched) + return simplejson.loads(fetched, allow_nan=True) assert False # Should never happen. @@ -191,7 +191,7 @@ async def _put(self, obj, route, route_args, *args, **kwargs): json = obj if not isinstance(json, str): - json = simplejson.dumps(json) + json = json_dumps_wrapper(json) route_args.update(json=json) cur.execute( diff --git a/src/aerovaldb/utils.py b/src/aerovaldb/utils.py index dc7aa82..cf4ff82 100644 --- a/src/aerovaldb/utils.py +++ b/src/aerovaldb/utils.py @@ -1,6 +1,18 @@ import asyncio import functools from typing import Callable, ParamSpec, TypeVar +import simplejson + + +def json_dumps_wrapper(obj, **kwargs) -> str: + """ + Wrapper which calls simplejson.dumps with the correct options, known to work for objects + returned by Pyaerocom. + + This ensures that nan values are serialized as null to be compliant with the json standard. + """ + return simplejson.dumps(obj, ignore_nan=True, **kwargs) + # Workaround to ensure function signature of the decorated function is shown correctly # Solution from here: https://stackoverflow.com/questions/74074580/how-to-avoid-losing-type-hinting-of-decorated-function diff --git a/tests/jsondb/test_jsonfiledb.py b/tests/jsondb/test_jsonfiledb.py index c5a9636..e952614 100644 --- a/tests/jsondb/test_jsonfiledb.py +++ b/tests/jsondb/test_jsonfiledb.py @@ -19,19 +19,6 @@ async def test_file_does_not_exist(): ) -def test_write_and_read_of_nan(tmp_path): - with aerovaldb.open(f"json_files:{tmp_path}") as db: - data = dict(value=float("nan")) - - db.put_by_uri(data, "./test") - - read = db.get_by_uri("./test") - - # See Additional Notes on #59 - # https://github.com/metno/aerovaldb/pull/59 - assert read["value"] is None - - def test_exception_on_unexpected_args(): """ https://github.com/metno/aerovaldb/issues/19 diff --git a/tests/test_aerovaldb.py b/tests/test_aerovaldb.py index 3f13a64..a449a13 100644 --- a/tests/test_aerovaldb.py +++ b/tests/test_aerovaldb.py @@ -353,3 +353,27 @@ def test_setters_sync(fun: str, args: list, kwargs: dict, tmpdb): data = get(*args) assert data["data"] == expected + + +@pytest.mark.parametrize( + "dbtype", + ( + pytest.param( + "json_files", + ), + pytest.param( + "sqlitedb", + ), + ), +) +def test_write_and_read_of_nan(tmpdb): + with tmpdb as db: + data = dict(value=float("nan")) + + db.put_by_uri(data, "./test") + + read = db.get_by_uri("./test") + + # See Additional Notes on #59 + # https://github.com/metno/aerovaldb/pull/59 + assert read["value"] is None From 9e04c2f9ed60ddb80af2e195c6b5ea3024bd1bad Mon Sep 17 00:00:00 2001 From: thorbjoernl <51087536+thorbjoernl@users.noreply.github.com> Date: Thu, 1 Aug 2024 11:43:10 +0200 Subject: [PATCH 05/28] New URI design for sqlite --- src/aerovaldb/routes.py | 24 +++++++++++ src/aerovaldb/sqlitedb/sqlitedb.py | 28 +++++++++++- src/aerovaldb/utils.py | 69 ++++++++++++++++++++++++++++++ tests/__init__.py | 0 tests/jsondb/__init__.py | 0 tests/sqlitedb/__init__.py | 0 tests/test_aerovaldb.py | 4 +- tests/test_utils.py | 55 ++++++++++++++++++++++++ 8 files changed, 177 insertions(+), 3 deletions(-) create mode 100644 tests/__init__.py create mode 100644 tests/jsondb/__init__.py create mode 100644 tests/sqlitedb/__init__.py create mode 100644 tests/test_utils.py diff --git a/src/aerovaldb/routes.py b/src/aerovaldb/routes.py index 627dab6..8447282 100644 --- a/src/aerovaldb/routes.py +++ b/src/aerovaldb/routes.py @@ -45,3 +45,27 @@ ROUTE_GRIDDED_MAP = "/v0/gridded_map/{project}/{experiment}/{obsvar}/{model}" ROUTE_REPORT = "/v0/report/{project}/{experiment}/{title}" + + +ALL_ROUTES = [ + ROUTE_GLOB_STATS, + ROUTE_REG_STATS, + ROUTE_HEATMAP, + ROUTE_CONTOUR, + ROUTE_TIMESERIES, + ROUTE_TIMESERIES_WEEKLY, + ROUTE_EXPERIMENTS, + ROUTE_CONFIG, + ROUTE_MENU, + ROUTE_STATISTICS, + ROUTE_RANGES, + ROUTE_REGIONS, + ROUTE_MODELS_STYLE, + ROUTE_MAP, + ROUTE_SCATTER, + ROUTE_PROFILES, + ROUTE_HEATMAP_TIMESERIES, + ROUTE_FORECAST, + ROUTE_GRIDDED_MAP, + ROUTE_REPORT, +] diff --git a/src/aerovaldb/sqlitedb/sqlitedb.py b/src/aerovaldb/sqlitedb/sqlitedb.py index 947523a..4e59238 100644 --- a/src/aerovaldb/sqlitedb/sqlitedb.py +++ b/src/aerovaldb/sqlitedb/sqlitedb.py @@ -7,7 +7,7 @@ from ..routes import * from .utils import extract_substitutions from ..types import AccessType -from ..utils import json_dumps_wrapper +from ..utils import json_dumps_wrapper, parse_uri, async_and_sync import os @@ -199,5 +199,31 @@ async def _put(self, obj, route, route_args, *args, **kwargs): INSERT OR REPLACE INTO {table_name}({columnlist}, json) VALUES({substitutionlist}, :json) """, + route_args | kwargs, + ) + + @async_and_sync + async def get_by_uri( + self, + uri: str, + /, + access_type: str | AccessType = AccessType.OBJ, + cache: bool = False, + default=None, + ): + route, route_args, kwargs = parse_uri(uri) + + return await self._get( + route, route_args, + access_type=access_type, + cache=cache, + default=default, + **kwargs, ) + + @async_and_sync + async def put_by_uri(self, obj, uri: str): + route, route_args, kwargs = parse_uri(uri) + + await self._put(obj, route, route_args, **kwargs) diff --git a/src/aerovaldb/utils.py b/src/aerovaldb/utils.py index cf4ff82..0917707 100644 --- a/src/aerovaldb/utils.py +++ b/src/aerovaldb/utils.py @@ -1,7 +1,10 @@ +import re import asyncio import functools from typing import Callable, ParamSpec, TypeVar import simplejson +from .routes import ALL_ROUTES +import urllib def json_dumps_wrapper(obj, **kwargs) -> str: @@ -14,6 +17,72 @@ def json_dumps_wrapper(obj, **kwargs) -> str: return simplejson.dumps(obj, ignore_nan=True, **kwargs) +def parse_formatted_string(template: str, s: str) -> dict: + """Match s against a python format string, extracting the + parameter values from the format string in a dictionary. + + Note + ---- + Only supports {named_parameter} style format. + """ + + # First split on any keyword arguments, note that the names of keyword arguments will be in the + # 1st, 3rd, ... positions in this list + tokens = re.split(r"\{(.*?)\}", template) + keywords = tokens[1::2] + + # Now replace keyword arguments with named groups matching them. We also escape between keyword + # arguments so we support meta-characters there. Re-join tokens to form our regexp pattern + tokens[1::2] = map("(?P<{}>.*)".format, keywords) + tokens[0::2] = map(re.escape, tokens[0::2]) + pattern = "".join(tokens) + + # Use our pattern to match the given string, raise if it doesn't match + matches = re.match(pattern, s) + if not matches: + raise Exception("Format string did not match") + + # Return a dict with all of our keywords and their values + return {x: matches.group(x) for x in keywords} + + +def parse_uri(uri: str) -> tuple[str, dict[str, str], dict[str, str]]: + """ + Parses an uri returning a tuple consisting of + - The route against which it was matched. + - Route args. + - Additional kwargs. + + Parameters + ---------- + uri : + The uri to be parsed. + """ + split = uri.split("?") + + for template in ALL_ROUTES: + if len(split) == 1: + try: + route_args = parse_formatted_string(template, split[0]) + except Exception: + continue + else: + return (template, route_args, dict()) + + elif len(split) == 2: + try: + route_args = parse_formatted_string(template, split[0]) + except Exception: + continue + + kwargs = urllib.parse.parse_qs(split[1]) + kwargs = {k: v[0] for k, v in kwargs.items()} + + return (template, route_args, kwargs) + + raise ValueError(f"URI {uri} is not a valid URI.") + + # Workaround to ensure function signature of the decorated function is shown correctly # Solution from here: https://stackoverflow.com/questions/74074580/how-to-avoid-losing-type-hinting-of-decorated-function P = ParamSpec("P") diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/jsondb/__init__.py b/tests/jsondb/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/sqlitedb/__init__.py b/tests/sqlitedb/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_aerovaldb.py b/tests/test_aerovaldb.py index a449a13..60dfc48 100644 --- a/tests/test_aerovaldb.py +++ b/tests/test_aerovaldb.py @@ -370,9 +370,9 @@ def test_write_and_read_of_nan(tmpdb): with tmpdb as db: data = dict(value=float("nan")) - db.put_by_uri(data, "./test") + db.put_by_uri(data, "/v0/experiments/project") - read = db.get_by_uri("./test") + read = db.get_by_uri("/v0/experiments/project") # See Additional Notes on #59 # https://github.com/metno/aerovaldb/pull/59 diff --git a/tests/test_utils.py b/tests/test_utils.py new file mode 100644 index 0000000..62dc132 --- /dev/null +++ b/tests/test_utils.py @@ -0,0 +1,55 @@ +from aerovaldb.utils import parse_formatted_string, parse_uri +import pytest +from aerovaldb.routes import * + + +@pytest.mark.parametrize( + "template,s,expected", + ( + ("{test}", "hello", {"test": "hello"}), + ("ABCD{test}1234", "ABCDhello world1234", {"test": "hello world"}), + ( + "test/{a}/{b}/{c}/{d}", + "test/1/2/3/4", + {"a": "1", "b": "2", "c": "3", "d": "4"}, + ), + ), +) +def test_parse_formatted_string(template: str, s: str, expected: dict): + assert parse_formatted_string(template, s) == expected + + +@pytest.mark.parametrize( + "uri,expected", + ( + ( + "/v0/experiments/project", + (ROUTE_EXPERIMENTS, {"project": "project"}, {}), + ), + ( + "/v0/map/project/experiment/network/obsvar/layer/model/modvar?time=time", + ( + ROUTE_MAP, + { + "project": "project", + "experiment": "experiment", + "network": "network", + "obsvar": "obsvar", + "layer": "layer", + "model": "model", + "modvar": "modvar", + }, + {"time": "time"}, + ), + ), + ), +) +def test_parse_uri(uri: str, expected: tuple[str, dict, dict]): + template, route_args, kwargs = parse_uri(uri) + + assert (template, route_args, kwargs) == expected + + +def test_parse_uri_error(): + with pytest.raises(ValueError): + parse_uri("??") From da974a81d3ab5f4adb6f16dbbe3013fbb23b9bcd Mon Sep 17 00:00:00 2001 From: thorbjoernl <51087536+thorbjoernl@users.noreply.github.com> Date: Thu, 1 Aug 2024 14:10:41 +0200 Subject: [PATCH 06/28] Use new URI in jsonfiledb --- src/aerovaldb/jsondb/jsonfiledb.py | 159 +++++++++++++++++------------ src/aerovaldb/sqlitedb/sqlitedb.py | 8 +- src/aerovaldb/types.py | 4 + src/aerovaldb/utils.py | 9 ++ 4 files changed, 114 insertions(+), 66 deletions(-) diff --git a/src/aerovaldb/jsondb/jsonfiledb.py b/src/aerovaldb/jsondb/jsonfiledb.py index b1672c0..b9deee0 100644 --- a/src/aerovaldb/jsondb/jsonfiledb.py +++ b/src/aerovaldb/jsondb/jsonfiledb.py @@ -14,7 +14,7 @@ from aerovaldb.serialization.default_serialization import default_serialization from aerovaldb.types import AccessType -from ..utils import async_and_sync, json_dumps_wrapper +from ..utils import async_and_sync, json_dumps_wrapper, parse_uri from .uri import get_uri from .templatemapper import ( TemplateMapper, @@ -292,36 +292,47 @@ async def _get( access_type = self._normalize_access_type(kwargs.pop("access_type", None)) - file_path = Path(os.path.join(self._basedir, relative_path)).resolve() + file_path = str(Path(os.path.join(self._basedir, relative_path)).resolve()) logger.debug(f"Fetching file {file_path} as {access_type}-") + default = kwargs.pop("default", None) + filter_func = self.FILTERS.get(route, None) filter_vars = route_args | kwargs - data = await self.get_by_uri( - file_path, - access_type=access_type, - cache=use_caching, - default=kwargs.get("default", None), - ) - if "default" in kwargs: - # Dont want to apply filtering to default value. - return data - if filter_func is not None: - if access_type in (AccessType.JSON_STR, AccessType.OBJ): - if isinstance(data, str): - data = simplejson.loads(data, allow_nan=True) + if not os.path.exists(file_path): + if default is None or access_type == AccessType.FILE_PATH: + raise FileNotFoundError(f"File {file_path} does not exist.") + return default + + # No filtered. + if filter_func is None: + if access_type == AccessType.FILE_PATH: + return file_path + + if access_type == AccessType.JSON_STR: + raw = await self._cache.get_json(file_path, no_cache=not use_caching) + return json_dumps_wrapper(raw) + + raw = await self._cache.get_json(file_path, no_cache=not use_caching) + + return simplejson.loads(raw, allow_nan=True) - data = filter_func(data, **filter_vars) + if access_type == AccessType.FILE_PATH: + raise UnsupportedOperation("Filtered endpoints can not return a filepath") + + json = await self._cache.get_json(file_path, no_cache=not use_caching) + obj = simplejson.loads(json, allow_nan=True) - if access_type == AccessType.JSON_STR: - data = json_dumps_wrapper(data) + obj = await filter_func(obj, **filter_vars) - return data + if access_type == AccessType.OBJ: + return obj - raise UnsupportedOperation("Filtered endpoints can not return a file path.") + if access_type == AccessType.JSON_STR: + return json_dumps_wrapper(obj) - return data + raise UnsupportedOperation async def _put(self, obj, route, route_args, *args, **kwargs): """Jsondb implemention of database put operation. @@ -338,11 +349,18 @@ async def _put(self, obj, route, route_args, *args, **kwargs): path_template = await self._get_template(route, substitutions) relative_path = path_template.format(**substitutions) - file_path = Path(os.path.join(self._basedir, relative_path)).resolve() + file_path = str(Path(os.path.join(self._basedir, relative_path)).resolve()) logger.debug(f"Mapped route {route} / { route_args} to file {file_path}.") - await self.put_by_uri(obj, file_path) + if not os.path.exists(os.path.dirname(file_path)): + os.makedirs(os.path.dirname(file_path)) + if isinstance(obj, str): + json = obj + else: + json = json_dumps_wrapper(obj) + with open(file_path, "w") as f: + f.write(json) @async_and_sync async def get_experiments(self, project: str, /, *args, exp_order=None, **kwargs): @@ -561,53 +579,64 @@ async def get_by_uri( cache: bool = False, default=None, ): - if not isinstance(uri, str): - uri = str(uri) - if uri.startswith("."): - uri = get_uri(os.path.join(self._basedir, uri)) - - if not uri.startswith(self._basedir): - raise PermissionError( - f"URI {uri} is out of bounds of the current aerovaldb connection." - ) - - access_type = self._normalize_access_type(access_type) - - if not os.path.exists(uri): - if default is None or access_type == AccessType.FILE_PATH: - raise FileNotFoundError(f"Object with URI {uri} does not exist.") - - return default - if access_type == AccessType.FILE_PATH: + if access_type in [AccessType.URI]: return uri - if access_type == AccessType.JSON_STR: - raw = await self._cache.get_json(uri, no_cache=not cache) - return json_dumps_wrapper(raw) - - raw = await self._cache.get_json(uri, no_cache=not cache) - - return simplejson.loads(raw, allow_nan=True) + route, route_args, kwargs = parse_uri(uri) + + return await self._get(route, route_args, **kwargs) + # if not isinstance(uri, str): + # uri = str(uri) + # if uri.startswith("."): + # uri = get_uri(os.path.join(self._basedir, uri)) + + # + # if not uri.startswith(self._basedir): + # raise PermissionError( + # f"URI {uri} is out of bounds of the current aerovaldb connection." + # ) + # + # access_type = self._normalize_access_type(access_type) + # + # if not os.path.exists(uri): + # if default is None or access_type == AccessType.FILE_PATH: + # raise FileNotFoundError(f"Object with URI {uri} does not exist.") + # + # return default + # if access_type == AccessType.FILE_PATH: + # return uri + # + # if access_type == AccessType.JSON_STR: + # raw = await self._cache.get_json(uri, no_cache=not cache) + # return json_dumps_wrapper(raw) + + # raw = await self._cache.get_json(uri, no_cache=not cache) + # + # return simplejson.loads(raw, allow_nan=True) @async_and_sync async def put_by_uri(self, obj, uri: str): - if not isinstance(uri, str): - uri = str(uri) - if uri.startswith("."): - uri = get_uri(os.path.join(self._basedir, uri)) - - if not uri.startswith(self._basedir): - raise PermissionError( - f"URI {uri} is out of bounds of the current aerovaldb connection." - ) - if not os.path.exists(os.path.dirname(uri)): - os.makedirs(os.path.dirname(uri)) - if isinstance(obj, str): - json = obj - else: - json = json_dumps_wrapper(obj) - with open(uri, "w") as f: - f.write(json) + route, route_args, kwargs = parse_uri(uri) + + await self._put(obj, route, route_args, **kwargs) + # if not isinstance(uri, str): + # uri = str(uri) + # if uri.startswith("."): + # uri = get_uri(os.path.join(self._basedir, uri)) + + # + # if not uri.startswith(self._basedir): + # raise PermissionError( + # f"URI {uri} is out of bounds of the current aerovaldb connection." + # ) + # if not os.path.exists(os.path.dirname(uri)): + # os.makedirs(os.path.dirname(uri)) + # if isinstance(obj, str): + # json = obj + # else: + # json = json_dumps_wrapper(obj) + # with open(uri, "w") as f: + # f.write(json) def _get_lock_file(self) -> str: os.makedirs(os.path.expanduser("~/.aerovaldb/.lock/"), exist_ok=True) diff --git a/src/aerovaldb/sqlitedb/sqlitedb.py b/src/aerovaldb/sqlitedb/sqlitedb.py index 4e59238..3884b0e 100644 --- a/src/aerovaldb/sqlitedb/sqlitedb.py +++ b/src/aerovaldb/sqlitedb/sqlitedb.py @@ -7,7 +7,7 @@ from ..routes import * from .utils import extract_substitutions from ..types import AccessType -from ..utils import json_dumps_wrapper, parse_uri, async_and_sync +from ..utils import json_dumps_wrapper, parse_uri, async_and_sync, build_uri import os @@ -153,6 +153,9 @@ async def _get(self, route, route_args, *args, **kwargs): f"sqlitedb does not support access_mode FILE_PATH." ) + if access_type in [AccessType.URI]: + return build_uri(route, route_args, kwargs) + cur = self._con.cursor() table_name = AerovalSqliteDB.TABLE_NAME_LOOKUP[route] @@ -211,6 +214,9 @@ async def get_by_uri( cache: bool = False, default=None, ): + if access_type in [AccessType.URI]: + return uri + route, route_args, kwargs = parse_uri(uri) return await self._get( diff --git a/src/aerovaldb/types.py b/src/aerovaldb/types.py index 619d462..965829a 100644 --- a/src/aerovaldb/types.py +++ b/src/aerovaldb/types.py @@ -9,8 +9,12 @@ class AccessType(Enum): FILE_PATH: Result will be returned as the file path to the file containing the data. OBJ: The json will be parsed and returned as a python object. + URI: A string which is a unique identifier of this asset between + implementations of Aerovaldb. Can be used with `get_by_uuid()` and + `put_by_uuid()` to read or write respectively. """ JSON_STR = auto() FILE_PATH = auto() OBJ = auto() + URI = auto() diff --git a/src/aerovaldb/utils.py b/src/aerovaldb/utils.py index 0917707..116d94a 100644 --- a/src/aerovaldb/utils.py +++ b/src/aerovaldb/utils.py @@ -83,6 +83,15 @@ def parse_uri(uri: str) -> tuple[str, dict[str, str], dict[str, str]]: raise ValueError(f"URI {uri} is not a valid URI.") +def build_uri(route: str, route_args: dict, kwargs: dict | None = None) -> str: + uri = route.format(**route_args) + if not kwargs: + queries = "&".join([f"{k}={v}" for k, v in kwargs.items()]) + uri = f"{uri}?{queries}" + + return uri + + # Workaround to ensure function signature of the decorated function is shown correctly # Solution from here: https://stackoverflow.com/questions/74074580/how-to-avoid-losing-type-hinting-of-decorated-function P = ParamSpec("P") From e1f5d6af99ca5c9477aec6c87cb7ab4512214882 Mon Sep 17 00:00:00 2001 From: thorbjoernl <51087536+thorbjoernl@users.noreply.github.com> Date: Thu, 1 Aug 2024 14:38:57 +0200 Subject: [PATCH 07/28] All tests work again --- src/aerovaldb/jsondb/jsonfiledb.py | 11 +++++++++-- src/aerovaldb/utils.py | 6 +++--- tests/jsondb/test_jsonfiledb.py | 8 +++++--- tests/jsondb/test_lock.py | 9 ++++----- .../project/experiment}/invalid-json.json | 0 5 files changed, 21 insertions(+), 13 deletions(-) rename tests/test-db/json/{ => reports/project/experiment}/invalid-json.json (100%) diff --git a/src/aerovaldb/jsondb/jsonfiledb.py b/src/aerovaldb/jsondb/jsonfiledb.py index b9deee0..a7fac0b 100644 --- a/src/aerovaldb/jsondb/jsonfiledb.py +++ b/src/aerovaldb/jsondb/jsonfiledb.py @@ -324,7 +324,7 @@ async def _get( json = await self._cache.get_json(file_path, no_cache=not use_caching) obj = simplejson.loads(json, allow_nan=True) - obj = await filter_func(obj, **filter_vars) + obj = filter_func(obj, **filter_vars) if access_type == AccessType.OBJ: return obj @@ -584,7 +584,14 @@ async def get_by_uri( route, route_args, kwargs = parse_uri(uri) - return await self._get(route, route_args, **kwargs) + return await self._get( + route, + route_args, + cache=cache, + default=default, + access_type=access_type, + **kwargs, + ) # if not isinstance(uri, str): # uri = str(uri) # if uri.startswith("."): diff --git a/src/aerovaldb/utils.py b/src/aerovaldb/utils.py index 116d94a..8b835b3 100644 --- a/src/aerovaldb/utils.py +++ b/src/aerovaldb/utils.py @@ -2,7 +2,7 @@ import asyncio import functools from typing import Callable, ParamSpec, TypeVar -import simplejson +import simplejson # type: ignore from .routes import ALL_ROUTES import urllib @@ -75,7 +75,7 @@ def parse_uri(uri: str) -> tuple[str, dict[str, str], dict[str, str]]: except Exception: continue - kwargs = urllib.parse.parse_qs(split[1]) + kwargs = urllib.parse.parse_qs(split[1]) # type: ignore kwargs = {k: v[0] for k, v in kwargs.items()} return (template, route_args, kwargs) @@ -83,7 +83,7 @@ def parse_uri(uri: str) -> tuple[str, dict[str, str], dict[str, str]]: raise ValueError(f"URI {uri} is not a valid URI.") -def build_uri(route: str, route_args: dict, kwargs: dict | None = None) -> str: +def build_uri(route: str, route_args: dict, kwargs: dict = {}) -> str: uri = route.format(**route_args) if not kwargs: queries = "&".join([f"{k}={v}" for k, v in kwargs.items()]) diff --git a/tests/jsondb/test_jsonfiledb.py b/tests/jsondb/test_jsonfiledb.py index e952614..87c0506 100644 --- a/tests/jsondb/test_jsonfiledb.py +++ b/tests/jsondb/test_jsonfiledb.py @@ -90,7 +90,7 @@ def test_list_glob_stats(): def test_getter_with_default(): with aerovaldb.open("json_files:./tests/test-db/json") as db: data = db.get_by_uri( - "./project/experiment/non-existent-file.json", default={"data": "test"} + "/v0/experiments/non-existent-project", default={"data": "test"} ) assert data["data"] == "test" @@ -98,5 +98,7 @@ def test_getter_with_default(): def test_getter_with_default_error(): with aerovaldb.open("json_files:./tests/test-db/json") as db: - with pytest.raises(simplejson.errors.JSONDecodeError): - db.get_by_uri("./invalid-json.json", default={"data": "data"}) + with pytest.raises(simplejson.JSONDecodeError): + db.get_by_uri( + "/v0/report/project/experiment/invalid-json", default={"data": "data"} + ) diff --git a/tests/jsondb/test_lock.py b/tests/jsondb/test_lock.py index 9742817..298d607 100644 --- a/tests/jsondb/test_lock.py +++ b/tests/jsondb/test_lock.py @@ -5,7 +5,6 @@ import asyncio from aerovaldb.lock.lock import FileLock, FakeLock import aerovaldb -from pathlib import Path import logging pytest_plugins = ("pytest_asyncio",) @@ -38,15 +37,15 @@ async def test_simple_locking(tmp_path): async def test_multiprocess_locking(monkeypatch, tmp_path): COUNTER_LIST = [100, 200, 150, 300, 400, 200] # monkeypatch.setenv("AVDB_LOCK_DIR", tmp_path / "lock") - data_file = Path(tmp_path / "data") + uri = "/v0/experiments/project" async def increment(n: int): with aerovaldb.open(f"json_files:{tmp_path}") as db: for i in range(n): with db.lock(): - data = await db.get_by_uri(data_file, default={"counter": 0}) + data = await db.get_by_uri(uri, default={"counter": 0}) data["counter"] += 1 - await db.put_by_uri(data, data_file) + await db.put_by_uri(data, uri) def helper(x): asyncio.run(increment(x)) @@ -61,6 +60,6 @@ def helper(x): [p.join() for p in processes] with aerovaldb.open(f"json_files:{tmp_path}") as db: - data = await db.get_by_uri(str(data_file)) + data = await db.get_by_uri(uri) assert data["counter"] == sum(COUNTER_LIST) diff --git a/tests/test-db/json/invalid-json.json b/tests/test-db/json/reports/project/experiment/invalid-json.json similarity index 100% rename from tests/test-db/json/invalid-json.json rename to tests/test-db/json/reports/project/experiment/invalid-json.json From 59a76df7e0968f10a6655493e0408754d6f7c8f6 Mon Sep 17 00:00:00 2001 From: thorbjoernl <51087536+thorbjoernl@users.noreply.github.com> Date: Fri, 2 Aug 2024 08:55:27 +0200 Subject: [PATCH 08/28] Remove commented code --- src/aerovaldb/aerovaldb.py | 16 +++--- src/aerovaldb/jsondb/jsonfiledb.py | 86 +++++++++++++----------------- tests/jsondb/test_jsonfiledb.py | 15 ++++-- 3 files changed, 54 insertions(+), 63 deletions(-) diff --git a/src/aerovaldb/aerovaldb.py b/src/aerovaldb/aerovaldb.py index ad2a714..726ca87 100644 --- a/src/aerovaldb/aerovaldb.py +++ b/src/aerovaldb/aerovaldb.py @@ -1140,9 +1140,9 @@ async def get_by_uri( Note: ----- - URI is implementation specific. While AerovalJsonFileDB returns - a file path, this behaviour should not be relied upon as other - implementations may not. + URI is intended to be consistent between implementations. Using get_by_uri() + to fetch an identifier which can then be written to another connector using + its respective put_by_uri() method. """ raise NotImplementedError @@ -1156,9 +1156,9 @@ async def put_by_uri(self, obj, uri: str): Note: ----- - URI is implementation specific. While AerovalJsonFileDB returns - a file path as the uri, this behaviour should not be relied upon - as other implementations will not. + URI is intended to be consistent between implementations. Using get_by_uri() + to fetch an identifier which can then be written to another connector using + its respective put_by_uri() method. """ raise NotImplementedError @@ -1195,6 +1195,4 @@ def _normalize_access_type( if access_type is None: return default - raise ValueError( - f"Access_type, {access_type}, could not be normalized. This is probably due to input that is not a str or AccessType instance." - ) + assert False diff --git a/src/aerovaldb/jsondb/jsonfiledb.py b/src/aerovaldb/jsondb/jsonfiledb.py index a7fac0b..b13b138 100644 --- a/src/aerovaldb/jsondb/jsonfiledb.py +++ b/src/aerovaldb/jsondb/jsonfiledb.py @@ -11,11 +11,15 @@ from aerovaldb.aerovaldb import AerovalDB from aerovaldb.exceptions import UnusedArguments, TemplateNotFound -from aerovaldb.serialization.default_serialization import default_serialization from aerovaldb.types import AccessType -from ..utils import async_and_sync, json_dumps_wrapper, parse_uri -from .uri import get_uri +from ..utils import ( + async_and_sync, + json_dumps_wrapper, + parse_uri, + parse_formatted_string, + build_uri, +) from .templatemapper import ( TemplateMapper, DataVersionToTemplateMapper, @@ -29,6 +33,9 @@ from ..lock.lock import FakeLock, FileLock from hashlib import md5 import simplejson # type: ignore +from ..sqlitedb.utils import ( + extract_substitutions, +) # TODO: Move this to a more approriate location before merging PR. logger = logging.getLogger(__name__) @@ -477,6 +484,32 @@ async def get_heatmap( cache=True, ) + @async_and_sync + async def _get_uri_for_file(self, file_path: str) -> str: + """ + For the provided data file path, returns the corresponding + URI. + + :param file_path : The file_path. + """ + file_path = os.path.relpath(file_path, start=self._basedir) + + for route in self.PATH_LOOKUP: + template = await self._get_template(route, {}) + route_arg_names = extract_substitutions(template) + + try: + all_args = parse_formatted_string(f"./{template}", file_path) + + route_args = {k: v for k, v in all_args.items() if k in route_arg_names} + kwargs = { + k: v for k, v in all_args.items() if not (k in route_arg_names) + } + except: + continue + else: + return build_uri(route, route_args, kwargs) + def list_glob_stats( self, project: str, experiment: str ) -> Generator[str, None, None]: @@ -579,6 +612,7 @@ async def get_by_uri( cache: bool = False, default=None, ): + access_type = self._normalize_access_type(access_type) if access_type in [AccessType.URI]: return uri @@ -592,58 +626,12 @@ async def get_by_uri( access_type=access_type, **kwargs, ) - # if not isinstance(uri, str): - # uri = str(uri) - # if uri.startswith("."): - # uri = get_uri(os.path.join(self._basedir, uri)) - - # - # if not uri.startswith(self._basedir): - # raise PermissionError( - # f"URI {uri} is out of bounds of the current aerovaldb connection." - # ) - # - # access_type = self._normalize_access_type(access_type) - # - # if not os.path.exists(uri): - # if default is None or access_type == AccessType.FILE_PATH: - # raise FileNotFoundError(f"Object with URI {uri} does not exist.") - # - # return default - # if access_type == AccessType.FILE_PATH: - # return uri - # - # if access_type == AccessType.JSON_STR: - # raw = await self._cache.get_json(uri, no_cache=not cache) - # return json_dumps_wrapper(raw) - - # raw = await self._cache.get_json(uri, no_cache=not cache) - # - # return simplejson.loads(raw, allow_nan=True) @async_and_sync async def put_by_uri(self, obj, uri: str): route, route_args, kwargs = parse_uri(uri) await self._put(obj, route, route_args, **kwargs) - # if not isinstance(uri, str): - # uri = str(uri) - # if uri.startswith("."): - # uri = get_uri(os.path.join(self._basedir, uri)) - - # - # if not uri.startswith(self._basedir): - # raise PermissionError( - # f"URI {uri} is out of bounds of the current aerovaldb connection." - # ) - # if not os.path.exists(os.path.dirname(uri)): - # os.makedirs(os.path.dirname(uri)) - # if isinstance(obj, str): - # json = obj - # else: - # json = json_dumps_wrapper(obj) - # with open(uri, "w") as f: - # f.write(json) def _get_lock_file(self) -> str: os.makedirs(os.path.expanduser("~/.aerovaldb/.lock/"), exist_ok=True) diff --git a/tests/jsondb/test_jsonfiledb.py b/tests/jsondb/test_jsonfiledb.py index 87c0506..ef1679d 100644 --- a/tests/jsondb/test_jsonfiledb.py +++ b/tests/jsondb/test_jsonfiledb.py @@ -1,11 +1,7 @@ -import asyncio -import math -import os -import random - import pytest import simplejson # type: ignore import aerovaldb +from aerovaldb.jsondb.jsonfiledb import AerovalJsonFileDB @pytest.mark.asyncio @@ -102,3 +98,12 @@ def test_getter_with_default_error(): db.get_by_uri( "/v0/report/project/experiment/invalid-json", default={"data": "data"} ) + + +def test_jsonfiledb__get_uri_for_file(tmp_path): + with aerovaldb.open(f"json_files:{str(tmp_path)}") as db: + db: AerovalJsonFileDB + assert ( + db._get_uri_for_file(str(tmp_path / "project/experiments.json")) + == "/v0/experiments/project" + ) From 4836e138200cc8bc9793706e05c4f8b629ba1782 Mon Sep 17 00:00:00 2001 From: thorbjoernl <51087536+thorbjoernl@users.noreply.github.com> Date: Fri, 2 Aug 2024 09:38:10 +0200 Subject: [PATCH 09/28] List api returns new uri --- src/aerovaldb/jsondb/jsonfiledb.py | 132 +++++++++++-------------- src/aerovaldb/jsondb/templatemapper.py | 21 ++++ src/aerovaldb/utils.py | 2 +- 3 files changed, 82 insertions(+), 73 deletions(-) diff --git a/src/aerovaldb/jsondb/jsonfiledb.py b/src/aerovaldb/jsondb/jsonfiledb.py index b13b138..7267178 100644 --- a/src/aerovaldb/jsondb/jsonfiledb.py +++ b/src/aerovaldb/jsondb/jsonfiledb.py @@ -24,6 +24,7 @@ TemplateMapper, DataVersionToTemplateMapper, PriorityDataVersionToTemplateMapper, + ConstantTemplateMapper, SkipMapper, ) from .filter import filter_heatmap, filter_regional_stats @@ -62,74 +63,52 @@ def __init__(self, basedir: str | Path, /, use_async: bool = False): self.PATH_LOOKUP: dict[str, list[TemplateMapper]] = { ROUTE_GLOB_STATS: [ - DataVersionToTemplateMapper( - "./{project}/{experiment}/hm/glob_stats_{frequency}.json", - version_provider=self._get_version, + ConstantTemplateMapper( + "./{project}/{experiment}/hm/glob_stats_{frequency}.json" ) ], ROUTE_REG_STATS: [ # Same as glob_stats - DataVersionToTemplateMapper( - "./{project}/{experiment}/hm/glob_stats_{frequency}.json", - version_provider=self._get_version, + ConstantTemplateMapper( + "./{project}/{experiment}/hm/glob_stats_{frequency}.json" ) ], ROUTE_HEATMAP: [ # Same as glob_stats - DataVersionToTemplateMapper( - "./{project}/{experiment}/hm/glob_stats_{frequency}.json", - version_provider=self._get_version, + ConstantTemplateMapper( + "./{project}/{experiment}/hm/glob_stats_{frequency}.json" ) ], ROUTE_CONTOUR: [ - DataVersionToTemplateMapper( - "./{project}/{experiment}/contour/{obsvar}_{model}.geojson", - version_provider=self._get_version, + ConstantTemplateMapper( + "./{project}/{experiment}/contour/{obsvar}_{model}.geojson" ) ], ROUTE_TIMESERIES: [ - DataVersionToTemplateMapper( - "./{project}/{experiment}/ts/{location}_{network}-{obsvar}_{layer}.json", - version_provider=self._get_version, + ConstantTemplateMapper( + "./{project}/{experiment}/ts/{location}_{network}-{obsvar}_{layer}.json" ) ], ROUTE_TIMESERIES_WEEKLY: [ - DataVersionToTemplateMapper( - "./{project}/{experiment}/ts/diurnal/{location}_{network}-{obsvar}_{layer}.json", - version_provider=self._get_version, + ConstantTemplateMapper( + "./{project}/{experiment}/ts/diurnal/{location}_{network}-{obsvar}_{layer}.json" ) ], - ROUTE_EXPERIMENTS: [ - PriorityDataVersionToTemplateMapper(["./{project}/experiments.json"]) - ], + ROUTE_EXPERIMENTS: [ConstantTemplateMapper("./{project}/experiments.json")], ROUTE_CONFIG: [ - PriorityDataVersionToTemplateMapper( - ["./{project}/{experiment}/cfg_{project}_{experiment}.json"] - ) - ], - ROUTE_MENU: [ - DataVersionToTemplateMapper( - "./{project}/{experiment}/menu.json", - version_provider=self._get_version, + ConstantTemplateMapper( + "./{project}/{experiment}/cfg_{project}_{experiment}.json" ) ], + ROUTE_MENU: [ConstantTemplateMapper("./{project}/{experiment}/menu.json")], ROUTE_STATISTICS: [ - DataVersionToTemplateMapper( - "./{project}/{experiment}/statistics.json", - version_provider=self._get_version, - ) + ConstantTemplateMapper("./{project}/{experiment}/statistics.json") ], ROUTE_RANGES: [ - DataVersionToTemplateMapper( - "./{project}/{experiment}/ranges.json", - version_provider=self._get_version, - ) + ConstantTemplateMapper("./{project}/{experiment}/ranges.json") ], ROUTE_REGIONS: [ - DataVersionToTemplateMapper( - "./{project}/{experiment}/regions.json", - version_provider=self._get_version, - ) + ConstantTemplateMapper("./{project}/{experiment}/regions.json") ], ROUTE_MODELS_STYLE: [ PriorityDataVersionToTemplateMapper( @@ -164,9 +143,8 @@ def __init__(self, basedir: str | Path, /, use_async: bool = False): ), ], ROUTE_PROFILES: [ - DataVersionToTemplateMapper( - "./{project}/{experiment}/profiles/{location}_{network}-{obsvar}.json", - version_provider=self._get_version, + ConstantTemplateMapper( + "./{project}/{experiment}/profiles/{location}_{network}-{obsvar}.json" ) ], ROUTE_HEATMAP_TIMESERIES: [ @@ -188,22 +166,17 @@ def __init__(self, basedir: str | Path, /, use_async: bool = False): ), ], ROUTE_FORECAST: [ - DataVersionToTemplateMapper( - "./{project}/{experiment}/forecast/{region}_{network}-{obsvar}_{layer}.json", - version_provider=self._get_version, + ConstantTemplateMapper( + "./{project}/{experiment}/forecast/{region}_{network}-{obsvar}_{layer}.json" ) ], ROUTE_GRIDDED_MAP: [ - DataVersionToTemplateMapper( - "./{project}/{experiment}/contour/{obsvar}_{model}.json", - version_provider=self._get_version, + ConstantTemplateMapper( + "./{project}/{experiment}/contour/{obsvar}_{model}.json" ) ], ROUTE_REPORT: [ - DataVersionToTemplateMapper( - "./reports/{project}/{experiment}/{title}.json", - version_provider=self._get_version, - ) + ConstantTemplateMapper("./reports/{project}/{experiment}/{title}.json") ], } @@ -237,7 +210,7 @@ async def _get_version(self, project: str, experiment: str) -> Version: version = Version("0.0.1") finally: return version - except simplejson.errors.JSONDecodeError: + except simplejson.JSONDecodeError: # Work around for https://github.com/metno/aerovaldb/issues/28 return Version("0.14.0") @@ -278,6 +251,16 @@ async def _get_template(self, route: str, substitutions: dict) -> str: return file_path_template + def _get_templates(self, route: str) -> list[str]: + templates = list() + + for f in self.PATH_LOOKUP[route]: + templates.extend(f.get_templates_without_constraints()) + if isinstance(f, ConstantTemplateMapper): + break + + return templates + async def _get( self, route, @@ -484,8 +467,7 @@ async def get_heatmap( cache=True, ) - @async_and_sync - async def _get_uri_for_file(self, file_path: str) -> str: + def _get_uri_for_file(self, file_path: str) -> str: """ For the provided data file path, returns the corresponding URI. @@ -495,20 +477,26 @@ async def _get_uri_for_file(self, file_path: str) -> str: file_path = os.path.relpath(file_path, start=self._basedir) for route in self.PATH_LOOKUP: - template = await self._get_template(route, {}) - route_arg_names = extract_substitutions(template) + templates = self._get_templates(route) - try: - all_args = parse_formatted_string(f"./{template}", file_path) + for t in templates: + route_arg_names = extract_substitutions(t) - route_args = {k: v for k, v in all_args.items() if k in route_arg_names} - kwargs = { - k: v for k, v in all_args.items() if not (k in route_arg_names) - } - except: - continue - else: - return build_uri(route, route_args, kwargs) + try: + all_args = parse_formatted_string(t, f"./{file_path}") + + route_args = { + k: v for k, v in all_args.items() if k in route_arg_names + } + kwargs = { + k: v for k, v in all_args.items() if not (k in route_arg_names) + } + except: + continue + else: + return build_uri(route, route_args, kwargs) + + raise ValueError(f"Unable to build URI for file path {file_path}") def list_glob_stats( self, project: str, experiment: str @@ -527,7 +515,7 @@ def list_glob_stats( glb = glb.format(project=project, experiment=experiment) for f in glob.glob(glb): - yield f + yield self._get_uri_for_file(f) def list_timeseries( self, project: str, experiment: str @@ -552,7 +540,7 @@ def list_timeseries( glb = glb.format(project=project, experiment=experiment) for f in glob.glob(glb): - yield f + yield self._get_uri_for_file(f) def list_map(self, project: str, experiment: str) -> Generator[str, None, None]: template = str( @@ -577,7 +565,7 @@ def list_map(self, project: str, experiment: str) -> Generator[str, None, None]: glb = glb.format(project=project, experiment=experiment) for f in glob.glob(glb): - yield f + yield self._get_uri_for_file(f) def _list_experiments( self, project: str, /, has_results: bool = False diff --git a/src/aerovaldb/jsondb/templatemapper.py b/src/aerovaldb/jsondb/templatemapper.py index cbaa7c4..ab5f6b2 100644 --- a/src/aerovaldb/jsondb/templatemapper.py +++ b/src/aerovaldb/jsondb/templatemapper.py @@ -31,6 +31,9 @@ class TemplateMapper(abc.ABC): async def __call__(self, *args, version_provider: VersionProvider, **kwargs) -> str: raise NotImplementedError + def get_templates_without_constraints(self) -> list[str]: + raise NotImplementedError + class DataVersionToTemplateMapper(TemplateMapper): """ @@ -69,6 +72,9 @@ def __init__( self.version_provider = version_provider + def get_templates_without_constraints(self): + return [self.template] + @async_and_sync async def __call__(self, *args, **kwargs) -> str: logger.debug(f"Trying template string {self.template}") @@ -112,3 +118,18 @@ async def __call__(self, *args, **kwargs) -> str: raise SkipMapper return selected_template + + def get_templates_without_constraints(self): + return self.templates + + +class ConstantTemplateMapper(TemplateMapper): + def __init__(self, template: str): + self.template = template + + @async_and_sync + async def __call__(self, *args, **kwargs) -> str: + return self.template + + def get_templates_without_constraints(self): + return [self.template] diff --git a/src/aerovaldb/utils.py b/src/aerovaldb/utils.py index 8b835b3..04de952 100644 --- a/src/aerovaldb/utils.py +++ b/src/aerovaldb/utils.py @@ -85,7 +85,7 @@ def parse_uri(uri: str) -> tuple[str, dict[str, str], dict[str, str]]: def build_uri(route: str, route_args: dict, kwargs: dict = {}) -> str: uri = route.format(**route_args) - if not kwargs: + if kwargs: queries = "&".join([f"{k}={v}" for k, v in kwargs.items()]) uri = f"{uri}?{queries}" From dffdfa4d3c3009270fa0453224ff1c5ef8a74f17 Mon Sep 17 00:00:00 2001 From: thorbjoernl <51087536+thorbjoernl@users.noreply.github.com> Date: Mon, 5 Aug 2024 11:53:13 +0200 Subject: [PATCH 10/28] WIP --- scripts/build_sqlite_test_database.py | 33 ++++++++++ scripts/tmp.py | 46 ++++++++++++++ src/aerovaldb/aerovaldb.py | 7 +++ src/aerovaldb/jsondb/cache.py | 4 +- src/aerovaldb/jsondb/jsonfiledb.py | 59 ++++++++++++------ src/aerovaldb/sqlitedb/sqlitedb.py | 28 ++++++--- src/aerovaldb/sqlitedb/utils.py | 10 --- src/aerovaldb/utils.py | 16 ++++- tests/sqlitedb/test_utils.py | 16 ----- .../hm/ts/_network-obsvar-layer.json | 3 - .../hm/ts/_network-obsvar-layer.json | 3 - tests/test-db/sqlite/test.sqlite | Bin 0 -> 159744 bytes tests/test_aerovaldb.py | 22 ++++++- tests/test_utils.py | 16 ++++- 14 files changed, 196 insertions(+), 67 deletions(-) create mode 100644 scripts/build_sqlite_test_database.py create mode 100644 scripts/tmp.py delete mode 100644 tests/test-db/json/project-no-experiments-json/experiment/hm/ts/_network-obsvar-layer.json delete mode 100644 tests/test-db/json/project/experiment/hm/ts/_network-obsvar-layer.json create mode 100644 tests/test-db/sqlite/test.sqlite diff --git a/scripts/build_sqlite_test_database.py b/scripts/build_sqlite_test_database.py new file mode 100644 index 0000000..8a81cd2 --- /dev/null +++ b/scripts/build_sqlite_test_database.py @@ -0,0 +1,33 @@ +import os +import aerovaldb +import re + + +if os.path.exists("tests/test-db/sqlite/test.sqlite"): + os.remove("tests/test-db/sqlite/test.sqlite") + +jsondb = aerovaldb.open("json_files:tests/test-db/json") +sqlitedb = aerovaldb.open("sqlitedb:tests/test-db/sqlite/test.sqlite") + +data = jsondb.get_config( + "project", "experiment", access_type=aerovaldb.AccessType.FILE_PATH, default="{}" +) +print(data) +print(jsondb._get_uri_for_file(data)) +print( + jsondb.get_by_uri( + jsondb._get_uri_for_file(data), access_type=aerovaldb.AccessType.JSON_STR + ) +) + +sqlitedb.put_by_uri(data, jsondb._get_uri_for_file(data)) + +for i, uri in enumerate(list(jsondb.list_all())): + print(f"Processing uri {uri}") + data = jsondb.get_by_uri( + uri, access_type=aerovaldb.AccessType.JSON_STR, default="{}" + ) + sqlitedb.put_by_uri(data, uri) + +print(f"jsondb number of assets: {len(list(jsondb.list_all()))}") +# print(f"sqlite number of assets: {len(list(sqlitedb.list_all()))}") diff --git a/scripts/tmp.py b/scripts/tmp.py new file mode 100644 index 0000000..9a164c8 --- /dev/null +++ b/scripts/tmp.py @@ -0,0 +1,46 @@ +import simplejson +import sqlite3 + + +con = sqlite3.connect(":memory:") + +data = simplejson.dumps({"test": 1234}) + +print(data) + +cur = con.cursor() + +cur.execute( + """ + CREATE TABLE test(key, value) + """ +) + +con.commit() + +cur.execute( + """ + INSERT INTO test + VALUES(?, ?) + """, + ("test", data) +) + +con.commit() + +cur.execute( + """ + SELECT value FROM test + WHERE key='test' + """ +) +print(simplejson.loads(cur.fetchone()[0])) + +import aerovaldb + +with aerovaldb.open("tests/test-db/json") as db: + print(data) + + #db.put_by_uri(data, "/v0/config/project/experiment") + + print(db.get_by_uri("/v0/config/project/experiment", access_type=aerovaldb.AccessType.JSON_STR)) \ No newline at end of file diff --git a/src/aerovaldb/aerovaldb.py b/src/aerovaldb/aerovaldb.py index 726ca87..a08af67 100644 --- a/src/aerovaldb/aerovaldb.py +++ b/src/aerovaldb/aerovaldb.py @@ -1196,3 +1196,10 @@ def _normalize_access_type( return default assert False + + def list_all(self) -> Generator[str, None, None]: + """Iterator to list over the URI of each object + stored in the current aerovaldb connection, returning + the URI of each. + """ + raise NotImplementedError diff --git a/src/aerovaldb/jsondb/cache.py b/src/aerovaldb/jsondb/cache.py index 9ddcb72..91ad610 100644 --- a/src/aerovaldb/jsondb/cache.py +++ b/src/aerovaldb/jsondb/cache.py @@ -129,7 +129,7 @@ async def _read_json(self, file_path: str | Path) -> str: return await f.read() with open(abspath, "r") as f: - return f.read() + return f.read().replace("\n", "") def _get(self, abspath: str) -> str: """Returns an element from the cache.""" @@ -140,7 +140,7 @@ def _get(self, abspath: str) -> str: def _put(self, abspath: str, *, json: str, modified: float): self._cache[abspath] = { - "json": json, + "json": "".join(json.split(r"\n")), "last_modified": os.path.getmtime(abspath), } while self.size > self._max_size: diff --git a/src/aerovaldb/jsondb/jsonfiledb.py b/src/aerovaldb/jsondb/jsonfiledb.py index 7267178..4ab6f1d 100644 --- a/src/aerovaldb/jsondb/jsonfiledb.py +++ b/src/aerovaldb/jsondb/jsonfiledb.py @@ -19,6 +19,7 @@ parse_uri, parse_formatted_string, build_uri, + extract_substitutions, ) from .templatemapper import ( TemplateMapper, @@ -34,9 +35,6 @@ from ..lock.lock import FakeLock, FileLock from hashlib import md5 import simplejson # type: ignore -from ..sqlitedb.utils import ( - extract_substitutions, -) # TODO: Move this to a more approriate location before merging PR. logger = logging.getLogger(__name__) @@ -302,7 +300,7 @@ async def _get( if access_type == AccessType.JSON_STR: raw = await self._cache.get_json(file_path, no_cache=not use_caching) - return json_dumps_wrapper(raw) + return raw raw = await self._cache.get_json(file_path, no_cache=not use_caching) @@ -474,27 +472,35 @@ def _get_uri_for_file(self, file_path: str) -> str: :param file_path : The file_path. """ + file_path = os.path.join(self._basedir, file_path) file_path = os.path.relpath(file_path, start=self._basedir) for route in self.PATH_LOOKUP: - templates = self._get_templates(route) + # templates = self._get_templates(route) + if file_path.startswith("reports/"): + str = "/".join(file_path.split("/")[1:3]) + subs = parse_formatted_string("{project}/{experiment}", str) + else: + str = "/".join(file_path.split("/")[0:2]) + subs = parse_formatted_string("{project}/{experiment}", str) - for t in templates: - route_arg_names = extract_substitutions(t) + # project = args["project"] + # experiment = args["experiment"] - try: - all_args = parse_formatted_string(t, f"./{file_path}") - - route_args = { - k: v for k, v in all_args.items() if k in route_arg_names - } - kwargs = { - k: v for k, v in all_args.items() if not (k in route_arg_names) - } - except: - continue - else: - return build_uri(route, route_args, kwargs) + template = self._get_template(route, subs) + route_arg_names = extract_substitutions(route) + + try: + all_args = parse_formatted_string(template, f"./{file_path}") + + route_args = {k: v for k, v in all_args.items() if k in route_arg_names} + kwargs = { + k: v for k, v in all_args.items() if not (k in route_arg_names) + } + except Exception: + continue + else: + return build_uri(route, route_args, kwargs) raise ValueError(f"Unable to build URI for file path {file_path}") @@ -634,3 +640,16 @@ def lock(self): return FileLock(self._get_lock_file()) return FakeLock() + + def list_all(self): + # glb = glob.iglob() + glb = glob.iglob(os.path.join(self._basedir, "./**"), recursive=True) + + for f in glb: + if os.path.isfile(f): + try: + uri = self._get_uri_for_file(f) + except (ValueError, KeyError): + continue + else: + yield uri diff --git a/src/aerovaldb/sqlitedb/sqlitedb.py b/src/aerovaldb/sqlitedb/sqlitedb.py index 3884b0e..a3b5962 100644 --- a/src/aerovaldb/sqlitedb/sqlitedb.py +++ b/src/aerovaldb/sqlitedb/sqlitedb.py @@ -5,9 +5,14 @@ from aerovaldb.exceptions import UnsupportedOperation from ..aerovaldb import AerovalDB from ..routes import * -from .utils import extract_substitutions from ..types import AccessType -from ..utils import json_dumps_wrapper, parse_uri, async_and_sync, build_uri +from ..utils import ( + json_dumps_wrapper, + parse_uri, + async_and_sync, + build_uri, + extract_substitutions, +) import os @@ -31,7 +36,8 @@ class AerovalSqliteDB(AerovalDB): "model", "modvar", "time", - ) + ), + ROUTE_MODELS_STYLE: ("project", "experiment"), } # This lookup table stores the name of the table in which json @@ -128,7 +134,7 @@ def _initialize_db(self): cur.execute( f""" - CREATE TABLE IF NOT EXISTS {table_name}({column_names},json, + CREATE TABLE IF NOT EXISTS {table_name}({column_names},json TEXT, UNIQUE({column_names})) """ @@ -170,14 +176,16 @@ async def _get(self, route, route_args, *args, **kwargs): WHERE ({columnlist}) = ({substitutionlist}) """, - route_args, + route_args | kwargs, ) fetched = cur.fetchone()[0] + fetched = fetched.replace('\\"', '"') if access_type == AccessType.JSON_STR: return fetched if access_type == AccessType.OBJ: - return simplejson.loads(fetched, allow_nan=True) + dt = simplejson.loads(fetched, allow_nan=True) + return dt assert False # Should never happen. @@ -189,7 +197,7 @@ async def _put(self, obj, route, route_args, *args, **kwargs): table_name = AerovalSqliteDB.TABLE_NAME_LOOKUP[route] columnlist, substitutionlist = self._get_column_list_and_substitution_list( - route_args + route_args | kwargs ) json = obj @@ -204,6 +212,7 @@ async def _put(self, obj, route, route_args, *args, **kwargs): """, route_args | kwargs, ) + self._con.commit() @async_and_sync async def get_by_uri( @@ -232,4 +241,9 @@ async def get_by_uri( async def put_by_uri(self, obj, uri: str): route, route_args, kwargs = parse_uri(uri) + # if isinstance(obj, str): + # obj = "".join(obj.split(r"\n")) await self._put(obj, route, route_args, **kwargs) + + def list_all(self): + raise NotImplementedError diff --git a/src/aerovaldb/sqlitedb/utils.py b/src/aerovaldb/sqlitedb/utils.py index 2ea4f36..e69de29 100644 --- a/src/aerovaldb/sqlitedb/utils.py +++ b/src/aerovaldb/sqlitedb/utils.py @@ -1,10 +0,0 @@ -import re - - -def extract_substitutions(template: str): - """ - For a python template string, extracts the names between curly brackets: - - For example 'blah blah {test} blah {test2}' returns [test, test2] - """ - return re.findall(r"\{(.*?)\}", template) diff --git a/src/aerovaldb/utils.py b/src/aerovaldb/utils.py index 04de952..82dcde1 100644 --- a/src/aerovaldb/utils.py +++ b/src/aerovaldb/utils.py @@ -1,4 +1,4 @@ -import re +import regex as re import asyncio import functools from typing import Callable, ParamSpec, TypeVar @@ -7,6 +7,15 @@ import urllib +def extract_substitutions(template: str): + """ + For a python template string, extracts the names between curly brackets: + + For example 'blah blah {test} blah {test2}' returns [test, test2] + """ + return re.findall(r"\{(.*?)\}", template) + + def json_dumps_wrapper(obj, **kwargs) -> str: """ Wrapper which calls simplejson.dumps with the correct options, known to work for objects @@ -29,10 +38,11 @@ def parse_formatted_string(template: str, s: str) -> dict: # First split on any keyword arguments, note that the names of keyword arguments will be in the # 1st, 3rd, ... positions in this list tokens = re.split(r"\{(.*?)\}", template) - keywords = tokens[1::2] - + # keywords = tokens[1::2] + keywords = extract_substitutions(template) # Now replace keyword arguments with named groups matching them. We also escape between keyword # arguments so we support meta-characters there. Re-join tokens to form our regexp pattern + tokens[1::2] = map("(?P<{}>.*)".format, keywords) tokens[0::2] = map(re.escape, tokens[0::2]) pattern = "".join(tokens) diff --git a/tests/sqlitedb/test_utils.py b/tests/sqlitedb/test_utils.py index e8e8b94..e69de29 100644 --- a/tests/sqlitedb/test_utils.py +++ b/tests/sqlitedb/test_utils.py @@ -1,16 +0,0 @@ -import pytest -from aerovaldb.sqlitedb.utils import extract_substitutions - - -@pytest.mark.parametrize( - "template,result", - ( - ("{A}{B}{C}", {"A", "B", "C"}), - ("{A}hello world{B} test {C}", {"A", "B", "C"}), - ("", set()), - ), -) -def test_extract_substitutions(template: str, result: set[str]): - l = extract_substitutions(template) - - assert set(l) == result diff --git a/tests/test-db/json/project-no-experiments-json/experiment/hm/ts/_network-obsvar-layer.json b/tests/test-db/json/project-no-experiments-json/experiment/hm/ts/_network-obsvar-layer.json deleted file mode 100644 index da0441a..0000000 --- a/tests/test-db/json/project-no-experiments-json/experiment/hm/ts/_network-obsvar-layer.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "path": "./project/experiment/hm/ts/network-obsvar-layer" -} \ No newline at end of file diff --git a/tests/test-db/json/project/experiment/hm/ts/_network-obsvar-layer.json b/tests/test-db/json/project/experiment/hm/ts/_network-obsvar-layer.json deleted file mode 100644 index da0441a..0000000 --- a/tests/test-db/json/project/experiment/hm/ts/_network-obsvar-layer.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "path": "./project/experiment/hm/ts/network-obsvar-layer" -} \ No newline at end of file diff --git a/tests/test-db/sqlite/test.sqlite b/tests/test-db/sqlite/test.sqlite new file mode 100644 index 0000000000000000000000000000000000000000..6cd7e040503fa3c4b28b2f8f177957a07d61dde6 GIT binary patch literal 159744 zcmeI*OKcm*8Nl(~B}GcKrE%g1qcE;1Dt^!*MYF0M#7&U6f|0~|y0PH~MM2QiN~TS6 z<=vHS)m2+Mha8FkHF7DCLwbmT0722B$6S(I8=#k73iOd43iQ%r03!v8qMdzEOPUW; zl4;5Ee<6{}4P#WSOS9$VP{kI|2|{eJt0+xBg{8C!~d zHS%Tb&5_i|$HPAwz8(GJ@N3cS(O*OkME*IH8TvZ>O87saBjJelqV_+vs_jrdRbNn) zdT4(%K0dD2i+1sRMK|>Y!?eBI12f+}dE(5;XkJXq&bOn`skC9_{)>s%F`50A>Z_1fBoo0G`@GQdgD2_Wb$y*OGV3W+^1T~)L7c7 zK&C!lHfougZeKObOBp}7N^way*I2$DDW=o8R3tvLw_9-=ffn+Oxykxl`=arDUcGVJ zE$9WkXv^JKu;l?|$-_#w+Lt6-3g7^)n;UcrK^jIOj$hd><)X)%8o2rM3mJmZ;mFUWq-(B-7=SG_#&>XFbzQ zr%%Kp@w2&Zhf`bBg?!thZT07)?(6r4_cjx(5X=pCvDhRC)5E=9`#~|9OFvTIHROE| z{KP#V7}UX(_P3uv2N9D+g=*O46K!{NfSm^%3Dm;=cTxaHI8j$b0i$a!WB^ zG0w{m-r+=Xf`m2p>-*EyD* z%eLjJ?so1%|LVo&W$tqK0+TzJy3gD#H}6ujV#T{?Amwb+n3h{+mY3_68kuP-VYX&u zgV3$4a}iJQc&n7!>;L~m5ub=ZJoE=S00<-e~$0-v25Qw?da>+R#AxN-yg?_D>c>Vvk6md&@vDt-*4*>)aKmY**5I_I{ z1Q0*~fh{dC>9#Dicl|e2`n6+jV?lqt|4rLD{r};yUn=5P;@PqP${8jE5I_I{1Q0*~ z0R#|0009IxK;Zk~;l#w=#CZG%ZT;$vpN{lL4e)+v&>bqEk8XC`l0EFtg@*G9C-F!8 z#GA${m@O}wwPK}5Cd-C4Jkt_e?=JctNjXCi^d33EjrX)V{B}#coibcq8JzzA@Yro- z?6#N}zY&KwU>B1m0tg_000IagfB*srAbD!|6uubRo3_$+xD*U_m!S={l6wV{>5kFPwOog+arJg0tg_0 z00IagfB*srAb`L?1UjDQ3- zav^{K0tg_000IagfB*srATSUC`u_*QN}>oLfB*srAbMfd2pW8LpvhwxFBks$R3RMrF39+gAHU9*j&X{L00IagfB*srAbe|f`%00IagfB*srAbh>KM5e~{VYM&O+$|Qzg-=1JNSdFSKY;b55w{L|KC-_yW-QWwTmeY z0tg_000IagfB*srAb@_2-x$ECe#vA|tup)jY4sSXY z0zv=*1Q0*~0R#|0009ILK;XU$6g0n%-q%I{{dUpM8Kzz;TDH9M-%|Y=eItW`ofpuQ zDgFQV{Xil^1Q0*~0R#|0009ILKmdV~Ko=i9~;@UH)_8nb$(k0As~M$I-BO?f^*NsY)e{8tj}eKx>aGx7TW zZz{psCWY9djEC`WpNVRg+ZFxJ z^9M|Q!7$}74*2IB aerovaldb.AerovalDB: @pytest.mark.asyncio -@pytest.mark.parametrize("resource", (("json_files:./tests/test-db/json",))) +@pytest.mark.parametrize( + "resource", + ( + pytest.param( + "json_files:./tests/test-db/json", + ), + pytest.param( + "sqlitedb:./tests/test-db/sqlite/test.sqlite", + ), + ), +) @GET_PARAMETRIZATION async def test_getter(resource: str, fun: str, args: list, kwargs: dict, expected): """ @@ -288,7 +298,15 @@ async def test_getter(resource: str, fun: str, args: list, kwargs: dict, expecte assert data["path"] == expected -@pytest.mark.parametrize("resource", (("json_files:./tests/test-db/json",))) +@pytest.mark.parametrize( + "resource", + ( + pytest.param( + "json_files:./tests/test-db/json", + ), + pytest.param("sqlitedb:./tests/test-db/sqlite/test.sqlite"), + ), +) @GET_PARAMETRIZATION def test_getter_sync(resource: str, fun: str, args: list, kwargs: dict, expected): with aerovaldb.open(resource, use_async=False) as db: diff --git a/tests/test_utils.py b/tests/test_utils.py index 62dc132..80df77b 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,8 +1,22 @@ -from aerovaldb.utils import parse_formatted_string, parse_uri +from aerovaldb.utils import parse_formatted_string, parse_uri, extract_substitutions import pytest from aerovaldb.routes import * +@pytest.mark.parametrize( + "template,result", + ( + ("{A}{B}{C}", {"A", "B", "C"}), + ("{A}hello world{B} test {C}", {"A", "B", "C"}), + ("", set()), + ), +) +def test_extract_substitutions(template: str, result: set[str]): + l = extract_substitutions(template) + + assert set(l) == result + + @pytest.mark.parametrize( "template,s,expected", ( From aaf6eabb5d0e86e63e85fee4a0a7b6f7c8f49d39 Mon Sep 17 00:00:00 2001 From: thorbjoernl <51087536+thorbjoernl@users.noreply.github.com> Date: Mon, 5 Aug 2024 16:22:35 +0200 Subject: [PATCH 11/28] Fix some tests --- scripts/build_sqlite_test_database.py | 34 +++++++++++---------- src/aerovaldb/jsondb/jsonfiledb.py | 4 +-- src/aerovaldb/sqlitedb/sqlitedb.py | 41 ++++++++++++++++++++++++-- src/aerovaldb/utils.py | 10 ++++--- tests/test-db/sqlite/test.sqlite | Bin 159744 -> 159744 bytes 5 files changed, 63 insertions(+), 26 deletions(-) diff --git a/scripts/build_sqlite_test_database.py b/scripts/build_sqlite_test_database.py index 8a81cd2..2693871 100644 --- a/scripts/build_sqlite_test_database.py +++ b/scripts/build_sqlite_test_database.py @@ -1,7 +1,9 @@ import os import aerovaldb -import re +# This script is a helper script to create an sqlite database +# with the same contents as the json test database. It does so +# by copying each asset from the jsondb into test.sqlite if os.path.exists("tests/test-db/sqlite/test.sqlite"): os.remove("tests/test-db/sqlite/test.sqlite") @@ -9,25 +11,25 @@ jsondb = aerovaldb.open("json_files:tests/test-db/json") sqlitedb = aerovaldb.open("sqlitedb:tests/test-db/sqlite/test.sqlite") -data = jsondb.get_config( - "project", "experiment", access_type=aerovaldb.AccessType.FILE_PATH, default="{}" -) -print(data) -print(jsondb._get_uri_for_file(data)) -print( - jsondb.get_by_uri( - jsondb._get_uri_for_file(data), access_type=aerovaldb.AccessType.JSON_STR - ) -) - -sqlitedb.put_by_uri(data, jsondb._get_uri_for_file(data)) - for i, uri in enumerate(list(jsondb.list_all())): - print(f"Processing uri {uri}") + print(f"[{i}] - Processing uri {uri}") data = jsondb.get_by_uri( uri, access_type=aerovaldb.AccessType.JSON_STR, default="{}" ) sqlitedb.put_by_uri(data, uri) +# json_list = list(jsondb.list_all()) +# sqlite_list = list(sqlitedb.list_all()) +# print("The following URIs exist in jsondb but not sqlitedb") +# for x in json_list: +# if not (x in sqlite_list): +# print(x) +# +# print("The following URIs exist in sqlitedb but not jsondb") +# +# for x in sqlite_list: +# if not (x in json_list): +# print(x) + print(f"jsondb number of assets: {len(list(jsondb.list_all()))}") -# print(f"sqlite number of assets: {len(list(sqlitedb.list_all()))}") +print(f"sqlite number of assets: {len(list(sqlitedb.list_all()))}") diff --git a/src/aerovaldb/jsondb/jsonfiledb.py b/src/aerovaldb/jsondb/jsonfiledb.py index 4ab6f1d..7dcfa25 100644 --- a/src/aerovaldb/jsondb/jsonfiledb.py +++ b/src/aerovaldb/jsondb/jsonfiledb.py @@ -12,6 +12,7 @@ from aerovaldb.aerovaldb import AerovalDB from aerovaldb.exceptions import UnusedArguments, TemplateNotFound from aerovaldb.types import AccessType +import re from ..utils import ( async_and_sync, @@ -484,9 +485,6 @@ def _get_uri_for_file(self, file_path: str) -> str: str = "/".join(file_path.split("/")[0:2]) subs = parse_formatted_string("{project}/{experiment}", str) - # project = args["project"] - # experiment = args["experiment"] - template = self._get_template(route, subs) route_arg_names = extract_substitutions(route) diff --git a/src/aerovaldb/sqlitedb/sqlitedb.py b/src/aerovaldb/sqlitedb/sqlitedb.py index a3b5962..34d11fe 100644 --- a/src/aerovaldb/sqlitedb/sqlitedb.py +++ b/src/aerovaldb/sqlitedb/sqlitedb.py @@ -76,6 +76,8 @@ def __init__(self, database: str, /, **kwargs): if not self._get_metadata_by_key("created_by") == "aerovaldb": ValueError(f"Database {database} is not a valid aerovaldb database.") + self._con.row_factory = sqlite3.Row + def _get_metadata_by_key(self, key: str) -> str: """ Returns the value associated with a key from the metadata @@ -178,8 +180,15 @@ async def _get(self, route, route_args, *args, **kwargs): """, route_args | kwargs, ) - fetched = cur.fetchone()[0] - fetched = fetched.replace('\\"', '"') + try: + fetched = cur.fetchone()[0] + except TypeError as e: + # Raising file not found error to be consistent with jsondb implementation. + # Probably should be a custom exception used by aerovaldb. + raise FileNotFoundError( + f"No object found for route, {route}, with args {route_args}, {kwargs}" + ) from e + if access_type == AccessType.JSON_STR: return fetched @@ -246,4 +255,30 @@ async def put_by_uri(self, obj, uri: str): await self._put(obj, route, route_args, **kwargs) def list_all(self): - raise NotImplementedError + cur = self._con.cursor() + for route, table in AerovalSqliteDB.TABLE_NAME_LOOKUP.items(): + cur.execute( + f""" + SELECT * FROM {table} + """ + ) + result = cur.fetchall() + + for r in result: + arg_names = extract_substitutions(route) + + route_args = {} + kwargs = {} + for k in r.keys(): + if k == "json": + continue + if k in arg_names: + route_args[k] = r[k] + else: + kwargs[k] = r[k] + + # route_args = {k: v for k, v in r.items() if k != "json" and k in arg_names} + # kwargs = {k: v for k, v in r.items() if k != "json" and not (k in arg_names)} + + route = build_uri(route, route_args, kwargs) + yield route diff --git a/src/aerovaldb/utils.py b/src/aerovaldb/utils.py index 82dcde1..b195c67 100644 --- a/src/aerovaldb/utils.py +++ b/src/aerovaldb/utils.py @@ -13,7 +13,7 @@ def extract_substitutions(template: str): For example 'blah blah {test} blah {test2}' returns [test, test2] """ - return re.findall(r"\{(.*?)\}", template) + return re.findall(r"\{([a-zA-Z-]*?)\}", template) def json_dumps_wrapper(obj, **kwargs) -> str: @@ -37,13 +37,13 @@ def parse_formatted_string(template: str, s: str) -> dict: # First split on any keyword arguments, note that the names of keyword arguments will be in the # 1st, 3rd, ... positions in this list - tokens = re.split(r"\{(.*?)\}", template) + tokens = re.split(r"\{([a-zA-Z-]*?)\}", template) # keywords = tokens[1::2] keywords = extract_substitutions(template) # Now replace keyword arguments with named groups matching them. We also escape between keyword # arguments so we support meta-characters there. Re-join tokens to form our regexp pattern - tokens[1::2] = map("(?P<{}>.*)".format, keywords) + tokens[1::2] = map("(?P<{}>[a-zA-Z-]*)".format, keywords) tokens[0::2] = map(re.escape, tokens[0::2]) pattern = "".join(tokens) @@ -53,7 +53,9 @@ def parse_formatted_string(template: str, s: str) -> dict: raise Exception("Format string did not match") # Return a dict with all of our keywords and their values - return {x: matches.group(x) for x in keywords} + result = {x: matches.group(x) for x in keywords} + + return result def parse_uri(uri: str) -> tuple[str, dict[str, str], dict[str, str]]: diff --git a/tests/test-db/sqlite/test.sqlite b/tests/test-db/sqlite/test.sqlite index 6cd7e040503fa3c4b28b2f8f177957a07d61dde6..c38d690193d3b1432a4466a39c9f5d6293582ed3 100644 GIT binary patch delta 323 zcmZp8z}fJCbAmLZ`a~ILM)i#eOY%9G_>CC&`}vJF3ks<5b44?8NGr-pONz%%{@15H zJyxDcnZ2Y~U#T{qak5=~J`WcI0|OI(GXsA!{|=yza{kF*`%KuF+&P>TWvB1wXVRR! zyH0AF023RV5(9rbzY-^{?w=gYu% zkI$EX0?e=h_73jSXH s{Xkvy{F9fwmttg@y#Kuz8?zRRvm(fis^UyQU1CtZNL15@Wgi$D0F%OXZ2$lO From 715c11c30820845f544078586909a9b7028e1208 Mon Sep 17 00:00:00 2001 From: thorbjoernl <51087536+thorbjoernl@users.noreply.github.com> Date: Tue, 6 Aug 2024 10:37:52 +0200 Subject: [PATCH 12/28] WIP --- scripts/tmp.py | 46 ----------------------------- setup.cfg | 1 + src/aerovaldb/jsondb/jsonfiledb.py | 29 +++++++++++++----- src/aerovaldb/sqlitedb/sqlitedb.py | 28 ++++++++++++------ tests/test-db/sqlite/test.sqlite | Bin 159744 -> 159744 bytes tests/test_utils.py | 6 ++-- 6 files changed, 45 insertions(+), 65 deletions(-) delete mode 100644 scripts/tmp.py diff --git a/scripts/tmp.py b/scripts/tmp.py deleted file mode 100644 index 9a164c8..0000000 --- a/scripts/tmp.py +++ /dev/null @@ -1,46 +0,0 @@ -import simplejson -import sqlite3 - - -con = sqlite3.connect(":memory:") - -data = simplejson.dumps({"test": 1234}) - -print(data) - -cur = con.cursor() - -cur.execute( - """ - CREATE TABLE test(key, value) - """ -) - -con.commit() - -cur.execute( - """ - INSERT INTO test - VALUES(?, ?) - """, - ("test", data) -) - -con.commit() - -cur.execute( - """ - SELECT value FROM test - WHERE key='test' - """ -) -print(simplejson.loads(cur.fetchone()[0])) - -import aerovaldb - -with aerovaldb.open("tests/test-db/json") as db: - print(data) - - #db.put_by_uri(data, "/v0/config/project/experiment") - - print(db.get_by_uri("/v0/config/project/experiment", access_type=aerovaldb.AccessType.JSON_STR)) \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index fa006d2..6b2731d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -29,6 +29,7 @@ install_requires = aiofile async_lru packaging + regex package_dir = =src packages = diff --git a/src/aerovaldb/jsondb/jsonfiledb.py b/src/aerovaldb/jsondb/jsonfiledb.py index 7dcfa25..dd29696 100644 --- a/src/aerovaldb/jsondb/jsonfiledb.py +++ b/src/aerovaldb/jsondb/jsonfiledb.py @@ -477,15 +477,30 @@ def _get_uri_for_file(self, file_path: str) -> str: file_path = os.path.relpath(file_path, start=self._basedir) for route in self.PATH_LOOKUP: - # templates = self._get_templates(route) - if file_path.startswith("reports/"): - str = "/".join(file_path.split("/")[1:3]) - subs = parse_formatted_string("{project}/{experiment}", str) + if not (route == ROUTE_MODELS_STYLE): + if file_path.startswith("reports/"): + str = "/".join(file_path.split("/")[1:3]) + subs = parse_formatted_string("{project}/{experiment}", str) + else: + str = "/".join(file_path.split("/")[0:2]) + subs = parse_formatted_string("{project}/{experiment}", str) + + template = self._get_template(route, subs) else: - str = "/".join(file_path.split("/")[0:2]) - subs = parse_formatted_string("{project}/{experiment}", str) + try: + subs = parse_formatted_string( + "{project}/{experiment}/models-style.json", file_path + ) + except Exception: + try: + subs = parse_formatted_string( + "{project}/models-style.json", file_path + ) + except: + continue + + template = self._get_template(route, subs) - template = self._get_template(route, subs) route_arg_names = extract_substitutions(route) try: diff --git a/src/aerovaldb/sqlitedb/sqlitedb.py b/src/aerovaldb/sqlitedb/sqlitedb.py index 34d11fe..11a5eb1 100644 --- a/src/aerovaldb/sqlitedb/sqlitedb.py +++ b/src/aerovaldb/sqlitedb/sqlitedb.py @@ -153,6 +153,8 @@ def _get_column_list_and_substitution_list(self, kwargs: dict) -> tuple[str, str return (columnlist, substitutionlist) async def _get(self, route, route_args, *args, **kwargs): + cache = kwargs.pop("cache", False) + default = kwargs.pop("default", None) assert len(args) == 0 access_type = self._normalize_access_type(kwargs.pop("access_type", None)) @@ -168,20 +170,28 @@ async def _get(self, route, route_args, *args, **kwargs): table_name = AerovalSqliteDB.TABLE_NAME_LOOKUP[route] - columnlist, substitutionlist = self._get_column_list_and_substitution_list( - route_args - ) - + args = route_args | kwargs + columnlist, substitutionlist = self._get_column_list_and_substitution_list(args) cur.execute( f""" - SELECT json FROM {table_name} + SELECT * FROM {table_name} WHERE ({columnlist}) = ({substitutionlist}) """, - route_args | kwargs, + args, ) try: - fetched = cur.fetchone()[0] + fetched = cur.fetchall() + for r in fetched: + for k in r.keys(): + if k == "json": + continue + if not (k in route_args | kwargs) and r[k] is not None: + break + else: + fetched = r + break + except TypeError as e: # Raising file not found error to be consistent with jsondb implementation. # Probably should be a custom exception used by aerovaldb. @@ -190,10 +200,10 @@ async def _get(self, route, route_args, *args, **kwargs): ) from e if access_type == AccessType.JSON_STR: - return fetched + return fetched["json"] if access_type == AccessType.OBJ: - dt = simplejson.loads(fetched, allow_nan=True) + dt = simplejson.loads(fetched["json"], allow_nan=True) return dt assert False # Should never happen. diff --git a/tests/test-db/sqlite/test.sqlite b/tests/test-db/sqlite/test.sqlite index c38d690193d3b1432a4466a39c9f5d6293582ed3..b3e25c630e79bd56bd6a3392d4c187ee6583c045 100644 GIT binary patch delta 88 zcmZp8z}fJCbAmLZ#zYxsMvcaVtqF`v^0}D#eHr*S@R#uWZWa`<=Vw)AW|3x?d~d(f rb= Date: Fri, 9 Aug 2024 13:58:11 +0200 Subject: [PATCH 13/28] Make more tests apply to both json and sqlite --- src/aerovaldb/aerovaldb.py | 30 ++++++++-- src/aerovaldb/jsondb/jsonfiledb.py | 59 ++++++++++++++++--- src/aerovaldb/lock/__init__.py | 1 + src/aerovaldb/serialization/__init__.py | 0 .../serialization/default_serialization.py | 15 ----- src/aerovaldb/sqlitedb/sqlitedb.py | 34 ++++++++++- tests/jsondb/test_jsonfiledb.py | 47 --------------- tests/test_aerovaldb.py | 57 +++++++++++++++++- 8 files changed, 167 insertions(+), 76 deletions(-) delete mode 100644 src/aerovaldb/serialization/__init__.py delete mode 100644 src/aerovaldb/serialization/default_serialization.py diff --git a/src/aerovaldb/aerovaldb.py b/src/aerovaldb/aerovaldb.py index a08af67..ccf5b26 100644 --- a/src/aerovaldb/aerovaldb.py +++ b/src/aerovaldb/aerovaldb.py @@ -5,6 +5,7 @@ from .types import AccessType from .utils import async_and_sync from .routes import * +from .lock import FakeLock, FileLock def get_method(route): @@ -211,7 +212,11 @@ async def put_glob_stats( raise NotImplementedError def list_glob_stats( - self, project: str, experiment: str + self, + project: str, + experiment: str, + /, + access_type: str | AccessType = AccessType.URI, ) -> Generator[str, None, None]: """Generator that lists the URI for each glob_stats object. @@ -339,7 +344,11 @@ async def put_timeseries( raise NotImplementedError def list_timeseries( - self, project: str, experiment: str + self, + project: str, + experiment: str, + /, + access_type: str | AccessType = AccessType.URI, ) -> Generator[str, None, None]: """Returns a list of URIs of all timeseries files for a given project and experiment id. @@ -761,7 +770,13 @@ async def put_map( """ raise NotImplementedError - def list_map(self, project: str, experiment: str) -> Generator[str, None, None]: + def list_map( + self, + project: str, + experiment: str, + /, + access_type: str | AccessType = AccessType.URI, + ) -> Generator[str, None, None]: """Lists all map files for a given project / experiment combination. :param project: The project ID. @@ -1197,9 +1212,16 @@ def _normalize_access_type( assert False - def list_all(self) -> Generator[str, None, None]: + def list_all( + self, access_type: str | AccessType = AccessType.URI + ) -> Generator[str, None, None]: """Iterator to list over the URI of each object stored in the current aerovaldb connection, returning the URI of each. + + :param access_type : What to return (This is implementation specific, but in general + each implementation should support URI). + :raises : UnsupportedOperation + For non-supported acces types. """ raise NotImplementedError diff --git a/src/aerovaldb/jsondb/jsonfiledb.py b/src/aerovaldb/jsondb/jsonfiledb.py index dd29696..5f6371d 100644 --- a/src/aerovaldb/jsondb/jsonfiledb.py +++ b/src/aerovaldb/jsondb/jsonfiledb.py @@ -12,7 +12,6 @@ from aerovaldb.aerovaldb import AerovalDB from aerovaldb.exceptions import UnusedArguments, TemplateNotFound from aerovaldb.types import AccessType -import re from ..utils import ( async_and_sync, @@ -33,7 +32,7 @@ from ..exceptions import UnsupportedOperation from .cache import JSONLRUCache from ..routes import * -from ..lock.lock import FakeLock, FileLock +from ..lock import FakeLock, FileLock from hashlib import md5 import simplejson # type: ignore @@ -518,8 +517,16 @@ def _get_uri_for_file(self, file_path: str) -> str: raise ValueError(f"Unable to build URI for file path {file_path}") def list_glob_stats( - self, project: str, experiment: str + self, + project: str, + experiment: str, + /, + access_type: str | AccessType = AccessType.URI, ) -> Generator[str, None, None]: + access_type = self._normalize_access_type(access_type) + if access_type in [AccessType.OBJ, AccessType.JSON_STR]: + raise UnsupportedOperation(f"Unsupported accesstype, {access_type}") + template = str( os.path.realpath( os.path.join( @@ -534,11 +541,23 @@ def list_glob_stats( glb = glb.format(project=project, experiment=experiment) for f in glob.glob(glb): + if access_type == AccessType.FILE_PATH: + yield f + continue + yield self._get_uri_for_file(f) def list_timeseries( - self, project: str, experiment: str + self, + project: str, + experiment: str, + /, + access_type: str | AccessType = AccessType.URI, ) -> Generator[str, None, None]: + access_type = self._normalize_access_type(access_type) + if access_type in [AccessType.OBJ, AccessType.JSON_STR]: + raise UnsupportedOperation(f"Unsupported accesstype, {access_type}") + template = str( os.path.realpath( os.path.join( @@ -559,9 +578,23 @@ def list_timeseries( glb = glb.format(project=project, experiment=experiment) for f in glob.glob(glb): + if access_type == AccessType.FILE_PATH: + yield f + continue + yield self._get_uri_for_file(f) - def list_map(self, project: str, experiment: str) -> Generator[str, None, None]: + def list_map( + self, + project: str, + experiment: str, + /, + access_type: str | AccessType = AccessType.URI, + ) -> Generator[str, None, None]: + access_type = self._normalize_access_type(access_type) + if access_type in [AccessType.OBJ, AccessType.JSON_STR]: + raise UnsupportedOperation(f"Unsupported accesstype, {access_type}") + template = str( os.path.realpath( os.path.join( @@ -584,6 +617,10 @@ def list_map(self, project: str, experiment: str) -> Generator[str, None, None]: glb = glb.format(project=project, experiment=experiment) for f in glob.glob(glb): + if access_type == AccessType.FILE_PATH: + yield f + continue + yield self._get_uri_for_file(f) def _list_experiments( @@ -654,12 +691,20 @@ def lock(self): return FakeLock() - def list_all(self): - # glb = glob.iglob() + def list_all(self, access_type: str | AccessType = AccessType.URI): + access_type = self._normalize_access_type(access_type) + + if access_type in [AccessType.OBJ, AccessType.JSON_STR]: + UnsupportedOperation(f"Accesstype {access_type} not supported.") + glb = glob.iglob(os.path.join(self._basedir, "./**"), recursive=True) for f in glb: if os.path.isfile(f): + if access_type == AccessType.FILE_PATH: + yield f + continue + try: uri = self._get_uri_for_file(f) except (ValueError, KeyError): diff --git a/src/aerovaldb/lock/__init__.py b/src/aerovaldb/lock/__init__.py index e69de29..49120c8 100644 --- a/src/aerovaldb/lock/__init__.py +++ b/src/aerovaldb/lock/__init__.py @@ -0,0 +1 @@ +from .lock import FakeLock, FileLock diff --git a/src/aerovaldb/serialization/__init__.py b/src/aerovaldb/serialization/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/aerovaldb/serialization/default_serialization.py b/src/aerovaldb/serialization/default_serialization.py deleted file mode 100644 index fa7f6a7..0000000 --- a/src/aerovaldb/serialization/default_serialization.py +++ /dev/null @@ -1,15 +0,0 @@ -import sys - -try: - import numpy as np -except ImportError: - # Only needed for serialization typing. - pass - - -def default_serialization(val): - if "numpy" in sys.modules: - if isinstance(val, np.float64): - return float(val) - - raise TypeError diff --git a/src/aerovaldb/sqlitedb/sqlitedb.py b/src/aerovaldb/sqlitedb/sqlitedb.py index 11a5eb1..bc8274a 100644 --- a/src/aerovaldb/sqlitedb/sqlitedb.py +++ b/src/aerovaldb/sqlitedb/sqlitedb.py @@ -2,7 +2,7 @@ import simplejson # type: ignore import aerovaldb -from aerovaldb.exceptions import UnsupportedOperation +from ..exceptions import UnsupportedOperation, UnusedArguments from ..aerovaldb import AerovalDB from ..routes import * from ..types import AccessType @@ -14,6 +14,8 @@ extract_substitutions, ) import os +from ..lock import FakeLock, FileLock +from hashlib import md5 class AerovalSqliteDB(AerovalDB): @@ -66,6 +68,12 @@ class AerovalSqliteDB(AerovalDB): } def __init__(self, database: str, /, **kwargs): + use_locking = os.environ.get("AVDB_USE_LOCKING", "") + if use_locking == "0" or use_locking == "": + self._use_real_lock = False + else: + self._use_real_lock = True + self._dbfile = database if not os.path.exists(database): @@ -155,7 +163,8 @@ def _get_column_list_and_substitution_list(self, kwargs: dict) -> tuple[str, str async def _get(self, route, route_args, *args, **kwargs): cache = kwargs.pop("cache", False) default = kwargs.pop("default", None) - assert len(args) == 0 + if len(args) > 0: + raise UnusedArguments("Unexpected arguments.") access_type = self._normalize_access_type(kwargs.pop("access_type", None)) if access_type in [AccessType.FILE_PATH]: @@ -182,6 +191,13 @@ async def _get(self, route, route_args, *args, **kwargs): ) try: fetched = cur.fetchall() + if not fetched: + if default is not None: + return default + # For now, raising a FileNotFoundError, since jsondb does and we want + # them to be interchangeable. Probably should be a aerovaldb custom + # exception. + raise FileNotFoundError("Object not found") for r in fetched: for k in r.keys(): if k == "json": @@ -292,3 +308,17 @@ def list_all(self): route = build_uri(route, route_args, kwargs) yield route + + def _get_lock_file(self) -> str: + os.makedirs(os.path.expanduser("~/.aerovaldb/.lock/"), exist_ok=True) + lock_file = os.path.join( + os.environ.get("AVDB_LOCK_DIR", os.path.expanduser("~/.aerovaldb/.lock/")), + md5(self._dbfile.encode()).hexdigest(), + ) + return lock_file + + def lock(self): + if self._use_real_lock: + return FileLock(self._get_lock_file()) + + return FakeLock() diff --git a/tests/jsondb/test_jsonfiledb.py b/tests/jsondb/test_jsonfiledb.py index ef1679d..5d161aa 100644 --- a/tests/jsondb/test_jsonfiledb.py +++ b/tests/jsondb/test_jsonfiledb.py @@ -4,36 +4,6 @@ from aerovaldb.jsondb.jsonfiledb import AerovalJsonFileDB -@pytest.mark.asyncio -async def test_file_does_not_exist(): - with aerovaldb.open("json_files:./tests/test-db/json") as db: - with pytest.raises(FileNotFoundError): - await db.get_config( - "non-existent-project", - "experiment", - access_type=aerovaldb.AccessType.FILE_PATH, - ) - - -def test_exception_on_unexpected_args(): - """ - https://github.com/metno/aerovaldb/issues/19 - """ - with aerovaldb.open("json_files:./tests/test-db/json") as db: - with pytest.raises(aerovaldb.UnusedArguments): - db.get_config("project", "experiment", "excessive-positional-argument") - - -@pytest.mark.xfail -def test_exception_on_unexpected_kwargs(): - """ - https://github.com/metno/aerovaldb/issues/19 - """ - with aerovaldb.open("json_files:./tests/test-db/json") as db: - with pytest.raises(ValueError): - db.get_experiments("project", unused_kwarg="test") - - def test_version1(): """ """ with aerovaldb.open("json_files:./tests/test-db/json") as db: @@ -83,23 +53,6 @@ def test_list_glob_stats(): assert len(glob_stats) == 1 -def test_getter_with_default(): - with aerovaldb.open("json_files:./tests/test-db/json") as db: - data = db.get_by_uri( - "/v0/experiments/non-existent-project", default={"data": "test"} - ) - - assert data["data"] == "test" - - -def test_getter_with_default_error(): - with aerovaldb.open("json_files:./tests/test-db/json") as db: - with pytest.raises(simplejson.JSONDecodeError): - db.get_by_uri( - "/v0/report/project/experiment/invalid-json", default={"data": "data"} - ) - - def test_jsonfiledb__get_uri_for_file(tmp_path): with aerovaldb.open(f"json_files:{str(tmp_path)}") as db: db: AerovalJsonFileDB diff --git a/tests/test_aerovaldb.py b/tests/test_aerovaldb.py index 6f36a6e..adb08b7 100644 --- a/tests/test_aerovaldb.py +++ b/tests/test_aerovaldb.py @@ -5,6 +5,7 @@ # - json_files: tests/jsondb/test_jsonfiledb.py # - sqlitedb: tests/sqlitedb/test_sqlitedb.py +import simplejson import aerovaldb import pytest import random @@ -13,7 +14,7 @@ @pytest.fixture def tmpdb(tmp_path, dbtype: str) -> aerovaldb.AerovalDB: """Fixture encapsulating logic for each tested database connection to create - a temporary database and connect to it.""" + a fresh, temporary database and connect to it.""" if dbtype == "json_files": return aerovaldb.open(f"json_files:{str(tmp_path)}") elif dbtype == "sqlitedb": @@ -22,6 +23,20 @@ def tmpdb(tmp_path, dbtype: str) -> aerovaldb.AerovalDB: assert False +TESTDB_PARAMETRIZATION = pytest.mark.parametrize( + # This is a parametrization which returns the correct resource string to access + # the prebuilt test database for each database connector. + "testdb", + ( + pytest.param( + "json_files:./tests/test-db/json", + ), + pytest.param( + "sqlitedb:./tests/test-db/sqlite/test.sqlite", + ), + ), +) + GET_PARAMETRIZATION = pytest.mark.parametrize( "fun,args,kwargs,expected", ( @@ -395,3 +410,43 @@ def test_write_and_read_of_nan(tmpdb): # See Additional Notes on #59 # https://github.com/metno/aerovaldb/pull/59 assert read["value"] is None + + +@pytest.mark.asyncio +@TESTDB_PARAMETRIZATION +async def test_file_does_not_exist(testdb): + with aerovaldb.open(testdb) as db: + with pytest.raises(FileNotFoundError): + await db.get_config( + "non-existent-project", + "experiment", + ) + + +@TESTDB_PARAMETRIZATION +def test_exception_on_unexpected_args(testdb): + """ + https://github.com/metno/aerovaldb/issues/19 + """ + with aerovaldb.open(testdb) as db: + with pytest.raises(aerovaldb.UnusedArguments): + db.get_config("project", "experiment", "excessive-positional-argument") + + +@TESTDB_PARAMETRIZATION +def test_getter_with_default(testdb): + with aerovaldb.open(testdb) as db: + data = db.get_by_uri( + "/v0/experiments/non-existent-project", default={"data": "test"} + ) + + assert data["data"] == "test" + + +@TESTDB_PARAMETRIZATION +def test_getter_with_default_error(testdb): + with aerovaldb.open(testdb) as db: + with pytest.raises(simplejson.JSONDecodeError): + db.get_by_uri( + "/v0/report/project/experiment/invalid-json", default={"data": "data"} + ) From 01d0aec6b655ae5716e007bc4ee9f2bb2c772dbd Mon Sep 17 00:00:00 2001 From: thorbjoernl <51087536+thorbjoernl@users.noreply.github.com> Date: Wed, 14 Aug 2024 10:20:57 +0200 Subject: [PATCH 14/28] Add per-entry ctime/mtime --- src/aerovaldb/jsondb/cache.py | 4 +-- src/aerovaldb/jsondb/jsonfiledb.py | 8 +++--- src/aerovaldb/sqlitedb/sqlitedb.py | 41 ++++++++++++++++++++++-------- src/aerovaldb/utils.py | 7 ++--- tests/test_aerovaldb.py | 30 +++++----------------- 5 files changed, 45 insertions(+), 45 deletions(-) diff --git a/src/aerovaldb/jsondb/cache.py b/src/aerovaldb/jsondb/cache.py index 91ad610..9ddcb72 100644 --- a/src/aerovaldb/jsondb/cache.py +++ b/src/aerovaldb/jsondb/cache.py @@ -129,7 +129,7 @@ async def _read_json(self, file_path: str | Path) -> str: return await f.read() with open(abspath, "r") as f: - return f.read().replace("\n", "") + return f.read() def _get(self, abspath: str) -> str: """Returns an element from the cache.""" @@ -140,7 +140,7 @@ def _get(self, abspath: str) -> str: def _put(self, abspath: str, *, json: str, modified: float): self._cache[abspath] = { - "json": "".join(json.split(r"\n")), + "json": json, "last_modified": os.path.getmtime(abspath), } while self.size > self._max_size: diff --git a/src/aerovaldb/jsondb/jsonfiledb.py b/src/aerovaldb/jsondb/jsonfiledb.py index 5f6371d..71a5e3c 100644 --- a/src/aerovaldb/jsondb/jsonfiledb.py +++ b/src/aerovaldb/jsondb/jsonfiledb.py @@ -208,9 +208,9 @@ async def _get_version(self, project: str, experiment: str) -> Version: version = Version("0.0.1") finally: return version - except simplejson.JSONDecodeError: - # Work around for https://github.com/metno/aerovaldb/issues/28 - return Version("0.14.0") + # except simplejson.JSONDecodeError: + # # Work around for https://github.com/metno/aerovaldb/issues/28 + # return Version("0.14.0") try: version_str = config["exp_info"]["pyaerocom_version"] @@ -503,7 +503,7 @@ def _get_uri_for_file(self, file_path: str) -> str: route_arg_names = extract_substitutions(route) try: - all_args = parse_formatted_string(template, f"./{file_path}") + all_args = parse_formatted_string(template, f"./{file_path}") # type: ignore route_args = {k: v for k, v in all_args.items() if k in route_arg_names} kwargs = { diff --git a/src/aerovaldb/sqlitedb/sqlitedb.py b/src/aerovaldb/sqlitedb/sqlitedb.py index bc8274a..32611f9 100644 --- a/src/aerovaldb/sqlitedb/sqlitedb.py +++ b/src/aerovaldb/sqlitedb/sqlitedb.py @@ -25,9 +25,8 @@ class AerovalSqliteDB(AerovalDB): # When creating a table it works to extract the substitution template # names from the route, as this constitutes all arguments. For the ones - # which have extra arguments (currently only time) the following table - # defines the override. Currently this only applies to map which has - # an extra time argument. + # which have extra arguments the following table defines the override. + # Currently this only applies to map which has an extra time argument. ROUTE_COLUMN_NAME_OVERRIDE = { ROUTE_MAP: ( "project", @@ -144,12 +143,35 @@ def _initialize_db(self): cur.execute( f""" - CREATE TABLE IF NOT EXISTS {table_name}({column_names},json TEXT, + CREATE TABLE IF NOT EXISTS {table_name}( + {column_names}, + ctime TEXT, + mtime TEXT, + json TEXT, UNIQUE({column_names})) """ ) + cur.execute( + f""" + CREATE TRIGGER IF NOT EXISTS insert_Timestamp_Trigger_{table_name} + AFTER INSERT ON {table_name} + BEGIN + UPDATE {table_name} SET ctime =STRFTIME('%Y-%m-%d %H:%M:%f', 'NOW') WHERE ROWID = NEW.ROWID; + END; + """ + ) + cur.execute( + f""" + CREATE TRIGGER IF NOT EXISTS update_Timestamp_Trigger_{table_name} + AFTER UPDATE On {table_name} + BEGIN + UPDATE {table_name} SET mtime = STRFTIME('%Y-%m-%d %H:%M:%f', 'NOW') WHERE ROWID = NEW.ROWID; + END; + """ + ) + self._con.commit() def _get_column_list_and_substitution_list(self, kwargs: dict) -> tuple[str, str]: @@ -200,7 +222,7 @@ async def _get(self, route, route_args, *args, **kwargs): raise FileNotFoundError("Object not found") for r in fetched: for k in r.keys(): - if k == "json": + if k in ("json", "ctime", "mtime"): continue if not (k in route_args | kwargs) and r[k] is not None: break @@ -247,6 +269,10 @@ async def _put(self, obj, route, route_args, *args, **kwargs): """, route_args | kwargs, ) + + self._set_metadata_by_key( + "last_modified_by", f"aerovaldb_{aerovaldb.__version__}" + ) self._con.commit() @async_and_sync @@ -276,8 +302,6 @@ async def get_by_uri( async def put_by_uri(self, obj, uri: str): route, route_args, kwargs = parse_uri(uri) - # if isinstance(obj, str): - # obj = "".join(obj.split(r"\n")) await self._put(obj, route, route_args, **kwargs) def list_all(self): @@ -303,9 +327,6 @@ def list_all(self): else: kwargs[k] = r[k] - # route_args = {k: v for k, v in r.items() if k != "json" and k in arg_names} - # kwargs = {k: v for k, v in r.items() if k != "json" and not (k in arg_names)} - route = build_uri(route, route_args, kwargs) yield route diff --git a/src/aerovaldb/utils.py b/src/aerovaldb/utils.py index b195c67..324ef53 100644 --- a/src/aerovaldb/utils.py +++ b/src/aerovaldb/utils.py @@ -48,14 +48,11 @@ def parse_formatted_string(template: str, s: str) -> dict: pattern = "".join(tokens) # Use our pattern to match the given string, raise if it doesn't match - matches = re.match(pattern, s) - if not matches: + if not (match := re.match(pattern, s)): raise Exception("Format string did not match") # Return a dict with all of our keywords and their values - result = {x: matches.group(x) for x in keywords} - - return result + return {x: match.group(x) for x in keywords} def parse_uri(uri: str) -> tuple[str, dict[str, str], dict[str, str]]: diff --git a/tests/test_aerovaldb.py b/tests/test_aerovaldb.py index adb08b7..418de92 100644 --- a/tests/test_aerovaldb.py +++ b/tests/test_aerovaldb.py @@ -286,23 +286,13 @@ def tmpdb(tmp_path, dbtype: str) -> aerovaldb.AerovalDB: @pytest.mark.asyncio -@pytest.mark.parametrize( - "resource", - ( - pytest.param( - "json_files:./tests/test-db/json", - ), - pytest.param( - "sqlitedb:./tests/test-db/sqlite/test.sqlite", - ), - ), -) +@TESTDB_PARAMETRIZATION @GET_PARAMETRIZATION -async def test_getter(resource: str, fun: str, args: list, kwargs: dict, expected): +async def test_getter(testdb: str, fun: str, args: list, kwargs: dict, expected): """ This test tests that data is read as expected from a static, fixed database. """ - with aerovaldb.open(resource, use_async=True) as db: + with aerovaldb.open(testdb, use_async=True) as db: f = getattr(db, fun) if kwargs is not None: @@ -313,18 +303,10 @@ async def test_getter(resource: str, fun: str, args: list, kwargs: dict, expecte assert data["path"] == expected -@pytest.mark.parametrize( - "resource", - ( - pytest.param( - "json_files:./tests/test-db/json", - ), - pytest.param("sqlitedb:./tests/test-db/sqlite/test.sqlite"), - ), -) +@TESTDB_PARAMETRIZATION @GET_PARAMETRIZATION -def test_getter_sync(resource: str, fun: str, args: list, kwargs: dict, expected): - with aerovaldb.open(resource, use_async=False) as db: +def test_getter_sync(testdb: str, fun: str, args: list, kwargs: dict, expected): + with aerovaldb.open(testdb, use_async=False) as db: f = getattr(db, fun) if kwargs is not None: From a5572c2c06e01a587fadc9159efbe6ec3ebb1e20 Mon Sep 17 00:00:00 2001 From: thorbjoernl <51087536+thorbjoernl@users.noreply.github.com> Date: Wed, 14 Aug 2024 10:57:21 +0200 Subject: [PATCH 15/28] Remove performance claim about locking in docs --- docs/locking.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/locking.rst b/docs/locking.rst index b806dfe..f52d2e1 100644 --- a/docs/locking.rst +++ b/docs/locking.rst @@ -5,7 +5,7 @@ To ensure consistent writes, aerovaldb provides a locking mechanism which can be For :class:`AerovalJsonFileDB` the locking mechanism uses a folder of lock files (`~/.aerovaldb/` by default) to coordinate the lock. It is important that the file system where the lock files are stored supports `fcntl `. -By default locking is disabled as it has large effect on performance. To enable, set the environment variable `AVDB_USE_LOCKING=1`. +By default locking is disabled as it may impact performance. To enable, set the environment variable `AVDB_USE_LOCKING=1`. Overriding the lock-file directory ---------------------------------- From 59533c15d8edf9a643959bd18bd5fad5747f288e Mon Sep 17 00:00:00 2001 From: thorbjoernl <51087536+thorbjoernl@users.noreply.github.com> Date: Thu, 15 Aug 2024 09:36:44 +0200 Subject: [PATCH 16/28] Fix parameter validation in jsonfiledb --- src/aerovaldb/jsondb/jsonfiledb.py | 18 ++++++++++-------- tests/jsondb/test_jsonfiledb.py | 8 ++++++++ 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/src/aerovaldb/jsondb/jsonfiledb.py b/src/aerovaldb/jsondb/jsonfiledb.py index 8a0893b..5d95b78 100644 --- a/src/aerovaldb/jsondb/jsonfiledb.py +++ b/src/aerovaldb/jsondb/jsonfiledb.py @@ -269,27 +269,28 @@ async def _get( *args, **kwargs, ): - use_caching = kwargs.get("cache", False) + use_caching = kwargs.pop("cache", False) + default = kwargs.pop("default", None) + access_type = self._normalize_access_type(kwargs.pop("access_type", None)) + if len(args) > 0: raise UnusedArguments( f"Unexpected positional arguments {args}. Jsondb does not use additional positional arguments currently." ) - logger.debug(f"Fetching data for {route}.") + substitutions = route_args | kwargs - map(validate_filename_component, substitutions.values()) + [validate_filename_component(x) for x in substitutions.values()] + + logger.debug(f"Fetching data for {route}.") path_template = await self._get_template(route, substitutions) logger.debug(f"Using template string {path_template}") relative_path = path_template.format(**substitutions) - access_type = self._normalize_access_type(kwargs.pop("access_type", None)) - file_path = str(Path(os.path.join(self._basedir, relative_path)).resolve()) logger.debug(f"Fetching file {file_path} as {access_type}-") - default = kwargs.pop("default", None) - filter_func = self.FILTERS.get(route, None) filter_vars = route_args | kwargs @@ -339,7 +340,8 @@ async def _put(self, obj, route, route_args, *args, **kwargs): ) substitutions = route_args | kwargs - map(validate_filename_component, substitutions.values()) + [validate_filename_component(x) for x in substitutions.values()] + path_template = await self._get_template(route, substitutions) relative_path = path_template.format(**substitutions) diff --git a/tests/jsondb/test_jsonfiledb.py b/tests/jsondb/test_jsonfiledb.py index 5d161aa..624a0ee 100644 --- a/tests/jsondb/test_jsonfiledb.py +++ b/tests/jsondb/test_jsonfiledb.py @@ -60,3 +60,11 @@ def test_jsonfiledb__get_uri_for_file(tmp_path): db._get_uri_for_file(str(tmp_path / "project/experiments.json")) == "/v0/experiments/project" ) + + +def test_jsonfiledb_invalid_parameter_values(): + with aerovaldb.open("json_files:./tests/test-db/json") as db: + with pytest.raises(ValueError) as e: + db.get_config("/%&/())()", "test") + + assert "is not a valid file name component" in str(e.value) From cc0f0110e6093344c8ded959d787fa7721d93903 Mon Sep 17 00:00:00 2001 From: thorbjoernl <51087536+thorbjoernl@users.noreply.github.com> Date: Thu, 15 Aug 2024 11:44:20 +0200 Subject: [PATCH 17/28] Extract lookup code out of jsondb --- src/aerovaldb/jsondb/jsonfiledb.py | 214 ++++++------------ src/aerovaldb/jsondb/uri.py | 5 - src/aerovaldb/utils/string_mapper/__init__.py | 6 + .../string_mapper/mapper.py} | 112 ++++++--- tests/{jsondb => lock}/test_lock.py | 0 .../utils/string_mapper/test_string_mapper.py | 20 ++ 6 files changed, 171 insertions(+), 186 deletions(-) delete mode 100644 src/aerovaldb/jsondb/uri.py create mode 100644 src/aerovaldb/utils/string_mapper/__init__.py rename src/aerovaldb/{jsondb/templatemapper.py => utils/string_mapper/mapper.py} (52%) rename tests/{jsondb => lock}/test_lock.py (100%) create mode 100644 tests/utils/string_mapper/test_string_mapper.py diff --git a/src/aerovaldb/jsondb/jsonfiledb.py b/src/aerovaldb/jsondb/jsonfiledb.py index 5d95b78..1864ba6 100644 --- a/src/aerovaldb/jsondb/jsonfiledb.py +++ b/src/aerovaldb/jsondb/jsonfiledb.py @@ -10,8 +10,9 @@ from pkg_resources import DistributionNotFound, get_distribution # type: ignore from aerovaldb.aerovaldb import AerovalDB -from aerovaldb.exceptions import UnusedArguments, TemplateNotFound +from aerovaldb.exceptions import UnusedArguments from aerovaldb.types import AccessType +from ..utils.string_mapper import StringMapper, VersionConstraintMapper from ..utils import ( async_and_sync, @@ -23,13 +24,6 @@ build_uri, extract_substitutions, ) -from .templatemapper import ( - TemplateMapper, - DataVersionToTemplateMapper, - PriorityDataVersionToTemplateMapper, - ConstantTemplateMapper, - SkipMapper, -) from .filter import filter_heatmap, filter_regional_stats from ..exceptions import UnsupportedOperation from .cache import JSONLRUCache @@ -62,124 +56,66 @@ def __init__(self, basedir: str | Path, /, use_async: bool = False): if not os.path.exists(self._basedir): os.makedirs(self._basedir) - self.PATH_LOOKUP: dict[str, list[TemplateMapper]] = { - ROUTE_GLOB_STATS: [ - ConstantTemplateMapper( - "./{project}/{experiment}/hm/glob_stats_{frequency}.json" - ) - ], - ROUTE_REG_STATS: [ - # Same as glob_stats - ConstantTemplateMapper( - "./{project}/{experiment}/hm/glob_stats_{frequency}.json" - ) - ], - ROUTE_HEATMAP: [ - # Same as glob_stats - ConstantTemplateMapper( - "./{project}/{experiment}/hm/glob_stats_{frequency}.json" - ) - ], - ROUTE_CONTOUR: [ - ConstantTemplateMapper( - "./{project}/{experiment}/contour/{obsvar}_{model}.geojson" - ) - ], - ROUTE_TIMESERIES: [ - ConstantTemplateMapper( - "./{project}/{experiment}/ts/{location}_{network}-{obsvar}_{layer}.json" - ) - ], - ROUTE_TIMESERIES_WEEKLY: [ - ConstantTemplateMapper( - "./{project}/{experiment}/ts/diurnal/{location}_{network}-{obsvar}_{layer}.json" - ) - ], - ROUTE_EXPERIMENTS: [ConstantTemplateMapper("./{project}/experiments.json")], - ROUTE_CONFIG: [ - ConstantTemplateMapper( - "./{project}/{experiment}/cfg_{project}_{experiment}.json" - ) - ], - ROUTE_MENU: [ConstantTemplateMapper("./{project}/{experiment}/menu.json")], - ROUTE_STATISTICS: [ - ConstantTemplateMapper("./{project}/{experiment}/statistics.json") - ], - ROUTE_RANGES: [ - ConstantTemplateMapper("./{project}/{experiment}/ranges.json") - ], - ROUTE_REGIONS: [ - ConstantTemplateMapper("./{project}/{experiment}/regions.json") - ], - ROUTE_MODELS_STYLE: [ - PriorityDataVersionToTemplateMapper( - [ - "./{project}/{experiment}/models-style.json", - "./{project}/models-style.json", - ] - ) - ], - ROUTE_MAP: [ - DataVersionToTemplateMapper( - "./{project}/{experiment}/map/{network}-{obsvar}_{layer}_{model}-{modvar}_{time}.json", - min_version="0.13.2", - version_provider=self._get_version, - ), - DataVersionToTemplateMapper( - "./{project}/{experiment}/map/{network}-{obsvar}_{layer}_{model}-{modvar}.json", - max_version="0.13.2", - version_provider=self._get_version, - ), - ], - ROUTE_SCATTER: [ - DataVersionToTemplateMapper( - "./{project}/{experiment}/scat/{network}-{obsvar}_{layer}_{model}-{modvar}_{time}.json", - min_version="0.13.2", - version_provider=self._get_version, - ), - DataVersionToTemplateMapper( - "./{project}/{experiment}/scat/{network}-{obsvar}_{layer}_{model}-{modvar}.json", - max_version="0.13.2", - version_provider=self._get_version, - ), - ], - ROUTE_PROFILES: [ - ConstantTemplateMapper( - "./{project}/{experiment}/profiles/{location}_{network}-{obsvar}.json" - ) - ], - ROUTE_HEATMAP_TIMESERIES: [ - DataVersionToTemplateMapper( - "./{project}/{experiment}/hm/ts/{region}-{network}-{obsvar}-{layer}.json", - min_version="0.13.2", # https://github.com/metno/pyaerocom/blob/4478b4eafb96f0ca9fd722be378c9711ae10c1f6/setup.cfg - version_provider=self._get_version, - ), - DataVersionToTemplateMapper( - "./{project}/{experiment}/hm/ts/{network}-{obsvar}-{layer}.json", - min_version="0.12.2", - max_version="0.13.2", - version_provider=self._get_version, - ), - DataVersionToTemplateMapper( - "./{project}/{experiment}/hm/ts/stats_ts.json", - max_version="0.12.2", - version_provider=self._get_version, - ), - ], - ROUTE_FORECAST: [ - ConstantTemplateMapper( - "./{project}/{experiment}/forecast/{region}_{network}-{obsvar}_{layer}.json" - ) - ], - ROUTE_GRIDDED_MAP: [ - ConstantTemplateMapper( - "./{project}/{experiment}/contour/{obsvar}_{model}.json" - ) - ], - ROUTE_REPORT: [ - ConstantTemplateMapper("./reports/{project}/{experiment}/{title}.json") - ], - } + self.PATH_LOOKUP = StringMapper( + { + ROUTE_GLOB_STATS: "./{project}/{experiment}/hm/glob_stats_{frequency}.json", + ROUTE_REG_STATS: "./{project}/{experiment}/hm/glob_stats_{frequency}.json", + ROUTE_HEATMAP: "./{project}/{experiment}/hm/glob_stats_{frequency}.json", + ROUTE_CONTOUR: "./{project}/{experiment}/contour/{obsvar}_{model}.geojson", + ROUTE_TIMESERIES: "./{project}/{experiment}/ts/{location}_{network}-{obsvar}_{layer}.json", + ROUTE_TIMESERIES_WEEKLY: "./{project}/{experiment}/ts/diurnal/{location}_{network}-{obsvar}_{layer}.json", + ROUTE_EXPERIMENTS: "./{project}/experiments.json", + ROUTE_CONFIG: "./{project}/{experiment}/cfg_{project}_{experiment}.json", + ROUTE_MENU: "./{project}/{experiment}/menu.json", + ROUTE_STATISTICS: "./{project}/{experiment}/statistics.json", + ROUTE_RANGES: "./{project}/{experiment}/ranges.json", + ROUTE_REGIONS: "./{project}/{experiment}/regions.json", + ROUTE_MODELS_STYLE: [ + "./{project}/{experiment}/models-style.json", + "./{project}/models-style.json", + ], + ROUTE_MAP: [ + VersionConstraintMapper( + "./{project}/{experiment}/map/{network}-{obsvar}_{layer}_{model}-{modvar}_{time}.json", + min_version="0.13.2", + ), + VersionConstraintMapper( + "./{project}/{experiment}/map/{network}-{obsvar}_{layer}_{model}-{modvar}.json", + max_version="0.13.2", + ), + ], + ROUTE_SCATTER: [ + VersionConstraintMapper( + "./{project}/{experiment}/scat/{network}-{obsvar}_{layer}_{model}-{modvar}_{time}.json", + min_version="0.13.2", + ), + VersionConstraintMapper( + "./{project}/{experiment}/scat/{network}-{obsvar}_{layer}_{model}-{modvar}.json", + max_version="0.13.2", + ), + ], + ROUTE_PROFILES: "./{project}/{experiment}/profiles/{location}_{network}-{obsvar}.json", + ROUTE_HEATMAP_TIMESERIES: [ + VersionConstraintMapper( + "./{project}/{experiment}/hm/ts/{region}-{network}-{obsvar}-{layer}.json", + min_version="0.13.2", # https://github.com/metno/pyaerocom/blob/4478b4eafb96f0ca9fd722be378c9711ae10c1f6/setup.cfg + ), + VersionConstraintMapper( + "./{project}/{experiment}/hm/ts/{network}-{obsvar}-{layer}.json", + min_version="0.12.2", + max_version="0.13.2", + ), + VersionConstraintMapper( + "./{project}/{experiment}/hm/ts/stats_ts.json", + max_version="0.12.2", + ), + ], + ROUTE_FORECAST: "./{project}/{experiment}/forecast/{region}_{network}-{obsvar}_{layer}.json", + ROUTE_GRIDDED_MAP: "./{project}/{experiment}/contour/{obsvar}_{model}.json", + ROUTE_REPORT: "./reports/{project}/{experiment}/{title}.json", + }, + version_provider=self._get_version, + ) self.FILTERS: dict[str, Callable[..., Awaitable[Any]]] = { ROUTE_REG_STATS: filter_regional_stats, @@ -238,29 +174,7 @@ async def _get_template(self, route: str, substitutions: dict) -> str: :raises TemplateNotFound : If no valid template was found. """ - file_path_template = None - for f in self.PATH_LOOKUP[route]: - try: - file_path_template = await f(**substitutions) - except SkipMapper: - continue - - break - - if file_path_template is None: - raise TemplateNotFound("No template found.") - - return file_path_template - - def _get_templates(self, route: str) -> list[str]: - templates = list() - - for f in self.PATH_LOOKUP[route]: - templates.extend(f.get_templates_without_constraints()) - if isinstance(f, ConstantTemplateMapper): - break - - return templates + return await self.PATH_LOOKUP.lookup(route, **substitutions) async def _get( self, @@ -483,7 +397,7 @@ def _get_uri_for_file(self, file_path: str) -> str: file_path = os.path.join(self._basedir, file_path) file_path = os.path.relpath(file_path, start=self._basedir) - for route in self.PATH_LOOKUP: + for route in self.PATH_LOOKUP._lookuptable: if not (route == ROUTE_MODELS_STYLE): if file_path.startswith("reports/"): str = "/".join(file_path.split("/")[1:3]) diff --git a/src/aerovaldb/jsondb/uri.py b/src/aerovaldb/jsondb/uri.py deleted file mode 100644 index 2049633..0000000 --- a/src/aerovaldb/jsondb/uri.py +++ /dev/null @@ -1,5 +0,0 @@ -import os - - -def get_uri(file_path: str) -> str: - return str(os.path.realpath(file_path)) diff --git a/src/aerovaldb/utils/string_mapper/__init__.py b/src/aerovaldb/utils/string_mapper/__init__.py new file mode 100644 index 0000000..b893ff3 --- /dev/null +++ b/src/aerovaldb/utils/string_mapper/__init__.py @@ -0,0 +1,6 @@ +from .mapper import ( + StringMapper, + ConstantMapper, + PriorityMapper, + VersionConstraintMapper, +) diff --git a/src/aerovaldb/jsondb/templatemapper.py b/src/aerovaldb/utils/string_mapper/mapper.py similarity index 52% rename from src/aerovaldb/jsondb/templatemapper.py rename to src/aerovaldb/utils/string_mapper/mapper.py index ab5f6b2..634007d 100644 --- a/src/aerovaldb/jsondb/templatemapper.py +++ b/src/aerovaldb/utils/string_mapper/mapper.py @@ -1,12 +1,11 @@ -import abc -from aerovaldb.utils import async_and_sync -from packaging.version import Version -from typing import Callable, Awaitable import logging +from typing import Awaitable, Callable, Mapping +from abc import ABC +from packaging.version import Version -VersionProvider = Callable[[str, str], Awaitable[Version]] +logger = logging.getLogger() -logger = logging.getLogger(__name__) +VersionProvider = Callable[[str, str], Awaitable[Version]] class SkipMapper(Exception): @@ -18,7 +17,73 @@ class SkipMapper(Exception): pass -class TemplateMapper(abc.ABC): +class StringMapper: + """ + Class for mapping one type of string to the appropriate other + type of string. It is used by jsonfiledb to map from route to + the appropriate template string, and in sqlitedb to map from + route to the correct table name. + + It supports delivering different value strings based on + additional constraints (such as version). + """ + + def __init__(self, lookup_table: Mapping, /, version_provider: VersionProvider): + """ + :param lookup_table : A configuration lookuptable. + """ + self._lookuptable = lookup_table + for k in self._lookuptable: + # A single string will always be returned for that key. + if isinstance(self._lookuptable[k], str): + self._lookuptable[k] = ConstantMapper(self._lookuptable[k]) + + # Make sure value is a list. + if not isinstance(self._lookuptable[k], list): + self._lookuptable[k] = [self._lookuptable[k]] + + # If stringlist of len > 1, treat it as a priority order. + if len(self._lookuptable[k]) > 1 and all( + [isinstance(x, str) for x in self._lookuptable[k]] + ): + self._lookuptable[k] = [PriorityMapper(self._lookuptable[k])] + + self._version_provider = version_provider + + async def lookup(self, key: str, **kwargs) -> str: + """ + Performs a lookup of the value for the given key. + + :param key : The key for which to lookup a value. + :param kwargs : Additional values which will be used for constraint + matching. + :raises KeyError + If no entry exists for the key in the lookup table. + :return + The looked up string value. + """ + try: + values = self._lookuptable[key] + except KeyError as e: + raise KeyError(f"Key '{key}' does not exist in lookup table.") from e + + kwargs = kwargs | {"version_provider": self._version_provider} + return_value = None + for v in values: + try: + return_value = await v(**kwargs) + except SkipMapper: + continue + + break + + if not return_value: + raise ValueError(f"No valid value found for key '{key}'") + + return return_value + + +class Mapper(ABC): """ This class is a base class for objects that implement a file path template selection algorithm. Inheriting @@ -27,15 +92,11 @@ class TemplateMapper(abc.ABC): won't handle the request. """ - @async_and_sync - async def __call__(self, *args, version_provider: VersionProvider, **kwargs) -> str: - raise NotImplementedError - - def get_templates_without_constraints(self) -> list[str]: + async def __call__(self, *args, **kwargs) -> str: raise NotImplementedError -class DataVersionToTemplateMapper(TemplateMapper): +class VersionConstraintMapper(Mapper): """ This class returns its provided template if the data version read from a config file matches the configured bounds of this class. @@ -50,7 +111,6 @@ def __init__( *, min_version: str | None = None, max_version: str | None = None, - version_provider: VersionProvider, ): """ :param template : The template string to return. @@ -70,15 +130,13 @@ def __init__( self.template = template - self.version_provider = version_provider - - def get_templates_without_constraints(self): - return [self.template] - - @async_and_sync async def __call__(self, *args, **kwargs) -> str: + version_provider = kwargs.pop("version_provider") + version = await version_provider(kwargs["project"], kwargs["experiment"]) + if not version: + raise ValueError("No version provided") logger.debug(f"Trying template string {self.template}") - version = await self.version_provider(kwargs["project"], kwargs["experiment"]) + if self.min_version is not None and version < self.min_version: logging.debug( f"Skipping due to version mismatch. {version} < {self.min_version}" @@ -93,7 +151,7 @@ async def __call__(self, *args, **kwargs) -> str: return self.template -class PriorityDataVersionToTemplateMapper(TemplateMapper): +class PriorityMapper(Mapper): """ This class takes a list of templates, trying them in turn and returning the first template that fits the provided @@ -103,7 +161,6 @@ class PriorityDataVersionToTemplateMapper(TemplateMapper): def __init__(self, templates: list[str]): self.templates = templates - @async_and_sync async def __call__(self, *args, **kwargs) -> str: selected_template = None for t in self.templates: @@ -119,17 +176,10 @@ async def __call__(self, *args, **kwargs) -> str: return selected_template - def get_templates_without_constraints(self): - return self.templates - -class ConstantTemplateMapper(TemplateMapper): +class ConstantMapper(Mapper): def __init__(self, template: str): self.template = template - @async_and_sync async def __call__(self, *args, **kwargs) -> str: return self.template - - def get_templates_without_constraints(self): - return [self.template] diff --git a/tests/jsondb/test_lock.py b/tests/lock/test_lock.py similarity index 100% rename from tests/jsondb/test_lock.py rename to tests/lock/test_lock.py diff --git a/tests/utils/string_mapper/test_string_mapper.py b/tests/utils/string_mapper/test_string_mapper.py new file mode 100644 index 0000000..cdf8531 --- /dev/null +++ b/tests/utils/string_mapper/test_string_mapper.py @@ -0,0 +1,20 @@ +from aerovaldb.utils.string_mapper.mapper import * + + +def test_initialization(): + mapper = StringMapper( + { + "a": "test", + "b": ["test1", "test2"], + }, + version_provider=None, + ) + assert isinstance(mapper._lookuptable["a"], list) and all( + [isinstance(x, ConstantMapper) for x in mapper._lookuptable["a"]] + ) + + assert ( + isinstance(mapper._lookuptable["b"], list) + and len(mapper._lookuptable["b"]) == 1 + and isinstance(mapper._lookuptable["b"][0], PriorityMapper) + ) From 0feb274c4360149486d35888c49b8f73660c6ffb Mon Sep 17 00:00:00 2001 From: thorbjoernl <51087536+thorbjoernl@users.noreply.github.com> Date: Thu, 15 Aug 2024 13:55:26 +0200 Subject: [PATCH 18/28] WIP --- src/aerovaldb/sqlitedb/sqlitedb.py | 200 +++++++++++++++----- src/aerovaldb/utils/string_mapper/mapper.py | 3 + tests/jsondb/test_jsonfiledb.py | 1 - 3 files changed, 158 insertions(+), 46 deletions(-) diff --git a/src/aerovaldb/sqlitedb/sqlitedb.py b/src/aerovaldb/sqlitedb/sqlitedb.py index 32611f9..d56b7b3 100644 --- a/src/aerovaldb/sqlitedb/sqlitedb.py +++ b/src/aerovaldb/sqlitedb/sqlitedb.py @@ -1,5 +1,7 @@ import sqlite3 +from async_lru import alru_cache +from pkg_resources import DistributionNotFound, get_distribution import simplejson # type: ignore import aerovaldb from ..exceptions import UnsupportedOperation, UnusedArguments @@ -13,9 +15,11 @@ build_uri, extract_substitutions, ) +from aerovaldb.utils.string_mapper import StringMapper, VersionConstraintMapper import os from ..lock import FakeLock, FileLock from hashlib import md5 +from packaging.version import Version class AerovalSqliteDB(AerovalDB): @@ -23,12 +27,20 @@ class AerovalSqliteDB(AerovalDB): Allows reading and writing from sqlite3 database files. """ - # When creating a table it works to extract the substitution template - # names from the route, as this constitutes all arguments. For the ones - # which have extra arguments the following table defines the override. - # Currently this only applies to map which has an extra time argument. - ROUTE_COLUMN_NAME_OVERRIDE = { - ROUTE_MAP: ( + TABLE_COLUMN_NAMES = { + "glob_stats": extract_substitutions(ROUTE_GLOB_STATS), + "contour": extract_substitutions(ROUTE_CONTOUR), + "timeseries": extract_substitutions(ROUTE_TIMESERIES), + "timeseries_weekly": extract_substitutions(ROUTE_TIMESERIES_WEEKLY), + "experiments": extract_substitutions(ROUTE_EXPERIMENTS), + "config": extract_substitutions(ROUTE_CONFIG), + "menu": extract_substitutions(ROUTE_MENU), + "statistics": extract_substitutions(ROUTE_STATISTICS), + "ranges": extract_substitutions(ROUTE_RANGES), + "regions": extract_substitutions(ROUTE_REGIONS), + "models_style0": ["project", "experiment"], + "models_style1": ["project"], + "map0": [ "project", "experiment", "network", @@ -37,33 +49,39 @@ class AerovalSqliteDB(AerovalDB): "model", "modvar", "time", - ), - ROUTE_MODELS_STYLE: ("project", "experiment"), - } - - # This lookup table stores the name of the table in which json - # for a specific route is stored. - TABLE_NAME_LOOKUP = { - ROUTE_GLOB_STATS: "glob_stats", - ROUTE_REG_STATS: "glob_stats", - ROUTE_HEATMAP: "glob_stats", - ROUTE_CONTOUR: "contour", - ROUTE_TIMESERIES: "timeseries", - ROUTE_TIMESERIES_WEEKLY: "timeseries_weekly", - ROUTE_EXPERIMENTS: "experiments", - ROUTE_CONFIG: "config", - ROUTE_MENU: "menu", - ROUTE_STATISTICS: "statistics", - ROUTE_RANGES: "ranges", - ROUTE_REGIONS: "regions", - ROUTE_MODELS_STYLE: "models_style", - ROUTE_MAP: "map", - ROUTE_SCATTER: "scatter", - ROUTE_PROFILES: "profiles", - ROUTE_HEATMAP_TIMESERIES: "heatmap_timeseries", - ROUTE_FORECAST: "forecast", - ROUTE_GRIDDED_MAP: "gridded_map", - ROUTE_REPORT: "report", + ], + "map1": [ + "project", + "experiment", + "network", + "obsvar", + "layer", + "model", + "modvar", + ], + "scatter0": [ + "project", + "experiment", + "network", + "obsvar", + "layer", + "model", + "modvar", + "time", + ], + "scatter1": [ + "project", + "experiment", + "network", + "obsvar", + "layer", + "model", + "modvar", + ], + "profiles": extract_substitutions(ROUTE_PROFILES), + "heatmap_timeseries": extract_substitutions(ROUTE_HEATMAP_TIMESERIES), + "forecast": extract_substitutions(ROUTE_FORECAST), + "report": extract_substitutions(ROUTE_REPORT), } def __init__(self, database: str, /, **kwargs): @@ -85,6 +103,87 @@ def __init__(self, database: str, /, **kwargs): self._con.row_factory = sqlite3.Row + self.TABLE_NAME_LOOKUP = StringMapper( + { + ROUTE_GLOB_STATS: "glob_stats", + ROUTE_REG_STATS: "glob_stats", + ROUTE_HEATMAP: "glob_stats", + ROUTE_CONTOUR: "contour", + ROUTE_TIMESERIES: "timeseries", + ROUTE_TIMESERIES_WEEKLY: "timeseries_weekly", + ROUTE_EXPERIMENTS: "experiments", + ROUTE_CONFIG: "config", + ROUTE_MENU: "menu", + ROUTE_STATISTICS: "statistics", + ROUTE_RANGES: "ranges", + ROUTE_REGIONS: "regions", + ROUTE_MODELS_STYLE: ["models_style0", "models_style1"], + ROUTE_MAP: [ + VersionConstraintMapper( + "map0", + min_version="0.13.2", + ), + VersionConstraintMapper( + "map1", + max_version="0.13.2", + ), + ], + ROUTE_SCATTER: [ + VersionConstraintMapper( + "scatter0", + min_version="0.13.2", + ), + VersionConstraintMapper( + "scatter1", + max_version="0.13.2", + ), + ], + ROUTE_PROFILES: "profiles", + ROUTE_HEATMAP_TIMESERIES: "heatmap_timeseries", + ROUTE_FORECAST: "forecast", + ROUTE_GRIDDED_MAP: "gridded_map", + ROUTE_REPORT: "report", + }, + version_provider=self._get_version, + ) + + @async_and_sync + @alru_cache(maxsize=2048) + async def _get_version(self, project: str, experiment: str) -> Version: + """ + Returns the version of pyaerocom used to generate the files for a given project + and experiment. + + :param project : Project ID. + :param experiment : Experiment ID. + + :return : A Version object. + """ + try: + config = await self.get_config(project, experiment) + except FileNotFoundError: + try: + # If pyaerocom is installed in the current environment, but no config has + # been written, we use the version of the installed pyaerocom. This is + # important for tests to work correctly, and for files to be written + # correctly if the config file happens to be written after data files. + version = Version(get_distribution("pyaerocom").version) + except DistributionNotFound: + version = Version("0.0.1") + finally: + return version + # except simplejson.JSONDecodeError: + # # Work around for https://github.com/metno/aerovaldb/issues/28 + # return Version("0.14.0") + + try: + version_str = config["exp_info"]["pyaerocom_version"] + version = Version(version_str) + except KeyError: + version = Version("0.0.1") + + return version + def _get_metadata_by_key(self, key: str) -> str: """ Returns the value associated with a key from the metadata @@ -134,10 +233,8 @@ def _initialize_db(self): # Data tables. Currently one table is used per type of asset # stored and json blobs are stored in the json column. - for route, table_name in AerovalSqliteDB.TABLE_NAME_LOOKUP.items(): - args = AerovalSqliteDB.ROUTE_COLUMN_NAME_OVERRIDE.get( - route, extract_substitutions(route) - ) + for table_name in AerovalSqliteDB.TABLE_COLUMN_NAMES: + args = AerovalSqliteDB.TABLE_COLUMN_NAMES[table_name] column_names = ",".join(args) @@ -199,9 +296,17 @@ async def _get(self, route, route_args, *args, **kwargs): cur = self._con.cursor() - table_name = AerovalSqliteDB.TABLE_NAME_LOOKUP[route] + table_name = await self.TABLE_NAME_LOOKUP.lookup(route) args = route_args | kwargs + args = { + k: v + for k, v in args.items() + if k in AerovalSqliteDB.TABLE_COLUMN_NAMES[table_name] + } + # for a in args: + # if not (args in AerovalSqliteDB.TABLE_COLUMN_NAMES[table_name]): + # args.pop(a) columnlist, substitutionlist = self._get_column_list_and_substitution_list(args) cur.execute( f""" @@ -251,23 +356,28 @@ async def _put(self, obj, route, route_args, *args, **kwargs): cur = self._con.cursor() - table_name = AerovalSqliteDB.TABLE_NAME_LOOKUP[route] + table_name = await self.TABLE_NAME_LOOKUP.lookup(route, **(route_args | kwargs)) - columnlist, substitutionlist = self._get_column_list_and_substitution_list( - route_args | kwargs - ) + args = route_args | kwargs + args = { + k: v + for k, v in args.items() + if k in AerovalSqliteDB.TABLE_COLUMN_NAMES[table_name] + } + + columnlist, substitutionlist = self._get_column_list_and_substitution_list(args) json = obj if not isinstance(json, str): json = json_dumps_wrapper(json) - route_args.update(json=json) + args.update(json=json) cur.execute( f""" INSERT OR REPLACE INTO {table_name}({columnlist}, json) VALUES({substitutionlist}, :json) """, - route_args | kwargs, + args, ) self._set_metadata_by_key( @@ -306,7 +416,7 @@ async def put_by_uri(self, obj, uri: str): def list_all(self): cur = self._con.cursor() - for route, table in AerovalSqliteDB.TABLE_NAME_LOOKUP.items(): + for route in self.TABLE_NAME_LOOKUP: cur.execute( f""" SELECT * FROM {table} diff --git a/src/aerovaldb/utils/string_mapper/mapper.py b/src/aerovaldb/utils/string_mapper/mapper.py index 634007d..e082a8c 100644 --- a/src/aerovaldb/utils/string_mapper/mapper.py +++ b/src/aerovaldb/utils/string_mapper/mapper.py @@ -50,6 +50,9 @@ def __init__(self, lookup_table: Mapping, /, version_provider: VersionProvider): self._version_provider = version_provider + def __iter__(self): + return iter(self._lookuptable.keys()) + async def lookup(self, key: str, **kwargs) -> str: """ Performs a lookup of the value for the given key. diff --git a/tests/jsondb/test_jsonfiledb.py b/tests/jsondb/test_jsonfiledb.py index 624a0ee..efa311e 100644 --- a/tests/jsondb/test_jsonfiledb.py +++ b/tests/jsondb/test_jsonfiledb.py @@ -1,5 +1,4 @@ import pytest -import simplejson # type: ignore import aerovaldb from aerovaldb.jsondb.jsonfiledb import AerovalJsonFileDB From a6bfdaeeb0eb7ba1fa50778b45c5498cd57d88cd Mon Sep 17 00:00:00 2001 From: thorbjoernl <51087536+thorbjoernl@users.noreply.github.com> Date: Fri, 16 Aug 2024 09:40:41 +0200 Subject: [PATCH 19/28] Almost all tests work --- src/aerovaldb/jsondb/jsonfiledb.py | 9 ++- src/aerovaldb/lock/lock.py | 7 +- src/aerovaldb/routes.py | 6 +- src/aerovaldb/sqlitedb/sqlitedb.py | 75 +++++++++++++++++--- src/aerovaldb/utils/__init__.py | 2 +- src/aerovaldb/utils/asyncio.py | 7 ++ src/aerovaldb/utils/string_mapper/mapper.py | 11 ++- tests/jsondb/test_jsonfiledb.py | 2 +- tests/sqlitedb/test_sqlitedb.py | 2 +- tests/test-db/sqlite/test.sqlite | Bin 159744 -> 225280 bytes 10 files changed, 98 insertions(+), 23 deletions(-) diff --git a/src/aerovaldb/jsondb/jsonfiledb.py b/src/aerovaldb/jsondb/jsonfiledb.py index 1864ba6..fa5793b 100644 --- a/src/aerovaldb/jsondb/jsonfiledb.py +++ b/src/aerovaldb/jsondb/jsonfiledb.py @@ -23,6 +23,7 @@ parse_formatted_string, build_uri, extract_substitutions, + run_until_finished, ) from .filter import filter_heatmap, filter_regional_stats from ..exceptions import UnsupportedOperation @@ -422,6 +423,12 @@ def _get_uri_for_file(self, file_path: str) -> str: template = self._get_template(route, subs) + try: + version = run_until_finished( + self._get_version(subs["project"], subs["experiment"]) + ) + except TypeError: + version = self._get_version(subs["project"], subs["experiment"]) route_arg_names = extract_substitutions(route) try: @@ -434,7 +441,7 @@ def _get_uri_for_file(self, file_path: str) -> str: except Exception: continue else: - return build_uri(route, route_args, kwargs) + return build_uri(route, route_args, kwargs | {"version": version}) raise ValueError(f"Unable to build URI for file path {file_path}") diff --git a/src/aerovaldb/lock/lock.py b/src/aerovaldb/lock/lock.py index 93b7cd5..c676243 100644 --- a/src/aerovaldb/lock/lock.py +++ b/src/aerovaldb/lock/lock.py @@ -3,7 +3,7 @@ import fcntl import asyncio import pathlib -from ..utils import has_async_loop +from ..utils import has_async_loop, run_until_finished logger = logging.getLogger(__name__) @@ -78,8 +78,9 @@ def acquire(self): logger.debug("Acquiring lock with lockfile %s", self._lock_file) if has_async_loop(): - task = asyncio.ensure_future(self._aiolock.acquire()) - asyncio.wait(task) + run_until_finished(self._aiolock.acquire) + # task = asyncio.ensure_future(self._aiolock.acquire()) + # asyncio.wait(task) fcntl.lockf(self._lock_handle, fcntl.LOCK_EX) self._acquired = True diff --git a/src/aerovaldb/routes.py b/src/aerovaldb/routes.py index 8447282..8f5ee93 100644 --- a/src/aerovaldb/routes.py +++ b/src/aerovaldb/routes.py @@ -29,14 +29,12 @@ ROUTE_MAP = "/v0/map/{project}/{experiment}/{network}/{obsvar}/{layer}/{model}/{modvar}" ROUTE_SCATTER = ( - "/v0/scat/{project}/{experiment}/{network}/{obsvar}/{layer}/{model}/{modvar}/{time}" + "/v0/scat/{project}/{experiment}/{network}/{obsvar}/{layer}/{model}/{modvar}" ) ROUTE_PROFILES = "/v0/profiles/{project}/{experiment}/{location}/{network}/{obsvar}" -ROUTE_HEATMAP_TIMESERIES = ( - "/v0/hm_ts/{project}/{experiment}/{region}/{network}/{obsvar}/{layer}" -) +ROUTE_HEATMAP_TIMESERIES = "/v0/hm_ts/{project}/{experiment}" ROUTE_FORECAST = ( "/v0/forecast/{project}/{experiment}/{region}/{network}/{obsvar}/{layer}" diff --git a/src/aerovaldb/sqlitedb/sqlitedb.py b/src/aerovaldb/sqlitedb/sqlitedb.py index d56b7b3..0c0df1c 100644 --- a/src/aerovaldb/sqlitedb/sqlitedb.py +++ b/src/aerovaldb/sqlitedb/sqlitedb.py @@ -14,6 +14,7 @@ async_and_sync, build_uri, extract_substitutions, + validate_filename_component, ) from aerovaldb.utils.string_mapper import StringMapper, VersionConstraintMapper import os @@ -79,11 +80,50 @@ class AerovalSqliteDB(AerovalDB): "modvar", ], "profiles": extract_substitutions(ROUTE_PROFILES), - "heatmap_timeseries": extract_substitutions(ROUTE_HEATMAP_TIMESERIES), + "heatmap_timeseries0": [ + "project", + "experiment", + "region", + "network", + "obsvar", + "layer", + ], + "heatmap_timeseries1": ["project", "experiment", "network", "obsvar", "layer"], + "heatmap_timeseries2": [ + "project", + "experiment", + ], "forecast": extract_substitutions(ROUTE_FORECAST), + "gridded_map": extract_substitutions(ROUTE_GRIDDED_MAP), "report": extract_substitutions(ROUTE_REPORT), } + TABLE_NAME_TO_ROUTE = { + "glob_stats": ROUTE_GLOB_STATS, + "contour": ROUTE_CONTOUR, + "timeseries": ROUTE_TIMESERIES, + "timeseries_weekly": ROUTE_TIMESERIES_WEEKLY, + "experiments": ROUTE_EXPERIMENTS, + "config": ROUTE_CONFIG, + "menu": ROUTE_MENU, + "statistics": ROUTE_STATISTICS, + "ranges": ROUTE_RANGES, + "regions": ROUTE_REGIONS, + "models_style0": ROUTE_MODELS_STYLE, + "models_style1": ROUTE_MODELS_STYLE, + "map0": ROUTE_MAP, + "map1": ROUTE_MAP, + "scatter0": ROUTE_SCATTER, + "scatter1": ROUTE_SCATTER, + "profiles": ROUTE_PROFILES, + "heatmap_timeseries0": ROUTE_HEATMAP_TIMESERIES, + "heatmap_timeseries1": ROUTE_HEATMAP_TIMESERIES, + "heatmap_timeseries2": ROUTE_HEATMAP_TIMESERIES, + "forecast": ROUTE_FORECAST, + "gridded_map": ROUTE_GRIDDED_MAP, + "report": ROUTE_REPORT, + } + def __init__(self, database: str, /, **kwargs): use_locking = os.environ.get("AVDB_USE_LOCKING", "") if use_locking == "0" or use_locking == "": @@ -139,7 +179,21 @@ def __init__(self, database: str, /, **kwargs): ), ], ROUTE_PROFILES: "profiles", - ROUTE_HEATMAP_TIMESERIES: "heatmap_timeseries", + ROUTE_HEATMAP_TIMESERIES: [ + VersionConstraintMapper( + "heatmap_timeseries0", + min_version="0.13.2", # https://github.com/metno/pyaerocom/blob/4478b4eafb96f0ca9fd722be378c9711ae10c1f6/setup.cfg + ), + VersionConstraintMapper( + "heatmap_timeseries1", + min_version="0.12.2", + max_version="0.13.2", + ), + VersionConstraintMapper( + "heatmap_timeseries2", + max_version="0.12.2", + ), + ], ROUTE_FORECAST: "forecast", ROUTE_GRIDDED_MAP: "gridded_map", ROUTE_REPORT: "report", @@ -294,19 +348,17 @@ async def _get(self, route, route_args, *args, **kwargs): if access_type in [AccessType.URI]: return build_uri(route, route_args, kwargs) - cur = self._con.cursor() - - table_name = await self.TABLE_NAME_LOOKUP.lookup(route) - args = route_args | kwargs + cur = self._con.cursor() + table_name = await self.TABLE_NAME_LOOKUP.lookup(route, **args) args = { k: v for k, v in args.items() if k in AerovalSqliteDB.TABLE_COLUMN_NAMES[table_name] } - # for a in args: - # if not (args in AerovalSqliteDB.TABLE_COLUMN_NAMES[table_name]): - # args.pop(a) + + [validate_filename_component(x) for x in args.values()] + columnlist, substitutionlist = self._get_column_list_and_substitution_list(args) cur.execute( f""" @@ -416,10 +468,11 @@ async def put_by_uri(self, obj, uri: str): def list_all(self): cur = self._con.cursor() - for route in self.TABLE_NAME_LOOKUP: + for table_name in self.TABLE_COLUMN_NAMES.keys(): + route = AerovalSqliteDB.TABLE_NAME_TO_ROUTE[table_name] cur.execute( f""" - SELECT * FROM {table} + SELECT * FROM {table_name} """ ) result = cur.fetchall() diff --git a/src/aerovaldb/utils/__init__.py b/src/aerovaldb/utils/__init__.py index 7b02a86..526f6eb 100644 --- a/src/aerovaldb/utils/__init__.py +++ b/src/aerovaldb/utils/__init__.py @@ -1,4 +1,4 @@ -from .asyncio import async_and_sync, has_async_loop +from .asyncio import async_and_sync, has_async_loop, run_until_finished from .string_utils import str_to_bool, validate_filename_component from .json import json_dumps_wrapper from .uri import extract_substitutions, build_uri, parse_formatted_string, parse_uri diff --git a/src/aerovaldb/utils/asyncio.py b/src/aerovaldb/utils/asyncio.py index 0719e89..e5aa2a4 100644 --- a/src/aerovaldb/utils/asyncio.py +++ b/src/aerovaldb/utils/asyncio.py @@ -38,3 +38,10 @@ def async_and_sync_wrap(*args, **kwargs): return asyncio.run(function(*args, **kwargs)) return async_and_sync_wrap + + +@async_and_sync +async def run_until_finished(coroutine): + task = asyncio.create_task(coroutine) + await asyncio.wait(task) + return task.result() diff --git a/src/aerovaldb/utils/string_mapper/mapper.py b/src/aerovaldb/utils/string_mapper/mapper.py index e082a8c..fa84fe5 100644 --- a/src/aerovaldb/utils/string_mapper/mapper.py +++ b/src/aerovaldb/utils/string_mapper/mapper.py @@ -65,12 +65,21 @@ async def lookup(self, key: str, **kwargs) -> str: :return The looked up string value. """ + if (version := kwargs.pop("version", None)) is None: + version_provider = self._version_provider + else: + + async def version_helper(p, e): + return Version(version) + + version_provider = version_helper + try: values = self._lookuptable[key] except KeyError as e: raise KeyError(f"Key '{key}' does not exist in lookup table.") from e - kwargs = kwargs | {"version_provider": self._version_provider} + kwargs = kwargs | {"version_provider": version_provider} return_value = None for v in values: try: diff --git a/tests/jsondb/test_jsonfiledb.py b/tests/jsondb/test_jsonfiledb.py index efa311e..6127245 100644 --- a/tests/jsondb/test_jsonfiledb.py +++ b/tests/jsondb/test_jsonfiledb.py @@ -57,7 +57,7 @@ def test_jsonfiledb__get_uri_for_file(tmp_path): db: AerovalJsonFileDB assert ( db._get_uri_for_file(str(tmp_path / "project/experiments.json")) - == "/v0/experiments/project" + == "/v0/experiments/project?version=0.0.1" ) diff --git a/tests/sqlitedb/test_sqlitedb.py b/tests/sqlitedb/test_sqlitedb.py index 85445d1..7302cac 100644 --- a/tests/sqlitedb/test_sqlitedb.py +++ b/tests/sqlitedb/test_sqlitedb.py @@ -21,7 +21,7 @@ def test_db_initialization(tmp_path): # Check that all tables are properly initialized. cur = db._con.cursor() - for table in AerovalSqliteDB.TABLE_NAME_LOOKUP.values(): + for table in AerovalSqliteDB.TABLE_COLUMN_NAMES: cur.execute( f""" PRAGMA table_info({table}) diff --git a/tests/test-db/sqlite/test.sqlite b/tests/test-db/sqlite/test.sqlite index b3e25c630e79bd56bd6a3392d4c187ee6583c045..1bf35d98cf1a87ee58d28d3abddb44473680b9cf 100644 GIT binary patch literal 225280 zcmeI*Z*1G>eFtz-jwM-+%_LW!>MCw%yE<8oWJ$JV$5C@R$7USe#qymj*Gca#nqt$A zF8bq0bnenNJ4g<2U4gx6-wYeDzUdXkir#-uI7|An$hid15q?fA>Y`t#@^_3)$TM@oDi@>zN9dS2k!&2puf6WB@W6h+h2ODs!K zRFM45kUw?0N*X7elyQ+WN$yn@s;xZTw&vOHaZ?1k4=wn z@b{x5bl33AdWFvk=6O71tsKlxDCWP&4JHUc00Izz00bZa0SG_<0uX=z1dg7-N4mmv z^O1sJU)OMtc2GcS|L^+~s{ilF4JHUc00Izz00bZa0SG_<0uVT40`-qN+`i#+iNx;E zcBT9lzb^3aZ1a_TkuM3G75?51Us``K5uKO{M=yk9(`>s#M0$I1Msa%h_?i6Zq32;eM7!tr(1c6nu(lG_@LSb#NqgRs zYic%X#6^;y6)w5`{~HwZ4dzFOtdgTR5P$##AOHafKmY;|fB*y_0D-)iIq=b=WUzVELu&u;{~5*HAvc&H009U<00Izz00bZa0SG_<0!LV&MvJ{A2EB>I zUEQx|at=M|%0O=Uf>BFlj{z z00bZa0SG_<0uX=z1Rwwb2<(%X|bxUoPT)G%8f`TU&f(XL}toE0r zE11lhOs#O~47;4_80)&sy6>uUr<~r^2uZisrS}j zWL(}zgno2h5Qj$9WzZ#f)LK1vxv_Q&8VuHB{Vw0iuzUA`{SAhO zs$$yTMCeLxzHQKDwZBp5@YW#OC!4)(e`{cBW+VP}5axle%i5+{r$W6R>~(pEhv`Q{ zO>wHZxYf$k6q&~^BqvWgy^F&(Hj540!=Czx$K_k{xp%j$;#j#tvT~=QTSc&vU01)F z!`CdIRXtSZ>u}%>_sB@RRp2VcQSO?MOheXe<(uQxEMGfFor=`jZ@OGwpO4;Mks@_3 zgj1D}VpMs}c7Q6oNa@Po#=F3S*+n*%D;NyBo!&X0S@hLP`)jX zsIEOyJ*~n|o5L~!TV%DWU)ro4j>BlT%^j##BJEL=nQ1g?bC{TE-<(@yZPTn%VOgiU zq#fw)k`$I&fwe~`R2V5ZwRGF@xmn@w?eL}b2Zt#*9~UYZoIKHL$EdAvHl{u9UZY*! zp&`0POKwG8$dMn!IrZML$4pHfyURZavG;R@9eyu`@->^h91KPrPVfAX*>*x*R5qqQ z>LNen-QtgVvHw5Ke4QfyF+l(V5P$##AOHafKmY;|fB*y_aKHkern`NK1gEsA8Fl}U z$Kxi+re_6^aS6r#|1|SUiu}g}0SG_<0uX=z1Rwwb2tWV=5P-n5B;e`tsn-qc^DI%P z={KmJS)O*YGS(5Qm6M|=nKG&3l zr@u zBbUa|$25Lg`e;4zOtQ7%)3GSZd0pPYL3&s2!ro|bO06Vnw^;HKAIVrc|52y+@}OBP z$*U}NeI+Y+x<389%eOk}-hJ09p0x93stpfSG^@yJm_-*i&EeH5uT>ptn76}$79Wt| z9$E!jLy;ODHo=*OT*E9Ta9Z58%G)kn$0G1E&$+y#qjddq(tfO=$PEwsR2?Z68(wG~ zkUF3bcaIhf#y{fpCP&ReuVFp1v4$u8fNw!sAbglf^nD%&}Ry-$n;`d7%FzprX z(MMEGY4fF}AI(^N?&I>Wu~n;Y2;vuX@lRcQtN0W1n$#lgG;_mDR`o}msk6~0_W$qd z`xeFc`)`puOb~zo1Rwwb2tWV=5P$##AaKY8YNK5)UoerVUDjRrK3pzr81@98o-j#P zI*5~lXxV&evpjNo9e}uy;O5KG;?w3;Bbol6<*i<+BRy_k59!+l5Mr=lkO1|?G*bJsBhQ$r@GO5 zP0bp2E10#{tw6s1pM&|1*#3{!|Km}|v6c{k00bZa0SG_<0uX=z1R!wq1knC}^gc6` z7y=N000bZa0SG_<0uX=z1P)%Hz4reJ!^Zdcgx$vXeQ5umnKkSdFf(hj3xat5KidBf zzT%+-5P$##AOHafKmY;|fB*y_0D%?)X#a14gO?xx0SG_<0uX=z1Rwwb2tWV=CIaoX z|4&{pYJ87RUa;Btz7OsHvxXxkOwCT(ECSm9O=<#?ApijgKmY;|fB*y_009U<00M_c zU|-t*Pto6`96zGJ+5b;{zv}zzlV9ttdA{IT>gjd;!1*uEKj^-5!teMM{fH`)e@{Qr zFFyS;j_VTYXZHbOmmVi{@5H4R&VRz`y*x-$`O*gePW4_PFYwvij!>4KD${YwpT*julhJbZ;ffC>IC~Y2%NEO1$uPxpFsDzFobatAq-< z2Ye+|EN}1y@m5R{*IOGiRRtFBWs(ecBV+t}Q6e*c{jF-b#L6%0io2RhzP8GZJ#mSH z!TL)RF5k_7`%$k_?{>B~as*dj@1)hOW~;)30w3G+M0Hhap}i&O)0cpkoU}_LuroZ*16j!>{N8mMxEZJ zfLV0!S&Qu6s`v5QZ$w<)K!C2lE62JRlZKUaNF|n|zcF9y&45n%!wqUNM8}^XM5B#t zQFZ0p3DKx-&0B@2K6g9yE86K04L?DMwj{I_qI#58A*#>aK4Hf~G-NkKqkBU%dbW1| zycD9l=XD{vSJ|@6mUwiayO{LNkOJ0w%!9ad=b{wwSx?=p%>(zOgYQ& zGXKeB`eoJRw~#Kjq($pkENhK4TW2E+ATA;oD z|8c|4_wl&h&i7B>|34m`HarC(I*tDS$GRe8}Q|00Izz00bZa0SG_<0!Lb4U)ukvQ%BkWg@*tHAOHafKmY;|fB*y_ z009U>KoY2L}>~9c3tMZOB1o=xurAZE4_bX<%D&Lx$ZJ zht4)UNq%a|B-zfn^JHl7$lH10PF7s}i05H#iat57D`L1*4r|4%hQ)EZwdYS%?5Wuc zM#Uz{Pf~38{Qr9tbC3DM1O15c<`94Y1Rwwb2tWV=5P$##AOL}9N#K%4J&-}Se;C8@ zsz*J>(4)-#lt&oE_J5k$qR4+t5P$##AOHafKmY;|fB*y_009U*oj~n*y4M$!Ixr*> z>$)HQ;c{W4WoH1R7XSFPVMhRxY_}r->9!!Y|I^I(De@l^1Rwwb2tWV=5P$##AOHaf zKmY=VSRm1(99mHO1ZZa%SRPmq+y76|A5f0((I52xyzd8nUpe{3UcvJzPp|u*UEgwk z(Rs5wdcx`W1$D>s?{*35FK{m3@{oJ?u29KuZSj?SsY+3fM|S5&J~M&(nYhchIN;u0 z(Pa6Gkj;>DP^v<%xSh?&;#qQfN>u!tPjhn_j?JW#i;G;EC7*?f^U8XDCVlt&mB}-a;bwPYnKJC9l}!3-Cb`6o1^hR|{$kj_ z!TPUV^e?JnEt&K~M6T0`|QVW48r#CS`Q~A;c z|4#K@AusUR+>TI|o=Xtfm{f?mci!b48loT3Lhg2fCr9Bn^98=D-jAtPb62i&?Bi73 zTZ%}^5xkHVX`w=SJtySLrI2zUFC?Fh6f&G&w#aZdGRChLB`fpS#q)?*`DNYmS5wK? zR=F|zMGOY(fh%&v7L|Inv%QfcjQV<2B`YcLm6&?3nkn_maz0&Y4yTtB$Wvjn1r`xs}N0eF~^}b2UG!oVPI)LVAG|kwSRHvdc@{-d#KV%k_ zYT2`oD^j3-{<0jMK`A=b9P0fGs-F~{YU-Fy$s{_Pf>Jt1FeIHMDBhB@1dZeHVB*xW zuwul%*j~g&6|&|%f>p%oH~1zItIue&9?E-%4o2*y_9E6y$|_>@045Qu&)Ak!$0GJq zHX=6qIAWvzdORUV?4`ztea!a>qlk^}W5kO4$RicDA+f|HcAvW_e~VkS+S`(^E#g;~ zm?~+tmlgBswusH~YM9BY28uIvc+Yxf*8X=}BNfy5G+g24*fh*!TcuMm8Mt8a-8LqD zw~dX|&&{~J0|WG;HYYH5l&(0SG_<0uX=z1Rwwb2tWV=$3);zUjJ{CVm6P7KUNb05P$##AOHafKmY;| zfB*y_&@q9Ux8d@C;)%y^@#})tZNc&^eWS+yc-;8%e{oxv|C72c;QD_Z`^2$e2tWV= z5P$##AOHafKmY;|fWUqWJni-W9Q}VsF}KJKCI~z0wb0Zz53}vhyQY8qC+IJv=@+X_9QGnfy*MB2**tkM6M2X&@{Q>K&HeQf%OV zk?xg#fHxMl?H}Q$D=>Az@Em`Vyk9FIxBtIKG4C;dc2sHz$_W7oKmY;|fB*y_009U< z00Iy=q5>0c`H+XLMt|vWfrE3)2Ogfd>0eq*U+-zE*YR5v^SAw9>-&7qk9*#82b_P| z{j&4juAh_SpPsBzU+;aHsDAxmWqVK4KRwtI;N3eaP`{OO`S?lqqli$+Z*B3Fo$ZaB zz-KdLXw9mSD{f~qa(ea-pA(9??W{QHW|gnx`D)A{X+F)(WjHpIPA)ETX||oEdDy0D z?rMfCV)fMpaf!<%*5K`Q^IYK;lPO&)S+KGggJrNQT!t-*tHoYoS2F3VndA~T7VzH; z`-@@!2J63e(Z6)jzZnRzfzaNR6L}e+7?m{P>19L)FEl(9x|L+2FP$XGRCjVHPHO^x2ok5E5EE; z{%R`u+A23@vwXoIciri|IZ0Fb(gy!d^75@k`#zIvVhqCAHV;f>WF2tWV= z5P$##AOHafKmY;|=zu_trk%cEBJq&)#(#@n7qqiE!{x$;;gt)g<0i>X*8^0R7||9P zkuPURPNf#AS)n>Ep2bNnU?{f#)6Bn8&b_kD`F(f|D;y^1J21Rwwb2tWV=5P$##AOHafJo5sz(+-b1Vqj6% zmM&lIQ@K2{e05~g@%G~JsA2nhJlfR0J{~b>Vc#rQ`1M>>h=>CN`e~0kFrZqd{TmlB z9yK1=Fgn?8IWhj^=da!F?(d!WVps9Rn~oI8fQbmyhip+a@2=F*;0Eym|a5lWcAH zbZT^tTJd$4cW{v2jZ4EbH5yE-2+|--@+-E-T@vy_fj1t^?O+CblL{0Jo`1vXy*y|( z8i~A;*_eDkQ=k5f%eOk}-hJ09wzewyjSYSyO9p{#c&GwejfvARi)E;_%4=1h8s_bA z(8X`ca1X75uAxW`51Zgj!>?f$6F6;@oK|_;h3ix_);()3@8~GK`??g34MlEvIHl@H zaoO;K?ch`fWGt39_`+d|(Z+HGgYg?qZ*tTuR2$Yi8*6wHC;LRV_|2dm__WQTtkgyl z^YX}chokj{RhvUuspV1co1{$RRLyT9r3I;J#U^0(WYg zz`feuFsTzp!=z3`>+WS~nAF`FlVMVgK8uK2+f9Z^ov^(r(S0=Hkj@A4MLj5u|L5rc z8O7WoH<%y*0SG_<0uX=z1Rwwb2tWV=M_8anJKW^_0B<63S9f-Rd=y+dIB(eVKW5y~ zK4z<<{d&11ly@r9`2Yj7o172OP=?lL16Y)6YI@eFxFp%m2>=b{lKcO^PciQ^KRm)8 zF^UcW2tWV=5P$##AOHafKmY;|IQjykPVw*s-Rj_lW7;VmUQjRcXZzrSbp1ajLNVVU zH<%y*0SG_<0uX=z1Rwwb2tWV=5D*1E>u~vo8yfMoi|I?Jyc)I1$7hVk?3;<}n(E~< zVXH>h79^JxT%pN(`x6O9cN`#Jx}PiLH^SnWe#S*G?!_=edNGidaxmYen4gdvOb~zo w1Rwwb2tWV=5P$##AOHaf97=(+-Q;+LzGr}RG{P}LUi7P70<=RFjfV^VA1KM$@&Et; delta 3257 zcmds3U2Gd!6`ng|&)8#ou2UzSIFrQoB(~%HI88#b+7(S&qO#jRpwLyst`^I6CaJsr zN$lxnvtgRvAjHEWg=i!O3884EKJ44B*Gsb$u~nZyJV5)hh=mmuKPzAbMMzM@N;UW1 z88>#DeL&)gQGDm*+wBrA5;F|9z{#{?hwe&kgt)i`zE4n=<=D&F2`1upj z^S`gI_%W4-2UKkR+NS(KS@j6U?~upoy*U@JodU=8qJPa80H91P0GdwBWD z5RqYmh(`tmd1XP4#{qiQ1}raXg=(g%uNSmpi`{L(H4wJt#nm+JNu{3X=H=6ID(Z`` z0RJ(J*Rmn}N*7fEou;@IZ8={qRh`%2Ho}f`9~>(>c9@*s`lA7YRJOFN zRV#GtYzLygHk^@8Qtf@#M->o=cOkArkYh1`&QYYQp4IbJJ)f)YTZhd4zjoX8?woV# z5HCL+vpR;^u=)GHbfrS8P>{R206JkSrj;%i@h{j$g?QSCB!lL;-zCO7)&i|($(2h> z`DJ^Q+5l`{8^nl2&eCiC;6o_^SA6vHq9i4uy}W{N0S>BHTTH*1`BT_~EBBI?(`j`L zaRH2pNapN}t($Z2 zI_T+}-X^E?&G+iA;2RLI=Gze!Oq;)m41nqS$B}b$}RRKS52i=Z~OBHSPvR2Bi|L>wJ&4A?nNc4JR zjt>g(6a&8tPu0*Y^n9L35SEwCdtd3bB`{yec)doBuq6Il+i^d>)>sXCxhYSwq~6F7 zs7VHH!^s*-K=FCjqs01@u>6Ag&Fv8i7Wb=08F=O$_pR+Y+YQuuW0P!G8TbvT*3e-n z=9=4!54Wd%##(cC5i_xOr}18QkO%O62EJeKxP2Q21_7rWcgmbFlu3Y}vq6$_PI``k z&r3g*o~xlVQo6*sl@ywG7VqDcv2tON=2^wQOun>Krs=%oWAxd$>;7N<~R z-AW7@k6}=8LQefW0+SeQ=0#n(e4&ufVR*fAS)*ApkP6)M7?Q|sr{R-e9;WvIn0lij z1bLl-Z$iC>F2e4oSeFu8IGCqLH9iMi^Rb2fm_XfYM8N+IggYp5@GMmWo~@xdD8)IK z!n74ik81pvA_Y}LBxMEPV&GfwCVa2n5xoZeevAwprLA`%_^ilD*BH0~ebTiW`nKfP zI8jOA+aZUUjP7{HT Qzc_ZOS}yH1G8_p02Nl!eM*si- From 53ef2fba62ecdfa7cde677b154623b6bb92cefce Mon Sep 17 00:00:00 2001 From: thorbjoernl <51087536+thorbjoernl@users.noreply.github.com> Date: Fri, 16 Aug 2024 10:25:25 +0200 Subject: [PATCH 20/28] Remove pytest hook --- .pre-commit-config.yaml | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a9b9997..016b2d0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -5,16 +5,6 @@ repos: rev: 23.9.1 hooks: - id: black -- repo: local - hooks: - - id: tox-code-checks - name: Run tox targets -- tests - stages: [commit] - language: system - types: [python] - pass_filenames: false - verbose: true - entry: tox -v - repo: https://github.com/pre-commit/mirrors-mypy rev: 'v1.10.0' hooks: From 3768c6807e970215fef0390f413bb56a8526bf98 Mon Sep 17 00:00:00 2001 From: thorbjoernl <51087536+thorbjoernl@users.noreply.github.com> Date: Fri, 16 Aug 2024 10:35:17 +0200 Subject: [PATCH 21/28] models-style test works --- src/aerovaldb/jsondb/jsonfiledb.py | 3 +++ src/aerovaldb/sqlitedb/sqlitedb.py | 13 +++++++++++-- src/aerovaldb/utils/string_mapper/mapper.py | 16 ++++++++++++---- tests/test-db/sqlite/test.sqlite | Bin 225280 -> 225280 bytes 4 files changed, 26 insertions(+), 6 deletions(-) diff --git a/src/aerovaldb/jsondb/jsonfiledb.py b/src/aerovaldb/jsondb/jsonfiledb.py index fa5793b..a58b76f 100644 --- a/src/aerovaldb/jsondb/jsonfiledb.py +++ b/src/aerovaldb/jsondb/jsonfiledb.py @@ -423,12 +423,15 @@ def _get_uri_for_file(self, file_path: str) -> str: template = self._get_template(route, subs) + # TODO: Ugly hack need to fix try: version = run_until_finished( self._get_version(subs["project"], subs["experiment"]) ) except TypeError: version = self._get_version(subs["project"], subs["experiment"]) + except KeyError: + version = Version("0.0.1") route_arg_names = extract_substitutions(route) try: diff --git a/src/aerovaldb/sqlitedb/sqlitedb.py b/src/aerovaldb/sqlitedb/sqlitedb.py index 0c0df1c..0394371 100644 --- a/src/aerovaldb/sqlitedb/sqlitedb.py +++ b/src/aerovaldb/sqlitedb/sqlitedb.py @@ -16,7 +16,11 @@ extract_substitutions, validate_filename_component, ) -from aerovaldb.utils.string_mapper import StringMapper, VersionConstraintMapper +from aerovaldb.utils.string_mapper import ( + StringMapper, + VersionConstraintMapper, + PriorityMapper, +) import os from ..lock import FakeLock, FileLock from hashlib import md5 @@ -157,7 +161,12 @@ def __init__(self, database: str, /, **kwargs): ROUTE_STATISTICS: "statistics", ROUTE_RANGES: "ranges", ROUTE_REGIONS: "regions", - ROUTE_MODELS_STYLE: ["models_style0", "models_style1"], + ROUTE_MODELS_STYLE: PriorityMapper( + { + "models_style0": "{project}/{experiment}", + "models_style1": "{project}", + } + ), ROUTE_MAP: [ VersionConstraintMapper( "map0", diff --git a/src/aerovaldb/utils/string_mapper/mapper.py b/src/aerovaldb/utils/string_mapper/mapper.py index fa84fe5..6760adb 100644 --- a/src/aerovaldb/utils/string_mapper/mapper.py +++ b/src/aerovaldb/utils/string_mapper/mapper.py @@ -170,14 +170,22 @@ class PriorityMapper(Mapper): parameters. """ - def __init__(self, templates: list[str]): - self.templates = templates + def __init__(self, templates: list[str] | dict): + if isinstance(templates, list): + self.templates = templates + self.match = templates + elif isinstance(templates, dict): + self.templates = [] + self.match = [] + for k, v in templates.items(): + self.templates.append(k) + self.match.append(v) async def __call__(self, *args, **kwargs) -> str: selected_template = None - for t in self.templates: + for t, m in zip(self.templates, self.match): try: - t.format(**kwargs) + m.format(**kwargs) selected_template = t break except: diff --git a/tests/test-db/sqlite/test.sqlite b/tests/test-db/sqlite/test.sqlite index 1bf35d98cf1a87ee58d28d3abddb44473680b9cf..03bbfe48504882b0d5387a62247e3bc8d5e1d179 100644 GIT binary patch delta 1814 zcmZuxF>4e-7-e>k*m$`ugdmD*G-q*extZPB+1X&Sil7JqA%cRas}^D-SSNvvrJXX! zmS^D~Fc;h|h&G~#UT<+Wg4IcgAi`ao*&{Qf`Kp7DH}Bqi-}`oBP~RBTpB_W4XU8v~ z)~h>Lebnl0`HP(TeKh)^L^;_=8Lx-4JR?^OO7S;(cb9u_x5QWZp=XyF<%p(#-T{Y z%T^9BHWahnruaC1t`*<}&LOFCkkA`jh*|+haKMoa_)M|pqq9Z>hBaWNp&a=rwrD8< zy-`8(X0hs{MI)Glb;LYT%lMx+f(sd`LRP$%T-X^_&dRZmro6KD2w|%hECxlhSSk5e zwXz2tQJ@xmKsAHYU7cR&0g{oZ5E71V$Oq*mPc#)A> zWbt!|yyEf@-8KI}V6SB2RX%uUg~;I$`6NUMu9c&PGoCl}0eE60s7|anpE?hjht85^kHPlX#NKkly=O=XELrBJu0UcB zzxLdjj?RMwaGipxPPbBbMRVgA+=)`qrTS=s0OTK5>q; zUfj48kXCOeSZV0b&!u1WS-Bj-GNX$SMx1idrg57^On-5xF^dbp6~;k`ed zLyjtoQB@Wj)$`rk)_{-#L-q1MbZYzQnHP>GLL<=2_teL6 zsFLwoCCfupmaEzBy!tSHVo4FB21Kk4l;qVNBAg%u90#2~237M1uPjoE|AwIa!4DNrwXlA;)IYt}%K{92H-MGzMt(N+HY+E9#v{PQ zQ|zYRE5`}oPGDmDcjg^+V2)M`xT#jY-tn$gE@Jd=6frk>>~_4bC!6-y4e_F=W_Ka~ z()H%@wCi1Qv{BO{o|x7f-e0sZMEI{0LGf Date: Fri, 16 Aug 2024 10:46:42 +0200 Subject: [PATCH 22/28] All tests pass --- src/aerovaldb/jsondb/jsonfiledb.py | 2 +- src/aerovaldb/sqlitedb/sqlitedb.py | 35 +++++++++++++++++---- src/aerovaldb/{jsondb => utils}/filter.py | 0 src/aerovaldb/utils/string_mapper/mapper.py | 2 +- 4 files changed, 31 insertions(+), 8 deletions(-) rename src/aerovaldb/{jsondb => utils}/filter.py (100%) diff --git a/src/aerovaldb/jsondb/jsonfiledb.py b/src/aerovaldb/jsondb/jsonfiledb.py index a58b76f..dc0fbe2 100644 --- a/src/aerovaldb/jsondb/jsonfiledb.py +++ b/src/aerovaldb/jsondb/jsonfiledb.py @@ -25,7 +25,7 @@ extract_substitutions, run_until_finished, ) -from .filter import filter_heatmap, filter_regional_stats +from ..utils.filter import filter_heatmap, filter_regional_stats from ..exceptions import UnsupportedOperation from .cache import JSONLRUCache from ..routes import * diff --git a/src/aerovaldb/sqlitedb/sqlitedb.py b/src/aerovaldb/sqlitedb/sqlitedb.py index 0394371..702ff74 100644 --- a/src/aerovaldb/sqlitedb/sqlitedb.py +++ b/src/aerovaldb/sqlitedb/sqlitedb.py @@ -1,9 +1,11 @@ import sqlite3 +from typing import Any, Awaitable, Callable from async_lru import alru_cache -from pkg_resources import DistributionNotFound, get_distribution +from pkg_resources import DistributionNotFound, get_distribution # type: ignore import simplejson # type: ignore import aerovaldb +from aerovaldb.utils.filter import filter_heatmap, filter_regional_stats from ..exceptions import UnsupportedOperation, UnusedArguments from ..aerovaldb import AerovalDB from ..routes import * @@ -210,6 +212,11 @@ def __init__(self, database: str, /, **kwargs): version_provider=self._get_version, ) + self.FILTERS: dict[str, Callable[..., Awaitable[Any]]] = { + ROUTE_REG_STATS: filter_regional_stats, + ROUTE_HEATMAP: filter_heatmap, + } + @async_and_sync @alru_cache(maxsize=2048) async def _get_version(self, project: str, experiment: str) -> Version: @@ -377,6 +384,7 @@ async def _get(self, route, route_args, *args, **kwargs): """, args, ) + filter_func = self.FILTERS.get(route, None) try: fetched = cur.fetchall() if not fetched: @@ -403,14 +411,29 @@ async def _get(self, route, route_args, *args, **kwargs): f"No object found for route, {route}, with args {route_args}, {kwargs}" ) from e - if access_type == AccessType.JSON_STR: - return fetched["json"] + json = fetched["json"] + # No filtered. + if filter_func is None: + if access_type == AccessType.JSON_STR: + return json + + if access_type == AccessType.OBJ: + dt = simplejson.loads(json, allow_nan=True) - if access_type == AccessType.OBJ: - dt = simplejson.loads(fetched["json"], allow_nan=True) return dt - assert False # Should never happen. + # Filtered. + if filter_func is not None: + obj = simplejson.loads(fetched["json"], allow_nan=True) + + obj = filter_func(obj, **route_args) + if access_type == AccessType.OBJ: + return obj + + if access_type == AccessType.JSON_STR: + return json_dumps_wrapper(obj) + + raise UnsupportedOperation async def _put(self, obj, route, route_args, *args, **kwargs): assert len(args) == 0 diff --git a/src/aerovaldb/jsondb/filter.py b/src/aerovaldb/utils/filter.py similarity index 100% rename from src/aerovaldb/jsondb/filter.py rename to src/aerovaldb/utils/filter.py diff --git a/src/aerovaldb/utils/string_mapper/mapper.py b/src/aerovaldb/utils/string_mapper/mapper.py index 6760adb..76f139e 100644 --- a/src/aerovaldb/utils/string_mapper/mapper.py +++ b/src/aerovaldb/utils/string_mapper/mapper.py @@ -28,7 +28,7 @@ class StringMapper: additional constraints (such as version). """ - def __init__(self, lookup_table: Mapping, /, version_provider: VersionProvider): + def __init__(self, lookup_table: dict, /, version_provider: VersionProvider): """ :param lookup_table : A configuration lookuptable. """ From f6c15f7f1fc1e5366b59d0e42d768bdc4548fc98 Mon Sep 17 00:00:00 2001 From: thorbjoernl <51087536+thorbjoernl@users.noreply.github.com> Date: Fri, 16 Aug 2024 11:17:13 +0200 Subject: [PATCH 23/28] Improve documentation --- src/aerovaldb/lock/lock.py | 2 -- src/aerovaldb/utils/asyncio.py | 7 +++++++ src/aerovaldb/utils/filter.py | 5 +++++ src/aerovaldb/utils/string_mapper/mapper.py | 10 ++++++++++ 4 files changed, 22 insertions(+), 2 deletions(-) diff --git a/src/aerovaldb/lock/lock.py b/src/aerovaldb/lock/lock.py index c676243..729ca21 100644 --- a/src/aerovaldb/lock/lock.py +++ b/src/aerovaldb/lock/lock.py @@ -79,8 +79,6 @@ def acquire(self): if has_async_loop(): run_until_finished(self._aiolock.acquire) - # task = asyncio.ensure_future(self._aiolock.acquire()) - # asyncio.wait(task) fcntl.lockf(self._lock_handle, fcntl.LOCK_EX) self._acquired = True diff --git a/src/aerovaldb/utils/asyncio.py b/src/aerovaldb/utils/asyncio.py index e5aa2a4..7db799b 100644 --- a/src/aerovaldb/utils/asyncio.py +++ b/src/aerovaldb/utils/asyncio.py @@ -42,6 +42,13 @@ def async_and_sync_wrap(*args, **kwargs): @async_and_sync async def run_until_finished(coroutine): + """ + Takes a aio coroutine, runs it and waits for it to finish. + + :param coroutine : The coroutine to be ran. + :return + The result from running coroutine. + """ task = asyncio.create_task(coroutine) await asyncio.wait(task) return task.result() diff --git a/src/aerovaldb/utils/filter.py b/src/aerovaldb/utils/filter.py index 49dc29a..607bd21 100644 --- a/src/aerovaldb/utils/filter.py +++ b/src/aerovaldb/utils/filter.py @@ -1,3 +1,8 @@ +# These filters are used to filter end points return a subset of a file / endpoint. +# Currently this only applies to regional_stats and heatmap, which both return a +# subset of the globstats endpoint. + + def filter_regional_stats(data, variable: str, network: str, layer: str, **kwargs): """ Filters regional stats out of a glob_stats data object. diff --git a/src/aerovaldb/utils/string_mapper/mapper.py b/src/aerovaldb/utils/string_mapper/mapper.py index 76f139e..8202380 100644 --- a/src/aerovaldb/utils/string_mapper/mapper.py +++ b/src/aerovaldb/utils/string_mapper/mapper.py @@ -171,6 +171,16 @@ class PriorityMapper(Mapper): """ def __init__(self, templates: list[str] | dict): + """ + :param templates + If list, a list of template strings which will + be matched in order. + If dict, a mapping between the template string + and an alternative string to match against is + expected (i. e. {"test", "{a}"} would try matching + against "{a}" during lookup, but return "test" if + match.) + """ if isinstance(templates, list): self.templates = templates self.match = templates From ebad44f1dc2334571e425914bf2d92f0d5399593 Mon Sep 17 00:00:00 2001 From: thorbjoernl <51087536+thorbjoernl@users.noreply.github.com> Date: Fri, 16 Aug 2024 13:50:02 +0200 Subject: [PATCH 24/28] Uncomment --- scripts/build_sqlite_test_database.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/scripts/build_sqlite_test_database.py b/scripts/build_sqlite_test_database.py index 2693871..7957b7d 100644 --- a/scripts/build_sqlite_test_database.py +++ b/scripts/build_sqlite_test_database.py @@ -18,18 +18,17 @@ ) sqlitedb.put_by_uri(data, uri) -# json_list = list(jsondb.list_all()) -# sqlite_list = list(sqlitedb.list_all()) -# print("The following URIs exist in jsondb but not sqlitedb") -# for x in json_list: -# if not (x in sqlite_list): -# print(x) -# -# print("The following URIs exist in sqlitedb but not jsondb") -# -# for x in sqlite_list: -# if not (x in json_list): -# print(x) +json_list = list(jsondb.list_all()) +sqlite_list = list(sqlitedb.list_all()) +print("The following URIs exist in jsondb but not sqlitedb") +for x in json_list: + if not (x in sqlite_list): + print(x) + +print("The following URIs exist in sqlitedb but not jsondb") +for x in sqlite_list: + if not (x in json_list): + print(x) print(f"jsondb number of assets: {len(list(jsondb.list_all()))}") print(f"sqlite number of assets: {len(list(sqlitedb.list_all()))}") From 225ac13702d14d784eae86b863b83c8b7847339a Mon Sep 17 00:00:00 2001 From: thorbjoernl <51087536+thorbjoernl@users.noreply.github.com> Date: Fri, 16 Aug 2024 14:23:18 +0200 Subject: [PATCH 25/28] Test list_timeseries for both json and sqlite --- src/aerovaldb/sqlitedb/sqlitedb.py | 84 ++++++++++++++++++++++++++++-- tests/jsondb/test_jsonfiledb.py | 26 --------- tests/test_aerovaldb.py | 32 +++++++++++- 3 files changed, 112 insertions(+), 30 deletions(-) diff --git a/src/aerovaldb/sqlitedb/sqlitedb.py b/src/aerovaldb/sqlitedb/sqlitedb.py index 702ff74..68e8953 100644 --- a/src/aerovaldb/sqlitedb/sqlitedb.py +++ b/src/aerovaldb/sqlitedb/sqlitedb.py @@ -515,15 +515,15 @@ def list_all(self): route_args = {} kwargs = {} for k in r.keys(): - if k == "json": + if k in ["json", "ctime", "mtime"]: continue if k in arg_names: route_args[k] = r[k] else: kwargs[k] = r[k] - route = build_uri(route, route_args, kwargs) - yield route + uri = build_uri(route, route_args, kwargs) + yield uri def _get_lock_file(self) -> str: os.makedirs(os.path.expanduser("~/.aerovaldb/.lock/"), exist_ok=True) @@ -538,3 +538,81 @@ def lock(self): return FileLock(self._get_lock_file()) return FakeLock() + + def list_glob_stats( + self, + project: str, + experiment: str, + /, + access_type: str | AccessType = AccessType.URI, + ): + if access_type != AccessType.URI: + raise ValueError( + f"Invalid access_type. Got {access_type}, expected AccessType.URI" + ) + + cur = self._con.cursor() + cur.execute( + f""" + SELECT * FROM glob_stats + WHERE project=? AND experiment=? + """, + (project, experiment), + ) + result = cur.fetchall() + + route = AerovalSqliteDB.TABLE_NAME_TO_ROUTE["glob_stats"] + for r in result: + arg_names = extract_substitutions(route) + route_args = {} + kwargs = {} + for k in r.keys(): + if k in ["json", "ctime", "mtime"]: + continue + + if k in arg_names: + route_args[k] = r[k] + else: + kwargs[k] = r[k] + + uri = build_uri(route, route_args, kwargs) + yield uri + + def list_timeseries( + self, + project: str, + experiment: str, + /, + access_type: str | AccessType = AccessType.URI, + ): + if access_type != AccessType.URI: + raise ValueError( + f"Invalid access_type. Got {access_type}, expected AccessType.URI" + ) + + cur = self._con.cursor() + cur.execute( + f""" + SELECT * FROM timeseries + WHERE project=? AND experiment=? + """, + (project, experiment), + ) + result = cur.fetchall() + + route = AerovalSqliteDB.TABLE_NAME_TO_ROUTE["timeseries"] + for r in result: + arg_names = extract_substitutions(route) + route_args = {} + kwargs = {} + for k in r.keys(): + if k in ["json", "ctime", "mtime"]: + continue + + if k in arg_names: + route_args[k] = r[k] + else: + kwargs[k] = r[k] + + uri = build_uri(route, route_args, kwargs) + yield uri diff --git a/tests/jsondb/test_jsonfiledb.py b/tests/jsondb/test_jsonfiledb.py index 6127245..99eba17 100644 --- a/tests/jsondb/test_jsonfiledb.py +++ b/tests/jsondb/test_jsonfiledb.py @@ -3,18 +3,6 @@ from aerovaldb.jsondb.jsonfiledb import AerovalJsonFileDB -def test_version1(): - """ """ - with aerovaldb.open("json_files:./tests/test-db/json") as db: - assert str(db._get_version("project", "experiment")) == "0.13.5" - - -def test_version2(): - """ """ - with aerovaldb.open("json_files:./tests/test-db/json") as db: - assert str(db._get_version("project", "experiment-old")) == "0.0.5" - - def test_list_experiments(): with aerovaldb.open("json_files:./tests/test-db/json") as db: experiments = db._list_experiments("project") @@ -38,20 +26,6 @@ def test_get_experiments(): } -def test_list_timeseries(): - with aerovaldb.open("json_files:./tests/test-db/json") as db: - timeseries = db.list_timeseries("project", "experiment") - - assert len(list(timeseries)) == 1 - - -def test_list_glob_stats(): - with aerovaldb.open("json_files:./tests/test-db/json") as db: - glob_stats = list(db.list_glob_stats("project", "experiment")) - - assert len(glob_stats) == 1 - - def test_jsonfiledb__get_uri_for_file(tmp_path): with aerovaldb.open(f"json_files:{str(tmp_path)}") as db: db: AerovalJsonFileDB diff --git a/tests/test_aerovaldb.py b/tests/test_aerovaldb.py index 418de92..d7de8f8 100644 --- a/tests/test_aerovaldb.py +++ b/tests/test_aerovaldb.py @@ -5,7 +5,7 @@ # - json_files: tests/jsondb/test_jsonfiledb.py # - sqlitedb: tests/sqlitedb/test_sqlitedb.py -import simplejson +import simplejson # type: ignore import aerovaldb import pytest import random @@ -432,3 +432,33 @@ def test_getter_with_default_error(testdb): db.get_by_uri( "/v0/report/project/experiment/invalid-json", default={"data": "data"} ) + + +@TESTDB_PARAMETRIZATION +def test_version1(testdb): + """ """ + with aerovaldb.open(testdb) as db: + assert str(db._get_version("project", "experiment")) == "0.13.5" + + +@TESTDB_PARAMETRIZATION +def test_version2(testdb): + """ """ + with aerovaldb.open(testdb) as db: + assert str(db._get_version("project", "experiment-old")) == "0.0.5" + + +@TESTDB_PARAMETRIZATION +def test_list_glob_stats(testdb): + with aerovaldb.open(testdb) as db: + glob_stats = list(db.list_glob_stats("project", "experiment")) + + assert len(glob_stats) == 1 + + +@TESTDB_PARAMETRIZATION +def test_list_timeseries(testdb): + with aerovaldb.open(testdb) as db: + timeseries = db.list_timeseries("project", "experiment") + + assert len(list(timeseries)) == 1 From 9d9090d8b4c3de7c0d5c8e52032b793db100cc74 Mon Sep 17 00:00:00 2001 From: thorbjoernl <51087536+thorbjoernl@users.noreply.github.com> Date: Tue, 20 Aug 2024 11:54:26 +0200 Subject: [PATCH 26/28] WIP --- src/aerovaldb/jsondb/jsonfiledb.py | 65 ------------------------------ tests/jsondb/test_jsonfiledb.py | 23 ----------- 2 files changed, 88 deletions(-) diff --git a/src/aerovaldb/jsondb/jsonfiledb.py b/src/aerovaldb/jsondb/jsonfiledb.py index d631ddf..c383bff 100644 --- a/src/aerovaldb/jsondb/jsonfiledb.py +++ b/src/aerovaldb/jsondb/jsonfiledb.py @@ -273,47 +273,6 @@ async def _put(self, obj, route, route_args, *args, **kwargs): with open(file_path, "w") as f: f.write(json) - @async_and_sync - async def get_experiments(self, project: str, /, *args, exp_order=None, **kwargs): - # If an experiments.json file exists, read it. - try: - access_type = self._normalize_access_type(kwargs.pop("access_type", None)) - experiments = await self._get( - ROUTE_EXPERIMENTS, - {"project": project}, - access_type=access_type, - ) - except FileNotFoundError: - pass - else: - return experiments - - # Otherwise generate it based on config and expinfo.public information. - experiments = {} - for exp in self._list_experiments(project, has_results=True): - public = False - try: - config = await self.get_config(project, exp) - except FileNotFoundError: - pass - else: - public = config.get("exp_info", {}).get("public", False) - experiments[exp] = {"public": public} - - access_type = self._normalize_access_type(kwargs.pop("access_type", None)) - - experiments = dict(experiments.items()) - if access_type == AccessType.FILE_PATH: - raise UnsupportedOperation( - f"get_experiments() does not support access_type {access_type}." - ) - - if access_type == AccessType.JSON_STR: - json = json_dumps_wrapper(experiments) - return json - - return experiments - def rm_experiment_data(self, project: str, experiment: str) -> None: """Deletes ALL data associated with an experiment. @@ -555,30 +514,6 @@ def list_map( yield self._get_uri_for_file(f) - def _list_experiments( - self, project: str, /, has_results: bool = False - ) -> list[str]: - project_path = os.path.join(self._basedir, project) - experiments = [] - - if not os.path.exists(project_path): - return [] - - for f in os.listdir(project_path): - if not has_results: - if os.path.isdir(os.path.join(project_path, f)): - experiments.append(f) - else: - if not os.path.isdir(os.path.join(project_path, f)): - continue - glb = os.path.join(project_path, f, "map", "*.json") - if len(glob.glob(glb)) == 0: - continue - - experiments.append(f) - - return experiments - @async_and_sync async def get_by_uri( self, diff --git a/tests/jsondb/test_jsonfiledb.py b/tests/jsondb/test_jsonfiledb.py index 99eba17..b3a1fa3 100644 --- a/tests/jsondb/test_jsonfiledb.py +++ b/tests/jsondb/test_jsonfiledb.py @@ -3,29 +3,6 @@ from aerovaldb.jsondb.jsonfiledb import AerovalJsonFileDB -def test_list_experiments(): - with aerovaldb.open("json_files:./tests/test-db/json") as db: - experiments = db._list_experiments("project") - assert set(experiments) == set( - ["experiment", "experiment-old", "empty-experiment"] - ) - - -def test_list_experiments_results_only(): - with aerovaldb.open("json_files:./tests/test-db/json") as db: - experiments = db._list_experiments("project", has_results=True) - assert set(experiments) == set(["experiment", "experiment-old"]) - - -def test_get_experiments(): - with aerovaldb.open("json_files:./tests/test-db/json") as db: - experiments = db.get_experiments("project-no-experiments-json") - - assert experiments == { - "experiment": {"public": True}, - } - - def test_jsonfiledb__get_uri_for_file(tmp_path): with aerovaldb.open(f"json_files:{str(tmp_path)}") as db: db: AerovalJsonFileDB From ce6bb17e37f0b8a87d5a900186a520aa5857f3a8 Mon Sep 17 00:00:00 2001 From: thorbjoernl <51087536+thorbjoernl@users.noreply.github.com> Date: Fri, 23 Aug 2024 12:41:03 +0200 Subject: [PATCH 27/28] Code review --- src/aerovaldb/utils/string_mapper/mapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aerovaldb/utils/string_mapper/mapper.py b/src/aerovaldb/utils/string_mapper/mapper.py index 8202380..9138c6d 100644 --- a/src/aerovaldb/utils/string_mapper/mapper.py +++ b/src/aerovaldb/utils/string_mapper/mapper.py @@ -3,7 +3,7 @@ from abc import ABC from packaging.version import Version -logger = logging.getLogger() +logger = logging.getLogger(__name__) VersionProvider = Callable[[str, str], Awaitable[Version]] From ea6343447df98fe3a6ab2862361756563f070d69 Mon Sep 17 00:00:00 2001 From: thorbjoernl <51087536+thorbjoernl@users.noreply.github.com> Date: Fri, 23 Aug 2024 12:41:27 +0200 Subject: [PATCH 28/28] Code review 2 --- src/aerovaldb/jsondb/jsonfiledb.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/aerovaldb/jsondb/jsonfiledb.py b/src/aerovaldb/jsondb/jsonfiledb.py index f070a0a..a65c0a5 100644 --- a/src/aerovaldb/jsondb/jsonfiledb.py +++ b/src/aerovaldb/jsondb/jsonfiledb.py @@ -148,9 +148,6 @@ async def _get_version(self, project: str, experiment: str) -> Version: version = Version("0.0.1") finally: return version - # except simplejson.JSONDecodeError: - # # Work around for https://github.com/metno/aerovaldb/issues/28 - # return Version("0.14.0") try: version_str = config["exp_info"]["pyaerocom_version"]