Skip to content

Commit

Permalink
db: Support queries with multi-value IDs
Browse files Browse the repository at this point in the history
  • Loading branch information
spbnick committed Oct 23, 2024
1 parent ec123bd commit 04b0633
Show file tree
Hide file tree
Showing 9 changed files with 485 additions and 254 deletions.
38 changes: 19 additions & 19 deletions kcidb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,13 @@ def query_iter(self, ids=None,
Args:
ids: A dictionary of object list names, and lists
of IDs of objects to match. None means empty
dictionary.
dictionary. Each ID is either a tuple of
values or a single value (equivalent to a
single-value tuple). The values should match
the types, the order, and the number of the
object's ID fields as described by the
database's I/O schema (the "id_fields"
attribute).
children: True if children of matched objects should be
matched as well.
parents: True if parents of matched objects should be
Expand All @@ -157,20 +163,14 @@ def query_iter(self, ids=None,
`NotImplementedError`, if not supplied with a dataset name at
initialization time;
"""
assert ids is None or isinstance(ids, dict)
if ids is None:
ids = {}
assert all(isinstance(k, str) and isinstance(v, list) and
all(isinstance(e, str) for e in v)
for k, v in ids.items())
if not self.db_client:
raise NotImplementedError

assert self.db_client.query_ids_are_valid(ids)
assert isinstance(objects_per_report, int)
assert objects_per_report >= 0
assert isinstance(with_metadata, bool)

if not self.db_client:
raise NotImplementedError

return self.db_client.query_iter(ids=ids,
children=children, parents=parents,
objects_per_report=objects_per_report,
Expand All @@ -184,7 +184,12 @@ def query(self, ids=None, children=False, parents=False,
Args:
ids: A dictionary of object list names, and lists of
IDs of objects to match. None means empty
dictionary.
dictionary. Each ID is either a tuple of values or
a single value (equivalent to a single-value
tuple). The values should match the types, the
order, and the number of the object's ID fields as
described by the database's I/O schema (the
"id_fields" attribute).
children: True if children of matched objects should be
matched as well.
parents: True if parents of matched objects should be
Expand All @@ -200,16 +205,11 @@ def query(self, ids=None, children=False, parents=False,
`NotImplementedError`, if not supplied with a dataset name at
initialization time;
"""
assert ids is None or isinstance(ids, dict)
if ids is None:
ids = {}
assert all(isinstance(k, str) and isinstance(v, list) and
all(isinstance(e, str) for e in v)
for k, v in ids.items())
assert isinstance(with_metadata, bool)

if not self.db_client:
raise NotImplementedError
assert self.db_client.query_ids_are_valid(ids)
assert isinstance(with_metadata, bool)

return self.db_client.query(ids=ids,
children=children, parents=parents,
with_metadata=with_metadata)
Expand Down
116 changes: 96 additions & 20 deletions kcidb/db/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,80 @@ def dump(self, with_metadata=True):
except StopIteration:
return self.get_schema()[1].new()

# No, it's not, pylint: disable=too-many-return-statements
def query_ids_are_valid(self, ids):
    """
    Verify the IDs to be queried are valid according to the I/O version
    supported by the database. The database must be initialized.

    Args:
        ids:    A dictionary of object list names, and lists of IDs of
                objects to match. None means empty dictionary. Each ID is
                either a tuple of values or a single value (equivalent to
                a single-value tuple). The values should match the types,
                the order, and the number of the object's ID fields as
                described by the database's I/O schema (the "id_fields"
                attribute).

    Returns:
        True if the IDs are valid, False otherwise.
    """
    assert LIGHT_ASSERTS or self.is_initialized()
    id_fields = self.get_schema()[1].id_fields

    # None is accepted as a shorthand for "no IDs at all"
    if ids is None:
        return True
    if not isinstance(ids, dict):
        return False
    for obj_list_name, values_list in ids.items():
        if obj_list_name not in id_fields:
            return False
        obj_id_fields = id_fields[obj_list_name]
        if not isinstance(values_list, list):
            return False
        for values in values_list:
            # A bare value stands for a single-value tuple. Use a separate
            # name instead of rebinding the loop variable.
            id_values = values if isinstance(values, tuple) else (values,)
            if len(id_values) != len(obj_id_fields):
                return False
            # Check each value against the type of the ID field in the
            # same position ("field_type" avoids shadowing builtin "type")
            if not all(
                isinstance(value, field_type)
                for value, field_type in zip(id_values,
                                             obj_id_fields.values())
            ):
                return False
    return True

def query_ids_normalize(self, ids):
    """
    Bring the IDs to be queried to the canonical form: a dictionary of
    object list names and lists of IDs, with every ID being a tuple of ID
    field values for the corresponding object type.

    Args:
        ids:    A dictionary of object list names, and lists of IDs of
                objects to match. None means empty dictionary. Each ID is
                either a tuple of values or a single value (equivalent to
                a single-value tuple). The values should match the types,
                the order, and the number of the object's ID fields as
                described by the database's I/O schema (the "id_fields"
                attribute).

    Returns:
        The normalized IDs: a dictionary of object list names, and lists
        of IDs of objects to match. Each ID is a tuple of values. The
        values should match the types, the order, and the number of the
        object's ID fields as described by the database's I/O schema (the
        "id_fields" attribute).
    """
    assert self.query_ids_are_valid(ids)
    normalized = {}
    # None (or an empty dictionary) simply yields an empty result
    for list_name, id_list in (ids or {}).items():
        # Wrap every bare value into a single-value tuple,
        # leave the tuples as they are
        normalized[list_name] = [
            obj_id if isinstance(obj_id, tuple) else (obj_id,)
            for obj_id in id_list
        ]
    # The canonical form must still pass the validation
    assert self.query_ids_are_valid(normalized)
    return normalized

# We can live with this for now, pylint: disable=too-many-arguments
# Or if you prefer, pylint: disable=too-many-positional-arguments
def query_iter(self, ids=None,
Expand All @@ -330,7 +404,13 @@ def query_iter(self, ids=None,
Args:
ids: A dictionary of object list names, and lists
of IDs of objects to match. None means empty
dictionary.
dictionary. Each ID is either a tuple of
values or a single value (equivalent to a
single-value tuple). The values should match
the types, the order, and the number of the
object's ID fields as described by the
database's I/O schema (the "id_fields"
attribute).
children: True if children of matched objects should be
matched as well.
parents: True if parents of matched objects should be
Expand All @@ -346,17 +426,12 @@ def query_iter(self, ids=None,
objects.
"""
assert LIGHT_ASSERTS or self.is_initialized()
if ids is None:
ids = {}
assert isinstance(ids, dict)
assert all(isinstance(k, str) and isinstance(v, list) and
all(isinstance(e, str) for e in v)
for k, v in ids.items())
assert self.query_ids_are_valid(ids)
assert isinstance(objects_per_report, int)
assert objects_per_report >= 0
assert isinstance(with_metadata, bool)
yield from self.driver.query_iter(
ids=ids,
ids=self.query_ids_normalize(ids),
children=children, parents=parents,
objects_per_report=objects_per_report,
with_metadata=with_metadata
Expand All @@ -368,12 +443,18 @@ def query(self, ids=None, children=False, parents=False,
Match and fetch objects from the database.
Args:
ids: A dictionary of object list names, and lists of IDs of
objects to match. None means empty dictionary.
children: True if children of matched objects should be matched
as well.
parents: True if parents of matched objects should be matched
as well.
ids: A dictionary of object list names, and lists of
IDs of objects to match. None means empty
dictionary. Each ID is either a tuple of values or
a single value (equivalent to a single-value
tuple). The values should match the types, the
order, and the number of the object's ID fields as
described by the database's I/O schema (the
"id_fields" attribute).
children: True if children of matched objects should be
matched as well.
parents: True if parents of matched objects should be
matched as well.
with_metadata: True, if metadata fields should be fetched as
well. False, if not.
Expand All @@ -382,12 +463,7 @@ def query(self, ids=None, children=False, parents=False,
version.
"""
assert LIGHT_ASSERTS or self.is_initialized()
assert ids is None or (
isinstance(ids, dict) and
all(isinstance(k, str) and isinstance(v, list) and
all(isinstance(e, str) for e in v)
for k, v in ids.items())
)
assert self.query_ids_are_valid(ids)
assert isinstance(with_metadata, bool)
try:
return next(self.query_iter(ids=ids,
Expand Down
48 changes: 42 additions & 6 deletions kcidb/db/abstract.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,41 @@ def dump_iter(self, objects_per_report, with_metadata):
assert isinstance(with_metadata, bool)
assert self.is_initialized()

# No, it's not, pylint: disable=too-many-return-statements
def query_ids_are_valid(self, ids):
    """
    Verify the IDs to be queried are valid according to the I/O version
    supported by the database. The database must be initialized.

    Args:
        ids:    A dictionary of object list names, and lists of IDs of
                objects to match. Each ID is a tuple of values. The values
                should match the types, the order, and the number of the
                object's ID fields as described by the database's I/O
                schema (the "id_fields" attribute).

    Returns:
        True if the IDs are valid, False otherwise.
    """
    assert LIGHT_ASSERTS or self.is_initialized()
    id_fields = self.get_schema()[1].id_fields
    # Unlike a bare value or None, only a dictionary is acceptable here
    if not isinstance(ids, dict):
        return False
    for obj_list_name, values_list in ids.items():
        if obj_list_name not in id_fields:
            return False
        obj_id_fields = id_fields[obj_list_name]
        if not isinstance(values_list, list):
            return False
        for values in values_list:
            # Every ID must be a tuple with one value per ID field
            if not isinstance(values, tuple) or \
               len(values) != len(obj_id_fields):
                return False
            # Check each value against the type of the ID field in the
            # same position ("field_type" avoids shadowing builtin "type")
            if not all(
                isinstance(value, field_type)
                for value, field_type in zip(values,
                                             obj_id_fields.values())
            ):
                return False
    return True

# We can live with this for now, pylint: disable=too-many-arguments
# Or if you prefer, pylint: disable=too-many-positional-arguments
@abstractmethod
Expand All @@ -203,7 +238,11 @@ def query_iter(self, ids, children, parents, objects_per_report,
Args:
ids: A dictionary of object list names, and lists
of IDs of objects to match.
of IDs of objects to match. Each ID is a tuple
of values. The values should match the types,
the order, and the number of the object's ID
fields as described by the database's I/O
schema (the "id_fields" attribute).
children: True if children of matched objects should be
matched as well.
parents: True if parents of matched objects should be
Expand All @@ -218,14 +257,11 @@ def query_iter(self, ids, children, parents, objects_per_report,
database schema's I/O schema version, each containing at most the
specified number of objects.
"""
assert isinstance(ids, dict)
assert all(isinstance(k, str) and isinstance(v, list) and
all(isinstance(e, str) for e in v)
for k, v in ids.items())
assert self.is_initialized()
assert self.query_ids_are_valid(ids)
assert isinstance(objects_per_report, int)
assert objects_per_report >= 0
assert isinstance(with_metadata, bool)
assert self.is_initialized()

@abstractmethod
def oo_query(self, pattern_set):
Expand Down
Loading

0 comments on commit 04b0633

Please sign in to comment.