Skip to content

Commit

Permalink
DEV-2783: add unique link test (#1069)
Browse files Browse the repository at this point in the history
Add test for detecting duplicate link definition via name or backref.
  • Loading branch information
kulgan authored Jun 14, 2024
1 parent 08b5658 commit a5deb2c
Show file tree
Hide file tree
Showing 4 changed files with 108 additions and 8 deletions.
7 changes: 4 additions & 3 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,16 @@ author_email = [email protected]
maintainer = GDC Feature Team
maintainer_email = [email protected]
license = Apache Software License 2.0
license_file = LICENSE
license_files = LICENSE
url = https://github.com/NCI-GDC/gdcdictionary
keywords = Genomic Data Commons, JSON schema, Data Dictionary, GDC, Cancer Research
classifiers =
Programming Language :: PythonLicense :: OSI Approved :: Apache Software License
Operating System :: Unix
Operating System :: POSIX :: Linux
License :: OSI Approved :: Apache Software License
Topic :: Software Development :: Libraries :: Python Modules
Topic :: Internet
Programming Language :: Python
Programming Language :: Python :: 3.7
Programming Language :: Python :: 3.8
Programming Language :: Python :: 3.9
Expand All @@ -25,7 +26,7 @@ classifiers =

[options]
zip_safe = True
packages = find:
packages = find_namespace:
package_dir =
=src
python_requires = >=3.7
Expand Down
8 changes: 4 additions & 4 deletions src/gdcdictionary/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,9 @@ def from_values(cls, schema: str, message: str, keys: List[str]) -> SchemaValida
if "Additional properties are not allowed" in message:
message = f"Key(s) {keys} not a valid property for type '{schema}'"
logger.debug(
"json schema violation for '%s'",
"gdcdictionary schema violation for '%s'",
schema,
extra={"keys": keys, "message": message},
extra={"keys": keys, "violation": message},
)
return SchemaValidationError(schema, message, keys)

Expand Down Expand Up @@ -120,7 +120,7 @@ def iter_errors(
extra={
"partial": partial,
"keys": violation.keys,
"message": violation.message,
"violation": violation.message,
},
)
violation = SchemaValidationError.from_values(self.name, message, keys)
Expand All @@ -130,7 +130,7 @@ def iter_errors(
extra={
"partial": partial,
"keys": violation.keys,
"message": violation.message,
"violation": violation.message,
},
)
continue
Expand Down
99 changes: 99 additions & 0 deletions tests/test_gdcdictionary.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
import dataclasses
from typing import Any, Mapping, Sequence

try:
from importlib.resources import files
except ImportError:
Expand All @@ -8,6 +11,97 @@
import gdcdictionary


@dataclasses.dataclass(frozen=True)
class Association:
    """A named, directed edge from a ``source`` node to a ``target`` node.

    Each link declared on a node expands into two associations: a forward
    one named after the link's ``name`` and a reverse one named after its
    ``backref``.

    Take this definition on the node ``aliquot``::

        id: aliquot
        links:
          - exclusive: true
            required: true
            subgroup:
              - name: analytes
                backref: aliquots
                label: derived_from
                target_type: analyte
                multiplicity: many_to_one
                required: false
              - name: samples
                backref: aliquots
                label: derived_from
                target_type: sample
                multiplicity: many_to_many
                required: false
              - name: centers
                backref: aliquots
                label: shipped_to
                target_type: center
                multiplicity: many_to_one
                required: false

    It produces six associations::

        centers:  aliquot --> center
        aliquots: center  --> aliquot
        analytes: aliquot --> analyte
        aliquots: analyte --> aliquot
        samples:  aliquot --> sample
        aliquots: sample  --> aliquot

    An association name must be unique within its source node; for
    instance, ``aliquot`` may carry only one association called ``centers``.
    """

    name: str
    source: str
    # Left out of equality and hashing so that two associations sharing a
    # name and source are treated as the same entry whatever they point at.
    target: str = dataclasses.field(hash=False, compare=False)


class Associations(set):
    """An unordered collection of unique associations.

    Unlike a plain ``set``, inserting an entry that is already present is
    treated as an error instead of being silently ignored.
    """

    def add(self, association: Association) -> None:
        """Insert a single association.

        Raises:
            KeyError: if an equal association is already in the collection.
        """
        if association in self:
            raise KeyError(f"{association} already exists - Duplicate links not allowed.")
        super().add(association)

    def update(self, associations: "Associations") -> None:
        """Insert every association from ``associations``.

        The whole batch is validated before anything is stored, so a failed
        call leaves the collection unchanged.

        Raises:
            ValueError: if an incoming association is already in the
                collection, or appears more than once in the batch.
        """
        # Materialise first: the input is walked for validation and again for
        # insertion, which would leave a one-shot iterator empty the second time.
        incoming = list(associations)
        accepted = set()
        for association in incoming:
            if association in self or association in accepted:
                raise ValueError(f"{association} already exists - Duplicate links not allowed.")
            accepted.add(association)
        super().update(accepted)


def extract_links(source: str, links: Sequence[Mapping[str, Any]]) -> Associations:
    """Build the associations described by the ``links`` of one node schema.

    A plain link contributes a forward association (named by ``name``,
    running from ``source`` to the link's ``target_type``) and a reverse one
    (named by ``backref``, running the opposite way). An entry holding a
    ``subgroup`` key is expanded recursively and its results are merged in.

    Duplicate entries surface as the exceptions raised by ``Associations``.
    """
    collected = Associations()
    for entry in links:
        if "subgroup" not in entry:
            link_name = entry["name"]
            target_type = entry["target_type"]
            backref = entry["backref"]
            outgoing = Association(name=link_name, source=source, target=target_type)
            incoming = Association(name=backref, source=target_type, target=source)
            # Forward direction is registered before the reverse one.
            for association in (outgoing, incoming):
                collected.add(association)
        else:
            nested = extract_links(source, entry["subgroup"])
            collected.update(nested)
    return collected


def read_associations(dictionary: gdcdictionary.GDCDictionary) -> Associations:
    """Collect the associations declared by every schema in ``dictionary``.

    Each schema's ``id`` is used as the source node and its ``links`` are
    expanded with ``extract_links``; the per-node results are merged into
    one collection, which rejects duplicates.
    """
    combined = Associations()
    for node_schema in dictionary.schema.values():
        node_associations = extract_links(node_schema["id"], node_schema["links"])
        combined.update(node_associations)
    return combined


def test_load_dictionary__invalid_location() -> None:
    """Constructing a dictionary from a non-existent root must raise IOError."""
    missing_root = "invalid/dir/path"
    with pytest.raises(IOError):
        gdcdictionary.GDCDictionary(root_dir=missing_root)
Expand All @@ -19,3 +113,8 @@ def test_load_dictionary() -> None:
root_dir=str(path.parent / "src/gdcdictionary/schemas"), lazy=False
)
assert dictionary.loaded is True


def test_unique_associations() -> None:
    """Check that no node defines the same association twice.

    Collecting the associations raises as soon as a duplicate name or
    backref is found for a source node, so the test passes simply by
    completing without an exception.
    """
    read_associations(gdcdictionary.gdcdictionary)
2 changes: 1 addition & 1 deletion tests/test_validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def test_validate_instances__invalid_types(partial: bool) -> None:
)


def test_partials_validation():
def test_partials_validation() -> None:
# example missing required fields
instances = [
{"type": "case", "days_to_consent": 123, "submitter_id": "UNSC-2"},
Expand Down

0 comments on commit a5deb2c

Please sign in to comment.