forked from OBOFoundry/OBOFoundry.github.io
-
Notifications
You must be signed in to change notification settings - Fork 0
/
check_schema.py
81 lines (64 loc) · 2.03 KB
/
check_schema.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# -*- coding: utf-8 -*-
"""A script for automatically checking the schema compliance of all ontologies.
Run with: ``python check_schema.py``.
Author: `Charles Tapley Hoyt <https://cthoyt.com>`_.
"""
import json
from collections import Counter, defaultdict
import click
from tabulate import tabulate
from utils import SCHEMA_PATH, get_data
#: Keys that get filled in automatically while the site is built.
SKIP_KEYS = {"ontology_purl"}
@click.command()
@click.option(
    "--max-cutoff",
    type=int,
    default=3,
    show_default=True,
    help="List every key that is used by at most this many ontologies.",
)
@click.option(
    "--links",
    is_flag=True,
    help=(
        "Render each ontology prefix as a Markdown link to "
        "https://obofoundry.org/ontologies/<prefix> instead of plain text."
    ),
)
def main(max_cutoff: int, links: bool) -> None:
    """Check schema usage.

    Print the keys that only a few ontologies use, followed by the schema
    properties that no ontology uses.
    """
    # Thin CLI wrapper: all of the work happens in ``_check_schema`` so that it
    # can also be called directly from Python without going through click.
    _check_schema(max_cutoff=max_cutoff, links=links)
def _check_schema(max_cutoff: int = 3, links: bool = True) -> None:
    """Report rarely used keys and schema properties that are never used.

    Two reports are printed to standard output:

    1. A GitHub-flavored Markdown table with one row for every top-level key
       that appears in at most ``max_cutoff`` ontology records, alongside the
       prefixes of the ontologies that use it.
    2. The properties declared in the JSON schema at ``SCHEMA_PATH`` that no
       ontology record uses, excluding the ones listed in ``SKIP_KEYS``.

    Rows, prefixes and unused properties are all emitted in sorted order so
    that the output is identical from one run to the next.

    :param max_cutoff: Largest number of ontology records a key may appear in
        and still be listed in the first report.
    :param links: If true, render each prefix as a Markdown link to
        ``https://obofoundry.org/ontologies/<prefix>``; otherwise print the
        bare prefix.
    """
    ontologies = get_data()

    # Number of ontology records in which each top-level key appears.
    property_usage = Counter(key for data in ontologies.values() for key in data)

    rare_keys = {
        key for key, count in property_usage.items() if count <= max_cutoff
    }

    # Map each rarely used key to the prefixes of the ontologies that use it.
    prefixes_by_key = defaultdict(set)
    for prefix, data in ontologies.items():
        for key in rare_keys:
            if key in data:
                prefixes_by_key[key].add(prefix)

    # Iterating over sets of strings directly would make the row order and the
    # prefix order depend on per-process hash randomization, so sort both.
    rows = [
        (
            key,
            ", ".join(
                (
                    f"[{prefix}](https://obofoundry.org/ontologies/{prefix})"
                    if links
                    else prefix
                )
                for prefix in sorted(prefixes_by_key[key])
            ),
        )
        for key in sorted(prefixes_by_key, key=str)
    ]

    print(f"Fields used at most {max_cutoff} times:")
    print(tabulate(rows, tablefmt="github", headers=["key", "ontologies"]))

    # JSON is read as UTF-8 explicitly rather than with the locale's default
    # encoding.
    # NOTE(review): assumes ``SCHEMA_PATH.open`` accepts an ``encoding``
    # keyword, as ``pathlib.Path.open`` does - confirm against ``utils``.
    with SCHEMA_PATH.open(encoding="utf-8") as file:
        schema = json.load(file)

    unused = {
        prop
        for prop in schema["properties"]
        if prop not in property_usage and prop not in SKIP_KEYS
    }
    print("Unused properties:")
    print(*sorted(unused), sep="\n")
# Run the click command only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()