Skip to content

Commit

Permalink
Merge pull request #1598 from HumanCellAtlas/staging
Browse files: browse the repository at this point in the history
Fixed schema_linter.py and human_readable_json.py script bugs
  • Loading branch information
arschat authored Dec 17, 2024
2 parents def5aeb + 293e535 commit f384f24
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 45 deletions.
4 changes: 2 additions & 2 deletions src/human_readable_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,9 +118,9 @@ def generateMarkdown(self, schemas, entity_type):
# print(schema["title"] + "\t "+ property + "\t"+ link)

if "enum" in schema["properties"][property]:
enum_values = ", ".join(schema["properties"][property]["enum"])
enum_values = ", ".join(str(enum) for enum in schema["properties"][property]["enum"])
elif "enum" in schema["properties"][property].get("items", ""):
enum_values = ", ".join(schema["properties"][property]["items"]["enum"])
enum_values = ", ".join(str(enum) for enum in schema["properties"][property]["items"]["enum"])
else:
enum_values = ""
file.write(property + " | "+
Expand Down
68 changes: 25 additions & 43 deletions src/schema_linter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import re
import json
import sys
import argparse
from urllib.request import urlopen
from urllib.error import HTTPError

Expand All @@ -16,7 +15,7 @@

required_schema_fields = ['$schema', 'description', 'additionalProperties', 'title', 'name', 'type', 'properties']

allowed_schema_fields = ['$schema', 'description', 'additionalProperties', 'required', 'title', 'name', 'type', 'properties', 'definitions', 'dependencies', 'if', 'then', 'else', 'minProperties']
allowed_schema_fields = ['$schema', 'description', 'additionalProperties', 'required', 'title', 'name', 'type', 'properties', 'definitions', 'dependencies', 'if', 'then', 'else', 'allOf', 'minProperties']

# Properties

Expand All @@ -38,24 +37,6 @@

graph_restriction_attributes = ['ontologies', 'classes', 'relations', 'direct', 'include_self']

# Environment names accepted by the --environment command-line option.
ENVIRONMENTS = [
    'develop',
    'integration',
    'staging',
    'master',
]

# Conversion from an accepted environment name to the name used when
# building the OLS API url. 'master' reuses the 'staging' value.
OLS_ENVIRONMENT = {
    "develop": "dev",
    "integration": "integration",
    "staging": "staging",
    "master": "staging",
}

def argument_parser():
    """Build the command-line parser for the schema linter.

    The parser accepts a single option, ``--environment`` / ``-e``, whose
    value must be one of ``ENVIRONMENTS`` and defaults to ``'staging'``.
    The parsed value is stored under the ``environment`` attribute.

    Returns:
        argparse.ArgumentParser: the configured parser; parsing the actual
        command line is left to the caller.
    """
    # Settings for the one supported option, kept together for readability.
    environment_option = {
        'type': str,
        'dest': 'environment',
        'help': 'environment the OLS API feeds on',
        'default': 'staging',
        'action': 'store',
        'choices': ENVIRONMENTS,
    }

    cli = argparse.ArgumentParser()
    cli.add_argument('--environment', '-e', **environment_option)
    return cli


class SchemaLinter:
def __init__(self):
Expand Down Expand Up @@ -163,26 +144,30 @@ def lintSchema(self, path, ols_api):
errors.append(schema_filename + ".json: Keyword `type` missing from property `" + property + "`.")

else:
# type attribute must be set to one of the valid JSON types
if properties[property]['type'] not in ["string", "number", "boolean", "array", "object", "integer"]:
errors.append(schema_filename + ".json: Type `" + properties[property]['type'] + "` is not a valid JSON type.")

# Property of type array must contain the attribute items
if properties[property]['type'] == "array" and 'items' not in properties[property].keys():
errors.append(schema_filename + ".json: Property `" + property + "` is type array but doesn't contain items.")

# Property of type array must contains the attribute items
# items must have either type or $ref attribute
if properties[property]['type'] == "array" and 'items' in properties[property].keys() and '$ref' not in properties[property]['items'].keys() and 'type' not in properties[property]['items'].keys():
errors.append(schema_filename + ".json: Property `" + property + "` is type array but items attribute doesn't contain type or $ref attribute.")

# Property of type object must contains the attribute $ref
if properties[property]['type'] == "object" and '$ref' not in properties[property].keys():
errors.append(schema_filename + ".json: Property `" + property + "` is type object but doesn't contain $ref.")
# change property to list to test all values of array
properties[property]['type'] = properties[property]['type'] if isinstance(properties[property]['type'], list) else [properties[property]['type']]

for property_type in properties[property]['type']:
# type attribute must be set to one of the valid JSON types
if property_type not in ["string", "number", "boolean", "array", "object", "integer", "null"]:
errors.append(schema_filename + ".json: Type `" + property_type + "` is not a valid JSON type.")

# Property of type array must contain the attribute items
if property_type == "array" and 'items' not in properties[property].keys():
errors.append(schema_filename + ".json: Property `" + property + "` is type array but doesn't contain items.")

# Property of type array must contains the attribute items
# items must have either type or $ref attribute
if property_type == "array" and 'items' in properties[property].keys() and '$ref' not in properties[property]['items'].keys() and 'type' not in properties[property]['items'].keys():
errors.append(schema_filename + ".json: Property `" + property + "` is type array but items attribute doesn't contain type or $ref attribute.")

# Property of type object must contains the attribute $ref
if property_type == "object" and '$ref' not in properties[property].keys():
errors.append(schema_filename + ".json: Property `" + property + "` is type object but doesn't contain $ref.")

# format must be a valid JSON format
if 'format' in properties[property].keys() and properties[property]['format'] not in ["date", "date-time", "email"]:
errors.append(schema_filename + ".json: Format `" + properties[property]['format'] + "` is not a valid JSON format).")
if 'format' in properties[property].keys() and properties[property]['format'] not in ["date", "date-time", "email", "uri"]:
errors.append(schema_filename + ".json: Format `" + properties[property]['format'] + "` is not a valid JSON format.")

# description should be a sentence - start with capital letter and end with full stop
if 'description' in properties[property].keys() and not re.match('^[A-Z][^?!]*[.]$', properties[property]['description']):
Expand Down Expand Up @@ -329,11 +314,8 @@ def get_json_from_file(self, filename, warn = False):


if __name__ == "__main__":
# Define the environment, transforming 'develop' and 'master' to their respective OLS valid environment to
# define the proper OLS API URL.
arguments = argument_parser().parse_args(sys.argv[1:])
environment = OLS_ENVIRONMENT[arguments.environment]
ols_api = 'https://ontology.{}.archive.data.humancellatlas.org/api'.format(environment)
# As of November 2024, we are now using the OLS4 for ontology checkup
ols_api = 'https://www.ebi.ac.uk/ols/api'

schema_path = '../json_schema' if cwd == 'src' else 'json_schema'
jsons = [os.path.join(dirpath, f)
Expand Down

0 comments on commit f384f24

Please sign in to comment.