Skip to content

Commit

Permalink
update tests
Browse files Browse the repository at this point in the history
  • Loading branch information
apriltuesday committed Sep 10, 2024
1 parent 99589a0 commit 6616329
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 14 deletions.
2 changes: 1 addition & 1 deletion eva_sub_cli/executables/xlsx2json.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def __init__(self, xlsx_filename, conf_filename):
try:
self.workbook = load_workbook(xlsx_filename, read_only=True)
except Exception as e:
self.add_error(f'Error loading {xlsx_filename}: {e}')
self.add_error(f'Error loading {xlsx_filename}: {repr(e)}')
self.file_loaded = False
return
self.worksheets = []
Expand Down
8 changes: 8 additions & 0 deletions eva_sub_cli/validators/validation_results_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,9 @@ def convert_metadata_attribute(sheet, json_attribute, xls2json_conf):
attributes_dict = {}
attributes_dict.update(xls2json_conf[sheet].get('required', {}))
attributes_dict.update(xls2json_conf[sheet].get('optional', {}))
attributes_dict['Scientific Name'] = 'species'
attributes_dict['BioSample Name'] = 'name'

for attribute in attributes_dict:
if attributes_dict[attribute] == json_attribute:
return attribute
Expand All @@ -185,7 +188,12 @@ def parse_metadata_property(property_str):


def parse_sample_metadata_property(property_str):
    """Split a sample-related validation property path into its parts.

    Two path shapes are recognised, both tested from the start of
    ``property_str`` only (``re.match`` does not anchor the end, so any
    trailing path segments are ignored):

    * ``/sample/<index>/bioSampleObject/characteristics/<attribute>``
    * ``/sample/<index>/bioSampleObject/name``

    :param property_str: property path string to parse
    :return: ``('sample', index, attribute)`` where ``index`` is the digit
        string captured from the path (not converted to ``int``) and
        ``attribute`` is either the captured characteristic name or the
        literal ``'name'``; ``(None, None, None)`` when neither shape matches.
    """
    # Check characteristics: the attribute is captured with \w+, so only the
    # leading run of word characters after "characteristics/" is returned.
    match = re.match(r'/sample/(\d+)/bioSampleObject/characteristics/(\w+)', property_str)
    if match:
        return 'sample', match.group(1), match.group(2)
    # Check name: there is nothing to capture for the attribute here, so the
    # fixed string 'name' is returned in its place.
    match = re.match(r'/sample/(\d+)/bioSampleObject/name', property_str)
    if match:
        return 'sample', match.group(1), 'name'
    # Neither shape matched: signal "not a sample property" to the caller.
    return None, None, None
3 changes: 1 addition & 2 deletions eva_sub_cli/validators/validator.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#!/usr/bin/env python
import csv
import datetime
import glob
import json
import logging
import os
Expand Down Expand Up @@ -345,7 +344,7 @@ def _convert_biovalidator_validation_to_spreadsheet(self):
sheet = convert_metadata_sheet(sheet_json, xls2json_conf)
row = convert_metadata_row(sheet, row_json, xls2json_conf)
column = convert_metadata_attribute(sheet, attribute_json, xls2json_conf)
if row_json is None and attribute_json is None:
if row_json is None and attribute_json is None and sheet is not None:
new_description = f'Sheet "{sheet}" is missing'
elif row_json is None:
if 'have required' not in error['description']:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
- column: Tax ID
description: Worksheet Project is missing required header Tax ID
- column: ''
description: 'Error loading problem.xlsx: Exception()'
row: ''
sheet: Project
sheet: ''
31 changes: 24 additions & 7 deletions tests/test_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,8 @@ def test__collect_validation_workflow_results_with_metadata_xlsx(self):
],
'spreadsheet_errors': [
# NB. Wouldn't normally get conversion error + validation errors together, but it is supported.
{'sheet': 'Project', 'row': '', 'column': 'Tax ID',
'description': 'Worksheet Project is missing required header Tax ID'},
{'sheet': '', 'row': '', 'column': '',
'description': 'Error loading problem.xlsx: Exception()'},
{'sheet': 'Files', 'row': '', 'column': '', 'description': 'Sheet "Files" is missing'},
{'sheet': 'Project', 'row': 2, 'column': 'Project Title',
'description': 'Column "Project Title" is not populated'},
Expand Down Expand Up @@ -170,8 +170,8 @@ def test__collect_validation_workflow_results_with_metadata_json(self):
'description': 'alias_1,alias_2 present in Samples not in Analysis'},
],
'spreadsheet_errors': [
{'sheet': 'Project', 'row': '', 'column': 'Tax ID',
'description': 'Worksheet Project is missing required header Tax ID'}
{'sheet': '', 'row': '', 'column': '',
'description': 'Error loading problem.xlsx: Exception()'}
]
}
}
Expand Down Expand Up @@ -223,6 +223,19 @@ def test_convert_biovalidator_validation_to_spreadsheet(self):
{'property': '/sample/0/bioSampleObject',
'description': "should have required property 'bioSampleObject'"},
{'property': '/sample/0', 'description': 'should match exactly one schema in oneOf'},
# Missing BioSamples attributes
{'property': '/sample/3/bioSampleObject/name',
'description': "must have required property 'name'"},
{'property': '/sample/3/bioSampleObject/characteristics/organism',
'description': "must have required property 'organism'"},
{'property': '/sample/3/bioSampleObject/characteristics/Organism',
'description': "must have required property 'Organism'"},
{'property': '/sample/3/bioSampleObject/characteristics/species',
'description': "must have required property 'species'"},
{'property': '/sample/3/bioSampleObject/characteristics/Species',
'description': "must have required property 'Species'"},
{'property': '/sample/3/bioSampleObject/characteristics',
'description': 'must match a schema in anyOf'},
# Semantic checks
{'property': '/project/childProjects/1', 'description': 'PRJEBNA does not exist or is private'},
{'property': '/sample/2/bioSampleObject/characteristics/taxId',
Expand All @@ -248,6 +261,10 @@ def test_convert_biovalidator_validation_to_spreadsheet(self):
'description': 'Column "Reference" is not populated'},
{'sheet': 'Sample', 'row': 3, 'column': 'Sample Accession',
'description': 'Column "Sample Accession" is not populated'},
{'sheet': 'Sample', 'row': 6, 'column': 'BioSample Name',
'description': 'Column "BioSample Name" is not populated'},
{'sheet': 'Sample', 'row': 6, 'column': 'Scientific Name',
'description': 'Column "Scientific Name" is not populated'},
{'sheet': 'Project', 'row': 2, 'column': 'Child Project(s)',
'description': 'PRJEBNA does not exist or is private'},
{'sheet': 'Sample', 'row': 5, 'column': 'Tax Id', 'description': '1234 is not a valid taxonomy code'},
Expand All @@ -261,8 +278,8 @@ def test_collect_conversion_errors(self):
self.validator.results['metadata_check'] = {}
self.validator._load_spreadsheet_conversion_errors()
assert self.validator.results['metadata_check']['spreadsheet_errors'] == [{
'column': 'Tax ID',
'description': 'Worksheet Project is missing required header Tax ID',
'column': '',
'description': 'Error loading problem.xlsx: Exception()',
'row': '',
'sheet': 'Project'
'sheet': ''
}]
1 change: 0 additions & 1 deletion tests/test_xlsx2json.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import json
import os
from copy import deepcopy
from unittest import TestCase

import jsonschema
Expand Down

0 comments on commit 6616329

Please sign in to comment.