diff --git a/eva_sub_cli/executables/xlsx2json.py b/eva_sub_cli/executables/xlsx2json.py index 642c5d3..e3c40b3 100644 --- a/eva_sub_cli/executables/xlsx2json.py +++ b/eva_sub_cli/executables/xlsx2json.py @@ -52,7 +52,7 @@ def __init__(self, xlsx_filename, conf_filename): try: self.workbook = load_workbook(xlsx_filename, read_only=True) except Exception as e: - self.add_error(f'Error loading {xlsx_filename}: {e}') + self.add_error(f'Error loading {xlsx_filename}: {repr(e)}') self.file_loaded = False return self.worksheets = [] diff --git a/eva_sub_cli/validators/validation_results_parsers.py b/eva_sub_cli/validators/validation_results_parsers.py index 321abc4..ed92ad0 100644 --- a/eva_sub_cli/validators/validation_results_parsers.py +++ b/eva_sub_cli/validators/validation_results_parsers.py @@ -164,6 +164,9 @@ def convert_metadata_attribute(sheet, json_attribute, xls2json_conf): attributes_dict = {} attributes_dict.update(xls2json_conf[sheet].get('required', {})) attributes_dict.update(xls2json_conf[sheet].get('optional', {})) + attributes_dict['Scientific Name'] = 'species' + attributes_dict['BioSample Name'] = 'name' + for attribute in attributes_dict: if attributes_dict[attribute] == json_attribute: return attribute @@ -185,7 +188,12 @@ def parse_metadata_property(property_str): def parse_sample_metadata_property(property_str): + # Check characteristics match = re.match(r'/sample/(\d+)/bioSampleObject/characteristics/(\w+)', property_str) if match: return 'sample', match.group(1), match.group(2) + # Check name + match = re.match(r'/sample/(\d+)/bioSampleObject/name', property_str) + if match: + return 'sample', match.group(1), 'name' return None, None, None diff --git a/eva_sub_cli/validators/validator.py b/eva_sub_cli/validators/validator.py index f3e591d..79e23b2 100755 --- a/eva_sub_cli/validators/validator.py +++ b/eva_sub_cli/validators/validator.py @@ -1,7 +1,6 @@ #!/usr/bin/env python import csv import datetime -import glob import json import logging import os @@ -345,7 +344,7 @@ def _convert_biovalidator_validation_to_spreadsheet(self): sheet = convert_metadata_sheet(sheet_json, xls2json_conf) row = convert_metadata_row(sheet, row_json, xls2json_conf) column = convert_metadata_attribute(sheet, attribute_json, xls2json_conf) - if row_json is None and attribute_json is None: + if row_json is None and attribute_json is None and sheet is not None: new_description = f'Sheet "{sheet}" is missing' elif row_json is None: if 'have required' not in error['description']: diff --git a/tests/resources/validation_reports/validation_output/other_validations/metadata_conversion_errors.yml b/tests/resources/validation_reports/validation_output/other_validations/metadata_conversion_errors.yml index fd5af64..13b09b9 100644 --- a/tests/resources/validation_reports/validation_output/other_validations/metadata_conversion_errors.yml +++ b/tests/resources/validation_reports/validation_output/other_validations/metadata_conversion_errors.yml @@ -1,4 +1,4 @@ -- column: Tax ID - description: Worksheet Project is missing required header Tax ID +- column: '' + description: 'Error loading problem.xlsx: Exception()' row: '' - sheet: Project + sheet: '' diff --git a/tests/test_validator.py b/tests/test_validator.py index a9031e3..19f0ea9 100644 --- a/tests/test_validator.py +++ b/tests/test_validator.py @@ -80,8 +80,8 @@ def test__collect_validation_workflow_results_with_metadata_xlsx(self): ], 'spreadsheet_errors': [ # NB. Wouldn't normally get conversion error + validation errors together, but it is supported. - {'sheet': 'Project', 'row': '', 'column': 'Tax ID', - 'description': 'Worksheet Project is missing required header Tax ID'}, + {'sheet': '', 'row': '', 'column': '', + 'description': 'Error loading problem.xlsx: Exception()'}, {'sheet': 'Files', 'row': '', 'column': '', 'description': 'Sheet "Files" is missing'}, {'sheet': 'Project', 'row': 2, 'column': 'Project Title', 'description': 'Column "Project Title" is not populated'}, @@ -170,8 +170,8 @@ def test__collect_validation_workflow_results_with_metadata_json(self): 'description': 'alias_1,alias_2 present in Samples not in Analysis'}, ], 'spreadsheet_errors': [ - {'sheet': 'Project', 'row': '', 'column': 'Tax ID', - 'description': 'Worksheet Project is missing required header Tax ID'} + {'sheet': '', 'row': '', 'column': '', + 'description': 'Error loading problem.xlsx: Exception()'} ] } } @@ -223,6 +223,19 @@ def test_convert_biovalidator_validation_to_spreadsheet(self): {'property': '/sample/0/bioSampleObject', 'description': "should have required property 'bioSampleObject'"}, {'property': '/sample/0', 'description': 'should match exactly one schema in oneOf'}, + # Missing BioSamples attributes + {'property': '/sample/3/bioSampleObject/name', + 'description': "must have required property 'name'"}, + {'property': '/sample/3/bioSampleObject/characteristics/organism', + 'description': "must have required property 'organism'"}, + {'property': '/sample/3/bioSampleObject/characteristics/Organism', + 'description': "must have required property 'Organism'"}, + {'property': '/sample/3/bioSampleObject/characteristics/species', + 'description': "must have required property 'species'"}, + {'property': '/sample/3/bioSampleObject/characteristics/Species', + 'description': "must have required property 'Species'"}, + {'property': '/sample/3/bioSampleObject/characteristics', + 'description': 'must match a schema in anyOf'}, # Semantic checks {'property': '/project/childProjects/1', 'description': 'PRJEBNA does not exist or is private'}, {'property': '/sample/2/bioSampleObject/characteristics/taxId', @@ -248,6 +261,10 @@ def test_convert_biovalidator_validation_to_spreadsheet(self): 'description': 'Column "Reference" is not populated'}, {'sheet': 'Sample', 'row': 3, 'column': 'Sample Accession', 'description': 'Column "Sample Accession" is not populated'}, + {'sheet': 'Sample', 'row': 6, 'column': 'BioSample Name', + 'description': 'Column "BioSample Name" is not populated'}, + {'sheet': 'Sample', 'row': 6, 'column': 'Scientific Name', + 'description': 'Column "Scientific Name" is not populated'}, {'sheet': 'Project', 'row': 2, 'column': 'Child Project(s)', 'description': 'PRJEBNA does not exist or is private'}, {'sheet': 'Sample', 'row': 5, 'column': 'Tax Id', 'description': '1234 is not a valid taxonomy code'}, @@ -261,8 +278,8 @@ def test_collect_conversion_errors(self): self.validator.results['metadata_check'] = {} self.validator._load_spreadsheet_conversion_errors() assert self.validator.results['metadata_check']['spreadsheet_errors'] == [{ - 'column': 'Tax ID', - 'description': 'Worksheet Project is missing required header Tax ID', + 'column': '', + 'description': 'Error loading problem.xlsx: Exception()', 'row': '', - 'sheet': 'Project' + 'sheet': '' }] diff --git a/tests/test_xlsx2json.py b/tests/test_xlsx2json.py index a251fb2..b9c2e45 100644 --- a/tests/test_xlsx2json.py +++ b/tests/test_xlsx2json.py @@ -1,6 +1,5 @@ import json import os -from copy import deepcopy from unittest import TestCase import jsonschema