update tests

EBIvariation · Sep 10, 2024 · 6616329
1 parent 99589a0
commit 6616329
Show file tree

Hide file tree

Showing 6 changed files with 37 additions and 14 deletions.
diff --git a/eva_sub_cli/executables/xlsx2json.py b/eva_sub_cli/executables/xlsx2json.py
@@ -52,7 +52,7 @@ def __init__(self, xlsx_filename, conf_filename):
         try:
             self.workbook = load_workbook(xlsx_filename, read_only=True)
         except Exception as e:
-            self.add_error(f'Error loading {xlsx_filename}: {e}')
+            self.add_error(f'Error loading {xlsx_filename}: {repr(e)}')
             self.file_loaded = False
             return
         self.worksheets = []

diff --git a/eva_sub_cli/validators/validation_results_parsers.py b/eva_sub_cli/validators/validation_results_parsers.py
@@ -164,6 +164,9 @@ def convert_metadata_attribute(sheet, json_attribute, xls2json_conf):
     attributes_dict = {}
     attributes_dict.update(xls2json_conf[sheet].get('required', {}))
     attributes_dict.update(xls2json_conf[sheet].get('optional', {}))
+    attributes_dict['Scientific Name'] = 'species'
+    attributes_dict['BioSample Name'] = 'name'
+
     for attribute in attributes_dict:
         if attributes_dict[attribute] == json_attribute:
             return attribute
@@ -185,7 +188,12 @@ def parse_metadata_property(property_str):
 
 
 def parse_sample_metadata_property(property_str):
+    # Check characteristics
     match = re.match(r'/sample/(\d+)/bioSampleObject/characteristics/(\w+)', property_str)
     if match:
         return 'sample', match.group(1), match.group(2)
+    # Check name
+    match = re.match(r'/sample/(\d+)/bioSampleObject/name', property_str)
+    if match:
+        return 'sample', match.group(1), 'name'
     return None, None, None
diff --git a/eva_sub_cli/validators/validator.py b/eva_sub_cli/validators/validator.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python
 import csv
 import datetime
-import glob
 import json
 import logging
 import os
@@ -345,7 +344,7 @@ def _convert_biovalidator_validation_to_spreadsheet(self):
             sheet = convert_metadata_sheet(sheet_json, xls2json_conf)
             row = convert_metadata_row(sheet, row_json, xls2json_conf)
             column = convert_metadata_attribute(sheet, attribute_json, xls2json_conf)
-            if row_json is None and attribute_json is None:
+            if row_json is None and attribute_json is None and sheet is not None:
                 new_description = f'Sheet "{sheet}" is missing'
             elif row_json is None:
                 if 'have required' not in error['description']:

diff --git a/...ces/validation_reports/validation_output/other_validations/metadata_conversion_errors.yml b/...ces/validation_reports/validation_output/other_validations/metadata_conversion_errors.yml
@@ -1,4 +1,4 @@
-- column: Tax ID
-  description: Worksheet Project is missing required header Tax ID
+- column: ''
+  description: 'Error loading problem.xlsx: Exception()'
   row: ''
-  sheet: Project
+  sheet: ''
diff --git a/tests/test_validator.py b/tests/test_validator.py
@@ -80,8 +80,8 @@ def test__collect_validation_workflow_results_with_metadata_xlsx(self):
                 ],
                 'spreadsheet_errors': [
                     # NB. Wouldn't normally get conversion error + validation errors together, but it is supported.
-                    {'sheet': 'Project', 'row': '', 'column': 'Tax ID',
-                     'description': 'Worksheet Project is missing required header Tax ID'},
+                    {'sheet': '', 'row': '', 'column': '',
+                     'description': 'Error loading problem.xlsx: Exception()'},
                     {'sheet': 'Files', 'row': '', 'column': '', 'description': 'Sheet "Files" is missing'},
                     {'sheet': 'Project', 'row': 2, 'column': 'Project Title',
                      'description': 'Column "Project Title" is not populated'},
@@ -170,8 +170,8 @@ def test__collect_validation_workflow_results_with_metadata_json(self):
                      'description': 'alias_1,alias_2 present in Samples not in Analysis'},
                 ],
                 'spreadsheet_errors': [
-                    {'sheet': 'Project', 'row': '', 'column': 'Tax ID',
-                     'description': 'Worksheet Project is missing required header Tax ID'}
+                    {'sheet': '', 'row': '', 'column': '',
+                     'description': 'Error loading problem.xlsx: Exception()'}
                 ]
             }
         }
@@ -223,6 +223,19 @@ def test_convert_biovalidator_validation_to_spreadsheet(self):
                 {'property': '/sample/0/bioSampleObject',
                  'description': "should have required property 'bioSampleObject'"},
                 {'property': '/sample/0', 'description': 'should match exactly one schema in oneOf'},
+                # Missing BioSamples attributes
+                {'property': '/sample/3/bioSampleObject/name',
+                 'description': "must have required property 'name'"},
+                {'property': '/sample/3/bioSampleObject/characteristics/organism',
+                 'description': "must have required property 'organism'"},
+                {'property': '/sample/3/bioSampleObject/characteristics/Organism',
+                 'description': "must have required property 'Organism'"},
+                {'property': '/sample/3/bioSampleObject/characteristics/species',
+                 'description': "must have required property 'species'"},
+                {'property': '/sample/3/bioSampleObject/characteristics/Species',
+                 'description': "must have required property 'Species'"},
+                {'property': '/sample/3/bioSampleObject/characteristics',
+                 'description': 'must match a schema in anyOf'},
                 # Semantic checks
                 {'property': '/project/childProjects/1', 'description': 'PRJEBNA does not exist or is private'},
                 {'property': '/sample/2/bioSampleObject/characteristics/taxId',
@@ -248,6 +261,10 @@ def test_convert_biovalidator_validation_to_spreadsheet(self):
              'description': 'Column "Reference" is not populated'},
             {'sheet': 'Sample', 'row': 3, 'column': 'Sample Accession',
              'description': 'Column "Sample Accession" is not populated'},
+            {'sheet': 'Sample', 'row': 6, 'column': 'BioSample Name',
+             'description': 'Column "BioSample Name" is not populated'},
+            {'sheet': 'Sample', 'row': 6, 'column': 'Scientific Name',
+             'description': 'Column "Scientific Name" is not populated'},
             {'sheet': 'Project', 'row': 2, 'column': 'Child Project(s)',
              'description': 'PRJEBNA does not exist or is private'},
             {'sheet': 'Sample', 'row': 5, 'column': 'Tax Id', 'description': '1234 is not a valid taxonomy code'},
@@ -261,8 +278,8 @@ def test_collect_conversion_errors(self):
         self.validator.results['metadata_check'] = {}
         self.validator._load_spreadsheet_conversion_errors()
         assert self.validator.results['metadata_check']['spreadsheet_errors'] == [{
-                'column': 'Tax ID',
-                'description': 'Worksheet Project is missing required header Tax ID',
+                'column': '',
+                'description': 'Error loading problem.xlsx: Exception()',
                 'row': '',
-                'sheet': 'Project'
+                'sheet': ''
             }]
diff --git a/tests/test_xlsx2json.py b/tests/test_xlsx2json.py
@@ -1,6 +1,5 @@
 import json
 import os
-from copy import deepcopy
 from unittest import TestCase
 
 import jsonschema