diff --git a/seqr/views/apis/individual_api_tests.py b/seqr/views/apis/individual_api_tests.py index 50e9ecb1cd..deb032ac55 100644 --- a/seqr/views/apis/individual_api_tests.py +++ b/seqr/views/apis/individual_api_tests.py @@ -263,7 +263,7 @@ def test_edit_individuals(self, mock_pm_group): self.assertEqual(response.status_code, 400) self.assertListEqual(response.json()['errors'], [ 'Invalid parental guid I000020_na65432', - 'NA21234 is recorded as Female and also as the father of NA21987', + 'NA21234 is recorded as Female sex and also as the father of NA21987', ]) update_json = deepcopy(EXTERNAL_WORKSPACE_INDIVIDUAL_UPDATE_DATA) @@ -441,8 +441,8 @@ def test_individuals_table_handler_errors(self): 'errors': [ 'Invalid proband relationship "Father" for NA19675_1 with given gender Female', 'NA19675_1 is recorded as their own father', - 'NA19675_1 is recorded as Female and also as the father of NA19675_1', - 'NA19675_1 is recorded as Female and also as the father of NA19675_2', + 'NA19675_1 is recorded as Female sex and also as the father of NA19675_1', + 'NA19675_1 is recorded as Female sex and also as the father of NA19675_2', 'NA19675_1 is recorded as the father of NA19675_2 but they have different family ids: 1 and 2', 'NA19675_1 is included as 2 separate records, but must be unique within the project', ], @@ -530,7 +530,7 @@ def test_individuals_sample_manifest_table_handler(self, mock_email, mock_no_val def _send_request_data(data): return self.client.post(receive_url, {'f': SimpleUploadedFile( - 'sample_manifest.tsv', '\n'.join(['\t'.join(row) for row in data]).encode('utf-8')), + 'sample_manifest.tsv', '\n'.join(['\t'.join([str(c) for c in row]) for row in data]).encode('utf-8')), }) header_2 = [ @@ -580,7 +580,7 @@ def _send_request_data(data): data[2] = header_3 data += [ ['SK-3QVD', 'A02', 'SM-IRW6C', 'PED073', 'SCO_PED073B_GA0339', 'SCO_PED073B_GA0339_1', '', '', 'male', - 'unaffected', 'UBERON:0000479 (tissue)', 'blood plasma', '', 'Unknown', '20', '94.8', 'probably dad', '', + 'unaffected', 'UBERON:0000479 (tissue)', 'blood plasma', '', 'Unknown', '20', 94.8, 'probably dad', '', '', 'GMB', '1234'], ['SK-3QVD', 'A03', 'SM-IRW69', 'PED073', 'SCO_PED073C_GA0340', 'SCO_PED073C_GA0340_1', 'SCO_PED073B_GA0339_1', 'SCO_PED073A_GA0338_1', 'female', 'affected', 'UBERON:0002371 (bone marrow)', @@ -607,7 +607,12 @@ def _send_request_data(data): missing_columns_error, 'Consent code in manifest "GMB" does not match project consent code "HMB"', ]}) - data[3][12] = 'No' + data[3][12] = 'Maybe' + response = _send_request_data(data) + self.assertEqual(response.status_code, 400) + self.assertDictEqual(response.json(), {'warnings': None, 'errors': ['Invalid value "Maybe" for Tissue Affected Status in row #1']}) + + data[3][12] = 'Unknown' data[3][17] = 'microcephaly' data[3][18] = 'MONDO:0001149' data[3][-2] = '' @@ -667,7 +672,7 @@ def _send_request_data(data): # Test original file copy is correct original_wb = load_workbook(BytesIO(mock_email.call_args.kwargs['attachments'][1][1])) original_ws = original_wb.active - self.assertListEqual([[cell.value or '' for cell in row] for row in original_ws], data) + self.assertListEqual([[cell.value or '' for cell in row] for row in original_ws], [[str(c) for c in row] for row in data]) url = reverse(save_individuals_table_handler, args=[PROJECT_GUID, response_json['uploadedFileId']]) response = self.client.post(url) @@ -691,7 +696,7 @@ def _send_request_data(data): indiv_1 = next(i for i in response_json['individualsByGuid'].values() if i['individualId'] == 'SCO_PED073B_GA0339_1') self.assertDictEqual({k: v for k, v in indiv_1.items() if k in test_keys}, { 'affected': 'N', 'notes': 'probably dad', 'sex': 'M', 'maternalId': None, 'paternalId': None, - 'primaryBiosample': 'T', 'analyteType': 'B', 'tissueAffectedStatus': False, + 'primaryBiosample': 'T', 'analyteType': 'B', 'tissueAffectedStatus': None, 'probandRelationship': 'F', }) indiv_2 = next(i for i in response_json['individualsByGuid'].values() if i['individualId'] == 'SCO_PED073C_GA0341_1') diff --git a/seqr/views/utils/pedigree_info_utils.py b/seqr/views/utils/pedigree_info_utils.py index ce568b9dbd..1fd71bef1c 100644 --- a/seqr/views/utils/pedigree_info_utils.py +++ b/seqr/views/utils/pedigree_info_utils.py @@ -103,7 +103,7 @@ def _parse_pedigree_table_rows(parsed_file, filename, header=None, rows=None): i + 1, len(row), ', '.join(row), len(header), ', '.join(header) )) - formatted_rows = [{header_item: field.strip() for header_item, field in zip(header, row)} for row in rows] + formatted_rows = [{header_item: str(field).strip() for header_item, field in zip(header, row)} for row in rows] return formatted_rows, header except Exception as e: @@ -187,7 +187,7 @@ def _convert_fam_file_rows_to_json(column_map, rows, required_columns=None): try: value = _format_value(value, column) - except ValueError: + except (KeyError, ValueError): errors.append(f'Invalid value "{value}" for {_to_title_case(_to_snake_case(column))} in row #{i + 1}') continue @@ -227,7 +227,7 @@ def _format_value(value, column): if format_func: if (value or column in {JsonConstants.SEX_COLUMN, JsonConstants.AFFECTED_COLUMN}): value = format_func(value) - if value is None and column not in JsonConstants.JSON_COLUMNS: + if value is None and column not in JsonConstants.NULLABLE_COLUMNS: raise ValueError() elif value == '': value = None @@ -299,7 +299,7 @@ def validate_fam_file_records(records, fail_on_warnings=False, errors=None): actual_sex = records_by_id[parent_id][JsonConstants.SEX_COLUMN] if actual_sex != expected_sex: actual_sex_label = dict(Individual.SEX_CHOICES)[actual_sex] - errors.append("%(parent_id)s is recorded as %(actual_sex_label)s and also as the %(parent_id_type)s of %(individual_id)s" % locals()) + errors.append("%(parent_id)s is recorded as %(actual_sex_label)s sex and also as the %(parent_id_type)s of %(individual_id)s" % locals()) # is the parent in the same family? parent = records_by_id[parent_id] @@ -784,6 +784,8 @@ class JsonConstants: TISSUE_AFFECTED_STATUS = 'tissueAffectedStatus' JSON_COLUMNS = {MATERNAL_ETHNICITY, PATERNAL_ETHNICITY, BIRTH_YEAR, DEATH_YEAR, ONSET_AGE, AFFECTED_RELATIVES} + NULLABLE_COLUMNS = {TISSUE_AFFECTED_STATUS} + NULLABLE_COLUMNS.update(JSON_COLUMNS) FORMAT_COLUMNS = { SEX_COLUMN: _parse_sex, @@ -794,7 +796,7 @@ class JsonConstants: PRIMARY_BIOSAMPLE: lambda value: next( (code for code, uberon_code in Individual.BIOSAMPLE_CHOICES if value.startswith(uberon_code)), None), ANALYTE_TYPE: Individual.ANALYTE_REVERSE_LOOKUP.get, - TISSUE_AFFECTED_STATUS: {'Yes': True, 'No': False}.get, + TISSUE_AFFECTED_STATUS: lambda value: {'Yes': True, 'No': False, 'Unknown': None}[value], } FORMAT_COLUMNS.update({col: json.loads for col in JSON_COLUMNS})