Skip to content

Commit

Permalink
Merge pull request #3829 from broadinstitute/sample-manifest-float-bug
Browse files (browse the repository at this point in the history)
Sample manifest bug
  • Loading branch information
hanars authored Jan 22, 2024
2 parents 5601daa + 7c9e655 commit 12c58f0
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 13 deletions.
21 changes: 13 additions & 8 deletions seqr/views/apis/individual_api_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ def test_edit_individuals(self, mock_pm_group):
self.assertEqual(response.status_code, 400)
self.assertListEqual(response.json()['errors'], [
'Invalid parental guid I000020_na65432',
-'NA21234 is recorded as Female and also as the father of NA21987',
+'NA21234 is recorded as Female sex and also as the father of NA21987',
])

update_json = deepcopy(EXTERNAL_WORKSPACE_INDIVIDUAL_UPDATE_DATA)
Expand Down Expand Up @@ -441,8 +441,8 @@ def test_individuals_table_handler_errors(self):
'errors': [
'Invalid proband relationship "Father" for NA19675_1 with given gender Female',
'NA19675_1 is recorded as their own father',
-'NA19675_1 is recorded as Female and also as the father of NA19675_1',
-'NA19675_1 is recorded as Female and also as the father of NA19675_2',
+'NA19675_1 is recorded as Female sex and also as the father of NA19675_1',
+'NA19675_1 is recorded as Female sex and also as the father of NA19675_2',
'NA19675_1 is recorded as the father of NA19675_2 but they have different family ids: 1 and 2',
'NA19675_1 is included as 2 separate records, but must be unique within the project',
],
Expand Down Expand Up @@ -530,7 +530,7 @@ def test_individuals_sample_manifest_table_handler(self, mock_email, mock_no_val

def _send_request_data(data):
return self.client.post(receive_url, {'f': SimpleUploadedFile(
-'sample_manifest.tsv', '\n'.join(['\t'.join(row) for row in data]).encode('utf-8')),
+'sample_manifest.tsv', '\n'.join(['\t'.join([str(c) for c in row]) for row in data]).encode('utf-8')),
})

header_2 = [
Expand Down Expand Up @@ -580,7 +580,7 @@ def _send_request_data(data):
data[2] = header_3
data += [
['SK-3QVD', 'A02', 'SM-IRW6C', 'PED073', 'SCO_PED073B_GA0339', 'SCO_PED073B_GA0339_1', '', '', 'male',
-'unaffected', 'UBERON:0000479 (tissue)', 'blood plasma', '', 'Unknown', '20', '94.8', 'probably dad', '',
+'unaffected', 'UBERON:0000479 (tissue)', 'blood plasma', '', 'Unknown', '20', 94.8, 'probably dad', '',
'', 'GMB', '1234'],
['SK-3QVD', 'A03', 'SM-IRW69', 'PED073', 'SCO_PED073C_GA0340', 'SCO_PED073C_GA0340_1',
'SCO_PED073B_GA0339_1', 'SCO_PED073A_GA0338_1', 'female', 'affected', 'UBERON:0002371 (bone marrow)',
Expand All @@ -607,7 +607,12 @@ def _send_request_data(data):
missing_columns_error, 'Consent code in manifest "GMB" does not match project consent code "HMB"',
]})

-data[3][12] = 'No'
+data[3][12] = 'Maybe'
+response = _send_request_data(data)
+self.assertEqual(response.status_code, 400)
+self.assertDictEqual(response.json(), {'warnings': None, 'errors': ['Invalid value "Maybe" for Tissue Affected Status in row #1']})
+
+data[3][12] = 'Unknown'
data[3][17] = 'microcephaly'
data[3][18] = 'MONDO:0001149'
data[3][-2] = ''
Expand Down Expand Up @@ -667,7 +672,7 @@ def _send_request_data(data):
# Test original file copy is correct
original_wb = load_workbook(BytesIO(mock_email.call_args.kwargs['attachments'][1][1]))
original_ws = original_wb.active
-self.assertListEqual([[cell.value or '' for cell in row] for row in original_ws], data)
+self.assertListEqual([[cell.value or '' for cell in row] for row in original_ws], [[str(c) for c in row] for row in data])

url = reverse(save_individuals_table_handler, args=[PROJECT_GUID, response_json['uploadedFileId']])
response = self.client.post(url)
Expand All @@ -691,7 +696,7 @@ def _send_request_data(data):
indiv_1 = next(i for i in response_json['individualsByGuid'].values() if i['individualId'] == 'SCO_PED073B_GA0339_1')
self.assertDictEqual({k: v for k, v in indiv_1.items() if k in test_keys}, {
'affected': 'N', 'notes': 'probably dad', 'sex': 'M', 'maternalId': None, 'paternalId': None,
-'primaryBiosample': 'T', 'analyteType': 'B', 'tissueAffectedStatus': False,
+'primaryBiosample': 'T', 'analyteType': 'B', 'tissueAffectedStatus': None,
'probandRelationship': 'F',
})
indiv_2 = next(i for i in response_json['individualsByGuid'].values() if i['individualId'] == 'SCO_PED073C_GA0341_1')
Expand Down
12 changes: 7 additions & 5 deletions seqr/views/utils/pedigree_info_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def _parse_pedigree_table_rows(parsed_file, filename, header=None, rows=None):
i + 1, len(row), ', '.join(row), len(header), ', '.join(header)
))

-formatted_rows = [{header_item: field.strip() for header_item, field in zip(header, row)} for row in rows]
+formatted_rows = [{header_item: str(field).strip() for header_item, field in zip(header, row)} for row in rows]
return formatted_rows, header

except Exception as e:
Expand Down Expand Up @@ -187,7 +187,7 @@ def _convert_fam_file_rows_to_json(column_map, rows, required_columns=None):

try:
value = _format_value(value, column)
-except ValueError:
+except (KeyError, ValueError):
errors.append(f'Invalid value "{value}" for {_to_title_case(_to_snake_case(column))} in row #{i + 1}')
continue

Expand Down Expand Up @@ -227,7 +227,7 @@ def _format_value(value, column):
if format_func:
if (value or column in {JsonConstants.SEX_COLUMN, JsonConstants.AFFECTED_COLUMN}):
value = format_func(value)
-if value is None and column not in JsonConstants.JSON_COLUMNS:
+if value is None and column not in JsonConstants.NULLABLE_COLUMNS:
raise ValueError()
elif value == '':
value = None
Expand Down Expand Up @@ -299,7 +299,7 @@ def validate_fam_file_records(records, fail_on_warnings=False, errors=None):
actual_sex = records_by_id[parent_id][JsonConstants.SEX_COLUMN]
if actual_sex != expected_sex:
actual_sex_label = dict(Individual.SEX_CHOICES)[actual_sex]
-errors.append("%(parent_id)s is recorded as %(actual_sex_label)s and also as the %(parent_id_type)s of %(individual_id)s" % locals())
+errors.append("%(parent_id)s is recorded as %(actual_sex_label)s sex and also as the %(parent_id_type)s of %(individual_id)s" % locals())

# is the parent in the same family?
parent = records_by_id[parent_id]
Expand Down Expand Up @@ -784,6 +784,8 @@ class JsonConstants:
TISSUE_AFFECTED_STATUS = 'tissueAffectedStatus'

JSON_COLUMNS = {MATERNAL_ETHNICITY, PATERNAL_ETHNICITY, BIRTH_YEAR, DEATH_YEAR, ONSET_AGE, AFFECTED_RELATIVES}
+NULLABLE_COLUMNS = {TISSUE_AFFECTED_STATUS}
+NULLABLE_COLUMNS.update(JSON_COLUMNS)

FORMAT_COLUMNS = {
SEX_COLUMN: _parse_sex,
Expand All @@ -794,7 +796,7 @@ class JsonConstants:
PRIMARY_BIOSAMPLE: lambda value: next(
(code for code, uberon_code in Individual.BIOSAMPLE_CHOICES if value.startswith(uberon_code)), None),
ANALYTE_TYPE: Individual.ANALYTE_REVERSE_LOOKUP.get,
-TISSUE_AFFECTED_STATUS: {'Yes': True, 'No': False}.get,
+TISSUE_AFFECTED_STATUS: lambda value: {'Yes': True, 'No': False, 'Unknown': None}[value],
}
FORMAT_COLUMNS.update({col: json.loads for col in JSON_COLUMNS})

Expand Down

0 comments on commit 12c58f0

Please sign in to comment.