Skip to content

Commit

Permalink
Merge pull request #3026 from broadinstitute/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
hanars authored Oct 24, 2022
2 parents 90665bd + 0bcaabe commit 0e7193b
Show file tree
Hide file tree
Showing 14 changed files with 70 additions and 59 deletions.
3 changes: 0 additions & 3 deletions seqr/utils/elasticsearch/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,11 +328,8 @@
'dbnsfp_DANN_score': {},
'eigen_Eigen_phred': {},
'dbnsfp_FATHMM_pred': {},
'dbnsfp_GERP_RS': {'response_key': 'gerp_rs'},
'mpc_MPC': {},
'dbnsfp_MetaSVM_pred': {},
'dbnsfp_MutationTaster_pred': {'response_key': 'mut_taster'},
'dbnsfp_phastCons100way_vertebrate': {'response_key': 'phastcons_100_vert'},
'dbnsfp_Polyphen2_HVAR_pred': {'response_key': 'polyphen'},
'gnomad_non_coding_constraint_z_score': {'response_key': 'gnomad_noncoding'},
'primate_ai_score': {'response_key': 'primate_ai'},
Expand Down
6 changes: 0 additions & 6 deletions seqr/utils/elasticsearch/es_utils_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -771,13 +771,10 @@
'contig',
'variantId',
'dbnsfp_MutationTaster_pred',
'dbnsfp_phastCons100way_vertebrate',
'dbnsfp_MetaSVM_pred',
'mpc_MPC',
'dbnsfp_DANN_score',
'eigen_Eigen_phred',
'dbnsfp_REVEL_score',
'dbnsfp_GERP_RS',
'splice_ai_delta_score',
'splice_ai_splice_consequence',
'dbnsfp_FATHMM_pred',
Expand Down Expand Up @@ -893,13 +890,10 @@
"common_low_heteroplasmy",
"contig",
"dbnsfp_FATHMM_pred",
"dbnsfp_GERP_RS",
"dbnsfp_MetaSVM_pred",
"dbnsfp_MutationTaster_pred",
"dbnsfp_Polyphen2_HVAR_pred",
"dbnsfp_REVEL_score",
"dbnsfp_SIFT_pred",
"dbnsfp_phastCons100way_vertebrate",
"end",
"filters",
"genotypes",
Expand Down
14 changes: 9 additions & 5 deletions seqr/views/apis/anvil_workspace_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from seqr.views.utils.file_utils import load_uploaded_file
from seqr.views.utils.terra_api_utils import add_service_account, has_service_account_access, TerraAPIException, \
TerraRefreshTokenFailedException
from seqr.views.utils.pedigree_info_utils import parse_pedigree_table
from seqr.views.utils.pedigree_info_utils import parse_pedigree_table, JsonConstants
from seqr.views.utils.individual_utils import add_or_update_individuals_and_families, get_updated_pedigree_json
from seqr.utils.communication_utils import safe_post_to_slack, send_html_email
from seqr.utils.file_utils import does_file_exist, mv_file_to_gs, get_gs_file_list
Expand Down Expand Up @@ -245,7 +245,10 @@ def add_workspace_data(request, project_guid):
def _parse_uploaded_pedigree(request_json, user):
# Parse families/individuals in the uploaded pedigree file
json_records = load_uploaded_file(request_json['uploadedFileId'])
pedigree_records, _ = parse_pedigree_table(json_records, 'uploaded pedigree file', user=user, fail_on_warnings=True)
pedigree_records, _ = parse_pedigree_table(
json_records, 'uploaded pedigree file', user=user, fail_on_warnings=True, required_columns=[
JsonConstants.SEX_COLUMN, JsonConstants.AFFECTED_COLUMN,
])

missing_samples = [record['individualId'] for record in pedigree_records
if record['individualId'] not in request_json['vcfSamples']]
Expand Down Expand Up @@ -278,7 +281,7 @@ def _trigger_add_workspace_data(project, pedigree_records, user, data_path, samp
# use airflow api to trigger AnVIL dags
trigger_success = _trigger_data_loading(project, data_path, sample_type, user)
# Send a slack message to the slack channel
_send_load_data_slack_msg(project, ids_path, data_path, sample_type, user)
_send_load_data_slack_msg(project, ids_path, data_path, len(updated_individuals), sample_type, user)
AirtableSession(user, base=AirtableSession.ANVIL_BASE).safe_create_record(
'AnVIL Seqr Loading Requests Tracking', {
'Requester Name': user.get_full_name(),
Expand Down Expand Up @@ -323,10 +326,10 @@ def _get_loading_project_path(project, sample_type):
def _get_seqr_project_url(project):
    """Return the URL of the seqr project page for ``project``.

    The URL is the module-level ``BASE_URL`` followed by
    ``project/<guid>/project_page``, where ``<guid>`` is ``project.guid``.
    """
    project_guid = project.guid
    return f'{BASE_URL}project/{project_guid}/project_page'

def _send_load_data_slack_msg(project, ids_path, data_path, sample_type, user):
def _send_load_data_slack_msg(project, ids_path, data_path, sample_count, sample_type, user):
pipeline_dag = _construct_dag_variables(project, data_path, sample_type)
message_content = """
*{user}* requested to load {sample_type} data ({genome_version}) from AnVIL workspace *{namespace}/{name}* at
*{user}* requested to load {sample_count} {sample_type} samples ({genome_version}) from AnVIL workspace *{namespace}/{name}* at
{path} to seqr project <{project_url}|*{project_name}*> (guid: {guid})
The sample IDs to load have been uploaded to {ids_path}.
Expand All @@ -342,6 +345,7 @@ def _send_load_data_slack_msg(project, ids_path, data_path, sample_type, user):
project_url=_get_seqr_project_url(project),
guid=project.guid,
project_name=project.name,
sample_count=sample_count,
sample_type=sample_type,
genome_version=GENOME_VERSION_LOOKUP.get(project.genome_version),
dag_name = "seqr_vcf_to_es_AnVIL_{anvil_type}_v{version}".format(anvil_type=sample_type, version=DAG_VERSION),
Expand Down
13 changes: 11 additions & 2 deletions seqr/views/apis/anvil_workspace_api_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
"Notes", "familyNotes"],
["1", "NA19675", "NA19675_1", "NA19678", "", "Female", "Affected", "A affected individual, test1-zsf", ""],
["1", "NA19678", "", "", "", "Male", "Unaffected", "a individual note", ""],
["21", "HG00735", "", "", "", "Female", "Unaffected", "", "a new family"]]
["21", "HG00735", "", "", "", "", "", "", "a new family"]]

BAD_SAMPLE_DATA = [["1", "NA19674", "NA19674_1", "NA19678", "NA19679", "Female", "Affected", "A affected individual, test1-zsf", ""]]

Expand Down Expand Up @@ -666,6 +666,14 @@ def _test_errors(self, url, fields, workspace_name):
self.assertEqual(response.reason_phrase, f'Field(s) "{field_str}" are required')
self.mock_get_ws_access_level.assert_called_with(self.manager_user, TEST_WORKSPACE_NAMESPACE, workspace_name)

# test missing columns
self.mock_load_file.return_value = [['family', 'individual'], ['1', '2']]
response = self.client.post(url, content_type='application/json', data=json.dumps(REQUEST_BODY))
self.assertEqual(response.status_code, 400)
response_json = response.json()
self.assertListEqual(response_json['errors'], [
'Error while converting uploaded pedigree file rows to json: Sex, Affected not specified in row #1'])

# test sample data error
self.mock_load_file.return_value = LOAD_SAMPLE_DATA + BAD_SAMPLE_DATA
response = self.client.post(url, content_type='application/json', data=json.dumps(REQUEST_BODY))
Expand Down Expand Up @@ -751,7 +759,7 @@ def _assert_valid_operation(self, project, test_add_data=True):
self.assertEqual(responses.calls[call_cnt+1].request.headers['Authorization'], 'Bearer {}'.format(MOCK_AIRTABLE_KEY))

slack_message = """
*[email protected]* requested to load WES data ({version}) from AnVIL workspace *my-seqr-billing/{workspace_name}* at
*[email protected]* requested to load 3 WES samples ({version}) from AnVIL workspace *my-seqr-billing/{workspace_name}* at
gs://test_bucket/test_path.vcf to seqr project <http://testserver/project/{guid}/project_page|*{project_name}*> (guid: {guid})
The sample IDs to load have been uploaded to gs://seqr-datasets/v02/{version}/AnVIL_WES/{guid}/base/{guid}_ids.txt.
Expand Down Expand Up @@ -842,6 +850,7 @@ def test_create_project_from_workspace_loading_delay_email(self):
'{}/api/v1/dags/seqr_vcf_to_es_AnVIL_WES_v0.0.1/tasks'.format(MOCK_AIRFLOW_URL),
headers={'Authorization': 'Bearer {}'.format(MOCK_TOKEN)},
json={"tasks": [
{"task_id": "pyspark_compute_project_R0006_anvil_no_project_workspace"},
{"task_id": "pyspark_compute_project_R0007_anvil_no_project_workspace"},
{"task_id": "pyspark_compute_project_R0008_anvil_no_project_workspace"}],
"total_entries": 2},
Expand Down
2 changes: 1 addition & 1 deletion seqr/views/apis/individual_api_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,7 @@ def test_individuals_table_handler(self):
self.assertDictEqual(response.json(), {'errors': mock.ANY, 'warnings': []})
errors = response.json()['errors']
self.assertEqual(len(errors), 1)
self.assertEqual(errors[0].split('\n')[0],"Error while converting test.tsv rows to json: Individual Id not specified in row #1:")
self.assertEqual(errors[0], "Error while converting test.tsv rows to json: Individual Id not specified in row #1")

response = self.client.post(individuals_url, {'f': SimpleUploadedFile(
'test.tsv', 'Family ID Individual ID Previous Individual ID\n"1" "NA19675_1" "NA19675"'.encode('utf-8'))})
Expand Down
1 change: 1 addition & 0 deletions seqr/views/utils/individual_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ def _update_from_record(record, user, families_by_id, individual_lookup, updated
individual = create_model_from_json(
Individual, {'family': family, 'individual_id': individual_id, 'case_review_status': 'I'}, user)
updated_families.add(family)
updated_individuals.add(individual)
individual_lookup[individual_id][family] = individual

record['family'] = family
Expand Down
31 changes: 18 additions & 13 deletions seqr/views/utils/pedigree_info_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from seqr.utils.communication_utils import send_html_email
from seqr.utils.logging_utils import SeqrLogger
from seqr.utils.middleware import ErrorsWarningsException
from seqr.views.utils.json_utils import _to_snake_case
from seqr.views.utils.json_utils import _to_snake_case, _to_title_case
from seqr.views.utils.permissions_utils import user_is_pm, get_pm_user_emails
from seqr.models import Individual

Expand All @@ -19,7 +19,7 @@
RELATIONSHIP_REVERSE_LOOKUP = {v.lower(): k for k, v in Individual.RELATIONSHIP_LOOKUP.items()}


def parse_pedigree_table(parsed_file, filename, user, project=None, fail_on_warnings=False):
def parse_pedigree_table(parsed_file, filename, user, project=None, fail_on_warnings=False, required_columns=None):
"""Validates and parses pedigree information from a .fam, .tsv, or Excel file.
Args:
Expand Down Expand Up @@ -98,7 +98,7 @@ def parse_pedigree_table(parsed_file, filename, user, project=None, fail_on_warn
else:
logger.info("Parsing regular pedigree file", user)

json_records = _convert_fam_file_rows_to_json(rows)
json_records = _convert_fam_file_rows_to_json(rows, required_columns=required_columns)
except Exception as e:
raise ErrorsWarningsException(['Error while converting {} rows to json: {}'.format(filename, e)], [])

Expand Down Expand Up @@ -130,7 +130,7 @@ def _parse_affected(affected):
return None


def _convert_fam_file_rows_to_json(rows):
def _convert_fam_file_rows_to_json(rows, required_columns=None):
"""Parse the values in rows and convert them to a json representation.
Args:
Expand Down Expand Up @@ -163,10 +163,12 @@ def _convert_fam_file_rows_to_json(rows):
json_record = _parse_row_dict(row_dict, i)

# validate
if not json_record.get(JsonConstants.FAMILY_ID_COLUMN):
raise ValueError("Family Id not specified in row #%d:\n%s" % (i+1, json_record))
if not json_record.get(JsonConstants.INDIVIDUAL_ID_COLUMN):
raise ValueError("Individual Id not specified in row #%d:\n%s" % (i+1, json_record))
columns = [JsonConstants.FAMILY_ID_COLUMN, JsonConstants.INDIVIDUAL_ID_COLUMN]
if required_columns:
columns += required_columns
missing_cols = [col for col in columns if not json_record.get(col)]
if missing_cols:
raise ValueError(f"{', '.join([_to_title_case(_to_snake_case(col)) for col in missing_cols])} not specified in row #{i + 1}")

json_results.append(json_record)

Expand Down Expand Up @@ -194,11 +196,14 @@ def _parse_row_dict(row_dict, i):

if column:
format_func = JsonConstants.FORMAT_COLUMNS.get(column)
if format_func and (value or column in {JsonConstants.SEX_COLUMN, JsonConstants.AFFECTED_COLUMN}):
parsed_value = format_func(value)
if parsed_value is None and column not in JsonConstants.JSON_COLUMNS:
raise ValueError(f'Invalid value "{value}" for {_to_snake_case(column)} in row #{i + 1}')
value = parsed_value
if format_func:
if (value or column in {JsonConstants.SEX_COLUMN, JsonConstants.AFFECTED_COLUMN}):
parsed_value = format_func(value)
if parsed_value is None and column not in JsonConstants.JSON_COLUMNS:
raise ValueError(f'Invalid value "{value}" for {_to_snake_case(column)} in row #{i + 1}')
value = parsed_value
elif value == '':
value = None
json_record[column] = value
return json_record

Expand Down
16 changes: 8 additions & 8 deletions seqr/views/utils/pedigree_info_utils_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,17 +27,17 @@ def test_parse_pedigree_table(self):
[['family_id', 'individual_id', 'sex', 'affected', 'father', 'mother'],
['', '', 'male', 'u', '.', 'ind2']], FILENAME, self.collaborator_user)
self.assertEqual(len(ec.exception.errors), 1)
self.assertEqual(ec.exception.errors[0].split('\n')[0],
"Error while converting {} rows to json: Family Id not specified in row #1:".format(FILENAME))
self.assertEqual(ec.exception.errors[0],
"Error while converting {} rows to json: Family Id, Individual Id not specified in row #1".format(FILENAME))
self.assertListEqual(ec.exception.warnings, [])

with self.assertRaises(ErrorsWarningsException) as ec:
parse_pedigree_table(
[['family_id', 'individual_id', 'sex', 'affected', 'father', 'mother'],
['fam1', '', 'male', 'u', '.', 'ind2']], FILENAME, self.collaborator_user)
self.assertEqual(len(ec.exception.errors), 1)
self.assertEqual(ec.exception.errors[0].split('\n')[0],
"Error while converting {} rows to json: Individual Id not specified in row #1:".format(FILENAME))
self.assertEqual(ec.exception.errors[0],
"Error while converting {} rows to json: Individual Id not specified in row #1".format(FILENAME))
self.assertListEqual(ec.exception.warnings, [])

with self.assertRaises(ErrorsWarningsException) as ec:
Expand Down Expand Up @@ -88,8 +88,8 @@ def test_parse_pedigree_table(self):
'maternalId': 'ind2', 'notes': 'some notes', 'codedPhenotype': 'HPO:12345', 'probandRelationship': '',
'previousIndividualId': 'ind1_old_id'},
{'familyId': 'fam1', 'individualId': 'ind2', 'sex': 'F', 'affected': 'N', 'paternalId': '',
'maternalId': 'ind3', 'notes': '', 'codedPhenotype': 'HPO:56789', 'probandRelationship': 'M',
'previousIndividualId': ''},
'maternalId': 'ind3', 'notes': None, 'codedPhenotype': 'HPO:56789', 'probandRelationship': 'M',
'previousIndividualId': None},
])
self.assertListEqual(warnings, no_error_warnings)

Expand Down Expand Up @@ -180,9 +180,9 @@ def test_parse_sample_manifest(self, mock_email, mock_pm_group):
records, warnings = parse_pedigree_table(original_data, FILENAME, self.pm_user, project=project)
self.assertListEqual(records, [
{'affected': 'N', 'maternalId': '', 'notes': 'probably dad', 'individualId': 'SCO_PED073B_GA0339_1',
'sex': 'M', 'familyId': 'PED073', 'paternalId': '', 'codedPhenotype': '',
'sex': 'M', 'familyId': 'PED073', 'paternalId': '', 'codedPhenotype': None,
'primaryBiosample': 'T', 'analyteType': 'B', 'tissueAffectedStatus': False,},
{'affected': 'A', 'maternalId': 'SCO_PED073A_GA0338_1', 'notes': '', 'individualId': 'SCO_PED073C_GA0340_1',
{'affected': 'A', 'maternalId': 'SCO_PED073A_GA0338_1', 'notes': None, 'individualId': 'SCO_PED073C_GA0340_1',
'sex': 'F', 'familyId': 'PED073', 'paternalId': 'SCO_PED073B_GA0339_1', 'codedPhenotype': 'Perinatal death',
'primaryBiosample': 'BM', 'analyteType': 'D', 'tissueAffectedStatus': True,
}])
Expand Down
22 changes: 9 additions & 13 deletions seqr/views/utils/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -896,9 +896,8 @@ def call_request_json(self, index=-1):
'pos': 248367227,
'predictions': {'splice_ai': 0.75, 'eigen': None, 'revel': None, 'mut_taster': None, 'fathmm': None,
'hmtvar': None, 'apogee': None, 'haplogroup_defining': None, 'mitotip': None,
'polyphen': None, 'dann': None, 'sift': None, 'cadd': '25.9', 'metasvm': None, 'primate_ai': None,
'gerp_rs': None, 'mpc': None, 'phastcons_100_vert': None, 'strvctvre': None,
'splice_ai_consequence': None, 'gnomad_noncoding': 1.01272,},
'polyphen': None, 'dann': None, 'sift': None, 'cadd': '25.9', 'primate_ai': None,
'mpc': None, 'strvctvre': None, 'splice_ai_consequence': None, 'gnomad_noncoding': 1.01272,},
'ref': 'TC',
'rsid': None,
'screenRegionType': 'dELS',
Expand Down Expand Up @@ -982,8 +981,8 @@ def call_request_json(self, index=-1):
'predictions': {
'hmtvar': None, 'apogee': None, 'haplogroup_defining': None, 'mitotip': None, 'gnomad_noncoding': None,
'splice_ai': None, 'eigen': None, 'revel': None, 'mut_taster': None, 'fathmm': None, 'polyphen': None,
'dann': None, 'sift': None, 'cadd': None, 'metasvm': None, 'primate_ai': 1, 'gerp_rs': None,
'mpc': None, 'phastcons_100_vert': None, 'strvctvre': None, 'splice_ai_consequence': None,
'dann': None, 'sift': None, 'cadd': None, 'primate_ai': 1,
'mpc': None, 'strvctvre': None, 'splice_ai_consequence': None,
},
'ref': 'GAGA',
'rsid': None,
Expand Down Expand Up @@ -1068,9 +1067,8 @@ def call_request_json(self, index=-1):
'pos': 49045487,
'predictions': {'splice_ai': None, 'eigen': None, 'revel': None, 'mut_taster': None, 'fathmm': None,
'hmtvar': None, 'apogee': None, 'haplogroup_defining': None, 'mitotip': None, 'gnomad_noncoding': None,
'polyphen': None, 'dann': None, 'sift': None, 'cadd': None, 'metasvm': None, 'primate_ai': None,
'gerp_rs': None, 'mpc': None, 'phastcons_100_vert': None, 'strvctvre': 0.374,
'splice_ai_consequence': None},
'polyphen': None, 'dann': None, 'sift': None, 'cadd': None, 'primate_ai': None,
'mpc': None, 'strvctvre': 0.374, 'splice_ai_consequence': None},
'ref': None,
'rsid': None,
'screenRegionType': None,
Expand Down Expand Up @@ -1156,9 +1154,8 @@ def call_request_json(self, index=-1):
'pos': 49045387,
'predictions': {'splice_ai': None, 'eigen': None, 'revel': None, 'mut_taster': None, 'fathmm': None,
'hmtvar': None, 'apogee': None, 'haplogroup_defining': None, 'mitotip': None,
'polyphen': None, 'dann': None, 'sift': None, 'cadd': None, 'metasvm': None, 'primate_ai': None,
'gerp_rs': None, 'mpc': None, 'phastcons_100_vert': None, 'strvctvre': None, 'gnomad_noncoding': None,
'splice_ai_consequence': None},
'polyphen': None, 'dann': None, 'sift': None, 'cadd': None, 'primate_ai': None,
'mpc': None, 'strvctvre': None, 'gnomad_noncoding': None, 'splice_ai_consequence': None},
'ref': None,
'rsid': None,
'screenRegionType': None,
Expand Down Expand Up @@ -1235,8 +1232,7 @@ def call_request_json(self, index=-1):
},
'pos': 10195,
'predictions': {'hmtvar': 0.71, 'apogee': 0.42, 'cadd': None, 'dann': None, 'eigen': None, 'fathmm': 'T',
'gerp_rs': '5.07', 'haplogroup_defining': None, 'metasvm': None, 'mitotip': None,
'mpc': None, 'mut_taster': 'N', 'phastcons_100_vert': '0.958000', 'polyphen': None,
'haplogroup_defining': None, 'mitotip': None, 'mpc': None, 'mut_taster': 'N', 'polyphen': None,
'primate_ai': None, 'revel': None, 'sift': 'D', 'splice_ai': None, 'splice_ai_consequence': None,
'strvctvre': None, 'gnomad_noncoding': None,},
'ref': 'C',
Expand Down
4 changes: 2 additions & 2 deletions ui/shared/components/page/AcceptCookies.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ const AcceptCookies = () => (
content={
<Modal.Content>
seqr collects cookies to improve our user experience and ensure the secure functioning of our site. For more
details, see our &npsp;
<Link target="_blank" to="/privacy_policy">Privacy Policy</Link>
details, see our
<Link target="_blank" to="/privacy_policy"> Privacy Policy</Link>
. By clicking &quot;Accept&quot;, you consent to the use of these cookies.
</Modal.Content>
}
Expand Down
Loading

0 comments on commit 0e7193b

Please sign in to comment.