Merge pull request #3026 from broadinstitute/dev

Dev
broadinstitute · Oct 24, 2022 · 0e7193b · 0e7193b
2 parents 90665bd + 0bcaabe
commit 0e7193b
Show file tree

Hide file tree

Showing 14 changed files with 70 additions and 59 deletions.
diff --git a/seqr/utils/elasticsearch/constants.py b/seqr/utils/elasticsearch/constants.py
@@ -328,11 +328,8 @@
     'dbnsfp_DANN_score': {},
     'eigen_Eigen_phred': {},
     'dbnsfp_FATHMM_pred': {},
-    'dbnsfp_GERP_RS': {'response_key': 'gerp_rs'},
     'mpc_MPC': {},
-    'dbnsfp_MetaSVM_pred': {},
     'dbnsfp_MutationTaster_pred': {'response_key': 'mut_taster'},
-    'dbnsfp_phastCons100way_vertebrate': {'response_key': 'phastcons_100_vert'},
     'dbnsfp_Polyphen2_HVAR_pred': {'response_key': 'polyphen'},
     'gnomad_non_coding_constraint_z_score': {'response_key': 'gnomad_noncoding'},
     'primate_ai_score': {'response_key': 'primate_ai'},

diff --git a/seqr/utils/elasticsearch/es_utils_tests.py b/seqr/utils/elasticsearch/es_utils_tests.py
@@ -771,13 +771,10 @@
     'contig',
     'variantId',
     'dbnsfp_MutationTaster_pred',
-    'dbnsfp_phastCons100way_vertebrate',
-    'dbnsfp_MetaSVM_pred',
     'mpc_MPC',
     'dbnsfp_DANN_score',
     'eigen_Eigen_phred',
     'dbnsfp_REVEL_score',
-    'dbnsfp_GERP_RS',
     'splice_ai_delta_score',
     'splice_ai_splice_consequence',
     'dbnsfp_FATHMM_pred',
@@ -893,13 +890,10 @@
     "common_low_heteroplasmy",
     "contig",
     "dbnsfp_FATHMM_pred",
-    "dbnsfp_GERP_RS",
-    "dbnsfp_MetaSVM_pred",
     "dbnsfp_MutationTaster_pred",
     "dbnsfp_Polyphen2_HVAR_pred",
     "dbnsfp_REVEL_score",
     "dbnsfp_SIFT_pred",
-    "dbnsfp_phastCons100way_vertebrate",
     "end",
     "filters",
     "genotypes",

diff --git a/seqr/views/apis/anvil_workspace_api.py b/seqr/views/apis/anvil_workspace_api.py
@@ -24,7 +24,7 @@
 from seqr.views.utils.file_utils import load_uploaded_file
 from seqr.views.utils.terra_api_utils import add_service_account, has_service_account_access, TerraAPIException, \
     TerraRefreshTokenFailedException
-from seqr.views.utils.pedigree_info_utils import parse_pedigree_table
+from seqr.views.utils.pedigree_info_utils import parse_pedigree_table, JsonConstants
 from seqr.views.utils.individual_utils import add_or_update_individuals_and_families, get_updated_pedigree_json
 from seqr.utils.communication_utils import safe_post_to_slack, send_html_email
 from seqr.utils.file_utils import does_file_exist, mv_file_to_gs, get_gs_file_list
@@ -245,7 +245,10 @@ def add_workspace_data(request, project_guid):
 def _parse_uploaded_pedigree(request_json, user):
     # Parse families/individuals in the uploaded pedigree file
     json_records = load_uploaded_file(request_json['uploadedFileId'])
-    pedigree_records, _ = parse_pedigree_table(json_records, 'uploaded pedigree file', user=user, fail_on_warnings=True)
+    pedigree_records, _ = parse_pedigree_table(
+        json_records, 'uploaded pedigree file', user=user, fail_on_warnings=True, required_columns=[
+            JsonConstants.SEX_COLUMN, JsonConstants.AFFECTED_COLUMN,
+        ])
 
     missing_samples = [record['individualId'] for record in pedigree_records
                        if record['individualId'] not in request_json['vcfSamples']]
@@ -278,7 +281,7 @@ def _trigger_add_workspace_data(project, pedigree_records, user, data_path, samp
     # use airflow api to trigger AnVIL dags
     trigger_success = _trigger_data_loading(project, data_path, sample_type, user)
     # Send a slack message to the slack channel
-    _send_load_data_slack_msg(project, ids_path, data_path, sample_type, user)
+    _send_load_data_slack_msg(project, ids_path, data_path, len(updated_individuals), sample_type, user)
     AirtableSession(user, base=AirtableSession.ANVIL_BASE).safe_create_record(
         'AnVIL Seqr Loading Requests Tracking', {
             'Requester Name': user.get_full_name(),
@@ -323,10 +326,10 @@ def _get_loading_project_path(project, sample_type):
 def _get_seqr_project_url(project):
     return f'{BASE_URL}project/{project.guid}/project_page'
 
-def _send_load_data_slack_msg(project, ids_path, data_path, sample_type, user):
+def _send_load_data_slack_msg(project, ids_path, data_path, sample_count, sample_type, user):
     pipeline_dag = _construct_dag_variables(project, data_path, sample_type)
     message_content = """
-        *{user}* requested to load {sample_type} data ({genome_version}) from AnVIL workspace *{namespace}/{name}* at 
+        *{user}* requested to load {sample_count} {sample_type} samples ({genome_version}) from AnVIL workspace *{namespace}/{name}* at 
         {path} to seqr project <{project_url}|*{project_name}*> (guid: {guid})  
   
         The sample IDs to load have been uploaded to {ids_path}.  
@@ -342,6 +345,7 @@ def _send_load_data_slack_msg(project, ids_path, data_path, sample_type, user):
         project_url=_get_seqr_project_url(project),
         guid=project.guid,
         project_name=project.name,
+        sample_count=sample_count,
         sample_type=sample_type,
         genome_version=GENOME_VERSION_LOOKUP.get(project.genome_version),
         dag_name = "seqr_vcf_to_es_AnVIL_{anvil_type}_v{version}".format(anvil_type=sample_type, version=DAG_VERSION),

diff --git a/seqr/views/apis/anvil_workspace_api_tests.py b/seqr/views/apis/anvil_workspace_api_tests.py
@@ -18,7 +18,7 @@
      "Notes", "familyNotes"],
     ["1", "NA19675", "NA19675_1", "NA19678", "", "Female", "Affected", "A affected individual, test1-zsf", ""],
     ["1", "NA19678", "", "", "", "Male", "Unaffected", "a individual note", ""],
-    ["21", "HG00735", "", "", "", "Female", "Unaffected", "", "a new family"]]
+    ["21", "HG00735", "", "", "", "", "", "", "a new family"]]
 
 BAD_SAMPLE_DATA = [["1", "NA19674", "NA19674_1", "NA19678", "NA19679", "Female", "Affected", "A affected individual, test1-zsf", ""]]
 
@@ -666,6 +666,14 @@ def _test_errors(self, url, fields, workspace_name):
         self.assertEqual(response.reason_phrase, f'Field(s) "{field_str}" are required')
         self.mock_get_ws_access_level.assert_called_with(self.manager_user, TEST_WORKSPACE_NAMESPACE, workspace_name)
 
+        # test missing columns
+        self.mock_load_file.return_value = [['family', 'individual'], ['1', '2']]
+        response = self.client.post(url, content_type='application/json', data=json.dumps(REQUEST_BODY))
+        self.assertEqual(response.status_code, 400)
+        response_json = response.json()
+        self.assertListEqual(response_json['errors'], [
+            'Error while converting uploaded pedigree file rows to json: Sex, Affected not specified in row #1'])
+
         # test sample data error
         self.mock_load_file.return_value = LOAD_SAMPLE_DATA + BAD_SAMPLE_DATA
         response = self.client.post(url, content_type='application/json', data=json.dumps(REQUEST_BODY))
@@ -751,7 +759,7 @@ def _assert_valid_operation(self, project, test_add_data=True):
         self.assertEqual(responses.calls[call_cnt+1].request.headers['Authorization'], 'Bearer {}'.format(MOCK_AIRTABLE_KEY))
 
         slack_message = """
-        *[email protected]* requested to load WES data ({version}) from AnVIL workspace *my-seqr-billing/{workspace_name}* at 
+        *[email protected]* requested to load 3 WES samples ({version}) from AnVIL workspace *my-seqr-billing/{workspace_name}* at 
         gs://test_bucket/test_path.vcf to seqr project <http://testserver/project/{guid}/project_page|*{project_name}*> (guid: {guid})  
   
         The sample IDs to load have been uploaded to gs://seqr-datasets/v02/{version}/AnVIL_WES/{guid}/base/{guid}_ids.txt.  
@@ -842,6 +850,7 @@ def test_create_project_from_workspace_loading_delay_email(self):
                       '{}/api/v1/dags/seqr_vcf_to_es_AnVIL_WES_v0.0.1/tasks'.format(MOCK_AIRFLOW_URL),
                       headers={'Authorization': 'Bearer {}'.format(MOCK_TOKEN)},
                       json={"tasks": [
+                            {"task_id": "pyspark_compute_project_R0006_anvil_no_project_workspace"},
                             {"task_id": "pyspark_compute_project_R0007_anvil_no_project_workspace"},
                             {"task_id": "pyspark_compute_project_R0008_anvil_no_project_workspace"}],
                             "total_entries": 2},

diff --git a/seqr/views/apis/individual_api_tests.py b/seqr/views/apis/individual_api_tests.py
@@ -295,7 +295,7 @@ def test_individuals_table_handler(self):
         self.assertDictEqual(response.json(), {'errors': mock.ANY, 'warnings': []})
         errors = response.json()['errors']
         self.assertEqual(len(errors), 1)
-        self.assertEqual(errors[0].split('\n')[0],"Error while converting test.tsv rows to json: Individual Id not specified in row #1:")
+        self.assertEqual(errors[0], "Error while converting test.tsv rows to json: Individual Id not specified in row #1")
 
         response = self.client.post(individuals_url, {'f': SimpleUploadedFile(
             'test.tsv', 'Family ID	Individual ID	Previous Individual ID\n"1"	"NA19675_1"	"NA19675"'.encode('utf-8'))})

diff --git a/seqr/views/utils/individual_utils.py b/seqr/views/utils/individual_utils.py
@@ -101,6 +101,7 @@ def _update_from_record(record, user, families_by_id, individual_lookup, updated
             individual = create_model_from_json(
                 Individual, {'family': family, 'individual_id': individual_id, 'case_review_status': 'I'}, user)
             updated_families.add(family)
+            updated_individuals.add(individual)
             individual_lookup[individual_id][family] = individual
 
     record['family'] = family

diff --git a/seqr/views/utils/pedigree_info_utils.py b/seqr/views/utils/pedigree_info_utils.py
@@ -9,7 +9,7 @@
 from seqr.utils.communication_utils import send_html_email
 from seqr.utils.logging_utils import SeqrLogger
 from seqr.utils.middleware import ErrorsWarningsException
-from seqr.views.utils.json_utils import _to_snake_case
+from seqr.views.utils.json_utils import _to_snake_case, _to_title_case
 from seqr.views.utils.permissions_utils import user_is_pm, get_pm_user_emails
 from seqr.models import Individual
 
@@ -19,7 +19,7 @@
 RELATIONSHIP_REVERSE_LOOKUP = {v.lower(): k for k, v in Individual.RELATIONSHIP_LOOKUP.items()}
 
 
-def parse_pedigree_table(parsed_file, filename, user, project=None, fail_on_warnings=False):
+def parse_pedigree_table(parsed_file, filename, user, project=None, fail_on_warnings=False, required_columns=None):
     """Validates and parses pedigree information from a .fam, .tsv, or Excel file.
 
     Args:
@@ -98,7 +98,7 @@ def parse_pedigree_table(parsed_file, filename, user, project=None, fail_on_warn
         else:
             logger.info("Parsing regular pedigree file", user)
 
-        json_records = _convert_fam_file_rows_to_json(rows)
+        json_records = _convert_fam_file_rows_to_json(rows, required_columns=required_columns)
     except Exception as e:
         raise ErrorsWarningsException(['Error while converting {} rows to json: {}'.format(filename, e)], [])
 
@@ -130,7 +130,7 @@ def _parse_affected(affected):
     return None
 
 
-def _convert_fam_file_rows_to_json(rows):
+def _convert_fam_file_rows_to_json(rows, required_columns=None):
     """Parse the values in rows and convert them to a json representation.
 
     Args:
@@ -163,10 +163,12 @@ def _convert_fam_file_rows_to_json(rows):
         json_record = _parse_row_dict(row_dict, i)
 
         # validate
-        if not json_record.get(JsonConstants.FAMILY_ID_COLUMN):
-            raise ValueError("Family Id not specified in row #%d:\n%s" % (i+1, json_record))
-        if not json_record.get(JsonConstants.INDIVIDUAL_ID_COLUMN):
-            raise ValueError("Individual Id not specified in row #%d:\n%s" % (i+1, json_record))
+        columns = [JsonConstants.FAMILY_ID_COLUMN, JsonConstants.INDIVIDUAL_ID_COLUMN]
+        if required_columns:
+            columns += required_columns
+        missing_cols = [col for col in columns if not json_record.get(col)]
+        if missing_cols:
+            raise ValueError(f"{', '.join([_to_title_case(_to_snake_case(col)) for col in missing_cols])} not specified in row #{i + 1}")
 
         json_results.append(json_record)
 
@@ -194,11 +196,14 @@ def _parse_row_dict(row_dict, i):
 
         if column:
             format_func = JsonConstants.FORMAT_COLUMNS.get(column)
-            if format_func and (value or column in {JsonConstants.SEX_COLUMN, JsonConstants.AFFECTED_COLUMN}):
-                parsed_value = format_func(value)
-                if parsed_value is None and column not in JsonConstants.JSON_COLUMNS:
-                    raise ValueError(f'Invalid value "{value}" for {_to_snake_case(column)} in row #{i + 1}')
-                value = parsed_value
+            if format_func:
+                if (value or column in {JsonConstants.SEX_COLUMN, JsonConstants.AFFECTED_COLUMN}):
+                    parsed_value = format_func(value)
+                    if parsed_value is None and column not in JsonConstants.JSON_COLUMNS:
+                        raise ValueError(f'Invalid value "{value}" for {_to_snake_case(column)} in row #{i + 1}')
+                    value = parsed_value
+            elif value == '':
+                value = None
             json_record[column] = value
     return json_record
 

diff --git a/seqr/views/utils/pedigree_info_utils_tests.py b/seqr/views/utils/pedigree_info_utils_tests.py
@@ -27,17 +27,17 @@ def test_parse_pedigree_table(self):
                 [['family_id', 'individual_id', 'sex', 'affected', 'father', 'mother'],
                 ['', '', 'male', 'u', '.', 'ind2']], FILENAME, self.collaborator_user)
         self.assertEqual(len(ec.exception.errors), 1)
-        self.assertEqual(ec.exception.errors[0].split('\n')[0],
-                         "Error while converting {} rows to json: Family Id not specified in row #1:".format(FILENAME))
+        self.assertEqual(ec.exception.errors[0],
+                         "Error while converting {} rows to json: Family Id, Individual Id not specified in row #1".format(FILENAME))
         self.assertListEqual(ec.exception.warnings, [])
 
         with self.assertRaises(ErrorsWarningsException) as ec:
             parse_pedigree_table(
                 [['family_id', 'individual_id', 'sex', 'affected', 'father', 'mother'],
                  ['fam1', '', 'male', 'u', '.', 'ind2']], FILENAME, self.collaborator_user)
         self.assertEqual(len(ec.exception.errors), 1)
-        self.assertEqual(ec.exception.errors[0].split('\n')[0],
-                         "Error while converting {} rows to json: Individual Id not specified in row #1:".format(FILENAME))
+        self.assertEqual(ec.exception.errors[0],
+                         "Error while converting {} rows to json: Individual Id not specified in row #1".format(FILENAME))
         self.assertListEqual(ec.exception.warnings, [])
 
         with self.assertRaises(ErrorsWarningsException) as ec:
@@ -88,8 +88,8 @@ def test_parse_pedigree_table(self):
              'maternalId': 'ind2', 'notes': 'some notes', 'codedPhenotype': 'HPO:12345', 'probandRelationship': '',
              'previousIndividualId': 'ind1_old_id'},
             {'familyId': 'fam1', 'individualId': 'ind2', 'sex': 'F', 'affected': 'N', 'paternalId': '',
-             'maternalId': 'ind3', 'notes': '', 'codedPhenotype': 'HPO:56789', 'probandRelationship': 'M',
-             'previousIndividualId': ''},
+             'maternalId': 'ind3', 'notes': None, 'codedPhenotype': 'HPO:56789', 'probandRelationship': 'M',
+             'previousIndividualId': None},
         ])
         self.assertListEqual(warnings, no_error_warnings)
 
@@ -180,9 +180,9 @@ def test_parse_sample_manifest(self, mock_email, mock_pm_group):
         records, warnings = parse_pedigree_table(original_data, FILENAME, self.pm_user, project=project)
         self.assertListEqual(records, [
             {'affected': 'N', 'maternalId': '', 'notes': 'probably dad', 'individualId': 'SCO_PED073B_GA0339_1',
-             'sex': 'M', 'familyId': 'PED073', 'paternalId': '', 'codedPhenotype': '',
+             'sex': 'M', 'familyId': 'PED073', 'paternalId': '', 'codedPhenotype': None,
              'primaryBiosample': 'T', 'analyteType': 'B', 'tissueAffectedStatus': False,},
-            {'affected': 'A', 'maternalId': 'SCO_PED073A_GA0338_1', 'notes': '', 'individualId': 'SCO_PED073C_GA0340_1',
+            {'affected': 'A', 'maternalId': 'SCO_PED073A_GA0338_1', 'notes': None, 'individualId': 'SCO_PED073C_GA0340_1',
              'sex': 'F', 'familyId': 'PED073', 'paternalId': 'SCO_PED073B_GA0339_1', 'codedPhenotype': 'Perinatal death',
              'primaryBiosample': 'BM', 'analyteType': 'D', 'tissueAffectedStatus': True,
              }])

diff --git a/seqr/views/utils/test_utils.py b/seqr/views/utils/test_utils.py
@@ -896,9 +896,8 @@ def call_request_json(self, index=-1):
         'pos': 248367227,
         'predictions': {'splice_ai': 0.75, 'eigen': None, 'revel': None, 'mut_taster': None, 'fathmm': None,
                         'hmtvar': None, 'apogee': None, 'haplogroup_defining': None, 'mitotip': None,
-                        'polyphen': None, 'dann': None, 'sift': None, 'cadd': '25.9', 'metasvm': None, 'primate_ai': None,
-                        'gerp_rs': None, 'mpc': None, 'phastcons_100_vert': None, 'strvctvre': None,
-                        'splice_ai_consequence': None, 'gnomad_noncoding': 1.01272,},
+                        'polyphen': None, 'dann': None, 'sift': None, 'cadd': '25.9', 'primate_ai': None,
+                        'mpc': None, 'strvctvre': None, 'splice_ai_consequence': None, 'gnomad_noncoding': 1.01272,},
         'ref': 'TC',
         'rsid': None,
         'screenRegionType': 'dELS',
@@ -982,8 +981,8 @@ def call_request_json(self, index=-1):
         'predictions': {
             'hmtvar': None, 'apogee': None, 'haplogroup_defining': None, 'mitotip': None, 'gnomad_noncoding': None,
             'splice_ai': None, 'eigen': None, 'revel': None, 'mut_taster': None, 'fathmm': None, 'polyphen': None,
-            'dann': None, 'sift': None, 'cadd': None, 'metasvm': None, 'primate_ai': 1, 'gerp_rs': None,
-            'mpc': None, 'phastcons_100_vert': None, 'strvctvre': None, 'splice_ai_consequence': None,
+            'dann': None, 'sift': None, 'cadd': None, 'primate_ai': 1,
+            'mpc': None, 'strvctvre': None, 'splice_ai_consequence': None,
         },
         'ref': 'GAGA',
         'rsid': None,
@@ -1068,9 +1067,8 @@ def call_request_json(self, index=-1):
     'pos': 49045487,
     'predictions': {'splice_ai': None, 'eigen': None, 'revel': None, 'mut_taster': None, 'fathmm': None,
                     'hmtvar': None, 'apogee': None, 'haplogroup_defining': None, 'mitotip': None, 'gnomad_noncoding': None,
-                    'polyphen': None, 'dann': None, 'sift': None, 'cadd': None, 'metasvm': None, 'primate_ai': None,
-                    'gerp_rs': None, 'mpc': None, 'phastcons_100_vert': None, 'strvctvre': 0.374,
-                    'splice_ai_consequence': None},
+                    'polyphen': None, 'dann': None, 'sift': None, 'cadd': None, 'primate_ai': None,
+                    'mpc': None, 'strvctvre': 0.374, 'splice_ai_consequence': None},
     'ref': None,
     'rsid': None,
     'screenRegionType': None,
@@ -1156,9 +1154,8 @@ def call_request_json(self, index=-1):
     'pos': 49045387,
     'predictions': {'splice_ai': None, 'eigen': None, 'revel': None, 'mut_taster': None, 'fathmm': None,
                     'hmtvar': None, 'apogee': None, 'haplogroup_defining': None, 'mitotip': None,
-                    'polyphen': None, 'dann': None, 'sift': None, 'cadd': None, 'metasvm': None, 'primate_ai': None,
-                    'gerp_rs': None, 'mpc': None, 'phastcons_100_vert': None, 'strvctvre': None, 'gnomad_noncoding': None,
-                    'splice_ai_consequence': None},
+                    'polyphen': None, 'dann': None, 'sift': None, 'cadd': None, 'primate_ai': None,
+                    'mpc': None, 'strvctvre': None, 'gnomad_noncoding': None, 'splice_ai_consequence': None},
     'ref': None,
     'rsid': None,
     'screenRegionType': None,
@@ -1235,8 +1232,7 @@ def call_request_json(self, index=-1):
         },
     'pos': 10195,
     'predictions': {'hmtvar': 0.71, 'apogee': 0.42, 'cadd': None, 'dann': None, 'eigen': None, 'fathmm': 'T',
-                    'gerp_rs': '5.07', 'haplogroup_defining': None, 'metasvm': None, 'mitotip': None,
-                    'mpc': None, 'mut_taster': 'N', 'phastcons_100_vert': '0.958000', 'polyphen': None,
+                    'haplogroup_defining': None, 'mitotip': None, 'mpc': None, 'mut_taster': 'N', 'polyphen': None,
                     'primate_ai': None, 'revel': None, 'sift': 'D', 'splice_ai': None, 'splice_ai_consequence': None,
                     'strvctvre': None, 'gnomad_noncoding': None,},
     'ref': 'C',

diff --git a/ui/shared/components/page/AcceptCookies.jsx b/ui/shared/components/page/AcceptCookies.jsx
@@ -23,8 +23,8 @@ const AcceptCookies = () => (
       content={
         <Modal.Content>
           seqr collects cookies to improve our user experience and ensure the secure functioning of our site. For more
-          details, see our &npsp;
-          <Link target="_blank" to="/privacy_policy">Privacy Policy</Link>
+          details, see our
+          <Link target="_blank" to="/privacy_policy"> Privacy Policy</Link>
           . By clicking &quot;Accept&quot;, you consent to the use of these cookies.
         </Modal.Content>
       }