Merge branch 'dev' of https://github.com/broadinstitute/seqr into conflictingPathogenicities-ui
broadinstitute · Jul 18, 2023 · 47fce36 · 47fce36
2 parents 4216bce + 37535d1
commit 47fce36
Show file tree

Hide file tree

Showing 6 changed files with 61 additions and 34 deletions.
diff --git a/seqr/management/tests/load_rna_seq_outlier_tests.py b/seqr/management/tests/load_rna_seq_outlier_tests.py
@@ -5,12 +5,12 @@
 from django.test import TestCase
 
 from seqr.models import Sample, RnaSeqOutlier
-from seqr.views.utils.orm_to_json_utils import get_json_for_rna_seq_outliers
 
 RNA_FILE_ID = 'tmp_-_2021-03-01T00:00:00_-_test_data_manager_-_new_muscle_samples.tsv.gz'
 EXISTING_SAMPLE_GUID = 'S000150_na19675_d2'
 
 class LoadRnaSeqTest(TestCase):
+    databases = '__all__'
     fixtures = ['users', '1kg_project', 'reference_data']
 
     @mock.patch('seqr.management.commands.load_rna_seq_outlier.logger.info')

diff --git a/seqr/views/apis/data_manager_api_tests.py b/seqr/views/apis/data_manager_api_tests.py
@@ -8,7 +8,7 @@
 from seqr.views.apis.data_manager_api import elasticsearch_status, upload_qc_pipeline_output, delete_index, \
     update_rna_seq, load_rna_seq_sample_data, load_phenotype_prioritization_data, write_pedigree, validate_callset, \
     get_loaded_projects, load_data
-from seqr.views.utils.orm_to_json_utils import get_json_for_rna_seq_outliers, _get_json_for_models
+from seqr.views.utils.orm_to_json_utils import _get_json_for_models
 from seqr.views.utils.test_utils import AuthenticationTestCase, AirflowTestCase
 from seqr.utils.search.elasticsearch.es_utils_tests import urllib3_responses
 from seqr.models import Individual, RnaSeqOutlier, RnaSeqTpm, RnaSeqSpliceOutlier, Sample, Project, PhenotypePrioritization
@@ -284,21 +284,21 @@
     'ENSG00000233750': {'gene_id': 'ENSG00000233750', 'tpm': '0.0'},
 }
 SAMPLE_GENE_SPLICE_DATA = {
-    'ENSG00000163092-2-167254166-167258349-*-psi3': {
+    'ENSG00000233750-2-167254166-167258349-*-psi3': {
         'chrom': '2', 'start': 167254166, 'end': 167258349, 'strand': '*', 'type': 'psi3',
-        'p_value': 1.56e-25, 'z_score': -4.9, 'delta_psi': -0.46, 'read_count': 166, 'gene_id': 'ENSG00000163092',
+        'p_value': 1.56e-25, 'z_score': -4.9, 'delta_psi': -0.46, 'read_count': 166, 'gene_id': 'ENSG00000233750',
         'rare_disease_samples_with_junction': 1, 'rare_disease_samples_total': 20, 'rank': 1,
     },
-    'ENSG00000106554-7-132885746-132975168-*-psi5': {
+    'ENSG00000240361-7-132885746-132975168-*-psi5': {
         'chrom': '7', 'start': 132885746, 'end': 132975168, 'strand': '*', 'type': 'psi5',
-        'p_value': 1.08e-56, 'z_score': -6.53, 'delta_psi': -0.85, 'read_count': 231, 'gene_id': 'ENSG00000106554',
+        'p_value': 1.08e-56, 'z_score': -6.53, 'delta_psi': -0.85, 'read_count': 231, 'gene_id': 'ENSG00000240361',
         'rare_disease_samples_with_junction': 1, 'rare_disease_samples_total': 20, 'rank': 0,
     },
 }
 SAMPLE_GENE_SPLICE_DATA2 = {
-    'ENSG00000163092-2-167258096-167258349-*-psi3': {
+    'ENSG00000233750-2-167258096-167258349-*-psi3': {
         'chrom': '2', 'start': 167258096, 'end': 167258349, 'strand': '*', 'type': 'psi3',
-        'p_value': 1.56e-25, 'z_score': 6.33, 'delta_psi': 0.45, 'read_count': 143, 'gene_id': 'ENSG00000163092',
+        'p_value': 1.56e-25, 'z_score': 6.33, 'delta_psi': 0.45, 'read_count': 143, 'gene_id': 'ENSG00000233750',
         'rare_disease_samples_with_junction': 1, 'rare_disease_samples_total': 20, 'rank': 0,
     }
 }
@@ -730,39 +730,39 @@ def test_kibana_proxy(self):
                        'deltaPsi', 'readCount', 'tissue', 'dotSize', 'rareDiseaseSamplesWithJunction',
                        'rareDiseaseSamplesTotal'],
             'optional_headers': ['geneName', 'dotSize'],
-            'loaded_data_row': ['NA19675_1', '1kg project nåme with uniçøde', 'ENSG00000106554', 'chr7', 132885746, 132886973, '*', 'CHCHD3',
+            'loaded_data_row': ['NA19675_1', '1kg project nåme with uniçøde', 'ENSG00000240361', 'chr7', 132885746, 132886973, '*', 'CHCHD3',
                                 'psi5', 1.08E-56, 12.34, 0.85, 1297, 'fibroblasts', 0.53953638, 1, 20],
-            'no_existing_data': ['NA19678', '1kg project nåme with uniçøde', 'ENSG00000106554', 'chr7', 132885746, 132886973, '*', 'CHCHD3',
+            'no_existing_data': ['NA19678', '1kg project nåme with uniçøde', 'ENSG00000240361', 'chr7', 132885746, 132886973, '*', 'CHCHD3',
                                 'psi5', 1.08E-56, 12.34, 0.85, 1297, 'fibroblasts', 0.53953638, 1, 20],
             'duplicated_indiv_id_data': [
-                ['NA20870', 'Test Reprocessed Project', 'ENSG00000163092', 'chr2', 167258096, 167258349, '*', 'XIRP2',
+                ['NA20870', 'Test Reprocessed Project', 'ENSG00000233750', 'chr2', 167258096, 167258349, '*', 'XIRP2',
                  'psi3', 1.56E-25, 6.33, 0.45, 143, 'fibroblasts', 0.03454739, 1, 20],
-                ['NA20870', '1kg project nåme with uniçøde', 'ENSG00000163093', 'chr2', 167258096, 167258349, '*', 'XIRP2',
+                ['NA20870', '1kg project nåme with uniçøde', 'ENSG00000135953', 'chr2', 167258096, 167258349, '*', 'XIRP2',
                  'psi3', 1.56E-25, 6.33, 0.45, 143, 'muscle', 0.03454739, 1, 20],
             ],
-            'write_data': {'NA20870\t\t{"ENSG00000163092-2-167258096-167258349-*-psi3": {"chrom": "2", "start": 167258096,'
+            'write_data': {'NA20870\t\t{"ENSG00000233750-2-167258096-167258349-*-psi3": {"chrom": "2", "start": 167258096,'
                            ' "end": 167258349, "strand": "*", "type": "psi3", "p_value": 1.56e-25, "z_score": 6.33,'
-                           ' "delta_psi": 0.45, "read_count": 143, "gene_id": "ENSG00000163092",'
+                           ' "delta_psi": 0.45, "read_count": 143, "gene_id": "ENSG00000233750",'
                            ' "rare_disease_samples_with_junction": 1, "rare_disease_samples_total": 20, "rank": 0}}\n',
-                           'NA20870\t\t{"ENSG00000163093-2-167258096-167258349-*-psi3": {"chrom": "2", "start": 167258096,'
+                           'NA20870\t\t{"ENSG00000135953-2-167258096-167258349-*-psi3": {"chrom": "2", "start": 167258096,'
                            ' "end": 167258349, "strand": "*", "type": "psi3", "p_value": 1.56e-25, "z_score": 6.33,'
-                           ' "delta_psi": 0.45, "read_count": 143, "gene_id": "ENSG00000163093",'
+                           ' "delta_psi": 0.45, "read_count": 143, "gene_id": "ENSG00000135953",'
                            ' "rare_disease_samples_with_junction": 1, "rare_disease_samples_total": 20, "rank": 0}}\n',
             },
             'new_data': [
                 # existing sample NA19675_1
-                ['NA19675_1', '1kg project nåme with uniçøde', 'ENSG00000163092', 'chr2', 167254166, 167258349, '*', 'XIRP2', 'psi3',
+                ['NA19675_1', '1kg project nåme with uniçøde', 'ENSG00000233750', 'chr2', 167254166, 167258349, '*', 'XIRP2', 'psi3',
                  1.56E-25, -4.9, -0.46, 166, 'fibroblasts', 0.03850364, 1, 20],
-                ['NA19675_1', '1kg project nåme with uniçøde', 'ENSG00000106554', 'chr7', 132885746, 132975168, '*', 'CHCHD3', 'psi5',
+                ['NA19675_1', '1kg project nåme with uniçøde', 'ENSG00000240361', 'chr7', 132885746, 132975168, '*', 'CHCHD3', 'psi5',
                  1.08E-56, -6.53, -0.85, 231, 'fibroblasts', 0.53953638, 1, 20],
                 # no matched individual NA19675_D3
-                ['NA19675_D3', '1kg project nåme with uniçøde', 'ENSG00000163092', 'chr2', 167258096, 167258349, '*', 'XIRP2',
+                ['NA19675_D3', '1kg project nåme with uniçøde', 'ENSG00000233750', 'chr2', 167258096, 167258349, '*', 'XIRP2',
                  'psi3', 1.56E-25, 6.33, 0.45, 143, 'muscle', 0.03454739, 1, 20],
                 # a new sample NA20888
-                ['NA20888', 'Test Reprocessed Project', 'ENSG00000163092', 'chr2', 167258096, 167258349, '*', 'XIRP2',
+                ['NA20888', 'Test Reprocessed Project', 'ENSG00000233750', 'chr2', 167258096, 167258349, '*', 'XIRP2',
                  'psi3', 1.56E-25, 6.33, 0.45, 143, 'fibroblasts', 0.03454739, 1, 20],
                 # a project mismatched sample NA20878
-                ['NA20878', 'Test Reprocessed Project', 'ENSG00000163092', 'chr2', 167258096, 167258349, '*', 'XIRP2', 'psi3',
+                ['NA20878', 'Test Reprocessed Project', 'ENSG00000233750', 'chr2', 167258096, 167258349, '*', 'XIRP2', 'psi3',
                  1.56E-25, 6.33, 0.45, 143, 'fibroblasts', 0.03454739, 1, 20],
             ],
             'skipped_samples': 'NA19675_D3, NA20878',
@@ -775,11 +775,11 @@ def test_kibana_proxy(self):
                 models.values_list('gene_id', 'chrom', 'start', 'end', 'strand', 'type', 'p_value', 'z_score', 'delta_psi',
                                    'read_count', 'rare_disease_samples_with_junction', 'rare_disease_samples_total')),
             'expected_models_json': [
-                ('ENSG00000163092', '2', 167254166, 167258349, '*', 'psi3', 1.56e-25, -4.9, -0.46, 166, 1, 20),
-                ('ENSG00000106554', '7', 132885746, 132975168, '*', 'psi5', 1.08e-56, -6.53, -0.85, 231, 1, 20)
+                ('ENSG00000233750', '2', 167254166, 167258349, '*', 'psi3', 1.56e-25, -4.9, -0.46, 166, 1, 20),
+                ('ENSG00000240361', '7', 132885746, 132975168, '*', 'psi5', 1.08e-56, -6.53, -0.85, 231, 1, 20)
             ],
             'sample_guid': RNA_SPLICE_SAMPLE_GUID,
-            'row_id': 'ENSG00000106554-7-132885746-132886973-*-psi5',
+            'row_id': 'ENSG00000240361-7-132885746-132886973-*-psi5',
         },
     }
 
@@ -871,6 +871,13 @@ def _set_file_iter_stdout(rows):
                 self.assertEqual(response.status_code, 400)
                 self.assertDictEqual(response.json(), {'error': 'Unable to find matches for the following samples: NA19675_D3'})
 
+                unknown_gene_id_row1 = loaded_data_row[:2] + ['NOT_A_GENE_ID1'] + loaded_data_row[3:]
+                unknown_gene_id_row2 = loaded_data_row[:2] + ['NOT_A_GENE_ID2'] + loaded_data_row[3:]
+                _set_file_iter_stdout([header, unknown_gene_id_row1, unknown_gene_id_row2])
+                response = self.client.post(url, content_type='application/json', data=json.dumps(body))
+                self.assertEqual(response.status_code, 400)
+                self.assertEqual(response.json()['errors'][0], 'Unknown Gene IDs: NOT_A_GENE_ID1, NOT_A_GENE_ID2')
+
                 mapping_body = {'mappingFile': {'uploadedFileId': 'map.tsv'}}
                 mapping_body.update(body)
                 mock_subprocess.side_effect = [mock_does_file_exist, mock_file_iter]

diff --git a/seqr/views/apis/individual_api_tests.py b/seqr/views/apis/individual_api_tests.py
@@ -1002,14 +1002,16 @@ def test_get_individual_rna_seq_data(self):
                 'ENSG00000268903': mock.ANY,
             },
         }})
+        outliers_by_pos = {outlier['start']: outlier for outlier in
+                           response_json['rnaSeqData'][INDIVIDUAL_GUID]['spliceOutliers']['ENSG00000268903']}
         self.assertDictEqual(
             {
                 'chrom': '7', 'deltaPsi': 0.85, 'end': 132886973, 'geneId': 'ENSG00000268903', 'isSignificant': True,
                 'pValue': 1.08e-56, 'rareDiseaseSamplesTotal': 20, 'rareDiseaseSamplesWithJunction': 1,
                 'readCount': 1297, 'start': 132885746, 'strand': '*', 'type': 'psi5', 'zScore': 12.34,
                 'tissueType': 'F',
             },
-            response_json['rnaSeqData'][INDIVIDUAL_GUID]['spliceOutliers']['ENSG00000268903'][0]
+            outliers_by_pos[132885746]
         )
         self.assertSetEqual(set(response_json['genesById'].keys()), {'ENSG00000135953', 'ENSG00000268903'})
 
@@ -1025,14 +1027,14 @@ def test_get_individual_rna_seq_data_is_significant(self):
         significant_outliers = [outlier for outlier in response_rnaseq_data['outliers'].values() if outlier['isSignificant']]
         self.assertEqual(2, len(significant_outliers))
         self.assertListEqual(
-            [{field: outlier[field] for field in ['start', 'end', 'pValue', 'tissueType', 'isSignificant']}
-             for outlier in response_rnaseq_data['spliceOutliers']['ENSG00000268903']],
-            [{'start': 132885746, 'end': 132886973, 'pValue': 1.08e-56, 'tissueType': 'F', 'isSignificant': True},
-             {'start': 1001, 'end': 2001, 'pValue': 0.1, 'tissueType': 'F', 'isSignificant': False},
+            sorted([{field: outlier[field] for field in ['start', 'end', 'pValue', 'tissueType', 'isSignificant']}
+                    for outlier in response_rnaseq_data['spliceOutliers']['ENSG00000268903']], key=lambda r: r['start']),
+            [{'start': 1001, 'end': 2001, 'pValue': 0.1, 'tissueType': 'F', 'isSignificant': False},
              {'start': 3000, 'end': 4000, 'pValue': 0.0001, 'tissueType': 'F', 'isSignificant': True},
              {'start': 5000, 'end': 6000, 'pValue': 0.0001, 'tissueType': 'F', 'isSignificant': False},
              {'start': 7000, 'end': 8000, 'pValue': 0.001, 'tissueType': 'M', 'isSignificant': True},
-             {'start': 9000, 'end': 9100, 'pValue': 0.2, 'tissueType': 'M', 'isSignificant': False}],
+             {'start': 9000, 'end': 9100, 'pValue': 0.2, 'tissueType': 'M', 'isSignificant': False},
+             {'start': 132885746, 'end': 132886973, 'pValue': 1.08e-56, 'tissueType': 'F', 'isSignificant': True}],
         )
 
 

diff --git a/seqr/views/utils/dataset_utils.py b/seqr/views/utils/dataset_utils.py
@@ -14,6 +14,7 @@
 from seqr.views.utils.file_utils import parse_file
 from seqr.views.utils.permissions_utils import get_internal_projects
 from seqr.views.utils.json_utils import _to_snake_case, _to_camel_case
+from reference_data.models import GeneInfo
 from settings import SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL, BASE_URL
 
 logger = SeqrLogger(__name__)
@@ -362,6 +363,7 @@ def _load_rna_seq_file(file_path, user, mapping_file, parse_row, expected_column
     sample_id_to_tissue_type = {}
     samples_with_conflict_tissues = defaultdict(set)
     errors = []
+    gene_ids = set()
     for line in tqdm(f, unit=' rows'):
         row = dict(zip(header, _parse_tsv_row(line)))
         for sample_id, row_dict in parse_row(row):
@@ -376,10 +378,12 @@ def _load_rna_seq_file(file_path, user, mapping_file, parse_row, expected_column
 
             sample_id_to_tissue_type[(sample_id, project)] = tissue_type
 
+            gene_ids.add(row_dict[GENE_ID_COL])
+
             if get_unique_key:
                 gene_or_unique_id = get_unique_key(row_dict)
             else:
-                gene_or_unique_id = row_dict['gene_id']
+                gene_or_unique_id = row_dict[GENE_ID_COL]
             existing_data = samples_by_id[(sample_id, project)].get(gene_or_unique_id)
             if existing_data and existing_data != row_dict:
                 errors.append(f'Error in {sample_id} data for {gene_or_unique_id}: mismatched entries '
@@ -390,6 +394,11 @@ def _load_rna_seq_file(file_path, user, mapping_file, parse_row, expected_column
 
             samples_by_id[(sample_id, project)][gene_or_unique_id] = row_dict
 
+    matched_gene_ids = set(GeneInfo.objects.filter(gene_id__in=gene_ids).values_list('gene_id', flat=True))
+    unknown_gene_ids = gene_ids - matched_gene_ids
+    if unknown_gene_ids:
+        errors.append(f'Unknown Gene IDs: {", ".join(sorted(unknown_gene_ids))}')
+
     if errors:
         raise ErrorsWarningsException(errors)
 

diff --git a/ui/pages/Project/reducers.js b/ui/pages/Project/reducers.js
@@ -370,7 +370,10 @@ export const updateFamiliesTable = (updates, tableName) => (
   { type: tableName === CASE_REVIEW_TABLE_NAME ? UPDATE_CASE_REVIEW_TABLE_STATE : UPDATE_FAMILY_TABLE_STATE, updates }
 )
 
-export const updateFamiliesTableFilters = updates => ({ type: UPDATE_FAMILY_TABLE_FILTER_STATE, updates })
+export const updateFamiliesTableFilters = updates => (dispatch, getState) => {
+  const { currentProjectGuid } = getState()
+  dispatch({ type: UPDATE_FAMILY_TABLE_FILTER_STATE, updatesById: { [currentProjectGuid]: updates } })
+}
 
 export const updateSavedVariantTable = updates => ({ type: UPDATE_SAVED_VARIANT_TABLE_STATE, updates })
 
@@ -396,7 +399,7 @@ export const reducers = {
     familiesSortOrder: SORT_BY_FAMILY_NAME,
     familiesSortDirection: 1,
   }, false),
-  familyTableFilterState: createSingleObjectReducer(UPDATE_FAMILY_TABLE_FILTER_STATE),
+  familyTableFilterState: createObjectsByIdReducer(UPDATE_FAMILY_TABLE_FILTER_STATE),
   caseReviewTableState: createSingleObjectReducer(UPDATE_CASE_REVIEW_TABLE_STATE, {
     familiesFilter: SHOW_IN_REVIEW,
     familiesSortOrder: SORT_BY_FAMILY_ADDED_DATE,

diff --git a/ui/pages/Project/selectors.js b/ui/pages/Project/selectors.js
@@ -63,7 +63,7 @@ export const getMmeSubmissionsLoading = state => state.mmeSubmissionsLoading.isL
 export const getSamplesLoading = state => state.samplesLoading.isLoading
 export const getTagTypesLoading = state => state.tagTypesLoading.isLoading
 export const getFamilyTagTypeCounts = state => state.familyTagTypeCounts
-export const getFamiliesTableFilters = state => state.familyTableFilterState
+const getFamiliesTableFiltersByProject = state => state.familyTableFilterState
 
 export const getCurrentProject = createSelector(
   getProjectsByGuid, getProjectGuid, (projectsByGuid, currentProjectGuid) => projectsByGuid[currentProjectGuid],
@@ -432,6 +432,12 @@ const analysedByFilters = (filter, analysedByOptions) => {
   return filterGroups
 }
 
+export const getFamiliesTableFilters = createSelector(
+  getFamiliesTableFiltersByProject,
+  getProjectGuid,
+  (familyTableFiltersByProject, projectGuid) => (familyTableFiltersByProject || {})[projectGuid],
+)
+
 const getFamiliesFilterFunc = createSelector(
   (state, ownProps) => ownProps?.tableName === CASE_REVIEW_TABLE_NAME,
   state => state.caseReviewTableState.familiesFilter,