Skip to content

Commit

Permalink
Merge branch 'dev' of https://github.com/broadinstitute/seqr into con…
Browse files Browse the repository at this point in the history
…flictingPathogenicities-ui
  • Loading branch information
hanars committed Jul 18, 2023
2 parents 4216bce + 37535d1 commit 47fce36
Show file tree
Hide file tree
Showing 6 changed files with 61 additions and 34 deletions.
2 changes: 1 addition & 1 deletion seqr/management/tests/load_rna_seq_outlier_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
from django.test import TestCase

from seqr.models import Sample, RnaSeqOutlier
from seqr.views.utils.orm_to_json_utils import get_json_for_rna_seq_outliers

RNA_FILE_ID = 'tmp_-_2021-03-01T00:00:00_-_test_data_manager_-_new_muscle_samples.tsv.gz'
EXISTING_SAMPLE_GUID = 'S000150_na19675_d2'

class LoadRnaSeqTest(TestCase):
databases = '__all__'
fixtures = ['users', '1kg_project', 'reference_data']

@mock.patch('seqr.management.commands.load_rna_seq_outlier.logger.info')
Expand Down
53 changes: 30 additions & 23 deletions seqr/views/apis/data_manager_api_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from seqr.views.apis.data_manager_api import elasticsearch_status, upload_qc_pipeline_output, delete_index, \
update_rna_seq, load_rna_seq_sample_data, load_phenotype_prioritization_data, write_pedigree, validate_callset, \
get_loaded_projects, load_data
from seqr.views.utils.orm_to_json_utils import get_json_for_rna_seq_outliers, _get_json_for_models
from seqr.views.utils.orm_to_json_utils import _get_json_for_models
from seqr.views.utils.test_utils import AuthenticationTestCase, AirflowTestCase
from seqr.utils.search.elasticsearch.es_utils_tests import urllib3_responses
from seqr.models import Individual, RnaSeqOutlier, RnaSeqTpm, RnaSeqSpliceOutlier, Sample, Project, PhenotypePrioritization
Expand Down Expand Up @@ -284,21 +284,21 @@
'ENSG00000233750': {'gene_id': 'ENSG00000233750', 'tpm': '0.0'},
}
SAMPLE_GENE_SPLICE_DATA = {
'ENSG00000163092-2-167254166-167258349-*-psi3': {
'ENSG00000233750-2-167254166-167258349-*-psi3': {
'chrom': '2', 'start': 167254166, 'end': 167258349, 'strand': '*', 'type': 'psi3',
'p_value': 1.56e-25, 'z_score': -4.9, 'delta_psi': -0.46, 'read_count': 166, 'gene_id': 'ENSG00000163092',
'p_value': 1.56e-25, 'z_score': -4.9, 'delta_psi': -0.46, 'read_count': 166, 'gene_id': 'ENSG00000233750',
'rare_disease_samples_with_junction': 1, 'rare_disease_samples_total': 20, 'rank': 1,
},
'ENSG00000106554-7-132885746-132975168-*-psi5': {
'ENSG00000240361-7-132885746-132975168-*-psi5': {
'chrom': '7', 'start': 132885746, 'end': 132975168, 'strand': '*', 'type': 'psi5',
'p_value': 1.08e-56, 'z_score': -6.53, 'delta_psi': -0.85, 'read_count': 231, 'gene_id': 'ENSG00000106554',
'p_value': 1.08e-56, 'z_score': -6.53, 'delta_psi': -0.85, 'read_count': 231, 'gene_id': 'ENSG00000240361',
'rare_disease_samples_with_junction': 1, 'rare_disease_samples_total': 20, 'rank': 0,
},
}
SAMPLE_GENE_SPLICE_DATA2 = {
'ENSG00000163092-2-167258096-167258349-*-psi3': {
'ENSG00000233750-2-167258096-167258349-*-psi3': {
'chrom': '2', 'start': 167258096, 'end': 167258349, 'strand': '*', 'type': 'psi3',
'p_value': 1.56e-25, 'z_score': 6.33, 'delta_psi': 0.45, 'read_count': 143, 'gene_id': 'ENSG00000163092',
'p_value': 1.56e-25, 'z_score': 6.33, 'delta_psi': 0.45, 'read_count': 143, 'gene_id': 'ENSG00000233750',
'rare_disease_samples_with_junction': 1, 'rare_disease_samples_total': 20, 'rank': 0,
}
}
Expand Down Expand Up @@ -730,39 +730,39 @@ def test_kibana_proxy(self):
'deltaPsi', 'readCount', 'tissue', 'dotSize', 'rareDiseaseSamplesWithJunction',
'rareDiseaseSamplesTotal'],
'optional_headers': ['geneName', 'dotSize'],
'loaded_data_row': ['NA19675_1', '1kg project nåme with uniçøde', 'ENSG00000106554', 'chr7', 132885746, 132886973, '*', 'CHCHD3',
'loaded_data_row': ['NA19675_1', '1kg project nåme with uniçøde', 'ENSG00000240361', 'chr7', 132885746, 132886973, '*', 'CHCHD3',
'psi5', 1.08E-56, 12.34, 0.85, 1297, 'fibroblasts', 0.53953638, 1, 20],
'no_existing_data': ['NA19678', '1kg project nåme with uniçøde', 'ENSG00000106554', 'chr7', 132885746, 132886973, '*', 'CHCHD3',
'no_existing_data': ['NA19678', '1kg project nåme with uniçøde', 'ENSG00000240361', 'chr7', 132885746, 132886973, '*', 'CHCHD3',
'psi5', 1.08E-56, 12.34, 0.85, 1297, 'fibroblasts', 0.53953638, 1, 20],
'duplicated_indiv_id_data': [
['NA20870', 'Test Reprocessed Project', 'ENSG00000163092', 'chr2', 167258096, 167258349, '*', 'XIRP2',
['NA20870', 'Test Reprocessed Project', 'ENSG00000233750', 'chr2', 167258096, 167258349, '*', 'XIRP2',
'psi3', 1.56E-25, 6.33, 0.45, 143, 'fibroblasts', 0.03454739, 1, 20],
['NA20870', '1kg project nåme with uniçøde', 'ENSG00000163093', 'chr2', 167258096, 167258349, '*', 'XIRP2',
['NA20870', '1kg project nåme with uniçøde', 'ENSG00000135953', 'chr2', 167258096, 167258349, '*', 'XIRP2',
'psi3', 1.56E-25, 6.33, 0.45, 143, 'muscle', 0.03454739, 1, 20],
],
'write_data': {'NA20870\t\t{"ENSG00000163092-2-167258096-167258349-*-psi3": {"chrom": "2", "start": 167258096,'
'write_data': {'NA20870\t\t{"ENSG00000233750-2-167258096-167258349-*-psi3": {"chrom": "2", "start": 167258096,'
' "end": 167258349, "strand": "*", "type": "psi3", "p_value": 1.56e-25, "z_score": 6.33,'
' "delta_psi": 0.45, "read_count": 143, "gene_id": "ENSG00000163092",'
' "delta_psi": 0.45, "read_count": 143, "gene_id": "ENSG00000233750",'
' "rare_disease_samples_with_junction": 1, "rare_disease_samples_total": 20, "rank": 0}}\n',
'NA20870\t\t{"ENSG00000163093-2-167258096-167258349-*-psi3": {"chrom": "2", "start": 167258096,'
'NA20870\t\t{"ENSG00000135953-2-167258096-167258349-*-psi3": {"chrom": "2", "start": 167258096,'
' "end": 167258349, "strand": "*", "type": "psi3", "p_value": 1.56e-25, "z_score": 6.33,'
' "delta_psi": 0.45, "read_count": 143, "gene_id": "ENSG00000163093",'
' "delta_psi": 0.45, "read_count": 143, "gene_id": "ENSG00000135953",'
' "rare_disease_samples_with_junction": 1, "rare_disease_samples_total": 20, "rank": 0}}\n',
},
'new_data': [
# existing sample NA19675_1
['NA19675_1', '1kg project nåme with uniçøde', 'ENSG00000163092', 'chr2', 167254166, 167258349, '*', 'XIRP2', 'psi3',
['NA19675_1', '1kg project nåme with uniçøde', 'ENSG00000233750', 'chr2', 167254166, 167258349, '*', 'XIRP2', 'psi3',
1.56E-25, -4.9, -0.46, 166, 'fibroblasts', 0.03850364, 1, 20],
['NA19675_1', '1kg project nåme with uniçøde', 'ENSG00000106554', 'chr7', 132885746, 132975168, '*', 'CHCHD3', 'psi5',
['NA19675_1', '1kg project nåme with uniçøde', 'ENSG00000240361', 'chr7', 132885746, 132975168, '*', 'CHCHD3', 'psi5',
1.08E-56, -6.53, -0.85, 231, 'fibroblasts', 0.53953638, 1, 20],
# no matched individual NA19675_D3
['NA19675_D3', '1kg project nåme with uniçøde', 'ENSG00000163092', 'chr2', 167258096, 167258349, '*', 'XIRP2',
['NA19675_D3', '1kg project nåme with uniçøde', 'ENSG00000233750', 'chr2', 167258096, 167258349, '*', 'XIRP2',
'psi3', 1.56E-25, 6.33, 0.45, 143, 'muscle', 0.03454739, 1, 20],
# a new sample NA20888
['NA20888', 'Test Reprocessed Project', 'ENSG00000163092', 'chr2', 167258096, 167258349, '*', 'XIRP2',
['NA20888', 'Test Reprocessed Project', 'ENSG00000233750', 'chr2', 167258096, 167258349, '*', 'XIRP2',
'psi3', 1.56E-25, 6.33, 0.45, 143, 'fibroblasts', 0.03454739, 1, 20],
# a project mismatched sample NA20878
['NA20878', 'Test Reprocessed Project', 'ENSG00000163092', 'chr2', 167258096, 167258349, '*', 'XIRP2', 'psi3',
['NA20878', 'Test Reprocessed Project', 'ENSG00000233750', 'chr2', 167258096, 167258349, '*', 'XIRP2', 'psi3',
1.56E-25, 6.33, 0.45, 143, 'fibroblasts', 0.03454739, 1, 20],
],
'skipped_samples': 'NA19675_D3, NA20878',
Expand All @@ -775,11 +775,11 @@ def test_kibana_proxy(self):
models.values_list('gene_id', 'chrom', 'start', 'end', 'strand', 'type', 'p_value', 'z_score', 'delta_psi',
'read_count', 'rare_disease_samples_with_junction', 'rare_disease_samples_total')),
'expected_models_json': [
('ENSG00000163092', '2', 167254166, 167258349, '*', 'psi3', 1.56e-25, -4.9, -0.46, 166, 1, 20),
('ENSG00000106554', '7', 132885746, 132975168, '*', 'psi5', 1.08e-56, -6.53, -0.85, 231, 1, 20)
('ENSG00000233750', '2', 167254166, 167258349, '*', 'psi3', 1.56e-25, -4.9, -0.46, 166, 1, 20),
('ENSG00000240361', '7', 132885746, 132975168, '*', 'psi5', 1.08e-56, -6.53, -0.85, 231, 1, 20)
],
'sample_guid': RNA_SPLICE_SAMPLE_GUID,
'row_id': 'ENSG00000106554-7-132885746-132886973-*-psi5',
'row_id': 'ENSG00000240361-7-132885746-132886973-*-psi5',
},
}

Expand Down Expand Up @@ -871,6 +871,13 @@ def _set_file_iter_stdout(rows):
self.assertEqual(response.status_code, 400)
self.assertDictEqual(response.json(), {'error': 'Unable to find matches for the following samples: NA19675_D3'})

unknown_gene_id_row1 = loaded_data_row[:2] + ['NOT_A_GENE_ID1'] + loaded_data_row[3:]
unknown_gene_id_row2 = loaded_data_row[:2] + ['NOT_A_GENE_ID2'] + loaded_data_row[3:]
_set_file_iter_stdout([header, unknown_gene_id_row1, unknown_gene_id_row2])
response = self.client.post(url, content_type='application/json', data=json.dumps(body))
self.assertEqual(response.status_code, 400)
self.assertEqual(response.json()['errors'][0], 'Unknown Gene IDs: NOT_A_GENE_ID1, NOT_A_GENE_ID2')

mapping_body = {'mappingFile': {'uploadedFileId': 'map.tsv'}}
mapping_body.update(body)
mock_subprocess.side_effect = [mock_does_file_exist, mock_file_iter]
Expand Down
14 changes: 8 additions & 6 deletions seqr/views/apis/individual_api_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -1002,14 +1002,16 @@ def test_get_individual_rna_seq_data(self):
'ENSG00000268903': mock.ANY,
},
}})
outliers_by_pos = {outlier['start']: outlier for outlier in
response_json['rnaSeqData'][INDIVIDUAL_GUID]['spliceOutliers']['ENSG00000268903']}
self.assertDictEqual(
{
'chrom': '7', 'deltaPsi': 0.85, 'end': 132886973, 'geneId': 'ENSG00000268903', 'isSignificant': True,
'pValue': 1.08e-56, 'rareDiseaseSamplesTotal': 20, 'rareDiseaseSamplesWithJunction': 1,
'readCount': 1297, 'start': 132885746, 'strand': '*', 'type': 'psi5', 'zScore': 12.34,
'tissueType': 'F',
},
response_json['rnaSeqData'][INDIVIDUAL_GUID]['spliceOutliers']['ENSG00000268903'][0]
outliers_by_pos[132885746]
)
self.assertSetEqual(set(response_json['genesById'].keys()), {'ENSG00000135953', 'ENSG00000268903'})

Expand All @@ -1025,14 +1027,14 @@ def test_get_individual_rna_seq_data_is_significant(self):
significant_outliers = [outlier for outlier in response_rnaseq_data['outliers'].values() if outlier['isSignificant']]
self.assertEqual(2, len(significant_outliers))
self.assertListEqual(
[{field: outlier[field] for field in ['start', 'end', 'pValue', 'tissueType', 'isSignificant']}
for outlier in response_rnaseq_data['spliceOutliers']['ENSG00000268903']],
[{'start': 132885746, 'end': 132886973, 'pValue': 1.08e-56, 'tissueType': 'F', 'isSignificant': True},
{'start': 1001, 'end': 2001, 'pValue': 0.1, 'tissueType': 'F', 'isSignificant': False},
sorted([{field: outlier[field] for field in ['start', 'end', 'pValue', 'tissueType', 'isSignificant']}
for outlier in response_rnaseq_data['spliceOutliers']['ENSG00000268903']], key=lambda r: r['start']),
[{'start': 1001, 'end': 2001, 'pValue': 0.1, 'tissueType': 'F', 'isSignificant': False},
{'start': 3000, 'end': 4000, 'pValue': 0.0001, 'tissueType': 'F', 'isSignificant': True},
{'start': 5000, 'end': 6000, 'pValue': 0.0001, 'tissueType': 'F', 'isSignificant': False},
{'start': 7000, 'end': 8000, 'pValue': 0.001, 'tissueType': 'M', 'isSignificant': True},
{'start': 9000, 'end': 9100, 'pValue': 0.2, 'tissueType': 'M', 'isSignificant': False}],
{'start': 9000, 'end': 9100, 'pValue': 0.2, 'tissueType': 'M', 'isSignificant': False},
{'start': 132885746, 'end': 132886973, 'pValue': 1.08e-56, 'tissueType': 'F', 'isSignificant': True}],
)


Expand Down
11 changes: 10 additions & 1 deletion seqr/views/utils/dataset_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from seqr.views.utils.file_utils import parse_file
from seqr.views.utils.permissions_utils import get_internal_projects
from seqr.views.utils.json_utils import _to_snake_case, _to_camel_case
from reference_data.models import GeneInfo
from settings import SEQR_SLACK_DATA_ALERTS_NOTIFICATION_CHANNEL, BASE_URL

logger = SeqrLogger(__name__)
Expand Down Expand Up @@ -362,6 +363,7 @@ def _load_rna_seq_file(file_path, user, mapping_file, parse_row, expected_column
sample_id_to_tissue_type = {}
samples_with_conflict_tissues = defaultdict(set)
errors = []
gene_ids = set()
for line in tqdm(f, unit=' rows'):
row = dict(zip(header, _parse_tsv_row(line)))
for sample_id, row_dict in parse_row(row):
Expand All @@ -376,10 +378,12 @@ def _load_rna_seq_file(file_path, user, mapping_file, parse_row, expected_column

sample_id_to_tissue_type[(sample_id, project)] = tissue_type

gene_ids.add(row_dict[GENE_ID_COL])

if get_unique_key:
gene_or_unique_id = get_unique_key(row_dict)
else:
gene_or_unique_id = row_dict['gene_id']
gene_or_unique_id = row_dict[GENE_ID_COL]
existing_data = samples_by_id[(sample_id, project)].get(gene_or_unique_id)
if existing_data and existing_data != row_dict:
errors.append(f'Error in {sample_id} data for {gene_or_unique_id}: mismatched entries '
Expand All @@ -390,6 +394,11 @@ def _load_rna_seq_file(file_path, user, mapping_file, parse_row, expected_column

samples_by_id[(sample_id, project)][gene_or_unique_id] = row_dict

matched_gene_ids = set(GeneInfo.objects.filter(gene_id__in=gene_ids).values_list('gene_id', flat=True))
unknown_gene_ids = gene_ids - matched_gene_ids
if unknown_gene_ids:
errors.append(f'Unknown Gene IDs: {", ".join(sorted(unknown_gene_ids))}')

if errors:
raise ErrorsWarningsException(errors)

Expand Down
7 changes: 5 additions & 2 deletions ui/pages/Project/reducers.js
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,10 @@ export const updateFamiliesTable = (updates, tableName) => (
{ type: tableName === CASE_REVIEW_TABLE_NAME ? UPDATE_CASE_REVIEW_TABLE_STATE : UPDATE_FAMILY_TABLE_STATE, updates }
)

export const updateFamiliesTableFilters = updates => ({ type: UPDATE_FAMILY_TABLE_FILTER_STATE, updates })
// Thunk action creator: stores the given family-table filter `updates` under the
// current project's guid, so filter state is tracked per project.
export const updateFamiliesTableFilters = updates => (dispatch, getState) => {
  // The guid of the project being viewed is read from the top-level store state.
  const projectGuid = getState().currentProjectGuid
  const updatesById = { [projectGuid]: updates }
  dispatch({ type: UPDATE_FAMILY_TABLE_FILTER_STATE, updatesById })
}

export const updateSavedVariantTable = updates => ({ type: UPDATE_SAVED_VARIANT_TABLE_STATE, updates })

Expand All @@ -396,7 +399,7 @@ export const reducers = {
familiesSortOrder: SORT_BY_FAMILY_NAME,
familiesSortDirection: 1,
}, false),
familyTableFilterState: createSingleObjectReducer(UPDATE_FAMILY_TABLE_FILTER_STATE),
familyTableFilterState: createObjectsByIdReducer(UPDATE_FAMILY_TABLE_FILTER_STATE),
caseReviewTableState: createSingleObjectReducer(UPDATE_CASE_REVIEW_TABLE_STATE, {
familiesFilter: SHOW_IN_REVIEW,
familiesSortOrder: SORT_BY_FAMILY_ADDED_DATE,
Expand Down
8 changes: 7 additions & 1 deletion ui/pages/Project/selectors.js
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ export const getMmeSubmissionsLoading = state => state.mmeSubmissionsLoading.isL
export const getSamplesLoading = state => state.samplesLoading.isLoading
export const getTagTypesLoading = state => state.tagTypesLoading.isLoading
export const getFamilyTagTypeCounts = state => state.familyTagTypeCounts
export const getFamiliesTableFilters = state => state.familyTableFilterState
const getFamiliesTableFiltersByProject = state => state.familyTableFilterState

export const getCurrentProject = createSelector(
getProjectsByGuid, getProjectGuid, (projectsByGuid, currentProjectGuid) => projectsByGuid[currentProjectGuid],
Expand Down Expand Up @@ -432,6 +432,12 @@ const analysedByFilters = (filter, analysedByOptions) => {
return filterGroups
}

// Selects the family-table filters stored for the current project.
// Yields undefined when nothing is stored under the current project guid.
export const getFamiliesTableFilters = createSelector(
  getFamiliesTableFiltersByProject,
  getProjectGuid,
  (filtersByProject, projectGuid) => {
    // Fall back to an empty object so a missing per-project map does not throw on lookup.
    const byProject = filtersByProject || {}
    return byProject[projectGuid]
  },
)

const getFamiliesFilterFunc = createSelector(
(state, ownProps) => ownProps?.tableName === CASE_REVIEW_TABLE_NAME,
state => state.caseReviewTableState.familiesFilter,
Expand Down

0 comments on commit 47fce36

Please sign in to comment.