Skip to content

Commit

Permalink
Merge pull request #3366 from broadinstitute/shifa/variant-rnaseq-label
Browse files Browse the repository at this point in the history
Add the RnaSeq Splice Junction outlier label to the variants.
  • Loading branch information
ShifaSZ authored Jul 10, 2023
2 parents f38e547 + 9190848 commit 689aa27
Show file tree
Hide file tree
Showing 13 changed files with 226 additions and 80 deletions.
16 changes: 8 additions & 8 deletions seqr/fixtures/1kg_project.json
Original file line number Diff line number Diff line change
Expand Up @@ -919,8 +919,8 @@
"sample_id": "NA19679_S",
"is_active": true,
"elasticsearch_index":null,
"tissue_type": "M",
"data_source": "muscle_samples.tsv.gz",
"tissue_type": "F",
"data_source": "fibs_samples.tsv.gz",
"loaded_date": "2017-02-05T06:42:55.397Z"
}
},
Expand Down Expand Up @@ -1376,7 +1376,7 @@
"fields": {
"sample": 151,
"rank": 0,
"gene_id": "ENSG00000106554",
"gene_id": "ENSG00000268903",
"p_value": 1.08E-56,
"z_score": 12.34,
"chrom": "7",
Expand Down Expand Up @@ -1416,7 +1416,7 @@
"fields": {
"sample": 151,
"rank": 3,
"gene_id": "ENSG00000106554",
"gene_id": "ENSG00000268903",
"p_value": 0.1,
"z_score": 12.34,
"chrom": "7",
Expand All @@ -1436,7 +1436,7 @@
"fields": {
"sample": 151,
"rank": 1,
"gene_id": "ENSG00000106554",
"gene_id": "ENSG00000268903",
"p_value": 0.0001,
"z_score": 12.34,
"chrom": "7",
Expand All @@ -1456,7 +1456,7 @@
"fields": {
"sample": 151,
"rank": 2,
"gene_id": "ENSG00000106554",
"gene_id": "ENSG00000268903",
"p_value": 0.0001,
"z_score": 12.34,
"chrom": "7",
Expand All @@ -1475,7 +1475,7 @@
"pk": 6,
"fields": {
"sample": 152,
"gene_id": "ENSG00000106554",
"gene_id": "ENSG00000268903",
"rank": 0,
"p_value": 0.001,
"z_score": 12.34,
Expand All @@ -1496,7 +1496,7 @@
"fields": {
"sample": 152,
"rank": 1,
"gene_id": "ENSG00000106554",
"gene_id": "ENSG00000268903",
"p_value": 0.2,
"z_score": 12.34,
"chrom": "7",
Expand Down
8 changes: 4 additions & 4 deletions seqr/views/apis/individual_api_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -999,17 +999,17 @@ def test_get_individual_rna_seq_data(self):
},
},
'spliceOutliers': {
'ENSG00000106554': mock.ANY,
'ENSG00000268903': mock.ANY,
},
}})
self.assertDictEqual(
{
'chrom': '7', 'deltaPsi': 0.85, 'end': 132886973, 'geneId': 'ENSG00000106554', 'isSignificant': True,
'chrom': '7', 'deltaPsi': 0.85, 'end': 132886973, 'geneId': 'ENSG00000268903', 'isSignificant': True,
'pValue': 1.08e-56, 'rareDiseaseSamplesTotal': 20, 'rareDiseaseSamplesWithJunction': 1,
'readCount': 1297, 'start': 132885746, 'strand': '*', 'type': 'psi5', 'zScore': 12.34,
'tissueType': 'F',
},
response_json['rnaSeqData'][INDIVIDUAL_GUID]['spliceOutliers']['ENSG00000106554'][0]
response_json['rnaSeqData'][INDIVIDUAL_GUID]['spliceOutliers']['ENSG00000268903'][0]
)
self.assertSetEqual(set(response_json['genesById'].keys()), {'ENSG00000135953', 'ENSG00000268903'})

Expand All @@ -1026,7 +1026,7 @@ def test_get_individual_rna_seq_data_is_significant(self):
self.assertEqual(2, len(significant_outliers))
self.assertListEqual(
[{field: outlier[field] for field in ['start', 'end', 'pValue', 'tissueType', 'isSignificant']}
for outlier in response_rnaseq_data['spliceOutliers']['ENSG00000106554']],
for outlier in response_rnaseq_data['spliceOutliers']['ENSG00000268903']],
[{'start': 132885746, 'end': 132886973, 'pValue': 1.08e-56, 'tissueType': 'F', 'isSignificant': True},
{'start': 1001, 'end': 2001, 'pValue': 0.1, 'tissueType': 'F', 'isSignificant': False},
{'start': 3000, 'end': 4000, 'pValue': 0.0001, 'tissueType': 'F', 'isSignificant': True},
Expand Down
6 changes: 4 additions & 2 deletions seqr/views/apis/saved_variant_api_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,8 +172,10 @@ def test_saved_variant_data(self):
'outliers': {
'ENSG00000135953': {
'geneId': 'ENSG00000135953', 'zScore': 7.31, 'pValue': 0.00000000000948, 'pAdjust': 0.00000000781,
'isSignificant': True,
}},
'tissueType': None, 'isSignificant': True,
}
},
'spliceOutliers': {},
}})

# include project tag types
Expand Down
33 changes: 29 additions & 4 deletions seqr/views/apis/variant_search_api_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,10 @@
'VFD0000025_1248367227_r0390_10': expected_functional_tag, 'VFD0000026_1248367227_r0390_10': expected_functional_tag,
},
'locusListsByGuid': {LOCUS_LIST_GUID: {'intervals': mock.ANY}},
'rnaSeqData': {'I000001_na19675': {'outliers': {'ENSG00000268903': mock.ANY}}},
'rnaSeqData': {
'I000001_na19675': {'outliers': {'ENSG00000268903': mock.ANY}, 'spliceOutliers': {'ENSG00000268903': mock.ANY}},
'I000003_na19679': {'outliers': {}, 'spliceOutliers': {'ENSG00000268903': mock.ANY}},
},
'phenotypeGeneScores': {
'I000001_na19675': {'ENSG00000268903': {'exomiser': EXPECTED_EXOMISER_DATA}},
'I000002_na19678': {'ENSG00000268903': {'lirical': EXPECTED_LIRICAL_DATA}},
Expand Down Expand Up @@ -142,6 +145,7 @@ def _get_compound_het_es_variants(results_model, **kwargs):
return deepcopy(COMP_HET_VARAINTS), 1


@mock.patch('seqr.views.utils.orm_to_json_utils.RnaSeqSpliceOutlier.MAX_SIGNIFICANT_OUTLIER_NUM', 2)
@mock.patch('seqr.views.utils.permissions_utils.safe_redis_get_json', lambda *args: None)
class VariantSearchAPITest(object):

Expand All @@ -164,6 +168,24 @@ def _assert_expected_search_context(self, response_json):
self.assertEqual(response_json['familiesByGuid']['F000001_1']['displayName'], '1')
self.assertEqual(response_json['familiesByGuid']['F000001_1']['analysisStatus'], 'Q')

def _assert_expected_rnaseq_response(self, response_json):
    """Check the RNA-seq payload returned for individual I000001_na19675.

    Asserts the expression outlier reported for gene ENSG00000268903 and the
    splice outliers reported for the same gene. The splice outliers are
    sorted by ``start`` before comparison, so the order in which they appear
    in the response does not matter.
    """
    gene_id = 'ENSG00000268903'
    individual_data = response_json['rnaSeqData']['I000001_na19675']

    expected_outlier = {
        'geneId': gene_id, 'isSignificant': True, 'pAdjust': 1.39e-09, 'pValue': 5.88e-10,
        'tissueType': None, 'zScore': 7.08,
    }
    self.assertDictEqual(individual_data['outliers'][gene_id], expected_outlier)

    # Fields shared by every expected junction; only position, p-value and tissue differ.
    common_fields = {
        'chrom': '7', 'deltaPsi': 0.85, 'geneId': gene_id, 'isSignificant': True,
        'rareDiseaseSamplesTotal': 20, 'rareDiseaseSamplesWithJunction': 1, 'readCount': 1297,
        'strand': '*', 'type': 'psi5', 'zScore': 12.34,
    }
    junctions = [
        (3000, 4000, 0.0001, 'F'),
        (7000, 8000, 0.001, 'M'),
        (132885746, 132886973, 1.08e-56, 'F'),
    ]
    expected_splice_outliers = [
        dict(common_fields, start=start, end=end, pValue=p_value, tissueType=tissue_type)
        for start, end, p_value, tissue_type in junctions
    ]

    actual_splice_outliers = sorted(
        individual_data['spliceOutliers'][gene_id], key=lambda outlier: outlier['start'],
    )
    self.assertListEqual(actual_splice_outliers, expected_splice_outliers)

def _assert_expected_results_family_context(self, response_json, locus_list_detail=False):
self._assert_expected_results_context(response_json, locus_list_detail=locus_list_detail)
Expand All @@ -188,7 +210,9 @@ def _assert_expected_results_family_context(self, response_json, locus_list_deta
self.assertEqual(len(response_json['familyNotesByGuid']), 3)
self.assertSetEqual(set(response_json['familyNotesByGuid']['FAN000001_1'].keys()), FAMILY_NOTE_FIELDS)

def _assert_expected_results_context(self, response_json, has_pa_detail=True, locus_list_detail=False):
self._assert_expected_rnaseq_response(response_json)

def _assert_expected_results_context(self, response_json, has_pa_detail=True, locus_list_detail=False, rnaseq=True):
gene_fields = {'locusListGuids'}
gene_fields.update(GENE_VARIANT_FIELDS)
basic_gene_id = next(gene_id for gene_id in ['ENSG00000268903', 'ENSG00000233653'] if gene_id in response_json['genesById'])
Expand Down Expand Up @@ -222,6 +246,8 @@ def _assert_expected_results_context(self, response_json, has_pa_detail=True, lo
if response_json['variantFunctionalDataByGuid']:
self.assertSetEqual(set(next(iter(response_json['variantFunctionalDataByGuid'].values())).keys()), FUNCTIONAL_FIELDS)

if rnaseq:
self._assert_expected_rnaseq_response(response_json)

@mock.patch('seqr.utils.middleware.logger.error')
@mock.patch('seqr.views.apis.variant_search_api.get_variant_query_gene_counts')
Expand Down Expand Up @@ -424,7 +450,7 @@ def test_query_variants(self, mock_get_variants, mock_get_gene_counts, mock_erro
})
expected_search_response['search']['totalResults'] = 1
self.assertDictEqual(response_json, expected_search_response)
self._assert_expected_results_context(response_json, has_pa_detail=False)
self._assert_expected_results_context(response_json, has_pa_detail=False, rnaseq=False)
mock_error_logger.assert_not_called()

# Test cross-project discovery for analyst users
Expand Down Expand Up @@ -507,7 +533,6 @@ def _get_variants(results_model, **kwargs):
response = self.client.post(url, content_type='application/json', data=json.dumps(body))
self.assertEqual(response.status_code, 200)
response_json = response.json()
self.assertSetEqual(set(response_json.keys()), set(EXPECTED_SEARCH_RESPONSE.keys()))
self.assertDictEqual(response_json, EXPECTED_SEARCH_RESPONSE)
self._assert_expected_results_context(response_json)
self.assertSetEqual(
Expand Down
18 changes: 4 additions & 14 deletions seqr/views/utils/variant_utils.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
from collections import defaultdict
from django.contrib.postgres.aggregates import ArrayAgg
from django.db.models import F, Value
from django.db.models import F
import logging
import redis

from matchmaker.models import MatchmakerSubmissionGenes, MatchmakerSubmission
from reference_data.models import TranscriptInfo
from seqr.models import SavedVariant, VariantSearchResults, Family, LocusList, LocusListInterval, LocusListGene, \
RnaSeqOutlier, RnaSeqTpm, PhenotypePrioritization, Project
RnaSeqTpm, PhenotypePrioritization, Project
from seqr.utils.search.utils import get_variants_for_variant_ids
from seqr.utils.gene_utils import get_genes_for_variants
from seqr.views.utils.json_to_orm_utils import update_model_from_json
Expand Down Expand Up @@ -131,19 +131,9 @@ def _add_locus_lists(projects, genes, add_list_detail=False, user=None):


def _get_rna_seq_outliers(gene_ids, family_guids):
# TODO change to get_json_for_rna_seq_outliers in issue #3324
data_by_individual_gene = defaultdict(lambda: {'outliers': {}})

outlier_data = get_json_for_queryset(
RnaSeqOutlier.objects.filter(
gene_id__in=gene_ids, p_adjust__lt=RnaSeqOutlier.SIGNIFICANCE_THRESHOLD, sample__individual__family__guid__in=family_guids),
nested_fields=[{'fields': ('sample', 'individual', 'guid'), 'key': 'individualGuid'}],
additional_values={'isSignificant': Value(True)},
)
for data in outlier_data:
data_by_individual_gene[data.pop('individualGuid')]['outliers'][data['geneId']] = data
filters = {'gene_id__in': gene_ids, 'sample__individual__family__guid__in': family_guids}

return data_by_individual_gene
return get_json_for_rna_seq_outliers(filters)


def get_phenotype_prioritization(family_guids, gene_ids=None):
Expand Down
8 changes: 5 additions & 3 deletions ui/pages/Project/reducers.js
Original file line number Diff line number Diff line change
Expand Up @@ -301,9 +301,11 @@ export const searchMmeMatches = submissionGuid => (dispatch) => {
}

export const loadRnaSeqData = individualGuid => (dispatch, getState) => {
const data = getState().rnaSeqDataByIndividual[individualGuid]
// If variants were loaded for the individual, the significant gene data will be loaded but not all the needed data
if (!data?.outliers || Object.values(data.outliers).every(({ isSignificant }) => isSignificant)) {
const { outliers, spliceOutliers } = getState().rnaSeqDataByIndividual[individualGuid] || {}
// If variants were loaded for the individual, the significant data were loaded but not the non-significant ones
if (!outliers || !spliceOutliers || (Object.values(outliers).every(({ isSignificant }) => isSignificant) &&
Object.values(spliceOutliers).flat().every(({ isSignificant }) => isSignificant))
) {
dispatch({ type: REQUEST_RNA_SEQ_DATA })
new HttpRequestHelper(`/api/individual/${individualGuid}/rna_seq_data`,
(responseJson) => {
Expand Down
14 changes: 13 additions & 1 deletion ui/pages/Search/fixtures.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,11 @@ export const LOCUS_LIST = {
locusListGuid: LOCUS_LIST_GUID,
name: "2017 Monogenic IBD Gene List",
numEntries: 60,
parsedItems: { items: [{ geneId: 'ENSG00000164458' }], itemMap: { 'TTN': { geneId: 'ENSG00000164458', symbol: 'TTN' } } }
parsedItems: { items: [{ geneId: 'ENSG00000164458' }], itemMap: { 'TTN': { geneId: 'ENSG00000164458', symbol: 'TTN' } } },
intervals: [
{'chrom': '1', 'end': 7300, 'genomeVersion': '37', 'locusListGuid': 'LL00132_2017_monogenic_ibd_gen', 'locusListIntervalGuid': 'LLI0000012_test_list_edit4545_', 'start': 7200},
{'chrom': '3', 'end': 3000, 'genomeVersion': '37', 'locusListGuid': 'LL00132_2017_monogenic_ibd_gen', 'locusListIntervalGuid': 'LLI0000013_a_new_list325_3000', 'start': 25},
]
}

export const STATE = {
Expand Down Expand Up @@ -100,6 +104,7 @@ export const STATE = {
],
sampleGuids: [],
sex: 'F',
familyGuid: FAMILY_GUID,
},
I021475_na19675: {
affected: 'A',
Expand Down Expand Up @@ -212,6 +217,13 @@ export const STATE = {
},
},
locusListsByGuid: { [LOCUS_LIST_GUID]: LOCUS_LIST },
rnaSeqDataByIndividual: { I021474_na19679: {
outliers: {ENSG00000136758: {geneId: "ENSG00000136758", isSignificant: true, pAdjust: 0.000225907356686287, pValue: 2.69828505929319e-9, tissueType: "M", zScore: 5.62}},
spliceOutliers: { ENSG00000136758: [
{chrom: "10", deltaPsi: 0.56, end: 27114400, geneId: "ENSG00000136758", isSignificant: true, pValue: 2.1234e-10, rareDiseaseSamplesTotal: 171, rareDiseaseSamplesWithJunction: 1, readCount: 1208, start: 27114300, strand: "*", tissueType: "F", type: "psi5", zScore: 2.96},
{chrom: "11", deltaPsi: 0.56, end: 27114400, geneId: "ENSG00000136758", isSignificant: true, pValue: 2.1234e-10, rareDiseaseSamplesTotal: 171, rareDiseaseSamplesWithJunction: 1, readCount: 1208, start: 27114300, strand: "*", tissueType: "F", type: "psi5", zScore: 2.96},
]},
}},
projectsByGuid: {
[PROJECT_GUID] : {
createdDate: '2016-05-16T05:37:08.634Z',
Expand Down
46 changes: 30 additions & 16 deletions ui/redux/selectors.js
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,20 @@ export const getSearchGeneBreakdownValues = createSelector(
})),
)

// Files every record of groupedData under initialData[record.chrom][nestedKey],
// creating the per-chromosome object and the nested list on first use.
// initialData is mutated in place and is also the returned value, which lets
// callers thread one accumulator through repeated calls.
const groupDataNestedByChrom = (initialData, groupedData, nestedKey) => {
  const byChrom = initialData
  groupedData.forEach((record) => {
    const chromBuckets = byChrom[record.chrom] || {}
    const bucket = chromBuckets[nestedKey] || []
    bucket.push(record)
    chromBuckets[nestedKey] = bucket
    byChrom[record.chrom] = chromBuckets
  })
  return byChrom
}

export const getLocusListIntervalsByChromProject = createSelector(
getProjectsByGuid,
getLocusListsByGuid,
Expand All @@ -368,16 +382,7 @@ export const getLocusListIntervalsByChromProject = createSelector(
const projectIntervals = locusListGuids.map(locusListGuid => locusListsByGuid[locusListGuid]).reduce(
(acc2, { intervals = [] }) => [...acc2, ...intervals], [],
)
projectIntervals.forEach((interval) => {
if (!acc[interval.chrom]) {
acc[interval.chrom] = {}
}
if (!acc[interval.chrom][projectGuid]) {
acc[interval.chrom][projectGuid] = []
}
acc[interval.chrom][projectGuid].push(interval)
})
return acc
return groupDataNestedByChrom(acc, projectIntervals, projectGuid)
}, {},
),
)
Expand Down Expand Up @@ -419,18 +424,27 @@ export const getRnaSeqSignificantJunctionData = createSelector(
getGenesById,
getIndividualsByGuid,
getRnaSeqDataByIndividual,
(genesById, individualsByGuid, rnaSeqDataByIndividual) => Object.entries(rnaSeqDataByIndividual).reduce(
(acc, [individualGuid, rnaSeqData]) => (rnaSeqData.spliceOutliers ? {
...acc,
[individualGuid]: Object.values(rnaSeqData.spliceOutliers).flat().filter(({ isSignificant }) => isSignificant)
(genesById, individualsByGuid, rnaSeqDataByIndividual) => Object.entries(rnaSeqDataByIndividual || {}).reduce(
(acc, [individualGuid, rnaSeqData]) => {
const individualData = Object.values(rnaSeqData.spliceOutliers || {}).flat()
.filter(({ isSignificant }) => isSignificant)
.sort((a, b) => a.pValue - b.pValue)
.map(({ geneId, chrom, start, end, strand, type, ...cols }) => ({
geneSymbol: (genesById[geneId] || {}).geneSymbol || geneId,
idField: `${geneId}-${chrom}-${start}-${end}-${strand}-${type}`,
familyGuid: individualsByGuid[individualGuid].familyGuid,
individualName: individualsByGuid[individualGuid].displayName,
individualGuid,
...{ geneId, chrom, start, end, strand, type, ...cols },
})),
} : acc), {},
}))
return (individualData.length > 0 ? { ...acc, [individualGuid]: individualData } : acc)
}, {},
),
)

// Regroups the per-individual significant splice junction data by chromosome
// and then by family guid. The family guid is read from the first record of
// each individual's list, so every list is expected to be non-empty.
export const getSpliceOutliersByChromFamily = createSelector(
  getRnaSeqSignificantJunctionData,
  (spliceDataByIndiv) => {
    let byChromFamily = {}
    Object.values(spliceDataByIndiv).forEach((individualOutliers) => {
      const { familyGuid } = individualOutliers[0]
      byChromFamily = groupDataNestedByChrom(byChromFamily, individualOutliers, familyGuid)
    })
    return byChromFamily
  },
)
38 changes: 38 additions & 0 deletions ui/redux/selectors.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ import {
getSearchGeneBreakdownValues,
getSelectableTagTypesByProject,
getUserOptions,
getLocusListIntervalsByChromProject,
getSpliceOutliersByChromFamily,
} from './selectors'
import {FAMILY_GUID, GENE_ID, SEARCH, SEARCH_HASH, STATE} from "../pages/Search/fixtures";

Expand Down Expand Up @@ -47,3 +49,39 @@ test('getUserOptions', () => {
expect(Object.keys(options).length).toEqual(7)
expect(options[1]).toEqual({ key: '4MW8vPtmHG', value: '4MW8vPtmHG', text: 'Mekdes ([email protected])'})
})

// Locus list intervals from the fixture state should come back keyed by
// chromosome and then by project guid.
test('getLocusListIntervalsByChromProject', () => {
  const projectGuid = 'R0237_1000_genomes_demo'
  // Fields that are identical for both fixture intervals.
  const sharedFields = { genomeVersion: '37', locusListGuid: 'LL00132_2017_monogenic_ibd_gen' }
  const expected = {
    1: {
      [projectGuid]: [
        { ...sharedFields, chrom: '1', start: 7200, end: 7300, locusListIntervalGuid: 'LLI0000012_test_list_edit4545_' },
      ],
    },
    3: {
      [projectGuid]: [
        { ...sharedFields, chrom: '3', start: 25, end: 3000, locusListIntervalGuid: 'LLI0000013_a_new_list325_3000' },
      ],
    },
  }

  expect(getLocusListIntervalsByChromProject(STATE, {})).toEqual(expected)
})

// Significant splice outliers from the fixture state should come back keyed
// by chromosome and then by family guid, with the display fields added.
test('getSpliceOutliersByChromFamily', () => {
  const familyGuid = 'F011652_1'
  const geneId = 'ENSG00000136758'
  // Fields that are identical for both expected junctions; only chrom and idField differ.
  const sharedFields = {
    familyGuid,
    geneSymbol: geneId,
    individualGuid: 'I021474_na19679',
    individualName: '',
    geneId,
    deltaPsi: 0.56,
    start: 27114300,
    end: 27114400,
    isSignificant: true,
    pValue: 2.1234e-10,
    rareDiseaseSamplesTotal: 171,
    rareDiseaseSamplesWithJunction: 1,
    readCount: 1208,
    strand: '*',
    tissueType: 'F',
    type: 'psi5',
    zScore: 2.96,
  }
  const expected = {
    10: {
      [familyGuid]: [
        { ...sharedFields, chrom: '10', idField: 'ENSG00000136758-10-27114300-27114400-*-psi5' },
      ],
    },
    11: {
      [familyGuid]: [
        { ...sharedFields, chrom: '11', idField: 'ENSG00000136758-11-27114300-27114400-*-psi5' },
      ],
    },
  }

  expect(getSpliceOutliersByChromFamily(STATE, {})).toEqual(expected)
})
Loading

0 comments on commit 689aa27

Please sign in to comment.