diff --git a/CHANGELOG.md b/CHANGELOG.md index e9bb0b41df..ae33d21e87 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## dev * Migrate Fammily post_discovery_omim_number to integer array (REQUIRES DB MIGRATION) +* Add GeneShet model to the reference DB (REQUIRES DB MIGRATION) ## 10/6/23 * Require tissue_type in Sample model (REQUIRES DB MIGRATION) diff --git a/reference_data/management/commands/update_gene_shet.py b/reference_data/management/commands/update_gene_shet.py new file mode 100644 index 0000000000..d348b0c164 --- /dev/null +++ b/reference_data/management/commands/update_gene_shet.py @@ -0,0 +1,22 @@ +import logging +from reference_data.management.commands.utils.update_utils import GeneCommand, ReferenceDataHandler +from reference_data.models import GeneShet + +logger = logging.getLogger(__name__) + + +class ShetReferenceDataHandler(ReferenceDataHandler): + + model_cls = GeneShet + url = 'https://zenodo.org/record/7939768/files/s_het_estimates.genebayes.tsv' + + @staticmethod + def parse_record(record): + yield { + 'gene_id': record['ensg'], + 'post_mean': float(record['post_mean']), + } + + +class Command(GeneCommand): + reference_data_handler = ShetReferenceDataHandler diff --git a/reference_data/management/tests/update_gene_shet_tests.py b/reference_data/management/tests/update_gene_shet_tests.py new file mode 100644 index 0000000000..ba16605b0d --- /dev/null +++ b/reference_data/management/tests/update_gene_shet_tests.py @@ -0,0 +1,20 @@ +from reference_data.models import GeneShet +from reference_data.management.tests.test_utils import ReferenceDataCommandTestCase + +class UpdateGeneShetTest(ReferenceDataCommandTestCase): + URL = 'https://zenodo.org/record/7939768/files/s_het_estimates.genebayes.tsv' + DATA = [ + 'ensg hgnc chrom obs_lof exp_lof prior_mean post_mean post_lower_95 post_upper_95\n', + 'ENSG00000223972 HGNC:37225 chr15 26.0 21.66 0.00059216 3.01e-05 1.05e-06 0.00010405\n', + 'ENSG00000227233 HGNC:26441 chr5 31.0 28.55 
0.00038727 4.853e-05 3.05e-06 0.00015705\n', + 'ENSG00000243485 HGNC:4013 chr19 17.0 11.327 0.00082297 5.083e-05 3.05e-06 0.00016605\n' + ] + + def test_update_gene_cn_sensitivity_command(self): + self._test_update_command('update_gene_shet', 'GeneShet', created_records=2) + + self.assertEqual(GeneShet.objects.count(), 2) + record = GeneShet.objects.get(gene__gene_id='ENSG00000223972') + self.assertEqual(record.post_mean, 3.01E-05) + record = GeneShet.objects.get(gene__gene_id='ENSG00000243485') + self.assertEqual(record.post_mean, 5.083E-05) diff --git a/reference_data/migrations/0022_geneshet.py b/reference_data/migrations/0022_geneshet.py new file mode 100644 index 0000000000..593f864a9a --- /dev/null +++ b/reference_data/migrations/0022_geneshet.py @@ -0,0 +1,22 @@ +# Generated by Django 3.2.18 on 2023-10-10 20:12 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('reference_data', '0021_auto_20221031_2049'), + ] + + operations = [ + migrations.CreateModel( + name='GeneShet', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('post_mean', models.FloatField()), + ('gene', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='reference_data.geneinfo')), + ], + ), + ] diff --git a/reference_data/models.py b/reference_data/models.py index ce221425aa..f8aae30788 100644 --- a/reference_data/models.py +++ b/reference_data/models.py @@ -161,6 +161,15 @@ class Meta: json_fields = ['pHI', 'pTS'] +class GeneShet(models.Model): + gene = models.ForeignKey(GeneInfo, on_delete=models.CASCADE) + + post_mean = models.FloatField() + + class Meta: + json_fields = ['post_mean'] + + class Omim(models.Model): MAP_METHOD_CHOICES = ( ('1', 'the disorder is placed on the map based on its association with a gene, but the underlying defect is not known.'), diff --git a/seqr/fixtures/reference_data.json 
b/seqr/fixtures/reference_data.json index 8d559de4fb..ae755195a0 100644 --- a/seqr/fixtures/reference_data.json +++ b/seqr/fixtures/reference_data.json @@ -1167,6 +1167,14 @@ "pHI": 0.90576, "pTS": 0.7346 } +}, +{ + "model": "reference_data.geneshet", + "pk": 1, + "fields": { + "gene": 1, + "post_mean": 0.90576 + } }, { "model": "reference_data.dbnsfpgene", diff --git a/seqr/utils/gene_utils.py b/seqr/utils/gene_utils.py index 7c773e21db..c590b888fc 100644 --- a/seqr/utils/gene_utils.py +++ b/seqr/utils/gene_utils.py @@ -4,7 +4,7 @@ from django.db.models.functions import Length from reference_data.models import GeneInfo, GeneConstraint, dbNSFPGene, Omim, MGI, PrimateAI, GeneCopyNumberSensitivity, \ - GenCC, ClinGen + GenCC, ClinGen, GeneShet from seqr.utils.xpos_utils import get_xpos from seqr.views.utils.orm_to_json_utils import _get_json_for_model, _get_json_for_models, _get_empty_json_for_model, \ get_json_for_gene_notes_by_gene_id @@ -90,6 +90,7 @@ def _add_mgi(gene): OMIM = 'omim' CONSTRAINT = 'constraint' CN_SENSITIVITY = 'cn_sensitivity' +SHET = 'shet' DBNSFP = 'dbnsfp' GENCC = 'gencc' PRIMATE_AI = 'primate_ai' @@ -100,6 +101,7 @@ def _add_mgi(gene): OMIM: (Omim, _add_omim), CONSTRAINT: (GeneConstraint, None), CN_SENSITIVITY: (GeneCopyNumberSensitivity, _add_gene_model('genecopynumbersensitivity', 'cnSensitivity', dict)), + SHET: (GeneShet, _add_gene_model('geneshet', 'sHet', dict)), GENCC: (GenCC, _add_gene_model('gencc', 'genCc', dict)), CLINGEN: (ClinGen, _add_gene_model('clingen', 'clinGen', lambda: None)), } diff --git a/seqr/views/apis/family_api_tests.py b/seqr/views/apis/family_api_tests.py index 391a7380a2..ef4946f397 100644 --- a/seqr/views/apis/family_api_tests.py +++ b/seqr/views/apis/family_api_tests.py @@ -545,7 +545,7 @@ def test_get_family_phenotype_gene_scores(self): 'ENSG00000268903': { 'chromGrch37': '1', 'chromGrch38': '1', 'clinGen': None, 'cnSensitivity': {}, 'codingRegionSizeGrch37': 0, 'codingRegionSizeGrch38': 0, 'constraints': {}, - 
'endGrch37': 135895, 'endGrch38': 135895, 'genCc': {}, + 'endGrch37': 135895, 'endGrch38': 135895, 'genCc': {}, 'sHet': {}, 'gencodeGeneType': 'processed_pseudogene', 'geneId': 'ENSG00000268903', 'geneSymbol': 'AL627309.7', 'mimNumber': None, 'omimPhenotypes': [], 'startGrch37': 135141, 'startGrch38': 135141 diff --git a/seqr/views/apis/gene_api_tests.py b/seqr/views/apis/gene_api_tests.py index 10b0f61728..6b394faf9b 100644 --- a/seqr/views/apis/gene_api_tests.py +++ b/seqr/views/apis/gene_api_tests.py @@ -33,6 +33,35 @@ def test_genes_info(self): genes = response.json()['genesById'] self.assertSetEqual(set(genes.keys()), {GENE_ID, 'ENSG00000269981'}) self.assertSetEqual(set(genes[GENE_ID].keys()), GENE_DETAIL_FIELDS) + self.assertDictEqual(genes[GENE_ID], { + 'chromGrch37': '1', + 'chromGrch38': '1', + 'clinGen': {'haploinsufficiency': 'No Evidence', 'href': 'https://dosage.clinicalgenome.org/clingen_gene.cgi?sym=', 'triplosensitivity': ''}, + 'cnSensitivity': {'phi': 0.90576, 'pts': 0.7346}, + 'codingRegionSizeGrch37': 0, + 'codingRegionSizeGrch38': 0, + 'constraints': {'louef': 1.606, 'louefRank': 0, 'misZ': -0.7773, 'misZRank': 1, 'pli': 0.00090576, 'pliRank': 1, 'totalGenes': 1}, + 'diseaseDesc': '', + 'endGrch37': 14409, + 'endGrch38': 14409, + 'functionDesc': '', + 'genCc': {'hgncId': 'HGNC:943', 'classifications': [ + {'classification': 'Strong', 'date': '7/29/19 19:04', 'disease': 'dystonia 16', 'moi': 'Autosomal recessive', 'submitter': 'Laboratory for Molecular Medicine'}, + {'classification': 'Supportive', 'date': '9/14/21 0:00', 'disease': 'dystonia 16', 'moi': 'Autosomal recessive', 'submitter': 'Orphanet'}, + ]}, + 'gencodeGeneType': 'transcribed_unprocessed_pseudogene', + 'geneId': 'ENSG00000223972', + 'geneNames': '', + 'geneSymbol': 'DDX11L1', + 'mgiMarkerId': None, + 'mimNumber': 147571, + 'notes': [], + 'omimPhenotypes': [{'mimNumber': 147571, 'phenotypeDescription': 'Immunodeficiency 38', 'phenotypeInheritance': 'Autosomal recessive', 
'phenotypeMimNumber': 616126}], + 'primateAi': {'percentile25': 0.587214291096, 'percentile75': 0.821286439896}, + 'sHet': {'postMean': 0.90576}, + 'startGrch37': 11869, + 'startGrch38': 11869, + }) def test_create_update_and_delete_gene_note(self): diff --git a/seqr/views/utils/test_utils.py b/seqr/views/utils/test_utils.py index bd3d71fa95..97a86ab8c5 100644 --- a/seqr/views/utils/test_utils.py +++ b/seqr/views/utils/test_utils.py @@ -794,7 +794,7 @@ def _get_list_param(call, param): 'gencodeGeneType', 'geneId', 'geneSymbol', 'startGrch37', 'startGrch38', } GENE_VARIANT_DISPLAY_FIELDS = { - 'constraints', 'omimPhenotypes', 'mimNumber', 'cnSensitivity', 'genCc', 'clinGen', + 'constraints', 'omimPhenotypes', 'mimNumber', 'cnSensitivity', 'genCc', 'clinGen', 'sHet', } GENE_VARIANT_DISPLAY_FIELDS.update(GENE_FIELDS) GENE_VARIANT_FIELDS = { diff --git a/ui/shared/components/panel/genes/GeneDetail.jsx b/ui/shared/components/panel/genes/GeneDetail.jsx index dd7eb8f9c6..82470104f9 100644 --- a/ui/shared/components/panel/genes/GeneDetail.jsx +++ b/ui/shared/components/panel/genes/GeneDetail.jsx @@ -175,6 +175,7 @@ DosageSensitivity.propTypes = { export const HI_THRESHOLD = 0.86 export const TS_THRESHOLD = 0.94 +export const SHET_THRESHOLD = 0.1 const HAPLOINSUFFICIENT_FIELDS = [{ field: 'phi', label: 'pHaplo' }] const TRIPLOSENSITIVE_FIELDS = [{ field: 'pts', label: 'pTriplo' }] const STAT_DETAILS = [ @@ -210,6 +211,24 @@ const STAT_DETAILS = [ note: 'These metrics are based on the amount of expected variation observed in the gnomAD data and is a measure ' + 'of how likely the gene is to be intolerant of loss-of-function mutations.', }, + { + title: 'Shet', + scoreField: 'sHet', + fields: [ + { field: 'postMean', label: 'post_mean' }, + ], + note: ( + + This score was developed by the Pritchard lab [ + + Zeng et al 2023 + + ] to predict gene constraint based on functional and evolutionary information. 
Scores > + {SHET_THRESHOLD} +   are considered to have high likelihood to be under extreme selection. + + ), + }, { title: 'Haploinsufficient', content: gene => ( diff --git a/ui/shared/components/panel/variants/VariantGene.jsx b/ui/shared/components/panel/variants/VariantGene.jsx index e7bbe1931d..9dde03e3a8 100644 --- a/ui/shared/components/panel/variants/VariantGene.jsx +++ b/ui/shared/components/panel/variants/VariantGene.jsx @@ -23,7 +23,7 @@ import { InlineHeader, NoBorderTable, ButtonLink, ColoredLabel } from '../../Sty import { PermissiveGeneSearchLink } from '../../buttons/SearchResultsLink' import ShowGeneModal from '../../buttons/ShowGeneModal' import Modal from '../../modal/Modal' -import { GenCC, ClingenLabel, HI_THRESHOLD, TS_THRESHOLD } from '../genes/GeneDetail' +import { GenCC, ClingenLabel, HI_THRESHOLD, TS_THRESHOLD, SHET_THRESHOLD } from '../genes/GeneDetail' import { getIndividualGeneDataByFamilyGene } from './selectors' const RnaSeqTpm = React.lazy(() => import('./RnaSeqTpm')) @@ -379,24 +379,41 @@ const GENE_DETAIL_SECTIONS = [ color: 'red', description: 'Loss of Function Constraint', label: 'LOF CONSTR', - showDetails: gene => gene.constraints.louef < LOF_THRESHHOLD, + showDetails: gene => (gene.constraints.louef < LOF_THRESHHOLD) || + (gene.cnSensitivity.phi && gene.cnSensitivity.phi > HI_THRESHOLD) || + (gene.sHet.postMean && gene.sHet.postMean > SHET_THRESHOLD), detailsDisplay: gene => ( - `This gene ranks as ${gene.constraints.louefRank} most intolerant of LoF mutations out of - ${gene.constraints.totalGenes} genes under study (louef: - ${gene.constraints.louef.toPrecision(4)}${gene.constraints.pli ? `, pLi: ${gene.constraints.pli.toPrecision(4)}` : ''}). - LOEUF is the observed to expected upper bound fraction for loss-of-function variants based on the variation - observed in the gnomad data. 
Both LOEUF and pLi are measures of how likely the gene is to be intolerant of - loss-of-function mutations`), - }, - { - color: 'red', - description: 'HaploInsufficient', - label: 'HI', - showDetails: gene => gene.cnSensitivity.phi && gene.cnSensitivity.phi > HI_THRESHOLD, - detailsDisplay: gene => ( - `These are a score developed by the Talkowski lab that predict whether a gene is haploinsufficient based - on large chromosomal microarray data set analysis. Scores >${HI_THRESHOLD} are considered to have high likelihood to be - haploinsufficient. This gene has a score of ${gene.cnSensitivity.phi.toPrecision(4)}.`), + + {/* showDetails can be triggered by the HI or sHet score alone, so louef may be missing here and must be guarded like the other rows */} + {typeof gene.constraints.louef === 'number' && ( + + This gene ranks as   + {gene.constraints.louefRank} +  most intolerant of LoF mutations out of   + {gene.constraints.totalGenes} +  genes under study (louef:   + {gene.constraints.louef.toPrecision(4)} + {gene.constraints.pli ? `, pLi: ${gene.constraints.pli.toPrecision(4)}` : ''} + ) + Karczewski (2020) + + )} + {gene.sHet.postMean && ( + + This gene has a Shet score of   + {gene.sHet.postMean.toPrecision(4)} + Zeng (2023) + + )} + {gene.cnSensitivity.phi && ( + + This gene has a haploinsufficiency (HI) score of   + {gene.cnSensitivity.phi.toPrecision(4)} + Collins (2022) + + )} + + ), }, { color: 'red',