Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Create a Gene Shet reference data update cmd. #3576

Merged
merged 11 commits into from
Oct 19, 2023
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

## dev
* Migrate Family post_discovery_omim_number to integer array (REQUIRES DB MIGRATION)
* Add GeneShet model to the reference DB (REQUIRES DB MIGRATION)

## 10/6/23
* Require tissue_type in Sample model (REQUIRES DB MIGRATION)
Expand Down
22 changes: 22 additions & 0 deletions reference_data/management/commands/update_gene_shet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import logging
from reference_data.management.commands.utils.update_utils import GeneCommand, ReferenceDataHandler
from reference_data.models import GeneShet

logger = logging.getLogger(__name__)


class ShetReferenceDataHandler(ReferenceDataHandler):
    """Reference-data handler that populates GeneShet from the GeneBayes s_het estimates TSV."""

    model_cls = GeneShet
    url = 'https://zenodo.org/record/7939768/files/s_het_estimates.genebayes.tsv'

    @staticmethod
    def parse_record(record):
        """Yield a single GeneShet field dict built from one parsed row.

        The row's 'ensg' value is passed through unchanged as 'gene_id', and
        its 'post_mean' value is converted to a float.
        """
        ensembl_gene_id = record['ensg']
        posterior_mean = float(record['post_mean'])
        yield {'gene_id': ensembl_gene_id, 'post_mean': posterior_mean}


# Management command class for update_gene_shet: a GeneCommand configured to use the S-het handler.
class Command(GeneCommand):
reference_data_handler = ShetReferenceDataHandler
20 changes: 20 additions & 0 deletions reference_data/management/tests/update_gene_shet_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from reference_data.models import GeneShet
from reference_data.management.tests.test_utils import ReferenceDataCommandTestCase

class UpdateGeneShetTest(ReferenceDataCommandTestCase):
    """Exercise the update_gene_shet management command against a small canned s_het file."""

    URL = 'https://zenodo.org/record/7939768/files/s_het_estimates.genebayes.tsv'
    DATA = [
        'ensg hgnc chrom obs_lof exp_lof prior_mean post_mean post_lower_95 post_upper_95\n',
        'ENSG00000223972 HGNC:37225 chr15 26.0 21.66 0.00059216 3.01e-05 1.05e-06 0.00010405\n',
        'ENSG00000227233 HGNC:26441 chr5 31.0 28.55 0.00038727 4.853e-05 3.05e-06 0.00015705\n',
        'ENSG00000243485 HGNC:4013 chr19 17.0 11.327 0.00082297 5.083e-05 3.05e-06 0.00016605\n'
    ]

    def test_update_gene_shet_command(self):
        """Run the command and check the stored post_mean values.

        The method was previously named after the copy-number-sensitivity
        command it was copied from; the name now matches the command under test.
        """
        # Three data rows are supplied but only two records are expected.
        # NOTE(review): presumably ENSG00000227233 is absent from the test gene fixtures -- confirm.
        self._test_update_command('update_gene_shet', 'GeneShet', created_records=2)

        self.assertEqual(GeneShet.objects.count(), 2)
        record = GeneShet.objects.get(gene__gene_id='ENSG00000223972')
        self.assertEqual(record.post_mean, 3.01E-05)
        record = GeneShet.objects.get(gene__gene_id='ENSG00000243485')
        self.assertEqual(record.post_mean, 5.083E-05)
22 changes: 22 additions & 0 deletions reference_data/migrations/0022_geneshet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Generated by Django 3.2.18 on 2023-10-10 20:12

from django.db import migrations, models
import django.db.models.deletion


# Schema migration that creates the GeneShet model in the reference_data app.
class Migration(migrations.Migration):

# Depends on the preceding reference_data migration.
dependencies = [
('reference_data', '0021_auto_20221031_2049'),
]

operations = [
migrations.CreateModel(
name='GeneShet',
fields=[
# Auto-created primary key column.
('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
# Float column holding the post_mean value.
('post_mean', models.FloatField()),
# Foreign key to reference_data.geneinfo; CASCADE deletes this row together with its gene row.
('gene', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='reference_data.geneinfo')),
],
),
]
9 changes: 9 additions & 0 deletions reference_data/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,15 @@ class Meta:
json_fields = ['pHI', 'pTS']


# Reference-data model storing one post_mean value linked to a GeneInfo row.
class GeneShet(models.Model):
# Gene this value belongs to; on_delete=CASCADE removes this row when the GeneInfo row is deleted.
gene = models.ForeignKey(GeneInfo, on_delete=models.CASCADE)

# Float value of the score for the gene.
post_mean = models.FloatField()

class Meta:
# NOTE(review): presumably the list of fields exposed when the model is serialized to JSON -- confirm where json_fields is consumed.
json_fields = ['post_mean']


class Omim(models.Model):
MAP_METHOD_CHOICES = (
('1', 'the disorder is placed on the map based on its association with a gene, but the underlying defect is not known.'),
Expand Down
8 changes: 8 additions & 0 deletions seqr/fixtures/reference_data.json
Original file line number Diff line number Diff line change
Expand Up @@ -1167,6 +1167,14 @@
"pHI": 0.90576,
"pTS": 0.7346
}
},
{
"model": "reference_data.geneshet",
"pk": 1,
"fields": {
"gene": 1,
"post_mean": 0.90576
}
},
{
"model": "reference_data.dbnsfpgene",
Expand Down
4 changes: 3 additions & 1 deletion seqr/utils/gene_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from django.db.models.functions import Length

from reference_data.models import GeneInfo, GeneConstraint, dbNSFPGene, Omim, MGI, PrimateAI, GeneCopyNumberSensitivity, \
GenCC, ClinGen
GenCC, ClinGen, GeneShet
from seqr.utils.xpos_utils import get_xpos
from seqr.views.utils.orm_to_json_utils import _get_json_for_model, _get_json_for_models, _get_empty_json_for_model, \
get_json_for_gene_notes_by_gene_id
Expand Down Expand Up @@ -90,6 +90,7 @@ def _add_mgi(gene):
OMIM = 'omim'
CONSTRAINT = 'constraint'
CN_SENSITIVITY = 'cn_sensitivity'
SHET = 'shet'
DBNSFP = 'dbnsfp'
GENCC = 'gencc'
PRIMATE_AI = 'primate_ai'
Expand All @@ -100,6 +101,7 @@ def _add_mgi(gene):
OMIM: (Omim, _add_omim),
CONSTRAINT: (GeneConstraint, None),
CN_SENSITIVITY: (GeneCopyNumberSensitivity, _add_gene_model('genecopynumbersensitivity', 'cnSensitivity', dict)),
SHET: (GeneShet, _add_gene_model('geneshet', 'sHet', dict)),
GENCC: (GenCC, _add_gene_model('gencc', 'genCc', dict)),
CLINGEN: (ClinGen, _add_gene_model('clingen', 'clinGen', lambda: None)),
}
Expand Down
2 changes: 1 addition & 1 deletion seqr/views/apis/family_api_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -545,7 +545,7 @@ def test_get_family_phenotype_gene_scores(self):
'ENSG00000268903': {
'chromGrch37': '1', 'chromGrch38': '1', 'clinGen': None, 'cnSensitivity': {},
'codingRegionSizeGrch37': 0, 'codingRegionSizeGrch38': 0, 'constraints': {},
'endGrch37': 135895, 'endGrch38': 135895, 'genCc': {},
'endGrch37': 135895, 'endGrch38': 135895, 'genCc': {}, 'sHet': {},
'gencodeGeneType': 'processed_pseudogene', 'geneId': 'ENSG00000268903',
'geneSymbol': 'AL627309.7', 'mimNumber': None, 'omimPhenotypes': [],
'startGrch37': 135141, 'startGrch38': 135141
Expand Down
29 changes: 29 additions & 0 deletions seqr/views/apis/gene_api_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,35 @@ def test_genes_info(self):
genes = response.json()['genesById']
self.assertSetEqual(set(genes.keys()), {GENE_ID, 'ENSG00000269981'})
self.assertSetEqual(set(genes[GENE_ID].keys()), GENE_DETAIL_FIELDS)
self.assertDictEqual(genes[GENE_ID], {
'chromGrch37': '1',
'chromGrch38': '1',
'clinGen': {'haploinsufficiency': 'No Evidence', 'href': 'https://dosage.clinicalgenome.org/clingen_gene.cgi?sym=', 'triplosensitivity': ''},
'cnSensitivity': {'phi': 0.90576, 'pts': 0.7346},
'codingRegionSizeGrch37': 0,
'codingRegionSizeGrch38': 0,
'constraints': {'louef': 1.606, 'louefRank': 0, 'misZ': -0.7773, 'misZRank': 1, 'pli': 0.00090576, 'pliRank': 1, 'totalGenes': 1},
'diseaseDesc': '',
'endGrch37': 14409,
'endGrch38': 14409,
'functionDesc': '',
'genCc': {'hgncId': 'HGNC:943', 'classifications': [
{'classification': 'Strong', 'date': '7/29/19 19:04', 'disease': 'dystonia 16', 'moi': 'Autosomal recessive', 'submitter': 'Laboratory for Molecular Medicine'},
{'classification': 'Supportive', 'date': '9/14/21 0:00', 'disease': 'dystonia 16', 'moi': 'Autosomal recessive', 'submitter': 'Orphanet'},
]},
'gencodeGeneType': 'transcribed_unprocessed_pseudogene',
'geneId': 'ENSG00000223972',
'geneNames': '',
'geneSymbol': 'DDX11L1',
'mgiMarkerId': None,
'mimNumber': 147571,
'notes': [],
'omimPhenotypes': [{'mimNumber': 147571, 'phenotypeDescription': 'Immunodeficiency 38', 'phenotypeInheritance': 'Autosomal recessive', 'phenotypeMimNumber': 616126}],
'primateAi': {'percentile25': 0.587214291096, 'percentile75': 0.821286439896},
'sHet': {'postMean': 0.90576},
'startGrch37': 11869,
'startGrch38': 11869,
})


def test_create_update_and_delete_gene_note(self):
Expand Down
2 changes: 1 addition & 1 deletion seqr/views/utils/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -794,7 +794,7 @@ def _get_list_param(call, param):
'gencodeGeneType', 'geneId', 'geneSymbol', 'startGrch37', 'startGrch38',
}
GENE_VARIANT_DISPLAY_FIELDS = {
'constraints', 'omimPhenotypes', 'mimNumber', 'cnSensitivity', 'genCc', 'clinGen',
'constraints', 'omimPhenotypes', 'mimNumber', 'cnSensitivity', 'genCc', 'clinGen', 'sHet',
}
GENE_VARIANT_DISPLAY_FIELDS.update(GENE_FIELDS)
GENE_VARIANT_FIELDS = {
Expand Down
19 changes: 19 additions & 0 deletions ui/shared/components/panel/genes/GeneDetail.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ DosageSensitivity.propTypes = {

export const HI_THRESHOLD = 0.86
export const TS_THRESHOLD = 0.94
export const SHET_THRESHOLD = 0.1
const HAPLOINSUFFICIENT_FIELDS = [{ field: 'phi', label: 'pHaplo' }]
const TRIPLOSENSITIVE_FIELDS = [{ field: 'pts', label: 'pTriplo' }]
const STAT_DETAILS = [
Expand Down Expand Up @@ -210,6 +211,24 @@ const STAT_DETAILS = [
note: 'These metrics are based on the amount of expected variation observed in the gnomAD data and is a measure ' +
'of how likely the gene is to be intolerant of loss-of-function mutations.',
},
{
title: 'Shet',
scoreField: 'sHet',
fields: [
{ field: 'postMean', label: 'post_mean' },
],
note: (
<span>
This score was developed by the Pritchard lab [
<a href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC10245655" target="_blank" rel="noreferrer">
Zeng et al 2023
</a>
] to predict gene constraint based on functional and evolutionary information. Scores &gt;
{SHET_THRESHOLD}
&nbsp; are considered to have high likelihood to be under extreme selection.
</span>
),
},
{
title: 'Haploinsufficient',
content: gene => (
Expand Down
50 changes: 32 additions & 18 deletions ui/shared/components/panel/variants/VariantGene.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import { InlineHeader, NoBorderTable, ButtonLink, ColoredLabel } from '../../Sty
import { PermissiveGeneSearchLink } from '../../buttons/SearchResultsLink'
import ShowGeneModal from '../../buttons/ShowGeneModal'
import Modal from '../../modal/Modal'
import { GenCC, ClingenLabel, HI_THRESHOLD, TS_THRESHOLD } from '../genes/GeneDetail'
import { GenCC, ClingenLabel, HI_THRESHOLD, TS_THRESHOLD, SHET_THRESHOLD } from '../genes/GeneDetail'
import { getIndividualGeneDataByFamilyGene } from './selectors'

const RnaSeqTpm = React.lazy(() => import('./RnaSeqTpm'))
Expand Down Expand Up @@ -379,24 +379,38 @@ const GENE_DETAIL_SECTIONS = [
color: 'red',
description: 'Loss of Function Constraint',
label: 'LOF CONSTR',
showDetails: gene => gene.constraints.louef < LOF_THRESHHOLD,
showDetails: gene => (gene.constraints.louef < LOF_THRESHHOLD) ||
(gene.cnSensitivity.phi && gene.cnSensitivity.phi > HI_THRESHOLD) ||
(gene.sHet.postMean && gene.sHet.postMean > SHET_THRESHOLD),
detailsDisplay: gene => (
`This gene ranks as ${gene.constraints.louefRank} most intolerant of LoF mutations out of
${gene.constraints.totalGenes} genes under study (louef:
${gene.constraints.louef.toPrecision(4)}${gene.constraints.pli ? `, pLi: ${gene.constraints.pli.toPrecision(4)}` : ''}).
LOEUF is the observed to expected upper bound fraction for loss-of-function variants based on the variation
observed in the gnomad data. Both LOEUF and pLi are measures of how likely the gene is to be intolerant of
loss-of-function mutations`),
},
{
color: 'red',
description: 'HaploInsufficient',
label: 'HI',
showDetails: gene => gene.cnSensitivity.phi && gene.cnSensitivity.phi > HI_THRESHOLD,
detailsDisplay: gene => (
`These are a score developed by the Talkowski lab that predict whether a gene is haploinsufficient based
on large chromosomal microarray data set analysis. Scores >${HI_THRESHOLD} are considered to have high likelihood to be
haploinsufficient. This gene has a score of ${gene.cnSensitivity.phi.toPrecision(4)}.`),
<List bulleted>
<List.Item>
This gene ranks as &nbsp;
{gene.constraints.louefRank}
&nbsp;most intolerant of LoF mutations out of &nbsp;
{gene.constraints.totalGenes}
&nbsp;genes under study (louef: &nbsp;
{gene.constraints.louef.toPrecision(4)}
{gene.constraints.pli ? `, pLi: ${gene.constraints.pli.toPrecision(4)}` : ''}
)
<a href="https://pubmed.ncbi.nlm.nih.gov/32461654/" target="_blank" rel="noreferrer"> Karczewski (2020)</a>
</List.Item>
{gene.sHet.postMean && (
<List.Item>
This gene has a Shet score of &nbsp;
{gene.sHet.postMean.toPrecision(4)}
<a href="https://www.ncbi.nlm.nih.gov/pmc/articles/PMC10245655" target="_blank" rel="noreferrer"> Zeng (2023)</a>
</List.Item>
)}
{gene.cnSensitivity.phi && (
<List.Item>
This gene has a haploinsufficiency (HI) score of &nbsp;
{gene.cnSensitivity.phi.toPrecision(4)}
<a href="https://pubmed.ncbi.nlm.nih.gov/35917817" target="_blank" rel="noreferrer"> Collins (2022)</a>
</List.Item>
)}
</List>
),
},
{
color: 'red',
Expand Down
Loading