diff --git a/CHANGELOG.md b/CHANGELOG.md
index e9bb0b41df..ae33d21e87 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,7 @@
## dev
* Migrate Fammily post_discovery_omim_number to integer array (REQUIRES DB MIGRATION)
+* Add GeneShet model to the reference DB (REQUIRES DB MIGRATION)
## 10/6/23
* Require tissue_type in Sample model (REQUIRES DB MIGRATION)
diff --git a/reference_data/management/commands/update_gene_shet.py b/reference_data/management/commands/update_gene_shet.py
new file mode 100644
index 0000000000..d348b0c164
--- /dev/null
+++ b/reference_data/management/commands/update_gene_shet.py
@@ -0,0 +1,22 @@
+import logging
+from reference_data.management.commands.utils.update_utils import GeneCommand, ReferenceDataHandler
+from reference_data.models import GeneShet
+
+logger = logging.getLogger(__name__)
+
+
+class ShetReferenceDataHandler(ReferenceDataHandler):  # Loader configuration for the GeneShet reference table
+
+ model_cls = GeneShet  # model the parsed records are meant for -- presumably written by the base handler; confirm in update_utils
+ url = 'https://zenodo.org/record/7939768/files/s_het_estimates.genebayes.tsv'  # source file of per-gene s_het estimates
+
+ @staticmethod
+ def parse_record(record):  # Yield one dict per input row, keeping only the gene ID and post_mean
+ yield {
+ 'gene_id': record['ensg'],  # 'ensg' column carries the Ensembl gene ID (e.g. ENSG00000223972 in the test data)
+ 'post_mean': float(record['post_mean']),  # converted to float to match the model's FloatField
+ }
+
+
+class Command(GeneCommand):  # Management command invoked as `update_gene_shet` (Django names commands after the module file)
+ reference_data_handler = ShetReferenceDataHandler  # handler supplying the target model, source URL and row parser
diff --git a/reference_data/management/tests/update_gene_shet_tests.py b/reference_data/management/tests/update_gene_shet_tests.py
new file mode 100644
index 0000000000..ba16605b0d
--- /dev/null
+++ b/reference_data/management/tests/update_gene_shet_tests.py
@@ -0,0 +1,20 @@
+from reference_data.models import GeneShet
+from reference_data.management.tests.test_utils import ReferenceDataCommandTestCase
+
+class UpdateGeneShetTest(ReferenceDataCommandTestCase):  # Exercises the update_gene_shet management command
+ URL = 'https://zenodo.org/record/7939768/files/s_het_estimates.genebayes.tsv'  # same URL as ShetReferenceDataHandler.url; presumably mocked by the base test case
+ DATA = [  # header row followed by three gene rows used as the source file contents
+ 'ensg hgnc chrom obs_lof exp_lof prior_mean post_mean post_lower_95 post_upper_95\n',
+ 'ENSG00000223972 HGNC:37225 chr15 26.0 21.66 0.00059216 3.01e-05 1.05e-06 0.00010405\n',
+ 'ENSG00000227233 HGNC:26441 chr5 31.0 28.55 0.00038727 4.853e-05 3.05e-06 0.00015705\n',
+ 'ENSG00000243485 HGNC:4013 chr19 17.0 11.327 0.00082297 5.083e-05 3.05e-06 0.00016605\n'
+ ]
+
+ def test_update_gene_shet_command(self):  # renamed: the old name referred to the cn_sensitivity command by copy-paste
+ self._test_update_command('update_gene_shet', 'GeneShet', created_records=2)  # 2 of the 3 rows are expected to be stored (presumably only those genes exist in the fixture)
+
+ self.assertEqual(GeneShet.objects.count(), 2)
+ record = GeneShet.objects.get(gene__gene_id='ENSG00000223972')
+ self.assertEqual(record.post_mean, 3.01E-05)
+ record = GeneShet.objects.get(gene__gene_id='ENSG00000243485')
+ self.assertEqual(record.post_mean, 5.083E-05)
diff --git a/reference_data/migrations/0022_geneshet.py b/reference_data/migrations/0022_geneshet.py
new file mode 100644
index 0000000000..593f864a9a
--- /dev/null
+++ b/reference_data/migrations/0022_geneshet.py
@@ -0,0 +1,22 @@
+# Generated by Django 3.2.18 on 2023-10-10 20:12
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):  # Creates the GeneShet table in the reference_data app
+
+ dependencies = [
+ ('reference_data', '0021_auto_20221031_2049'),  # applied after this earlier reference_data migration
+ ]
+
+ operations = [
+ migrations.CreateModel(
+ name='GeneShet',
+ fields=[
+ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
+ ('post_mean', models.FloatField()),  # required float column (no null/default specified)
+ ('gene', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='reference_data.geneinfo')),  # rows are removed together with their GeneInfo
+ ],
+ ),
+ ]
diff --git a/reference_data/models.py b/reference_data/models.py
index ce221425aa..f8aae30788 100644
--- a/reference_data/models.py
+++ b/reference_data/models.py
@@ -161,6 +161,15 @@ class Meta:
json_fields = ['pHI', 'pTS']
+class GeneShet(models.Model):  # s_het estimate attached to a gene
+ gene = models.ForeignKey(GeneInfo, on_delete=models.CASCADE)  # owning gene; deleting the GeneInfo deletes this row
+
+ post_mean = models.FloatField()  # value parsed from the 'post_mean' column by the update_gene_shet command
+
+ class Meta:
+ json_fields = ['post_mean']  # presumably the fields exposed in JSON output (the gene API test shows it as 'postMean') -- confirm in orm_to_json_utils
+
+
class Omim(models.Model):
MAP_METHOD_CHOICES = (
('1', 'the disorder is placed on the map based on its association with a gene, but the underlying defect is not known.'),
diff --git a/seqr/fixtures/reference_data.json b/seqr/fixtures/reference_data.json
index 8d559de4fb..ae755195a0 100644
--- a/seqr/fixtures/reference_data.json
+++ b/seqr/fixtures/reference_data.json
@@ -1167,6 +1167,14 @@
"pHI": 0.90576,
"pTS": 0.7346
}
+},
+{
+ "model": "reference_data.geneshet",
+ "pk": 1,
+ "fields": {
+ "gene": 1,
+ "post_mean": 0.90576
+ }
},
{
"model": "reference_data.dbnsfpgene",
diff --git a/seqr/utils/gene_utils.py b/seqr/utils/gene_utils.py
index 7c773e21db..c590b888fc 100644
--- a/seqr/utils/gene_utils.py
+++ b/seqr/utils/gene_utils.py
@@ -4,7 +4,7 @@
from django.db.models.functions import Length
from reference_data.models import GeneInfo, GeneConstraint, dbNSFPGene, Omim, MGI, PrimateAI, GeneCopyNumberSensitivity, \
- GenCC, ClinGen
+ GenCC, ClinGen, GeneShet
from seqr.utils.xpos_utils import get_xpos
from seqr.views.utils.orm_to_json_utils import _get_json_for_model, _get_json_for_models, _get_empty_json_for_model, \
get_json_for_gene_notes_by_gene_id
@@ -90,6 +90,7 @@ def _add_mgi(gene):
OMIM = 'omim'
CONSTRAINT = 'constraint'
CN_SENSITIVITY = 'cn_sensitivity'
+SHET = 'shet'
DBNSFP = 'dbnsfp'
GENCC = 'gencc'
PRIMATE_AI = 'primate_ai'
@@ -100,6 +101,7 @@ def _add_mgi(gene):
OMIM: (Omim, _add_omim),
CONSTRAINT: (GeneConstraint, None),
CN_SENSITIVITY: (GeneCopyNumberSensitivity, _add_gene_model('genecopynumbersensitivity', 'cnSensitivity', dict)),
+ SHET: (GeneShet, _add_gene_model('geneshet', 'sHet', dict)),
GENCC: (GenCC, _add_gene_model('gencc', 'genCc', dict)),
CLINGEN: (ClinGen, _add_gene_model('clingen', 'clinGen', lambda: None)),
}
diff --git a/seqr/views/apis/family_api_tests.py b/seqr/views/apis/family_api_tests.py
index 391a7380a2..ef4946f397 100644
--- a/seqr/views/apis/family_api_tests.py
+++ b/seqr/views/apis/family_api_tests.py
@@ -545,7 +545,7 @@ def test_get_family_phenotype_gene_scores(self):
'ENSG00000268903': {
'chromGrch37': '1', 'chromGrch38': '1', 'clinGen': None, 'cnSensitivity': {},
'codingRegionSizeGrch37': 0, 'codingRegionSizeGrch38': 0, 'constraints': {},
- 'endGrch37': 135895, 'endGrch38': 135895, 'genCc': {},
+ 'endGrch37': 135895, 'endGrch38': 135895, 'genCc': {}, 'sHet': {},
'gencodeGeneType': 'processed_pseudogene', 'geneId': 'ENSG00000268903',
'geneSymbol': 'AL627309.7', 'mimNumber': None, 'omimPhenotypes': [],
'startGrch37': 135141, 'startGrch38': 135141
diff --git a/seqr/views/apis/gene_api_tests.py b/seqr/views/apis/gene_api_tests.py
index 10b0f61728..6b394faf9b 100644
--- a/seqr/views/apis/gene_api_tests.py
+++ b/seqr/views/apis/gene_api_tests.py
@@ -33,6 +33,35 @@ def test_genes_info(self):
genes = response.json()['genesById']
self.assertSetEqual(set(genes.keys()), {GENE_ID, 'ENSG00000269981'})
self.assertSetEqual(set(genes[GENE_ID].keys()), GENE_DETAIL_FIELDS)
+ self.assertDictEqual(genes[GENE_ID], {
+ 'chromGrch37': '1',
+ 'chromGrch38': '1',
+ 'clinGen': {'haploinsufficiency': 'No Evidence', 'href': 'https://dosage.clinicalgenome.org/clingen_gene.cgi?sym=', 'triplosensitivity': ''},
+ 'cnSensitivity': {'phi': 0.90576, 'pts': 0.7346},
+ 'codingRegionSizeGrch37': 0,
+ 'codingRegionSizeGrch38': 0,
+ 'constraints': {'louef': 1.606, 'louefRank': 0, 'misZ': -0.7773, 'misZRank': 1, 'pli': 0.00090576, 'pliRank': 1, 'totalGenes': 1},
+ 'diseaseDesc': '',
+ 'endGrch37': 14409,
+ 'endGrch38': 14409,
+ 'functionDesc': '',
+ 'genCc': {'hgncId': 'HGNC:943', 'classifications': [
+ {'classification': 'Strong', 'date': '7/29/19 19:04', 'disease': 'dystonia 16', 'moi': 'Autosomal recessive', 'submitter': 'Laboratory for Molecular Medicine'},
+ {'classification': 'Supportive', 'date': '9/14/21 0:00', 'disease': 'dystonia 16', 'moi': 'Autosomal recessive', 'submitter': 'Orphanet'},
+ ]},
+ 'gencodeGeneType': 'transcribed_unprocessed_pseudogene',
+ 'geneId': 'ENSG00000223972',
+ 'geneNames': '',
+ 'geneSymbol': 'DDX11L1',
+ 'mgiMarkerId': None,
+ 'mimNumber': 147571,
+ 'notes': [],
+ 'omimPhenotypes': [{'mimNumber': 147571, 'phenotypeDescription': 'Immunodeficiency 38', 'phenotypeInheritance': 'Autosomal recessive', 'phenotypeMimNumber': 616126}],
+ 'primateAi': {'percentile25': 0.587214291096, 'percentile75': 0.821286439896},
+ 'sHet': {'postMean': 0.90576},
+ 'startGrch37': 11869,
+ 'startGrch38': 11869,
+ })
def test_create_update_and_delete_gene_note(self):
diff --git a/seqr/views/utils/test_utils.py b/seqr/views/utils/test_utils.py
index bd3d71fa95..97a86ab8c5 100644
--- a/seqr/views/utils/test_utils.py
+++ b/seqr/views/utils/test_utils.py
@@ -794,7 +794,7 @@ def _get_list_param(call, param):
'gencodeGeneType', 'geneId', 'geneSymbol', 'startGrch37', 'startGrch38',
}
GENE_VARIANT_DISPLAY_FIELDS = {
- 'constraints', 'omimPhenotypes', 'mimNumber', 'cnSensitivity', 'genCc', 'clinGen',
+ 'constraints', 'omimPhenotypes', 'mimNumber', 'cnSensitivity', 'genCc', 'clinGen', 'sHet',
}
GENE_VARIANT_DISPLAY_FIELDS.update(GENE_FIELDS)
GENE_VARIANT_FIELDS = {
diff --git a/ui/shared/components/panel/genes/GeneDetail.jsx b/ui/shared/components/panel/genes/GeneDetail.jsx
index dd7eb8f9c6..82470104f9 100644
--- a/ui/shared/components/panel/genes/GeneDetail.jsx
+++ b/ui/shared/components/panel/genes/GeneDetail.jsx
@@ -175,6 +175,7 @@ DosageSensitivity.propTypes = {
export const HI_THRESHOLD = 0.86
export const TS_THRESHOLD = 0.94
+export const SHET_THRESHOLD = 0.1 // sHet postMean scores above this are treated as likely under extreme selection
const HAPLOINSUFFICIENT_FIELDS = [{ field: 'phi', label: 'pHaplo' }]
const TRIPLOSENSITIVE_FIELDS = [{ field: 'pts', label: 'pTriplo' }]
const STAT_DETAILS = [
@@ -210,6 +211,24 @@ const STAT_DETAILS = [
note: 'These metrics are based on the amount of expected variation observed in the gnomAD data and is a measure ' +
'of how likely the gene is to be intolerant of loss-of-function mutations.',
},
+ {
+ title: 'Shet',
+ scoreField: 'sHet',
+ fields: [
+ { field: 'postMean', label: 'post_mean' },
+ ],
+ note: (
+
+ This score was developed by the Pritchard lab [
+
+ Zeng et al 2023
+
+ ] to predict gene constraint based on functional and evolutionary information. Scores >
+ {SHET_THRESHOLD}
+ are considered to have high likelihood to be under extreme selection.
+
+ ),
+ },
{
title: 'Haploinsufficient',
content: gene => (
diff --git a/ui/shared/components/panel/variants/VariantGene.jsx b/ui/shared/components/panel/variants/VariantGene.jsx
index e7bbe1931d..9dde03e3a8 100644
--- a/ui/shared/components/panel/variants/VariantGene.jsx
+++ b/ui/shared/components/panel/variants/VariantGene.jsx
@@ -23,7 +23,7 @@ import { InlineHeader, NoBorderTable, ButtonLink, ColoredLabel } from '../../Sty
import { PermissiveGeneSearchLink } from '../../buttons/SearchResultsLink'
import ShowGeneModal from '../../buttons/ShowGeneModal'
import Modal from '../../modal/Modal'
-import { GenCC, ClingenLabel, HI_THRESHOLD, TS_THRESHOLD } from '../genes/GeneDetail'
+import { GenCC, ClingenLabel, HI_THRESHOLD, TS_THRESHOLD, SHET_THRESHOLD } from '../genes/GeneDetail'
import { getIndividualGeneDataByFamilyGene } from './selectors'
const RnaSeqTpm = React.lazy(() => import('./RnaSeqTpm'))
@@ -379,24 +379,38 @@ const GENE_DETAIL_SECTIONS = [
color: 'red',
description: 'Loss of Function Constraint',
label: 'LOF CONSTR',
- showDetails: gene => gene.constraints.louef < LOF_THRESHHOLD,
+ showDetails: gene => (gene.constraints.louef < LOF_THRESHHOLD) ||
+ (gene.cnSensitivity.phi && gene.cnSensitivity.phi > HI_THRESHOLD) ||
+ (gene.sHet.postMean && gene.sHet.postMean > SHET_THRESHOLD),
detailsDisplay: gene => (
- `This gene ranks as ${gene.constraints.louefRank} most intolerant of LoF mutations out of
- ${gene.constraints.totalGenes} genes under study (louef:
- ${gene.constraints.louef.toPrecision(4)}${gene.constraints.pli ? `, pLi: ${gene.constraints.pli.toPrecision(4)}` : ''}).
- LOEUF is the observed to expected upper bound fraction for loss-of-function variants based on the variation
- observed in the gnomad data. Both LOEUF and pLi are measures of how likely the gene is to be intolerant of
- loss-of-function mutations`),
- },
- {
- color: 'red',
- description: 'HaploInsufficient',
- label: 'HI',
- showDetails: gene => gene.cnSensitivity.phi && gene.cnSensitivity.phi > HI_THRESHOLD,
- detailsDisplay: gene => (
- `These are a score developed by the Talkowski lab that predict whether a gene is haploinsufficient based
- on large chromosomal microarray data set analysis. Scores >${HI_THRESHOLD} are considered to have high likelihood to be
- haploinsufficient. This gene has a score of ${gene.cnSensitivity.phi.toPrecision(4)}.`),
+
+
+ This gene ranks as
+ {gene.constraints.louefRank}
+ most intolerant of LoF mutations out of
+ {gene.constraints.totalGenes}
+ genes under study (louef:
+ {gene.constraints.louef.toPrecision(4)}
+ {gene.constraints.pli ? `, pLi: ${gene.constraints.pli.toPrecision(4)}` : ''}
+ )
+ Karczewski (2020)
+
+ {gene.sHet.postMean && (
+
+ This gene has a Shet score of
+ {gene.sHet.postMean.toPrecision(4)}
+ Zeng (2023)
+
+ )}
+ {gene.cnSensitivity.phi && (
+
+ This gene has a haploinsufficiency (HI) score of
+ {gene.cnSensitivity.phi.toPrecision(4)}
+ Collins (2022)
+
+ )}
+
+ ),
},
{
color: 'red',