From 690e19903e473a6dde78811feffd44febfc1e14f Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Wed, 23 Aug 2023 23:40:11 -0400 Subject: [PATCH 1/8] Create a gene Shet reference data update cmd. --- CHANGELOG.md | 1 + .../management/commands/update_gene_shet.py | 23 +++++++++++++++++++ .../tests/update_gene_shet_tests.py | 22 ++++++++++++++++++ reference_data/migrations/0022_geneshet.py | 23 +++++++++++++++++++ reference_data/models.py | 10 ++++++++ seqr/fixtures/reference_data.json | 9 ++++++++ 6 files changed, 88 insertions(+) create mode 100644 reference_data/management/commands/update_gene_shet.py create mode 100644 reference_data/management/tests/update_gene_shet_tests.py create mode 100644 reference_data/migrations/0022_geneshet.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 9808f0fea5..072ee1cd5e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # _seqr_ Changes ## dev +* Add GeneShet model to the reference DB (REQUIRES DB MIGRATION) * Add db indices to optimize RNA data queries (REQUIRES DB MIGRATION) ## 7/11/23 diff --git a/reference_data/management/commands/update_gene_shet.py b/reference_data/management/commands/update_gene_shet.py new file mode 100644 index 0000000000..470949818a --- /dev/null +++ b/reference_data/management/commands/update_gene_shet.py @@ -0,0 +1,23 @@ +import logging +from reference_data.management.commands.utils.update_utils import GeneCommand, ReferenceDataHandler +from reference_data.models import GeneShet + +logger = logging.getLogger(__name__) + + +class ShetReferenceDataHandler(ReferenceDataHandler): + + model_cls = GeneShet + url = 'https://storage.googleapis.com/seqr-reference-data/Shet/Shet_Zeng_2023.tsv' + + @staticmethod + def parse_record(record): + yield { + 'gene_id': record['ensg'], + 'shet': float(record['post_mean_shet']), + 'shet_constrained': bool(int(record['shet_constrained'])), + } + + +class Command(GeneCommand): + reference_data_handler = ShetReferenceDataHandler diff --git 
a/reference_data/management/tests/update_gene_shet_tests.py b/reference_data/management/tests/update_gene_shet_tests.py new file mode 100644 index 0000000000..667934cd58 --- /dev/null +++ b/reference_data/management/tests/update_gene_shet_tests.py @@ -0,0 +1,22 @@ +from reference_data.models import GeneShet +from reference_data.management.tests.test_utils import ReferenceDataCommandTestCase + +class UpdateGeneShetTest(ReferenceDataCommandTestCase): + URL = 'https://storage.googleapis.com/seqr-reference-data/Shet/Shet_Zeng_2023.tsv' + DATA = [ + 'ensg hgnc post_mean_shet shet_constrained\n', + 'ENSG00000223972 HGNC:37225 3.01E-05 0\n', + 'ENSG00000227233 HGNC:26441 4.85E-05 0\n', + 'ENSG00000243485 HGNC:4013 5.08E-05 1\n', + ] + + def test_update_gene_cn_sensitivity_command(self): + self._test_update_command('update_gene_shet', 'GeneShet', created_records=2) + + self.assertEqual(GeneShet.objects.count(), 2) + record = GeneShet.objects.get(gene__gene_id='ENSG00000223972') + self.assertEqual(record.shet, 3.01E-05) + self.assertEqual(record.shet_constrained, False) + record = GeneShet.objects.get(gene__gene_id='ENSG00000243485') + self.assertEqual(record.shet, 5.08E-05) + self.assertEqual(record.shet_constrained, True) diff --git a/reference_data/migrations/0022_geneshet.py b/reference_data/migrations/0022_geneshet.py new file mode 100644 index 0000000000..c4a1f022ef --- /dev/null +++ b/reference_data/migrations/0022_geneshet.py @@ -0,0 +1,23 @@ +# Generated by Django 3.2.20 on 2023-08-22 20:45 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('reference_data', '0021_auto_20221031_2049'), + ] + + operations = [ + migrations.CreateModel( + name='GeneShet', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('shet', models.FloatField()), + ('shet_constrained', models.BooleanField()), + ('gene', 
models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='reference_data.geneinfo')), + ], + ), + ] diff --git a/reference_data/models.py b/reference_data/models.py index ce221425aa..1fb869efec 100644 --- a/reference_data/models.py +++ b/reference_data/models.py @@ -161,6 +161,16 @@ class Meta: json_fields = ['pHI', 'pTS'] +class GeneShet(models.Model): + gene = models.ForeignKey(GeneInfo, on_delete=models.CASCADE) + + shet = models.FloatField() + shet_constrained = models.BooleanField() + + class Meta: + json_fields = ['shet', 'shet_constrained'] + + class Omim(models.Model): MAP_METHOD_CHOICES = ( ('1', 'the disorder is placed on the map based on its association with a gene, but the underlying defect is not known.'), diff --git a/seqr/fixtures/reference_data.json b/seqr/fixtures/reference_data.json index 8d559de4fb..6129de0adb 100644 --- a/seqr/fixtures/reference_data.json +++ b/seqr/fixtures/reference_data.json @@ -1167,6 +1167,15 @@ "pHI": 0.90576, "pTS": 0.7346 } +}, +{ + "model": "reference_data.geneshet", + "pk": 1, + "fields": { + "gene": 1, + "shet": 0.90576, + "shet_constrained": true + } }, { "model": "reference_data.dbnsfpgene", From 120c5da1e0049562ba0d9ae2db72e49ba7324576 Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Fri, 25 Aug 2023 10:56:44 -0400 Subject: [PATCH 2/8] Remove gene_constrained field and change gs folder. 
--- reference_data/management/commands/update_gene_shet.py | 7 ++++--- reference_data/migrations/0022_geneshet.py | 3 +-- reference_data/models.py | 3 +-- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/reference_data/management/commands/update_gene_shet.py b/reference_data/management/commands/update_gene_shet.py index 470949818a..929eb60b58 100644 --- a/reference_data/management/commands/update_gene_shet.py +++ b/reference_data/management/commands/update_gene_shet.py @@ -8,14 +8,15 @@ class ShetReferenceDataHandler(ReferenceDataHandler): model_cls = GeneShet - url = 'https://storage.googleapis.com/seqr-reference-data/Shet/Shet_Zeng_2023.tsv' + # The .tsv file is generated from the Google Doc at https://docs.google.com/spreadsheets/d/1enxGBWCAFBHdrRlqCj_ueleiDo9K9GWn/edit#gid=1146995171 + # by downloading with a tsv format. + url = 'https://storage.googleapis.com/seqr-reference-data/gene_constraint/shet_Zeng(2023).xlsx%20-%20All%20scores-for%20gene%20page.tsv' @staticmethod def parse_record(record): yield { 'gene_id': record['ensg'], - 'shet': float(record['post_mean_shet']), - 'shet_constrained': bool(int(record['shet_constrained'])), + 'shet': float(record['post_mean (Shet)']), } diff --git a/reference_data/migrations/0022_geneshet.py b/reference_data/migrations/0022_geneshet.py index c4a1f022ef..95f4ff25b4 100644 --- a/reference_data/migrations/0022_geneshet.py +++ b/reference_data/migrations/0022_geneshet.py @@ -1,4 +1,4 @@ -# Generated by Django 3.2.20 on 2023-08-22 20:45 +# Generated by Django 3.2.20 on 2023-08-25 14:33 from django.db import migrations, models import django.db.models.deletion @@ -16,7 +16,6 @@ class Migration(migrations.Migration): fields=[ ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), ('shet', models.FloatField()), - ('shet_constrained', models.BooleanField()), ('gene', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='reference_data.geneinfo')), ], ), diff 
--git a/reference_data/models.py b/reference_data/models.py index 1fb869efec..dc6a765596 100644 --- a/reference_data/models.py +++ b/reference_data/models.py @@ -165,10 +165,9 @@ class GeneShet(models.Model): gene = models.ForeignKey(GeneInfo, on_delete=models.CASCADE) shet = models.FloatField() - shet_constrained = models.BooleanField() class Meta: - json_fields = ['shet', 'shet_constrained'] + json_fields = ['shet'] class Omim(models.Model): From 6bceb0e58ef40d52fd74be6527023f6b2b4ec41a Mon Sep 17 00:00:00 2001 From: Shifa Zhang Date: Fri, 25 Aug 2023 11:07:28 -0400 Subject: [PATCH 3/8] Update tests. --- .../management/tests/update_gene_shet_tests.py | 12 +++++------- seqr/fixtures/reference_data.json | 3 +-- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/reference_data/management/tests/update_gene_shet_tests.py b/reference_data/management/tests/update_gene_shet_tests.py index 667934cd58..877c07be3d 100644 --- a/reference_data/management/tests/update_gene_shet_tests.py +++ b/reference_data/management/tests/update_gene_shet_tests.py @@ -2,12 +2,12 @@ from reference_data.management.tests.test_utils import ReferenceDataCommandTestCase class UpdateGeneShetTest(ReferenceDataCommandTestCase): - URL = 'https://storage.googleapis.com/seqr-reference-data/Shet/Shet_Zeng_2023.tsv' + URL = 'https://storage.googleapis.com/seqr-reference-data/gene_constraint/shet_Zeng(2023).xlsx%20-%20All%20scores-for%20gene%20page.tsv' DATA = [ - 'ensg hgnc post_mean_shet shet_constrained\n', - 'ENSG00000223972 HGNC:37225 3.01E-05 0\n', - 'ENSG00000227233 HGNC:26441 4.85E-05 0\n', - 'ENSG00000243485 HGNC:4013 5.08E-05 1\n', + 'ensg hgnc post_mean (Shet)\n', + 'ENSG00000223972 HGNC:37225 3.01E-05\n', + 'ENSG00000227233 HGNC:26441 4.85E-05\n', + 'ENSG00000243485 HGNC:4013 5.08E-05\n', ] def test_update_gene_cn_sensitivity_command(self): @@ -16,7 +16,5 @@ def test_update_gene_cn_sensitivity_command(self): self.assertEqual(GeneShet.objects.count(), 2) record = 
GeneShet.objects.get(gene__gene_id='ENSG00000223972') self.assertEqual(record.shet, 3.01E-05) - self.assertEqual(record.shet_constrained, False) record = GeneShet.objects.get(gene__gene_id='ENSG00000243485') self.assertEqual(record.shet, 5.08E-05) - self.assertEqual(record.shet_constrained, True) diff --git a/seqr/fixtures/reference_data.json b/seqr/fixtures/reference_data.json index 6129de0adb..de47a9b327 100644 --- a/seqr/fixtures/reference_data.json +++ b/seqr/fixtures/reference_data.json @@ -1173,8 +1173,7 @@ "pk": 1, "fields": { "gene": 1, - "shet": 0.90576, - "shet_constrained": true + "shet": 0.90576 } }, { From b14e574abddf4cbf858e9a93ab80d73cc095b2ea Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 10 Oct 2023 15:29:44 -0400 Subject: [PATCH 4/8] remove migration --- reference_data/migrations/0022_geneshet.py | 22 ---------------------- 1 file changed, 22 deletions(-) delete mode 100644 reference_data/migrations/0022_geneshet.py diff --git a/reference_data/migrations/0022_geneshet.py b/reference_data/migrations/0022_geneshet.py deleted file mode 100644 index 95f4ff25b4..0000000000 --- a/reference_data/migrations/0022_geneshet.py +++ /dev/null @@ -1,22 +0,0 @@ -# Generated by Django 3.2.20 on 2023-08-25 14:33 - -from django.db import migrations, models -import django.db.models.deletion - - -class Migration(migrations.Migration): - - dependencies = [ - ('reference_data', '0021_auto_20221031_2049'), - ] - - operations = [ - migrations.CreateModel( - name='GeneShet', - fields=[ - ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('shet', models.FloatField()), - ('gene', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='reference_data.geneinfo')), - ], - ), - ] From 72910be8b201ed8858dae22851420eb3c0983e80 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 10 Oct 2023 16:19:27 -0400 Subject: [PATCH 5/8] update to use data from publication --- .../management/commands/update_gene_shet.py | 6 
++--- .../tests/update_gene_shet_tests.py | 14 ++++++------ reference_data/migrations/0022_geneshet.py | 22 +++++++++++++++++++ reference_data/models.py | 4 ++-- seqr/fixtures/reference_data.json | 2 +- 5 files changed, 34 insertions(+), 14 deletions(-) create mode 100644 reference_data/migrations/0022_geneshet.py diff --git a/reference_data/management/commands/update_gene_shet.py b/reference_data/management/commands/update_gene_shet.py index 929eb60b58..d348b0c164 100644 --- a/reference_data/management/commands/update_gene_shet.py +++ b/reference_data/management/commands/update_gene_shet.py @@ -8,15 +8,13 @@ class ShetReferenceDataHandler(ReferenceDataHandler): model_cls = GeneShet - # The .tsv file is generated from the Google Doc at https://docs.google.com/spreadsheets/d/1enxGBWCAFBHdrRlqCj_ueleiDo9K9GWn/edit#gid=1146995171 - # by downloading with a tsv format. - url = 'https://storage.googleapis.com/seqr-reference-data/gene_constraint/shet_Zeng(2023).xlsx%20-%20All%20scores-for%20gene%20page.tsv' + url = 'https://zenodo.org/record/7939768/files/s_het_estimates.genebayes.tsv' @staticmethod def parse_record(record): yield { 'gene_id': record['ensg'], - 'shet': float(record['post_mean (Shet)']), + 'post_mean': float(record['post_mean']), } diff --git a/reference_data/management/tests/update_gene_shet_tests.py b/reference_data/management/tests/update_gene_shet_tests.py index 877c07be3d..ba16605b0d 100644 --- a/reference_data/management/tests/update_gene_shet_tests.py +++ b/reference_data/management/tests/update_gene_shet_tests.py @@ -2,12 +2,12 @@ from reference_data.management.tests.test_utils import ReferenceDataCommandTestCase class UpdateGeneShetTest(ReferenceDataCommandTestCase): - URL = 'https://storage.googleapis.com/seqr-reference-data/gene_constraint/shet_Zeng(2023).xlsx%20-%20All%20scores-for%20gene%20page.tsv' + URL = 'https://zenodo.org/record/7939768/files/s_het_estimates.genebayes.tsv' DATA = [ - 'ensg hgnc post_mean (Shet)\n', - 'ENSG00000223972 
HGNC:37225 3.01E-05\n', - 'ENSG00000227233 HGNC:26441 4.85E-05\n', - 'ENSG00000243485 HGNC:4013 5.08E-05\n', + 'ensg hgnc chrom obs_lof exp_lof prior_mean post_mean post_lower_95 post_upper_95\n', + 'ENSG00000223972 HGNC:37225 chr15 26.0 21.66 0.00059216 3.01e-05 1.05e-06 0.00010405\n', + 'ENSG00000227233 HGNC:26441 chr5 31.0 28.55 0.00038727 4.853e-05 3.05e-06 0.00015705\n', + 'ENSG00000243485 HGNC:4013 chr19 17.0 11.327 0.00082297 5.083e-05 3.05e-06 0.00016605\n' ] def test_update_gene_cn_sensitivity_command(self): @@ -15,6 +15,6 @@ def test_update_gene_cn_sensitivity_command(self): self.assertEqual(GeneShet.objects.count(), 2) record = GeneShet.objects.get(gene__gene_id='ENSG00000223972') - self.assertEqual(record.shet, 3.01E-05) + self.assertEqual(record.post_mean, 3.01E-05) record = GeneShet.objects.get(gene__gene_id='ENSG00000243485') - self.assertEqual(record.shet, 5.08E-05) + self.assertEqual(record.post_mean, 5.083E-05) diff --git a/reference_data/migrations/0022_geneshet.py b/reference_data/migrations/0022_geneshet.py new file mode 100644 index 0000000000..593f864a9a --- /dev/null +++ b/reference_data/migrations/0022_geneshet.py @@ -0,0 +1,22 @@ +# Generated by Django 3.2.18 on 2023-10-10 20:12 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ('reference_data', '0021_auto_20221031_2049'), + ] + + operations = [ + migrations.CreateModel( + name='GeneShet', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('post_mean', models.FloatField()), + ('gene', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to='reference_data.geneinfo')), + ], + ), + ] diff --git a/reference_data/models.py b/reference_data/models.py index dc6a765596..f8aae30788 100644 --- a/reference_data/models.py +++ b/reference_data/models.py @@ -164,10 +164,10 @@ class Meta: class GeneShet(models.Model): gene = 
models.ForeignKey(GeneInfo, on_delete=models.CASCADE) - shet = models.FloatField() + post_mean = models.FloatField() class Meta: - json_fields = ['shet'] + json_fields = ['post_mean'] class Omim(models.Model): diff --git a/seqr/fixtures/reference_data.json b/seqr/fixtures/reference_data.json index de47a9b327..ae755195a0 100644 --- a/seqr/fixtures/reference_data.json +++ b/seqr/fixtures/reference_data.json @@ -1173,7 +1173,7 @@ "pk": 1, "fields": { "gene": 1, - "shet": 0.90576 + "post_mean": 0.90576 } }, { From d9d2ed984a1dc5ea185c4517f2a01dd6882aae0d Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 10 Oct 2023 16:39:58 -0400 Subject: [PATCH 6/8] update tests --- seqr/utils/gene_utils.py | 4 +++- seqr/views/apis/family_api_tests.py | 2 +- seqr/views/apis/gene_api_tests.py | 29 +++++++++++++++++++++++++++++ seqr/views/utils/test_utils.py | 2 +- 4 files changed, 34 insertions(+), 3 deletions(-) diff --git a/seqr/utils/gene_utils.py b/seqr/utils/gene_utils.py index 7c773e21db..c590b888fc 100644 --- a/seqr/utils/gene_utils.py +++ b/seqr/utils/gene_utils.py @@ -4,7 +4,7 @@ from django.db.models.functions import Length from reference_data.models import GeneInfo, GeneConstraint, dbNSFPGene, Omim, MGI, PrimateAI, GeneCopyNumberSensitivity, \ - GenCC, ClinGen + GenCC, ClinGen, GeneShet from seqr.utils.xpos_utils import get_xpos from seqr.views.utils.orm_to_json_utils import _get_json_for_model, _get_json_for_models, _get_empty_json_for_model, \ get_json_for_gene_notes_by_gene_id @@ -90,6 +90,7 @@ def _add_mgi(gene): OMIM = 'omim' CONSTRAINT = 'constraint' CN_SENSITIVITY = 'cn_sensitivity' +SHET = 'shet' DBNSFP = 'dbnsfp' GENCC = 'gencc' PRIMATE_AI = 'primate_ai' @@ -100,6 +101,7 @@ def _add_mgi(gene): OMIM: (Omim, _add_omim), CONSTRAINT: (GeneConstraint, None), CN_SENSITIVITY: (GeneCopyNumberSensitivity, _add_gene_model('genecopynumbersensitivity', 'cnSensitivity', dict)), + SHET: (GeneShet, _add_gene_model('geneshet', 'sHet', dict)), GENCC: (GenCC, 
_add_gene_model('gencc', 'genCc', dict)), CLINGEN: (ClinGen, _add_gene_model('clingen', 'clinGen', lambda: None)), } diff --git a/seqr/views/apis/family_api_tests.py b/seqr/views/apis/family_api_tests.py index 51fdf5b60a..2ef99dd846 100644 --- a/seqr/views/apis/family_api_tests.py +++ b/seqr/views/apis/family_api_tests.py @@ -521,7 +521,7 @@ def test_get_family_phenotype_gene_scores(self): 'ENSG00000268903': { 'chromGrch37': '1', 'chromGrch38': '1', 'clinGen': None, 'cnSensitivity': {}, 'codingRegionSizeGrch37': 0, 'codingRegionSizeGrch38': 0, 'constraints': {}, - 'endGrch37': 135895, 'endGrch38': 135895, 'genCc': {}, + 'endGrch37': 135895, 'endGrch38': 135895, 'genCc': {}, 'sHet': {}, 'gencodeGeneType': 'processed_pseudogene', 'geneId': 'ENSG00000268903', 'geneSymbol': 'AL627309.7', 'mimNumber': None, 'omimPhenotypes': [], 'startGrch37': 135141, 'startGrch38': 135141 diff --git a/seqr/views/apis/gene_api_tests.py b/seqr/views/apis/gene_api_tests.py index 10b0f61728..6b394faf9b 100644 --- a/seqr/views/apis/gene_api_tests.py +++ b/seqr/views/apis/gene_api_tests.py @@ -33,6 +33,35 @@ def test_genes_info(self): genes = response.json()['genesById'] self.assertSetEqual(set(genes.keys()), {GENE_ID, 'ENSG00000269981'}) self.assertSetEqual(set(genes[GENE_ID].keys()), GENE_DETAIL_FIELDS) + self.assertDictEqual(genes[GENE_ID], { + 'chromGrch37': '1', + 'chromGrch38': '1', + 'clinGen': {'haploinsufficiency': 'No Evidence', 'href': 'https://dosage.clinicalgenome.org/clingen_gene.cgi?sym=', 'triplosensitivity': ''}, + 'cnSensitivity': {'phi': 0.90576, 'pts': 0.7346}, + 'codingRegionSizeGrch37': 0, + 'codingRegionSizeGrch38': 0, + 'constraints': {'louef': 1.606, 'louefRank': 0, 'misZ': -0.7773, 'misZRank': 1, 'pli': 0.00090576, 'pliRank': 1, 'totalGenes': 1}, + 'diseaseDesc': '', + 'endGrch37': 14409, + 'endGrch38': 14409, + 'functionDesc': '', + 'genCc': {'hgncId': 'HGNC:943', 'classifications': [ + {'classification': 'Strong', 'date': '7/29/19 19:04', 'disease': 'dystonia 16', 
'moi': 'Autosomal recessive', 'submitter': 'Laboratory for Molecular Medicine'}, + {'classification': 'Supportive', 'date': '9/14/21 0:00', 'disease': 'dystonia 16', 'moi': 'Autosomal recessive', 'submitter': 'Orphanet'}, + ]}, + 'gencodeGeneType': 'transcribed_unprocessed_pseudogene', + 'geneId': 'ENSG00000223972', + 'geneNames': '', + 'geneSymbol': 'DDX11L1', + 'mgiMarkerId': None, + 'mimNumber': 147571, + 'notes': [], + 'omimPhenotypes': [{'mimNumber': 147571, 'phenotypeDescription': 'Immunodeficiency 38', 'phenotypeInheritance': 'Autosomal recessive', 'phenotypeMimNumber': 616126}], + 'primateAi': {'percentile25': 0.587214291096, 'percentile75': 0.821286439896}, + 'sHet': {'postMean': 0.90576}, + 'startGrch37': 11869, + 'startGrch38': 11869, + }) def test_create_update_and_delete_gene_note(self): diff --git a/seqr/views/utils/test_utils.py b/seqr/views/utils/test_utils.py index e9eb8d1973..9792f298ce 100644 --- a/seqr/views/utils/test_utils.py +++ b/seqr/views/utils/test_utils.py @@ -794,7 +794,7 @@ def _get_list_param(call, param): 'gencodeGeneType', 'geneId', 'geneSymbol', 'startGrch37', 'startGrch38', } GENE_VARIANT_DISPLAY_FIELDS = { - 'constraints', 'omimPhenotypes', 'mimNumber', 'cnSensitivity', 'genCc', 'clinGen', + 'constraints', 'omimPhenotypes', 'mimNumber', 'cnSensitivity', 'genCc', 'clinGen', 'sHet', } GENE_VARIANT_DISPLAY_FIELDS.update(GENE_FIELDS) GENE_VARIANT_FIELDS = { From 4d3788eaf95789d2ec5afa1d4789ab3cc58080c3 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 10 Oct 2023 16:49:54 -0400 Subject: [PATCH 7/8] add shet to gene page --- ui/shared/components/panel/genes/GeneDetail.jsx | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/ui/shared/components/panel/genes/GeneDetail.jsx b/ui/shared/components/panel/genes/GeneDetail.jsx index dd7eb8f9c6..ce80874bf3 100644 --- a/ui/shared/components/panel/genes/GeneDetail.jsx +++ b/ui/shared/components/panel/genes/GeneDetail.jsx @@ -210,6 +210,23 @@ const STAT_DETAILS = [ note: 'These 
metrics are based on the amount of expected variation observed in the gnomAD data and is a measure ' + 'of how likely the gene is to be intolerant of loss-of-function mutations.', }, + { + title: 'Shet', + scoreField: 'sHet', + fields: [ + { field: 'postMean', label: 'post_mean' }, + ], + note: ( + + This score was developed by the Pritchard lab [ + + Zeng et al 2023 + + ] to predict gene constraint based on functional and evolutionary information. Scores >0.1 are considered to + have high likelihood to be under extreme selection. + + ), + }, { title: 'Haploinsufficient', content: gene => ( From 8a2c65e184903a26a0a38c249861c883cbab9ffd Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 11 Oct 2023 11:01:31 -0400 Subject: [PATCH 8/8] combine lof constraint label --- .../components/panel/genes/GeneDetail.jsx | 6 ++- .../components/panel/variants/VariantGene.jsx | 50 ++++++++++++------- 2 files changed, 36 insertions(+), 20 deletions(-) diff --git a/ui/shared/components/panel/genes/GeneDetail.jsx b/ui/shared/components/panel/genes/GeneDetail.jsx index ce80874bf3..82470104f9 100644 --- a/ui/shared/components/panel/genes/GeneDetail.jsx +++ b/ui/shared/components/panel/genes/GeneDetail.jsx @@ -175,6 +175,7 @@ DosageSensitivity.propTypes = { export const HI_THRESHOLD = 0.86 export const TS_THRESHOLD = 0.94 +export const SHET_THRESHOLD = 0.1 const HAPLOINSUFFICIENT_FIELDS = [{ field: 'phi', label: 'pHaplo' }] const TRIPLOSENSITIVE_FIELDS = [{ field: 'pts', label: 'pTriplo' }] const STAT_DETAILS = [ @@ -222,8 +223,9 @@ const STAT_DETAILS = [ Zeng et al 2023 - ] to predict gene constraint based on functional and evolutionary information. Scores >0.1 are considered to - have high likelihood to be under extreme selection. + ] to predict gene constraint based on functional and evolutionary information. Scores > + {SHET_THRESHOLD} +   are considered to have high likelihood to be under extreme selection. 
), }, diff --git a/ui/shared/components/panel/variants/VariantGene.jsx b/ui/shared/components/panel/variants/VariantGene.jsx index e7bbe1931d..9dde03e3a8 100644 --- a/ui/shared/components/panel/variants/VariantGene.jsx +++ b/ui/shared/components/panel/variants/VariantGene.jsx @@ -23,7 +23,7 @@ import { InlineHeader, NoBorderTable, ButtonLink, ColoredLabel } from '../../Sty import { PermissiveGeneSearchLink } from '../../buttons/SearchResultsLink' import ShowGeneModal from '../../buttons/ShowGeneModal' import Modal from '../../modal/Modal' -import { GenCC, ClingenLabel, HI_THRESHOLD, TS_THRESHOLD } from '../genes/GeneDetail' +import { GenCC, ClingenLabel, HI_THRESHOLD, TS_THRESHOLD, SHET_THRESHOLD } from '../genes/GeneDetail' import { getIndividualGeneDataByFamilyGene } from './selectors' const RnaSeqTpm = React.lazy(() => import('./RnaSeqTpm')) @@ -379,24 +379,38 @@ const GENE_DETAIL_SECTIONS = [ color: 'red', description: 'Loss of Function Constraint', label: 'LOF CONSTR', - showDetails: gene => gene.constraints.louef < LOF_THRESHHOLD, + showDetails: gene => (gene.constraints.louef < LOF_THRESHHOLD) || + (gene.cnSensitivity.phi && gene.cnSensitivity.phi > HI_THRESHOLD) || + (gene.sHet.postMean && gene.sHet.postMean > SHET_THRESHOLD), detailsDisplay: gene => ( - `This gene ranks as ${gene.constraints.louefRank} most intolerant of LoF mutations out of - ${gene.constraints.totalGenes} genes under study (louef: - ${gene.constraints.louef.toPrecision(4)}${gene.constraints.pli ? `, pLi: ${gene.constraints.pli.toPrecision(4)}` : ''}). - LOEUF is the observed to expected upper bound fraction for loss-of-function variants based on the variation - observed in the gnomad data. 
Both LOEUF and pLi are measures of how likely the gene is to be intolerant of - loss-of-function mutations`), - }, - { - color: 'red', - description: 'HaploInsufficient', - label: 'HI', - showDetails: gene => gene.cnSensitivity.phi && gene.cnSensitivity.phi > HI_THRESHOLD, - detailsDisplay: gene => ( - `These are a score developed by the Talkowski lab that predict whether a gene is haploinsufficient based - on large chromosomal microarray data set analysis. Scores >${HI_THRESHOLD} are considered to have high likelihood to be - haploinsufficient. This gene has a score of ${gene.cnSensitivity.phi.toPrecision(4)}.`), + + + This gene ranks as   + {gene.constraints.louefRank} +  most intolerant of LoF mutations out of   + {gene.constraints.totalGenes} +  genes under study (louef:   + {gene.constraints.louef.toPrecision(4)} + {gene.constraints.pli ? `, pLi: ${gene.constraints.pli.toPrecision(4)}` : ''} + ) + Karczewski (2020) + + {gene.sHet.postMean && ( + + This gene has a Shet score of   + {gene.sHet.postMean.toPrecision(4)} + Zeng (2023) + + )} + {gene.cnSensitivity.phi && ( + + This gene has a haploinsufficiency (HI) score of   + {gene.cnSensitivity.phi.toPrecision(4)} + Collins (2022) + + )} + + ), }, { color: 'red',