From 3626293db4fad1c531ddf5b61182eac513254bba Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 16 Oct 2024 12:47:40 -0400 Subject: [PATCH 1/5] include variant type in metadata report --- seqr/views/apis/report_api.py | 1 - seqr/views/apis/report_api_tests.py | 2 ++ seqr/views/apis/summary_data_api.py | 2 +- seqr/views/utils/anvil_metadata_utils.py | 2 ++ ui/pages/Report/components/VariantMetadata.jsx | 1 + 5 files changed, 6 insertions(+), 2 deletions(-) diff --git a/seqr/views/apis/report_api.py b/seqr/views/apis/report_api.py index ed42ad2909..a7cba7fcc6 100644 --- a/seqr/views/apis/report_api.py +++ b/seqr/views/apis/report_api.py @@ -588,7 +588,6 @@ def _post_process_gregor_variant(row, gene_variants): 'linked_variant': next( v['genetic_findings_id'] for v in gene_variants if v['genetic_findings_id'] != row['genetic_findings_id'] ) if len(gene_variants) > 1 else None, - 'variant_type': 'SNV/INDEL' if row['alt'] else 'SV', } diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index 7f9fc0b255..f28a225ebc 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -514,6 +514,7 @@ 'sv_name': None, 'transcript': None, 'validated_name': None, + 'variant_type': 'SNV/INDEL', } PARTICIPANT_TABLE = [ @@ -1403,6 +1404,7 @@ def test_variant_metadata(self): 'tags': ['Tier 1 - Novel gene and phenotype'], 'variant_inheritance': 'unknown', 'variant_reference_assembly': 'GRCh37', + 'variant_type': 'SV', 'zygosity': 'Heterozygous', }) diff --git a/seqr/views/apis/summary_data_api.py b/seqr/views/apis/summary_data_api.py index 96cb2b6547..51193cd389 100644 --- a/seqr/views/apis/summary_data_api.py +++ b/seqr/views/apis/summary_data_api.py @@ -302,7 +302,7 @@ def _add_row(row, family_id, row_type): elif row_type == DISCOVERY_ROW_TYPE: for i, discovery_row in enumerate(row): participant_id = discovery_row.pop('participant_id') - parsed_row = {'{}-{}'.format(k, i + 1): v for k, v in discovery_row.items() if k
!= 'allele_balance_or_heteroplasmy_percentage'} + parsed_row = {'{}-{}'.format(k, i + 1): v for k, v in discovery_row.items() if k not in {'allele_balance_or_heteroplasmy_percentage', 'variant_type'}} parsed_row['num_saved_variants'] = len(row) rows_by_subject_family_id[(participant_id, family_id)].update(parsed_row) elif row_type == SUBJECT_ROW_TYPE: diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index d63df5c93c..1f90f7d69e 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -340,6 +340,7 @@ def _get_parsed_saved_discovery_variants_by_family( gene_id = main_transcript.get('geneId') gene_ids.add(gene_id) sv_type = variant_json.get('svType') + variant_type = 'SV' if sv_type else 'SNV/INDEL' partial_hpo_terms = variant.partial_hpo_terms[0] if variant.partial_hpo_terms else '' phenotype_contribution = 'Partial' if partial_hpo_terms else 'Full' @@ -358,6 +359,7 @@ def _get_parsed_saved_discovery_variants_by_family( 'partial_contribution_explained': partial_hpo_terms.replace(', ', '|'), 'sv_type': sv_type, 'sv_name': (variant_json.get('svName') or '{svType}:chr{chrom}:{pos}-{end}'.format(**variant_json)) if sv_type else None, + 'variant_type': variant_type, 'validated_name': variant.validated_name[0] if variant.validated_name else None, **{k: _get_transcript_field(k, config, main_transcript) for k, config in TRANSCRIPT_FIELDS.items()}, **{k: variant_json.get(k) for k in ['genotypes'] + (variant_json_fields or [])}, diff --git a/ui/pages/Report/components/VariantMetadata.jsx b/ui/pages/Report/components/VariantMetadata.jsx index ee7fe71e10..828c65f416 100644 --- a/ui/pages/Report/components/VariantMetadata.jsx +++ b/ui/pages/Report/components/VariantMetadata.jsx @@ -11,6 +11,7 @@ const VIEW_ALL_PAGES = [ const COLUMNS = [ { name: 'participant_id' }, ...VARIANT_METADATA_COLUMNS.slice(0, -1), + { name: 'variant_type' }, { name: 'allele_balance_or_heteroplasmy_percentage' 
}, { name: 'Clinvar allele ID', format: ({ clinvar }) => clinvar?.alleleId }, { name: 'ClinVar Clinical Significance', format: ({ clinvar }) => clinvarSignificance(clinvar).pathogenicity }, From 1829ea2944de4ab64479d7f36ae5a72f2ab2222a Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 16 Oct 2024 13:03:40 -0400 Subject: [PATCH 2/5] update variant type logic --- seqr/views/utils/anvil_metadata_utils.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/seqr/views/utils/anvil_metadata_utils.py b/seqr/views/utils/anvil_metadata_utils.py index 1f90f7d69e..6ad49d87b1 100644 --- a/seqr/views/utils/anvil_metadata_utils.py +++ b/seqr/views/utils/anvil_metadata_utils.py @@ -313,6 +313,11 @@ def _get_discovery_notes(variant, gene_variants, omit_parent_mnvs): nested_mnvs = sorted([v for v in mnv_names if v != parent_name]) return f'The following variants are part of the {variant_type} variant {parent}: {", ".join(nested_mnvs)}' +VARIANT_TYPES = [ + ('SV', lambda ref, alt: not alt), + ('SNV', lambda ref, alt: len(ref) == 1 and len(alt) == 1), + ('RE', lambda ref, alt: all(['[' in allele and ']' in allele for allele in [ref, alt]])), +] def _get_parsed_saved_discovery_variants_by_family( families: Iterable[Family], include_metadata: bool, variant_json_fields: list[str], @@ -340,7 +345,9 @@ def _get_parsed_saved_discovery_variants_by_family( gene_id = main_transcript.get('geneId') gene_ids.add(gene_id) sv_type = variant_json.get('svType') - variant_type = 'SV' if sv_type else 'SNV/INDEL' + variant_type = next( + (variant_type for variant_type, has_type in VARIANT_TYPES if has_type(variant.ref, variant.alt)), + 'INDEL') partial_hpo_terms = variant.partial_hpo_terms[0] if variant.partial_hpo_terms else '' phenotype_contribution = 'Partial' if partial_hpo_terms else 'Full' From 50d80d27b4bb12b36789eef96057ab34622afd85 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 16 Oct 2024 13:08:35 -0400 Subject: [PATCH 3/5] update tests ---
seqr/views/apis/report_api_tests.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index f28a225ebc..0ec0bd88e3 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -410,12 +410,12 @@ {'column': 'genetic_findings_id', 'required': True}, {'column': 'participant_id', 'required': True}, {'column': 'experiment_id'}, - {'column': 'variant_type', 'required': True, 'data_type': 'enumeration', 'enumerations': ['SNV/INDEL', 'SV', 'CNV', 'RE', 'MEI']}, + {'column': 'variant_type', 'required': True, 'data_type': 'enumeration', 'enumerations': ['SNV', 'INDEL', 'SV', 'CNV', 'RE', 'MEI']}, {'column': 'variant_reference_assembly', 'required': True, 'data_type': 'enumeration', 'enumerations': ['GRCh37', 'GRCh38']}, {'column': 'chrom', 'required': True}, {'column': 'pos', 'required': True, 'data_type': 'integer'}, - {'column': 'ref','required': 'CONDITIONAL (variant_type = SNV/INDEL, variant_type = RE)'}, - {'column': 'alt', 'required': 'CONDITIONAL (variant_type = SNV/INDEL, variant_type = RE)'}, + {'column': 'ref','required': 'CONDITIONAL (variant_type = SNV, variant_type = INDEL, variant_type = RE)'}, + {'column': 'alt', 'required': 'CONDITIONAL (variant_type = SNV, variant_type = INDEL, variant_type = RE)'}, {'column': 'ClinGen_allele_ID'}, {'column': 'gene_of_interest', 'required': True}, {'column': 'transcript'}, @@ -514,7 +514,7 @@ 'sv_name': None, 'transcript': None, 'validated_name': None, - 'variant_type': 'SNV/INDEL', + 'variant_type': 'INDEL', } PARTICIPANT_TABLE = [ @@ -604,22 +604,22 @@ 'phenotype_contribution', 'partial_contribution_explained', 'additional_family_members_with_variant', 'method_of_discovery', 'notes', 'sv_type', 'chrom_end', 'pos_end', 'copy_number', 'hgvs', 'gene_disease_validity', ], [ - 'Broad_NA19675_1_21_3343353', 'Broad_NA19675_1', '', 'SNV/INDEL', 'GRCh37', '21', '3343353', 'GAGA', 'G', '', 
+ 'Broad_NA19675_1_21_3343353', 'Broad_NA19675_1', '', 'INDEL', 'GRCh37', '21', '3343353', 'GAGA', 'G', '', 'RP11', 'ENST00000258436.5', 'c.375_377delTCT', 'p.Leu126del', 'Heterozygous', '', 'de novo', '', '', 'Candidate', 'Myasthenic syndrome, congenital, 8, with pre- and postsynaptic defects', 'OMIM:615120', 'Autosomal recessive|X-linked', 'Full', '', '', 'SR-ES', 'This individual is published in PMID34415322', '', '', '', '', '', '', ], [ - 'Broad_HG00731_1_248367227', 'Broad_HG00731', 'Broad_exome_VCGS_FAM203_621_D2', 'SNV/INDEL', 'GRCh37', '1', + 'Broad_HG00731_1_248367227', 'Broad_HG00731', 'Broad_exome_VCGS_FAM203_621_D2', 'INDEL', 'GRCh37', '1', '248367227', 'TC', 'T', 'CA1501729', 'RP11', '', '', '', 'Homozygous', '', 'paternal', '', '', 'Known', '', 'MONDO:0044970', '', 'Uncertain', '', 'Broad_HG00732', 'SR-ES', '', '', '', '', '', '', '', ], [ - 'Broad_HG00731_19_1912632', 'Broad_HG00731', 'Broad_exome_VCGS_FAM203_621_D2', 'SNV/INDEL', 'GRCh38', '19', + 'Broad_HG00731_19_1912632', 'Broad_HG00731', 'Broad_exome_VCGS_FAM203_621_D2', 'INDEL', 'GRCh38', '19', '1912632', 'GC', 'TT', '', 'OR4G11P', 'ENST00000371839', 'c.586_587delinsTT', 'p.Ala196Leu', 'Heterozygous', '', 'unknown', 'Broad_HG00731_19_1912634', '', 'Known', '', 'MONDO:0044970', '', 'Full', '', '', 'SR-ES', 'The following variants are part of the multinucleotide variant 19-1912632-GC-TT (c.586_587delinsTT, p.Ala196Leu): 19-1912633-G-T, 19-1912634-C-T', '', '', '', '', '', '', ], [ - 'Broad_NA20889_1_248367227', 'Broad_NA20889', '', 'SNV/INDEL', 'GRCh37', '1', '248367227', 'TC', 'T', + 'Broad_NA20889_1_248367227', 'Broad_NA20889', '', 'INDEL', 'GRCh37', '1', '248367227', 'TC', 'T', 'CA1501729', 'OR4G11P', 'ENST00000505820', 'c.3955G>A', 'c.1586-17C>G', 'Heterozygous', '', 'unknown', 'Broad_NA20889_1_249045487_DEL', '', 'Candidate', 'Immunodeficiency 38', 'OMIM:616126', 'Autosomal recessive', 'Partial', 'HP:0000501|HP:0000365', '', 'SR-ES', '', '', '', '', '', '', '', @@ -901,13 +901,13 @@ def 
_test_gregor_export(self, url, mock_subprocess, mock_temp_dir, mock_open, mo '80.2', '1.05', '', '', '', '', '', ]]) self._assert_expected_file(genetic_findings_file, [GENETIC_FINDINGS_TABLE[0], [ - 'Broad_NA19675_1_21_3343353', 'Broad_NA19675_1', '', 'SNV/INDEL', 'GRCh37', '21', '3343353', 'GAGA', 'G', '', + 'Broad_NA19675_1_21_3343353', 'Broad_NA19675_1', '', 'INDEL', 'GRCh37', '21', '3343353', 'GAGA', 'G', '', 'RP11', 'ENST00000258436.5', 'c.375_377delTCT', 'p.Leu126del', 'Heterozygous', '', 'de novo', '', '', 'Candidate', 'Myasthenic syndrome, congenital, 8, with pre- and postsynaptic defects', 'OMIM:615120', 'Autosomal recessive|X-linked', 'Full', '', '', 'SR-ES', 'This individual is published in PMID34415322', '', '', '', '', '', '', ], [ - 'Broad_HG00731_1_248367227', 'Broad_HG00731', 'Broad_exome_VCGS_FAM203_621_D2', 'SNV/INDEL', 'GRCh37', '1', + 'Broad_HG00731_1_248367227', 'Broad_HG00731', 'Broad_exome_VCGS_FAM203_621_D2', 'INDEL', 'GRCh37', '1', '248367227', 'TC', 'T', 'CA1501729', 'RP11', '', '', '', 'Homozygous', '', 'paternal', '', '', 'Known', '', 'MONDO:0044970', '', 'Uncertain', '', 'Broad_HG00732', 'SR-ES', '', '', '', '', '', '', '', ]], additional_calls=1) From 06af3082e0b76aead93c00e5bd8f9f356380a53a Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 16 Oct 2024 13:17:03 -0400 Subject: [PATCH 4/5] test SNV --- seqr/fixtures/report_variants.json | 8 ++++---- seqr/views/apis/report_api_tests.py | 17 +++++++++-------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/seqr/fixtures/report_variants.json b/seqr/fixtures/report_variants.json index e0722385b4..06ed7b2d11 100644 --- a/seqr/fixtures/report_variants.json +++ b/seqr/fixtures/report_variants.json @@ -105,9 +105,9 @@ "last_modified_date": "2018-05-31T16:36:02.805Z", "xpos": 19001912632, "xpos_end": 19001912632, - "ref": "GC", - "alt": "TT", - "variant_id": "19-1912632-GC-TT", + "ref": "G", + "alt": "C", + "variant_id": "19-1912632-G-C", "saved_variant_json": { "pos": 1912632, 
"end": 1912632, @@ -116,7 +116,7 @@ "genotypes": { "I000004_hg00731": {"numAlt": 1} }, - "variantId": "19-1912632-GC-TT", + "variantId": "19-1912632-G-C", "chrom": "19", "mainTranscriptId": "ENST00000371839", "transcripts": { diff --git a/seqr/views/apis/report_api_tests.py b/seqr/views/apis/report_api_tests.py index 0ec0bd88e3..767af58fe3 100644 --- a/seqr/views/apis/report_api_tests.py +++ b/seqr/views/apis/report_api_tests.py @@ -613,10 +613,10 @@ '248367227', 'TC', 'T', 'CA1501729', 'RP11', '', '', '', 'Homozygous', '', 'paternal', '', '', 'Known', '', 'MONDO:0044970', '', 'Uncertain', '', 'Broad_HG00732', 'SR-ES', '', '', '', '', '', '', '', ], [ - 'Broad_HG00731_19_1912632', 'Broad_HG00731', 'Broad_exome_VCGS_FAM203_621_D2', 'INDEL', 'GRCh38', '19', - '1912632', 'GC', 'TT', '', 'OR4G11P', 'ENST00000371839', 'c.586_587delinsTT', 'p.Ala196Leu', 'Heterozygous', '', 'unknown', + 'Broad_HG00731_19_1912632', 'Broad_HG00731', 'Broad_exome_VCGS_FAM203_621_D2', 'SNV', 'GRCh38', '19', + '1912632', 'G', 'C', '', 'OR4G11P', 'ENST00000371839', 'c.586_587delinsTT', 'p.Ala196Leu', 'Heterozygous', '', 'unknown', 'Broad_HG00731_19_1912634', '', 'Known', '', 'MONDO:0044970', '', 'Full', '', '', 'SR-ES', - 'The following variants are part of the multinucleotide variant 19-1912632-GC-TT (c.586_587delinsTT, p.Ala196Leu): 19-1912633-G-T, 19-1912634-C-T', + 'The following variants are part of the multinucleotide variant 19-1912632-G-C (c.586_587delinsTT, p.Ala196Leu): 19-1912633-G-T, 19-1912634-C-T', '', '', '', '', '', '', ], [ 'Broad_NA20889_1_248367227', 'Broad_NA20889', '', 'INDEL', 'GRCh37', '1', '248367227', 'TC', 'T', @@ -759,13 +759,13 @@ def _check_anvil_export_response(self, response, mock_zip, no_analyst_project_ur self.assertIn([ '19_1912633_HG00731', 'HG00731', 'HG00731', 'OR4G11P', 'Known', 'unknown', 'Heterozygous', 'GRCh38', '19', '1912633', 'G', 'T', '-', '-', 'ENST00000371839', '-', '-', '-', - 'The following variants are part of the multinucleotide variant 
19-1912632-GC-TT ' + 'The following variants are part of the multinucleotide variant 19-1912632-G-C ' '(c.586_587delinsTT, p.Ala196Leu): 19-1912633-G-T, 19-1912634-C-T'], discovery_file) self.assertIn([ '19_1912634_HG00731', 'HG00731', 'HG00731', 'OR4G11P', 'Known', 'unknown', 'Heterozygous', 'GRCh38', '19', '1912634', 'C', 'T', '-', '-', 'ENST00000371839', '-', '-', '-', - 'The following variants are part of the multinucleotide variant 19-1912632-GC-TT (c.586_587delinsTT, ' + 'The following variants are part of the multinucleotide variant 19-1912632-G-C (c.586_587delinsTT, ' 'p.Ala196Leu): 19-1912633-G-T, 19-1912634-C-T'], discovery_file) @@ -1307,7 +1307,7 @@ def test_variant_metadata(self): self.assertDictEqual(response_json['rows'][1], expected_row) expected_mnv = { **BASE_VARIANT_METADATA_ROW, - 'alt': 'TT', + 'alt': 'C', 'chrom': '19', 'condition_id': 'MONDO:0044970', 'condition_inheritance': 'Unknown', @@ -1321,15 +1321,16 @@ def test_variant_metadata(self): 'hgvsc': 'c.586_587delinsTT', 'hgvsp': 'p.Ala196Leu', 'known_condition_name': 'mitochondrial disease', - 'notes': 'The following variants are part of the multinucleotide variant 19-1912632-GC-TT (c.586_587delinsTT, p.Ala196Leu): 19-1912633-G-T, 19-1912634-C-T', + 'notes': 'The following variants are part of the multinucleotide variant 19-1912632-G-C (c.586_587delinsTT, p.Ala196Leu): 19-1912633-G-T, 19-1912634-C-T', 'participant_id': 'HG00731', 'pos': 1912632, 'projectGuid': 'R0001_1kg', - 'ref': 'GC', + 'ref': 'G', 'tags': ['Known gene for phenotype'], 'transcript': 'ENST00000371839', 'variant_inheritance': 'unknown', 'variant_reference_assembly': 'GRCh38', + 'variant_type': 'SNV', 'zygosity': 'Heterozygous', } self.assertDictEqual(response_json['rows'][2], expected_mnv) From 1ccd6cbdf50e527a7e172902309dd518e7c4014f Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 16 Oct 2024 13:17:48 -0400 Subject: [PATCH 5/5] update gregor import --- seqr/views/apis/individual_api.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/seqr/views/apis/individual_api.py b/seqr/views/apis/individual_api.py index b6d82c7b3a..9aad4dc754 100644 --- a/seqr/views/apis/individual_api.py +++ b/seqr/views/apis/individual_api.py @@ -875,7 +875,7 @@ def import_gregor_metadata(request, project_guid): genes = set() for row in _iter_metadata_table( metadata_files_path, FINDINGS_TABLE, request.user, - lambda r: r['participant_id'] in participant_individual_map and r['variant_type'] == 'SNV/INDEL', + lambda r: r['participant_id'] in participant_individual_map and r['variant_type'] in {'SNV/INDEL', 'SNV', 'INDEL'}, ): individual = participant_individual_map[row['participant_id']] variant_id = '-'.join([row[col] for col in ['chrom', 'pos', 'ref', 'alt']])