Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Gregor report variant type update #4436

Merged
merged 5 commits into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions seqr/fixtures/report_variants.json
Original file line number Diff line number Diff line change
Expand Up @@ -105,9 +105,9 @@
"last_modified_date": "2018-05-31T16:36:02.805Z",
"xpos": 19001912632,
"xpos_end": 19001912632,
"ref": "GC",
"alt": "TT",
"variant_id": "19-1912632-GC-TT",
"ref": "G",
"alt": "C",
"variant_id": "19-1912632-G-C",
"saved_variant_json": {
"pos": 1912632,
"end": 1912632,
Expand All @@ -116,7 +116,7 @@
"genotypes": {
"I000004_hg00731": {"numAlt": 1}
},
"variantId": "19-1912632-GC-TT",
"variantId": "19-1912632-G-C",
"chrom": "19",
"mainTranscriptId": "ENST00000371839",
"transcripts": {
Expand Down
2 changes: 1 addition & 1 deletion seqr/views/apis/individual_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -875,7 +875,7 @@ def import_gregor_metadata(request, project_guid):
genes = set()
for row in _iter_metadata_table(
metadata_files_path, FINDINGS_TABLE, request.user,
lambda r: r['participant_id'] in participant_individual_map and r['variant_type'] == 'SNV/INDEL',
lambda r: r['participant_id'] in participant_individual_map and r['variant_type'] in {'SNV/INDEL', 'SNV', 'INDEL'},
):
individual = participant_individual_map[row['participant_id']]
variant_id = '-'.join([row[col] for col in ['chrom', 'pos', 'ref', 'alt']])
Expand Down
1 change: 0 additions & 1 deletion seqr/views/apis/report_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -588,7 +588,6 @@ def _post_process_gregor_variant(row, gene_variants):
'linked_variant': next(
v['genetic_findings_id'] for v in gene_variants if v['genetic_findings_id'] != row['genetic_findings_id']
) if len(gene_variants) > 1 else None,
'variant_type': 'SNV/INDEL' if row['alt'] else 'SV',
}


Expand Down
35 changes: 19 additions & 16 deletions seqr/views/apis/report_api_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -410,12 +410,12 @@
{'column': 'genetic_findings_id', 'required': True},
{'column': 'participant_id', 'required': True},
{'column': 'experiment_id'},
{'column': 'variant_type', 'required': True, 'data_type': 'enumeration', 'enumerations': ['SNV/INDEL', 'SV', 'CNV', 'RE', 'MEI']},
{'column': 'variant_type', 'required': True, 'data_type': 'enumeration', 'enumerations': ['SNV', 'INDEL', 'SV', 'CNV', 'RE', 'MEI']},
{'column': 'variant_reference_assembly', 'required': True, 'data_type': 'enumeration', 'enumerations': ['GRCh37', 'GRCh38']},
{'column': 'chrom', 'required': True},
{'column': 'pos', 'required': True, 'data_type': 'integer'},
{'column': 'ref','required': 'CONDITIONAL (variant_type = SNV/INDEL, variant_type = RE)'},
{'column': 'alt', 'required': 'CONDITIONAL (variant_type = SNV/INDEL, variant_type = RE)'},
{'column': 'ref','required': 'CONDITIONAL (variant_type = SNV, variant_type = INDEL, variant_type = RE)'},
{'column': 'alt', 'required': 'CONDITIONAL (variant_type = SNV, variant_type = INDEL, variant_type = RE)'},
{'column': 'ClinGen_allele_ID'},
{'column': 'gene_of_interest', 'required': True},
{'column': 'transcript'},
Expand Down Expand Up @@ -514,6 +514,7 @@
'sv_name': None,
'transcript': None,
'validated_name': None,
'variant_type': 'INDEL',
}

PARTICIPANT_TABLE = [
Expand Down Expand Up @@ -603,22 +604,22 @@
'phenotype_contribution', 'partial_contribution_explained', 'additional_family_members_with_variant',
'method_of_discovery', 'notes', 'sv_type', 'chrom_end', 'pos_end', 'copy_number', 'hgvs', 'gene_disease_validity',
], [
'Broad_NA19675_1_21_3343353', 'Broad_NA19675_1', '', 'SNV/INDEL', 'GRCh37', '21', '3343353', 'GAGA', 'G', '',
'Broad_NA19675_1_21_3343353', 'Broad_NA19675_1', '', 'INDEL', 'GRCh37', '21', '3343353', 'GAGA', 'G', '',
'RP11', 'ENST00000258436.5', 'c.375_377delTCT', 'p.Leu126del', 'Heterozygous', '', 'de novo', '', '', 'Candidate',
'Myasthenic syndrome, congenital, 8, with pre- and postsynaptic defects', 'OMIM:615120', 'Autosomal recessive|X-linked',
'Full', '', '', 'SR-ES', 'This individual is published in PMID34415322', '', '', '', '', '', '',
], [
'Broad_HG00731_1_248367227', 'Broad_HG00731', 'Broad_exome_VCGS_FAM203_621_D2', 'SNV/INDEL', 'GRCh37', '1',
'Broad_HG00731_1_248367227', 'Broad_HG00731', 'Broad_exome_VCGS_FAM203_621_D2', 'INDEL', 'GRCh37', '1',
'248367227', 'TC', 'T', 'CA1501729', 'RP11', '', '', '', 'Homozygous', '', 'paternal', '', '', 'Known', '',
'MONDO:0044970', '', 'Uncertain', '', 'Broad_HG00732', 'SR-ES', '', '', '', '', '', '', '',
], [
'Broad_HG00731_19_1912632', 'Broad_HG00731', 'Broad_exome_VCGS_FAM203_621_D2', 'SNV/INDEL', 'GRCh38', '19',
'1912632', 'GC', 'TT', '', 'OR4G11P', 'ENST00000371839', 'c.586_587delinsTT', 'p.Ala196Leu', 'Heterozygous', '', 'unknown',
'Broad_HG00731_19_1912632', 'Broad_HG00731', 'Broad_exome_VCGS_FAM203_621_D2', 'SNV', 'GRCh38', '19',
'1912632', 'G', 'C', '', 'OR4G11P', 'ENST00000371839', 'c.586_587delinsTT', 'p.Ala196Leu', 'Heterozygous', '', 'unknown',
'Broad_HG00731_19_1912634', '', 'Known', '', 'MONDO:0044970', '', 'Full', '', '', 'SR-ES',
'The following variants are part of the multinucleotide variant 19-1912632-GC-TT (c.586_587delinsTT, p.Ala196Leu): 19-1912633-G-T, 19-1912634-C-T',
'The following variants are part of the multinucleotide variant 19-1912632-G-C (c.586_587delinsTT, p.Ala196Leu): 19-1912633-G-T, 19-1912634-C-T',
'', '', '', '', '', '',
], [
'Broad_NA20889_1_248367227', 'Broad_NA20889', '', 'SNV/INDEL', 'GRCh37', '1', '248367227', 'TC', 'T',
'Broad_NA20889_1_248367227', 'Broad_NA20889', '', 'INDEL', 'GRCh37', '1', '248367227', 'TC', 'T',
'CA1501729', 'OR4G11P', 'ENST00000505820', 'c.3955G>A', 'c.1586-17C>G', 'Heterozygous', '', 'unknown',
'Broad_NA20889_1_249045487_DEL', '', 'Candidate', 'Immunodeficiency 38', 'OMIM:616126', 'Autosomal recessive',
'Partial', 'HP:0000501|HP:0000365', '', 'SR-ES', '', '', '', '', '', '', '',
Expand Down Expand Up @@ -758,13 +759,13 @@ def _check_anvil_export_response(self, response, mock_zip, no_analyst_project_ur
self.assertIn([
'19_1912633_HG00731', 'HG00731', 'HG00731', 'OR4G11P', 'Known', 'unknown', 'Heterozygous', 'GRCh38', '19',
'1912633', 'G', 'T', '-', '-', 'ENST00000371839', '-', '-', '-',
'The following variants are part of the multinucleotide variant 19-1912632-GC-TT '
'The following variants are part of the multinucleotide variant 19-1912632-G-C '
'(c.586_587delinsTT, p.Ala196Leu): 19-1912633-G-T, 19-1912634-C-T'],
discovery_file)
self.assertIn([
'19_1912634_HG00731', 'HG00731', 'HG00731', 'OR4G11P', 'Known', 'unknown', 'Heterozygous', 'GRCh38', '19',
'1912634', 'C', 'T', '-', '-', 'ENST00000371839', '-', '-', '-',
'The following variants are part of the multinucleotide variant 19-1912632-GC-TT (c.586_587delinsTT, '
'The following variants are part of the multinucleotide variant 19-1912632-G-C (c.586_587delinsTT, '
'p.Ala196Leu): 19-1912633-G-T, 19-1912634-C-T'],
discovery_file)

Expand Down Expand Up @@ -900,13 +901,13 @@ def _test_gregor_export(self, url, mock_subprocess, mock_temp_dir, mock_open, mo
'80.2', '1.05', '', '', '', '', '',
]])
self._assert_expected_file(genetic_findings_file, [GENETIC_FINDINGS_TABLE[0], [
'Broad_NA19675_1_21_3343353', 'Broad_NA19675_1', '', 'SNV/INDEL', 'GRCh37', '21', '3343353', 'GAGA', 'G', '',
'Broad_NA19675_1_21_3343353', 'Broad_NA19675_1', '', 'INDEL', 'GRCh37', '21', '3343353', 'GAGA', 'G', '',
'RP11', 'ENST00000258436.5', 'c.375_377delTCT', 'p.Leu126del', 'Heterozygous', '', 'de novo', '', '',
'Candidate', 'Myasthenic syndrome, congenital, 8, with pre- and postsynaptic defects', 'OMIM:615120',
'Autosomal recessive|X-linked', 'Full', '', '', 'SR-ES', 'This individual is published in PMID34415322',
'', '', '', '', '', '',
], [
'Broad_HG00731_1_248367227', 'Broad_HG00731', 'Broad_exome_VCGS_FAM203_621_D2', 'SNV/INDEL', 'GRCh37', '1',
'Broad_HG00731_1_248367227', 'Broad_HG00731', 'Broad_exome_VCGS_FAM203_621_D2', 'INDEL', 'GRCh37', '1',
'248367227', 'TC', 'T', 'CA1501729', 'RP11', '', '', '', 'Homozygous', '', 'paternal', '', '', 'Known', '',
'MONDO:0044970', '', 'Uncertain', '', 'Broad_HG00732', 'SR-ES', '', '', '', '', '', '', '',
]], additional_calls=1)
Expand Down Expand Up @@ -1306,7 +1307,7 @@ def test_variant_metadata(self):
self.assertDictEqual(response_json['rows'][1], expected_row)
expected_mnv = {
**BASE_VARIANT_METADATA_ROW,
'alt': 'TT',
'alt': 'C',
'chrom': '19',
'condition_id': 'MONDO:0044970',
'condition_inheritance': 'Unknown',
Expand All @@ -1320,15 +1321,16 @@ def test_variant_metadata(self):
'hgvsc': 'c.586_587delinsTT',
'hgvsp': 'p.Ala196Leu',
'known_condition_name': 'mitochondrial disease',
'notes': 'The following variants are part of the multinucleotide variant 19-1912632-GC-TT (c.586_587delinsTT, p.Ala196Leu): 19-1912633-G-T, 19-1912634-C-T',
'notes': 'The following variants are part of the multinucleotide variant 19-1912632-G-C (c.586_587delinsTT, p.Ala196Leu): 19-1912633-G-T, 19-1912634-C-T',
'participant_id': 'HG00731',
'pos': 1912632,
'projectGuid': 'R0001_1kg',
'ref': 'GC',
'ref': 'G',
'tags': ['Known gene for phenotype'],
'transcript': 'ENST00000371839',
'variant_inheritance': 'unknown',
'variant_reference_assembly': 'GRCh38',
'variant_type': 'SNV',
'zygosity': 'Heterozygous',
}
self.assertDictEqual(response_json['rows'][2], expected_mnv)
Expand Down Expand Up @@ -1403,6 +1405,7 @@ def test_variant_metadata(self):
'tags': ['Tier 1 - Novel gene and phenotype'],
'variant_inheritance': 'unknown',
'variant_reference_assembly': 'GRCh37',
'variant_type': 'SV',
'zygosity': 'Heterozygous',
})

Expand Down
2 changes: 1 addition & 1 deletion seqr/views/apis/summary_data_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ def _add_row(row, family_id, row_type):
elif row_type == DISCOVERY_ROW_TYPE:
for i, discovery_row in enumerate(row):
participant_id = discovery_row.pop('participant_id')
parsed_row = {'{}-{}'.format(k, i + 1): v for k, v in discovery_row.items() if k != 'allele_balance_or_heteroplasmy_percentage'}
parsed_row = {'{}-{}'.format(k, i + 1): v for k, v in discovery_row.items() if k not in {'allele_balance_or_heteroplasmy_percentage', 'variant_type'}}
parsed_row['num_saved_variants'] = len(row)
rows_by_subject_family_id[(participant_id, family_id)].update(parsed_row)
elif row_type == SUBJECT_ROW_TYPE:
Expand Down
9 changes: 9 additions & 0 deletions seqr/views/utils/anvil_metadata_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,11 @@ def _get_discovery_notes(variant, gene_variants, omit_parent_mnvs):
nested_mnvs = sorted([v for v in mnv_names if v != parent_name])
return f'The following variants are part of the {variant_type} variant {parent}: {", ".join(nested_mnvs)}'

# Ordered (label, predicate) pairs used to classify a variant from its ref/alt
# alleles. Each predicate takes (ref, alt) and returns a truthy value when the
# label applies. Order matters: the caller picks the first matching label and
# falls back to 'INDEL' when none match, so the empty-alt check comes first,
# before the predicates that call len() / `in` on alt.
VARIANT_TYPES = [
    # No alt allele recorded.
    ('SV', lambda ref, alt: not alt),
    # Single-base ref and single-base alt.
    ('SNV', lambda ref, alt: len(ref) == 1 and len(alt) == 1),
    # Both alleles contain '[' and ']'. A generator (not a list) lets all()
    # stop at the first allele that fails the check.
    ('RE', lambda ref, alt: all('[' in allele and ']' in allele for allele in (ref, alt))),
]

def _get_parsed_saved_discovery_variants_by_family(
families: Iterable[Family], include_metadata: bool, variant_json_fields: list[str],
Expand Down Expand Up @@ -340,6 +345,9 @@ def _get_parsed_saved_discovery_variants_by_family(
gene_id = main_transcript.get('geneId')
gene_ids.add(gene_id)
sv_type = variant_json.get('svType')
variant_type = next(
(variant_type for variant_type, has_type in VARIANT_TYPES if has_type(variant.ref, variant.alt)),
'INDEL')

partial_hpo_terms = variant.partial_hpo_terms[0] if variant.partial_hpo_terms else ''
phenotype_contribution = 'Partial' if partial_hpo_terms else 'Full'
Expand All @@ -358,6 +366,7 @@ def _get_parsed_saved_discovery_variants_by_family(
'partial_contribution_explained': partial_hpo_terms.replace(', ', '|'),
'sv_type': sv_type,
'sv_name': (variant_json.get('svName') or '{svType}:chr{chrom}:{pos}-{end}'.format(**variant_json)) if sv_type else None,
'variant_type': variant_type,
'validated_name': variant.validated_name[0] if variant.validated_name else None,
**{k: _get_transcript_field(k, config, main_transcript) for k, config in TRANSCRIPT_FIELDS.items()},
**{k: variant_json.get(k) for k in ['genotypes'] + (variant_json_fields or [])},
Expand Down
1 change: 1 addition & 0 deletions ui/pages/Report/components/VariantMetadata.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ const VIEW_ALL_PAGES = [
const COLUMNS = [
{ name: 'participant_id' },
...VARIANT_METADATA_COLUMNS.slice(0, -1),
{ name: 'variant_type' },
{ name: 'allele_balance_or_heteroplasmy_percentage' },
{ name: 'Clinvar allele ID', format: ({ clinvar }) => clinvar?.alleleId },
{ name: 'ClinVar Clinical Significance', format: ({ clinvar }) => clinvarSignificance(clinvar).pathogenicity },
Expand Down
Loading