Skip to content

Commit

Permalink
Merge pull request #4352 from broadinstitute/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
bpblanken authored Sep 5, 2024
2 parents 6bc7ac1 + 9614ced commit f444d42
Show file tree
Hide file tree
Showing 22 changed files with 36 additions and 32 deletions.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
This folder comprises a Hail (www.hail.is) native Table or MatrixTable.
Written with version 0.2.128-eead8100a1c1
Created at 2024/03/04 16:14:35
Created at 2024/08/29 13:43:52
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
3 changes: 2 additions & 1 deletion hail_search/queries/snv_indel.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ class SnvIndelHailTableQuery(SnvIndelHailTableQuery37):
SCREEN_KEY, MOTIF_FEATURES_KEY, REGULATORY_FEATURES_KEY,
]
FREQUENCY_PREFILTER_FIELDS = OrderedDict([
(True, PREFILTER_FREQ_CUTOFF),
(True, 0.001),
('is_gt_1_percent', PREFILTER_FREQ_CUTOFF),
('is_gt_3_percent', 0.03),
('is_gt_5_percent', 0.05),
('is_gt_10_percent', 0.1),
Expand Down
23 changes: 15 additions & 8 deletions hail_search/queries/snv_indel_37.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,19 +96,26 @@ def _get_gnomad_af_prefilter(self, frequencies=None, pathogenicity=None, **kwarg
if af_cutoff_field is None:
return False

af_filter = True if af_cutoff_field is True else lambda ht: ht[af_cutoff_field]

clinvar_path_ht = False
if af_cutoff < PATH_FREQ_OVERRIDE_CUTOFF:
clinvar_path_ht = self._get_loaded_clinvar_prefilter_ht(pathogenicity)
if clinvar_path_ht is not False:
path_cutoff_field = self._get_af_prefilter_field(PATH_FREQ_OVERRIDE_CUTOFF)
non_clinvar_filter = lambda ht: hl.is_missing(clinvar_path_ht[ht.key])
if af_filter is not True:
non_clinvar_filter = lambda ht: non_clinvar_filter(ht) & af_filter(ht)
af_filter = lambda ht: ht[path_cutoff_field] | non_clinvar_filter(ht)

if clinvar_path_ht is not False:
path_cutoff_field = self._get_af_prefilter_field(PATH_FREQ_OVERRIDE_CUTOFF)
non_clinvar_filter = lambda ht: hl.is_missing(clinvar_path_ht[ht.key])
if af_cutoff_field is not True:
non_clinvar_var_filter = non_clinvar_filter
non_clinvar_filter = lambda ht: non_clinvar_var_filter(ht) & self._af_prefilter(af_cutoff_field)(ht)
af_filter = lambda ht: ht[path_cutoff_field] | non_clinvar_filter(ht)
else:
af_filter = self._af_prefilter(af_cutoff_field)

return af_filter

@staticmethod
def _af_prefilter(af_cutoff_field):
return True if af_cutoff_field is True else lambda ht: ht[af_cutoff_field]

def _get_af_prefilter_field(self, af_cutoff):
return next((field for field, cutoff in self.FREQUENCY_PREFILTER_FIELDS.items() if af_cutoff <= cutoff), None)

Expand Down
36 changes: 15 additions & 21 deletions seqr/views/apis/report_api_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -612,9 +612,9 @@
'248367227', 'TC', 'T', 'CA1501729', 'RP11', '', '', '', 'Homozygous', '', 'paternal', '', '', 'Known', '',
'MONDO:0044970', '', 'Uncertain', '', 'Broad_HG00732', 'SR-ES', '', '', '', '', '', '', '',
], [
'Broad_HG00731_19_1912634', 'Broad_HG00731', 'Broad_exome_VCGS_FAM203_621_D2', 'SNV/INDEL', 'GRCh38', '19',
'1912634', 'C', 'T', 'CA403171634', 'OR4G11P', 'ENST00000371839', '', '', 'Heterozygous', '', 'unknown',
'Broad_HG00731_19_1912633', '', 'Known', '', 'MONDO:0044970', '', 'Full', '', '', 'SR-ES',
'Broad_HG00731_19_1912632', 'Broad_HG00731', 'Broad_exome_VCGS_FAM203_621_D2', 'SNV/INDEL', 'GRCh38', '19',
'1912632', 'GC', 'TT', '', 'OR4G11P', 'ENST00000371839', 'c.586_587delinsTT', 'p.Ala196Leu', 'Heterozygous', '', 'unknown',
'Broad_HG00731_19_1912634', '', 'Known', '', 'MONDO:0044970', '', 'Full', '', '', 'SR-ES',
'The following variants are part of the multinucleotide variant 19-1912632-GC-TT (c.586_587delinsTT, p.Ala196Leu): 19-1912633-G-T, 19-1912634-C-T',
'', '', '', '', '', '',
], [
Expand Down Expand Up @@ -830,7 +830,7 @@ def _test_gregor_export(self, url, mock_subprocess, mock_temp_dir, mock_open, mo
'The following entries are missing recommended "reported_race" in the "participant" table: Broad_HG00733, Broad_NA19678, Broad_NA19679, Broad_NA20870, Broad_NA20872, Broad_NA20874, Broad_NA20875, Broad_NA20876, Broad_NA20881, Broad_NA20888',
'The following entries are missing recommended "phenotype_description" in the "participant" table: Broad_NA20870, Broad_NA20872, Broad_NA20874, Broad_NA20875, Broad_NA20876, Broad_NA20881, Broad_NA20888',
'The following entries are missing recommended "age_at_enrollment" in the "participant" table: Broad_HG00731, Broad_NA20870, Broad_NA20872, Broad_NA20875, Broad_NA20876, Broad_NA20881, Broad_NA20888',
'The following entries are missing recommended "known_condition_name" in the "genetic_findings" table: Broad_HG00731_19_1912632, Broad_HG00731_19_1912633, Broad_HG00731_19_1912634, Broad_HG00731_1_248367227',
'The following entries are missing recommended "known_condition_name" in the "genetic_findings" table: Broad_HG00731_19_1912632, Broad_HG00731_1_248367227',
]
validation_warnings = [
'The following columns are specified as "enumeration" in the "participant" data model but are missing the allowed values definition: prior_testing',
Expand All @@ -853,7 +853,7 @@ def _test_gregor_export(self, url, mock_subprocess, mock_temp_dir, mock_open, mo
'The following entries have invalid values for "analysis_details" (from Airtable) in the "aligned_dna_short_read" table. Allowed values are a google bucket path starting with gs://. Invalid values: Broad_exome_VCGS_FAM203_621_D2_1 (DOI:10.5281/zenodo.4469317)',
'The following entries have invalid values for "date_data_generation" (from Airtable) in the "experiment_rna_short_read" table. Allowed values have data type float. Invalid values: NA19679 (2023-02-11)',
'The following entries are missing required "experiment_id" (from Airtable) in the "genetic_findings" table: Broad_NA19675_1_21_3343353',
'The following entries have non-unique values for "experiment_id" (from Airtable) in the "genetic_findings" table: Broad_exome_VCGS_FAM203_621_D2 (Broad_HG00731_19_1912632, Broad_HG00731_19_1912633, Broad_HG00731_19_1912634, Broad_HG00731_1_248367227)',
'The following entries have non-unique values for "experiment_id" (from Airtable) in the "genetic_findings" table: Broad_exome_VCGS_FAM203_621_D2 (Broad_HG00731_19_1912632, Broad_HG00731_1_248367227)',
]
self.assertListEqual(response.json()['errors'], validation_errors)

Expand Down Expand Up @@ -909,13 +909,7 @@ def _test_gregor_export(self, url, mock_subprocess, mock_temp_dir, mock_open, mo
'Broad_HG00731_1_248367227', 'Broad_HG00731', 'Broad_exome_VCGS_FAM203_621_D2', 'SNV/INDEL', 'GRCh37', '1',
'248367227', 'TC', 'T', 'CA1501729', 'RP11', '', '', '', 'Homozygous', '', 'paternal', '', '', 'Known', '',
'MONDO:0044970', '', 'Uncertain', '', 'Broad_HG00732', 'SR-ES', '', '', '', '', '', '', '',
], [
'Broad_HG00731_19_1912634', 'Broad_HG00731', 'Broad_exome_VCGS_FAM203_621_D2', 'SNV/INDEL', 'GRCh38', '19',
'1912634', 'C', 'T', 'CA403171634', 'OR4G11P', 'ENST00000371839', '', '', 'Heterozygous', '', 'unknown',
'Broad_HG00731_19_1912633', '', 'Known', '', 'MONDO:0044970', '', 'Full', '', '', 'SR-ES',
'The following variants are part of the multinucleotide variant 19-1912632-GC-TT (c.586_587delinsTT, p.Ala196Leu): 19-1912633-G-T, 19-1912634-C-T',
'', '', '', '', '', '',
]], additional_calls=2)
]], additional_calls=1)

responses.calls.reset()
mock_subprocess.reset_mock()
Expand Down Expand Up @@ -1103,7 +1097,6 @@ def _assert_expected_gregor_files(self, mock_open, mock_subprocess, has_second_p
genetic_findings_file,
expected_rows=GENETIC_FINDINGS_TABLE if has_second_project else GENETIC_FINDINGS_TABLE[:4],
absent_rows=None,
additional_calls=2,
)

def _assert_expected_file(self, actual_rows, expected_rows, additional_calls=0, absent_rows=None):
Expand Down Expand Up @@ -1251,7 +1244,7 @@ def test_variant_metadata(self):
self.assertEqual(response.status_code, 200)
response_json = response.json()
self.assertListEqual(list(response_json.keys()), ['rows'])
row_ids = ['NA19675_1_21_3343353', 'HG00731_1_248367227', 'HG00731_19_1912634', 'HG00731_19_1912633', 'HG00731_19_1912632']
row_ids = ['NA19675_1_21_3343353', 'HG00731_1_248367227', 'HG00731_19_1912632']
self.assertListEqual([r['genetic_findings_id'] for r in response_json['rows']], row_ids)
self.assertDictEqual(response_json['rows'][0], {
**BASE_VARIANT_METADATA_ROW,
Expand Down Expand Up @@ -1313,9 +1306,8 @@ def test_variant_metadata(self):
self.assertDictEqual(response_json['rows'][1], expected_row)
expected_mnv = {
**BASE_VARIANT_METADATA_ROW,
'alt': 'T',
'alt': 'TT',
'chrom': '19',
'ClinGen_allele_ID': 'CA403171634',
'condition_id': 'MONDO:0044970',
'condition_inheritance': 'Unknown',
'displayName': '2',
Expand All @@ -1324,13 +1316,15 @@ def test_variant_metadata(self):
'gene_of_interest': 'OR4G11P',
'gene_id': 'ENSG00000240361',
'gene_known_for_phenotype': 'Known',
'genetic_findings_id': 'HG00731_19_1912634',
'genetic_findings_id': 'HG00731_19_1912632',
'hgvsc': 'c.586_587delinsTT',
'hgvsp': 'p.Ala196Leu',
'known_condition_name': 'mitochondrial disease',
'notes': 'The following variants are part of the multinucleotide variant 19-1912632-GC-TT (c.586_587delinsTT, p.Ala196Leu): 19-1912633-G-T, 19-1912634-C-T',
'participant_id': 'HG00731',
'pos': 1912634,
'pos': 1912632,
'projectGuid': 'R0001_1kg',
'ref': 'C',
'ref': 'GC',
'tags': ['Known gene for phenotype'],
'transcript': 'ENST00000371839',
'variant_inheritance': 'unknown',
Expand All @@ -1349,7 +1343,7 @@ def test_variant_metadata(self):
self.assertListEqual([r['genetic_findings_id'] for r in response_json['rows']], row_ids)
self.assertDictEqual(response_json['rows'][1], expected_row)
self.assertDictEqual(response_json['rows'][2], expected_mnv)
self.assertDictEqual(response_json['rows'][5], {
self.assertDictEqual(response_json['rows'][3], {
**BASE_VARIANT_METADATA_ROW,
'MME': True,
'alt': 'T',
Expand Down Expand Up @@ -1382,7 +1376,7 @@ def test_variant_metadata(self):
'variant_reference_assembly': 'GRCh37',
'zygosity': 'Heterozygous',
})
self.assertDictEqual(response_json['rows'][6], {
self.assertDictEqual(response_json['rows'][4], {
**BASE_VARIANT_METADATA_ROW,
'alt': None,
'chrom': '1',
Expand Down
4 changes: 3 additions & 1 deletion seqr/views/utils/anvil_metadata_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,9 @@ def _get_genotype_zygosity(genotype, individual=None, variant=None):

def _get_discovery_notes(variant, gene_variants, omit_parent_mnvs):
parent_mnv = next((v for v in gene_variants if len(v['individual_genotype']) == 1), gene_variants[0])
if parent_mnv['genetic_findings_id'] == variant['genetic_findings_id'] and omit_parent_mnvs:
is_parent_mnv = (parent_mnv['genetic_findings_id'], parent_mnv['alt']) == (variant['genetic_findings_id'], variant['alt'])
should_skip = is_parent_mnv if omit_parent_mnvs else not is_parent_mnv
if should_skip:
return None
variant_type = 'complex structural' if parent_mnv.get('sv_type') else 'multinucleotide'
parent_name = _get_nested_variant_name(parent_mnv)
Expand Down

0 comments on commit f444d42

Please sign in to comment.