Skip to content

Commit

Permalink
Merge pull request #3883 from broadinstitute/sv-lookup-interval
Browse files Browse the repository at this point in the history
SV lookup interval
  • Loading branch information
hanars authored Feb 14, 2024
2 parents 9abdf47 + 4fc80a5 commit 4f6ec12
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 13 deletions.
5 changes: 2 additions & 3 deletions hail_search/queries/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ def _load_filtered_table(self, sample_data, intervals=None, annotations=None, an
parsed_intervals = self._parse_intervals(intervals, **kwargs)
parsed_annotations = self._parse_annotations(annotations, annotations_secondary, **kwargs)
self.import_filtered_table(
sample_data, parsed_intervals=parsed_intervals, parsed_annotations=parsed_annotations, **kwargs)
*self._parse_sample_data(sample_data), parsed_intervals=parsed_intervals, parsed_annotations=parsed_annotations, **kwargs)

@classmethod
def _get_table_path(cls, path, use_ssd_dir=False):
Expand Down Expand Up @@ -292,8 +292,7 @@ def _load_filtered_project_hts(self, project_samples, skip_all_missing=False, **

return filtered_project_hts

def import_filtered_table(self, sample_data, intervals=None, **kwargs):
project_samples, num_families = self._parse_sample_data(sample_data)
def import_filtered_table(self, project_samples, num_families, intervals=None, **kwargs):
if num_families == 1:
family_sample_data = list(project_samples.values())[0]
family_guid = list(family_sample_data.keys())[0]
Expand Down
24 changes: 23 additions & 1 deletion hail_search/queries/sv.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,18 @@ class SvHailTableQuery(BaseHailTableQuery):
def _get_sample_type(cls, *args):
return cls.DATA_TYPE.split('_')[-1]

def _filter_annotated_table(self, ht, *args, parsed_intervals=None, exclude_intervals=False, **kwargs):
def import_filtered_table(self, project_samples, *args, parsed_intervals=None, padded_interval=None, **kwargs):
    """Import the filtered table, pre-filtering annotations for multi-project interval searches.

    When more than one project is searched with an interval filter, it is faster to
    read and interval-filter the annotation table once up front, then join each
    project's entries against it, than to interval-filter every project separately.
    """
    has_interval_filter = bool(parsed_intervals) or bool(padded_interval)
    if has_interval_filter and len(project_samples) > 1:
        # For multi-project interval search, faster to first read and filter the annotation table and then add entries
        annotation_ht = self._read_table('annotations.ht')
        annotation_ht = self._filter_annotated_table(
            annotation_ht, parsed_intervals=parsed_intervals, padded_interval=padded_interval,
        )
        self._load_table_kwargs['variant_ht'] = annotation_ht.select()
        # Intervals have already been applied above, so clear them before delegating
        parsed_intervals = None
        padded_interval = None

    return super().import_filtered_table(
        project_samples, *args, parsed_intervals=parsed_intervals, padded_interval=padded_interval, **kwargs)

def _filter_annotated_table(self, ht, *args, parsed_intervals=None, exclude_intervals=False, padded_interval=None, **kwargs):
if parsed_intervals:
interval_filter = hl.array(parsed_intervals).any(lambda interval: hl.if_else(
ht.start_locus.contig == ht.end_locus.contig,
Expand All @@ -66,9 +77,20 @@ def _filter_annotated_table(self, ht, *args, parsed_intervals=None, exclude_inte
if exclude_intervals:
interval_filter = ~interval_filter
ht = ht.filter(interval_filter)
if padded_interval:
padding = int((padded_interval['end'] - padded_interval['start']) * padded_interval['padding'])
ht = ht.filter(hl.all([
ht.start_locus.contig == f"chr{padded_interval['chrom']}",
self._locus_in_range(ht.start_locus, padded_interval['start'], padding),
self._locus_in_range(ht.end_locus, padded_interval['end'], padding)
]))

return super()._filter_annotated_table(ht, *args, **kwargs)

@staticmethod
def _locus_in_range(locus, position, padding):
return (max(position - padding, 1) < locus.position) & (min(position + padding, 3e8) > locus.position)

def _parse_annotations(self, annotations, *args, **kwargs):
parsed_annotations = super()._parse_annotations(annotations, *args, **kwargs)
parsed_annotations[NEW_SV_FIELD] = (annotations or {}).get(NEW_SV_FIELD)
Expand Down
20 changes: 20 additions & 0 deletions hail_search/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -589,6 +589,11 @@ async def test_location_search(self):
[SV_VARIANT1, SV_VARIANT2], sample_data=SV_WGS_SAMPLE_DATA, intervals=sv_intervals, gene_ids=['ENSG00000171621'],
)

await self._assert_expected_search(
[SV_VARIANT1, SV_VARIANT2, GCNV_VARIANT3, GCNV_VARIANT4], intervals=sv_intervals,
sample_data={'SV_WES': EXPECTED_SAMPLE_DATA['SV_WES'], **SV_WGS_SAMPLE_DATA},
)

await self._assert_expected_search(
[VARIANT1, VARIANT2], omit_sample_type='SV_WES', **EXCLUDE_LOCATION_SEARCH,
)
Expand All @@ -606,6 +611,21 @@ async def test_location_search(self):
intervals=LOCATION_SEARCH['intervals'][-1:], gene_ids=LOCATION_SEARCH['gene_ids'][:1]
)

await self._assert_expected_search(
[GCNV_VARIANT4], padded_interval={'chrom': '17', 'start': 38720781, 'end': 38738703, 'padding': 0.2},
omit_sample_type='SNV_INDEL',
)

await self._assert_expected_search(
[], padded_interval={'chrom': '17', 'start': 38720781, 'end': 38738703, 'padding': 0.1},
omit_sample_type='SNV_INDEL',
)

await self._assert_expected_search(
[SV_VARIANT4], padded_interval={'chrom': '14', 'start': 106692244, 'end': 106742587, 'padding': 0.1},
sample_data=SV_WGS_SAMPLE_DATA,
)

async def test_variant_id_search(self):
await self._assert_expected_search([VARIANT2], omit_sample_type='SV_WES', **RSID_SEARCH)

Expand Down
9 changes: 1 addition & 8 deletions seqr/utils/search/hail_search_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,17 +96,10 @@ def hail_variant_lookup(user, variant_id, samples=None, dataset_type=Sample.DATA
variants = [variant]

if is_sv and sample_data and variant['svType'] in {'DEL', 'DUP'}:
start = variant['pos']
end = variant['end']
offset = 0.2
if variant.get('endChrom'):
start -= 50
end += 50
offset = None
del body['variant_id']
body.update({
'sample_data': sample_data,
'intervals': [_format_interval(chrom=variant['chrom'], start=start, end=end, offset=offset)],
'padded_interval': {'chrom': variant['chrom'], 'start': variant['pos'], 'end': variant['end'], 'padding': 0.2},
'annotations': {'structural': [variant['svType'], f"gCNV_{variant['svType']}"]}
})
variants += _execute_search(body, user)['results']
Expand Down
3 changes: 2 additions & 1 deletion seqr/utils/search/hail_search_utils_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,8 @@ def test_variant_lookup(self):
})
self._test_minimal_search_call(expected_search_body={
'genome_version': 'GRCh38', 'data_type': 'SV_WES', 'annotations': {'structural': ['DEL', 'gCNV_DEL']},
'intervals': ['17:38718997-38738487'], 'sample_data': {'SV_WGS': SV_WGS_SAMPLE_DATA},
'padded_interval': {'chrom': '17', 'start': 38721781, 'end': 38735703, 'padding': 0.2},
'sample_data': {'SV_WGS': SV_WGS_SAMPLE_DATA},
})

# No second lookup call is made for non DELs/DUPs
Expand Down

0 comments on commit 4f6ec12

Please sign in to comment.