Merge pull request #3880 from broadinstitute/sv-lookup-performance
Sv lookup performance
hanars authored Feb 14, 2024
2 parents eec6bb8 + c9c1e39 commit 5cdb7c4
Showing 3 changed files with 17 additions and 3 deletions.
5 changes: 2 additions & 3 deletions hail_search/queries/base.py
@@ -234,7 +234,7 @@ def _load_filtered_table(self, sample_data, intervals=None, annotations=None, an
parsed_intervals = self._parse_intervals(intervals, **kwargs)
parsed_annotations = self._parse_annotations(annotations, annotations_secondary, **kwargs)
self.import_filtered_table(
-            sample_data, parsed_intervals=parsed_intervals, parsed_annotations=parsed_annotations, **kwargs)
+            *self._parse_sample_data(sample_data), parsed_intervals=parsed_intervals, parsed_annotations=parsed_annotations, **kwargs)

@classmethod
def _get_table_path(cls, path, use_ssd_dir=False):
@@ -291,8 +291,7 @@ def _load_filtered_project_hts(self, project_samples, skip_all_missing=False, **

return filtered_project_hts

-    def import_filtered_table(self, sample_data, intervals=None, **kwargs):
-        project_samples, num_families = self._parse_sample_data(sample_data)
+    def import_filtered_table(self, project_samples, num_families, intervals=None, **kwargs):
if num_families == 1:
family_sample_data = list(project_samples.values())[0]
family_guid = list(family_sample_data.keys())[0]
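
Note on the base.py change above: _parse_sample_data is now called in _load_filtered_table and its result unpacked into the call, so import_filtered_table (and any subclass override of it) receives the already-parsed project_samples and num_families instead of the raw sample_data payload. A minimal, self-contained sketch of that call-flow refactor, using toy class names and data shapes rather than the seqr implementation:

class Base:
    def _parse_sample_data(self, sample_data):
        # toy stand-in: pretend everything belongs to one project and one family
        return {'PROJECT_1': {'FAMILY_1': sample_data}}, 1

    def _load_filtered_table(self, sample_data, **kwargs):
        # parse once here, then hand the parsed pieces down as positional args
        self.import_filtered_table(*self._parse_sample_data(sample_data), **kwargs)

    def import_filtered_table(self, project_samples, num_families, **kwargs):
        print(f'loading {num_families} families across {len(project_samples)} projects')


class SvLike(Base):
    def import_filtered_table(self, project_samples, num_families, **kwargs):
        # overrides can now branch on the parsed structure without re-parsing sample_data
        if len(project_samples) > 1:
            print('taking the multi-project path')
        return super().import_filtered_table(project_samples, num_families, **kwargs)


SvLike()._load_filtered_table(['sample_1', 'sample_2'])
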
10 changes: 10 additions & 0 deletions hail_search/queries/sv.py
@@ -56,6 +56,16 @@ class SvHailTableQuery(BaseHailTableQuery):
def _get_sample_type(cls, *args):
return cls.DATA_TYPE.split('_')[-1]

+    def import_filtered_table(self, project_samples, *args, parsed_intervals=None, **kwargs):
+        if len(project_samples) > 1 and parsed_intervals:
+            # For multi-project interval search, faster to first read and filter the annotation table and then add entries
+            ht = self._read_table('annotations.ht')
+            ht = self._filter_annotated_table(ht, parsed_intervals=parsed_intervals)
+            self._load_table_kwargs['variant_ht'] = ht.select()
+            parsed_intervals = None
+
+        return super().import_filtered_table(project_samples, *args, parsed_intervals=parsed_intervals, **kwargs)
+
def _filter_annotated_table(self, ht, *args, parsed_intervals=None, exclude_intervals=False, **kwargs):
if parsed_intervals:
interval_filter = hl.array(parsed_intervals).any(lambda interval: hl.if_else(
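
Note on the sv.py change above: for a multi-project interval search, the override reads the shared annotations.ht once, applies the interval filter to it, and stores the filtered row keys as variant_ht in _load_table_kwargs, so each project's entries table is loaded against an already-reduced variant set instead of being interval-filtered separately. A rough sketch of the underlying Hail pattern, with placeholder paths and a locus-keyed table assumed for simplicity (the real SV tables and _filter_annotated_table use their own interval-overlap logic):

import hail as hl

# Placeholder paths and a GRCh38 locus-keyed table; illustrative only.
intervals = [hl.parse_locus_interval('chr1:1000000-2000000', reference_genome='GRCh38')]

# 1. Filter the single shared annotation table by interval first.
annotations_ht = hl.read_table('gs://example-bucket/SV_WGS/annotations.ht')
annotations_ht = hl.filter_intervals(annotations_ht, intervals)

# 2. Keep only the row keys, mirroring ht.select() being stored as variant_ht.
variant_ht = annotations_ht.select()

# 3. Restrict each project's entries table to those keys rather than
#    interval-filtering every project table separately.
project_ht = hl.read_table('gs://example-bucket/SV_WGS/projects/R0001_demo_project.ht')
project_ht = project_ht.semi_join(variant_ht)
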
5 changes: 5 additions & 0 deletions hail_search/test_search.py
@@ -589,6 +589,11 @@ async def test_location_search(self):
[SV_VARIANT1, SV_VARIANT2], sample_data=SV_WGS_SAMPLE_DATA, intervals=sv_intervals, gene_ids=['ENSG00000171621'],
)

+        await self._assert_expected_search(
+            [SV_VARIANT1, SV_VARIANT2, GCNV_VARIANT3, GCNV_VARIANT4], intervals=sv_intervals,
+            sample_data={'SV_WES': EXPECTED_SAMPLE_DATA['SV_WES'], **SV_WGS_SAMPLE_DATA},
+        )
+
await self._assert_expected_search(
[VARIANT1, VARIANT2], omit_sample_type='SV_WES', **EXCLUDE_LOCATION_SEARCH,
)
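
Note on the new test case above: it runs the interval search with sample data for both SV_WES and SV_WGS and expects the matching gCNV and WGS SV calls back, which is the kind of multi-project interval search the sv.py change is meant to speed up. A rough, hypothetical sketch of the combined payload shape (field contents are placeholders, not the real test fixtures):

sample_data = {
    'SV_WES': [{'sample_id': 'WES_SAMPLE_1', 'family_guid': 'F_WES_1'}],  # stands in for EXPECTED_SAMPLE_DATA['SV_WES']
    'SV_WGS': [{'sample_id': 'WGS_SAMPLE_1', 'family_guid': 'F_WGS_1'}],  # stands in for the SV_WGS_SAMPLE_DATA entry
}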
