From 3220115a7f16b8a20b5d05473e7458b751666c62 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 14 Feb 2024 23:33:06 -0500 Subject: [PATCH 1/2] improve performance for search with many intervals --- hail_search/queries/mito.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/hail_search/queries/mito.py b/hail_search/queries/mito.py index d65ed2eb4a..e905999cec 100644 --- a/hail_search/queries/mito.py +++ b/hail_search/queries/mito.py @@ -5,6 +5,7 @@ PATHOGENICTY_HGMD_SORT_KEY from hail_search.queries.base import BaseHailTableQuery, PredictionPath, QualityFilterFormat +MAX_LOAD_INTERVALS = 1000 def _clinvar_sort(clinvar_field, r): return hl.or_else(r[clinvar_field].pathogenicity_id, ABSENT_PATH_SORT_OFFSET) @@ -135,7 +136,7 @@ def __init__(self, *args, **kwargs): def _parse_intervals(self, intervals, exclude_intervals=False, **kwargs): parsed_intervals = super()._parse_intervals(intervals,**kwargs) - if parsed_intervals and not exclude_intervals: + if parsed_intervals and not exclude_intervals and len(parsed_intervals) < MAX_LOAD_INTERVALS: self._load_table_kwargs = {'_intervals': parsed_intervals, '_filter_intervals': True} return parsed_intervals @@ -197,6 +198,8 @@ def _parse_variant_keys(self, variant_ids=None, **kwargs): def _prefilter_entries_table(self, ht, parsed_intervals=None, exclude_intervals=False, **kwargs): if exclude_intervals and parsed_intervals: ht = hl.filter_intervals(ht, parsed_intervals, keep=False) + elif len(parsed_intervals) >= MAX_LOAD_INTERVALS: + ht = hl.filter_intervals(ht, parsed_intervals) return ht def _get_allowed_consequence_ids(self, annotations): From 182fb6eadbc16331839df3ae8e6a8c83781414cd Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 14 Feb 2024 23:44:45 -0500 Subject: [PATCH 2/2] fix syntax --- hail_search/queries/mito.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hail_search/queries/mito.py b/hail_search/queries/mito.py index e905999cec..0b48d592f6 100644 --- a/hail_search/queries/mito.py +++ b/hail_search/queries/mito.py @@ -198,7 +198,7 @@ def _parse_variant_keys(self, variant_ids=None, **kwargs): def _prefilter_entries_table(self, ht, parsed_intervals=None, exclude_intervals=False, **kwargs): if exclude_intervals and parsed_intervals: ht = hl.filter_intervals(ht, parsed_intervals, keep=False) - elif len(parsed_intervals) >= MAX_LOAD_INTERVALS: + elif len(parsed_intervals or []) >= MAX_LOAD_INTERVALS: ht = hl.filter_intervals(ht, parsed_intervals) return ht