Merge pull request #3540 from broadinstitute/hail-backend-clinvar-overrides

Hail backend - clinvar overrides
hanars authored Aug 9, 2023
2 parents ee43165 + 3156430 commit a81c997
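
In brief (summarizing the diff below): this change adds ClinVar-based overrides to the Hail search backend. Variants whose ClinVar significance is pathogenic or likely_pathogenic may pass population-frequency filters up to the new PATH_FREQ_OVERRIDE_CUTOFF of 0.05 even when a stricter AF threshold was requested, and such variants (looked up in a clinvar_path_variants.ht table) also bypass the family genotype-quality filter. The sketch below is an illustrative plain-Python restatement of the frequency-override condition, not code from the diff; passes_af_filter and is_clinvar_path are made-up names.

PATH_FREQ_OVERRIDE_CUTOFF = 0.05  # mirrors hail_search/constants.py

def passes_af_filter(af, requested_af, is_clinvar_path):
    # A variant passes if its population AF is within the requested threshold...
    if af <= requested_af:
        return True
    # ...or, for ClinVar pathogenic/likely_pathogenic variants, if the requested
    # threshold is stricter than the override cutoff and the AF is within the cutoff.
    if is_clinvar_path and requested_af < PATH_FREQ_OVERRIDE_CUTOFF:
        return af <= PATH_FREQ_OVERRIDE_CUTOFF
    return False

# Example: a ClinVar-pathogenic variant at AF 0.03 passes a 0.01 AF filter,
# while a non-pathogenic variant at the same AF does not.
assert passes_af_filter(0.03, 0.01, is_clinvar_path=True)
assert not passes_af_filter(0.03, 0.01, is_clinvar_path=False)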
Showing 39 changed files with 126 additions and 40 deletions.
8 changes: 6 additions & 2 deletions hail_search/constants.py
@@ -52,9 +52,13 @@
},
}

PATH_FREQ_OVERRIDE_CUTOFF = 0.05
CLINVAR_PATH_FILTER = 'pathogenic'
CLINVAR_LIKELY_PATH_FILTER = 'likely_pathogenic'
CLINVAR_PATH_SIGNIFICANCES = {CLINVAR_PATH_FILTER, CLINVAR_LIKELY_PATH_FILTER}
CLINVAR_PATH_RANGES = [
('pathogenic', 'Pathogenic', 'Pathogenic/Likely_risk_allele'),
('likely_pathogenic', 'Pathogenic/Likely_pathogenic', 'Likely_risk_allele'),
(CLINVAR_PATH_FILTER, 'Pathogenic', 'Pathogenic/Likely_risk_allele'),
(CLINVAR_LIKELY_PATH_FILTER, 'Pathogenic/Likely_pathogenic', 'Likely_risk_allele'),
('vus_or_conflicting', 'Conflicting_interpretations_of_pathogenicity', 'No_pathogenic_assertion'),
('likely_benign', 'Likely_benign', 'Benign/Likely_benign'),
('benign', 'Benign/Likely_benign', 'Benign'),
Binary file not shown.
Binary file not shown.
@@ -1,3 +1,3 @@
This folder comprises a Hail (www.hail.is) native Table or MatrixTable.
Written with version 0.2.109-b71b065e4bb6
Created at 2023/08/04 12:47:37
Created at 2023/08/07 11:46:24
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,3 @@
This folder comprises a Hail (www.hail.is) native Table or MatrixTable.
Written with version 0.2.109-b71b065e4bb6
Created at 2023/08/04 13:56:14
Empty file.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
118 changes: 87 additions & 31 deletions hail_search/hail_search_query.py
@@ -7,7 +7,8 @@
from hail_search.constants import AFFECTED, UNAFFECTED, AFFECTED_ID, UNAFFECTED_ID, MALE, VARIANT_DATASET, \
VARIANT_KEY_FIELD, GNOMAD_GENOMES_FIELD, XPOS, GENOME_VERSION_GRCh38_DISPLAY, INHERITANCE_FILTERS, \
ANY_AFFECTED, X_LINKED_RECESSIVE, REF_REF, REF_ALT, COMP_HET_ALT, ALT_ALT, HAS_ALT, HAS_REF, \
ANNOTATION_OVERRIDE_FIELDS, SCREEN_KEY, SPLICE_AI_FIELD, CLINVAR_KEY, HGMD_KEY, CLINVAR_PATH_RANGES, HGMD_PATH_RANGES
ANNOTATION_OVERRIDE_FIELDS, SCREEN_KEY, SPLICE_AI_FIELD, CLINVAR_KEY, HGMD_KEY, CLINVAR_PATH_SIGNIFICANCES, \
CLINVAR_PATH_FILTER, CLINVAR_LIKELY_PATH_FILTER, CLINVAR_PATH_RANGES, HGMD_PATH_RANGES, PATH_FREQ_OVERRIDE_CUTOFF

DATASETS_DIR = os.environ.get('DATASETS_DIR', '/hail_datasets')

@@ -142,42 +143,46 @@ def __init__(self, data_type, sample_data, genome_version, sort=XPOS, num_result
self._genome_version = genome_version
self._sort = sort
self._num_results = num_results
self._data_type = data_type
self._ht = None
self._enums = None
self._globals = None

self._load_filtered_table(data_type, sample_data, **kwargs)
self._load_filtered_table(sample_data, **kwargs)

def _load_filtered_table(self, data_type, sample_data, intervals=None, exclude_intervals=False, variant_ids=None, **kwargs):
def _load_filtered_table(self, sample_data, intervals=None, exclude_intervals=False, variant_ids=None, **kwargs):
parsed_intervals, variant_ids = self._parse_intervals(intervals, variant_ids)
excluded_intervals = None
if exclude_intervals:
excluded_intervals = parsed_intervals
parsed_intervals = None
self._load_table_kwargs = {'_intervals': parsed_intervals, '_filter_intervals': bool(parsed_intervals)}
self.import_filtered_table(
data_type, sample_data, intervals=parsed_intervals, excluded_intervals=excluded_intervals,
variant_ids=variant_ids, **kwargs)
sample_data, excluded_intervals=excluded_intervals, variant_ids=variant_ids, **kwargs)

def import_filtered_table(self, data_type, sample_data, intervals=None, **kwargs):
tables_path = f'{DATASETS_DIR}/{self._genome_version}/{data_type}'
load_table_kwargs = {'_intervals': intervals, '_filter_intervals': bool(intervals)}
def _get_table_path(self, path):
return f'{DATASETS_DIR}/{self._genome_version}/{self._data_type}/{path}'

def _read_table(self, path):
return hl.read_table(self._get_table_path(path), **self._load_table_kwargs)

def import_filtered_table(self, sample_data, **kwargs):
family_samples = defaultdict(list)
project_samples = defaultdict(list)
for s in sample_data:
family_samples[s['family_guid']].append(s)
project_samples[s['project_guid']].append(s)

logger.info(f'Loading {data_type} data for {len(family_samples)} families in {len(project_samples)} projects')
logger.info(f'Loading {self._data_type} data for {len(family_samples)} families in {len(project_samples)} projects')
if len(family_samples) == 1:
family_guid, family_sample_data = list(family_samples.items())[0]
family_ht = hl.read_table(f'{tables_path}/families/{family_guid}.ht', **load_table_kwargs)
family_ht = self._read_table(f'families/{family_guid}.ht')
families_ht = self._filter_entries_table(family_ht, family_sample_data, **kwargs)
else:
filtered_project_hts = []
exception_messages = set()
for project_guid, project_sample_data in project_samples.items():
project_ht = hl.read_table(f'{tables_path}/projects/{project_guid}.ht', **load_table_kwargs)
project_ht = self._read_table(f'projects/{project_guid}.ht')
try:
filtered_project_hts.append(self._filter_entries_table(project_ht, project_sample_data, **kwargs))
except HTTPBadRequest as e:
@@ -199,7 +204,7 @@ def import_filtered_table(self, data_type, sample_data, intervals=None, **kwargs
),
)

annotations_ht_path = f'{tables_path}/annotations.ht'
annotations_ht_path = self._get_table_path('annotations.ht')
annotation_ht_query_result = hl.query_table(
annotations_ht_path, families_ht.key).first().drop(*families_ht.key)
ht = families_ht.annotate(**annotation_ht_query_result)
@@ -220,7 +225,7 @@ def import_filtered_table(self, data_type, sample_data, intervals=None, **kwargs
)
self._filter_annotated_table(**kwargs)

def _filter_entries_table(self, ht, sample_data, inheritance_mode=None, inheritance_filter=None, quality_filter=None,
def _filter_entries_table(self, ht, sample_data, inheritance_mode=None, inheritance_filter=None, quality_filter=None,
excluded_intervals=None, variant_ids=None, **kwargs):
if excluded_intervals:
ht = hl.filter_intervals(ht, excluded_intervals, keep=False)
@@ -238,10 +243,10 @@ def _filter_entries_table(self, ht, sample_data, inheritance_mode=None, inherit
if quality_filter.get('vcf_filter'):
ht = self._filter_vcf_filters(ht)

passes_quality_filter = self._get_genotype_passes_quality_filter(quality_filter)
passes_quality_filter = self._get_family_passes_quality_filter(quality_filter, ht=ht, **kwargs)
if passes_quality_filter is not None:
ht = ht.annotate(family_entries=ht.family_entries.map(
lambda entries: hl.or_missing(entries.all(passes_quality_filter), entries)
lambda entries: hl.or_missing(passes_quality_filter(entries), entries)
))
ht = ht.filter(ht.family_entries.any(hl.is_defined))

@@ -353,19 +358,18 @@ def _valid_genotype_family_entries(cls, entries, gentoype_entry_indices, genotyp
)
return hl.or_missing(is_valid, entries)

@classmethod
def _get_genotype_passes_quality_filter(cls, quality_filter):
def _get_family_passes_quality_filter(self, quality_filter, **kwargs):
affected_only = quality_filter.get('affected_only')
passes_quality_filters = []
for filter_k, value in quality_filter.items():
field = cls.GENOTYPE_FIELDS.get(filter_k.replace('min_', ''))
field = self.GENOTYPE_FIELDS.get(filter_k.replace('min_', ''))
if field and value:
passes_quality_filters.append(cls._get_genotype_passes_quality_field(field, value, affected_only))
passes_quality_filters.append(self._get_genotype_passes_quality_field(field, value, affected_only))

if not passes_quality_filters:
return None

return lambda gt: hl.all([f(gt) for f in passes_quality_filters])
return lambda entries: entries.all(lambda gt: hl.all([f(gt) for f in passes_quality_filters]))

@classmethod
def _get_genotype_passes_quality_field(cls, field, value, affected_only):
@@ -412,7 +416,7 @@ def _filter_annotated_table(self, gene_ids=None, rs_ids=None, frequencies=None,
if rs_ids:
self._filter_rs_ids(rs_ids)

self._filter_by_frequency(frequencies)
self._filter_by_frequency(frequencies, pathogenicity)

self._filter_by_in_silico(in_silico)

@@ -458,18 +462,23 @@ def _parse_intervals(self, intervals, variant_ids):

return parsed_intervals, variant_ids

def _filter_by_frequency(self, frequencies):
def _filter_by_frequency(self, frequencies, pathogenicity):
frequencies = {k: v for k, v in (frequencies or {}).items() if k in self.POPULATIONS}
if not frequencies:
return

path_override_filter = self._frequency_override_filter(pathogenicity)
filters = []
for pop, freqs in sorted(frequencies.items()):
pop_filters = []
pop_expr = self._ht[self.POPULATION_FIELDS.get(pop, pop)]
pop_config = self._format_population_config(self.POPULATIONS[pop])
if freqs.get('af') is not None:
af_field = pop_config.get('filter_af') or pop_config['af']
pop_filters.append(pop_expr[af_field] <= freqs['af'])
pop_filter = pop_expr[af_field] <= freqs['af']
if path_override_filter is not None and freqs['af'] < PATH_FREQ_OVERRIDE_CUTOFF:
pop_filter |= path_override_filter & (pop_expr[af_field] <= PATH_FREQ_OVERRIDE_CUTOFF)
pop_filters.append(pop_filter)
elif freqs.get('ac') is not None:
ac_field = pop_config['ac']
if ac_field:
@@ -484,7 +493,13 @@ def _filter_by_frequency(self, frequencies):
pop_filters.append(pop_expr[hemi_field] <= freqs['hh'])

if pop_filters:
self._ht = self._ht.filter(hl.is_missing(pop_expr) | hl.all(pop_filters))
filters.append(hl.is_missing(pop_expr) | hl.all(pop_filters))

if filters:
self._ht = self._ht.filter(hl.all(filters))

def _frequency_override_filter(self, pathogenicity):
return None

def _filter_by_in_silico(self, in_silico_filters):
in_silico_filters = in_silico_filters or {}
@@ -592,10 +607,10 @@ class VariantHailTableQuery(BaseHailTableQuery):
'revel': PredictionPath('dbnsfp', 'REVEL_score'),
'sift': PredictionPath('dbnsfp', 'SIFT_pred'),
}
PATHOGENICITY_FILTERS = [
(CLINVAR_KEY, 'pathogenicity', CLINVAR_PATH_RANGES),
(HGMD_KEY, 'class', HGMD_PATH_RANGES),
]
PATHOGENICITY_FILTERS = {
CLINVAR_KEY: ('pathogenicity', CLINVAR_PATH_RANGES),
HGMD_KEY: ('class', HGMD_PATH_RANGES),
}

GLOBALS = BaseHailTableQuery.GLOBALS + ['versions']
CORE_FIELDS = BaseHailTableQuery.CORE_FIELDS + ['rsid']
@@ -644,6 +659,10 @@ def _selected_main_transcript_expr(ht):

return hl.or_else(matched_transcript, main_transcript)

def __init__(self, *args, **kwargs):
self._filter_hts = {}
super(VariantHailTableQuery, self).__init__(*args, **kwargs)

def import_filtered_table(self, *args, **kwargs):
super().import_filtered_table(*args, **kwargs)
self._ht = self._ht.key_by(**{VARIANT_KEY_FIELD: self._ht.variant_id})
@@ -656,6 +675,32 @@ def _format_transcript_args(self):
})
return args

def _get_family_passes_quality_filter(self, quality_filter, ht=None, pathogenicity=None, **kwargs):
passes_quality = super(VariantHailTableQuery, self)._get_family_passes_quality_filter(quality_filter)
clinvar_path_ht = False if passes_quality is None else self._get_clinvar_filter_ht(pathogenicity)
if not clinvar_path_ht:
return passes_quality

return lambda entries: hl.is_defined(clinvar_path_ht[ht.key]) | passes_quality(entries)

def _get_clinvar_filter_ht(self, pathogenicity):
if self._filter_hts.get(CLINVAR_KEY) is not None:
return self._filter_hts[CLINVAR_KEY]

clinvar_path_filters = self._get_clinvar_path_filters(pathogenicity)
if not clinvar_path_filters:
self._filter_hts[CLINVAR_KEY] = False
return False

clinvar_path_ht = self._read_table('clinvar_path_variants.ht')
if CLINVAR_LIKELY_PATH_FILTER not in clinvar_path_filters:
clinvar_path_ht = clinvar_path_ht.filter(clinvar_path_ht.is_pathogenic)
elif CLINVAR_PATH_FILTER not in clinvar_path_filters:
clinvar_path_ht = clinvar_path_ht.filter(clinvar_path_ht.is_likely_pathogenic)
self._filter_hts[CLINVAR_KEY] = clinvar_path_ht

return clinvar_path_ht

def _get_gene_id_filter(self, gene_ids):
self._ht = self._ht.annotate(
gene_transcripts=self._ht.sorted_transcript_consequences.filter(lambda t: gene_ids.contains(t.gene_id))
@@ -688,10 +733,10 @@ def _annotate_allowed_consequences(self, annotations, annotation_filters):
def _get_annotation_override_filters(self, pathogenicity, annotations):
annotation_filters = []

for key, *args in self.PATHOGENICITY_FILTERS:
for key in self.PATHOGENICITY_FILTERS.keys():
path_terms = (pathogenicity or {}).get(key)
if path_terms:
annotation_filters.append(self._has_terms_range_expr(path_terms, key, *args))
annotation_filters.append(self._has_path_expr(path_terms, key))
if annotations.get(SCREEN_KEY):
screen_enum = self._get_enum_lookup(SCREEN_KEY.lower(), 'region_type')
allowed_consequences = hl.set({screen_enum[c] for c in annotations[SCREEN_KEY]})
@@ -702,7 +747,18 @@ def _get_annotation_override_filters(self, pathogenicity, annotations):

return annotation_filters

def _has_terms_range_expr(self, terms, field, subfield, range_configs):
def _frequency_override_filter(self, pathogenicity):
path_terms = self._get_clinvar_path_filters(pathogenicity)
return self._has_path_expr(path_terms, CLINVAR_KEY) if path_terms else None

@staticmethod
def _get_clinvar_path_filters(pathogenicity):
return {
f for f in (pathogenicity or {}).get(CLINVAR_KEY) or [] if f in CLINVAR_PATH_SIGNIFICANCES
}

def _has_path_expr(self, terms, field):
subfield, range_configs = self.PATHOGENICITY_FILTERS[field]
enum_lookup = self._get_enum_lookup(field, subfield)

ranges = [[None, None]]
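The other override added above: VariantHailTableQuery._get_family_passes_quality_filter now takes the requested pathogenicity into account, and rows present in the clinvar_path_variants.ht lookup bypass the genotype-quality check entirely. A rough plain-Python sketch of that decision, with illustrative names (quality_check, clinvar_path_keys) that do not appear in the diff:

def family_passes_quality(variant_key, genotype_entries, quality_check, clinvar_path_keys=None):
    # Keep a family's entries if the variant is in the ClinVar pathogenic lookup
    # (only built when a pathogenic/likely_pathogenic ClinVar filter was requested)...
    if clinvar_path_keys is not None and variant_key in clinvar_path_keys:
        return True
    # ...otherwise require every genotype to pass the configured quality checks.
    return all(quality_check(gt) for gt in genotype_entries)
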
27 changes: 25 additions & 2 deletions hail_search/test_search.py
@@ -182,8 +182,20 @@ async def test_quality_filter(self):
omit_sample_type='SV_WES',
)

quality_filter = {'min_gq': 40, 'min_ab': 50}
await self._assert_expected_search(
[VARIANT2, FAMILY_3_VARIANT], quality_filter={'min_gq': 40, 'min_ab': 50}, omit_sample_type='SV_WES',
[VARIANT2, FAMILY_3_VARIANT], quality_filter=quality_filter, omit_sample_type='SV_WES',
)

annotations = {'splice_ai': '0.0'} # Ensures no variants are filtered out by annotation/path filters
await self._assert_expected_search(
[VARIANT1, VARIANT2, FAMILY_3_VARIANT], quality_filter=quality_filter, omit_sample_type='SV_WES',
annotations=annotations, pathogenicity={'clinvar': ['likely_pathogenic', 'vus_or_conflicting']},
)

await self._assert_expected_search(
[VARIANT2, FAMILY_3_VARIANT], quality_filter=quality_filter, omit_sample_type='SV_WES',
annotations=annotations, pathogenicity={'clinvar': ['pathogenic']},
)

async def test_location_search(self):
@@ -235,7 +247,7 @@ async def test_frequency_filter(self):
)

await self._assert_expected_search(
[VARIANT4], frequencies={'gnomad_genomes': {'af': 0.41, 'hh': 1}}, omit_sample_type='SV_WES',
[VARIANT2, VARIANT4], frequencies={'gnomad_genomes': {'af': 0.41, 'hh': 1}}, omit_sample_type='SV_WES',
)

await self._assert_expected_search(
@@ -248,6 +260,17 @@
omit_sample_type='SV_WES',
)

annotations = {'splice_ai': '0.0'} # Ensures no variants are filtered out by annotation/path filters
await self._assert_expected_search(
[VARIANT1, VARIANT2, VARIANT4], frequencies={'gnomad_genomes': {'af': 0.01}}, omit_sample_type='SV_WES',
annotations=annotations, pathogenicity={'clinvar': ['likely_pathogenic', 'vus_or_conflicting']},
)

await self._assert_expected_search(
[VARIANT2, VARIANT4], frequencies={'gnomad_genomes': {'af': 0.01}}, omit_sample_type='SV_WES',
annotations=annotations, pathogenicity={'clinvar': ['pathogenic', 'vus_or_conflicting']},
)

async def test_annotations_filter(self):
await self._assert_expected_search([VARIANT2], pathogenicity={'hgmd': ['hgmd_other']}, omit_sample_type='SV_WES')

4 changes: 2 additions & 2 deletions hail_search/test_utils.py
@@ -97,7 +97,7 @@
'topmed': {'af': 0.0784199982881546, 'ac': 20757, 'an': 264690, 'hom': 0, 'het': 20757},
'exac': {'af': 0.0, 'ac': 0, 'an': 0, 'hom': 0, 'hemi': 0, 'het': 0, 'filter_af': 0.0},
'gnomad_exomes': {'af': 0.0, 'ac': 0, 'an': 0, 'hom': 0, 'hemi': 0, 'filter_af': 0.0},
'gnomad_genomes': {'af': 0.34449315071105957, 'ac': 9271, 'an': 26912, 'hom': 480, 'hemi': 0, 'filter_af': 0.40276646614074707},
'gnomad_genomes': {'af': 0.034449315071105957, 'ac': 927, 'an': 26912, 'hom': 48, 'hemi': 0, 'filter_af': 0.040276646614074707},
},
'predictions': {
'cadd': 4.668000221252441,
@@ -168,7 +168,7 @@
'topmed': {'af': 0.24615199863910675, 'ac': 65154, 'an': 264690, 'hom': 8775, 'het': 47604},
'exac': {'af': 0.29499998688697815, 'ac': 35805, 'an': 121372, 'hom': 5872, 'hemi': 0, 'het': 24061, 'filter_af': 0.4153035283088684},
'gnomad_exomes': {'af': 0.28899794816970825, 'ac': 72672, 'an': 251462, 'hom': 11567, 'hemi': 0, 'filter_af': 0.4116474986076355},
'gnomad_genomes': {'af': 0.2633855640888214, 'ac': 40003, 'an': 151880, 'hom': 5754, 'hemi': 0, 'filter_af': 0.4067690968513489},
'gnomad_genomes': {'af': 0, 'ac': 0, 'an': 0, 'hom': 0, 'hemi': 0, 'filter_af': 0},
},
'predictions': {
'cadd': 20.899999618530273,