Skip to content

Commit

Permalink
Merge pull request #3597 from broadinstitute/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
hanars authored Sep 11, 2023
2 parents cd73180 + c6832b3 commit efc3193
Show file tree
Hide file tree
Showing 68 changed files with 991 additions and 454 deletions.
2 changes: 2 additions & 0 deletions hail_search/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
# NOTE(review): "PATHOGENICTY" is misspelled ("PATHOGENICITY"), but the names
# are imported elsewhere in the package, so they are preserved for compatibility.
PATHOGENICTY_SORT_KEY = 'pathogenicity'
PATHOGENICTY_HGMD_SORT_KEY = 'pathogenicity_hgmd'
# Sort offset used when a pathogenicity annotation is absent — presumably pushes
# un-annotated rows after annotated ones; confirm against the sort implementation
ABSENT_PATH_SORT_OFFSET = 12.5
# Keys identifying the named sort modes shared across query classes
CONSEQUENCE_SORT = 'protein_consequence'
OMIM_SORT = 'in_omim'

# Inheritance-mode genotype keys
ALT_ALT = 'alt_alt'
REF_REF = 'ref_ref'
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
3 changes: 3 additions & 0 deletions hail_search/fixtures/GRCh38/SV_WES/annotations.ht/README.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
This folder comprises a Hail (www.hail.is) native Table or MatrixTable.
Written with version 0.2.109-b71b065e4bb6
Created at 2023/08/24 17:42:53
Empty file.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
This folder comprises a Hail (www.hail.is) native Table or MatrixTable.
Written with version 0.2.109-b71b065e4bb6
Created at 2023/08/25 10:47:38
Empty file.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
This folder comprises a Hail (www.hail.is) native Table or MatrixTable.
Written with version 0.2.109-b71b065e4bb6
Created at 2023/08/25 11:56:33
Empty file.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Empty file added hail_search/queries/__init__.py
Empty file.
436 changes: 35 additions & 401 deletions hail_search/hail_search_query.py → hail_search/queries/base.py

Large diffs are not rendered by default.

91 changes: 91 additions & 0 deletions hail_search/queries/gcnv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import hail as hl

from hail_search.constants import COMP_HET_ALT, HAS_ALT, HAS_REF, REF_REF
from hail_search.queries.sv import SvHailTableQuery


class GcnvHailTableQuery(SvHailTableQuery):
    """Hail table query for the gCNV (exome CNV) callset.

    Subclasses SvHailTableQuery, overriding genotype handling for a callset in
    which ref/ref calls are not stored: a missing entry means ref/ref. Also
    supports per-sample overrides of row-level fields (start/end/num_exon/
    gene_ids) via ``GENOTYPE_OVERRIDE_FIELDS``.
    """

    DATA_TYPE = 'SV_WES'

    # gCNV data has no ref/ref calls so a missing entry indicates ref/ref
    GENOTYPE_QUERY_MAP = {
        **SvHailTableQuery.GENOTYPE_QUERY_MAP,
        REF_REF: hl.is_missing,
        HAS_REF: lambda gt: hl.is_missing(gt) | gt.is_het_ref(),
        HAS_ALT: hl.is_defined,
        COMP_HET_ALT: hl.is_defined,
    }
    # num_alt value reported for entries absent from the callset (i.e. ref/ref)
    MISSING_NUM_ALT = 0

    # gCNV entries carry QS/defragged; the WGS GQ field does not apply
    GENOTYPE_FIELDS = {
        **SvHailTableQuery.GENOTYPE_FIELDS,
        **{f.lower(): f for f in ['QS', 'defragged']},
    }
    del GENOTYPE_FIELDS['gq']
    GENOTYPE_QUERY_FIELDS = {}
    # field -> (aggregator over per-sample values, row-level default expression);
    # used to recompute row fields from sample-level overrides
    GENOTYPE_OVERRIDE_FIELDS = {
        'start': (hl.min, lambda r: r.start_locus.position),
        'end': (hl.max, lambda r: r.end_locus.position),
        'num_exon': (hl.max, lambda r: r.num_exon),
        'gene_ids': (
            # union of all per-sample gene-id sets; no row-level default
            lambda entry_gene_ids: entry_gene_ids.fold(lambda s1, s2: s1.union(s2), hl.empty_set(hl.tstr)),
            lambda r: hl.missing(hl.tset(hl.tstr)),
        ),
    }
    # Expose the per-sample value only when it differs from (or the row lacks)
    # the row-level value; otherwise the genotype field is missing
    COMPUTED_GENOTYPE_FIELDS = {
        **SvHailTableQuery.COMPUTED_GENOTYPE_FIELDS,
        **{k: lambda entry, field, r: hl.or_missing(
            hl.is_missing(r[field]) | (r[field] != entry[f'sample_{field}']), entry[f'sample_{field}']
        ) for k in GENOTYPE_OVERRIDE_FIELDS.keys()},
    }
    # gCNV concordance uses prev_overlap in place of the WGS prev_num_alt field
    COMPUTED_GENOTYPE_FIELDS['prev_overlap'] = COMPUTED_GENOTYPE_FIELDS.pop('prev_num_alt')

    # Drop the last core field ('algorithms', appended by SvHailTableQuery)
    CORE_FIELDS = SvHailTableQuery.CORE_FIELDS[:-1]
    BASE_ANNOTATION_FIELDS = {
        **SvHailTableQuery.BASE_ANNOTATION_FIELDS,
        # pos/end/numExon read the override-annotated row fields added in
        # _format_results, not the raw locus fields used by the WGS parent
        'pos': lambda r: r.start,
        'end': lambda r: r.end,
        'numExon': lambda r: r.num_exon,
    }
    del BASE_ANNOTATION_FIELDS['bothsidesSupport']

    TRANSCRIPTS_ENUM_FIELD = SvHailTableQuery.ENUM_ANNOTATION_FIELDS[SvHailTableQuery.TRANSCRIPTS_FIELD]
    # Restrict formatted transcripts to the sample-level gene_ids override, when present
    ENUM_ANNOTATION_FIELDS = {SvHailTableQuery.TRANSCRIPTS_FIELD: {
        **TRANSCRIPTS_ENUM_FIELD,
        'format_array_values': lambda values, r: GcnvHailTableQuery.TRANSCRIPTS_ENUM_FIELD['format_array_values'](
            hl.if_else(hl.is_missing(r.gene_ids), values, values.filter(lambda t: r.gene_ids.contains(t.geneId))), r,
        ),
    }}

    # gnomad SV population frequencies are not available for the exome callset
    POPULATIONS = {k: v for k, v in SvHailTableQuery.POPULATIONS.items() if k != 'gnomad_svs'}

    @classmethod
    def _get_genotype_override_field(cls, r, field, family_entries_field=None):
        """Aggregate per-sample ``sample_<field>`` values across family entries.

        Falls back to the row-level default when any called sample (defined GT)
        has no sample-level value, i.e. the row-level value applies to it.
        """
        agg, get_default = cls.GENOTYPE_OVERRIDE_FIELDS[field]
        sample_field = f'sample_{field}'
        entries = r[family_entries_field or 'family_entries'].flatmap(lambda x: x)
        return hl.if_else(
            entries.any(lambda g: hl.is_defined(g.GT) & hl.is_missing(g[sample_field])),
            get_default(r), agg(entries.map(lambda g: g[sample_field]))
        )

    def _format_results(self, ht, annotation_fields):
        # Annotate row-level start/end/num_exon/gene_ids with their
        # sample-aware override values before standard formatting
        ht = ht.annotate(**{k: self._get_genotype_override_field(ht, k) for k in self.GENOTYPE_OVERRIDE_FIELDS})
        return super()._format_results(ht, annotation_fields)

    def get_allowed_sv_type_ids(self, sv_types):
        # Only 'gCNV_'-prefixed types apply to this data type; strip the prefix
        # before mapping to the shared sv_type enum ids
        return super().get_allowed_sv_type_ids([
            type.replace('gCNV_', '') for type in sv_types if type.startswith('gCNV_')
        ])

    @classmethod
    def _gene_ids_expr(cls, ht, comp_het=False):
        family_entries_field = 'comp_het_family_entries' if comp_het else None
        # NOTE(review): the fallback calls super()._gene_ids_expr(ht) without
        # forwarding comp_het — confirm the base implementation does not need it
        return hl.or_else(
            cls._get_genotype_override_field(ht, 'gene_ids', family_entries_field=family_entries_field),
            super()._gene_ids_expr(ht),
        )

    def _additional_annotation_fields(self):
        # None of the WGS-specific fields (cpxIntervals/endChrom/svSourceDetail) apply
        return {}
62 changes: 62 additions & 0 deletions hail_search/queries/multi_data_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import hail as hl

from hail_search.constants import CONSEQUENCE_SORT, OMIM_SORT
from hail_search.queries.base import BaseHailTableQuery
from hail_search.queries.variants import VariantHailTableQuery
from hail_search.queries.sv import SvHailTableQuery
from hail_search.queries.gcnv import GcnvHailTableQuery

# DATA_TYPE string -> concrete query class, used to dispatch per data type
QUERY_CLASS_MAP = {cls.DATA_TYPE: cls for cls in [VariantHailTableQuery, SvHailTableQuery, GcnvHailTableQuery]}


class MultiDataTypeHailTableQuery(BaseHailTableQuery):
    """Runs a search across multiple data types and merges the results.

    Delegates filtering to one concrete query per data type, then outer-joins
    the formatted per-data-type tables into a single table with a unified
    ``_sort`` field.
    """

    def __init__(self, sample_data, *args, **kwargs):
        # One concrete query per data type, keyed by DATA_TYPE
        self._data_type_queries = {k: QUERY_CLASS_MAP[k](v, *args, **kwargs) for k, v in sample_data.items()}
        super().__init__(sample_data, *args, **kwargs)

    def _load_filtered_table(self, *args, **kwargs):
        # Loading/filtering is performed by the per-data-type queries
        pass

    def format_search_ht(self):
        """Outer-join the per-data-type formatted tables.

        Each row nests the source table's full row under its data type key;
        the ``_sort`` fields are coalesced across tables so the merged table
        sorts uniformly.
        """
        ht = None
        for data_type, query in self._data_type_queries.items():
            dt_ht = query.format_search_ht()
            # Reshape SV sort values to match variant sort values where needed
            merged_sort_expr = self._merged_sort_expr(data_type, dt_ht)
            if merged_sort_expr is not None:
                dt_ht = dt_ht.annotate(_sort=merged_sort_expr)
            dt_ht = dt_ht.select('_sort', **{data_type: dt_ht.row})
            if ht is None:
                ht = dt_ht
            else:
                ht = ht.join(dt_ht, 'outer')
                # hail renames the right table's _sort to _sort_1 on join
                ht = ht.transmute(_sort=hl.or_else(ht._sort, ht._sort_1))
        return ht

    def _merged_sort_expr(self, data_type, ht):
        # Certain sorts have an extra element for variant-type data, so need to add an element for SV data
        if not data_type.startswith('SV'):
            return None

        if self._sort == CONSEQUENCE_SORT:
            # 4.5 presumably slots SVs between variant consequence ranks 4 and 5
            # — confirm against the consequence rank enum
            return hl.array([hl.float64(4.5)]).extend(ht._sort.map(hl.float64))
        elif self._sort == OMIM_SORT:
            return hl.array([hl.int64(0)]).extend(ht._sort)
        elif self._sort_metadata:
            # Duplicate the first sort element to pad to the variant shape
            return ht._sort[:1].extend(ht._sort)

        return None

    def _format_collected_rows(self, collected):
        # Unwrap each joined row to whichever data type's nested row is present
        return super()._format_collected_rows([
            next(row.get(data_type) for data_type in self._data_type_queries if row.get(data_type))
            for row in collected
        ])

    def format_gene_counts_ht(self):
        # Outer-join per-data-type gene count tables, coalescing the shared
        # gene_ids/families fields (hail suffixes the right table's with _1)
        hts = [query.format_gene_counts_ht() for query in self._data_type_queries.values()]
        ht = hts[0]
        for dt_ht in hts[1:]:
            ht = ht.join(dt_ht, 'outer')
            ht = ht.transmute(**{k: hl.or_else(ht[k], ht[f'{k}_1']) for k in ['gene_ids', 'families']})
        return ht
116 changes: 116 additions & 0 deletions hail_search/queries/sv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import hail as hl


from hail_search.constants import CONSEQUENCE_SORT, NEW_SV_FIELD, STRUCTURAL_ANNOTATION_FIELD
from hail_search.queries.base import BaseHailTableQuery, PredictionPath


class SvHailTableQuery(BaseHailTableQuery):
    """Hail table query for genome-based (WGS) structural variant data.

    Extends BaseHailTableQuery with SV-specific genotype/annotation fields,
    interval filtering that accounts for SVs spanning two contigs, SV type
    filtering, and new-call quality filtering.
    """

    DATA_TYPE = 'SV_WGS'

    GENOTYPE_FIELDS = {f.lower(): f for f in ['CN', 'GQ']}
    # Call-concordance fields are nested under entry.concordance in the table
    COMPUTED_GENOTYPE_FIELDS = {
        k: lambda entry, field, *args: entry.concordance[field] for k in ['new_call', 'prev_call', 'prev_num_alt']
    }
    # 'gq' maps to None so the generic gq quality filter is disabled; gq_sv is used instead
    GENOTYPE_QUERY_FIELDS = {'gq_sv': 'GQ', 'gq': None}

    TRANSCRIPTS_FIELD = 'sorted_gene_consequences'
    TRANSCRIPT_CONSEQUENCE_FIELD = 'major_consequence'
    CORE_FIELDS = BaseHailTableQuery.CORE_FIELDS + ['algorithms']
    BASE_ANNOTATION_FIELDS = {
        'bothsidesSupport': lambda r: r.bothsides_support,
        'chrom': lambda r: r.start_locus.contig.replace('^chr', ''),
        'pos': lambda r: r.start_locus.position,
        'end': lambda r: r.end_locus.position,
        # GRCh37 lift-over of the end locus, when available
        'rg37LocusEnd': lambda r: hl.or_missing(
            hl.is_defined(r.rg37_locus_end), hl.struct(contig=r.rg37_locus_end.contig, position=r.rg37_locus_end.position)
        ),
        **BaseHailTableQuery.BASE_ANNOTATION_FIELDS,
    }
    ENUM_ANNOTATION_FIELDS = {
        TRANSCRIPTS_FIELD: BaseHailTableQuery.ENUM_ANNOTATION_FIELDS['transcripts'],
    }

    POPULATIONS = {
        'sv_callset': {'hemi': None, 'sort': 'callset_af'},
        'gnomad_svs': {'id': 'ID', 'ac': None, 'an': None, 'hom': None, 'hemi': None, 'het': None, 'sort': 'gnomad'},
    }
    POPULATION_FIELDS = {'sv_callset': 'gt_stats'}
    PREDICTION_FIELDS_CONFIG = {
        'strvctvre': PredictionPath('strvctvre', 'score'),
    }

    SORTS = {
        **BaseHailTableQuery.SORTS,
        CONSEQUENCE_SORT: lambda r: [hl.min(r.sorted_gene_consequences.map(lambda g: g.major_consequence_id))],
        # NOTE(review): start - end is the negated SV length, so ascending sort
        # puts the largest SVs first; -50 is the fallback for SVs spanning two
        # contigs — confirm intended ordering
        'size': lambda r: [hl.if_else(
            r.start_locus.contig == r.end_locus.contig, r.start_locus.position - r.end_locus.position, -50,
        )],
    }

    def _parse_intervals(self, intervals, variant_ids, variant_keys=None, **kwargs):
        # SVs are looked up by variant_keys, not variant_ids, so ids are dropped
        parsed_intervals, _ = super()._parse_intervals(intervals, variant_ids=None, **kwargs)
        return parsed_intervals, variant_keys

    def _filter_annotated_table(self, *args, parsed_intervals=None, exclude_intervals=False, **kwargs):
        """Apply interval filtering before the base annotated-table filters.

        An SV matches an interval if it overlaps it; for SVs whose start and
        end are on different contigs, either endpoint being contained counts.
        """
        if parsed_intervals:
            interval_filter = hl.array(parsed_intervals).any(lambda interval: hl.if_else(
                self._ht.start_locus.contig == self._ht.end_locus.contig,
                interval.overlaps(hl.interval(self._ht.start_locus, self._ht.end_locus)),
                interval.contains(self._ht.start_locus) | interval.contains(self._ht.end_locus),
            ))
            if exclude_intervals:
                interval_filter = ~interval_filter
            self._ht = self._ht.filter(interval_filter)

        return super()._filter_annotated_table(*args, **kwargs)

    def _get_family_passes_quality_filter(self, quality_filter, annotations=None, **kwargs):
        # When the new-SV annotation filter is requested, additionally require
        # at least one entry in the family to be a newly called SV
        passes_quality = super()._get_family_passes_quality_filter(quality_filter)
        if not (annotations or {}).get(NEW_SV_FIELD):
            return passes_quality

        entries_has_new_call = lambda entries: entries.any(lambda x: x.concordance.new_call)
        if passes_quality is None:
            return entries_has_new_call

        return lambda entries: entries_has_new_call(entries) & passes_quality(entries)

    def _get_allowed_consequences_annotations(self, annotations, annotation_filters, is_secondary=False):
        if is_secondary:
            # SV search can specify secondary SV types, as well as secondary consequences
            annotation_filters = self._get_annotation_override_filters(annotations)
        return super()._get_allowed_consequences_annotations(annotations, annotation_filters)

    def _get_annotation_override_filters(self, annotations, **kwargs):
        # Filter rows to the requested structural variant types, if any
        annotation_filters = []
        if annotations.get(STRUCTURAL_ANNOTATION_FIELD):
            allowed_type_ids = self.get_allowed_sv_type_ids(annotations[STRUCTURAL_ANNOTATION_FIELD])
            if allowed_type_ids:
                annotation_filters.append(hl.set(allowed_type_ids).contains(self._ht.sv_type_id))

        return annotation_filters

    def get_allowed_sv_type_ids(self, sv_types):
        # Map SV type names to their ids in the table's sv_type enum
        return self._get_enum_terms_ids('sv_type', None, sv_types)

    def _additional_annotation_fields(self):
        """SV-specific response fields: cpxIntervals, endChrom, svSourceDetail."""
        sv_type_enum = self._enums['sv_type']
        insertion_type_id = sv_type_enum.index('INS')
        # End contig, only when it differs from the start contig
        get_end_chrom = lambda r: hl.or_missing(r.start_locus.contig != r.end_locus.contig, r.end_locus.contig.replace('^chr', ''))
        return {
            'cpxIntervals': lambda r: self._format_enum(
                r, 'cpx_intervals', {'type': sv_type_enum}, annotate_value=lambda val, *args: {
                    'chrom': val.start.contig,
                    'start': val.start.position,
                    'end': val.end.position,
                },
            ),
            # For insertions, end_locus represents the svSourceDetail, otherwise represents the endChrom
            'endChrom': lambda r: hl.or_missing(r.sv_type_id != insertion_type_id, get_end_chrom(r)),
            'svSourceDetail': lambda r: hl.or_missing(r.sv_type_id == insertion_type_id, hl.bind(
                lambda end_chrom: hl.or_missing(hl.is_defined(end_chrom), hl.struct(chrom=end_chrom)),
                get_end_chrom(r),
            )),
        }
Loading

0 comments on commit efc3193

Please sign in to comment.