From 9f84c5c48f392d652b1ec27f03f339f52e36e87a Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Tue, 22 Aug 2023 14:58:29 -0400
Subject: [PATCH 01/16] clean up

---
 hail_search/test_search.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/hail_search/test_search.py b/hail_search/test_search.py
index 6abe0eeba8..83cc30fbcc 100644
--- a/hail_search/test_search.py
+++ b/hail_search/test_search.py
@@ -121,10 +121,6 @@ async def _assert_expected_search(self, results, **search_kwargs):
         self.assertSetEqual(set(resp_json.keys()), {'results', 'total'})
         self.assertEqual(resp_json['total'], len(results))
         for i, result in enumerate(resp_json['results']):
-            if result != results[i]:
-                self.assertSetEqual(set(result.keys()), set(results[i].keys()))
-                diff_k = {k for k, v in results[i].items() if v != result[k]}
-                import pdb; pdb.set_trace()
             self.assertEqual(result, results[i])
 
     async def test_single_family_search(self):

From 521f591363821bfddc8304aa1412bad9ec6cebc3 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Tue, 22 Aug 2023 17:03:21 -0400
Subject: [PATCH 02/16] handle gcnv missing ref calls

---
 hail_search/hail_search_query.py | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/hail_search/hail_search_query.py b/hail_search/hail_search_query.py
index 154f36a58b..91009584de 100644
--- a/hail_search/hail_search_query.py
+++ b/hail_search/hail_search_query.py
@@ -40,6 +40,7 @@ class BaseHailTableQuery(object):
         HAS_ALT: lambda gt: gt.is_non_ref(),
         HAS_REF: lambda gt: gt.is_hom_ref() | gt.is_het_ref(),
     }
+    MISSING_NUM_ALT = -1
 
     GENOTYPE_FIELDS = {}
     NESTED_GENOTYPE_FIELDS = {}
@@ -97,7 +98,7 @@ def annotation_fields(self):
                 lambda gt: hl.is_defined(gt.individualGuid)
             ).group_by(lambda x: x.individualGuid).map_values(lambda x: x[0].select(
                 'sampleId', 'individualGuid', 'familyGuid',
-                numAlt=hl.if_else(hl.is_defined(x[0].GT), x[0].GT.n_alt_alleles(), -1),
+                numAlt=hl.if_else(hl.is_defined(x[0].GT), x[0].GT.n_alt_alleles(), self.MISSING_NUM_ALT),
                 **{k: x[0][field] for k, field in self.GENOTYPE_FIELDS.items()},
                 **{_to_camel_case(k): x[0][field][k] for field, v in self.NESTED_GENOTYPE_FIELDS.items() for k in v},
             )),
@@ -365,7 +366,7 @@ def _add_entry_sample_families(cls, ht, sample_data):
 
         ht = ht.transmute(
             family_entries=family_sample_indices.map(lambda sample_indices: sample_indices.map(
-                lambda i: hl.or_else(ht.entries[i], cls._missing_entry(ht.entries[i])).annotate(
+                lambda i: ht.entries[i].annotate(
                     sampleId=sample_index_id_map.get(i),
                     individualGuid=sample_index_individual_map.get(i),
                     familyGuid=sample_index_family_map.get(i),
@@ -376,11 +377,6 @@ def _add_entry_sample_families(cls, ht, sample_data):
 
         return ht, sample_id_family_index_map, num_families
 
-    @classmethod
-    def _missing_entry(cls, entry):
-        entry_type = dict(**entry.dtype)
-        return hl.struct(**{k: hl.missing(v) for k, v in entry_type.items()})
-
     def _filter_inheritance(self, ht, inheritance_mode, inheritance_filter, sample_data, sample_id_family_index_map):
         any_valid_entry = lambda x: self.GENOTYPE_QUERY_MAP[HAS_ALT](x.GT)
 
@@ -1162,10 +1158,14 @@ class GcnvHailTableQuery(SvHailTableQuery):
 
     DATA_TYPE = 'SV_WES'
 
+    #  gCNV data has no ref/ref calls so a missing entry indicates ref/ref
     GENOTYPE_QUERY_MAP = {
         **BaseHailTableQuery.GENOTYPE_QUERY_MAP,
+        REF_REF: hl.is_missing,
+        HAS_REF: lambda gt: hl.is_missing(gt) | gt.is_het_ref(),
         COMP_HET_ALT: BaseHailTableQuery.GENOTYPE_QUERY_MAP[HAS_ALT],
     }
+    MISSING_NUM_ALT = 0
 
     GENOTYPE_FIELDS = {
         **SvHailTableQuery.GENOTYPE_FIELDS,
@@ -1173,6 +1173,7 @@ class GcnvHailTableQuery(SvHailTableQuery):
         **{_to_camel_case(f): f'sample_{f}' for f in ['start', 'end', 'num_exon', 'gene_ids']},
     }
     del GENOTYPE_FIELDS['gq']
+    GENOTYPE_QUERY_FIELDS = {}
     NESTED_GENOTYPE_FIELDS = {
         'concordance': SvHailTableQuery.NESTED_GENOTYPE_FIELDS['concordance'][:-1] + ['prev_overlap']
     }
@@ -1197,15 +1198,10 @@ def _get_genotype_override_field(r, field, agg, default=None):
         if default is None:
             default = r[field]
         return hl.if_else(
-            entries.any(lambda g: g.GT.is_non_ref() & hl.is_missing(g[sample_field])),
+            entries.any(lambda g: hl.is_defined(g.GT) & hl.is_missing(g[sample_field])),
             default, agg(entries.map(lambda g: g[sample_field]))
         )
 
-    @classmethod
-    def _missing_entry(cls, entry):
-        #  gCNV data has no ref/ref calls so a missing entry indicates ref/ref
-        return super()._missing_entry(entry).annotate(GT=hl.Call([0, 0]))
-
     def _filter_annotated_table(self, **kwargs):
         # sorted_gene_consequences may contain genes absent from the queried families, so remove those before filtering
         empty_gene_set = hl.empty_set(hl.tstr)

From 734c4c117e05d9d95a5ffaf60ff146a1a25f6e57 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Wed, 23 Aug 2023 11:43:52 -0400
Subject: [PATCH 03/16] fix gene id override behavior

---
 hail_search/hail_search_query.py             | 36 ++++++++++++--------
 seqr/utils/search/elasticsearch/constants.py |  2 +-
 2 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/hail_search/hail_search_query.py b/hail_search/hail_search_query.py
index 91009584de..90b92b0ac3 100644
--- a/hail_search/hail_search_query.py
+++ b/hail_search/hail_search_query.py
@@ -64,7 +64,7 @@ class BaseHailTableQuery(object):
             'response_key': 'transcripts',
             'empty_array': True,
             'format_value': lambda value: value.rename({k: _to_camel_case(k) for k in value.keys()}),
-            'format_values': lambda values: values.group_by(lambda t: t.geneId),
+            'format_values': lambda values, *args: values.group_by(lambda t: t.geneId),
         },
     }
     LIFTOVER_ANNOTATION_FIELDS = {
@@ -145,7 +145,7 @@ def _get_enum_lookup(self, field, subfield):
 
     def _get_enum_terms_ids(self, field, subfield, terms):
         enum = self._get_enum_lookup(field, subfield)
-        return {enum[t] for t in terms if enum.get(t)}
+        return {enum[t] for t in terms if enum.get(t) is not None}
 
     def _format_enum_response(self, k, enum):
         enum_config = self.ENUM_ANNOTATION_FIELDS.get(k, {})
@@ -163,7 +163,7 @@ def _format_enum(cls, r, field, enum, empty_array=False, format_values=None, **k
                 value = hl.or_else(value, hl.empty_array(value.dtype.element_type))
             value = value.map(lambda x: cls._enum_field(x, enum, **kwargs))
             if format_values:
-                value = format_values(value)
+                value = format_values(value, r)
             return value
 
         return cls._enum_field(value, enum, **kwargs)
@@ -630,6 +630,7 @@ def _filter_by_annotations(self, pathogenicity, annotations, annotations_seconda
         annotation_filter = self._ht[HAS_ALLOWED_ANNOTATION]
         if has_secondary_annotations:
             annotation_filter |= self._ht[HAS_ALLOWED_SECONDARY_ANNOTATION]
+
         self._ht = self._ht.filter(annotation_filter)
 
     def _get_allowed_consequences_annotations(self, annotations, annotation_filters, is_secondary=False):
@@ -1163,7 +1164,8 @@ class GcnvHailTableQuery(SvHailTableQuery):
         **BaseHailTableQuery.GENOTYPE_QUERY_MAP,
         REF_REF: hl.is_missing,
         HAS_REF: lambda gt: hl.is_missing(gt) | gt.is_het_ref(),
-        COMP_HET_ALT: BaseHailTableQuery.GENOTYPE_QUERY_MAP[HAS_ALT],
+        HAS_ALT: hl.is_defined,
+        COMP_HET_ALT: hl.is_defined,
     }
     MISSING_NUM_ALT = 0
 
@@ -1189,6 +1191,14 @@ class GcnvHailTableQuery(SvHailTableQuery):
     }
     del BASE_ANNOTATION_FIELDS['bothsidesSupport']
 
+    TRANSCRIPTS_ENUM_FIELD = SvHailTableQuery.ENUM_ANNOTATION_FIELDS[SvHailTableQuery.TRANSCRIPTS_FIELD]
+    ENUM_ANNOTATION_FIELDS = {SvHailTableQuery.TRANSCRIPTS_FIELD: {
+        **TRANSCRIPTS_ENUM_FIELD,
+        'format_values': lambda values, r: GcnvHailTableQuery.TRANSCRIPTS_ENUM_FIELD['format_values'](
+            GcnvHailTableQuery._get_gene_id_transcripts_override(values, r), r
+        ),
+    }}
+
     POPULATIONS = {k: v for k, v in SvHailTableQuery.POPULATIONS.items() if k != 'gnomad_svs'}
 
     @staticmethod
@@ -1202,21 +1212,19 @@ def _get_genotype_override_field(r, field, agg, default=None):
             default, agg(entries.map(lambda g: g[sample_field]))
         )
 
-    def _filter_annotated_table(self, **kwargs):
-        # sorted_gene_consequences may contain genes absent from the queried families, so remove those before filtering
+    @classmethod
+    def _get_gene_id_transcripts_override(cls, transcripts, r):
         empty_gene_set = hl.empty_set(hl.tstr)
-        geneotype_gene_ids_expr = self._get_genotype_override_field(
-            self._ht, 'gene_ids',
+        geneotype_gene_ids_expr = cls._get_genotype_override_field(
+            r, 'gene_ids',
             lambda entry_gene_ids: entry_gene_ids.fold(lambda s1, s2: s1.union(s2), empty_gene_set),
             default=hl.missing(empty_gene_set.dtype))
-        self._ht = self._ht.annotate(sorted_gene_consequences=hl.bind(
+        return hl.bind(
             lambda gene_ids: hl.if_else(
-                hl.is_missing(gene_ids), self._ht.sorted_gene_consequences,
-                self._ht.sorted_gene_consequences.filter(lambda t: gene_ids.contains(t.gene_id)),
+                hl.is_missing(gene_ids), transcripts,
+                transcripts.filter(lambda t: gene_ids.contains(t.geneId)),
             ), geneotype_gene_ids_expr,
-        ))
-
-        return super()._filter_annotated_table(**kwargs)
+        )
 
     def _additional_annotation_fields(self):
         return {}
diff --git a/seqr/utils/search/elasticsearch/constants.py b/seqr/utils/search/elasticsearch/constants.py
index 0bd970693a..2053c16de5 100644
--- a/seqr/utils/search/elasticsearch/constants.py
+++ b/seqr/utils/search/elasticsearch/constants.py
@@ -437,7 +437,7 @@ def get_prediction_response_key(key):
 SV_SAMPLE_OVERRIDE_FIELD_CONFIGS = {
     'pos': {'select_val': min, 'genotype_field': 'start'},
     'end': {'select_val': max},
-    'numExon':{'select_val': max},
+    'numExon': {'select_val': max},
     'geneIds': {
         'select_val': lambda gene_lists: set([gene_id for gene_list in gene_lists for gene_id in (gene_list or [])]),
         'equal': lambda a, b: set(a or []) == set(b or [])

From 5a7d7b5bbd458457254644f0099252dc692ca263 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Wed, 23 Aug 2023 11:52:00 -0400
Subject: [PATCH 04/16] clean up

---
 hail_search/hail_search_query.py             | 1 -
 seqr/utils/search/elasticsearch/constants.py | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/hail_search/hail_search_query.py b/hail_search/hail_search_query.py
index aaed72054e..a26c51b959 100644
--- a/hail_search/hail_search_query.py
+++ b/hail_search/hail_search_query.py
@@ -630,7 +630,6 @@ def _filter_by_annotations(self, pathogenicity, annotations, annotations_seconda
         annotation_filter = self._ht[HAS_ALLOWED_ANNOTATION]
         if has_secondary_annotations:
             annotation_filter |= self._ht[HAS_ALLOWED_SECONDARY_ANNOTATION]
-
         self._ht = self._ht.filter(annotation_filter)
 
     def _get_allowed_consequences_annotations(self, annotations, annotation_filters, is_secondary=False):
diff --git a/seqr/utils/search/elasticsearch/constants.py b/seqr/utils/search/elasticsearch/constants.py
index 2053c16de5..0bd970693a 100644
--- a/seqr/utils/search/elasticsearch/constants.py
+++ b/seqr/utils/search/elasticsearch/constants.py
@@ -437,7 +437,7 @@ def get_prediction_response_key(key):
 SV_SAMPLE_OVERRIDE_FIELD_CONFIGS = {
     'pos': {'select_val': min, 'genotype_field': 'start'},
     'end': {'select_val': max},
-    'numExon': {'select_val': max},
+    'numExon':{'select_val': max},
     'geneIds': {
         'select_val': lambda gene_lists: set([gene_id for gene_list in gene_lists for gene_id in (gene_list or [])]),
         'equal': lambda a, b: set(a or []) == set(b or [])

From 7866de2eecbbb7724be3e04312f18ffd1d2e1415 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Thu, 24 Aug 2023 14:20:12 -0400
Subject: [PATCH 05/16] remove endChrom from gcnv results

---
 hail_search/hail_search_query.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/hail_search/hail_search_query.py b/hail_search/hail_search_query.py
index 450e30367d..bf4004570f 100644
--- a/hail_search/hail_search_query.py
+++ b/hail_search/hail_search_query.py
@@ -1065,7 +1065,6 @@ class SvHailTableQuery(BaseHailTableQuery):
     BASE_ANNOTATION_FIELDS = {
         'bothsidesSupport': lambda r: r.bothsides_support,
         'chrom': lambda r: r.start_locus.contig.replace('^chr', ''),
-        'endChrom': lambda r: hl.or_missing(r.start_locus.contig != r.end_locus.contig, r.end_locus.contig.replace('^chr', '')),
         'pos': lambda r: r.start_locus.position,
         'end': lambda r: r.end_locus.position,
         'rg37LocusEnd': lambda r: hl.or_missing(
@@ -1149,7 +1148,7 @@ def _get_annotation_override_filters(self, annotations, **kwargs):
     def _additional_annotation_fields(self):
         sv_type_enum = self._enums['sv_type']
         insertion_type_id = sv_type_enum.index('INS')
-        get_end_chrom = self.BASE_ANNOTATION_FIELDS['endChrom']
+        get_end_chrom = lambda r: hl.or_missing(r.start_locus.contig != r.end_locus.contig, r.end_locus.contig.replace('^chr', ''))
         return {
             'cpxIntervals': lambda r: self._format_enum(
                 r, 'cpx_intervals', {'type': sv_type_enum}, annotate_value=lambda val, *args: {

From 42a704477dff0834c2eb8ad0138f30266d5dc829 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Thu, 24 Aug 2023 14:29:27 -0400
Subject: [PATCH 06/16] handle gcnv specific sv type

---
 hail_search/hail_search_query.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/hail_search/hail_search_query.py b/hail_search/hail_search_query.py
index bf4004570f..b09f186de8 100644
--- a/hail_search/hail_search_query.py
+++ b/hail_search/hail_search_query.py
@@ -1139,12 +1139,15 @@ def _get_consequence_filter(self, allowed_consequence_ids, annotation_exprs):
     def _get_annotation_override_filters(self, annotations, **kwargs):
         annotation_filters = []
         if annotations.get(STRUCTURAL_ANNOTATION_FIELD):
-            allowed_type_ids = self._get_enum_terms_ids('sv_type', None, annotations[STRUCTURAL_ANNOTATION_FIELD])
+            allowed_type_ids = self._get_allowed_sv_types(annotations[STRUCTURAL_ANNOTATION_FIELD])
             if allowed_type_ids:
                 annotation_filters.append(hl.set(allowed_type_ids).contains(self._ht.sv_type_id))
 
         return annotation_filters
 
+    def _get_allowed_sv_types(self, sv_types):
+        return self._get_enum_terms_ids('sv_type', None, sv_types)
+
     def _additional_annotation_fields(self):
         sv_type_enum = self._enums['sv_type']
         insertion_type_id = sv_type_enum.index('INS')
@@ -1237,6 +1240,11 @@ def _get_gene_id_transcripts_override(cls, transcripts, r):
             ), geneotype_gene_ids_expr,
         )
 
+    def _get_allowed_sv_types(self, sv_types):
+        return super()._get_allowed_sv_types([
+            type.replace('gCNV_', '') for type in sv_types if type.startswith('gCNV_')
+        ])
+
     def _additional_annotation_fields(self):
         return {}
 

From c6c3a47d5fa3231bbe7c7c694b7d8e78ba123e2e Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Thu, 24 Aug 2023 14:38:04 -0400
Subject: [PATCH 07/16] clean up

---
 hail_search/hail_search_query.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/hail_search/hail_search_query.py b/hail_search/hail_search_query.py
index b09f186de8..678b9b3343 100644
--- a/hail_search/hail_search_query.py
+++ b/hail_search/hail_search_query.py
@@ -1139,13 +1139,13 @@ def _get_consequence_filter(self, allowed_consequence_ids, annotation_exprs):
     def _get_annotation_override_filters(self, annotations, **kwargs):
         annotation_filters = []
         if annotations.get(STRUCTURAL_ANNOTATION_FIELD):
-            allowed_type_ids = self._get_allowed_sv_types(annotations[STRUCTURAL_ANNOTATION_FIELD])
+            allowed_type_ids = self.get_allowed_sv_type_ids(annotations[STRUCTURAL_ANNOTATION_FIELD])
             if allowed_type_ids:
                 annotation_filters.append(hl.set(allowed_type_ids).contains(self._ht.sv_type_id))
 
         return annotation_filters
 
-    def _get_allowed_sv_types(self, sv_types):
+    def get_allowed_sv_type_ids(self, sv_types):
         return self._get_enum_terms_ids('sv_type', None, sv_types)
 
     def _additional_annotation_fields(self):
@@ -1240,8 +1240,8 @@ def _get_gene_id_transcripts_override(cls, transcripts, r):
             ), geneotype_gene_ids_expr,
         )
 
-    def _get_allowed_sv_types(self, sv_types):
-        return super()._get_allowed_sv_types([
+    def get_allowed_sv_type_ids(self, sv_types):
+        return super().get_allowed_sv_type_ids([
             type.replace('gCNV_', '') for type in sv_types if type.startswith('gCNV_')
         ])
 

From 881df7dcbd64092505afb3d89cfb5a6b4b121b37 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Fri, 25 Aug 2023 10:48:03 -0400
Subject: [PATCH 08/16] add wes fixture data

---
 .../GRCh38/SV_WES/annotations.ht/.README.txt.crc  | Bin 0 -> 12 bytes
 .../GRCh38/SV_WES/annotations.ht/._SUCCESS.crc    | Bin 0 -> 8 bytes
 .../SV_WES/annotations.ht/.metadata.json.gz.crc   | Bin 0 -> 12 bytes
 .../GRCh38/SV_WES/annotations.ht/README.txt       |   3 +++
 .../GRCh38/SV_WES/annotations.ht/_SUCCESS         |   0
 .../annotations.ht/globals/.metadata.json.gz.crc  | Bin 0 -> 12 bytes
 .../annotations.ht/globals/metadata.json.gz       | Bin 0 -> 366 bytes
 .../annotations.ht/globals/parts/.part-0.crc      | Bin 0 -> 12 bytes
 .../SV_WES/annotations.ht/globals/parts/part-0    | Bin 0 -> 330 bytes
 .../.index.crc                                    | Bin 0 -> 12 bytes
 .../.metadata.json.gz.crc                         | Bin 0 -> 12 bytes
 .../index                                         | Bin 0 -> 104 bytes
 .../metadata.json.gz                              | Bin 0 -> 161 bytes
 .../GRCh38/SV_WES/annotations.ht/metadata.json.gz | Bin 0 -> 443 bytes
 .../annotations.ht/rows/.metadata.json.gz.crc     | Bin 0 -> 16 bytes
 .../SV_WES/annotations.ht/rows/metadata.json.gz   | Bin 0 -> 733 bytes
 ...0-0-0-7fdcd45d-8bea-106b-f455-e20e0468960a.crc | Bin 0 -> 12 bytes
 ...0-550-0-0-7fdcd45d-8bea-106b-f455-e20e0468960a | Bin 0 -> 366 bytes
 .../SV_WES/families/F000002_2.ht/.README.txt.crc  | Bin 0 -> 12 bytes
 .../SV_WES/families/F000002_2.ht/._SUCCESS.crc    | Bin 0 -> 8 bytes
 .../families/F000002_2.ht/.metadata.json.gz.crc   | Bin 0 -> 12 bytes
 .../SV_WES/families/F000002_2.ht/README.txt       |   3 +++
 .../GRCh38/SV_WES/families/F000002_2.ht/_SUCCESS  |   0
 .../F000002_2.ht/globals/.metadata.json.gz.crc    | Bin 0 -> 12 bytes
 .../F000002_2.ht/globals/metadata.json.gz         | Bin 0 -> 295 bytes
 .../F000002_2.ht/globals/parts/.part-0.crc        | Bin 0 -> 12 bytes
 .../families/F000002_2.ht/globals/parts/part-0    | Bin 0 -> 477 bytes
 .../.index.crc                                    | Bin 0 -> 12 bytes
 .../.metadata.json.gz.crc                         | Bin 0 -> 12 bytes
 .../index                                         | Bin 0 -> 104 bytes
 .../metadata.json.gz                              | Bin 0 -> 161 bytes
 .../SV_WES/families/F000002_2.ht/metadata.json.gz | Bin 0 -> 368 bytes
 .../F000002_2.ht/rows/.metadata.json.gz.crc       | Bin 0 -> 16 bytes
 .../families/F000002_2.ht/rows/metadata.json.gz   | Bin 0 -> 669 bytes
 ...8-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.crc | Bin 0 -> 12 bytes
 ...0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48 | Bin 0 -> 200 bytes
 .../SV_WES/projects/R0001_1kg.ht/.README.txt.crc  | Bin 0 -> 12 bytes
 .../SV_WES/projects/R0001_1kg.ht/._SUCCESS.crc    | Bin 0 -> 8 bytes
 .../projects/R0001_1kg.ht/.metadata.json.gz.crc   | Bin 0 -> 12 bytes
 .../SV_WES/projects/R0001_1kg.ht/README.txt       |   3 +++
 .../GRCh38/SV_WES/projects/R0001_1kg.ht/_SUCCESS  |   0
 .../R0001_1kg.ht/globals/.metadata.json.gz.crc    | Bin 0 -> 12 bytes
 .../R0001_1kg.ht/globals/metadata.json.gz         | Bin 0 -> 295 bytes
 .../R0001_1kg.ht/globals/parts/.part-0.crc        | Bin 0 -> 12 bytes
 .../projects/R0001_1kg.ht/globals/parts/part-0    | Bin 0 -> 485 bytes
 .../.index.crc                                    | Bin 0 -> 12 bytes
 .../.metadata.json.gz.crc                         | Bin 0 -> 12 bytes
 .../index                                         | Bin 0 -> 104 bytes
 .../metadata.json.gz                              | Bin 0 -> 161 bytes
 .../SV_WES/projects/R0001_1kg.ht/metadata.json.gz | Bin 0 -> 368 bytes
 .../R0001_1kg.ht/rows/.metadata.json.gz.crc       | Bin 0 -> 16 bytes
 .../projects/R0001_1kg.ht/rows/metadata.json.gz   | Bin 0 -> 667 bytes
 ...6-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7.crc | Bin 0 -> 12 bytes
 ...0-646-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7 | Bin 0 -> 227 bytes
 54 files changed, 9 insertions(+)
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/annotations.ht/.README.txt.crc
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/annotations.ht/._SUCCESS.crc
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/annotations.ht/.metadata.json.gz.crc
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/annotations.ht/README.txt
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/annotations.ht/_SUCCESS
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/annotations.ht/globals/.metadata.json.gz.crc
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/annotations.ht/globals/metadata.json.gz
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/annotations.ht/globals/parts/.part-0.crc
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/annotations.ht/globals/parts/part-0
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/annotations.ht/index/part-0-550-0-0-7fdcd45d-8bea-106b-f455-e20e0468960a.idx/.index.crc
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/annotations.ht/index/part-0-550-0-0-7fdcd45d-8bea-106b-f455-e20e0468960a.idx/.metadata.json.gz.crc
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/annotations.ht/index/part-0-550-0-0-7fdcd45d-8bea-106b-f455-e20e0468960a.idx/index
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/annotations.ht/index/part-0-550-0-0-7fdcd45d-8bea-106b-f455-e20e0468960a.idx/metadata.json.gz
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/annotations.ht/metadata.json.gz
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/annotations.ht/rows/.metadata.json.gz.crc
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/annotations.ht/rows/metadata.json.gz
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/annotations.ht/rows/parts/.part-0-550-0-0-7fdcd45d-8bea-106b-f455-e20e0468960a.crc
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/annotations.ht/rows/parts/part-0-550-0-0-7fdcd45d-8bea-106b-f455-e20e0468960a
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/.README.txt.crc
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/._SUCCESS.crc
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/.metadata.json.gz.crc
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/README.txt
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/_SUCCESS
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/globals/.metadata.json.gz.crc
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/globals/metadata.json.gz
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/globals/parts/.part-0.crc
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/globals/parts/part-0
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/index/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.idx/.index.crc
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/index/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.idx/.metadata.json.gz.crc
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/index/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.idx/index
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/index/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.idx/metadata.json.gz
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/metadata.json.gz
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/rows/.metadata.json.gz.crc
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/rows/metadata.json.gz
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/rows/parts/.part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.crc
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/rows/parts/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/.README.txt.crc
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/._SUCCESS.crc
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/.metadata.json.gz.crc
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/README.txt
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/_SUCCESS
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/globals/.metadata.json.gz.crc
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/globals/metadata.json.gz
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/globals/parts/.part-0.crc
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/globals/parts/part-0
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/part-0-646-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7.idx/.index.crc
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/part-0-646-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7.idx/.metadata.json.gz.crc
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/part-0-646-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7.idx/index
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/part-0-646-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7.idx/metadata.json.gz
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/metadata.json.gz
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/rows/.metadata.json.gz.crc
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/rows/metadata.json.gz
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/rows/parts/.part-0-646-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7.crc
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/rows/parts/part-0-646-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7

diff --git a/hail_search/fixtures/GRCh38/SV_WES/annotations.ht/.README.txt.crc b/hail_search/fixtures/GRCh38/SV_WES/annotations.ht/.README.txt.crc
new file mode 100644
index 0000000000000000000000000000000000000000..b1f590f9a02bb013dcccb7f6b37c306cce9139d8
GIT binary patch
literal 12
TcmYc;N@ieSU}9)c=$s7z5sd=>

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/annotations.ht/._SUCCESS.crc b/hail_search/fixtures/GRCh38/SV_WES/annotations.ht/._SUCCESS.crc
new file mode 100644
index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c
GIT binary patch
literal 8
PcmYc;N@ieSU}69O2$TUk

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/annotations.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WES/annotations.ht/.metadata.json.gz.crc
new file mode 100644
index 0000000000000000000000000000000000000000..15fb1af5aa824b7bac050f8349593a1996d87c7f
GIT binary patch
literal 12
TcmYc;N@ieSU}7j-QDOlA5?liS

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/annotations.ht/README.txt b/hail_search/fixtures/GRCh38/SV_WES/annotations.ht/README.txt
new file mode 100644
index 0000000000..dc2b042c6b
--- /dev/null
+++ b/hail_search/fixtures/GRCh38/SV_WES/annotations.ht/README.txt
@@ -0,0 +1,3 @@
+This folder comprises a Hail (www.hail.is) native Table or MatrixTable.
+  Written with version 0.2.109-b71b065e4bb6
+  Created at 2023/08/24 17:42:53
\ No newline at end of file
diff --git a/hail_search/fixtures/GRCh38/SV_WES/annotations.ht/_SUCCESS b/hail_search/fixtures/GRCh38/SV_WES/annotations.ht/_SUCCESS
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/hail_search/fixtures/GRCh38/SV_WES/annotations.ht/globals/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WES/annotations.ht/globals/.metadata.json.gz.crc
new file mode 100644
index 0000000000000000000000000000000000000000..5b3d27b2cbfff9db3ceb6448774f417fd5329a8e
GIT binary patch
literal 12
TcmYc;N@ieSU}BJKyz?Fa5+Vbo

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/annotations.ht/globals/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WES/annotations.ht/globals/metadata.json.gz
new file mode 100644
index 0000000000000000000000000000000000000000..13ff2b0cc2090baa841fbd177dd73e6633c717b1
GIT binary patch
literal 366
zcmV-!0g?V6iwFP!000000F_c(OT#b}{x5m8f=<!VH#;Xj41`sDaEy{AXI-{7t+@<p
z$$$4`-MY0!!9Jzmjq{zPCX(hrLh`0KDB$yt=ePtmA^}NuP?2!4#1ktxFprb4J{jJt
z68NMgPz4@B=4u*I4Y5&{nYyHAqr(p5f;}iL3OT?*+PnwqePS>c!6&6?wLnrxs;Xt6
z3uS;ODM-KprG_sHl0na((`}(H`C*cCfh{>T2W!inBN)vx4JJGpf>IS*U?xee;4SKw
z&M#qqTtcV*uz0U>uWT~}f<mT$6#cbg$A3{T2D9juYDC>i4R$QFvg<Xd-sR7x(d}p)
zovDDZjmY?gB+3f5!<dd}<-u5dqu_4z_%I&YW}o-3lY{r4MjRSw-gi3xVT+O~^CB|P
z_@asbxxOL(WpC$HrVtfYa!=Jn8zeCnO(1!55Rzq<<8wGrxIbvIsn>YK`7~;b(%$2K
M00<x{hgbsu0JdnT#{d8T

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/annotations.ht/globals/parts/.part-0.crc b/hail_search/fixtures/GRCh38/SV_WES/annotations.ht/globals/parts/.part-0.crc
new file mode 100644
index 0000000000000000000000000000000000000000..975c25c37ee752cfc03f295b3722ece6657e85b4
GIT binary patch
literal 12
TcmYc;N@ieSU}8v2oAe0)5=;Yz

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/annotations.ht/globals/parts/part-0 b/hail_search/fixtures/GRCh38/SV_WES/annotations.ht/globals/parts/part-0
new file mode 100644
index 0000000000000000000000000000000000000000..fdfc9d390a2a064deaf74ccbf0813337564a6ba4
GIT binary patch
literal 330
zcmWNLO;5rw0ESCy;*c0Yy&4Z5JjupK6uos_SsF&SbY<eHDXtj=Hk8hfc=reRTl`}r
z{sDV<FHfF4dB+U^gxzoB$SE+(WI&=O*dNRi=(7OgWDc>ILrdW~*mo#PA!Qk~<}r-o
z@fl@;qmVF)C7H+U9(=bG#05oB2mFC1j*}-DBFb99ERmPMZ%@<g3hYngjPM90j*ZDQ
zK@83WcM={?V?p?#NK*oSPJB+#gs@-;Jc^w!0PM54#R%dg71BzLbzvS{*XY;XO|EKX
zbZy*sukVH&F9#!+2cdi<sq}YSl}lRHdRM89HF_?dhO+PV2i~YRl&Y$>wW@XAoq+vy
t*QoMkqiJB|!X_(K>h4147u*}(-OA^(kRPkfdQ};@RBwgUpW9L!@DHALUmXAd

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/annotations.ht/index/part-0-550-0-0-7fdcd45d-8bea-106b-f455-e20e0468960a.idx/.index.crc b/hail_search/fixtures/GRCh38/SV_WES/annotations.ht/index/part-0-550-0-0-7fdcd45d-8bea-106b-f455-e20e0468960a.idx/.index.crc
new file mode 100644
index 0000000000000000000000000000000000000000..399bfc17c871350c80f57e12e1ae79569e924312
GIT binary patch
literal 12
TcmYc;N@ieSU}Cs&B!w9O6ZZo?

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/annotations.ht/index/part-0-550-0-0-7fdcd45d-8bea-106b-f455-e20e0468960a.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WES/annotations.ht/index/part-0-550-0-0-7fdcd45d-8bea-106b-f455-e20e0468960a.idx/.metadata.json.gz.crc
new file mode 100644
index 0000000000000000000000000000000000000000..d82735252bf5ccf12488fef777e0997b07455a6a
GIT binary patch
literal 12
TcmYc;N@ieSU}C6Ka$F7o5eEXp

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/annotations.ht/index/part-0-550-0-0-7fdcd45d-8bea-106b-f455-e20e0468960a.idx/index b/hail_search/fixtures/GRCh38/SV_WES/annotations.ht/index/part-0-550-0-0-7fdcd45d-8bea-106b-f455-e20e0468960a.idx/index
new file mode 100644
index 0000000000000000000000000000000000000000..7f91fffbf24726a4068986ff0fc32f802e19088b
GIT binary patch
literal 104
zcmZ=}U|<LY;;-xs3@n1hrD<uI74e2fCZ<LP@h+hO48jZnCI+UK#=;EhXBb5o49pBH
kL>P=$GYN<=Y_K#nHUX<z$1DppLL7)ca4<45FkmqU09FPS&;S4c

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/annotations.ht/index/part-0-550-0-0-7fdcd45d-8bea-106b-f455-e20e0468960a.idx/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WES/annotations.ht/index/part-0-550-0-0-7fdcd45d-8bea-106b-f455-e20e0468960a.idx/metadata.json.gz
new file mode 100644
index 0000000000000000000000000000000000000000..c951bb1eef5c6a28538ddc6c8c5164018acd0fb5
GIT binary patch
literal 161
zcmV;S0ABweiwFP!0000009B1K4#F@DMepJ{10ZPC9Ds!ZBoHfdTeq=54oY1_RORj%
zHa6@3_n#LS_(4}n6Gs7G?doRZpiL-UU>Wz=X-wd^tEO@gh{vGdjkr_3<R^iLv!=Pz
z<%Nk+^v1on#HIxtph#3yQ~$%?P7ahOGdp1E>3u>C2K;VoQ;d2XhKcN5y*87Yw4Ie~
PvXt@x0+ti5w*UYD`u|7U

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/annotations.ht/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WES/annotations.ht/metadata.json.gz
new file mode 100644
index 0000000000000000000000000000000000000000..f735bf3d0f716d2bd74ce095700aeed316a41c8d
GIT binary patch
literal 443
zcmV;s0Yv^EiwFP!000000F6`MPunmM{$KJ0l0l{In&G#q6C2VrO)%}Hsw~Gj(lEyk
z_9akN{@-W2aoWL<c!}=r`@Z|-%RYOkC9ocht+mSNBA?__l5Ke{kHKtmIla7^TwfIV
zRWVt7f_YIaGLjj100T;ZWd<Z`Ic`1q81)q7g-?&W7CiDRIQvTKf=hajrW3diBAXmX
zhMFf4P+c>+wvV~!(ipy5`(UlA4YArFl&k^;On5Oob+LX@HT+SVQ{-R0bEIoajzIvp
z9z=pmX#wehL|UUCK_IKTx*TF|<H#S_(RI&F%@wk`+z_LmQ`uuUB>`13sYPe$_wYUY
zdbirnKA)2cnMICc!^<0bBQ?j_lq^^DRz>5-V0_a}vV42da>Or*!HZS{VyapGwo-n6
z0x;jqEAZVtj_V4|Mff<=o8<=V(3O7~<OcscvPIJi><x@hE3|ipu^ky+dQ3)MT5W^5
zm^<eWOX#N63KZIm?z6*9MN{!6+}hpmFRMhkZ$ZR!9d9)8osa70kLx1Yi&{Tcjs8&m
l;N8%}kl<&6RuOrfEb-U#aCb=*|BT3Re*tz-bqJCJ002QU*p2`I

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/annotations.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WES/annotations.ht/rows/.metadata.json.gz.crc
new file mode 100644
index 0000000000000000000000000000000000000000..fe86ac351370f6596adee6d8cc8da16fd214b5f8
GIT binary patch
literal 16
XcmYc;N@ieSU}A`?;(C{Ad-Xg3B3K1c

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/annotations.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WES/annotations.ht/rows/metadata.json.gz
new file mode 100644
index 0000000000000000000000000000000000000000..df721e15bb19c8c344c906237cc677858fb647ea
GIT binary patch
literal 733
zcmV<30wVn%iwFP!000000NqwyZ`v>v{V#snw4_Tz0yJ+x=~}6(s&?DMswy(^4RsqF
z<~Rt3^51uzK)wj-$773B!S%V<=iYPdWJ?5H0P)C`V6X;u_xt>Q34u)v;=jO#cn@Sn
zC8t99oRLS21~G#`V~DriC;!|mflbN<M1c<B%D3s5CUCE08mMhm$-_7oRBj9q%#Y$A
zO}siXh=t<w3bl+Au6QgkawSyH&WyS86Gc@sZFJ#HFQS+t!Wd1w0&9$J_TMh!Mfnw!
zx34AAg%4}2zFU*?-rz%|sze|AdS`MHO9jlIg8&?|NZ@Y@A^_wtl9Fy78l%in^*Ju+
zb1dt5dUaR&(ZEqj^`Vv$C0Bu3NoeFHL98RYStDxYD-^t7R19&AwQ>S|C)18t?HqCF
z1&nYpuQ}mb|NrviZt~O{eyx<QM6PLEN?a*i3EbBAK;GI;gRgD*nu|x}Db#7{vg@f+
zl`c`5EmuQt%)4vcwP9DYo~2<3vUNzWr{hz5IIflq5<KyU)gWUe^Mbq^pqUQd^M45F
z*@OP@Fzxz}b2-U#-<gQr0bR5+GyR8MaD64Unz-7>@*7G*bW7C}#!pVZ=Wrd3fQI>H
z^rjoCZtIZCL|bxQxVY~acn2o<qvHhPiuc;>mKkvvCTPTcIVwbqUM@~pEjP;^yADak
zr^cq?H_hVMi7IzL*t@msXR(U{DQI+qDt}D=d_M`ITkzape#j&6TdE$~rii@FA_Z0g
z9&y%P`JJvQ+h{oDkbawdoBDBIbx}S-_)}U9r}N6wEN`|NLld15U1W*3{{_mC=$9=0
z!?aA-wM_jPge+i=%go^nsCjDjXJ+WQt_ijUmeU`e^(~sejXmE{F^6%S3a0C9N&ZmL
zjnrP-Jd$7!eC?mwj%!=~`LA2e0H1NkfM;&cDREo`QOeMCd1Y(?%Ggww(2-1&p-M?)
Pfnxs$zqifLR|)_C=rwH!

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/annotations.ht/rows/parts/.part-0-550-0-0-7fdcd45d-8bea-106b-f455-e20e0468960a.crc b/hail_search/fixtures/GRCh38/SV_WES/annotations.ht/rows/parts/.part-0-550-0-0-7fdcd45d-8bea-106b-f455-e20e0468960a.crc
new file mode 100644
index 0000000000000000000000000000000000000000..f57036f8ca98914611ae7808b7c1bf6ce8a2a59c
GIT binary patch
literal 12
TcmYc;N@ieSU}9iBt^N=I5hMc>

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/annotations.ht/rows/parts/part-0-550-0-0-7fdcd45d-8bea-106b-f455-e20e0468960a b/hail_search/fixtures/GRCh38/SV_WES/annotations.ht/rows/parts/part-0-550-0-0-7fdcd45d-8bea-106b-f455-e20e0468960a
new file mode 100644
index 0000000000000000000000000000000000000000..26a44a296a4d81299ce4996a5101e477514833c5
GIT binary patch
literal 366
zcmWNJ%`0?K0LGtthrx|I=Gsg)7W@JC+;i?Z_xPHUk0z2xv2hkkyi1dkjU<uOlq{4<
zF&i5iH5(rbY}8=ELZql+qbO6an%CxaR==mG=lT6H0NVtLM2Bc{W@2J$RuPZ+o~t^C
z2g=7MrwQL&OWU#iLZUToF6WGTlkoS)EnDOEEp1mI($zoI;~JPGlyU|KBSLb*9n2D!
zGU<nO!vz(@!JHSkB9MC197x)PK|)!8U6hbr*s^h)sE&uT=QmF+*BEB5FX=T(5%Di`
z;>6CZ%jU+FoDnjK_{is@8uu^xsG%?@eAlB2KZZ!9P8{@!M7%up`fwUo;M@%qie+n=
zQzw?4&$J}9qEdhFJ{88^pRb}Jg(?TV!t(<M9a4CqQQUqXYESCIkB7gP9v4wpyNGV-
zQ?_rEB5d`gV8t5t(rvFbT2^KfwqE>eY2*(6TLlaePx@Tp%VZln4;E@iWP5nyD7Ve<
E3);YQKL7v#

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/.README.txt.crc b/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/.README.txt.crc
new file mode 100644
index 0000000000000000000000000000000000000000..17e76b8f5b74d8de753995b0ff9788fbcbea6317
GIT binary patch
literal 12
TcmYc;N@ieSU}A_74iX0d599(9

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/._SUCCESS.crc b/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/._SUCCESS.crc
new file mode 100644
index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c
GIT binary patch
literal 8
PcmYc;N@ieSU}69O2$TUk

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/.metadata.json.gz.crc
new file mode 100644
index 0000000000000000000000000000000000000000..d51b8848d746ac5febcdf9b99b63652a9a7c3f2e
GIT binary patch
literal 12
TcmYc;N@ieSU}Erj_ml?!6Ho(Y

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/README.txt b/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/README.txt
new file mode 100644
index 0000000000..440bda49da
--- /dev/null
+++ b/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/README.txt
@@ -0,0 +1,3 @@
+This folder comprises a Hail (www.hail.is) native Table or MatrixTable.
+  Written with version 0.2.109-b71b065e4bb6
+  Created at 2023/08/25 10:47:38
\ No newline at end of file
diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/_SUCCESS b/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/_SUCCESS
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/globals/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/globals/.metadata.json.gz.crc
new file mode 100644
index 0000000000000000000000000000000000000000..d2dc39b830e972bd0f8cbab05cf11594ac345777
GIT binary patch
literal 12
TcmYc;N@ieSU}CUeE&Kog5rYF5

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/globals/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/globals/metadata.json.gz
new file mode 100644
index 0000000000000000000000000000000000000000..be8cfbcd3f386024b3c5ddbf688fc61596f64fa7
GIT binary patch
literal 295
zcmV+?0oeW@iwFP!000000F{wXPlG@Z#lOp5i-}gF4L7M64^2#p@gQl)c6Y$Fu#o*@
zf`sqx47IIbFLGMm@4YuH(=p8}U?F=p5)8=o>qF9jn6rRoJ7`&$Ztx^(34(MMc89=A
z+kjji0lI)BB(pf;77}N?aGh@?cVNR=d~XbIQ-n%$({2LGtEPlZ$#XF0l`7k;@Mm#P
z<34}JO$H_4Qk0Y~MkDqO2QaE1p`m)~^Bm0N!mBvCnlGXg3Yd6TrTk@y%3AEOWn|it
zn5Zu_T+i=r7k^``x0|Qsko{*6zZrNnP4@KQEgEjzqbkAT78$)<Tr%3`$C~Q`qS|Xo
tsAJY4#99o2mivR!qVy8CI3WDRaMWV*kkV5dcg|SK^xwJVNIOgd005WjjnV)B

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/globals/parts/.part-0.crc b/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/globals/parts/.part-0.crc
new file mode 100644
index 0000000000000000000000000000000000000000..8f83ae4eea07029458d1b8b41edf95e49f6fb0e8
GIT binary patch
literal 12
TcmYc;N@ieSU}AW1?m{{M6>$V0

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/globals/parts/part-0 b/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/globals/parts/part-0
new file mode 100644
index 0000000000000000000000000000000000000000..d08c20b4765418d26668dcc7e1325bab2c8a85e9
GIT binary patch
literal 477
zcmXAkO-|cT6ot<tAX}LgKawJ$4q^#JB;xnuKM?AGq%{a3MFxqX<zN>h5))$cvjT>c
zQ3tG}i|7_Q>k=x_1@u}Bo<2R@bI-ZwHv_)6fWITgt}m-S$Eot$#ZH#iX_eQNb1#$=
z#lu2}PCSZ|^Dn2#!Qv*(lB2TD7iBsng>;4|QKE#>B9v~DmgP;I)_FEK#@mNMmCokV
z{Ag4q6S6GRSw5KLS<5(#gb1`WVJQ=DhTgQk3gv0xB@b8A>tym!2q8JT0f(iDA<-r7
za;$nBJxRmO+2t5pQqhRsD#_N$M?}8%^cWT?JG5a-!D0Lam;31Y8G3q+WBC`Vt88{b
z86B*L9k3TfLFmgfEK^A1*o68-3N{3@CD{>sirhX18_c33Oae)|?gnc;Vk>Cwa>>2@
z2k2m^rR)A+aeeo#uFh~yUr)TCSv3B^R%F;!1+|H>?YRZ4pbz7TaUH|>#BAk%Mtja$
neVB7*skniyskOnEY~6GsD!S5qe($s%v`N>;$}1ZeLMi+Qjf`G+

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/index/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.idx/.index.crc b/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/index/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.idx/.index.crc
new file mode 100644
index 0000000000000000000000000000000000000000..e911752221cc1e519f5019165a26c23a55dcce78
GIT binary patch
literal 12
TcmYc;N@ieSU}AX4DPso!5tRae

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/index/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/index/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.idx/.metadata.json.gz.crc
new file mode 100644
index 0000000000000000000000000000000000000000..d82735252bf5ccf12488fef777e0997b07455a6a
GIT binary patch
literal 12
TcmYc;N@ieSU}C6Ka$F7o5eEXp

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/index/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.idx/index b/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/index/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.idx/index
new file mode 100644
index 0000000000000000000000000000000000000000..6ba08ef5883c0e5facfd4027a17a9e3be7e129ca
GIT binary patch
literal 104
zcmZ=}U|<LY;;-xs3@n1hrD<uI74e2fCZ<LP@h+hO48jZnCI+UK#=;Ehj~PW649pBH
kL>P=0FbRk-Y_K#nHUX=;&m;>pLL7)ca4<45FkmqU0AK7D3;+NC

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/index/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.idx/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/index/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.idx/metadata.json.gz
new file mode 100644
index 0000000000000000000000000000000000000000..c951bb1eef5c6a28538ddc6c8c5164018acd0fb5
GIT binary patch
literal 161
zcmV;S0ABweiwFP!0000009B1K4#F@DMepJ{10ZPC9Ds!ZBoHfdTeq=54oY1_RORj%
zHa6@3_n#LS_(4}n6Gs7G?doRZpiL-UU>Wz=X-wd^tEO@gh{vGdjkr_3<R^iLv!=Pz
z<%Nk+^v1on#HIxtph#3yQ~$%?P7ahOGdp1E>3u>C2K;VoQ;d2XhKcN5y*87Yw4Ie~
PvXt@x0+ti5w*UYD`u|7U

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/metadata.json.gz
new file mode 100644
index 0000000000000000000000000000000000000000..6a81de8537f587e4d4f1f9124f192692de52dcca
GIT binary patch
literal 368
zcmV-$0gwJ4iwFP!000000F6>jPs1<_{VzR-sT*Sxxj~T-7X)<TGEJ46x?M$@L}|KF
zRsHXHU9&OZz|DCdj$fQE$V-XI4sBg(74XF}T+Udqg;JiuAe_u5)A05xT~5<*af9<T
zT?8z!n4?7{uud$>#DLq9_>7XoK}zjiYmk`L;m{RQryzOkY$IIPm<$}&JVv)Bs8q#<
z)leC7m08X2tcCVO<WQBeO^W6h)w2(5`FaPo1m(!OVOH<`C1;o)^^8(Gig$4>Dk|_>
z<AYUbCA7_e^7w8F_lW>0`MuT>K`~=-_Xncs0VNm@_AJBPLQ!CL(v3+`W&BCG>zM}%
zRLA^(Jd0|o1V6R9bkq*48=>fVEFM=pqOsU(d<6PU-_ufru8dX`w+?t09QQGl3aZ1N
z-rgS~FNqC`Vb!PodVNo*-v8e>h{<GV(9+SzN`!948O8=Lo{9$MXEU!~&nXA<rr}o&
OivA1RS~l6v0ssKTSFoi3

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/rows/.metadata.json.gz.crc
new file mode 100644
index 0000000000000000000000000000000000000000..c05fcd0b4471e31454acfe7ef7c8095b8f8d85af
GIT binary patch
literal 16
XcmYc;N@ieSU}8A5`s}(pte!^zDl!HU

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/rows/metadata.json.gz
new file mode 100644
index 0000000000000000000000000000000000000000..72c0c63946ce32ea97cd64d746641c282e7b5cda
GIT binary patch
literal 669
zcmV;O0%H9iiwFP!000000NqwibJ{Qv{V%%KPKhyJz7;}SW-^&Hq&<{jJVII%HAZG+
zK_QI)dsha3V4F$OV>mGO)9&i+(_7mIBIyc<M<$ZP4*0{bo9P;uLoDJy!=89EvZYE;
zseQr898nf=U?zrmhb;MVzXm?a7cdG8ge#xMLmI<Ws}0kKqLPQWkW}rhK!h4p?2W%D
zMfbA|pIacc5|A&H>##_&NO39|cx7C|U!T!1G}WUh1S+kyg6$DIhoUX0kgki!#NrSr
zx?BP;LSwpGhv0)mY$A2r$&e*Tz&09ap917&<-;zLmAqI}r88`FmtB2(%&vY+Pglu2
zF*<HfI@T3LM95{X-xT!h2o=&DxiyGZ)vK;nqgV}6H7M0ARGD8@qXvm`g>r#LNXy7m
zR%fvZ0#Fq*-j0XYPJdV|S%f`_ke5P+VZ@%1%309DfK%~;0d1$(?bp+8f4kpDY3@4{
zv2mcQYGz`7*o82oS7~CBVd*3)2=A$WLj3CLa~f_?6h26Y?oE?e9+W_+*qCUC{zpB#
zu7$H6<4x}7A{JM?*QPkJq9BMt=jEuBk)~SU;hyCt&9*N1l-QiP*RZ%W1NrW?y(_zZ
z6uaQSOB&wcDgWQ`r~4_8%?F<P%g5&m_?1(w+vbS8%_7HG1?DN#T=|`@$=f({N}Ya{
ze3klTUo}y_K=?yi4b^$&{gXCZ$Z#H=5L0B0&-1T%9I^SBFgJF~?soh5-?C|+^*U~+
zXFCfBY|dNUUJMokJLq+3&~gThx&8F3<Ie+?OBhBQ$xWRDNnaDDk<#ahM`G+j?EGuT
z?Kmy}=I6a(fG?#>frC!l&Ete*;fAB>(#qHZ<k;AhFi2v^P^}fRz{CCrg9YMsKM4Q;
DtGY{K

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/rows/parts/.part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.crc b/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/rows/parts/.part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.crc
new file mode 100644
index 0000000000000000000000000000000000000000..07529cf1e07072facf005ca191c7a97ea5167ea5
GIT binary patch
literal 12
TcmYc;N@ieSU}ES9yc7-q5{?5s

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/rows/parts/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48 b/hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/rows/parts/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48
new file mode 100644
index 0000000000000000000000000000000000000000..0b1e95c86a8d93a70eb152f2fef1bb1bbe013c62
GIT binary patch
literal 200
zcmX@Yz`)SQ$iVP{gONe7xHK&-vm)Nm$i&phAl@Z3fPtBTk>LSQdfUs{e9Zi=e!=br
zj11|92F50qrUDFMX6DA`h5`)ljVuf-ER8u@wg6S}$ht5X$m;<Kan;8R!6pW#md1}6
z1SO&vE-_A7xLc^df42~e2(LsQ1B(ztBr^~De@0sdV>1H_TLvMH84Ly2=j{<<|GH6!
z!C2-$qbP&wiz%WE&a#XQ0`?5vmZrugaf~0Bm+ZXF#lZ0I_)RWm9<~3B{}~wo5OXuA

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/.README.txt.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/.README.txt.crc
new file mode 100644
index 0000000000000000000000000000000000000000..17c414e9d5b7ded20df83dfa3a45f6c4cce0c1f5
GIT binary patch
literal 12
TcmYc;N@ieSU}E6d*>(#65wQbY

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/._SUCCESS.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/._SUCCESS.crc
new file mode 100644
index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c
GIT binary patch
literal 8
PcmYc;N@ieSU}69O2$TUk

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/.metadata.json.gz.crc
new file mode 100644
index 0000000000000000000000000000000000000000..d51b8848d746ac5febcdf9b99b63652a9a7c3f2e
GIT binary patch
literal 12
TcmYc;N@ieSU}Erj_ml?!6Ho(Y

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/README.txt b/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/README.txt
new file mode 100644
index 0000000000..e9bf5b62a9
--- /dev/null
+++ b/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/README.txt
@@ -0,0 +1,3 @@
+This folder comprises a Hail (www.hail.is) native Table or MatrixTable.
+  Written with version 0.2.109-b71b065e4bb6
+  Created at 2023/08/25 10:45:26
\ No newline at end of file
diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/_SUCCESS b/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/_SUCCESS
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/globals/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/globals/.metadata.json.gz.crc
new file mode 100644
index 0000000000000000000000000000000000000000..d2dc39b830e972bd0f8cbab05cf11594ac345777
GIT binary patch
literal 12
TcmYc;N@ieSU}CUeE&Kog5rYF5

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/globals/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/globals/metadata.json.gz
new file mode 100644
index 0000000000000000000000000000000000000000..be8cfbcd3f386024b3c5ddbf688fc61596f64fa7
GIT binary patch
literal 295
zcmV+?0oeW@iwFP!000000F{wXPlG@Z#lOp5i-}gF4L7M64^2#p@gQl)c6Y$Fu#o*@
zf`sqx47IIbFLGMm@4YuH(=p8}U?F=p5)8=o>qF9jn6rRoJ7`&$Ztx^(34(MMc89=A
z+kjji0lI)BB(pf;77}N?aGh@?cVNR=d~XbIQ-n%$({2LGtEPlZ$#XF0l`7k;@Mm#P
z<34}JO$H_4Qk0Y~MkDqO2QaE1p`m)~^Bm0N!mBvCnlGXg3Yd6TrTk@y%3AEOWn|it
zn5Zu_T+i=r7k^``x0|Qsko{*6zZrNnP4@KQEgEjzqbkAT78$)<Tr%3`$C~Q`qS|Xo
tsAJY4#99o2mivR!qVy8CI3WDRaMWV*kkV5dcg|SK^xwJVNIOgd005WjjnV)B

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/globals/parts/.part-0.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/globals/parts/.part-0.crc
new file mode 100644
index 0000000000000000000000000000000000000000..069f64842204fe146ceb73ccf1ebe3763274ea5d
GIT binary patch
literal 12
TcmYc;N@ieSU}8w$W&0ce5~~A%

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/globals/parts/part-0 b/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/globals/parts/part-0
new file mode 100644
index 0000000000000000000000000000000000000000..b6f0c3995f1e67e00de6c02d2d80e8286d1a2650
GIT binary patch
literal 485
zcmXAk-Ade06ouF6Oi7p<(Hv(Kq!$xVI?&1a$)6&HI@6|7T832cGGt6N;!K=QQuPVE
z6t67wIr<PjM*9YWeSq%Ji^IoZuf5h@e;Dwi1w4%zyEv=%9H+{^E#G8momP2WIafkC
zQ9LSi=)~hFIez~&Iapq%S#ns``LaxBq>#?&bCf8dv<Rh}q-A+or*)nUKjC?GSf%sD
zEI%Aq$&@V1be<2VdDb>QMnVKyny{3K=M`;yd8b4eh;8eGP@WcEa(zC#NT%0?5R#)8
za9Elc5?$gh$C}5{mo(g-pG~kM6^-bvk!-!ZLF8*sPhgR<OB=Qn9L6tjxsRTop|96D
z+K;Htv-t^Sbg>zB!R`?Sp)ZfnrjW+53H6B-YzSscvLp5txoren%%USq0!e!A7Hi#L
zCur_+$-VO%=whU$>prl!{(f3lM>wXhC+^WK8h>FcGVG~>+QfM6xdp6X0OOW%9m9CV
uY~{a2XTe$nm<wj9xPh&ywZUK5y6HqzbftOsztd*WA>9Bkf3is-l)^tN(qJe6

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/part-0-646-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7.idx/.index.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/part-0-646-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7.idx/.index.crc
new file mode 100644
index 0000000000000000000000000000000000000000..a2063fcbcdcd520eae71faab30752ce90e7472dd
GIT binary patch
literal 12
TcmYc;N@ieSU}8Axuuu>H5~l+4

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/part-0-646-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/part-0-646-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7.idx/.metadata.json.gz.crc
new file mode 100644
index 0000000000000000000000000000000000000000..d82735252bf5ccf12488fef777e0997b07455a6a
GIT binary patch
literal 12
TcmYc;N@ieSU}C6Ka$F7o5eEXp

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/part-0-646-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7.idx/index b/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/part-0-646-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7.idx/index
new file mode 100644
index 0000000000000000000000000000000000000000..d9f7a479646071fbb913a7ccf66d675f9cd4d029
GIT binary patch
literal 104
zcmZ=}U|<LY;;-xs3@n1hrD<uI74e2fCZ<LP@h+hO48jZnCI+UK#=;EhUl~Oh49pBH
kL>P>>FbRk-Y_K#nHUX>p$|MUkLL7)ca4<45FkmqU0Bax?NB{r;

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/part-0-646-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7.idx/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/part-0-646-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7.idx/metadata.json.gz
new file mode 100644
index 0000000000000000000000000000000000000000..c951bb1eef5c6a28538ddc6c8c5164018acd0fb5
GIT binary patch
literal 161
zcmV;S0ABweiwFP!0000009B1K4#F@DMepJ{10ZPC9Ds!ZBoHfdTeq=54oY1_RORj%
zHa6@3_n#LS_(4}n6Gs7G?doRZpiL-UU>Wz=X-wd^tEO@gh{vGdjkr_3<R^iLv!=Pz
z<%Nk+^v1on#HIxtph#3yQ~$%?P7ahOGdp1E>3u>C2K;VoQ;d2XhKcN5y*87Yw4Ie~
PvXt@x0+ti5w*UYD`u|7U

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/metadata.json.gz
new file mode 100644
index 0000000000000000000000000000000000000000..6a81de8537f587e4d4f1f9124f192692de52dcca
GIT binary patch
literal 368
zcmV-$0gwJ4iwFP!000000F6>jPs1<_{VzR-sT*Sxxj~T-7X)<TGEJ46x?M$@L}|KF
zRsHXHU9&OZz|DCdj$fQE$V-XI4sBg(74XF}T+Udqg;JiuAe_u5)A05xT~5<*af9<T
zT?8z!n4?7{uud$>#DLq9_>7XoK}zjiYmk`L;m{RQryzOkY$IIPm<$}&JVv)Bs8q#<
z)leC7m08X2tcCVO<WQBeO^W6h)w2(5`FaPo1m(!OVOH<`C1;o)^^8(Gig$4>Dk|_>
z<AYUbCA7_e^7w8F_lW>0`MuT>K`~=-_Xncs0VNm@_AJBPLQ!CL(v3+`W&BCG>zM}%
zRLA^(Jd0|o1V6R9bkq*48=>fVEFM=pqOsU(d<6PU-_ufru8dX`w+?t09QQGl3aZ1N
z-rgS~FNqC`Vb!PodVNo*-v8e>h{<GV(9+SzN`!948O8=Lo{9$MXEU!~&nXA<rr}o&
OivA1RS~l6v0ssKTSFoi3

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/rows/.metadata.json.gz.crc
new file mode 100644
index 0000000000000000000000000000000000000000..9cba05dcb8675e53e6e06d64fcb287acdd42349a
GIT binary patch
literal 16
XcmYc;N@ieSU}9iyt8)ID+1myHAqWKW

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/rows/metadata.json.gz
new file mode 100644
index 0000000000000000000000000000000000000000..da058c7333047f2ddd7f9435bba544cc03a546e4
GIT binary patch
literal 667
zcmV;M0%ZLkiwFP!000000NqyGZrVT){g-{Mod(-n>bF8@ixfp^Nc&KPtYtkW+nBYr
zUMLj#-#csY-6%;P0}?W4XJ*fwJ!AVw1l<5}$xQgL2mj&c%{&HX5rcToa3Jo2?5N~a
zD3AMOi71PFU^<4l$2|G*5Q9G{7BC7lgexDWW17HR$t_dIvXYB97gQdMfQJfX;!ZzH
zNe_!0Um75k<WMYB>##_$NcvPTaI3h0KOUoDsH;a&2viucgxv|NL(vXoNaHdxHGBvp
zU9Z6}Lle4*L-0T#HWSLWa%2qxu#FDd=K!(Yc(9K|BQH@@svMi#<yT)H^Q-Uk^HsV`
zwT`>9j%@`I;bL9rHwU#iL8WvjZXKdE^=hltDOQJ69ZGczRpr;zs6(Pcp+cYu($ex&
z)md$W0A$0Ax6^UY8jZ^(gRmza`jwDz7_n!hQVUvXaL)f?fNc#2qjuWuFX!7N%Y9=a
zHV$;t%uMwMyWo2C8cob{ESp3L;XPGPh+kcO%)&K_!Ux&Vy=oGxgA#C=XcO(x|ETA{
zF>uxs{8PBOjKvlAr72F0C<qczML8-&q^RO}xV7B0+twwY3Y(gHHH%9#Q0(5?yRqwg
zu?r5opy3^!^8X!wwx0yiec+kDJiM=f*ErR-ZBEGREOLsKpzlK6mEY)^qK%qU+Vq>`
zo76A+s*Cak!tc^*sLl(|pRCzZhI({LbdebE=U?zR68$ov|Crb2z!~5#e*GQ**nQI)
z!nL__U}&=5YGpDu7zNC-sT~Znw~pTrRIFhfZH2Gv97*<=(2Z1HPh65<4}9<SET?Z>
zdpAGsH3NJoT?+ivx1A!63l?sDG+kC1TYv(a>Jl1BH5sauL>741{{ZA&-4{Oz001x#
BKyv^9

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/rows/parts/.part-0-646-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/rows/parts/.part-0-646-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7.crc
new file mode 100644
index 0000000000000000000000000000000000000000..c0e184f89f44249ff5950572cdd5f48baaf0a082
GIT binary patch
literal 12
TcmYc;N@ieSU}A_~y8S2s6AlBn

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/rows/parts/part-0-646-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7 b/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/rows/parts/part-0-646-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7
new file mode 100644
index 0000000000000000000000000000000000000000..0dca74ce0f4f9ec367c684dd58d234018ba50a4b
GIT binary patch
literal 227
zcmcc5z`(GRk%8d@2P1=EacNpwW<|WAk%_61LA*<700RpHBf|rr^tP9?`Iz}#{es;M
z7#Y$H4UA1JO$8Xj%*>6=4Fwq98(A1wSQ>M*Yyqm`k#%7(kk<neYN`@03`{}{;u>EW
zf=vueEsei22uf%$Tw<KEaJNu@|85}`5nhP~1{NWPN@gDR|BU|`?HP>C3@q#!gg7!7
z3a-!FBgFo7qaK5?%zs8v2Gti+L>UBR9T|iL92vYVO^r=T7%wc@d7F!Y;otF_T+BRb
Z`3%ew3?CTQKDrAO`@ZA`P;3b!0|1FnI^6&O

literal 0
HcmV?d00001


From 919744d858c86830c929aa4cce1c01d32a5a2072 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Fri, 25 Aug 2023 11:19:57 -0400
Subject: [PATCH 09/16] initial response

---
 hail_search/hail_search_query.py |    2 +
 hail_search/test_search.py       | 1013 +++++++++++++++---------------
 hail_search/test_utils.py        |  165 +++++
 3 files changed, 686 insertions(+), 494 deletions(-)

diff --git a/hail_search/hail_search_query.py b/hail_search/hail_search_query.py
index 678b9b3343..775248242d 100644
--- a/hail_search/hail_search_query.py
+++ b/hail_search/hail_search_query.py
@@ -1245,6 +1245,8 @@ def get_allowed_sv_type_ids(self, sv_types):
             type.replace('gCNV_', '') for type in sv_types if type.startswith('gCNV_')
         ])
 
+    # TODO override genotype fields in genotypes response, actually return geneIds
+
     def _additional_annotation_fields(self):
         return {}
 
diff --git a/hail_search/test_search.py b/hail_search/test_search.py
index 71ebe42b92..7b772f0afc 100644
--- a/hail_search/test_search.py
+++ b/hail_search/test_search.py
@@ -4,7 +4,7 @@
 from hail_search.test_utils import get_hail_search_body, FAMILY_2_VARIANT_SAMPLE_DATA, FAMILY_2_MISSING_SAMPLE_DATA, \
     VARIANT1, VARIANT2, VARIANT3, VARIANT4, MULTI_PROJECT_SAMPLE_DATA, MULTI_PROJECT_MISSING_SAMPLE_DATA, \
     LOCATION_SEARCH, EXCLUDE_LOCATION_SEARCH, VARIANT_ID_SEARCH, RSID_SEARCH, GENE_COUNTS, SV_WGS_SAMPLE_DATA, \
-    SV_VARIANT1, SV_VARIANT2, SV_VARIANT3, SV_VARIANT4
+    SV_VARIANT1, SV_VARIANT2, SV_VARIANT3, SV_VARIANT4, GCNV_VARIANT1, GCNV_VARIANT2, GCNV_VARIANT3, GCNV_VARIANT4
 from hail_search.web_app import init_web_app
 
 PROJECT_2_VARIANT = {
@@ -113,11 +113,11 @@ class HailSearchTestCase(AioHTTPTestCase):
     async def get_application(self):
         return init_web_app()
 
-    async def test_status(self):
-        async with self.client.request('GET', '/status') as resp:
-            self.assertEqual(resp.status, 200)
-            resp_json = await resp.json()
-        self.assertDictEqual(resp_json, {'success': True})
+    # async def test_status(self):
+    #     async with self.client.request('GET', '/status') as resp:
+    #         self.assertEqual(resp.status, 200)
+    #         resp_json = await resp.json()
+    #     self.assertDictEqual(resp_json, {'success': True})
 
     async def _assert_expected_search(self, results, gene_counts=None, **search_kwargs):
         search_body = get_hail_search_body(**search_kwargs)
@@ -127,12 +127,16 @@ async def _assert_expected_search(self, results, gene_counts=None, **search_kwar
         self.assertSetEqual(set(resp_json.keys()), {'results', 'total'})
         self.assertEqual(resp_json['total'], len(results))
         for i, result in enumerate(resp_json['results']):
+            if result != results[i]:
+                import pdb; pdb.set_trace()
             self.assertEqual(result, results[i])
 
         if gene_counts:
             async with self.client.request('POST', '/gene_counts', json=search_body) as resp:
                 self.assertEqual(resp.status, 200)
                 gene_counts_json = await resp.json()
+            if gene_counts_json != gene_counts:
+                import pdb; pdb.set_trace()
             self.assertDictEqual(gene_counts_json, gene_counts)
 
     async def test_single_family_search(self):
@@ -144,497 +148,518 @@ async def test_single_family_search(self):
         )
 
         await self._assert_expected_search(
-            [SV_VARIANT1, SV_VARIANT2, SV_VARIANT3, SV_VARIANT4], sample_data=SV_WGS_SAMPLE_DATA,
-        )
-
-    async def test_single_project_search(self):
-        await self._assert_expected_search(
-            [VARIANT1, VARIANT2, MULTI_FAMILY_VARIANT, VARIANT4], omit_sample_type='SV_WES', gene_counts={
-                'ENSG00000097046': {'total': 3, 'families': {'F000002_2': 2, 'F000003_3': 1}},
-                'ENSG00000177000': {'total': 3, 'families': {'F000002_2': 2, 'F000003_3': 1}},
+            [GCNV_VARIANT1, GCNV_VARIANT2, GCNV_VARIANT3, GCNV_VARIANT4], omit_sample_type='VARIANTS', gene_counts={
+                'ENSG00000129562': {'total': 1, 'families': {'F000002_2': 1}},
+                'ENSG00000013364': {'total': 1, 'families': {'F000002_2': 1}},
+                'ENSG00000079616': {'total': 1, 'families': {'F000002_2': 1}},
+                'ENSG00000103495': {'total': 1, 'families': {'F000002_2': 1}},
+                'ENSG00000167371': {'total': 1, 'families': {'F000002_2': 1}},
+                'ENSG00000280789': {'total': 1, 'families': {'F000002_2': 1}},
+                'ENSG00000280893': {'total': 1, 'families': {'F000002_2': 1}},
+                'ENSG00000281348': {'total': 1, 'families': {'F000002_2': 1}},
+                'ENSG00000275023': {'total': 2, 'families': {'F000002_2': 2}},
+                'ENSG00000277258': {'total': 1, 'families': {'F000002_2': 1}},
+                'ENSG00000277972': {'total': 1, 'families': {'F000002_2': 1}},
             }
         )
 
-    async def test_multi_project_search(self):
-        await self._assert_expected_search(
-            [PROJECT_2_VARIANT, MULTI_PROJECT_VARIANT1, MULTI_PROJECT_VARIANT2, VARIANT3, VARIANT4],
-            gene_counts=GENE_COUNTS, sample_data=MULTI_PROJECT_SAMPLE_DATA,
-        )
-
-    async def test_inheritance_filter(self):
-        inheritance_mode = 'any_affected'
-        await self._assert_expected_search(
-            [VARIANT1, VARIANT2, MULTI_FAMILY_VARIANT, VARIANT4], inheritance_mode=inheritance_mode, omit_sample_type='SV_WES',
-        )
-
-        await self._assert_expected_search(
-            [SV_VARIANT1, SV_VARIANT2, SV_VARIANT3, SV_VARIANT4], inheritance_mode=inheritance_mode, sample_data=SV_WGS_SAMPLE_DATA,
-        )
-
-        await self._assert_expected_search(
-            [SV_VARIANT2], inheritance_mode=inheritance_mode, annotations=NEW_SV_FILTER, sample_data=SV_WGS_SAMPLE_DATA,
-        )
-
-        inheritance_mode = 'de_novo'
-        await self._assert_expected_search(
-            [VARIANT1, FAMILY_3_VARIANT, VARIANT4], inheritance_mode=inheritance_mode, omit_sample_type='SV_WES',
-        )
-
-        await self._assert_expected_search(
-            [SV_VARIANT1], inheritance_mode=inheritance_mode,  sample_data=SV_WGS_SAMPLE_DATA,
-        )
-
-        inheritance_mode = 'x_linked_recessive'
-        await self._assert_expected_search([], inheritance_mode=inheritance_mode, omit_sample_type='SV_WES')
-        await self._assert_expected_search([], inheritance_mode=inheritance_mode, sample_data=SV_WGS_SAMPLE_DATA)
-
-        inheritance_mode = 'homozygous_recessive'
-        await self._assert_expected_search(
-            [VARIANT2], inheritance_mode=inheritance_mode, omit_sample_type='SV_WES',
-        )
-
-        await self._assert_expected_search(
-            [PROJECT_2_VARIANT1, VARIANT2], inheritance_mode=inheritance_mode, sample_data=MULTI_PROJECT_SAMPLE_DATA,
-        )
-
-        await self._assert_expected_search(
-            [SV_VARIANT4], inheritance_mode=inheritance_mode, sample_data=SV_WGS_SAMPLE_DATA,
-        )
-
-        gt_inheritance_filter = {'genotype': {'I000006_hg00733': 'has_alt', 'I000005_hg00732': 'ref_ref'}}
-        await self._assert_expected_search(
-            [VARIANT2, VARIANT3], inheritance_filter=gt_inheritance_filter, sample_data=FAMILY_2_VARIANT_SAMPLE_DATA)
-
-        inheritance_mode = 'compound_het'
-        await self._assert_expected_search(
-            [[VARIANT3, VARIANT4]], inheritance_mode=inheritance_mode, sample_data=MULTI_PROJECT_SAMPLE_DATA, gene_counts={
-                'ENSG00000097046': {'total': 2, 'families': {'F000002_2': 2}},
-                'ENSG00000177000': {'total': 1, 'families': {'F000002_2': 1}},
-            }, **COMP_HET_ALL_PASS_FILTERS,
-        )
-
-        await self._assert_expected_search(
-            [[SV_VARIANT1, SV_VARIANT2]], inheritance_mode=inheritance_mode, sample_data=SV_WGS_SAMPLE_DATA,
-            **COMP_HET_ALL_PASS_FILTERS,
-        )
-
-        inheritance_mode = 'recessive'
-        await self._assert_expected_search(
-            [PROJECT_2_VARIANT1, VARIANT2, [VARIANT3, VARIANT4]], inheritance_mode=inheritance_mode, gene_counts={
-                'ENSG00000097046': {'total': 2, 'families': {'F000002_2': 2}},
-                'ENSG00000177000': {'total': 2, 'families': {'F000002_2': 2}},
-            }, sample_data=MULTI_PROJECT_SAMPLE_DATA, **COMP_HET_ALL_PASS_FILTERS,
-        )
-
-        await self._assert_expected_search(
-            [[SV_VARIANT1, SV_VARIANT2], SV_VARIANT4], inheritance_mode=inheritance_mode, sample_data=SV_WGS_SAMPLE_DATA,
-            **COMP_HET_ALL_PASS_FILTERS,
-        )
-
-    async def test_quality_filter(self):
-        quality_filter = {'vcf_filter': 'pass'}
-        await self._assert_expected_search(
-            [VARIANT1, VARIANT2, MULTI_FAMILY_VARIANT], quality_filter=quality_filter, omit_sample_type='SV_WES',
-        )
-
-        await self._assert_expected_search([SV_VARIANT4], quality_filter=quality_filter, sample_data=SV_WGS_SAMPLE_DATA)
-
-        await self._assert_expected_search(
-            [VARIANT2, MULTI_FAMILY_VARIANT], quality_filter={'min_gq': 40}, omit_sample_type='SV_WES',
-        )
-
-        sv_quality_filter = {'min_gq_sv': 40}
-        await self._assert_expected_search(
-            [SV_VARIANT3, SV_VARIANT4], quality_filter=sv_quality_filter, sample_data=SV_WGS_SAMPLE_DATA,
-        )
-
-        await self._assert_expected_search(
-            [], annotations=NEW_SV_FILTER, quality_filter=sv_quality_filter, sample_data=SV_WGS_SAMPLE_DATA,
-        )
-
-        await self._assert_expected_search(
-            [VARIANT2, MULTI_FAMILY_VARIANT], quality_filter={'min_gq': 40, 'vcf_filter': 'pass'}, omit_sample_type='SV_WES',
-        )
-
-        await self._assert_expected_search(
-            [VARIANT1, VARIANT2, MULTI_FAMILY_VARIANT], quality_filter={'min_gq': 60, 'affected_only': True},
-            omit_sample_type='SV_WES',
-        )
-
-        await self._assert_expected_search(
-            [SV_VARIANT3, SV_VARIANT4], quality_filter={'min_gq_sv': 60, 'affected_only': True}, sample_data=SV_WGS_SAMPLE_DATA,
-        )
-
-        await self._assert_expected_search(
-            [VARIANT1, VARIANT2, FAMILY_3_VARIANT], quality_filter={'min_ab': 50}, omit_sample_type='SV_WES',
-        )
-
-        await self._assert_expected_search(
-            [VARIANT2, VARIANT3], quality_filter={'min_ab': 70, 'affected_only': True},
-            omit_sample_type='SV_WES',
-        )
-
-        quality_filter = {'min_gq': 40, 'min_ab': 50}
-        await self._assert_expected_search(
-            [VARIANT2, FAMILY_3_VARIANT], quality_filter=quality_filter, omit_sample_type='SV_WES',
-        )
-
-        annotations = {'splice_ai': '0.0'}  # Ensures no variants are filtered out by annotation/path filters
-        await self._assert_expected_search(
-            [VARIANT1, VARIANT2, FAMILY_3_VARIANT], quality_filter=quality_filter, omit_sample_type='SV_WES',
-            annotations=annotations, pathogenicity={'clinvar': ['likely_pathogenic', 'vus_or_conflicting']},
-        )
-
-        await self._assert_expected_search(
-            [VARIANT2, FAMILY_3_VARIANT], quality_filter=quality_filter, omit_sample_type='SV_WES',
-            annotations=annotations, pathogenicity={'clinvar': ['pathogenic']},
-        )
-
-    async def test_location_search(self):
-        await self._assert_expected_search(
-            [VARIANT2, MULTI_FAMILY_VARIANT, VARIANT4], omit_sample_type='SV_WES', **LOCATION_SEARCH,
-        )
-
-        sv_intervals = ['1:9310023-9380264']
-        await self._assert_expected_search(
-            [SV_VARIANT1, SV_VARIANT2], sample_data=SV_WGS_SAMPLE_DATA, intervals=sv_intervals, gene_ids=['ENSG00000171621'],
-        )
-
-        await self._assert_expected_search(
-            [VARIANT1], omit_sample_type='SV_WES', **EXCLUDE_LOCATION_SEARCH,
-        )
-
-        await self._assert_expected_search(
-            [SV_VARIANT3, SV_VARIANT4], sample_data=SV_WGS_SAMPLE_DATA, intervals=sv_intervals, exclude_intervals=True,
-        )
-
-        await self._assert_expected_search(
-            [SELECTED_TRANSCRIPT_MULTI_FAMILY_VARIANT],  omit_sample_type='SV_WES',
-            intervals=LOCATION_SEARCH['intervals'][-1:], gene_ids=LOCATION_SEARCH['gene_ids'][:1]
-        )
-
-    async def test_variant_id_search(self):
-        await self._assert_expected_search([VARIANT2], omit_sample_type='SV_WES', **RSID_SEARCH)
-
-        await self._assert_expected_search([VARIANT1], omit_sample_type='SV_WES', **VARIANT_ID_SEARCH)
-
-        await self._assert_expected_search(
-            [VARIANT1], omit_sample_type='SV_WES', variant_ids=VARIANT_ID_SEARCH['variant_ids'][:1],
-        )
-
-        await self._assert_expected_search(
-            [], omit_sample_type='SV_WES', variant_ids=VARIANT_ID_SEARCH['variant_ids'][1:],
-        )
-
-        await self._assert_expected_search([SV_VARIANT2, SV_VARIANT4], sample_data=SV_WGS_SAMPLE_DATA, variant_keys=[
-            'cohort_2911.chr1.final_cleanup_INS_chr1_160', 'phase2_DEL_chr14_4640',
-        ])
-
-    async def test_frequency_filter(self):
-        await self._assert_expected_search(
-            [VARIANT1, VARIANT4], frequencies={'seqr': {'af': 0.2}}, omit_sample_type='SV_WES',
-        )
-
-        await self._assert_expected_search(
-            [MULTI_FAMILY_VARIANT, VARIANT4], frequencies={'seqr': {'ac': 4}}, omit_sample_type='SV_WES',
-        )
-
-        await self._assert_expected_search(
-            [MULTI_FAMILY_VARIANT, VARIANT4], frequencies={'seqr': {'hh': 1}}, omit_sample_type='SV_WES',
-        )
-
-        await self._assert_expected_search(
-            [VARIANT4], frequencies={'seqr': {'ac': 4, 'hh': 0}}, omit_sample_type='SV_WES',
-        )
-
-        await self._assert_expected_search(
-            [SV_VARIANT1], frequencies={'sv_callset': {'af': 0.05}}, sample_data=SV_WGS_SAMPLE_DATA,
-        )
-
-        await self._assert_expected_search(
-            [VARIANT1, VARIANT2, VARIANT4], frequencies={'gnomad_genomes': {'af': 0.05}}, omit_sample_type='SV_WES',
-        )
-
-        await self._assert_expected_search(
-            [VARIANT2, VARIANT4], frequencies={'gnomad_genomes': {'af': 0.05, 'hh': 1}}, omit_sample_type='SV_WES',
-        )
-
-        await self._assert_expected_search(
-            [VARIANT2, VARIANT4], frequencies={'gnomad_genomes': {'af': 0.005}}, omit_sample_type='SV_WES',
-        )
-
-        await self._assert_expected_search(
-            [SV_VARIANT1, SV_VARIANT3, SV_VARIANT4], frequencies={'gnomad_svs': {'af': 0.001}}, sample_data=SV_WGS_SAMPLE_DATA,
-        )
-
         await self._assert_expected_search(
-            [VARIANT4], frequencies={'seqr': {'af': 0.2}, 'gnomad_genomes': {'ac': 50}},
-            omit_sample_type='SV_WES',
-        )
-
-        await self._assert_expected_search(
-            [VARIANT1, VARIANT2, MULTI_FAMILY_VARIANT, VARIANT4], frequencies={'seqr': {}, 'gnomad_genomes': {'af': None}},
-            omit_sample_type='SV_WES',
-        )
-
-        annotations = {'splice_ai': '0.0'}  # Ensures no variants are filtered out by annotation/path filters
-        await self._assert_expected_search(
-            [VARIANT1, VARIANT2, VARIANT4], frequencies={'gnomad_genomes': {'af': 0.01}}, omit_sample_type='SV_WES',
-            annotations=annotations, pathogenicity={'clinvar': ['pathogenic', 'likely_pathogenic', 'vus_or_conflicting']},
-        )
-
-        await self._assert_expected_search(
-            [VARIANT2, VARIANT4], frequencies={'gnomad_genomes': {'af': 0.01}}, omit_sample_type='SV_WES',
-            annotations=annotations, pathogenicity={'clinvar': ['pathogenic', 'vus_or_conflicting']},
-        )
-
-    async def test_annotations_filter(self):
-        await self._assert_expected_search([VARIANT2], pathogenicity={'hgmd': ['hgmd_other']}, omit_sample_type='SV_WES')
-
-        pathogenicity = {'clinvar': ['likely_pathogenic', 'vus_or_conflicting', 'benign']}
-        await self._assert_expected_search([VARIANT1, VARIANT2], pathogenicity=pathogenicity, omit_sample_type='SV_WES')
-
-        pathogenicity['clinvar'] = pathogenicity['clinvar'][:1]
-        await self._assert_expected_search(
-            [VARIANT1, VARIANT4], pathogenicity=pathogenicity, annotations={'SCREEN': ['CTCF-only', 'DNase-only']},
-            omit_sample_type='SV_WES',
-        )
-
-        annotations = {
-            'missense': ['missense_variant'], 'in_frame': ['inframe_insertion', 'inframe_deletion'], 'frameshift': None,
-            'structural_consequence': ['INTRONIC'],
-        }
-        await self._assert_expected_search(
-            [VARIANT1, VARIANT2, VARIANT4], pathogenicity=pathogenicity, annotations=annotations, omit_sample_type='SV_WES',
-        )
-
-        await self._assert_expected_search([VARIANT2, VARIANT4], annotations=annotations, omit_sample_type='SV_WES')
-
-        await self._assert_expected_search([SV_VARIANT1], annotations=annotations, sample_data=SV_WGS_SAMPLE_DATA)
-
-        annotations['splice_ai'] = '0.005'
-        await self._assert_expected_search(
-            [VARIANT2, MULTI_FAMILY_VARIANT, VARIANT4], annotations=annotations, omit_sample_type='SV_WES',
-        )
-
-        annotations['structural'] = ['DEL']
-        await self._assert_expected_search([SV_VARIANT1, SV_VARIANT4], annotations=annotations, sample_data=SV_WGS_SAMPLE_DATA)
-
-        annotations = {'other': ['non_coding_transcript_exon_variant']}
-        await self._assert_expected_search(
-            [VARIANT1, SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_2, SELECTED_ANNOTATION_TRANSCRIPT_MULTI_FAMILY_VARIANT],
-            pathogenicity=pathogenicity, annotations=annotations, omit_sample_type='SV_WES',
-        )
-
-        await self._assert_expected_search(
-            [SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_2, SELECTED_TRANSCRIPT_MULTI_FAMILY_VARIANT],
-            gene_ids=LOCATION_SEARCH['gene_ids'][:1], annotations=annotations, omit_sample_type='SV_WES',
-        )
-
-    async def test_secondary_annotations_filter(self):
-        annotations_1 = {'missense': ['missense_variant']}
-        annotations_2 = {'other': ['intron_variant']}
-
-        await self._assert_expected_search(
-            [[VARIANT3, VARIANT4]], inheritance_mode='compound_het', omit_sample_type='SV_WES',
-            annotations=annotations_1, annotations_secondary=annotations_2,
-        )
-
-        await self._assert_expected_search(
-            [VARIANT2, [VARIANT3, VARIANT4]], inheritance_mode='recessive', omit_sample_type='SV_WES',
-            annotations=annotations_1, annotations_secondary=annotations_2,
-        )
-
-        await self._assert_expected_search(
-            [[VARIANT3, VARIANT4]], inheritance_mode='recessive', omit_sample_type='SV_WES',
-            annotations=annotations_2, annotations_secondary=annotations_1,
-        )
-
-        sv_annotations_1 = {'structural': ['INS']}
-        sv_annotations_2 = {'structural': ['DEL'], 'structural_consequence': ['INTRONIC']}
-
-        await self._assert_expected_search(
-            [[SV_VARIANT1, SV_VARIANT2]], sample_data=SV_WGS_SAMPLE_DATA, inheritance_mode='compound_het',
-            annotations=sv_annotations_1, annotations_secondary=sv_annotations_2,
-        )
-
-        await self._assert_expected_search(
-            [[SV_VARIANT1, SV_VARIANT2], SV_VARIANT4], sample_data=SV_WGS_SAMPLE_DATA, inheritance_mode='recessive',
-            annotations=sv_annotations_2, annotations_secondary=sv_annotations_1,
-        )
-
-        pathogenicity = {'clinvar': ['likely_pathogenic', 'vus_or_conflicting']}
-        await self._assert_expected_search(
-            [VARIANT2, [VARIANT3, VARIANT4]], inheritance_mode='recessive', omit_sample_type='SV_WES',
-            annotations=annotations_2, annotations_secondary=annotations_1, pathogenicity=pathogenicity,
-        )
-
-        screen_annotations = {'SCREEN': ['CTCF-only']}
-        await self._assert_expected_search(
-            [], inheritance_mode='recessive', omit_sample_type='SV_WES',
-            annotations=screen_annotations, annotations_secondary=annotations_1,
-        )
-
-        await self._assert_expected_search(
-            [[VARIANT3, VARIANT4]], inheritance_mode='recessive', omit_sample_type='SV_WES',
-            annotations=screen_annotations, annotations_secondary=annotations_2,
-        )
-
-        selected_transcript_annotations = {'other': ['non_coding_transcript_exon_variant']}
-        await self._assert_expected_search(
-            [VARIANT2, [SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_3, VARIANT4]], inheritance_mode='recessive',
-            annotations=screen_annotations, annotations_secondary=selected_transcript_annotations,
-            pathogenicity=pathogenicity, omit_sample_type='SV_WES',
-        )
-
-        await self._assert_expected_search(
-            [SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_2, [SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_3, VARIANT4]],
-            annotations={**selected_transcript_annotations, **screen_annotations}, annotations_secondary=annotations_2,
-            inheritance_mode='recessive', omit_sample_type='SV_WES',
-        )
-
-    async def test_in_silico_filter(self):
-        in_silico = {'eigen': '5.5', 'mut_taster': 'P'}
-        await self._assert_expected_search(
-            [VARIANT1, VARIANT2, VARIANT4], in_silico=in_silico, omit_sample_type='SV_WES',
-        )
-
-        in_silico['requireScore'] = True
-        await self._assert_expected_search(
-            [VARIANT2, VARIANT4], in_silico=in_silico, omit_sample_type='SV_WES',
-        )
-
-        await self._assert_expected_search(
-            [SV_VARIANT4], sample_data=SV_WGS_SAMPLE_DATA, in_silico={'strvctvre': 0.1, 'requireScore': True},
-        )
-
-    async def test_search_errors(self):
-        search_body = get_hail_search_body(sample_data=FAMILY_2_MISSING_SAMPLE_DATA)
-        async with self.client.request('POST', '/search', json=search_body) as resp:
-            self.assertEqual(resp.status, 400)
-            reason = resp.reason
-        self.assertEqual(reason, 'The following samples are available in seqr but missing the loaded data: NA19675, NA19678')
-
-        search_body = get_hail_search_body(sample_data=MULTI_PROJECT_MISSING_SAMPLE_DATA)
-        async with self.client.request('POST', '/search', json=search_body) as resp:
-            self.assertEqual(resp.status, 400)
-            reason = resp.reason
-        self.assertEqual(reason, 'The following samples are available in seqr but missing the loaded data: NA19675, NA19678')
-
-        search_body = get_hail_search_body(
-            intervals=LOCATION_SEARCH['intervals'] + ['1:1-99999999999'], omit_sample_type='SV_WES',
-        )
-        async with self.client.request('POST', '/search', json=search_body) as resp:
-            self.assertEqual(resp.status, 400)
-            reason = resp.reason
-        self.assertEqual(reason, 'Invalid intervals: 1:1-99999999999')
-
-    async def test_sort(self):
-        await self._assert_expected_search(
-            [_sorted(VARIANT2, [11, 11]),  _sorted(VARIANT4, [11, 11]), _sorted(MULTI_FAMILY_VARIANT, [22, 24]),
-             _sorted(VARIANT1, [None, None])], omit_sample_type='SV_WES', sort='protein_consequence',
-        )
-
-        await self._assert_expected_search(
-            [_sorted(SV_VARIANT1, [11]), _sorted(SV_VARIANT2, [12]), _sorted(SV_VARIANT3, [12]), _sorted(SV_VARIANT4, [12])],
-             sample_data=SV_WGS_SAMPLE_DATA, sort='protein_consequence',
-        )
-
-        await self._assert_expected_search(
-            [_sorted(VARIANT4, [11, 11]), _sorted(SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_2, [11, 22]),
-             _sorted(SELECTED_ANNOTATION_TRANSCRIPT_MULTI_FAMILY_VARIANT, [22, 22])],
-            omit_sample_type='SV_WES', sort='protein_consequence',
-            annotations={'other': ['non_coding_transcript_exon_variant'], 'splice_ai': '0'},
-        )
-
-        await self._assert_expected_search(
-            [_sorted(VARIANT1, [4]), _sorted(VARIANT2, [8]), _sorted(MULTI_FAMILY_VARIANT, [12.5]),
-             _sorted(VARIANT4, [12.5])], omit_sample_type='SV_WES', sort='pathogenicity',
-        )
-
-        await self._assert_expected_search(
-            [_sorted(VARIANT1, [4, None]), _sorted(VARIANT2, [8, 3]), _sorted(MULTI_FAMILY_VARIANT, [12.5, None]),
-             _sorted(VARIANT4, [12.5, None])], omit_sample_type='SV_WES', sort='pathogenicity_hgmd',
-        )
-
-        await self._assert_expected_search(
-            [_sorted(VARIANT2, [0]), _sorted(VARIANT4, [0.00026519427774474025]),
-             _sorted(VARIANT1, [0.034449315071105957]), _sorted(MULTI_FAMILY_VARIANT, [0.38041073083877563])],
-            omit_sample_type='SV_WES', sort='gnomad',
-        )
-
-        await self._assert_expected_search(
-            [_sorted(VARIANT1, [0]), _sorted(MULTI_FAMILY_VARIANT, [0]), _sorted(VARIANT4, [0]),
-             _sorted(VARIANT2, [0.28899794816970825])], omit_sample_type='SV_WES', sort='gnomad_exomes',
-        )
-
-        await self._assert_expected_search(
-            [_sorted(VARIANT4, [0.02222222276031971]), _sorted(VARIANT1, [0.10000000149011612]),
-             _sorted(VARIANT2, [0.31111112236976624]), _sorted(MULTI_FAMILY_VARIANT, [0.6666666865348816])],
-            omit_sample_type='SV_WES', sort='callset_af',
-        )
-
-        await self._assert_expected_search(
-            [_sorted(VARIANT4, [-29.899999618530273]), _sorted(VARIANT2, [-20.899999618530273]),
-             _sorted(VARIANT1, [-4.668000221252441]), _sorted(MULTI_FAMILY_VARIANT, [-2.753999948501587]), ],
-            omit_sample_type='SV_WES', sort='cadd',
-        )
-
-        await self._assert_expected_search(
-            [_sorted(VARIANT4, [-0.5260000228881836]), _sorted(VARIANT2, [-0.19699999690055847]),
-             _sorted(VARIANT1, [None]), _sorted(MULTI_FAMILY_VARIANT, [None])], omit_sample_type='SV_WES', sort='revel',
-        )
-
-        await self._assert_expected_search(
-            [_sorted(MULTI_FAMILY_VARIANT, [-0.009999999776482582]), _sorted(VARIANT2, [0]), _sorted(VARIANT4, [0]),
-             _sorted(VARIANT1, [None])], omit_sample_type='SV_WES', sort='splice_ai',
-        )
-
-        await self._assert_expected_search(
-            [_sorted(MULTI_FAMILY_VARIANT, [0, -2]), _sorted(VARIANT2, [0, -1]), _sorted(VARIANT4, [0, -1]), _sorted(VARIANT1, [1, 0])],
-            omit_sample_type='SV_WES', sort='in_omim', sort_metadata=['ENSG00000177000', 'ENSG00000097046'],
-        )
-
-        await self._assert_expected_search(
-            [_sorted(VARIANT2, [0, -1]), _sorted(MULTI_FAMILY_VARIANT, [1, -1]), _sorted(VARIANT1, [1, 0]), _sorted(VARIANT4, [1, 0])],
-            omit_sample_type='SV_WES', sort='in_omim', sort_metadata=['ENSG00000177000'],
-        )
-
-        await self._assert_expected_search(
-            [_sorted(VARIANT2, [2, 2]), _sorted(MULTI_FAMILY_VARIANT, [4, 2]), _sorted(VARIANT4, [4, 4]),
-             _sorted(VARIANT1, [None, None])], omit_sample_type='SV_WES', sort='constraint',
-            sort_metadata={'ENSG00000177000': 2, 'ENSG00000097046': 4},
-        )
-
-        await self._assert_expected_search(
-            [_sorted(VARIANT2, [3, 3]), _sorted(MULTI_FAMILY_VARIANT, [None, 3]), _sorted(VARIANT1, [None, None]),
-             _sorted(VARIANT4, [None, None])], omit_sample_type='SV_WES', sort='prioritized_gene',
-            sort_metadata={'ENSG00000177000': 3},
-        )
-
-        # size sort only applies to SVs, so has no impact on other variants
-        await self._assert_expected_search(
-            [VARIANT1, VARIANT2, MULTI_FAMILY_VARIANT, VARIANT4], sort='size', omit_sample_type='SV_WES',
-        )
-
-        await self._assert_expected_search(
-            [_sorted(SV_VARIANT4, [-46343]), _sorted(SV_VARIANT1, [-104]), _sorted(SV_VARIANT2, [-50]),
-             _sorted(SV_VARIANT3, [-50])], sample_data=SV_WGS_SAMPLE_DATA, sort='size',
-        )
-
-        # sort applies to compound hets
-        await self._assert_expected_search(
-            [_sorted(VARIANT2, [11, 11]), [_sorted(VARIANT4, [11, 11]),  _sorted(VARIANT3, [22, 24])]],
-            sort='protein_consequence', inheritance_mode='recessive', omit_sample_type='SV_WES', **COMP_HET_ALL_PASS_FILTERS,
-        )
-
-        await self._assert_expected_search(
-            [[_sorted(VARIANT4, [-0.5260000228881836]), _sorted(VARIANT3, [None])],
-             _sorted(VARIANT2, [-0.19699999690055847])],
-            sort='revel', inheritance_mode='recessive', omit_sample_type='SV_WES', **COMP_HET_ALL_PASS_FILTERS,
+            [SV_VARIANT1, SV_VARIANT2, SV_VARIANT3, SV_VARIANT4], sample_data=SV_WGS_SAMPLE_DATA, gene_counts={
+                'ENSG00000171621': {'total': 2, 'families': {'F000011_11': 2}},
+                'ENSG00000083544': {'total': 1, 'families': {'F000011_11': 1}},
+                'ENSG00000184986': {'total': 1, 'families': {'F000011_11': 1}},
+                'null': {'total': 1, 'families': {'F000011_11': 1}},
+            }
         )
 
-        await self._assert_expected_search(
-            [[_sorted(VARIANT3, [-0.009999999776482582]),  _sorted(VARIANT4, [0])], _sorted(VARIANT2, [0])],
-            sort='splice_ai', inheritance_mode='recessive', omit_sample_type='SV_WES', **COMP_HET_ALL_PASS_FILTERS,
-        )
+    # async def test_single_project_search(self):
+    #     await self._assert_expected_search(
+    #         [VARIANT1, VARIANT2, MULTI_FAMILY_VARIANT, VARIANT4], omit_sample_type='SV_WES', gene_counts={
+    #             'ENSG00000097046': {'total': 3, 'families': {'F000002_2': 2, 'F000003_3': 1}},
+    #             'ENSG00000177000': {'total': 3, 'families': {'F000002_2': 2, 'F000003_3': 1}},
+    #         }
+    #     )
+    #
+    # async def test_multi_project_search(self):
+    #     await self._assert_expected_search(
+    #         [PROJECT_2_VARIANT, MULTI_PROJECT_VARIANT1, MULTI_PROJECT_VARIANT2, VARIANT3, VARIANT4],
+    #         gene_counts=GENE_COUNTS, sample_data=MULTI_PROJECT_SAMPLE_DATA,
+    #     )
+    #
+    # async def test_inheritance_filter(self):
+    #     inheritance_mode = 'any_affected'
+    #     await self._assert_expected_search(
+    #         [VARIANT1, VARIANT2, MULTI_FAMILY_VARIANT, VARIANT4], inheritance_mode=inheritance_mode, omit_sample_type='SV_WES',
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [SV_VARIANT1, SV_VARIANT2, SV_VARIANT3, SV_VARIANT4], inheritance_mode=inheritance_mode, sample_data=SV_WGS_SAMPLE_DATA,
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [SV_VARIANT2], inheritance_mode=inheritance_mode, annotations=NEW_SV_FILTER, sample_data=SV_WGS_SAMPLE_DATA,
+    #     )
+    #
+    #     inheritance_mode = 'de_novo'
+    #     await self._assert_expected_search(
+    #         [VARIANT1, FAMILY_3_VARIANT, VARIANT4], inheritance_mode=inheritance_mode, omit_sample_type='SV_WES',
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [SV_VARIANT1], inheritance_mode=inheritance_mode,  sample_data=SV_WGS_SAMPLE_DATA,
+    #     )
+    #
+    #     inheritance_mode = 'x_linked_recessive'
+    #     await self._assert_expected_search([], inheritance_mode=inheritance_mode, omit_sample_type='SV_WES')
+    #     await self._assert_expected_search([], inheritance_mode=inheritance_mode, sample_data=SV_WGS_SAMPLE_DATA)
+    #
+    #     inheritance_mode = 'homozygous_recessive'
+    #     await self._assert_expected_search(
+    #         [VARIANT2], inheritance_mode=inheritance_mode, omit_sample_type='SV_WES',
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [PROJECT_2_VARIANT1, VARIANT2], inheritance_mode=inheritance_mode, sample_data=MULTI_PROJECT_SAMPLE_DATA,
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [SV_VARIANT4], inheritance_mode=inheritance_mode, sample_data=SV_WGS_SAMPLE_DATA,
+    #     )
+    #
+    #     gt_inheritance_filter = {'genotype': {'I000006_hg00733': 'has_alt', 'I000005_hg00732': 'ref_ref'}}
+    #     await self._assert_expected_search(
+    #         [VARIANT2, VARIANT3], inheritance_filter=gt_inheritance_filter, sample_data=FAMILY_2_VARIANT_SAMPLE_DATA)
+    #
+    #     inheritance_mode = 'compound_het'
+    #     await self._assert_expected_search(
+    #         [[VARIANT3, VARIANT4]], inheritance_mode=inheritance_mode, sample_data=MULTI_PROJECT_SAMPLE_DATA, gene_counts={
+    #             'ENSG00000097046': {'total': 2, 'families': {'F000002_2': 2}},
+    #             'ENSG00000177000': {'total': 1, 'families': {'F000002_2': 1}},
+    #         }, **COMP_HET_ALL_PASS_FILTERS,
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [[SV_VARIANT1, SV_VARIANT2]], inheritance_mode=inheritance_mode, sample_data=SV_WGS_SAMPLE_DATA,
+    #         **COMP_HET_ALL_PASS_FILTERS,
+    #     )
+    #
+    #     inheritance_mode = 'recessive'
+    #     await self._assert_expected_search(
+    #         [PROJECT_2_VARIANT1, VARIANT2, [VARIANT3, VARIANT4]], inheritance_mode=inheritance_mode, gene_counts={
+    #             'ENSG00000097046': {'total': 2, 'families': {'F000002_2': 2}},
+    #             'ENSG00000177000': {'total': 2, 'families': {'F000002_2': 2}},
+    #         }, sample_data=MULTI_PROJECT_SAMPLE_DATA, **COMP_HET_ALL_PASS_FILTERS,
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [[SV_VARIANT1, SV_VARIANT2], SV_VARIANT4], inheritance_mode=inheritance_mode, sample_data=SV_WGS_SAMPLE_DATA,
+    #         **COMP_HET_ALL_PASS_FILTERS,
+    #     )
+    #
+    # async def test_quality_filter(self):
+    #     quality_filter = {'vcf_filter': 'pass'}
+    #     await self._assert_expected_search(
+    #         [VARIANT1, VARIANT2, MULTI_FAMILY_VARIANT], quality_filter=quality_filter, omit_sample_type='SV_WES',
+    #     )
+    #
+    #     await self._assert_expected_search([SV_VARIANT4], quality_filter=quality_filter, sample_data=SV_WGS_SAMPLE_DATA)
+    #
+    #     await self._assert_expected_search(
+    #         [VARIANT2, MULTI_FAMILY_VARIANT], quality_filter={'min_gq': 40}, omit_sample_type='SV_WES',
+    #     )
+    #
+    #     sv_quality_filter = {'min_gq_sv': 40}
+    #     await self._assert_expected_search(
+    #         [SV_VARIANT3, SV_VARIANT4], quality_filter=sv_quality_filter, sample_data=SV_WGS_SAMPLE_DATA,
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [], annotations=NEW_SV_FILTER, quality_filter=sv_quality_filter, sample_data=SV_WGS_SAMPLE_DATA,
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [VARIANT2, MULTI_FAMILY_VARIANT], quality_filter={'min_gq': 40, 'vcf_filter': 'pass'}, omit_sample_type='SV_WES',
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [VARIANT1, VARIANT2, MULTI_FAMILY_VARIANT], quality_filter={'min_gq': 60, 'affected_only': True},
+    #         omit_sample_type='SV_WES',
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [SV_VARIANT3, SV_VARIANT4], quality_filter={'min_gq_sv': 60, 'affected_only': True}, sample_data=SV_WGS_SAMPLE_DATA,
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [VARIANT1, VARIANT2, FAMILY_3_VARIANT], quality_filter={'min_ab': 50}, omit_sample_type='SV_WES',
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [VARIANT2, VARIANT3], quality_filter={'min_ab': 70, 'affected_only': True},
+    #         omit_sample_type='SV_WES',
+    #     )
+    #
+    #     quality_filter = {'min_gq': 40, 'min_ab': 50}
+    #     await self._assert_expected_search(
+    #         [VARIANT2, FAMILY_3_VARIANT], quality_filter=quality_filter, omit_sample_type='SV_WES',
+    #     )
+    #
+    #     annotations = {'splice_ai': '0.0'}  # Ensures no variants are filtered out by annotation/path filters
+    #     await self._assert_expected_search(
+    #         [VARIANT1, VARIANT2, FAMILY_3_VARIANT], quality_filter=quality_filter, omit_sample_type='SV_WES',
+    #         annotations=annotations, pathogenicity={'clinvar': ['likely_pathogenic', 'vus_or_conflicting']},
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [VARIANT2, FAMILY_3_VARIANT], quality_filter=quality_filter, omit_sample_type='SV_WES',
+    #         annotations=annotations, pathogenicity={'clinvar': ['pathogenic']},
+    #     )
+    #
+    # async def test_location_search(self):
+    #     await self._assert_expected_search(
+    #         [VARIANT2, MULTI_FAMILY_VARIANT, VARIANT4], omit_sample_type='SV_WES', **LOCATION_SEARCH,
+    #     )
+    #
+    #     sv_intervals = ['1:9310023-9380264']
+    #     await self._assert_expected_search(
+    #         [SV_VARIANT1, SV_VARIANT2], sample_data=SV_WGS_SAMPLE_DATA, intervals=sv_intervals, gene_ids=['ENSG00000171621'],
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [VARIANT1], omit_sample_type='SV_WES', **EXCLUDE_LOCATION_SEARCH,
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [SV_VARIANT3, SV_VARIANT4], sample_data=SV_WGS_SAMPLE_DATA, intervals=sv_intervals, exclude_intervals=True,
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [SELECTED_TRANSCRIPT_MULTI_FAMILY_VARIANT],  omit_sample_type='SV_WES',
+    #         intervals=LOCATION_SEARCH['intervals'][-1:], gene_ids=LOCATION_SEARCH['gene_ids'][:1]
+    #     )
+    #
+    # async def test_variant_id_search(self):
+    #     await self._assert_expected_search([VARIANT2], omit_sample_type='SV_WES', **RSID_SEARCH)
+    #
+    #     await self._assert_expected_search([VARIANT1], omit_sample_type='SV_WES', **VARIANT_ID_SEARCH)
+    #
+    #     await self._assert_expected_search(
+    #         [VARIANT1], omit_sample_type='SV_WES', variant_ids=VARIANT_ID_SEARCH['variant_ids'][:1],
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [], omit_sample_type='SV_WES', variant_ids=VARIANT_ID_SEARCH['variant_ids'][1:],
+    #     )
+    #
+    #     await self._assert_expected_search([SV_VARIANT2, SV_VARIANT4], sample_data=SV_WGS_SAMPLE_DATA, variant_keys=[
+    #         'cohort_2911.chr1.final_cleanup_INS_chr1_160', 'phase2_DEL_chr14_4640',
+    #     ])
+    #
+    # async def test_frequency_filter(self):
+    #     await self._assert_expected_search(
+    #         [VARIANT1, VARIANT4], frequencies={'seqr': {'af': 0.2}}, omit_sample_type='SV_WES',
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [MULTI_FAMILY_VARIANT, VARIANT4], frequencies={'seqr': {'ac': 4}}, omit_sample_type='SV_WES',
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [MULTI_FAMILY_VARIANT, VARIANT4], frequencies={'seqr': {'hh': 1}}, omit_sample_type='SV_WES',
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [VARIANT4], frequencies={'seqr': {'ac': 4, 'hh': 0}}, omit_sample_type='SV_WES',
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [SV_VARIANT1], frequencies={'sv_callset': {'af': 0.05}}, sample_data=SV_WGS_SAMPLE_DATA,
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [VARIANT1, VARIANT2, VARIANT4], frequencies={'gnomad_genomes': {'af': 0.05}}, omit_sample_type='SV_WES',
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [VARIANT2, VARIANT4], frequencies={'gnomad_genomes': {'af': 0.05, 'hh': 1}}, omit_sample_type='SV_WES',
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [VARIANT2, VARIANT4], frequencies={'gnomad_genomes': {'af': 0.005}}, omit_sample_type='SV_WES',
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [SV_VARIANT1, SV_VARIANT3, SV_VARIANT4], frequencies={'gnomad_svs': {'af': 0.001}}, sample_data=SV_WGS_SAMPLE_DATA,
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [VARIANT4], frequencies={'seqr': {'af': 0.2}, 'gnomad_genomes': {'ac': 50}},
+    #         omit_sample_type='SV_WES',
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [VARIANT1, VARIANT2, MULTI_FAMILY_VARIANT, VARIANT4], frequencies={'seqr': {}, 'gnomad_genomes': {'af': None}},
+    #         omit_sample_type='SV_WES',
+    #     )
+    #
+    #     annotations = {'splice_ai': '0.0'}  # Ensures no variants are filtered out by annotation/path filters
+    #     await self._assert_expected_search(
+    #         [VARIANT1, VARIANT2, VARIANT4], frequencies={'gnomad_genomes': {'af': 0.01}}, omit_sample_type='SV_WES',
+    #         annotations=annotations, pathogenicity={'clinvar': ['pathogenic', 'likely_pathogenic', 'vus_or_conflicting']},
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [VARIANT2, VARIANT4], frequencies={'gnomad_genomes': {'af': 0.01}}, omit_sample_type='SV_WES',
+    #         annotations=annotations, pathogenicity={'clinvar': ['pathogenic', 'vus_or_conflicting']},
+    #     )
+    #
+    # async def test_annotations_filter(self):
+    #     await self._assert_expected_search([VARIANT2], pathogenicity={'hgmd': ['hgmd_other']}, omit_sample_type='SV_WES')
+    #
+    #     pathogenicity = {'clinvar': ['likely_pathogenic', 'vus_or_conflicting', 'benign']}
+    #     await self._assert_expected_search([VARIANT1, VARIANT2], pathogenicity=pathogenicity, omit_sample_type='SV_WES')
+    #
+    #     pathogenicity['clinvar'] = pathogenicity['clinvar'][:1]
+    #     await self._assert_expected_search(
+    #         [VARIANT1, VARIANT4], pathogenicity=pathogenicity, annotations={'SCREEN': ['CTCF-only', 'DNase-only']},
+    #         omit_sample_type='SV_WES',
+    #     )
+    #
+    #     annotations = {
+    #         'missense': ['missense_variant'], 'in_frame': ['inframe_insertion', 'inframe_deletion'], 'frameshift': None,
+    #         'structural_consequence': ['INTRONIC'],
+    #     }
+    #     await self._assert_expected_search(
+    #         [VARIANT1, VARIANT2, VARIANT4], pathogenicity=pathogenicity, annotations=annotations, omit_sample_type='SV_WES',
+    #     )
+    #
+    #     await self._assert_expected_search([VARIANT2, VARIANT4], annotations=annotations, omit_sample_type='SV_WES')
+    #
+    #     await self._assert_expected_search([SV_VARIANT1], annotations=annotations, sample_data=SV_WGS_SAMPLE_DATA)
+    #
+    #     annotations['splice_ai'] = '0.005'
+    #     await self._assert_expected_search(
+    #         [VARIANT2, MULTI_FAMILY_VARIANT, VARIANT4], annotations=annotations, omit_sample_type='SV_WES',
+    #     )
+    #
+    #     annotations['structural'] = ['DEL']
+    #     await self._assert_expected_search([SV_VARIANT1, SV_VARIANT4], annotations=annotations, sample_data=SV_WGS_SAMPLE_DATA)
+    #
+    #     annotations = {'other': ['non_coding_transcript_exon_variant']}
+    #     await self._assert_expected_search(
+    #         [VARIANT1, SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_2, SELECTED_ANNOTATION_TRANSCRIPT_MULTI_FAMILY_VARIANT],
+    #         pathogenicity=pathogenicity, annotations=annotations, omit_sample_type='SV_WES',
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_2, SELECTED_TRANSCRIPT_MULTI_FAMILY_VARIANT],
+    #         gene_ids=LOCATION_SEARCH['gene_ids'][:1], annotations=annotations, omit_sample_type='SV_WES',
+    #     )
+    #
+    # async def test_secondary_annotations_filter(self):
+    #     annotations_1 = {'missense': ['missense_variant']}
+    #     annotations_2 = {'other': ['intron_variant']}
+    #
+    #     await self._assert_expected_search(
+    #         [[VARIANT3, VARIANT4]], inheritance_mode='compound_het', omit_sample_type='SV_WES',
+    #         annotations=annotations_1, annotations_secondary=annotations_2,
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [VARIANT2, [VARIANT3, VARIANT4]], inheritance_mode='recessive', omit_sample_type='SV_WES',
+    #         annotations=annotations_1, annotations_secondary=annotations_2,
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [[VARIANT3, VARIANT4]], inheritance_mode='recessive', omit_sample_type='SV_WES',
+    #         annotations=annotations_2, annotations_secondary=annotations_1,
+    #     )
+    #
+    #     sv_annotations_1 = {'structural': ['INS']}
+    #     sv_annotations_2 = {'structural': ['DEL'], 'structural_consequence': ['INTRONIC']}
+    #
+    #     await self._assert_expected_search(
+    #         [[SV_VARIANT1, SV_VARIANT2]], sample_data=SV_WGS_SAMPLE_DATA, inheritance_mode='compound_het',
+    #         annotations=sv_annotations_1, annotations_secondary=sv_annotations_2,
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [[SV_VARIANT1, SV_VARIANT2], SV_VARIANT4], sample_data=SV_WGS_SAMPLE_DATA, inheritance_mode='recessive',
+    #         annotations=sv_annotations_2, annotations_secondary=sv_annotations_1,
+    #     )
+    #
+    #     pathogenicity = {'clinvar': ['likely_pathogenic', 'vus_or_conflicting']}
+    #     await self._assert_expected_search(
+    #         [VARIANT2, [VARIANT3, VARIANT4]], inheritance_mode='recessive', omit_sample_type='SV_WES',
+    #         annotations=annotations_2, annotations_secondary=annotations_1, pathogenicity=pathogenicity,
+    #     )
+    #
+    #     screen_annotations = {'SCREEN': ['CTCF-only']}
+    #     await self._assert_expected_search(
+    #         [], inheritance_mode='recessive', omit_sample_type='SV_WES',
+    #         annotations=screen_annotations, annotations_secondary=annotations_1,
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [[VARIANT3, VARIANT4]], inheritance_mode='recessive', omit_sample_type='SV_WES',
+    #         annotations=screen_annotations, annotations_secondary=annotations_2,
+    #     )
+    #
+    #     selected_transcript_annotations = {'other': ['non_coding_transcript_exon_variant']}
+    #     await self._assert_expected_search(
+    #         [VARIANT2, [SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_3, VARIANT4]], inheritance_mode='recessive',
+    #         annotations=screen_annotations, annotations_secondary=selected_transcript_annotations,
+    #         pathogenicity=pathogenicity, omit_sample_type='SV_WES',
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_2, [SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_3, VARIANT4]],
+    #         annotations={**selected_transcript_annotations, **screen_annotations}, annotations_secondary=annotations_2,
+    #         inheritance_mode='recessive', omit_sample_type='SV_WES',
+    #     )
+    #
+    # async def test_in_silico_filter(self):
+    #     in_silico = {'eigen': '5.5', 'mut_taster': 'P'}
+    #     await self._assert_expected_search(
+    #         [VARIANT1, VARIANT2, VARIANT4], in_silico=in_silico, omit_sample_type='SV_WES',
+    #     )
+    #
+    #     in_silico['requireScore'] = True
+    #     await self._assert_expected_search(
+    #         [VARIANT2, VARIANT4], in_silico=in_silico, omit_sample_type='SV_WES',
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [SV_VARIANT4], sample_data=SV_WGS_SAMPLE_DATA, in_silico={'strvctvre': 0.1, 'requireScore': True},
+    #     )
+    #
+    # async def test_search_errors(self):
+    #     search_body = get_hail_search_body(sample_data=FAMILY_2_MISSING_SAMPLE_DATA)
+    #     async with self.client.request('POST', '/search', json=search_body) as resp:
+    #         self.assertEqual(resp.status, 400)
+    #         reason = resp.reason
+    #     self.assertEqual(reason, 'The following samples are available in seqr but missing the loaded data: NA19675, NA19678')
+    #
+    #     search_body = get_hail_search_body(sample_data=MULTI_PROJECT_MISSING_SAMPLE_DATA)
+    #     async with self.client.request('POST', '/search', json=search_body) as resp:
+    #         self.assertEqual(resp.status, 400)
+    #         reason = resp.reason
+    #     self.assertEqual(reason, 'The following samples are available in seqr but missing the loaded data: NA19675, NA19678')
+    #
+    #     search_body = get_hail_search_body(
+    #         intervals=LOCATION_SEARCH['intervals'] + ['1:1-99999999999'], omit_sample_type='SV_WES',
+    #     )
+    #     async with self.client.request('POST', '/search', json=search_body) as resp:
+    #         self.assertEqual(resp.status, 400)
+    #         reason = resp.reason
+    #     self.assertEqual(reason, 'Invalid intervals: 1:1-99999999999')
+    #
+    # async def test_sort(self):
+    #     await self._assert_expected_search(
+    #         [_sorted(VARIANT2, [11, 11]),  _sorted(VARIANT4, [11, 11]), _sorted(MULTI_FAMILY_VARIANT, [22, 24]),
+    #          _sorted(VARIANT1, [None, None])], omit_sample_type='SV_WES', sort='protein_consequence',
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [_sorted(SV_VARIANT1, [11]), _sorted(SV_VARIANT2, [12]), _sorted(SV_VARIANT3, [12]), _sorted(SV_VARIANT4, [12])],
+    #          sample_data=SV_WGS_SAMPLE_DATA, sort='protein_consequence',
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [_sorted(VARIANT4, [11, 11]), _sorted(SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_2, [11, 22]),
+    #          _sorted(SELECTED_ANNOTATION_TRANSCRIPT_MULTI_FAMILY_VARIANT, [22, 22])],
+    #         omit_sample_type='SV_WES', sort='protein_consequence',
+    #         annotations={'other': ['non_coding_transcript_exon_variant'], 'splice_ai': '0'},
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [_sorted(VARIANT1, [4]), _sorted(VARIANT2, [8]), _sorted(MULTI_FAMILY_VARIANT, [12.5]),
+    #          _sorted(VARIANT4, [12.5])], omit_sample_type='SV_WES', sort='pathogenicity',
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [_sorted(VARIANT1, [4, None]), _sorted(VARIANT2, [8, 3]), _sorted(MULTI_FAMILY_VARIANT, [12.5, None]),
+    #          _sorted(VARIANT4, [12.5, None])], omit_sample_type='SV_WES', sort='pathogenicity_hgmd',
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [_sorted(VARIANT2, [0]), _sorted(VARIANT4, [0.00026519427774474025]),
+    #          _sorted(VARIANT1, [0.034449315071105957]), _sorted(MULTI_FAMILY_VARIANT, [0.38041073083877563])],
+    #         omit_sample_type='SV_WES', sort='gnomad',
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [_sorted(VARIANT1, [0]), _sorted(MULTI_FAMILY_VARIANT, [0]), _sorted(VARIANT4, [0]),
+    #          _sorted(VARIANT2, [0.28899794816970825])], omit_sample_type='SV_WES', sort='gnomad_exomes',
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [_sorted(VARIANT4, [0.02222222276031971]), _sorted(VARIANT1, [0.10000000149011612]),
+    #          _sorted(VARIANT2, [0.31111112236976624]), _sorted(MULTI_FAMILY_VARIANT, [0.6666666865348816])],
+    #         omit_sample_type='SV_WES', sort='callset_af',
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [_sorted(VARIANT4, [-29.899999618530273]), _sorted(VARIANT2, [-20.899999618530273]),
+    #          _sorted(VARIANT1, [-4.668000221252441]), _sorted(MULTI_FAMILY_VARIANT, [-2.753999948501587]), ],
+    #         omit_sample_type='SV_WES', sort='cadd',
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [_sorted(VARIANT4, [-0.5260000228881836]), _sorted(VARIANT2, [-0.19699999690055847]),
+    #          _sorted(VARIANT1, [None]), _sorted(MULTI_FAMILY_VARIANT, [None])], omit_sample_type='SV_WES', sort='revel',
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [_sorted(MULTI_FAMILY_VARIANT, [-0.009999999776482582]), _sorted(VARIANT2, [0]), _sorted(VARIANT4, [0]),
+    #          _sorted(VARIANT1, [None])], omit_sample_type='SV_WES', sort='splice_ai',
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [_sorted(MULTI_FAMILY_VARIANT, [0, -2]), _sorted(VARIANT2, [0, -1]), _sorted(VARIANT4, [0, -1]), _sorted(VARIANT1, [1, 0])],
+    #         omit_sample_type='SV_WES', sort='in_omim', sort_metadata=['ENSG00000177000', 'ENSG00000097046'],
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [_sorted(VARIANT2, [0, -1]), _sorted(MULTI_FAMILY_VARIANT, [1, -1]), _sorted(VARIANT1, [1, 0]), _sorted(VARIANT4, [1, 0])],
+    #         omit_sample_type='SV_WES', sort='in_omim', sort_metadata=['ENSG00000177000'],
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [_sorted(VARIANT2, [2, 2]), _sorted(MULTI_FAMILY_VARIANT, [4, 2]), _sorted(VARIANT4, [4, 4]),
+    #          _sorted(VARIANT1, [None, None])], omit_sample_type='SV_WES', sort='constraint',
+    #         sort_metadata={'ENSG00000177000': 2, 'ENSG00000097046': 4},
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [_sorted(VARIANT2, [3, 3]), _sorted(MULTI_FAMILY_VARIANT, [None, 3]), _sorted(VARIANT1, [None, None]),
+    #          _sorted(VARIANT4, [None, None])], omit_sample_type='SV_WES', sort='prioritized_gene',
+    #         sort_metadata={'ENSG00000177000': 3},
+    #     )
+    #
+    #     # size sort only applies to SVs, so has no impact on other variants
+    #     await self._assert_expected_search(
+    #         [VARIANT1, VARIANT2, MULTI_FAMILY_VARIANT, VARIANT4], sort='size', omit_sample_type='SV_WES',
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [_sorted(SV_VARIANT4, [-46343]), _sorted(SV_VARIANT1, [-104]), _sorted(SV_VARIANT2, [-50]),
+    #          _sorted(SV_VARIANT3, [-50])], sample_data=SV_WGS_SAMPLE_DATA, sort='size',
+    #     )
+    #
+    #     # sort applies to compound hets
+    #     await self._assert_expected_search(
+    #         [_sorted(VARIANT2, [11, 11]), [_sorted(VARIANT4, [11, 11]),  _sorted(VARIANT3, [22, 24])]],
+    #         sort='protein_consequence', inheritance_mode='recessive', omit_sample_type='SV_WES', **COMP_HET_ALL_PASS_FILTERS,
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [[_sorted(VARIANT4, [-0.5260000228881836]), _sorted(VARIANT3, [None])],
+    #          _sorted(VARIANT2, [-0.19699999690055847])],
+    #         sort='revel', inheritance_mode='recessive', omit_sample_type='SV_WES', **COMP_HET_ALL_PASS_FILTERS,
+    #     )
+    #
+    #     await self._assert_expected_search(
+    #         [[_sorted(VARIANT3, [-0.009999999776482582]),  _sorted(VARIANT4, [0])], _sorted(VARIANT2, [0])],
+    #         sort='splice_ai', inheritance_mode='recessive', omit_sample_type='SV_WES', **COMP_HET_ALL_PASS_FILTERS,
+    #     )
diff --git a/hail_search/test_utils.py b/hail_search/test_utils.py
index 993f5fc63f..446cd13040 100644
--- a/hail_search/test_utils.py
+++ b/hail_search/test_utils.py
@@ -528,6 +528,171 @@
     },
     '_sort': [14106694244],
 }
+GCNV_VARIANT1 = {
+    'variantId': 'suffix_95340_DUP',
+    'chrom': '14',
+    'pos': 22438910,
+    'end': 22469796,
+    'genomeVersion': '38',
+    'liftedOverGenomeVersion': '37',
+    'liftedOverChrom': '14',
+    'liftedOverPos': 22886546,
+    'rg37LocusEnd': {'contig': '14', 'position': 23058228},
+    'xpos': 14022417556,
+    'familyGuids': ['F000002_2'],
+    'genotypeFilters': '',
+    'genotypes': {
+        'I000004_hg00731': {
+            'sampleId': 'HG00731', 'individualGuid': 'I000004_hg00731', 'familyGuid': 'F000002_2',
+            'numAlt': 1, 'cn': 3, 'qs': 38, 'defragged': False, 'start': 22438910, 'end': 22469796, 'numExon': 0,
+            'geneIds': None, 'newCall': False, 'prevCall': True, 'prevOverlap': False,
+        },
+        'I000005_hg00732': {
+            'sampleId': 'HG00732', 'individualGuid': 'I000005_hg00732', 'familyGuid': 'F000002_2',
+            'numAlt': 0, 'cn': None, 'qs': None, 'defragged': None, 'start': None, 'end': None, 'numExon': None,
+            'geneIds': None, 'newCall': None, 'prevCall': None, 'prevOverlap': None,
+        },
+        'I000006_hg00733': {
+            'sampleId': 'HG00733', 'individualGuid': 'I000006_hg00733', 'familyGuid': 'F000002_2',
+            'numAlt': 0, 'cn': None, 'qs': None, 'defragged': None, 'start': None, 'end': None, 'numExon': None,
+            'geneIds': None, 'newCall': None, 'prevCall': None, 'prevOverlap': None,
+        }
+    },
+    'populations': {'sv_callset': {'af': 0.076492540538311, 'ac': 1763, 'an': 23048, 'hom': 0, 'het': 0}},
+    'predictions': {'strvctvre': 0.1809999942779541},
+    'numExon': 0,
+    'svType': 'DUP',
+    'transcripts': {
+        'ENSG00000129562': [{'geneId': 'ENSG00000129562', 'majorConsequence': 'COPY_GAIN'}],
+    },
+    '_sort': [14022417556],
+
+}
+GCNV_VARIANT2 = {
+    'variantId': 'suffix_124520_DUP',
+    'chrom': '16',
+    'pos': 29809156,
+    'end': 29815990,
+    'xpos': 16029802672,
+    'genomeVersion': '38',
+    'liftedOverGenomeVersion': '37',
+    'liftedOverChrom': '16',
+    'liftedOverPos': 29813993,
+    'rg37LocusEnd': {'contig': '16', 'position': 29831761},
+    'familyGuids': ['F000002_2'],
+    'genotypeFilters': '',
+    'genotypes': {
+        'I000004_hg00731': {
+            'sampleId': 'HG00731', 'individualGuid': 'I000004_hg00731', 'familyGuid': 'F000002_2',
+            'numAlt': 1, 'cn': 3, 'qs': 29, 'defragged': False, 'start': 29809156, 'end': 29815990, 'numExon': 8,
+            'geneIds': None, 'newCall': False, 'prevCall': True, 'prevOverlap': False,
+        },
+        'I000005_hg00732': {
+            'sampleId': 'HG00732', 'individualGuid': 'I000005_hg00732', 'familyGuid': 'F000002_2',
+            'numAlt': 1, 'cn': 3, 'qs': 46, 'defragged': False, 'start': 29809156, 'end': 29815990, 'numExon': 8,
+            'geneIds': None, 'newCall': False, 'prevCall': True, 'prevOverlap': False,
+        },
+        'I000006_hg00733': {
+            'sampleId': 'HG00733', 'individualGuid': 'I000006_hg00733', 'familyGuid': 'F000002_2',
+            'numAlt': 1, 'cn': 3, 'qs': 37, 'defragged': False, 'start': 29809156, 'end': 29815990, 'numExon': 8,
+            'geneIds': None, 'newCall': False, 'prevCall': True, 'prevOverlap': False,
+        }
+    },
+    'populations': {'sv_callset': {'af': 0.012322110123932362, 'ac': 284, 'an': 23047, 'hom': 0, 'het': 0}},
+    'predictions': {'strvctvre': 0.5479999780654907},
+    'numExon': 8,
+    'svType': 'DUP',
+    'transcripts': {
+        'ENSG00000013364': [{'geneId': 'ENSG00000013364', 'majorConsequence': 'LOF'}],
+        'ENSG00000079616': [{'geneId': 'ENSG00000079616', 'majorConsequence': 'LOF'}],
+        'ENSG00000103495': [{'geneId': 'ENSG00000103495', 'majorConsequence': 'COPY_GAIN'}],
+        'ENSG00000167371': [{'geneId': 'ENSG00000167371', 'majorConsequence': 'COPY_GAIN'}],
+        'ENSG00000280789': [{'geneId': 'ENSG00000280789', 'majorConsequence': 'LOF'}],
+        'ENSG00000280893': [{'geneId': 'ENSG00000280893', 'majorConsequence': 'COPY_GAIN'}],
+        'ENSG00000281348': [{'geneId': 'ENSG00000281348', 'majorConsequence': 'LOF'}],
+    },
+    '_sort': [16029802672],
+}
+GCNV_VARIANT3 = {
+    'variantId': 'suffix_140593_DUP',
+    'chrom': '17',
+    'pos': 38717327,
+    'end': 38719636,
+    'xpos': 17038717327,
+    'genomeVersion': '38',
+    'liftedOverGenomeVersion': '37',
+    'liftedOverChrom': '17',
+    'liftedOverPos': 36873580,
+    'rg37LocusEnd': {'contig': '17', 'position': 36876246},
+    'familyGuids': ['F000002_2'],
+    'genotypeFilters': '',
+    'genotypes': {
+        'I000004_hg00731': {
+            'sampleId': 'HG00731', 'individualGuid': 'I000004_hg00731', 'familyGuid': 'F000002_2',
+            'numAlt': 2, 'cn': 4, 'qs': 13, 'defragged': True, 'start': 38717327, 'end': 38719636, 'numExon': None,
+            'geneIds': None, 'newCall': True, 'prevCall': False, 'prevOverlap': False,
+        },
+        'I000005_hg00732': {
+            'sampleId': 'HG00732', 'individualGuid': 'I000005_hg00732', 'familyGuid': 'F000002_2',
+            'numAlt': 1, 'cn': 3, 'qs': 7, 'defragged': False, 'start': 38717327, 'end': 38719636, 'numExon': None,
+            'geneIds': None, 'newCall': False, 'prevCall': False, 'prevOverlap': True,
+        },
+        'I000006_hg00733': {
+            'sampleId': 'HG00733', 'individualGuid': 'I000006_hg00733', 'familyGuid': 'F000002_2', 'numAlt': 0,
+            'cn': None, 'qs': None, 'defragged': None, 'start': None, 'end': None, 'numExon': None, 'geneIds': None,
+            'newCall': None, 'prevCall': None, 'prevOverlap': None,
+        },
+    },
+    'populations': {'sv_callset': {'af': 0.0015185698866844177, 'ac': 35, 'an': 23048, 'hom': 0, 'het': 0}},
+    'predictions': {'strvctvre': 0.7860000133514404},
+    'numExon': 3,
+    'svType': 'DEL',
+    'transcripts': {
+        'ENSG00000275023': [{'geneId': 'ENSG00000275023', 'majorConsequence': 'LOF'}],
+    },
+    '_sort': [17038717327],
+}
+GCNV_VARIANT4 = {
+    'variantId': 'suffix_140608_DUP',
+    'chrom': '17',
+    'pos': 38721781,
+    'end': 38735703,
+    'genomeVersion': '38',
+    'liftedOverGenomeVersion': '37',
+    'liftedOverChrom': '17',
+    'liftedOverPos': 36878034,
+    'rg37LocusEnd': {'contig': '17', 'position': 36892521},
+    'familyGuids': ['F000002_2'],
+    'genotypeFilters': '',
+    'xpos': 17038721781,
+    'genotypes': {
+        'I000004_hg00731': {
+            'sampleId': 'HG00731', 'individualGuid': 'I000004_hg00731', 'familyGuid': 'F000002_2',
+            'numAlt': 1, 'cn': 3, 'qs': 28, 'defragged': False, 'start': 38721781, 'end': 38735703, 'numExon': 7,
+            'geneIds': None, 'newCall': False, 'prevCall': True, 'prevOverlap': False,
+        },
+        'I000005_hg00732': {
+            'sampleId': 'HG00732', 'individualGuid': 'I000005_hg00732', 'familyGuid': 'F000002_2',
+            'numAlt': 0, 'cn': None, 'qs': None, 'defragged': None, 'start': None, 'end': None, 'numExon': None,
+            'geneIds': None, 'newCall': None, 'prevCall': None, 'prevOverlap': None,
+        },
+        'I000006_hg00733': {
+            'sampleId': 'HG00733', 'individualGuid': 'I000006_hg00733', 'familyGuid': 'F000002_2',
+            'numAlt': 1, 'cn': 3, 'qs': 29, 'defragged': False, 'start': 38721781, 'end': 38734440, 'numExon': 7,
+            'geneIds': None, 'newCall': False, 'prevCall': True, 'prevOverlap': False,
+        }
+    },
+    'populations': {'sv_callset': {'af': 0.004989586770534515, 'ac': 115, 'an': 23048, 'hom': 0, 'het': 0}},
+    'predictions': {'strvctvre': 0.7099999785423279},
+    'numExon': 7,
+    'svType': 'DUP',
+    'transcripts': {
+        'ENSG00000275023': [{'geneId': 'ENSG00000275023', 'majorConsequence': 'LOF'}],
+        'ENSG00000277258': [{'geneId': 'ENSG00000277258', 'majorConsequence': 'LOF'}],
+        'ENSG00000277972': [{'geneId': 'ENSG00000277972', 'majorConsequence': 'COPY_GAIN'}],
+    },
+    '_sort': [17038721781],
+}
 
 LOCATION_SEARCH = {
     'gene_ids': ['ENSG00000177000', 'ENSG00000097046'],

From c91f63c5aa92b55845de5284defe2926a326506e Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Fri, 25 Aug 2023 12:36:17 -0400
Subject: [PATCH 10/16] initial test and transcript override

---
 .../projects/R0001_1kg.ht/.README.txt.crc     | Bin 12 -> 12 bytes
 .../SV_WES/projects/R0001_1kg.ht/README.txt   |   2 +-
 .../.index.crc                                | Bin
 .../.metadata.json.gz.crc                     | Bin
 .../index                                     | Bin
 .../metadata.json.gz                          | Bin
 .../R0001_1kg.ht/rows/.metadata.json.gz.crc   | Bin 16 -> 16 bytes
 .../R0001_1kg.ht/rows/metadata.json.gz        | Bin 667 -> 668 bytes
 ...0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7.crc | Bin 12 -> 0 bytes
 ...0-4d0242f4-15be-5c06-45f9-2e54e9c95dfa.crc | Bin 0 -> 12 bytes
 ...-0-0-4d0242f4-15be-5c06-45f9-2e54e9c95dfa} | Bin 227 -> 235 bytes
 hail_search/hail_search_query.py              |   4 +-
 hail_search/test_search.py                    |  40 +++++++++++++-----
 hail_search/test_utils.py                     |  40 +++++++++++++++---
 14 files changed, 66 insertions(+), 20 deletions(-)
 rename hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/{part-0-646-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7.idx => part-0-702-0-0-4d0242f4-15be-5c06-45f9-2e54e9c95dfa.idx}/.index.crc (100%)
 rename hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/{part-0-646-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7.idx => part-0-702-0-0-4d0242f4-15be-5c06-45f9-2e54e9c95dfa.idx}/.metadata.json.gz.crc (100%)
 rename hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/{part-0-646-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7.idx => part-0-702-0-0-4d0242f4-15be-5c06-45f9-2e54e9c95dfa.idx}/index (100%)
 rename hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/{part-0-646-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7.idx => part-0-702-0-0-4d0242f4-15be-5c06-45f9-2e54e9c95dfa.idx}/metadata.json.gz (100%)
 delete mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/rows/parts/.part-0-646-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7.crc
 create mode 100644 hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/rows/parts/.part-0-702-0-0-4d0242f4-15be-5c06-45f9-2e54e9c95dfa.crc
 rename hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/rows/parts/{part-0-646-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7 => part-0-702-0-0-4d0242f4-15be-5c06-45f9-2e54e9c95dfa} (60%)

diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/.README.txt.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/.README.txt.crc
index 17c414e9d5b7ded20df83dfa3a45f6c4cce0c1f5..a50f0cb506c8fc42d14688e60a15b6bf87baa97d 100644
GIT binary patch
literal 12
TcmYc;N@ieSU}AWYb?zSk6!-*L

literal 12
TcmYc;N@ieSU}E6d*>(#65wQbY

diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/README.txt b/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/README.txt
index e9bf5b62a9..3bd7ff0ea8 100644
--- a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/README.txt
+++ b/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/README.txt
@@ -1,3 +1,3 @@
 This folder comprises a Hail (www.hail.is) native Table or MatrixTable.
   Written with version 0.2.109-b71b065e4bb6
-  Created at 2023/08/25 10:45:26
\ No newline at end of file
+  Created at 2023/08/25 11:56:33
\ No newline at end of file
diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/part-0-646-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7.idx/.index.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/part-0-702-0-0-4d0242f4-15be-5c06-45f9-2e54e9c95dfa.idx/.index.crc
similarity index 100%
rename from hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/part-0-646-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7.idx/.index.crc
rename to hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/part-0-702-0-0-4d0242f4-15be-5c06-45f9-2e54e9c95dfa.idx/.index.crc
diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/part-0-646-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/part-0-702-0-0-4d0242f4-15be-5c06-45f9-2e54e9c95dfa.idx/.metadata.json.gz.crc
similarity index 100%
rename from hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/part-0-646-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7.idx/.metadata.json.gz.crc
rename to hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/part-0-702-0-0-4d0242f4-15be-5c06-45f9-2e54e9c95dfa.idx/.metadata.json.gz.crc
diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/part-0-646-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7.idx/index b/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/part-0-702-0-0-4d0242f4-15be-5c06-45f9-2e54e9c95dfa.idx/index
similarity index 100%
rename from hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/part-0-646-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7.idx/index
rename to hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/part-0-702-0-0-4d0242f4-15be-5c06-45f9-2e54e9c95dfa.idx/index
diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/part-0-646-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7.idx/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/part-0-702-0-0-4d0242f4-15be-5c06-45f9-2e54e9c95dfa.idx/metadata.json.gz
similarity index 100%
rename from hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/part-0-646-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7.idx/metadata.json.gz
rename to hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/index/part-0-702-0-0-4d0242f4-15be-5c06-45f9-2e54e9c95dfa.idx/metadata.json.gz
diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/rows/.metadata.json.gz.crc
index 9cba05dcb8675e53e6e06d64fcb287acdd42349a..5dc5b05e4bf9fc30be96d841f2634f10565f1fa7 100644
GIT binary patch
literal 16
XcmYc;N@ieSU}BiCVz!BF#?RRRBx40z

literal 16
XcmYc;N@ieSU}9iyt8)ID+1myHAqWKW

diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/rows/metadata.json.gz
index da058c7333047f2ddd7f9435bba544cc03a546e4..ee8e1b9e6ff26e9b4757b255eac866996373b7f4 100644
GIT binary patch
literal 668
zcmV;N0%QFjiwFP!000000NqwibJ{Qv{V%%KPQlno@U0NqGLy-qA?=|I;}O!Ds4+4l
z3kqTU-@7vS1KUiJ9>amLpLSPopWfQu6G@jqd@`0Cw!j~L-Aq=%9Ac5+8Fs{<k_}aY
zN*xGJW{9$w12Zwi-{;AXyA|+Zv4BxvAYA!08qfqLTCJJhmz8|Pg`{d{g(A|R5`XkX
zDY~2H_{;*Sm4ITQT8G7w#fnqOz^~#G{sxRjk*OZX5m0HZ6l@OIDHLx&MRZk0rWS`#
z(ZvFI8Jf`LDuMtcVq>X$PL3=<0=CgY`xqeC%K)~qtmP$|DxG7)yZq|gV}A8xa=c1s
zsnKzB)Ul}`B0??-{U)HN2dI?pz^z5Jx?WASTE%LSszs@Gp{o4q8nsANC{zeEL|R6k
zsyg#^7=o&q@pd%mI@g18$s+7YM7$C*h+_7PR8E5C2Aqf&4Cpymm)Fg-+u!c@VV3*O
zL~I=BvYwfmA9f+k=+&AS=U6t03Zi?epAf&e_?(3s6on77p?lLLRtF^%DlsP7qW@9P
zOV`3#Pw=L2a~X>({%cd5T5%XApo?-;%2-n^@NiFZ({@{zd@5{C+-q2zn}K5Y#@@AE
zKZ;#&;3bXj@Ra}W__O^K$o2!z{1xDH1%leCHf?i2-e!?QtOD~CYOnlG*A#7>IHgIy
zPQFh4ysz3QpCSAqt%mBn^8U%1EoC^3PKhb9!sq!{JdVVCOqd(HV_$U~{O{N<?>Md#
zx^~x_gYB`-rR{oQ-*&)rq0jmr4{7$*@#lfc1q|Y~<fhJ^WUmR+NagdyCkgf-wn5i%
zJ*N}g{Jb{|@TGJvu<!NUB2GvatvQ-5tBfr`fsITFgQSKG)mkA7JnVliF$=dp2><{a
C_D%Z$

literal 667
zcmV;M0%ZLkiwFP!000000NqyGZrVT){g-{Mod(-n>bF8@ixfp^Nc&KPtYtkW+nBYr
zUMLj#-#csY-6%;P0}?W4XJ*fwJ!AVw1l<5}$xQgL2mj&c%{&HX5rcToa3Jo2?5N~a
zD3AMOi71PFU^<4l$2|G*5Q9G{7BC7lgexDWW17HR$t_dIvXYB97gQdMfQJfX;!ZzH
zNe_!0Um75k<WMYB>##_$NcvPTaI3h0KOUoDsH;a&2viucgxv|NL(vXoNaHdxHGBvp
zU9Z6}Lle4*L-0T#HWSLWa%2qxu#FDd=K!(Yc(9K|BQH@@svMi#<yT)H^Q-Uk^HsV`
zwT`>9j%@`I;bL9rHwU#iL8WvjZXKdE^=hltDOQJ69ZGczRpr;zs6(Pcp+cYu($ex&
z)md$W0A$0Ax6^UY8jZ^(gRmza`jwDz7_n!hQVUvXaL)f?fNc#2qjuWuFX!7N%Y9=a
zHV$;t%uMwMyWo2C8cob{ESp3L;XPGPh+kcO%)&K_!Ux&Vy=oGxgA#C=XcO(x|ETA{
zF>uxs{8PBOjKvlAr72F0C<qczML8-&q^RO}xV7B0+twwY3Y(gHHH%9#Q0(5?yRqwg
zu?r5opy3^!^8X!wwx0yiec+kDJiM=f*ErR-ZBEGREOLsKpzlK6mEY)^qK%qU+Vq>`
zo76A+s*Cak!tc^*sLl(|pRCzZhI({LbdebE=U?zR68$ov|Crb2z!~5#e*GQ**nQI)
z!nL__U}&=5YGpDu7zNC-sT~Znw~pTrRIFhfZH2Gv97*<=(2Z1HPh65<4}9<SET?Z>
zdpAGsH3NJoT?+ivx1A!63l?sDG+kC1TYv(a>Jl1BH5sauL>741{{ZA&-4{Oz001x#
BKyv^9

diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/rows/parts/.part-0-646-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/rows/parts/.part-0-646-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7.crc
deleted file mode 100644
index c0e184f89f44249ff5950572cdd5f48baaf0a082..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 12
TcmYc;N@ieSU}A_~y8S2s6AlBn

diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/rows/parts/.part-0-702-0-0-4d0242f4-15be-5c06-45f9-2e54e9c95dfa.crc b/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/rows/parts/.part-0-702-0-0-4d0242f4-15be-5c06-45f9-2e54e9c95dfa.crc
new file mode 100644
index 0000000000000000000000000000000000000000..f83b4fa7bdceeec297d4e8ced85f14d5ef4ac890
GIT binary patch
literal 12
TcmYc;N@ieSU}E?uvCS0#6LAAf

literal 0
HcmV?d00001

diff --git a/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/rows/parts/part-0-646-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7 b/hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/rows/parts/part-0-702-0-0-4d0242f4-15be-5c06-45f9-2e54e9c95dfa
similarity index 60%
rename from hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/rows/parts/part-0-646-0-0-0d1eee35-27e0-b4e7-c1bb-cc68fc23a3f7
rename to hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/rows/parts/part-0-702-0-0-4d0242f4-15be-5c06-45f9-2e54e9c95dfa
index 0dca74ce0f4f9ec367c684dd58d234018ba50a4b..0c51afbfb967f29486ce15cdfa25d352f5519579 100644
GIT binary patch
delta 39
xcmV+?0NDTI0qX$;=Kufz$&m%k8CR|1-3kHp0Pp}ZIW;yi0QaQW3IhzJ0RStR4ebB`

delta 31
ncmaFO_?VIPJ_7^8&WWsN1wSyXeRP+Lf#Lg-8(hphOBfjd!io!d

diff --git a/hail_search/hail_search_query.py b/hail_search/hail_search_query.py
index 775248242d..c1803a7bdf 100644
--- a/hail_search/hail_search_query.py
+++ b/hail_search/hail_search_query.py
@@ -1208,7 +1208,7 @@ class GcnvHailTableQuery(SvHailTableQuery):
     TRANSCRIPTS_ENUM_FIELD = SvHailTableQuery.ENUM_ANNOTATION_FIELDS[SvHailTableQuery.TRANSCRIPTS_FIELD]
     ENUM_ANNOTATION_FIELDS = {SvHailTableQuery.TRANSCRIPTS_FIELD: {
         **TRANSCRIPTS_ENUM_FIELD,
-        'format_values': lambda values, r: GcnvHailTableQuery.TRANSCRIPTS_ENUM_FIELD['format_values'](
+        'format_array_values': lambda values, r: GcnvHailTableQuery.TRANSCRIPTS_ENUM_FIELD['format_array_values'](
             GcnvHailTableQuery._get_gene_id_transcripts_override(values, r), r
         ),
     }}
@@ -1247,6 +1247,8 @@ def get_allowed_sv_type_ids(self, sv_types):
 
     # TODO override genotype fields in genotypes response, actually return geneIds
 
+    # TODO filter family transcripts for gene counts
+
     def _additional_annotation_fields(self):
         return {}
 
diff --git a/hail_search/test_search.py b/hail_search/test_search.py
index 7b772f0afc..6ceffe16b6 100644
--- a/hail_search/test_search.py
+++ b/hail_search/test_search.py
@@ -4,7 +4,8 @@
 from hail_search.test_utils import get_hail_search_body, FAMILY_2_VARIANT_SAMPLE_DATA, FAMILY_2_MISSING_SAMPLE_DATA, \
     VARIANT1, VARIANT2, VARIANT3, VARIANT4, MULTI_PROJECT_SAMPLE_DATA, MULTI_PROJECT_MISSING_SAMPLE_DATA, \
     LOCATION_SEARCH, EXCLUDE_LOCATION_SEARCH, VARIANT_ID_SEARCH, RSID_SEARCH, GENE_COUNTS, SV_WGS_SAMPLE_DATA, \
-    SV_VARIANT1, SV_VARIANT2, SV_VARIANT3, SV_VARIANT4, GCNV_VARIANT1, GCNV_VARIANT2, GCNV_VARIANT3, GCNV_VARIANT4
+    SV_VARIANT1, SV_VARIANT2, SV_VARIANT3, SV_VARIANT4, GCNV_VARIANT1, GCNV_VARIANT2, GCNV_VARIANT3, GCNV_VARIANT4, \
+    GCNV_MULTI_FAMILY_VARIANT1, GCNV_MULTI_FAMILY_VARIANT2, SV_WES_SAMPLE_DATA
 from hail_search.web_app import init_web_app
 
 PROJECT_2_VARIANT = {
@@ -128,7 +129,8 @@ async def _assert_expected_search(self, results, gene_counts=None, **search_kwar
         self.assertEqual(resp_json['total'], len(results))
         for i, result in enumerate(resp_json['results']):
             if result != results[i]:
-                import pdb; pdb.set_trace()
+                diff_k = {ky for ky, val in results[i].items() if val != result[ky]}
+                import pdb; pdb.set_trace()  # TODO
             self.assertEqual(result, results[i])
 
         if gene_counts:
@@ -136,7 +138,7 @@ async def _assert_expected_search(self, results, gene_counts=None, **search_kwar
                 self.assertEqual(resp.status, 200)
                 gene_counts_json = await resp.json()
             if gene_counts_json != gene_counts:
-                import pdb; pdb.set_trace()
+                import pdb; pdb.set_trace()  # TODO
             self.assertDictEqual(gene_counts_json, gene_counts)
 
     async def test_single_family_search(self):
@@ -172,14 +174,30 @@ async def test_single_family_search(self):
             }
         )
 
-    # async def test_single_project_search(self):
-    #     await self._assert_expected_search(
-    #         [VARIANT1, VARIANT2, MULTI_FAMILY_VARIANT, VARIANT4], omit_sample_type='SV_WES', gene_counts={
-    #             'ENSG00000097046': {'total': 3, 'families': {'F000002_2': 2, 'F000003_3': 1}},
-    #             'ENSG00000177000': {'total': 3, 'families': {'F000002_2': 2, 'F000003_3': 1}},
-    #         }
-    #     )
-    #
+    async def test_single_project_search(self):
+        await self._assert_expected_search(
+            [VARIANT1, VARIANT2, MULTI_FAMILY_VARIANT, VARIANT4], omit_sample_type='SV_WES', gene_counts={
+                'ENSG00000097046': {'total': 3, 'families': {'F000002_2': 2, 'F000003_3': 1}},
+                'ENSG00000177000': {'total': 3, 'families': {'F000002_2': 2, 'F000003_3': 1}},
+            }
+        )
+
+        await self._assert_expected_search(
+            [GCNV_MULTI_FAMILY_VARIANT1, GCNV_MULTI_FAMILY_VARIANT2, GCNV_VARIANT3, GCNV_VARIANT4], sample_data=SV_WES_SAMPLE_DATA, gene_counts={
+                'ENSG00000129562': {'total': 2, 'families': {'F000002_2': 1, 'F000003_3': 1}},
+                'ENSG00000013364': {'total': 2, 'families': {'F000002_2': 1, 'F000003_3': 1}},
+                'ENSG00000079616': {'total': 2, 'families': {'F000002_2': 1, 'F000003_3': 1}},
+                'ENSG00000103495': {'total': 2, 'families': {'F000002_2': 1, 'F000003_3': 1}},
+                'ENSG00000167371': {'total': 2, 'families': {'F000002_2': 1, 'F000003_3': 1}},
+                'ENSG00000280789': {'total': 2, 'families': {'F000002_2': 1, 'F000003_3': 1}},
+                'ENSG00000280893': {'total': 2, 'families': {'F000002_2': 1, 'F000003_3': 1}},
+                'ENSG00000281348': {'total': 2, 'families': {'F000002_2': 1, 'F000003_3': 1}},
+                'ENSG00000275023': {'total': 2, 'families': {'F000002_2': 2}},
+                'ENSG00000277258': {'total': 1, 'families': {'F000002_2': 1}},
+                'ENSG00000277972': {'total': 1, 'families': {'F000002_2': 1}},
+            }
+        )
+
     # async def test_multi_project_search(self):
     #     await self._assert_expected_search(
     #         [PROJECT_2_VARIANT, MULTI_PROJECT_VARIANT1, MULTI_PROJECT_VARIANT2, VARIANT3, VARIANT4],
diff --git a/hail_search/test_utils.py b/hail_search/test_utils.py
index 446cd13040..7d0dc9b3de 100644
--- a/hail_search/test_utils.py
+++ b/hail_search/test_utils.py
@@ -60,6 +60,8 @@
     'sample_id': 'NA20883', 'individual_guid': 'I000035_na20883', 'family_guid': 'F000011_11', 'project_guid': 'R0003_test', 'affected': 'N', 'sex': 'F',
 }]}
 
+SV_WES_SAMPLE_DATA = {'SV_WES': EXPECTED_SAMPLE_DATA['SV_WES'] + [FAMILY_3_SAMPLE]}
+
 VARIANT1 = {
     'variantId': '1-10439-AC-A',
     'chrom': '1',
@@ -562,9 +564,7 @@
     'predictions': {'strvctvre': 0.1809999942779541},
     'numExon': 0,
     'svType': 'DUP',
-    'transcripts': {
-        'ENSG00000129562': [{'geneId': 'ENSG00000129562', 'majorConsequence': 'COPY_GAIN'}],
-    },
+    'transcripts': {},
     '_sort': [14022417556],
 
 }
@@ -603,13 +603,9 @@
     'numExon': 8,
     'svType': 'DUP',
     'transcripts': {
-        'ENSG00000013364': [{'geneId': 'ENSG00000013364', 'majorConsequence': 'LOF'}],
-        'ENSG00000079616': [{'geneId': 'ENSG00000079616', 'majorConsequence': 'LOF'}],
         'ENSG00000103495': [{'geneId': 'ENSG00000103495', 'majorConsequence': 'COPY_GAIN'}],
         'ENSG00000167371': [{'geneId': 'ENSG00000167371', 'majorConsequence': 'COPY_GAIN'}],
-        'ENSG00000280789': [{'geneId': 'ENSG00000280789', 'majorConsequence': 'LOF'}],
         'ENSG00000280893': [{'geneId': 'ENSG00000280893', 'majorConsequence': 'COPY_GAIN'}],
-        'ENSG00000281348': [{'geneId': 'ENSG00000281348', 'majorConsequence': 'LOF'}],
     },
     '_sort': [16029802672],
 }
@@ -694,6 +690,36 @@
     '_sort': [17038721781],
 }
 
+GCNV_MULTI_FAMILY_VARIANT1 = deepcopy(GCNV_VARIANT1)
+GCNV_MULTI_FAMILY_VARIANT1.update({
+    'pos': 22418039,
+    'end': 22507821,
+    'transcripts': {
+        'ENSG00000129562': [{'geneId': 'ENSG00000129562', 'majorConsequence': 'COPY_GAIN'}],
+    },
+})
+GCNV_MULTI_FAMILY_VARIANT1['familyGuids'].append('F000003_3')
+GCNV_MULTI_FAMILY_VARIANT1['genotypes'].update({'I000007_na20870': {
+    'sampleId': 'NA20870', 'individualGuid': 'I000007_na20870', 'familyGuid': 'F000003_3',
+    'numAlt': 1, 'cn': 3, 'qs': 164, 'defragged': False, 'start': 22418039, 'end': 22507821, 'numExon': 0,
+    'geneIds': None, 'newCall': False, 'prevCall': True, 'prevOverlap': False,
+}})
+
+GCNV_MULTI_FAMILY_VARIANT2 = deepcopy(GCNV_VARIANT2)
+GCNV_MULTI_FAMILY_VARIANT2['numExon'] = 26
+GCNV_MULTI_FAMILY_VARIANT2['familyGuids'].append('F000003_3')
+GCNV_MULTI_FAMILY_VARIANT2['genotypes'].update({'I000007_na20870': {
+    'sampleId': 'NA20870', 'individualGuid': 'I000007_na20870', 'familyGuid': 'F000003_3',
+    'numAlt': 1, 'cn': 3, 'qs': 40, 'defragged': False, 'start': 29809156, 'end': 29815990, 'numExon': None,
+    'geneIds': None, 'newCall': False, 'prevCall': True, 'prevOverlap': False,
+}})
+GCNV_MULTI_FAMILY_VARIANT2['transcripts'].update({
+    'ENSG00000013364': [{'geneId': 'ENSG00000013364', 'majorConsequence': 'LOF'}],
+    'ENSG00000079616': [{'geneId': 'ENSG00000079616', 'majorConsequence': 'LOF'}],
+    'ENSG00000281348': [{'geneId': 'ENSG00000281348', 'majorConsequence': 'LOF'}],
+    'ENSG00000280789': [{'geneId': 'ENSG00000280789', 'majorConsequence': 'LOF'}],
+})
+
 LOCATION_SEARCH = {
     'gene_ids': ['ENSG00000177000', 'ENSG00000097046'],
     'intervals': ['2:1234-5678', '7:1-11100', '1:11785723-11806455', '1:91500851-91525764'],

From 2e4f372f3b4432efa80b186250f32f462c65be3a Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Fri, 25 Aug 2023 16:49:16 -0400
Subject: [PATCH 11/16] genotype override fields

---
 hail_search/hail_search_query.py | 62 ++++++++++++++++----------------
 hail_search/test_utils.py        | 23 ++++++------
 2 files changed, 44 insertions(+), 41 deletions(-)

diff --git a/hail_search/hail_search_query.py b/hail_search/hail_search_query.py
index c1803a7bdf..d2dac80def 100644
--- a/hail_search/hail_search_query.py
+++ b/hail_search/hail_search_query.py
@@ -43,7 +43,7 @@ class BaseHailTableQuery(object):
     MISSING_NUM_ALT = -1
 
     GENOTYPE_FIELDS = {}
-    NESTED_GENOTYPE_FIELDS = {}
+    COMPUTED_GENOTYPE_FIELDS = {}
     GENOTYPE_QUERY_FIELDS = {}
     QUALITY_FILTER_FORMAT = {}
     POPULATIONS = {}
@@ -100,7 +100,7 @@ def annotation_fields(self):
                 'sampleId', 'individualGuid', 'familyGuid',
                 numAlt=hl.if_else(hl.is_defined(x[0].GT), x[0].GT.n_alt_alleles(), self.MISSING_NUM_ALT),
                 **{k: x[0][field] for k, field in self.GENOTYPE_FIELDS.items()},
-                **{_to_camel_case(k): x[0][field][k] for field, v in self.NESTED_GENOTYPE_FIELDS.items() for k in v},
+                **{_to_camel_case(k): v(x[0], k, r) for k, v in self.COMPUTED_GENOTYPE_FIELDS.items()},
             )),
             'populations': lambda r: hl.struct(**{
                 population: self.population_expression(r, population) for population in self.POPULATIONS.keys()
@@ -1056,7 +1056,9 @@ class SvHailTableQuery(BaseHailTableQuery):
     DATA_TYPE = 'SV_WGS'
 
     GENOTYPE_FIELDS = {_to_camel_case(f): f for f in ['CN', 'GQ']}
-    NESTED_GENOTYPE_FIELDS = {'concordance': ['new_call', 'prev_call', 'prev_num_alt']}
+    COMPUTED_GENOTYPE_FIELDS = {
+        k: lambda entry, field, *args: entry.concordance[field] for k in ['new_call', 'prev_call', 'prev_num_alt']
+    }
     GENOTYPE_QUERY_FIELDS = {'gq_sv': 'GQ', 'gq': None}
 
     TRANSCRIPTS_FIELD = 'sorted_gene_consequences'
@@ -1186,22 +1188,31 @@ class GcnvHailTableQuery(SvHailTableQuery):
     GENOTYPE_FIELDS = {
         **SvHailTableQuery.GENOTYPE_FIELDS,
         **{f.lower(): f for f in ['QS', 'defragged']},
-        **{_to_camel_case(f): f'sample_{f}' for f in ['start', 'end', 'num_exon', 'gene_ids']},
     }
     del GENOTYPE_FIELDS['gq']
     GENOTYPE_QUERY_FIELDS = {}
-    NESTED_GENOTYPE_FIELDS = {
-        'concordance': SvHailTableQuery.NESTED_GENOTYPE_FIELDS['concordance'][:-1] + ['prev_overlap']
+    GENOTYPE_OVERRIDE_FIELDS = {
+        'start': (hl.min, lambda r: r.start_locus.position),
+        'end': (hl.max, lambda r: r.end_locus.position),
+        'num_exon': (hl.max, lambda r: r.num_exon),
+        'gene_ids': (
+            lambda entry_gene_ids: entry_gene_ids.fold(lambda s1, s2: s1.union(s2), hl.empty_set(hl.tstr)),
+            lambda r: hl.missing(hl.tset(hl.tstr)),
+        ),
+    }
+    COMPUTED_GENOTYPE_FIELDS = {
+        **SvHailTableQuery.COMPUTED_GENOTYPE_FIELDS,
+        **{k: lambda entry, field, r: hl.or_missing(r[field] != entry[f'sample_{field}'], entry[f'sample_{field}'])
+           for k in GENOTYPE_OVERRIDE_FIELDS.keys()},
     }
+    COMPUTED_GENOTYPE_FIELDS['prev_overlap'] = COMPUTED_GENOTYPE_FIELDS.pop('prev_num_alt')
 
     CORE_FIELDS = BaseHailTableQuery.CORE_FIELDS
     BASE_ANNOTATION_FIELDS = {
         **SvHailTableQuery.BASE_ANNOTATION_FIELDS,
-        'pos': lambda r: GcnvHailTableQuery._get_genotype_override_field(
-            r, 'start', hl.min, default=r.start_locus.position),
-        'end': lambda r: GcnvHailTableQuery._get_genotype_override_field(
-            r, 'end', hl.max, default=r.end_locus.position),
-        'numExon': lambda r: GcnvHailTableQuery._get_genotype_override_field(r, 'num_exon', hl.max),
+        'pos': lambda r: r.start,
+        'end': lambda r: r.end,
+        'numExon': lambda r: r.num_exon,
     }
     del BASE_ANNOTATION_FIELDS['bothsidesSupport']
 
@@ -1209,44 +1220,33 @@ class GcnvHailTableQuery(SvHailTableQuery):
     ENUM_ANNOTATION_FIELDS = {SvHailTableQuery.TRANSCRIPTS_FIELD: {
         **TRANSCRIPTS_ENUM_FIELD,
         'format_array_values': lambda values, r: GcnvHailTableQuery.TRANSCRIPTS_ENUM_FIELD['format_array_values'](
-            GcnvHailTableQuery._get_gene_id_transcripts_override(values, r), r
+            hl.if_else(hl.is_missing(r.gene_ids), values, values.filter(lambda t: r.gene_ids.contains(t.geneId))), r,
         ),
     }}
 
     POPULATIONS = {k: v for k, v in SvHailTableQuery.POPULATIONS.items() if k != 'gnomad_svs'}
 
     @staticmethod
-    def _get_genotype_override_field(r, field, agg, default=None):
+    def _get_genotype_override_field(r, field, agg, get_default):
         sample_field = f'sample_{field}'
         entries = r.family_entries.flatmap(lambda x: x)
-        if default is None:
-            default = r[field]
         return hl.if_else(
             entries.any(lambda g: hl.is_defined(g.GT) & hl.is_missing(g[sample_field])),
-            default, agg(entries.map(lambda g: g[sample_field]))
+            get_default(r), agg(entries.map(lambda g: g[sample_field]))
         )
 
-    @classmethod
-    def _get_gene_id_transcripts_override(cls, transcripts, r):
-        empty_gene_set = hl.empty_set(hl.tstr)
-        geneotype_gene_ids_expr = cls._get_genotype_override_field(
-            r, 'gene_ids',
-            lambda entry_gene_ids: entry_gene_ids.fold(lambda s1, s2: s1.union(s2), empty_gene_set),
-            default=hl.missing(empty_gene_set.dtype))
-        return hl.bind(
-            lambda gene_ids: hl.if_else(
-                hl.is_missing(gene_ids), transcripts,
-                transcripts.filter(lambda t: gene_ids.contains(t.geneId)),
-            ), geneotype_gene_ids_expr,
-        )
+    # TODO actually return geneIds in genotypes
+    def _format_results(self, ht, annotation_fields):
+        ht = ht.annotate(**{
+            k: self._get_genotype_override_field(ht, k, *args) for k, args in self.GENOTYPE_OVERRIDE_FIELDS.items()
+        })
+        return super()._format_results(ht, annotation_fields)
 
     def get_allowed_sv_type_ids(self, sv_types):
         return super().get_allowed_sv_type_ids([
             type.replace('gCNV_', '') for type in sv_types if type.startswith('gCNV_')
         ])
 
-    # TODO override genotype fields in genotypes response, actually return geneIds
-
     # TODO filter family transcripts for gene counts
 
     def _additional_annotation_fields(self):
diff --git a/hail_search/test_utils.py b/hail_search/test_utils.py
index 7d0dc9b3de..01e0dcf9f3 100644
--- a/hail_search/test_utils.py
+++ b/hail_search/test_utils.py
@@ -546,7 +546,7 @@
     'genotypes': {
         'I000004_hg00731': {
             'sampleId': 'HG00731', 'individualGuid': 'I000004_hg00731', 'familyGuid': 'F000002_2',
-            'numAlt': 1, 'cn': 3, 'qs': 38, 'defragged': False, 'start': 22438910, 'end': 22469796, 'numExon': 0,
+            'numAlt': 1, 'cn': 3, 'qs': 38, 'defragged': False, 'start': None, 'end': None, 'numExon': None,
             'geneIds': None, 'newCall': False, 'prevCall': True, 'prevOverlap': False,
         },
         'I000005_hg00732': {
@@ -584,17 +584,17 @@
     'genotypes': {
         'I000004_hg00731': {
             'sampleId': 'HG00731', 'individualGuid': 'I000004_hg00731', 'familyGuid': 'F000002_2',
-            'numAlt': 1, 'cn': 3, 'qs': 29, 'defragged': False, 'start': 29809156, 'end': 29815990, 'numExon': 8,
+            'numAlt': 1, 'cn': 3, 'qs': 29, 'defragged': False, 'start': None, 'end': None, 'numExon': None,
             'geneIds': None, 'newCall': False, 'prevCall': True, 'prevOverlap': False,
         },
         'I000005_hg00732': {
             'sampleId': 'HG00732', 'individualGuid': 'I000005_hg00732', 'familyGuid': 'F000002_2',
-            'numAlt': 1, 'cn': 3, 'qs': 46, 'defragged': False, 'start': 29809156, 'end': 29815990, 'numExon': 8,
+            'numAlt': 1, 'cn': 3, 'qs': 46, 'defragged': False, 'start': None, 'end': None, 'numExon': None,
             'geneIds': None, 'newCall': False, 'prevCall': True, 'prevOverlap': False,
         },
         'I000006_hg00733': {
             'sampleId': 'HG00733', 'individualGuid': 'I000006_hg00733', 'familyGuid': 'F000002_2',
-            'numAlt': 1, 'cn': 3, 'qs': 37, 'defragged': False, 'start': 29809156, 'end': 29815990, 'numExon': 8,
+            'numAlt': 1, 'cn': 3, 'qs': 37, 'defragged': False, 'start': None, 'end': None, 'numExon': None,
             'geneIds': None, 'newCall': False, 'prevCall': True, 'prevOverlap': False,
         }
     },
@@ -625,12 +625,12 @@
     'genotypes': {
         'I000004_hg00731': {
             'sampleId': 'HG00731', 'individualGuid': 'I000004_hg00731', 'familyGuid': 'F000002_2',
-            'numAlt': 2, 'cn': 4, 'qs': 13, 'defragged': True, 'start': 38717327, 'end': 38719636, 'numExon': None,
+            'numAlt': 2, 'cn': 4, 'qs': 13, 'defragged': True, 'start': None, 'end': None, 'numExon': None,
             'geneIds': None, 'newCall': True, 'prevCall': False, 'prevOverlap': False,
         },
         'I000005_hg00732': {
             'sampleId': 'HG00732', 'individualGuid': 'I000005_hg00732', 'familyGuid': 'F000002_2',
-            'numAlt': 1, 'cn': 3, 'qs': 7, 'defragged': False, 'start': 38717327, 'end': 38719636, 'numExon': None,
+            'numAlt': 1, 'cn': 3, 'qs': 7, 'defragged': False, 'start': None, 'end': None, 'numExon': None,
             'geneIds': None, 'newCall': False, 'prevCall': False, 'prevOverlap': True,
         },
         'I000006_hg00733': {
@@ -664,7 +664,7 @@
     'genotypes': {
         'I000004_hg00731': {
             'sampleId': 'HG00731', 'individualGuid': 'I000004_hg00731', 'familyGuid': 'F000002_2',
-            'numAlt': 1, 'cn': 3, 'qs': 28, 'defragged': False, 'start': 38721781, 'end': 38735703, 'numExon': 7,
+            'numAlt': 1, 'cn': 3, 'qs': 28, 'defragged': False, 'start': None, 'end': None, 'numExon': None,
             'geneIds': None, 'newCall': False, 'prevCall': True, 'prevOverlap': False,
         },
         'I000005_hg00732': {
@@ -674,7 +674,7 @@
         },
         'I000006_hg00733': {
             'sampleId': 'HG00733', 'individualGuid': 'I000006_hg00733', 'familyGuid': 'F000002_2',
-            'numAlt': 1, 'cn': 3, 'qs': 29, 'defragged': False, 'start': 38721781, 'end': 38734440, 'numExon': 7,
+            'numAlt': 1, 'cn': 3, 'qs': 29, 'defragged': False, 'start': None, 'end': 38734440, 'numExon': None,
             'geneIds': None, 'newCall': False, 'prevCall': True, 'prevOverlap': False,
         }
     },
@@ -701,16 +701,19 @@
 GCNV_MULTI_FAMILY_VARIANT1['familyGuids'].append('F000003_3')
 GCNV_MULTI_FAMILY_VARIANT1['genotypes'].update({'I000007_na20870': {
     'sampleId': 'NA20870', 'individualGuid': 'I000007_na20870', 'familyGuid': 'F000003_3',
-    'numAlt': 1, 'cn': 3, 'qs': 164, 'defragged': False, 'start': 22418039, 'end': 22507821, 'numExon': 0,
+    'numAlt': 1, 'cn': 3, 'qs': 164, 'defragged': False, 'start': None, 'end': None, 'numExon': None,
     'geneIds': None, 'newCall': False, 'prevCall': True, 'prevOverlap': False,
 }})
+GCNV_MULTI_FAMILY_VARIANT1['genotypes']['I000004_hg00731'].update({'start': 22438910, 'end': 22469796})
 
 GCNV_MULTI_FAMILY_VARIANT2 = deepcopy(GCNV_VARIANT2)
 GCNV_MULTI_FAMILY_VARIANT2['numExon'] = 26
 GCNV_MULTI_FAMILY_VARIANT2['familyGuids'].append('F000003_3')
+for genotype in GCNV_MULTI_FAMILY_VARIANT2['genotypes'].values():
+    genotype.update({'numExon': 8})
 GCNV_MULTI_FAMILY_VARIANT2['genotypes'].update({'I000007_na20870': {
     'sampleId': 'NA20870', 'individualGuid': 'I000007_na20870', 'familyGuid': 'F000003_3',
-    'numAlt': 1, 'cn': 3, 'qs': 40, 'defragged': False, 'start': 29809156, 'end': 29815990, 'numExon': None,
+    'numAlt': 1, 'cn': 3, 'qs': 40, 'defragged': False, 'start': None, 'end': None, 'numExon': None,
     'geneIds': None, 'newCall': False, 'prevCall': True, 'prevOverlap': False,
 }})
 GCNV_MULTI_FAMILY_VARIANT2['transcripts'].update({

From bea8c3071f7def8e1ddf5fe4584683d008ed4076 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Fri, 25 Aug 2023 17:50:52 -0400
Subject: [PATCH 12/16] fix genotype gene id response

---
 hail_search/hail_search_query.py | 6 +++---
 hail_search/test_search.py       | 1 +
 hail_search/test_utils.py        | 4 ++--
 hail_search/web_app.py           | 2 ++
 4 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/hail_search/hail_search_query.py b/hail_search/hail_search_query.py
index d2dac80def..61937f962f 100644
--- a/hail_search/hail_search_query.py
+++ b/hail_search/hail_search_query.py
@@ -1202,8 +1202,9 @@ class GcnvHailTableQuery(SvHailTableQuery):
     }
     COMPUTED_GENOTYPE_FIELDS = {
         **SvHailTableQuery.COMPUTED_GENOTYPE_FIELDS,
-        **{k: lambda entry, field, r: hl.or_missing(r[field] != entry[f'sample_{field}'], entry[f'sample_{field}'])
-           for k in GENOTYPE_OVERRIDE_FIELDS.keys()},
+        **{k: lambda entry, field, r: hl.or_missing(
+            hl.is_missing(r[field]) | (r[field] != entry[f'sample_{field}']), entry[f'sample_{field}']
+        ) for k in GENOTYPE_OVERRIDE_FIELDS.keys()},
     }
     COMPUTED_GENOTYPE_FIELDS['prev_overlap'] = COMPUTED_GENOTYPE_FIELDS.pop('prev_num_alt')
 
@@ -1235,7 +1236,6 @@ def _get_genotype_override_field(r, field, agg, get_default):
             get_default(r), agg(entries.map(lambda g: g[sample_field]))
         )
 
-    # TODO actually return geneIds in genotypes
     def _format_results(self, ht, annotation_fields):
         ht = ht.annotate(**{
             k: self._get_genotype_override_field(ht, k, *args) for k, args in self.GENOTYPE_OVERRIDE_FIELDS.items()
diff --git a/hail_search/test_search.py b/hail_search/test_search.py
index 6ceffe16b6..6018be0253 100644
--- a/hail_search/test_search.py
+++ b/hail_search/test_search.py
@@ -151,6 +151,7 @@ async def test_single_family_search(self):
 
         await self._assert_expected_search(
             [GCNV_VARIANT1, GCNV_VARIANT2, GCNV_VARIANT3, GCNV_VARIANT4], omit_sample_type='VARIANTS', gene_counts={
+                # TODO should be filtered to returned transcripts - just use entries?
                 'ENSG00000129562': {'total': 1, 'families': {'F000002_2': 1}},
                 'ENSG00000013364': {'total': 1, 'families': {'F000002_2': 1}},
                 'ENSG00000079616': {'total': 1, 'families': {'F000002_2': 1}},
diff --git a/hail_search/test_utils.py b/hail_search/test_utils.py
index 01e0dcf9f3..30d0d08f19 100644
--- a/hail_search/test_utils.py
+++ b/hail_search/test_utils.py
@@ -704,13 +704,13 @@
     'numAlt': 1, 'cn': 3, 'qs': 164, 'defragged': False, 'start': None, 'end': None, 'numExon': None,
     'geneIds': None, 'newCall': False, 'prevCall': True, 'prevOverlap': False,
 }})
-GCNV_MULTI_FAMILY_VARIANT1['genotypes']['I000004_hg00731'].update({'start': 22438910, 'end': 22469796})
+GCNV_MULTI_FAMILY_VARIANT1['genotypes']['I000004_hg00731'].update({'start': 22438910, 'end': 22469796, 'geneIds': []})
 
 GCNV_MULTI_FAMILY_VARIANT2 = deepcopy(GCNV_VARIANT2)
 GCNV_MULTI_FAMILY_VARIANT2['numExon'] = 26
 GCNV_MULTI_FAMILY_VARIANT2['familyGuids'].append('F000003_3')
 for genotype in GCNV_MULTI_FAMILY_VARIANT2['genotypes'].values():
-    genotype.update({'numExon': 8})
+    genotype.update({'numExon': 8, 'geneIds': ['ENSG00000103495', 'ENSG00000167371', 'ENSG00000280893']})
 GCNV_MULTI_FAMILY_VARIANT2['genotypes'].update({'I000007_na20870': {
     'sampleId': 'NA20870', 'individualGuid': 'I000007_na20870', 'familyGuid': 'F000003_3',
     'numAlt': 1, 'cn': 3, 'qs': 40, 'defragged': False, 'start': None, 'end': None, 'numExon': None,
diff --git a/hail_search/web_app.py b/hail_search/web_app.py
index 303ab82f5c..c6a3f5aa24 100644
--- a/hail_search/web_app.py
+++ b/hail_search/web_app.py
@@ -8,6 +8,8 @@
 def _hl_json_default(o):
     if isinstance(o, hl.Struct) or isinstance(o, hl.utils.frozendict):
         return dict(o)
+    elif isinstance(o, set):
+        return sorted(o)
 
 
 def hl_json_dumps(obj):

From fe4f735b55f77984d74a940e7b71072f88d3d206 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Tue, 5 Sep 2023 12:16:53 -0400
Subject: [PATCH 13/16] family specific sv gene counts

---
 hail_search/hail_search_query.py |  16 +-
 hail_search/test_search.py       | 987 +++++++++++++++----------------
 2 files changed, 498 insertions(+), 505 deletions(-)

diff --git a/hail_search/hail_search_query.py b/hail_search/hail_search_query.py
index 61937f962f..dd4974d39c 100644
--- a/hail_search/hail_search_query.py
+++ b/hail_search/hail_search_query.py
@@ -1227,8 +1227,9 @@ class GcnvHailTableQuery(SvHailTableQuery):
 
     POPULATIONS = {k: v for k, v in SvHailTableQuery.POPULATIONS.items() if k != 'gnomad_svs'}
 
-    @staticmethod
-    def _get_genotype_override_field(r, field, agg, get_default):
+    @classmethod
+    def _get_genotype_override_field(cls, r, field):
+        agg, get_default = cls.GENOTYPE_OVERRIDE_FIELDS[field]
         sample_field = f'sample_{field}'
         entries = r.family_entries.flatmap(lambda x: x)
         return hl.if_else(
@@ -1237,9 +1238,7 @@ def _get_genotype_override_field(r, field, agg, get_default):
         )
 
     def _format_results(self, ht, annotation_fields):
-        ht = ht.annotate(**{
-            k: self._get_genotype_override_field(ht, k, *args) for k, args in self.GENOTYPE_OVERRIDE_FIELDS.items()
-        })
+        ht = ht.annotate(**{k: self._get_genotype_override_field(ht, k) for k in self.GENOTYPE_OVERRIDE_FIELDS})
         return super()._format_results(ht, annotation_fields)
 
     def get_allowed_sv_type_ids(self, sv_types):
@@ -1247,7 +1246,12 @@ def get_allowed_sv_type_ids(self, sv_types):
             type.replace('gCNV_', '') for type in sv_types if type.startswith('gCNV_')
         ])
 
-    # TODO filter family transcripts for gene counts
+    @classmethod
+    def _gene_ids_expr(cls, ht):
+        gene_ids_expr = getattr(ht, 'gene_ids', None)
+        if gene_ids_expr is None:
+            gene_ids_expr = cls._get_genotype_override_field(ht, 'gene_ids')
+        return hl.or_else(gene_ids_expr, super()._gene_ids_expr(ht))
 
     def _additional_annotation_fields(self):
         return {}
diff --git a/hail_search/test_search.py b/hail_search/test_search.py
index 6018be0253..2a94444220 100644
--- a/hail_search/test_search.py
+++ b/hail_search/test_search.py
@@ -114,11 +114,11 @@ class HailSearchTestCase(AioHTTPTestCase):
     async def get_application(self):
         return init_web_app()
 
-    # async def test_status(self):
-    #     async with self.client.request('GET', '/status') as resp:
-    #         self.assertEqual(resp.status, 200)
-    #         resp_json = await resp.json()
-    #     self.assertDictEqual(resp_json, {'success': True})
+    async def test_status(self):
+        async with self.client.request('GET', '/status') as resp:
+            self.assertEqual(resp.status, 200)
+            resp_json = await resp.json()
+        self.assertDictEqual(resp_json, {'success': True})
 
     async def _assert_expected_search(self, results, gene_counts=None, **search_kwargs):
         search_body = get_hail_search_body(**search_kwargs)
@@ -128,17 +128,12 @@ async def _assert_expected_search(self, results, gene_counts=None, **search_kwar
         self.assertSetEqual(set(resp_json.keys()), {'results', 'total'})
         self.assertEqual(resp_json['total'], len(results))
         for i, result in enumerate(resp_json['results']):
-            if result != results[i]:
-                diff_k = {ky for ky, val in results[i].items() if val != result[ky]}
-                import pdb; pdb.set_trace()  # TODO
             self.assertEqual(result, results[i])
 
         if gene_counts:
             async with self.client.request('POST', '/gene_counts', json=search_body) as resp:
                 self.assertEqual(resp.status, 200)
                 gene_counts_json = await resp.json()
-            if gene_counts_json != gene_counts:
-                import pdb; pdb.set_trace()  # TODO
             self.assertDictEqual(gene_counts_json, gene_counts)
 
     async def test_single_family_search(self):
@@ -151,15 +146,9 @@ async def test_single_family_search(self):
 
         await self._assert_expected_search(
             [GCNV_VARIANT1, GCNV_VARIANT2, GCNV_VARIANT3, GCNV_VARIANT4], omit_sample_type='VARIANTS', gene_counts={
-                # TODO should be filtered to returned transcripts - just use entries?
-                'ENSG00000129562': {'total': 1, 'families': {'F000002_2': 1}},
-                'ENSG00000013364': {'total': 1, 'families': {'F000002_2': 1}},
-                'ENSG00000079616': {'total': 1, 'families': {'F000002_2': 1}},
                 'ENSG00000103495': {'total': 1, 'families': {'F000002_2': 1}},
                 'ENSG00000167371': {'total': 1, 'families': {'F000002_2': 1}},
-                'ENSG00000280789': {'total': 1, 'families': {'F000002_2': 1}},
                 'ENSG00000280893': {'total': 1, 'families': {'F000002_2': 1}},
-                'ENSG00000281348': {'total': 1, 'families': {'F000002_2': 1}},
                 'ENSG00000275023': {'total': 2, 'families': {'F000002_2': 2}},
                 'ENSG00000277258': {'total': 1, 'families': {'F000002_2': 1}},
                 'ENSG00000277972': {'total': 1, 'families': {'F000002_2': 1}},
@@ -199,486 +188,486 @@ async def test_single_project_search(self):
             }
         )
 
-    # async def test_multi_project_search(self):
-    #     await self._assert_expected_search(
-    #         [PROJECT_2_VARIANT, MULTI_PROJECT_VARIANT1, MULTI_PROJECT_VARIANT2, VARIANT3, VARIANT4],
-    #         gene_counts=GENE_COUNTS, sample_data=MULTI_PROJECT_SAMPLE_DATA,
-    #     )
-    #
-    # async def test_inheritance_filter(self):
-    #     inheritance_mode = 'any_affected'
-    #     await self._assert_expected_search(
-    #         [VARIANT1, VARIANT2, MULTI_FAMILY_VARIANT, VARIANT4], inheritance_mode=inheritance_mode, omit_sample_type='SV_WES',
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [SV_VARIANT1, SV_VARIANT2, SV_VARIANT3, SV_VARIANT4], inheritance_mode=inheritance_mode, sample_data=SV_WGS_SAMPLE_DATA,
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [SV_VARIANT2], inheritance_mode=inheritance_mode, annotations=NEW_SV_FILTER, sample_data=SV_WGS_SAMPLE_DATA,
-    #     )
-    #
-    #     inheritance_mode = 'de_novo'
-    #     await self._assert_expected_search(
-    #         [VARIANT1, FAMILY_3_VARIANT, VARIANT4], inheritance_mode=inheritance_mode, omit_sample_type='SV_WES',
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [SV_VARIANT1], inheritance_mode=inheritance_mode,  sample_data=SV_WGS_SAMPLE_DATA,
-    #     )
-    #
-    #     inheritance_mode = 'x_linked_recessive'
-    #     await self._assert_expected_search([], inheritance_mode=inheritance_mode, omit_sample_type='SV_WES')
-    #     await self._assert_expected_search([], inheritance_mode=inheritance_mode, sample_data=SV_WGS_SAMPLE_DATA)
-    #
-    #     inheritance_mode = 'homozygous_recessive'
-    #     await self._assert_expected_search(
-    #         [VARIANT2], inheritance_mode=inheritance_mode, omit_sample_type='SV_WES',
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [PROJECT_2_VARIANT1, VARIANT2], inheritance_mode=inheritance_mode, sample_data=MULTI_PROJECT_SAMPLE_DATA,
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [SV_VARIANT4], inheritance_mode=inheritance_mode, sample_data=SV_WGS_SAMPLE_DATA,
-    #     )
-    #
-    #     gt_inheritance_filter = {'genotype': {'I000006_hg00733': 'has_alt', 'I000005_hg00732': 'ref_ref'}}
-    #     await self._assert_expected_search(
-    #         [VARIANT2, VARIANT3], inheritance_filter=gt_inheritance_filter, sample_data=FAMILY_2_VARIANT_SAMPLE_DATA)
-    #
-    #     inheritance_mode = 'compound_het'
-    #     await self._assert_expected_search(
-    #         [[VARIANT3, VARIANT4]], inheritance_mode=inheritance_mode, sample_data=MULTI_PROJECT_SAMPLE_DATA, gene_counts={
-    #             'ENSG00000097046': {'total': 2, 'families': {'F000002_2': 2}},
-    #             'ENSG00000177000': {'total': 1, 'families': {'F000002_2': 1}},
-    #         }, **COMP_HET_ALL_PASS_FILTERS,
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [[SV_VARIANT1, SV_VARIANT2]], inheritance_mode=inheritance_mode, sample_data=SV_WGS_SAMPLE_DATA,
-    #         **COMP_HET_ALL_PASS_FILTERS,
-    #     )
-    #
-    #     inheritance_mode = 'recessive'
-    #     await self._assert_expected_search(
-    #         [PROJECT_2_VARIANT1, VARIANT2, [VARIANT3, VARIANT4]], inheritance_mode=inheritance_mode, gene_counts={
-    #             'ENSG00000097046': {'total': 2, 'families': {'F000002_2': 2}},
-    #             'ENSG00000177000': {'total': 2, 'families': {'F000002_2': 2}},
-    #         }, sample_data=MULTI_PROJECT_SAMPLE_DATA, **COMP_HET_ALL_PASS_FILTERS,
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [[SV_VARIANT1, SV_VARIANT2], SV_VARIANT4], inheritance_mode=inheritance_mode, sample_data=SV_WGS_SAMPLE_DATA,
-    #         **COMP_HET_ALL_PASS_FILTERS,
-    #     )
-    #
-    # async def test_quality_filter(self):
-    #     quality_filter = {'vcf_filter': 'pass'}
-    #     await self._assert_expected_search(
-    #         [VARIANT1, VARIANT2, MULTI_FAMILY_VARIANT], quality_filter=quality_filter, omit_sample_type='SV_WES',
-    #     )
-    #
-    #     await self._assert_expected_search([SV_VARIANT4], quality_filter=quality_filter, sample_data=SV_WGS_SAMPLE_DATA)
-    #
-    #     await self._assert_expected_search(
-    #         [VARIANT2, MULTI_FAMILY_VARIANT], quality_filter={'min_gq': 40}, omit_sample_type='SV_WES',
-    #     )
-    #
-    #     sv_quality_filter = {'min_gq_sv': 40}
-    #     await self._assert_expected_search(
-    #         [SV_VARIANT3, SV_VARIANT4], quality_filter=sv_quality_filter, sample_data=SV_WGS_SAMPLE_DATA,
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [], annotations=NEW_SV_FILTER, quality_filter=sv_quality_filter, sample_data=SV_WGS_SAMPLE_DATA,
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [VARIANT2, MULTI_FAMILY_VARIANT], quality_filter={'min_gq': 40, 'vcf_filter': 'pass'}, omit_sample_type='SV_WES',
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [VARIANT1, VARIANT2, MULTI_FAMILY_VARIANT], quality_filter={'min_gq': 60, 'affected_only': True},
-    #         omit_sample_type='SV_WES',
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [SV_VARIANT3, SV_VARIANT4], quality_filter={'min_gq_sv': 60, 'affected_only': True}, sample_data=SV_WGS_SAMPLE_DATA,
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [VARIANT1, VARIANT2, FAMILY_3_VARIANT], quality_filter={'min_ab': 50}, omit_sample_type='SV_WES',
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [VARIANT2, VARIANT3], quality_filter={'min_ab': 70, 'affected_only': True},
-    #         omit_sample_type='SV_WES',
-    #     )
-    #
-    #     quality_filter = {'min_gq': 40, 'min_ab': 50}
-    #     await self._assert_expected_search(
-    #         [VARIANT2, FAMILY_3_VARIANT], quality_filter=quality_filter, omit_sample_type='SV_WES',
-    #     )
-    #
-    #     annotations = {'splice_ai': '0.0'}  # Ensures no variants are filtered out by annotation/path filters
-    #     await self._assert_expected_search(
-    #         [VARIANT1, VARIANT2, FAMILY_3_VARIANT], quality_filter=quality_filter, omit_sample_type='SV_WES',
-    #         annotations=annotations, pathogenicity={'clinvar': ['likely_pathogenic', 'vus_or_conflicting']},
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [VARIANT2, FAMILY_3_VARIANT], quality_filter=quality_filter, omit_sample_type='SV_WES',
-    #         annotations=annotations, pathogenicity={'clinvar': ['pathogenic']},
-    #     )
-    #
-    # async def test_location_search(self):
-    #     await self._assert_expected_search(
-    #         [VARIANT2, MULTI_FAMILY_VARIANT, VARIANT4], omit_sample_type='SV_WES', **LOCATION_SEARCH,
-    #     )
-    #
-    #     sv_intervals = ['1:9310023-9380264']
-    #     await self._assert_expected_search(
-    #         [SV_VARIANT1, SV_VARIANT2], sample_data=SV_WGS_SAMPLE_DATA, intervals=sv_intervals, gene_ids=['ENSG00000171621'],
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [VARIANT1], omit_sample_type='SV_WES', **EXCLUDE_LOCATION_SEARCH,
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [SV_VARIANT3, SV_VARIANT4], sample_data=SV_WGS_SAMPLE_DATA, intervals=sv_intervals, exclude_intervals=True,
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [SELECTED_TRANSCRIPT_MULTI_FAMILY_VARIANT],  omit_sample_type='SV_WES',
-    #         intervals=LOCATION_SEARCH['intervals'][-1:], gene_ids=LOCATION_SEARCH['gene_ids'][:1]
-    #     )
-    #
-    # async def test_variant_id_search(self):
-    #     await self._assert_expected_search([VARIANT2], omit_sample_type='SV_WES', **RSID_SEARCH)
-    #
-    #     await self._assert_expected_search([VARIANT1], omit_sample_type='SV_WES', **VARIANT_ID_SEARCH)
-    #
-    #     await self._assert_expected_search(
-    #         [VARIANT1], omit_sample_type='SV_WES', variant_ids=VARIANT_ID_SEARCH['variant_ids'][:1],
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [], omit_sample_type='SV_WES', variant_ids=VARIANT_ID_SEARCH['variant_ids'][1:],
-    #     )
-    #
-    #     await self._assert_expected_search([SV_VARIANT2, SV_VARIANT4], sample_data=SV_WGS_SAMPLE_DATA, variant_keys=[
-    #         'cohort_2911.chr1.final_cleanup_INS_chr1_160', 'phase2_DEL_chr14_4640',
-    #     ])
-    #
-    # async def test_frequency_filter(self):
-    #     await self._assert_expected_search(
-    #         [VARIANT1, VARIANT4], frequencies={'seqr': {'af': 0.2}}, omit_sample_type='SV_WES',
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [MULTI_FAMILY_VARIANT, VARIANT4], frequencies={'seqr': {'ac': 4}}, omit_sample_type='SV_WES',
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [MULTI_FAMILY_VARIANT, VARIANT4], frequencies={'seqr': {'hh': 1}}, omit_sample_type='SV_WES',
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [VARIANT4], frequencies={'seqr': {'ac': 4, 'hh': 0}}, omit_sample_type='SV_WES',
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [SV_VARIANT1], frequencies={'sv_callset': {'af': 0.05}}, sample_data=SV_WGS_SAMPLE_DATA,
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [VARIANT1, VARIANT2, VARIANT4], frequencies={'gnomad_genomes': {'af': 0.05}}, omit_sample_type='SV_WES',
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [VARIANT2, VARIANT4], frequencies={'gnomad_genomes': {'af': 0.05, 'hh': 1}}, omit_sample_type='SV_WES',
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [VARIANT2, VARIANT4], frequencies={'gnomad_genomes': {'af': 0.005}}, omit_sample_type='SV_WES',
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [SV_VARIANT1, SV_VARIANT3, SV_VARIANT4], frequencies={'gnomad_svs': {'af': 0.001}}, sample_data=SV_WGS_SAMPLE_DATA,
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [VARIANT4], frequencies={'seqr': {'af': 0.2}, 'gnomad_genomes': {'ac': 50}},
-    #         omit_sample_type='SV_WES',
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [VARIANT1, VARIANT2, MULTI_FAMILY_VARIANT, VARIANT4], frequencies={'seqr': {}, 'gnomad_genomes': {'af': None}},
-    #         omit_sample_type='SV_WES',
-    #     )
-    #
-    #     annotations = {'splice_ai': '0.0'}  # Ensures no variants are filtered out by annotation/path filters
-    #     await self._assert_expected_search(
-    #         [VARIANT1, VARIANT2, VARIANT4], frequencies={'gnomad_genomes': {'af': 0.01}}, omit_sample_type='SV_WES',
-    #         annotations=annotations, pathogenicity={'clinvar': ['pathogenic', 'likely_pathogenic', 'vus_or_conflicting']},
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [VARIANT2, VARIANT4], frequencies={'gnomad_genomes': {'af': 0.01}}, omit_sample_type='SV_WES',
-    #         annotations=annotations, pathogenicity={'clinvar': ['pathogenic', 'vus_or_conflicting']},
-    #     )
-    #
-    # async def test_annotations_filter(self):
-    #     await self._assert_expected_search([VARIANT2], pathogenicity={'hgmd': ['hgmd_other']}, omit_sample_type='SV_WES')
-    #
-    #     pathogenicity = {'clinvar': ['likely_pathogenic', 'vus_or_conflicting', 'benign']}
-    #     await self._assert_expected_search([VARIANT1, VARIANT2], pathogenicity=pathogenicity, omit_sample_type='SV_WES')
-    #
-    #     pathogenicity['clinvar'] = pathogenicity['clinvar'][:1]
-    #     await self._assert_expected_search(
-    #         [VARIANT1, VARIANT4], pathogenicity=pathogenicity, annotations={'SCREEN': ['CTCF-only', 'DNase-only']},
-    #         omit_sample_type='SV_WES',
-    #     )
-    #
-    #     annotations = {
-    #         'missense': ['missense_variant'], 'in_frame': ['inframe_insertion', 'inframe_deletion'], 'frameshift': None,
-    #         'structural_consequence': ['INTRONIC'],
-    #     }
-    #     await self._assert_expected_search(
-    #         [VARIANT1, VARIANT2, VARIANT4], pathogenicity=pathogenicity, annotations=annotations, omit_sample_type='SV_WES',
-    #     )
-    #
-    #     await self._assert_expected_search([VARIANT2, VARIANT4], annotations=annotations, omit_sample_type='SV_WES')
-    #
-    #     await self._assert_expected_search([SV_VARIANT1], annotations=annotations, sample_data=SV_WGS_SAMPLE_DATA)
-    #
-    #     annotations['splice_ai'] = '0.005'
-    #     await self._assert_expected_search(
-    #         [VARIANT2, MULTI_FAMILY_VARIANT, VARIANT4], annotations=annotations, omit_sample_type='SV_WES',
-    #     )
-    #
-    #     annotations['structural'] = ['DEL']
-    #     await self._assert_expected_search([SV_VARIANT1, SV_VARIANT4], annotations=annotations, sample_data=SV_WGS_SAMPLE_DATA)
-    #
-    #     annotations = {'other': ['non_coding_transcript_exon_variant']}
-    #     await self._assert_expected_search(
-    #         [VARIANT1, SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_2, SELECTED_ANNOTATION_TRANSCRIPT_MULTI_FAMILY_VARIANT],
-    #         pathogenicity=pathogenicity, annotations=annotations, omit_sample_type='SV_WES',
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_2, SELECTED_TRANSCRIPT_MULTI_FAMILY_VARIANT],
-    #         gene_ids=LOCATION_SEARCH['gene_ids'][:1], annotations=annotations, omit_sample_type='SV_WES',
-    #     )
-    #
-    # async def test_secondary_annotations_filter(self):
-    #     annotations_1 = {'missense': ['missense_variant']}
-    #     annotations_2 = {'other': ['intron_variant']}
-    #
-    #     await self._assert_expected_search(
-    #         [[VARIANT3, VARIANT4]], inheritance_mode='compound_het', omit_sample_type='SV_WES',
-    #         annotations=annotations_1, annotations_secondary=annotations_2,
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [VARIANT2, [VARIANT3, VARIANT4]], inheritance_mode='recessive', omit_sample_type='SV_WES',
-    #         annotations=annotations_1, annotations_secondary=annotations_2,
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [[VARIANT3, VARIANT4]], inheritance_mode='recessive', omit_sample_type='SV_WES',
-    #         annotations=annotations_2, annotations_secondary=annotations_1,
-    #     )
-    #
-    #     sv_annotations_1 = {'structural': ['INS']}
-    #     sv_annotations_2 = {'structural': ['DEL'], 'structural_consequence': ['INTRONIC']}
-    #
-    #     await self._assert_expected_search(
-    #         [[SV_VARIANT1, SV_VARIANT2]], sample_data=SV_WGS_SAMPLE_DATA, inheritance_mode='compound_het',
-    #         annotations=sv_annotations_1, annotations_secondary=sv_annotations_2,
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [[SV_VARIANT1, SV_VARIANT2], SV_VARIANT4], sample_data=SV_WGS_SAMPLE_DATA, inheritance_mode='recessive',
-    #         annotations=sv_annotations_2, annotations_secondary=sv_annotations_1,
-    #     )
-    #
-    #     pathogenicity = {'clinvar': ['likely_pathogenic', 'vus_or_conflicting']}
-    #     await self._assert_expected_search(
-    #         [VARIANT2, [VARIANT3, VARIANT4]], inheritance_mode='recessive', omit_sample_type='SV_WES',
-    #         annotations=annotations_2, annotations_secondary=annotations_1, pathogenicity=pathogenicity,
-    #     )
-    #
-    #     screen_annotations = {'SCREEN': ['CTCF-only']}
-    #     await self._assert_expected_search(
-    #         [], inheritance_mode='recessive', omit_sample_type='SV_WES',
-    #         annotations=screen_annotations, annotations_secondary=annotations_1,
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [[VARIANT3, VARIANT4]], inheritance_mode='recessive', omit_sample_type='SV_WES',
-    #         annotations=screen_annotations, annotations_secondary=annotations_2,
-    #     )
-    #
-    #     selected_transcript_annotations = {'other': ['non_coding_transcript_exon_variant']}
-    #     await self._assert_expected_search(
-    #         [VARIANT2, [SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_3, VARIANT4]], inheritance_mode='recessive',
-    #         annotations=screen_annotations, annotations_secondary=selected_transcript_annotations,
-    #         pathogenicity=pathogenicity, omit_sample_type='SV_WES',
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_2, [SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_3, VARIANT4]],
-    #         annotations={**selected_transcript_annotations, **screen_annotations}, annotations_secondary=annotations_2,
-    #         inheritance_mode='recessive', omit_sample_type='SV_WES',
-    #     )
-    #
-    # async def test_in_silico_filter(self):
-    #     in_silico = {'eigen': '5.5', 'mut_taster': 'P'}
-    #     await self._assert_expected_search(
-    #         [VARIANT1, VARIANT2, VARIANT4], in_silico=in_silico, omit_sample_type='SV_WES',
-    #     )
-    #
-    #     in_silico['requireScore'] = True
-    #     await self._assert_expected_search(
-    #         [VARIANT2, VARIANT4], in_silico=in_silico, omit_sample_type='SV_WES',
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [SV_VARIANT4], sample_data=SV_WGS_SAMPLE_DATA, in_silico={'strvctvre': 0.1, 'requireScore': True},
-    #     )
-    #
-    # async def test_search_errors(self):
-    #     search_body = get_hail_search_body(sample_data=FAMILY_2_MISSING_SAMPLE_DATA)
-    #     async with self.client.request('POST', '/search', json=search_body) as resp:
-    #         self.assertEqual(resp.status, 400)
-    #         reason = resp.reason
-    #     self.assertEqual(reason, 'The following samples are available in seqr but missing the loaded data: NA19675, NA19678')
-    #
-    #     search_body = get_hail_search_body(sample_data=MULTI_PROJECT_MISSING_SAMPLE_DATA)
-    #     async with self.client.request('POST', '/search', json=search_body) as resp:
-    #         self.assertEqual(resp.status, 400)
-    #         reason = resp.reason
-    #     self.assertEqual(reason, 'The following samples are available in seqr but missing the loaded data: NA19675, NA19678')
-    #
-    #     search_body = get_hail_search_body(
-    #         intervals=LOCATION_SEARCH['intervals'] + ['1:1-99999999999'], omit_sample_type='SV_WES',
-    #     )
-    #     async with self.client.request('POST', '/search', json=search_body) as resp:
-    #         self.assertEqual(resp.status, 400)
-    #         reason = resp.reason
-    #     self.assertEqual(reason, 'Invalid intervals: 1:1-99999999999')
-    #
-    # async def test_sort(self):
-    #     await self._assert_expected_search(
-    #         [_sorted(VARIANT2, [11, 11]),  _sorted(VARIANT4, [11, 11]), _sorted(MULTI_FAMILY_VARIANT, [22, 24]),
-    #          _sorted(VARIANT1, [None, None])], omit_sample_type='SV_WES', sort='protein_consequence',
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [_sorted(SV_VARIANT1, [11]), _sorted(SV_VARIANT2, [12]), _sorted(SV_VARIANT3, [12]), _sorted(SV_VARIANT4, [12])],
-    #          sample_data=SV_WGS_SAMPLE_DATA, sort='protein_consequence',
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [_sorted(VARIANT4, [11, 11]), _sorted(SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_2, [11, 22]),
-    #          _sorted(SELECTED_ANNOTATION_TRANSCRIPT_MULTI_FAMILY_VARIANT, [22, 22])],
-    #         omit_sample_type='SV_WES', sort='protein_consequence',
-    #         annotations={'other': ['non_coding_transcript_exon_variant'], 'splice_ai': '0'},
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [_sorted(VARIANT1, [4]), _sorted(VARIANT2, [8]), _sorted(MULTI_FAMILY_VARIANT, [12.5]),
-    #          _sorted(VARIANT4, [12.5])], omit_sample_type='SV_WES', sort='pathogenicity',
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [_sorted(VARIANT1, [4, None]), _sorted(VARIANT2, [8, 3]), _sorted(MULTI_FAMILY_VARIANT, [12.5, None]),
-    #          _sorted(VARIANT4, [12.5, None])], omit_sample_type='SV_WES', sort='pathogenicity_hgmd',
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [_sorted(VARIANT2, [0]), _sorted(VARIANT4, [0.00026519427774474025]),
-    #          _sorted(VARIANT1, [0.034449315071105957]), _sorted(MULTI_FAMILY_VARIANT, [0.38041073083877563])],
-    #         omit_sample_type='SV_WES', sort='gnomad',
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [_sorted(VARIANT1, [0]), _sorted(MULTI_FAMILY_VARIANT, [0]), _sorted(VARIANT4, [0]),
-    #          _sorted(VARIANT2, [0.28899794816970825])], omit_sample_type='SV_WES', sort='gnomad_exomes',
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [_sorted(VARIANT4, [0.02222222276031971]), _sorted(VARIANT1, [0.10000000149011612]),
-    #          _sorted(VARIANT2, [0.31111112236976624]), _sorted(MULTI_FAMILY_VARIANT, [0.6666666865348816])],
-    #         omit_sample_type='SV_WES', sort='callset_af',
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [_sorted(VARIANT4, [-29.899999618530273]), _sorted(VARIANT2, [-20.899999618530273]),
-    #          _sorted(VARIANT1, [-4.668000221252441]), _sorted(MULTI_FAMILY_VARIANT, [-2.753999948501587]), ],
-    #         omit_sample_type='SV_WES', sort='cadd',
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [_sorted(VARIANT4, [-0.5260000228881836]), _sorted(VARIANT2, [-0.19699999690055847]),
-    #          _sorted(VARIANT1, [None]), _sorted(MULTI_FAMILY_VARIANT, [None])], omit_sample_type='SV_WES', sort='revel',
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [_sorted(MULTI_FAMILY_VARIANT, [-0.009999999776482582]), _sorted(VARIANT2, [0]), _sorted(VARIANT4, [0]),
-    #          _sorted(VARIANT1, [None])], omit_sample_type='SV_WES', sort='splice_ai',
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [_sorted(MULTI_FAMILY_VARIANT, [0, -2]), _sorted(VARIANT2, [0, -1]), _sorted(VARIANT4, [0, -1]), _sorted(VARIANT1, [1, 0])],
-    #         omit_sample_type='SV_WES', sort='in_omim', sort_metadata=['ENSG00000177000', 'ENSG00000097046'],
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [_sorted(VARIANT2, [0, -1]), _sorted(MULTI_FAMILY_VARIANT, [1, -1]), _sorted(VARIANT1, [1, 0]), _sorted(VARIANT4, [1, 0])],
-    #         omit_sample_type='SV_WES', sort='in_omim', sort_metadata=['ENSG00000177000'],
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [_sorted(VARIANT2, [2, 2]), _sorted(MULTI_FAMILY_VARIANT, [4, 2]), _sorted(VARIANT4, [4, 4]),
-    #          _sorted(VARIANT1, [None, None])], omit_sample_type='SV_WES', sort='constraint',
-    #         sort_metadata={'ENSG00000177000': 2, 'ENSG00000097046': 4},
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [_sorted(VARIANT2, [3, 3]), _sorted(MULTI_FAMILY_VARIANT, [None, 3]), _sorted(VARIANT1, [None, None]),
-    #          _sorted(VARIANT4, [None, None])], omit_sample_type='SV_WES', sort='prioritized_gene',
-    #         sort_metadata={'ENSG00000177000': 3},
-    #     )
-    #
-    #     # size sort only applies to SVs, so has no impact on other variants
-    #     await self._assert_expected_search(
-    #         [VARIANT1, VARIANT2, MULTI_FAMILY_VARIANT, VARIANT4], sort='size', omit_sample_type='SV_WES',
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [_sorted(SV_VARIANT4, [-46343]), _sorted(SV_VARIANT1, [-104]), _sorted(SV_VARIANT2, [-50]),
-    #          _sorted(SV_VARIANT3, [-50])], sample_data=SV_WGS_SAMPLE_DATA, sort='size',
-    #     )
-    #
-    #     # sort applies to compound hets
-    #     await self._assert_expected_search(
-    #         [_sorted(VARIANT2, [11, 11]), [_sorted(VARIANT4, [11, 11]),  _sorted(VARIANT3, [22, 24])]],
-    #         sort='protein_consequence', inheritance_mode='recessive', omit_sample_type='SV_WES', **COMP_HET_ALL_PASS_FILTERS,
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [[_sorted(VARIANT4, [-0.5260000228881836]), _sorted(VARIANT3, [None])],
-    #          _sorted(VARIANT2, [-0.19699999690055847])],
-    #         sort='revel', inheritance_mode='recessive', omit_sample_type='SV_WES', **COMP_HET_ALL_PASS_FILTERS,
-    #     )
-    #
-    #     await self._assert_expected_search(
-    #         [[_sorted(VARIANT3, [-0.009999999776482582]),  _sorted(VARIANT4, [0])], _sorted(VARIANT2, [0])],
-    #         sort='splice_ai', inheritance_mode='recessive', omit_sample_type='SV_WES', **COMP_HET_ALL_PASS_FILTERS,
-    #     )
+    async def test_multi_project_search(self):
+        await self._assert_expected_search(
+            [PROJECT_2_VARIANT, MULTI_PROJECT_VARIANT1, MULTI_PROJECT_VARIANT2, VARIANT3, VARIANT4],
+            gene_counts=GENE_COUNTS, sample_data=MULTI_PROJECT_SAMPLE_DATA,
+        )
+
+    async def test_inheritance_filter(self):
+        inheritance_mode = 'any_affected'
+        await self._assert_expected_search(
+            [VARIANT1, VARIANT2, MULTI_FAMILY_VARIANT, VARIANT4], inheritance_mode=inheritance_mode, omit_sample_type='SV_WES',
+        )
+
+        await self._assert_expected_search(
+            [SV_VARIANT1, SV_VARIANT2, SV_VARIANT3, SV_VARIANT4], inheritance_mode=inheritance_mode, sample_data=SV_WGS_SAMPLE_DATA,
+        )
+
+        await self._assert_expected_search(
+            [SV_VARIANT2], inheritance_mode=inheritance_mode, annotations=NEW_SV_FILTER, sample_data=SV_WGS_SAMPLE_DATA,
+        )
+
+        inheritance_mode = 'de_novo'
+        await self._assert_expected_search(
+            [VARIANT1, FAMILY_3_VARIANT, VARIANT4], inheritance_mode=inheritance_mode, omit_sample_type='SV_WES',
+        )
+
+        await self._assert_expected_search(
+            [SV_VARIANT1], inheritance_mode=inheritance_mode,  sample_data=SV_WGS_SAMPLE_DATA,
+        )
+
+        inheritance_mode = 'x_linked_recessive'
+        await self._assert_expected_search([], inheritance_mode=inheritance_mode, omit_sample_type='SV_WES')
+        await self._assert_expected_search([], inheritance_mode=inheritance_mode, sample_data=SV_WGS_SAMPLE_DATA)
+
+        inheritance_mode = 'homozygous_recessive'
+        await self._assert_expected_search(
+            [VARIANT2], inheritance_mode=inheritance_mode, omit_sample_type='SV_WES',
+        )
+
+        await self._assert_expected_search(
+            [PROJECT_2_VARIANT1, VARIANT2], inheritance_mode=inheritance_mode, sample_data=MULTI_PROJECT_SAMPLE_DATA,
+        )
+
+        await self._assert_expected_search(
+            [SV_VARIANT4], inheritance_mode=inheritance_mode, sample_data=SV_WGS_SAMPLE_DATA,
+        )
+
+        gt_inheritance_filter = {'genotype': {'I000006_hg00733': 'has_alt', 'I000005_hg00732': 'ref_ref'}}
+        await self._assert_expected_search(
+            [VARIANT2, VARIANT3], inheritance_filter=gt_inheritance_filter, sample_data=FAMILY_2_VARIANT_SAMPLE_DATA)
+
+        inheritance_mode = 'compound_het'
+        await self._assert_expected_search(
+            [[VARIANT3, VARIANT4]], inheritance_mode=inheritance_mode, sample_data=MULTI_PROJECT_SAMPLE_DATA, gene_counts={
+                'ENSG00000097046': {'total': 2, 'families': {'F000002_2': 2}},
+                'ENSG00000177000': {'total': 1, 'families': {'F000002_2': 1}},
+            }, **COMP_HET_ALL_PASS_FILTERS,
+        )
+
+        await self._assert_expected_search(
+            [[SV_VARIANT1, SV_VARIANT2]], inheritance_mode=inheritance_mode, sample_data=SV_WGS_SAMPLE_DATA,
+            **COMP_HET_ALL_PASS_FILTERS,
+        )
+
+        inheritance_mode = 'recessive'
+        await self._assert_expected_search(
+            [PROJECT_2_VARIANT1, VARIANT2, [VARIANT3, VARIANT4]], inheritance_mode=inheritance_mode, gene_counts={
+                'ENSG00000097046': {'total': 2, 'families': {'F000002_2': 2}},
+                'ENSG00000177000': {'total': 2, 'families': {'F000002_2': 2}},
+            }, sample_data=MULTI_PROJECT_SAMPLE_DATA, **COMP_HET_ALL_PASS_FILTERS,
+        )
+
+        await self._assert_expected_search(
+            [[SV_VARIANT1, SV_VARIANT2], SV_VARIANT4], inheritance_mode=inheritance_mode, sample_data=SV_WGS_SAMPLE_DATA,
+            **COMP_HET_ALL_PASS_FILTERS,
+        )
+
+    async def test_quality_filter(self):
+        quality_filter = {'vcf_filter': 'pass'}
+        await self._assert_expected_search(
+            [VARIANT1, VARIANT2, MULTI_FAMILY_VARIANT], quality_filter=quality_filter, omit_sample_type='SV_WES',
+        )
+
+        await self._assert_expected_search([SV_VARIANT4], quality_filter=quality_filter, sample_data=SV_WGS_SAMPLE_DATA)
+
+        await self._assert_expected_search(
+            [VARIANT2, MULTI_FAMILY_VARIANT], quality_filter={'min_gq': 40}, omit_sample_type='SV_WES',
+        )
+
+        sv_quality_filter = {'min_gq_sv': 40}
+        await self._assert_expected_search(
+            [SV_VARIANT3, SV_VARIANT4], quality_filter=sv_quality_filter, sample_data=SV_WGS_SAMPLE_DATA,
+        )
+
+        await self._assert_expected_search(
+            [], annotations=NEW_SV_FILTER, quality_filter=sv_quality_filter, sample_data=SV_WGS_SAMPLE_DATA,
+        )
+
+        await self._assert_expected_search(
+            [VARIANT2, MULTI_FAMILY_VARIANT], quality_filter={'min_gq': 40, 'vcf_filter': 'pass'}, omit_sample_type='SV_WES',
+        )
+
+        await self._assert_expected_search(
+            [VARIANT1, VARIANT2, MULTI_FAMILY_VARIANT], quality_filter={'min_gq': 60, 'affected_only': True},
+            omit_sample_type='SV_WES',
+        )
+
+        await self._assert_expected_search(
+            [SV_VARIANT3, SV_VARIANT4], quality_filter={'min_gq_sv': 60, 'affected_only': True}, sample_data=SV_WGS_SAMPLE_DATA,
+        )
+
+        await self._assert_expected_search(
+            [VARIANT1, VARIANT2, FAMILY_3_VARIANT], quality_filter={'min_ab': 50}, omit_sample_type='SV_WES',
+        )
+
+        await self._assert_expected_search(
+            [VARIANT2, VARIANT3], quality_filter={'min_ab': 70, 'affected_only': True},
+            omit_sample_type='SV_WES',
+        )
+
+        quality_filter = {'min_gq': 40, 'min_ab': 50}
+        await self._assert_expected_search(
+            [VARIANT2, FAMILY_3_VARIANT], quality_filter=quality_filter, omit_sample_type='SV_WES',
+        )
+
+        annotations = {'splice_ai': '0.0'}  # Ensures no variants are filtered out by annotation/path filters
+        await self._assert_expected_search(
+            [VARIANT1, VARIANT2, FAMILY_3_VARIANT], quality_filter=quality_filter, omit_sample_type='SV_WES',
+            annotations=annotations, pathogenicity={'clinvar': ['likely_pathogenic', 'vus_or_conflicting']},
+        )
+
+        await self._assert_expected_search(
+            [VARIANT2, FAMILY_3_VARIANT], quality_filter=quality_filter, omit_sample_type='SV_WES',
+            annotations=annotations, pathogenicity={'clinvar': ['pathogenic']},
+        )
+
+    async def test_location_search(self):
+        await self._assert_expected_search(
+            [VARIANT2, MULTI_FAMILY_VARIANT, VARIANT4], omit_sample_type='SV_WES', **LOCATION_SEARCH,
+        )
+
+        sv_intervals = ['1:9310023-9380264']
+        await self._assert_expected_search(
+            [SV_VARIANT1, SV_VARIANT2], sample_data=SV_WGS_SAMPLE_DATA, intervals=sv_intervals, gene_ids=['ENSG00000171621'],
+        )
+
+        await self._assert_expected_search(
+            [VARIANT1], omit_sample_type='SV_WES', **EXCLUDE_LOCATION_SEARCH,
+        )
+
+        await self._assert_expected_search(
+            [SV_VARIANT3, SV_VARIANT4], sample_data=SV_WGS_SAMPLE_DATA, intervals=sv_intervals, exclude_intervals=True,
+        )
+
+        await self._assert_expected_search(
+            [SELECTED_TRANSCRIPT_MULTI_FAMILY_VARIANT],  omit_sample_type='SV_WES',
+            intervals=LOCATION_SEARCH['intervals'][-1:], gene_ids=LOCATION_SEARCH['gene_ids'][:1]
+        )
+
+    async def test_variant_id_search(self):
+        await self._assert_expected_search([VARIANT2], omit_sample_type='SV_WES', **RSID_SEARCH)
+
+        await self._assert_expected_search([VARIANT1], omit_sample_type='SV_WES', **VARIANT_ID_SEARCH)
+
+        await self._assert_expected_search(
+            [VARIANT1], omit_sample_type='SV_WES', variant_ids=VARIANT_ID_SEARCH['variant_ids'][:1],
+        )
+
+        await self._assert_expected_search(
+            [], omit_sample_type='SV_WES', variant_ids=VARIANT_ID_SEARCH['variant_ids'][1:],
+        )
+
+        await self._assert_expected_search([SV_VARIANT2, SV_VARIANT4], sample_data=SV_WGS_SAMPLE_DATA, variant_keys=[
+            'cohort_2911.chr1.final_cleanup_INS_chr1_160', 'phase2_DEL_chr14_4640',
+        ])
+
+    async def test_frequency_filter(self):
+        await self._assert_expected_search(
+            [VARIANT1, VARIANT4], frequencies={'seqr': {'af': 0.2}}, omit_sample_type='SV_WES',
+        )
+
+        await self._assert_expected_search(
+            [MULTI_FAMILY_VARIANT, VARIANT4], frequencies={'seqr': {'ac': 4}}, omit_sample_type='SV_WES',
+        )
+
+        await self._assert_expected_search(
+            [MULTI_FAMILY_VARIANT, VARIANT4], frequencies={'seqr': {'hh': 1}}, omit_sample_type='SV_WES',
+        )
+
+        await self._assert_expected_search(
+            [VARIANT4], frequencies={'seqr': {'ac': 4, 'hh': 0}}, omit_sample_type='SV_WES',
+        )
+
+        await self._assert_expected_search(
+            [SV_VARIANT1], frequencies={'sv_callset': {'af': 0.05}}, sample_data=SV_WGS_SAMPLE_DATA,
+        )
+
+        await self._assert_expected_search(
+            [VARIANT1, VARIANT2, VARIANT4], frequencies={'gnomad_genomes': {'af': 0.05}}, omit_sample_type='SV_WES',
+        )
+
+        await self._assert_expected_search(
+            [VARIANT2, VARIANT4], frequencies={'gnomad_genomes': {'af': 0.05, 'hh': 1}}, omit_sample_type='SV_WES',
+        )
+
+        await self._assert_expected_search(
+            [VARIANT2, VARIANT4], frequencies={'gnomad_genomes': {'af': 0.005}}, omit_sample_type='SV_WES',
+        )
+
+        await self._assert_expected_search(
+            [SV_VARIANT1, SV_VARIANT3, SV_VARIANT4], frequencies={'gnomad_svs': {'af': 0.001}}, sample_data=SV_WGS_SAMPLE_DATA,
+        )
+
+        await self._assert_expected_search(
+            [VARIANT4], frequencies={'seqr': {'af': 0.2}, 'gnomad_genomes': {'ac': 50}},
+            omit_sample_type='SV_WES',
+        )
+
+        await self._assert_expected_search(
+            [VARIANT1, VARIANT2, MULTI_FAMILY_VARIANT, VARIANT4], frequencies={'seqr': {}, 'gnomad_genomes': {'af': None}},
+            omit_sample_type='SV_WES',
+        )
+
+        annotations = {'splice_ai': '0.0'}  # Ensures no variants are filtered out by annotation/path filters
+        await self._assert_expected_search(
+            [VARIANT1, VARIANT2, VARIANT4], frequencies={'gnomad_genomes': {'af': 0.01}}, omit_sample_type='SV_WES',
+            annotations=annotations, pathogenicity={'clinvar': ['pathogenic', 'likely_pathogenic', 'vus_or_conflicting']},
+        )
+
+        await self._assert_expected_search(
+            [VARIANT2, VARIANT4], frequencies={'gnomad_genomes': {'af': 0.01}}, omit_sample_type='SV_WES',
+            annotations=annotations, pathogenicity={'clinvar': ['pathogenic', 'vus_or_conflicting']},
+        )
+
+    async def test_annotations_filter(self):
+        await self._assert_expected_search([VARIANT2], pathogenicity={'hgmd': ['hgmd_other']}, omit_sample_type='SV_WES')
+
+        pathogenicity = {'clinvar': ['likely_pathogenic', 'vus_or_conflicting', 'benign']}
+        await self._assert_expected_search([VARIANT1, VARIANT2], pathogenicity=pathogenicity, omit_sample_type='SV_WES')
+
+        pathogenicity['clinvar'] = pathogenicity['clinvar'][:1]
+        await self._assert_expected_search(
+            [VARIANT1, VARIANT4], pathogenicity=pathogenicity, annotations={'SCREEN': ['CTCF-only', 'DNase-only']},
+            omit_sample_type='SV_WES',
+        )
+
+        annotations = {
+            'missense': ['missense_variant'], 'in_frame': ['inframe_insertion', 'inframe_deletion'], 'frameshift': None,
+            'structural_consequence': ['INTRONIC'],
+        }
+        await self._assert_expected_search(
+            [VARIANT1, VARIANT2, VARIANT4], pathogenicity=pathogenicity, annotations=annotations, omit_sample_type='SV_WES',
+        )
+
+        await self._assert_expected_search([VARIANT2, VARIANT4], annotations=annotations, omit_sample_type='SV_WES')
+
+        await self._assert_expected_search([SV_VARIANT1], annotations=annotations, sample_data=SV_WGS_SAMPLE_DATA)
+
+        annotations['splice_ai'] = '0.005'
+        await self._assert_expected_search(
+            [VARIANT2, MULTI_FAMILY_VARIANT, VARIANT4], annotations=annotations, omit_sample_type='SV_WES',
+        )
+
+        annotations['structural'] = ['DEL']
+        await self._assert_expected_search([SV_VARIANT1, SV_VARIANT4], annotations=annotations, sample_data=SV_WGS_SAMPLE_DATA)
+
+        annotations = {'other': ['non_coding_transcript_exon_variant']}
+        await self._assert_expected_search(
+            [VARIANT1, SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_2, SELECTED_ANNOTATION_TRANSCRIPT_MULTI_FAMILY_VARIANT],
+            pathogenicity=pathogenicity, annotations=annotations, omit_sample_type='SV_WES',
+        )
+
+        await self._assert_expected_search(
+            [SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_2, SELECTED_TRANSCRIPT_MULTI_FAMILY_VARIANT],
+            gene_ids=LOCATION_SEARCH['gene_ids'][:1], annotations=annotations, omit_sample_type='SV_WES',
+        )
+
+    async def test_secondary_annotations_filter(self):
+        annotations_1 = {'missense': ['missense_variant']}
+        annotations_2 = {'other': ['intron_variant']}
+
+        await self._assert_expected_search(
+            [[VARIANT3, VARIANT4]], inheritance_mode='compound_het', omit_sample_type='SV_WES',
+            annotations=annotations_1, annotations_secondary=annotations_2,
+        )
+
+        await self._assert_expected_search(
+            [VARIANT2, [VARIANT3, VARIANT4]], inheritance_mode='recessive', omit_sample_type='SV_WES',
+            annotations=annotations_1, annotations_secondary=annotations_2,
+        )
+
+        await self._assert_expected_search(
+            [[VARIANT3, VARIANT4]], inheritance_mode='recessive', omit_sample_type='SV_WES',
+            annotations=annotations_2, annotations_secondary=annotations_1,
+        )
+
+        sv_annotations_1 = {'structural': ['INS']}
+        sv_annotations_2 = {'structural': ['DEL'], 'structural_consequence': ['INTRONIC']}
+
+        await self._assert_expected_search(
+            [[SV_VARIANT1, SV_VARIANT2]], sample_data=SV_WGS_SAMPLE_DATA, inheritance_mode='compound_het',
+            annotations=sv_annotations_1, annotations_secondary=sv_annotations_2,
+        )
+
+        await self._assert_expected_search(
+            [[SV_VARIANT1, SV_VARIANT2], SV_VARIANT4], sample_data=SV_WGS_SAMPLE_DATA, inheritance_mode='recessive',
+            annotations=sv_annotations_2, annotations_secondary=sv_annotations_1,
+        )
+
+        pathogenicity = {'clinvar': ['likely_pathogenic', 'vus_or_conflicting']}
+        await self._assert_expected_search(
+            [VARIANT2, [VARIANT3, VARIANT4]], inheritance_mode='recessive', omit_sample_type='SV_WES',
+            annotations=annotations_2, annotations_secondary=annotations_1, pathogenicity=pathogenicity,
+        )
+
+        screen_annotations = {'SCREEN': ['CTCF-only']}
+        await self._assert_expected_search(
+            [], inheritance_mode='recessive', omit_sample_type='SV_WES',
+            annotations=screen_annotations, annotations_secondary=annotations_1,
+        )
+
+        await self._assert_expected_search(
+            [[VARIANT3, VARIANT4]], inheritance_mode='recessive', omit_sample_type='SV_WES',
+            annotations=screen_annotations, annotations_secondary=annotations_2,
+        )
+
+        selected_transcript_annotations = {'other': ['non_coding_transcript_exon_variant']}
+        await self._assert_expected_search(
+            [VARIANT2, [SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_3, VARIANT4]], inheritance_mode='recessive',
+            annotations=screen_annotations, annotations_secondary=selected_transcript_annotations,
+            pathogenicity=pathogenicity, omit_sample_type='SV_WES',
+        )
+
+        await self._assert_expected_search(
+            [SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_2, [SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_3, VARIANT4]],
+            annotations={**selected_transcript_annotations, **screen_annotations}, annotations_secondary=annotations_2,
+            inheritance_mode='recessive', omit_sample_type='SV_WES',
+        )
+
+    async def test_in_silico_filter(self):
+        in_silico = {'eigen': '5.5', 'mut_taster': 'P'}
+        await self._assert_expected_search(
+            [VARIANT1, VARIANT2, VARIANT4], in_silico=in_silico, omit_sample_type='SV_WES',
+        )
+
+        in_silico['requireScore'] = True
+        await self._assert_expected_search(
+            [VARIANT2, VARIANT4], in_silico=in_silico, omit_sample_type='SV_WES',
+        )
+
+        await self._assert_expected_search(
+            [SV_VARIANT4], sample_data=SV_WGS_SAMPLE_DATA, in_silico={'strvctvre': 0.1, 'requireScore': True},
+        )
+
+    async def test_search_errors(self):
+        search_body = get_hail_search_body(sample_data=FAMILY_2_MISSING_SAMPLE_DATA)
+        async with self.client.request('POST', '/search', json=search_body) as resp:
+            self.assertEqual(resp.status, 400)
+            reason = resp.reason
+        self.assertEqual(reason, 'The following samples are available in seqr but missing the loaded data: NA19675, NA19678')
+
+        search_body = get_hail_search_body(sample_data=MULTI_PROJECT_MISSING_SAMPLE_DATA)
+        async with self.client.request('POST', '/search', json=search_body) as resp:
+            self.assertEqual(resp.status, 400)
+            reason = resp.reason
+        self.assertEqual(reason, 'The following samples are available in seqr but missing the loaded data: NA19675, NA19678')
+
+        search_body = get_hail_search_body(
+            intervals=LOCATION_SEARCH['intervals'] + ['1:1-99999999999'], omit_sample_type='SV_WES',
+        )
+        async with self.client.request('POST', '/search', json=search_body) as resp:
+            self.assertEqual(resp.status, 400)
+            reason = resp.reason
+        self.assertEqual(reason, 'Invalid intervals: 1:1-99999999999')
+
+    async def test_sort(self):
+        await self._assert_expected_search(
+            [_sorted(VARIANT2, [11, 11]),  _sorted(VARIANT4, [11, 11]), _sorted(MULTI_FAMILY_VARIANT, [22, 24]),
+             _sorted(VARIANT1, [None, None])], omit_sample_type='SV_WES', sort='protein_consequence',
+        )
+
+        await self._assert_expected_search(
+            [_sorted(SV_VARIANT1, [11]), _sorted(SV_VARIANT2, [12]), _sorted(SV_VARIANT3, [12]), _sorted(SV_VARIANT4, [12])],
+             sample_data=SV_WGS_SAMPLE_DATA, sort='protein_consequence',
+        )
+
+        await self._assert_expected_search(
+            [_sorted(VARIANT4, [11, 11]), _sorted(SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_2, [11, 22]),
+             _sorted(SELECTED_ANNOTATION_TRANSCRIPT_MULTI_FAMILY_VARIANT, [22, 22])],
+            omit_sample_type='SV_WES', sort='protein_consequence',
+            annotations={'other': ['non_coding_transcript_exon_variant'], 'splice_ai': '0'},
+        )
+
+        await self._assert_expected_search(
+            [_sorted(VARIANT1, [4]), _sorted(VARIANT2, [8]), _sorted(MULTI_FAMILY_VARIANT, [12.5]),
+             _sorted(VARIANT4, [12.5])], omit_sample_type='SV_WES', sort='pathogenicity',
+        )
+
+        await self._assert_expected_search(
+            [_sorted(VARIANT1, [4, None]), _sorted(VARIANT2, [8, 3]), _sorted(MULTI_FAMILY_VARIANT, [12.5, None]),
+             _sorted(VARIANT4, [12.5, None])], omit_sample_type='SV_WES', sort='pathogenicity_hgmd',
+        )
+
+        await self._assert_expected_search(
+            [_sorted(VARIANT2, [0]), _sorted(VARIANT4, [0.00026519427774474025]),
+             _sorted(VARIANT1, [0.034449315071105957]), _sorted(MULTI_FAMILY_VARIANT, [0.38041073083877563])],
+            omit_sample_type='SV_WES', sort='gnomad',
+        )
+
+        await self._assert_expected_search(
+            [_sorted(VARIANT1, [0]), _sorted(MULTI_FAMILY_VARIANT, [0]), _sorted(VARIANT4, [0]),
+             _sorted(VARIANT2, [0.28899794816970825])], omit_sample_type='SV_WES', sort='gnomad_exomes',
+        )
+
+        await self._assert_expected_search(
+            [_sorted(VARIANT4, [0.02222222276031971]), _sorted(VARIANT1, [0.10000000149011612]),
+             _sorted(VARIANT2, [0.31111112236976624]), _sorted(MULTI_FAMILY_VARIANT, [0.6666666865348816])],
+            omit_sample_type='SV_WES', sort='callset_af',
+        )
+
+        await self._assert_expected_search(
+            [_sorted(VARIANT4, [-29.899999618530273]), _sorted(VARIANT2, [-20.899999618530273]),
+             _sorted(VARIANT1, [-4.668000221252441]), _sorted(MULTI_FAMILY_VARIANT, [-2.753999948501587]), ],
+            omit_sample_type='SV_WES', sort='cadd',
+        )
+
+        await self._assert_expected_search(
+            [_sorted(VARIANT4, [-0.5260000228881836]), _sorted(VARIANT2, [-0.19699999690055847]),
+             _sorted(VARIANT1, [None]), _sorted(MULTI_FAMILY_VARIANT, [None])], omit_sample_type='SV_WES', sort='revel',
+        )
+
+        await self._assert_expected_search(
+            [_sorted(MULTI_FAMILY_VARIANT, [-0.009999999776482582]), _sorted(VARIANT2, [0]), _sorted(VARIANT4, [0]),
+             _sorted(VARIANT1, [None])], omit_sample_type='SV_WES', sort='splice_ai',
+        )
+
+        await self._assert_expected_search(
+            [_sorted(MULTI_FAMILY_VARIANT, [0, -2]), _sorted(VARIANT2, [0, -1]), _sorted(VARIANT4, [0, -1]), _sorted(VARIANT1, [1, 0])],
+            omit_sample_type='SV_WES', sort='in_omim', sort_metadata=['ENSG00000177000', 'ENSG00000097046'],
+        )
+
+        await self._assert_expected_search(
+            [_sorted(VARIANT2, [0, -1]), _sorted(MULTI_FAMILY_VARIANT, [1, -1]), _sorted(VARIANT1, [1, 0]), _sorted(VARIANT4, [1, 0])],
+            omit_sample_type='SV_WES', sort='in_omim', sort_metadata=['ENSG00000177000'],
+        )
+
+        await self._assert_expected_search(
+            [_sorted(VARIANT2, [2, 2]), _sorted(MULTI_FAMILY_VARIANT, [4, 2]), _sorted(VARIANT4, [4, 4]),
+             _sorted(VARIANT1, [None, None])], omit_sample_type='SV_WES', sort='constraint',
+            sort_metadata={'ENSG00000177000': 2, 'ENSG00000097046': 4},
+        )
+
+        await self._assert_expected_search(
+            [_sorted(VARIANT2, [3, 3]), _sorted(MULTI_FAMILY_VARIANT, [None, 3]), _sorted(VARIANT1, [None, None]),
+             _sorted(VARIANT4, [None, None])], omit_sample_type='SV_WES', sort='prioritized_gene',
+            sort_metadata={'ENSG00000177000': 3},
+        )
+
+        # size sort only applies to SVs, so has no impact on other variants
+        await self._assert_expected_search(
+            [VARIANT1, VARIANT2, MULTI_FAMILY_VARIANT, VARIANT4], sort='size', omit_sample_type='SV_WES',
+        )
+
+        await self._assert_expected_search(
+            [_sorted(SV_VARIANT4, [-46343]), _sorted(SV_VARIANT1, [-104]), _sorted(SV_VARIANT2, [-50]),
+             _sorted(SV_VARIANT3, [-50])], sample_data=SV_WGS_SAMPLE_DATA, sort='size',
+        )
+
+        # sort applies to compound hets
+        await self._assert_expected_search(
+            [_sorted(VARIANT2, [11, 11]), [_sorted(VARIANT4, [11, 11]),  _sorted(VARIANT3, [22, 24])]],
+            sort='protein_consequence', inheritance_mode='recessive', omit_sample_type='SV_WES', **COMP_HET_ALL_PASS_FILTERS,
+        )
+
+        await self._assert_expected_search(
+            [[_sorted(VARIANT4, [-0.5260000228881836]), _sorted(VARIANT3, [None])],
+             _sorted(VARIANT2, [-0.19699999690055847])],
+            sort='revel', inheritance_mode='recessive', omit_sample_type='SV_WES', **COMP_HET_ALL_PASS_FILTERS,
+        )
+
+        await self._assert_expected_search(
+            [[_sorted(VARIANT3, [-0.009999999776482582]),  _sorted(VARIANT4, [0])], _sorted(VARIANT2, [0])],
+            sort='splice_ai', inheritance_mode='recessive', omit_sample_type='SV_WES', **COMP_HET_ALL_PASS_FILTERS,
+        )

From 7f5863af9285f8c4b2bab7f042382385b62d0461 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Tue, 5 Sep 2023 13:05:37 -0400
Subject: [PATCH 14/16] add initial gcnv filter tests

---
 hail_search/test_search.py | 123 ++++++++++++++++++++++++++++++++++---
 1 file changed, 115 insertions(+), 8 deletions(-)

diff --git a/hail_search/test_search.py b/hail_search/test_search.py
index 2a94444220..2c9caf94d2 100644
--- a/hail_search/test_search.py
+++ b/hail_search/test_search.py
@@ -200,10 +200,19 @@ async def test_inheritance_filter(self):
             [VARIANT1, VARIANT2, MULTI_FAMILY_VARIANT, VARIANT4], inheritance_mode=inheritance_mode, omit_sample_type='SV_WES',
         )
 
+        await self._assert_expected_search(
+            [GCNV_VARIANT1, GCNV_VARIANT2, GCNV_VARIANT3, GCNV_VARIANT4], inheritance_mode=inheritance_mode,
+            omit_sample_type='VARIANTS',
+        )
+
         await self._assert_expected_search(
             [SV_VARIANT1, SV_VARIANT2, SV_VARIANT3, SV_VARIANT4], inheritance_mode=inheritance_mode, sample_data=SV_WGS_SAMPLE_DATA,
         )
 
+        await self._assert_expected_search(
+            [GCNV_VARIANT3], inheritance_mode=inheritance_mode, annotations=NEW_SV_FILTER, omit_sample_type='VARIANTS',
+        )
+
         await self._assert_expected_search(
             [SV_VARIANT2], inheritance_mode=inheritance_mode, annotations=NEW_SV_FILTER, sample_data=SV_WGS_SAMPLE_DATA,
         )
@@ -213,12 +222,17 @@ async def test_inheritance_filter(self):
             [VARIANT1, FAMILY_3_VARIANT, VARIANT4], inheritance_mode=inheritance_mode, omit_sample_type='SV_WES',
         )
 
+        await self._assert_expected_search(
+            [GCNV_VARIANT1], inheritance_mode=inheritance_mode, omit_sample_type='VARIANTS',
+        )
+
         await self._assert_expected_search(
             [SV_VARIANT1], inheritance_mode=inheritance_mode,  sample_data=SV_WGS_SAMPLE_DATA,
         )
 
         inheritance_mode = 'x_linked_recessive'
         await self._assert_expected_search([], inheritance_mode=inheritance_mode, omit_sample_type='SV_WES')
+        await self._assert_expected_search([], inheritance_mode=inheritance_mode, omit_sample_type='VARIANTS')
         await self._assert_expected_search([], inheritance_mode=inheritance_mode, sample_data=SV_WGS_SAMPLE_DATA)
 
         inheritance_mode = 'homozygous_recessive'
@@ -230,6 +244,10 @@ async def test_inheritance_filter(self):
             [PROJECT_2_VARIANT1, VARIANT2], inheritance_mode=inheritance_mode, sample_data=MULTI_PROJECT_SAMPLE_DATA,
         )
 
+        await self._assert_expected_search(
+            [GCNV_VARIANT1], inheritance_mode=inheritance_mode, omit_sample_type='VARIANTS',
+        )
+
         await self._assert_expected_search(
             [SV_VARIANT4], inheritance_mode=inheritance_mode, sample_data=SV_WGS_SAMPLE_DATA,
         )
@@ -246,9 +264,17 @@ async def test_inheritance_filter(self):
             }, **COMP_HET_ALL_PASS_FILTERS,
         )
 
+        await self._assert_expected_search(
+            [[GCNV_VARIANT3, GCNV_VARIANT4]], inheritance_mode=inheritance_mode, omit_sample_type='VARIANTS', gene_counts={
+                'ENSG00000275023': {'total': 2, 'families': {'F000002_2': 2}},
+                'ENSG00000277258': {'total': 1, 'families': {'F000002_2': 1}},
+                'ENSG00000277972': {'total': 1, 'families': {'F000002_2': 1}},
+            }, **COMP_HET_ALL_PASS_FILTERS,
+        )
+
         await self._assert_expected_search(
             [[SV_VARIANT1, SV_VARIANT2]], inheritance_mode=inheritance_mode, sample_data=SV_WGS_SAMPLE_DATA,
-            **COMP_HET_ALL_PASS_FILTERS,
+            **COMP_HET_ALL_PASS_FILTERS, gene_counts={'ENSG00000171621': {'total': 2, 'families': {'F000002_2': 2}}},
         )
 
         inheritance_mode = 'recessive'
@@ -259,9 +285,20 @@ async def test_inheritance_filter(self):
             }, sample_data=MULTI_PROJECT_SAMPLE_DATA, **COMP_HET_ALL_PASS_FILTERS,
         )
 
+        await self._assert_expected_search(
+            [GCNV_VARIANT1, [GCNV_VARIANT3, GCNV_VARIANT4]], inheritance_mode=inheritance_mode, omit_sample_type='VARIANTS', gene_counts={
+                'ENSG00000275023': {'total': 2, 'families': {'F000002_2': 2}},
+                'ENSG00000277258': {'total': 1, 'families': {'F000002_2': 1}},
+                'ENSG00000277972': {'total': 1, 'families': {'F000002_2': 1}},
+            }, **COMP_HET_ALL_PASS_FILTERS,
+        )
+
         await self._assert_expected_search(
             [[SV_VARIANT1, SV_VARIANT2], SV_VARIANT4], inheritance_mode=inheritance_mode, sample_data=SV_WGS_SAMPLE_DATA,
-            **COMP_HET_ALL_PASS_FILTERS,
+            **COMP_HET_ALL_PASS_FILTERS, gene_counts={
+                'ENSG00000171621': {'total': 2, 'families': {'F000002_2': 2}},
+                'ENSG00000184986': {'total': 1, 'families': {'F000002_2': 1}},
+            }
         )
 
     async def test_quality_filter(self):
@@ -270,12 +307,26 @@ async def test_quality_filter(self):
             [VARIANT1, VARIANT2, MULTI_FAMILY_VARIANT], quality_filter=quality_filter, omit_sample_type='SV_WES',
         )
 
+        await self._assert_expected_search(
+            [GCNV_VARIANT1, GCNV_VARIANT2, GCNV_VARIANT3, GCNV_VARIANT4], quality_filter=quality_filter,
+            omit_sample_type='VARIANTS',
+        )
+
         await self._assert_expected_search([SV_VARIANT4], quality_filter=quality_filter, sample_data=SV_WGS_SAMPLE_DATA)
 
         await self._assert_expected_search(
             [VARIANT2, MULTI_FAMILY_VARIANT], quality_filter={'min_gq': 40}, omit_sample_type='SV_WES',
         )
 
+        gcnv_quality_filter = {'min_qs': 20}
+        await self._assert_expected_search(
+            [GCNV_VARIANT1, GCNV_VARIANT2, GCNV_VARIANT4], quality_filter=gcnv_quality_filter, omit_sample_type='VARIANTS',
+        )
+
+        await self._assert_expected_search(
+            [], annotations=NEW_SV_FILTER, quality_filter=gcnv_quality_filter, omit_sample_type='VARIANTS',
+        )
+
         sv_quality_filter = {'min_gq_sv': 40}
         await self._assert_expected_search(
             [SV_VARIANT3, SV_VARIANT4], quality_filter=sv_quality_filter, sample_data=SV_WGS_SAMPLE_DATA,
@@ -294,6 +345,11 @@ async def test_quality_filter(self):
             omit_sample_type='SV_WES',
         )
 
+        await self._assert_expected_search(
+            [GCNV_VARIANT1, GCNV_VARIANT2, GCNV_VARIANT3, GCNV_VARIANT4], quality_filter={'min_qs': 10, 'affected_only': True},
+            omit_sample_type='VARIANTS',
+        )
+
         await self._assert_expected_search(
             [SV_VARIANT3, SV_VARIANT4], quality_filter={'min_gq_sv': 60, 'affected_only': True}, sample_data=SV_WGS_SAMPLE_DATA,
         )
@@ -328,7 +384,11 @@ async def test_location_search(self):
             [VARIANT2, MULTI_FAMILY_VARIANT, VARIANT4], omit_sample_type='SV_WES', **LOCATION_SEARCH,
         )
 
-        sv_intervals = ['1:9310023-9380264']
+        sv_intervals = ['1:9310023-9380264', '17:38717636-38724781']
+        await self._assert_expected_search(
+            [GCNV_VARIANT3, GCNV_VARIANT4], intervals=sv_intervals, gene_ids=['ENSG00000275023'], omit_sample_type='VARIANTS',
+        )
+
         await self._assert_expected_search(
             [SV_VARIANT1, SV_VARIANT2], sample_data=SV_WGS_SAMPLE_DATA, intervals=sv_intervals, gene_ids=['ENSG00000171621'],
         )
@@ -337,6 +397,10 @@ async def test_location_search(self):
             [VARIANT1], omit_sample_type='SV_WES', **EXCLUDE_LOCATION_SEARCH,
         )
 
+        await self._assert_expected_search(
+            [GCNV_VARIANT1, GCNV_VARIANT2], intervals=sv_intervals, exclude_intervals=True, omit_sample_type='VARIANTS',
+        )
+
         await self._assert_expected_search(
             [SV_VARIANT3, SV_VARIANT4], sample_data=SV_WGS_SAMPLE_DATA, intervals=sv_intervals, exclude_intervals=True,
         )
@@ -359,6 +423,10 @@ async def test_variant_id_search(self):
             [], omit_sample_type='SV_WES', variant_ids=VARIANT_ID_SEARCH['variant_ids'][1:],
         )
 
+        await self._assert_expected_search([GCNV_VARIANT1, GCNV_VARIANT4], omit_sample_type='VARIANTS', variant_keys=[
+            'suffix_95340_DUP', 'suffix_140608_DUP',
+        ])
+
         await self._assert_expected_search([SV_VARIANT2, SV_VARIANT4], sample_data=SV_WGS_SAMPLE_DATA, variant_keys=[
             'cohort_2911.chr1.final_cleanup_INS_chr1_160', 'phase2_DEL_chr14_4640',
         ])
@@ -380,8 +448,13 @@ async def test_frequency_filter(self):
             [VARIANT4], frequencies={'seqr': {'ac': 4, 'hh': 0}}, omit_sample_type='SV_WES',
         )
 
+        sv_callset_filter = {'sv_callset': {'af': 0.05}}
+        await self._assert_expected_search(
+            [GCNV_VARIANT2, GCNV_VARIANT3, GCNV_VARIANT4], frequencies=sv_callset_filter, omit_sample_type='VARIANTS',
+        )
+
         await self._assert_expected_search(
-            [SV_VARIANT1], frequencies={'sv_callset': {'af': 0.05}}, sample_data=SV_WGS_SAMPLE_DATA,
+            [SV_VARIANT1], frequencies=sv_callset_filter, sample_data=SV_WGS_SAMPLE_DATA,
         )
 
         await self._assert_expected_search(
@@ -435,7 +508,7 @@ async def test_annotations_filter(self):
 
         annotations = {
             'missense': ['missense_variant'], 'in_frame': ['inframe_insertion', 'inframe_deletion'], 'frameshift': None,
-            'structural_consequence': ['INTRONIC'],
+            'structural_consequence': ['INTRONIC', 'LOF'],
         }
         await self._assert_expected_search(
             [VARIANT1, VARIANT2, VARIANT4], pathogenicity=pathogenicity, annotations=annotations, omit_sample_type='SV_WES',
@@ -443,6 +516,10 @@ async def test_annotations_filter(self):
 
         await self._assert_expected_search([VARIANT2, VARIANT4], annotations=annotations, omit_sample_type='SV_WES')
 
+        await self._assert_expected_search(
+            [GCNV_VARIANT3, GCNV_VARIANT4], annotations=annotations, omit_sample_type='VARIANTS',
+        )
+
         await self._assert_expected_search([SV_VARIANT1], annotations=annotations, sample_data=SV_WGS_SAMPLE_DATA)
 
         annotations['splice_ai'] = '0.005'
@@ -453,6 +530,11 @@ async def test_annotations_filter(self):
         annotations['structural'] = ['DEL']
         await self._assert_expected_search([SV_VARIANT1, SV_VARIANT4], annotations=annotations, sample_data=SV_WGS_SAMPLE_DATA)
 
+        await self._assert_expected_search([], annotations=annotations, omit_sample_type='VARIANTS')
+
+        annotations['structural'].append('gCNV_DEL')
+        await self._assert_expected_search([GCNV_VARIANT3], annotations=annotations, omit_sample_type='VARIANTS')
+
         annotations = {'other': ['non_coding_transcript_exon_variant']}
         await self._assert_expected_search(
             [VARIANT1, SELECTED_ANNOTATION_TRANSCRIPT_VARIANT_2, SELECTED_ANNOTATION_TRANSCRIPT_MULTI_FAMILY_VARIANT],
@@ -483,8 +565,18 @@ async def test_secondary_annotations_filter(self):
             annotations=annotations_2, annotations_secondary=annotations_1,
         )
 
-        sv_annotations_1 = {'structural': ['INS']}
-        sv_annotations_2 = {'structural': ['DEL'], 'structural_consequence': ['INTRONIC']}
+        sv_annotations_1 = {'structural': ['INS', 'LOF']}
+        sv_annotations_2 = {'structural': ['DEL', 'gCNV_DUP'], 'structural_consequence': ['INTRONIC']}
+
+        await self._assert_expected_search(
+            [[GCNV_VARIANT3, GCNV_VARIANT4]], omit_sample_type='VARIANTS', inheritance_mode='compound_het',
+            annotations=sv_annotations_1, annotations_secondary=sv_annotations_2,
+        )
+
+        await self._assert_expected_search(
+            [GCNV_VARIANT1, [GCNV_VARIANT3, GCNV_VARIANT4]], omit_sample_type='VARIANTS', inheritance_mode='recessive',
+            annotations=sv_annotations_2, annotations_secondary=sv_annotations_1,
+        )
 
         await self._assert_expected_search(
             [[SV_VARIANT1, SV_VARIANT2]], sample_data=SV_WGS_SAMPLE_DATA, inheritance_mode='compound_het',
@@ -537,8 +629,13 @@ async def test_in_silico_filter(self):
             [VARIANT2, VARIANT4], in_silico=in_silico, omit_sample_type='SV_WES',
         )
 
+        sv_in_silico = {'strvctvre': 0.1, 'requireScore': True}
         await self._assert_expected_search(
-            [SV_VARIANT4], sample_data=SV_WGS_SAMPLE_DATA, in_silico={'strvctvre': 0.1, 'requireScore': True},
+            [GCNV_VARIANT1, GCNV_VARIANT2, GCNV_VARIANT3, GCNV_VARIANT4], omit_sample_type='VARIANTS', in_silico=sv_in_silico,
+        )
+
+        await self._assert_expected_search(
+            [SV_VARIANT4], sample_data=SV_WGS_SAMPLE_DATA, in_silico=sv_in_silico,
         )
 
     async def test_search_errors(self):
@@ -568,6 +665,11 @@ async def test_sort(self):
              _sorted(VARIANT1, [None, None])], omit_sample_type='SV_WES', sort='protein_consequence',
         )
 
+        await self._assert_expected_search(
+            [_sorted(GCNV_VARIANT2, [0]), _sorted(GCNV_VARIANT3, [0]), _sorted(GCNV_VARIANT4, [0]),
+             _sorted(GCNV_VARIANT1, [3])], omit_sample_type='VARIANTS', sort='protein_consequence',
+        )
+
         await self._assert_expected_search(
             [_sorted(SV_VARIANT1, [11]), _sorted(SV_VARIANT2, [12]), _sorted(SV_VARIANT3, [12]), _sorted(SV_VARIANT4, [12])],
              sample_data=SV_WGS_SAMPLE_DATA, sort='protein_consequence',
@@ -650,6 +752,11 @@ async def test_sort(self):
             [VARIANT1, VARIANT2, MULTI_FAMILY_VARIANT, VARIANT4], sort='size', omit_sample_type='SV_WES',
         )
 
+        await self._assert_expected_search(
+            [_sorted(GCNV_VARIANT1, [-30886]),  _sorted(GCNV_VARIANT4, [-13922]), _sorted(GCNV_VARIANT2, [-6834]),
+             _sorted(GCNV_VARIANT3, [-2309])], omit_sample_type='VARIANTS', sort='protein_consequence',
+        )
+
         await self._assert_expected_search(
             [_sorted(SV_VARIANT4, [-46343]), _sorted(SV_VARIANT1, [-104]), _sorted(SV_VARIANT2, [-50]),
              _sorted(SV_VARIANT3, [-50])], sample_data=SV_WGS_SAMPLE_DATA, sort='size',

From 420c5e84a0b6831bd327dcb29c5a17a85a2ae043 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Tue, 5 Sep 2023 15:38:06 -0400
Subject: [PATCH 15/16] fix unit tests

---
 hail_search/hail_search_query.py | 19 ++++++++--------
 hail_search/test_search.py       | 38 ++++++++++++++++----------------
 2 files changed, 29 insertions(+), 28 deletions(-)

diff --git a/hail_search/hail_search_query.py b/hail_search/hail_search_query.py
index dd4974d39c..fd0e81fe61 100644
--- a/hail_search/hail_search_query.py
+++ b/hail_search/hail_search_query.py
@@ -658,7 +658,7 @@ def _filter_compound_hets(self):
             ch_ht = ch_ht.filter(ch_ht.comp_het_family_entries.any(hl.is_defined))
 
         # Get possible pairs of variants within the same gene
-        ch_ht = ch_ht.annotate(gene_ids=self._gene_ids_expr(ch_ht))
+        ch_ht = ch_ht.annotate(gene_ids=self._gene_ids_expr(ch_ht, comp_het=True))
         ch_ht = ch_ht.explode(ch_ht.gene_ids)
         formatted_rows_expr = hl.agg.collect(ch_ht.row)
         if HAS_ALLOWED_SECONDARY_ANNOTATION in self._ht.row:
@@ -693,7 +693,7 @@ def _filter_compound_hets(self):
         return ch_ht
 
     @classmethod
-    def _gene_ids_expr(cls, ht):
+    def _gene_ids_expr(cls, ht, comp_het=False):
         return hl.set(ht[cls.TRANSCRIPTS_FIELD].map(lambda t: t.gene_id))
 
     def _is_valid_comp_het_family(self, entries_1, entries_2):
@@ -1228,10 +1228,10 @@ class GcnvHailTableQuery(SvHailTableQuery):
     POPULATIONS = {k: v for k, v in SvHailTableQuery.POPULATIONS.items() if k != 'gnomad_svs'}
 
     @classmethod
-    def _get_genotype_override_field(cls, r, field):
+    def _get_genotype_override_field(cls, r, field, family_entries_field=None):
         agg, get_default = cls.GENOTYPE_OVERRIDE_FIELDS[field]
         sample_field = f'sample_{field}'
-        entries = r.family_entries.flatmap(lambda x: x)
+        entries = r[family_entries_field or 'family_entries'].flatmap(lambda x: x)
         return hl.if_else(
             entries.any(lambda g: hl.is_defined(g.GT) & hl.is_missing(g[sample_field])),
             get_default(r), agg(entries.map(lambda g: g[sample_field]))
@@ -1247,11 +1247,12 @@ def get_allowed_sv_type_ids(self, sv_types):
         ])
 
     @classmethod
-    def _gene_ids_expr(cls, ht):
-        gene_ids_expr = getattr(ht, 'gene_ids', None)
-        if gene_ids_expr is None:
-            gene_ids_expr = cls._get_genotype_override_field(ht, 'gene_ids')
-        return hl.or_else(gene_ids_expr, super()._gene_ids_expr(ht))
+    def _gene_ids_expr(cls, ht, comp_het=False):
+        family_entries_field = 'comp_het_family_entries' if comp_het else None
+        return hl.or_else(
+            cls._get_genotype_override_field(ht, 'gene_ids', family_entries_field=family_entries_field),
+            super()._gene_ids_expr(ht),
+        )
 
     def _additional_annotation_fields(self):
         return {}
diff --git a/hail_search/test_search.py b/hail_search/test_search.py
index 2c9caf94d2..e162353634 100644
--- a/hail_search/test_search.py
+++ b/hail_search/test_search.py
@@ -99,7 +99,7 @@
 # Ensures no variants are filtered out by annotation/path filters for compound hets
 COMP_HET_ALL_PASS_FILTERS = {
     'annotations': {'splice_ai': '0.0'}, 'pathogenicity': {'clinvar': ['likely_pathogenic']},
-    'structural': ['DEL', 'CPX', 'INS'],
+    'structural': ['DEL', 'CPX', 'INS', 'gCNV_DEL', 'gCNV_DUP'],
 }
 
 NEW_SV_FILTER = {'new_structural_variants': ['NEW']}
@@ -245,7 +245,7 @@ async def test_inheritance_filter(self):
         )
 
         await self._assert_expected_search(
-            [GCNV_VARIANT1], inheritance_mode=inheritance_mode, omit_sample_type='VARIANTS',
+            [GCNV_VARIANT3], inheritance_mode=inheritance_mode, omit_sample_type='VARIANTS',
         )
 
         await self._assert_expected_search(
@@ -274,7 +274,7 @@ async def test_inheritance_filter(self):
 
         await self._assert_expected_search(
             [[SV_VARIANT1, SV_VARIANT2]], inheritance_mode=inheritance_mode, sample_data=SV_WGS_SAMPLE_DATA,
-            **COMP_HET_ALL_PASS_FILTERS, gene_counts={'ENSG00000171621': {'total': 2, 'families': {'F000002_2': 2}}},
+            **COMP_HET_ALL_PASS_FILTERS, gene_counts={'ENSG00000171621': {'total': 2, 'families': {'F000011_11': 2}}},
         )
 
         inheritance_mode = 'recessive'
@@ -286,8 +286,8 @@ async def test_inheritance_filter(self):
         )
 
         await self._assert_expected_search(
-            [GCNV_VARIANT1, [GCNV_VARIANT3, GCNV_VARIANT4]], inheritance_mode=inheritance_mode, omit_sample_type='VARIANTS', gene_counts={
-                'ENSG00000275023': {'total': 2, 'families': {'F000002_2': 2}},
+            [GCNV_VARIANT3, [GCNV_VARIANT3, GCNV_VARIANT4]], inheritance_mode=inheritance_mode, omit_sample_type='VARIANTS', gene_counts={
+                'ENSG00000275023': {'total': 3, 'families': {'F000002_2': 3}},
                 'ENSG00000277258': {'total': 1, 'families': {'F000002_2': 1}},
                 'ENSG00000277972': {'total': 1, 'families': {'F000002_2': 1}},
             }, **COMP_HET_ALL_PASS_FILTERS,
@@ -517,7 +517,7 @@ async def test_annotations_filter(self):
         await self._assert_expected_search([VARIANT2, VARIANT4], annotations=annotations, omit_sample_type='SV_WES')
 
         await self._assert_expected_search(
-            [GCNV_VARIANT3, GCNV_VARIANT4], annotations=annotations, omit_sample_type='VARIANTS',
+            [GCNV_VARIANT2, GCNV_VARIANT3, GCNV_VARIANT4], annotations=annotations, omit_sample_type='VARIANTS',
         )
 
         await self._assert_expected_search([SV_VARIANT1], annotations=annotations, sample_data=SV_WGS_SAMPLE_DATA)
@@ -527,13 +527,10 @@ async def test_annotations_filter(self):
             [VARIANT2, MULTI_FAMILY_VARIANT, VARIANT4], annotations=annotations, omit_sample_type='SV_WES',
         )
 
-        annotations['structural'] = ['DEL']
-        await self._assert_expected_search([SV_VARIANT1, SV_VARIANT4], annotations=annotations, sample_data=SV_WGS_SAMPLE_DATA)
-
-        await self._assert_expected_search([], annotations=annotations, omit_sample_type='VARIANTS')
+        annotations['structural'] = ['gCNV_DUP', 'DEL']
+        await self._assert_expected_search([GCNV_VARIANT1, GCNV_VARIANT2, GCNV_VARIANT3, GCNV_VARIANT4], annotations=annotations, omit_sample_type='VARIANTS')
 
-        annotations['structural'].append('gCNV_DEL')
-        await self._assert_expected_search([GCNV_VARIANT3], annotations=annotations, omit_sample_type='VARIANTS')
+        await self._assert_expected_search([SV_VARIANT1, SV_VARIANT4], annotations=annotations, sample_data=SV_WGS_SAMPLE_DATA)
 
         annotations = {'other': ['non_coding_transcript_exon_variant']}
         await self._assert_expected_search(
@@ -565,19 +562,22 @@ async def test_secondary_annotations_filter(self):
             annotations=annotations_2, annotations_secondary=annotations_1,
         )
 
-        sv_annotations_1 = {'structural': ['INS', 'LOF']}
-        sv_annotations_2 = {'structural': ['DEL', 'gCNV_DUP'], 'structural_consequence': ['INTRONIC']}
+        gcnv_annotations_1 = {'structural': ['gCNV_DUP']}
+        gcnv_annotations_2 = {'structural_consequence': ['LOF']}
 
         await self._assert_expected_search(
             [[GCNV_VARIANT3, GCNV_VARIANT4]], omit_sample_type='VARIANTS', inheritance_mode='compound_het',
-            annotations=sv_annotations_1, annotations_secondary=sv_annotations_2,
+            annotations=gcnv_annotations_1, annotations_secondary=gcnv_annotations_2,
         )
 
         await self._assert_expected_search(
-            [GCNV_VARIANT1, [GCNV_VARIANT3, GCNV_VARIANT4]], omit_sample_type='VARIANTS', inheritance_mode='recessive',
-            annotations=sv_annotations_2, annotations_secondary=sv_annotations_1,
+            [GCNV_VARIANT3, [GCNV_VARIANT3, GCNV_VARIANT4]], omit_sample_type='VARIANTS', inheritance_mode='recessive',
+            annotations=gcnv_annotations_2, annotations_secondary=gcnv_annotations_1,
         )
 
+        sv_annotations_1 = {'structural': ['INS', 'LOF']}
+        sv_annotations_2 = {'structural': ['DEL', 'gCNV_DUP'], 'structural_consequence': ['INTRONIC']}
+
         await self._assert_expected_search(
             [[SV_VARIANT1, SV_VARIANT2]], sample_data=SV_WGS_SAMPLE_DATA, inheritance_mode='compound_het',
             annotations=sv_annotations_1, annotations_secondary=sv_annotations_2,
@@ -753,8 +753,8 @@ async def test_sort(self):
         )
 
         await self._assert_expected_search(
-            [_sorted(GCNV_VARIANT1, [-30886]),  _sorted(GCNV_VARIANT4, [-13922]), _sorted(GCNV_VARIANT2, [-6834]),
-             _sorted(GCNV_VARIANT3, [-2309])], omit_sample_type='VARIANTS', sort='protein_consequence',
+            [_sorted(GCNV_VARIANT1, [-171766]), _sorted(GCNV_VARIANT2, [-17768]), _sorted(GCNV_VARIANT4, [-14487]),
+             _sorted(GCNV_VARIANT3, [-2666])], omit_sample_type='VARIANTS', sort='size',
         )
 
         await self._assert_expected_search(

From 4ce6fe5e907e5424e6cce5e264c5af0befdb1858 Mon Sep 17 00:00:00 2001
From: Hana Snow <hsnow@broadinstitute.org>
Date: Tue, 5 Sep 2023 15:39:35 -0400
Subject: [PATCH 16/16] fix family id

---
 hail_search/test_search.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/hail_search/test_search.py b/hail_search/test_search.py
index e162353634..18940d3a83 100644
--- a/hail_search/test_search.py
+++ b/hail_search/test_search.py
@@ -296,8 +296,8 @@ async def test_inheritance_filter(self):
         await self._assert_expected_search(
             [[SV_VARIANT1, SV_VARIANT2], SV_VARIANT4], inheritance_mode=inheritance_mode, sample_data=SV_WGS_SAMPLE_DATA,
             **COMP_HET_ALL_PASS_FILTERS, gene_counts={
-                'ENSG00000171621': {'total': 2, 'families': {'F000002_2': 2}},
-                'ENSG00000184986': {'total': 1, 'families': {'F000002_2': 1}},
+                'ENSG00000171621': {'total': 2, 'families': {'F000011_11': 2}},
+                'ENSG00000184986': {'total': 1, 'families': {'F000011_11': 1}},
             }
         )