Merge pull request #3951 from broadinstitute/dev

Dev
broadinstitute · Mar 7, 2024 · 8b4a47e · 8b4a47e
2 parents 924c71e + a408953
commit 8b4a47e
Show file tree

Hide file tree

Showing 114 changed files with 359 additions and 230 deletions.
diff --git a/hail_search/constants.py b/hail_search/constants.py
@@ -74,7 +74,7 @@
 CLINVAR_PATH_RANGES = [
     (CLINVAR_PATH_FILTER, 'Pathogenic', 'Pathogenic/Likely_risk_allele'),
     (CLINVAR_LIKELY_PATH_FILTER, 'Pathogenic/Likely_pathogenic', 'Likely_risk_allele'),
-    ('vus_or_conflicting', 'Conflicting_interpretations_of_pathogenicity', 'No_pathogenic_assertion'),
+    ('vus_or_conflicting', 'Conflicting_classifications_of_pathogenicity', 'No_pathogenic_assertion'),
     ('likely_benign', 'Likely_benign', 'Benign/Likely_benign'),
     ('benign', 'Benign/Likely_benign', 'Benign'),
 ]

diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/.README.txt.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/.README.txt.crc
diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/.metadata.json.gz.crc
diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/README.txt b/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/README.txt
@@ -1,3 +1,3 @@
 This folder comprises a Hail (www.hail.is) native Table or MatrixTable.
-  Written with version 0.2.126-ee77707f4fab
-  Created at 2024/01/24 11:45:21
+  Written with version 0.2.128-eead8100a1c1
+  Created at 2024/02/26 15:45:13
diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/globals/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/globals/.metadata.json.gz.crc
diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/globals/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/globals/metadata.json.gz
diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/globals/parts/.part-0.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/globals/parts/.part-0.crc
diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/globals/parts/part-0 b/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/globals/parts/part-0
diff --git a/...fe2-4bd1-9f57-f19e5e746e8c.idx/.index.crc → ...7b8-4308-a65f-b03043bbab4c.idx/.index.crc b/...fe2-4bd1-9f57-f19e5e746e8c.idx/.index.crc → ...7b8-4308-a65f-b03043bbab4c.idx/.index.crc
diff --git a/...57-f19e5e746e8c.idx/.metadata.json.gz.crc → ...5f-b03043bbab4c.idx/.metadata.json.gz.crc b/...57-f19e5e746e8c.idx/.metadata.json.gz.crc → ...5f-b03043bbab4c.idx/.metadata.json.gz.crc
diff --git a/...9b3-6fe2-4bd1-9f57-f19e5e746e8c.idx/index → ...9fe-d7b8-4308-a65f-b03043bbab4c.idx/index b/...9b3-6fe2-4bd1-9f57-f19e5e746e8c.idx/index → ...9fe-d7b8-4308-a65f-b03043bbab4c.idx/index
diff --git a/...d1-9f57-f19e5e746e8c.idx/metadata.json.gz → ...08-a65f-b03043bbab4c.idx/metadata.json.gz b/...d1-9f57-f19e5e746e8c.idx/metadata.json.gz → ...08-a65f-b03043bbab4c.idx/metadata.json.gz
diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/metadata.json.gz
diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/rows/.metadata.json.gz.crc
diff --git a/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh37/SNV_INDEL/annotations.ht/rows/metadata.json.gz
diff --git a/...-b93059b3-6fe2-4bd1-9f57-f19e5e746e8c.crc → ...-51a119fe-d7b8-4308-a65f-b03043bbab4c.crc b/...-b93059b3-6fe2-4bd1-9f57-f19e5e746e8c.crc → ...-51a119fe-d7b8-4308-a65f-b03043bbab4c.crc
diff --git a/...rt-0-b93059b3-6fe2-4bd1-9f57-f19e5e746e8c → ...rt-0-51a119fe-d7b8-4308-a65f-b03043bbab4c b/...rt-0-b93059b3-6fe2-4bd1-9f57-f19e5e746e8c → ...rt-0-51a119fe-d7b8-4308-a65f-b03043bbab4c
diff --git a/hail_search/fixtures/GRCh38/MITO/annotations.ht/.README.txt.crc b/hail_search/fixtures/GRCh38/MITO/annotations.ht/.README.txt.crc
diff --git a/hail_search/fixtures/GRCh38/MITO/annotations.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/MITO/annotations.ht/.metadata.json.gz.crc
diff --git a/hail_search/fixtures/GRCh38/MITO/annotations.ht/README.txt b/hail_search/fixtures/GRCh38/MITO/annotations.ht/README.txt
@@ -1,3 +1,3 @@
 This folder comprises a Hail (www.hail.is) native Table or MatrixTable.
-  Written with version 0.2.120-f00f916faf78
-  Created at 2024/02/15 17:49:47
+  Written with version 0.2.128-eead8100a1c1
+  Created at 2024/02/26 14:35:40
diff --git a/hail_search/fixtures/GRCh38/MITO/annotations.ht/globals/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/MITO/annotations.ht/globals/.metadata.json.gz.crc
diff --git a/hail_search/fixtures/GRCh38/MITO/annotations.ht/globals/metadata.json.gz b/hail_search/fixtures/GRCh38/MITO/annotations.ht/globals/metadata.json.gz
diff --git a/hail_search/fixtures/GRCh38/MITO/annotations.ht/globals/parts/.part-0.crc b/hail_search/fixtures/GRCh38/MITO/annotations.ht/globals/parts/.part-0.crc
diff --git a/hail_search/fixtures/GRCh38/MITO/annotations.ht/globals/parts/part-0 b/hail_search/fixtures/GRCh38/MITO/annotations.ht/globals/parts/part-0
diff --git a/...Ch38/MITO/annotations.ht/index/part-0-9051c447-510b-49ab-bf6a-cadcf5f69801.idx/.index.crc b/...Ch38/MITO/annotations.ht/index/part-0-9051c447-510b-49ab-bf6a-cadcf5f69801.idx/.index.crc
diff --git a/...d9-61782f574461.idx/.metadata.json.gz.crc → ...6a-cadcf5f69801.idx/.metadata.json.gz.crc b/...d9-61782f574461.idx/.metadata.json.gz.crc → ...6a-cadcf5f69801.idx/.metadata.json.gz.crc
diff --git a/...es/GRCh38/MITO/annotations.ht/index/part-0-9051c447-510b-49ab-bf6a-cadcf5f69801.idx/index b/...es/GRCh38/MITO/annotations.ht/index/part-0-9051c447-510b-49ab-bf6a-cadcf5f69801.idx/index
diff --git a/...b4-a3d9-61782f574461.idx/metadata.json.gz → ...ab-bf6a-cadcf5f69801.idx/metadata.json.gz b/...b4-a3d9-61782f574461.idx/metadata.json.gz → ...ab-bf6a-cadcf5f69801.idx/metadata.json.gz
diff --git a/...Ch38/MITO/annotations.ht/index/part-0-cce43fcc-aae8-40b4-a3d9-61782f574461.idx/.index.crc b/...Ch38/MITO/annotations.ht/index/part-0-cce43fcc-aae8-40b4-a3d9-61782f574461.idx/.index.crc
diff --git a/...es/GRCh38/MITO/annotations.ht/index/part-0-cce43fcc-aae8-40b4-a3d9-61782f574461.idx/index b/...es/GRCh38/MITO/annotations.ht/index/part-0-cce43fcc-aae8-40b4-a3d9-61782f574461.idx/index
diff --git a/hail_search/fixtures/GRCh38/MITO/annotations.ht/metadata.json.gz b/hail_search/fixtures/GRCh38/MITO/annotations.ht/metadata.json.gz
diff --git a/hail_search/fixtures/GRCh38/MITO/annotations.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/MITO/annotations.ht/rows/.metadata.json.gz.crc
diff --git a/hail_search/fixtures/GRCh38/MITO/annotations.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh38/MITO/annotations.ht/rows/metadata.json.gz
diff --git a/...es/GRCh38/MITO/annotations.ht/rows/parts/.part-0-9051c447-510b-49ab-bf6a-cadcf5f69801.crc b/...es/GRCh38/MITO/annotations.ht/rows/parts/.part-0-9051c447-510b-49ab-bf6a-cadcf5f69801.crc
diff --git a/...es/GRCh38/MITO/annotations.ht/rows/parts/.part-0-cce43fcc-aae8-40b4-a3d9-61782f574461.crc b/...es/GRCh38/MITO/annotations.ht/rows/parts/.part-0-cce43fcc-aae8-40b4-a3d9-61782f574461.crc
diff --git a/...ixtures/GRCh38/MITO/annotations.ht/rows/parts/part-0-9051c447-510b-49ab-bf6a-cadcf5f69801 b/...ixtures/GRCh38/MITO/annotations.ht/rows/parts/part-0-9051c447-510b-49ab-bf6a-cadcf5f69801
diff --git a/...ixtures/GRCh38/MITO/annotations.ht/rows/parts/part-0-cce43fcc-aae8-40b4-a3d9-61782f574461 b/...ixtures/GRCh38/MITO/annotations.ht/rows/parts/part-0-cce43fcc-aae8-40b4-a3d9-61782f574461
diff --git a/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/annotations.ht/.README.txt.crc b/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/annotations.ht/.README.txt.crc
diff --git a/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/annotations.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/annotations.ht/.metadata.json.gz.crc
diff --git a/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/annotations.ht/README.txt b/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/annotations.ht/README.txt
@@ -1,3 +1,3 @@
 This folder comprises a Hail (www.hail.is) native Table or MatrixTable.
-  Written with version 0.2.124-13536b531342
-  Created at 2023/11/21 12:19:09
+  Written with version 0.2.128-eead8100a1c1
+  Created at 2024/03/07 14:19:33
diff --git a/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/annotations.ht/globals/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/annotations.ht/globals/.metadata.json.gz.crc
diff --git a/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/annotations.ht/globals/metadata.json.gz b/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/annotations.ht/globals/metadata.json.gz
diff --git a/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/annotations.ht/globals/parts/.part-0.crc b/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/annotations.ht/globals/parts/.part-0.crc
diff --git a/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/annotations.ht/globals/parts/part-0 b/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/annotations.ht/globals/parts/part-0
diff --git a/...d63-46d7-a244-a2cac529217b.idx/.index.crc → ...9b7-4eaf-963f-41f8b545319c.idx/.index.crc b/...d63-46d7-a244-a2cac529217b.idx/.index.crc → ...9b7-4eaf-963f-41f8b545319c.idx/.index.crc
diff --git a/...44-a2cac529217b.idx/.metadata.json.gz.crc → ...3f-41f8b545319c.idx/.metadata.json.gz.crc b/...44-a2cac529217b.idx/.metadata.json.gz.crc → ...3f-41f8b545319c.idx/.metadata.json.gz.crc
diff --git a/...c69-2d63-46d7-a244-a2cac529217b.idx/index → ...fda-19b7-4eaf-963f-41f8b545319c.idx/index b/...c69-2d63-46d7-a244-a2cac529217b.idx/index → ...fda-19b7-4eaf-963f-41f8b545319c.idx/index
diff --git a/...d7-a244-a2cac529217b.idx/metadata.json.gz → ...af-963f-41f8b545319c.idx/metadata.json.gz b/...d7-a244-a2cac529217b.idx/metadata.json.gz → ...af-963f-41f8b545319c.idx/metadata.json.gz
diff --git a/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/annotations.ht/metadata.json.gz b/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/annotations.ht/metadata.json.gz
diff --git a/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/annotations.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/annotations.ht/rows/.metadata.json.gz.crc
diff --git a/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/annotations.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh38/ONT_SNV_INDEL/annotations.ht/rows/metadata.json.gz
diff --git a/.../ONT_SNV_INDEL/annotations.ht/rows/parts/.part-0-432b4fda-19b7-4eaf-963f-41f8b545319c.crc b/.../ONT_SNV_INDEL/annotations.ht/rows/parts/.part-0-432b4fda-19b7-4eaf-963f-41f8b545319c.crc
diff --git a/.../ONT_SNV_INDEL/annotations.ht/rows/parts/.part-0-8bca7c69-2d63-46d7-a244-a2cac529217b.crc b/.../ONT_SNV_INDEL/annotations.ht/rows/parts/.part-0-8bca7c69-2d63-46d7-a244-a2cac529217b.crc
diff --git a/...RCh38/ONT_SNV_INDEL/annotations.ht/rows/parts/part-0-432b4fda-19b7-4eaf-963f-41f8b545319c b/...RCh38/ONT_SNV_INDEL/annotations.ht/rows/parts/part-0-432b4fda-19b7-4eaf-963f-41f8b545319c
diff --git a/...RCh38/ONT_SNV_INDEL/annotations.ht/rows/parts/part-0-8bca7c69-2d63-46d7-a244-a2cac529217b b/...RCh38/ONT_SNV_INDEL/annotations.ht/rows/parts/part-0-8bca7c69-2d63-46d7-a244-a2cac529217b
diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/.README.txt.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/.README.txt.crc
diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/.metadata.json.gz.crc
diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/README.txt b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/README.txt
@@ -1,3 +1,3 @@
 This folder comprises a Hail (www.hail.is) native Table or MatrixTable.
-  Written with version 0.2.126-ee77707f4fab
-  Created at 2024/01/04 18:06:47
+  Written with version 0.2.128-eead8100a1c1
+  Created at 2024/02/26 15:21:48
diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/globals/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/globals/.metadata.json.gz.crc
diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/globals/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/globals/metadata.json.gz
diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/globals/parts/.part-0.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/globals/parts/.part-0.crc
diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/globals/parts/part-0 b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/globals/parts/part-0
diff --git a/...33c-4bc0-9c51-ef99e2b58ae5.idx/.index.crc → ...0a9-4af1-a543-db0e0db24364.idx/.index.crc b/...33c-4bc0-9c51-ef99e2b58ae5.idx/.index.crc → ...0a9-4af1-a543-db0e0db24364.idx/.index.crc
diff --git a/...51-ef99e2b58ae5.idx/.metadata.json.gz.crc → ...43-db0e0db24364.idx/.metadata.json.gz.crc b/...51-ef99e2b58ae5.idx/.metadata.json.gz.crc → ...43-db0e0db24364.idx/.metadata.json.gz.crc
diff --git a/...560-933c-4bc0-9c51-ef99e2b58ae5.idx/index → ...be5-50a9-4af1-a543-db0e0db24364.idx/index b/...560-933c-4bc0-9c51-ef99e2b58ae5.idx/index → ...be5-50a9-4af1-a543-db0e0db24364.idx/index
diff --git a/...c0-9c51-ef99e2b58ae5.idx/metadata.json.gz → ...f1-a543-db0e0db24364.idx/metadata.json.gz b/...c0-9c51-ef99e2b58ae5.idx/metadata.json.gz → ...f1-a543-db0e0db24364.idx/metadata.json.gz
diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/metadata.json.gz
diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/.metadata.json.gz.crc
diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/annotations.ht/rows/metadata.json.gz
diff --git a/...-76cee560-933c-4bc0-9c51-ef99e2b58ae5.crc → ...-69dcebe5-50a9-4af1-a543-db0e0db24364.crc b/...-76cee560-933c-4bc0-9c51-ef99e2b58ae5.crc → ...-69dcebe5-50a9-4af1-a543-db0e0db24364.crc
diff --git a/...rt-0-76cee560-933c-4bc0-9c51-ef99e2b58ae5 → ...rt-0-69dcebe5-50a9-4af1-a543-db0e0db24364 b/...rt-0-76cee560-933c-4bc0-9c51-ef99e2b58ae5 → ...rt-0-69dcebe5-50a9-4af1-a543-db0e0db24364
diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/.README.txt.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/.README.txt.crc
diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/.metadata.json.gz.crc
diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/README.txt b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/README.txt
@@ -1,3 +1,3 @@
 This folder comprises a Hail (www.hail.is) native Table or MatrixTable.
-  Written with version 0.2.109-b71b065e4bb6
-  Created at 2023/08/11 14:19:35
+  Written with version 0.2.128-eead8100a1c1
+  Created at 2024/03/04 16:14:35
diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/globals/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/globals/.metadata.json.gz.crc
diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/globals/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/globals/metadata.json.gz
diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/globals/parts/.part-0.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/globals/parts/.part-0.crc
diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/globals/parts/part-0 b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/globals/parts/part-0
diff --git a/...NDEL/high_af_variants.ht/index/part-0-2e367afa-f8b5-4167-84b3-5abbd6837c8b.idx/.index.crc b/...NDEL/high_af_variants.ht/index/part-0-2e367afa-f8b5-4167-84b3-5abbd6837c8b.idx/.index.crc
diff --git a/...f_variants.ht/index/part-0-2e367afa-f8b5-4167-84b3-5abbd6837c8b.idx/.metadata.json.gz.crc b/...f_variants.ht/index/part-0-2e367afa-f8b5-4167-84b3-5abbd6837c8b.idx/.metadata.json.gz.crc
diff --git a/...SNV_INDEL/high_af_variants.ht/index/part-0-2e367afa-f8b5-4167-84b3-5abbd6837c8b.idx/index b/...SNV_INDEL/high_af_variants.ht/index/part-0-2e367afa-f8b5-4167-84b3-5abbd6837c8b.idx/index
diff --git a/...igh_af_variants.ht/index/part-0-2e367afa-f8b5-4167-84b3-5abbd6837c8b.idx/metadata.json.gz b/...igh_af_variants.ht/index/part-0-2e367afa-f8b5-4167-84b3-5abbd6837c8b.idx/metadata.json.gz
diff --git a/...NDEL/high_af_variants.ht/index/part-0-6d901bc9-e51f-4a61-8373-97bc944ff192.idx/.index.crc b/...NDEL/high_af_variants.ht/index/part-0-6d901bc9-e51f-4a61-8373-97bc944ff192.idx/.index.crc
diff --git a/...f_variants.ht/index/part-0-6d901bc9-e51f-4a61-8373-97bc944ff192.idx/.metadata.json.gz.crc b/...f_variants.ht/index/part-0-6d901bc9-e51f-4a61-8373-97bc944ff192.idx/.metadata.json.gz.crc
diff --git a/...SNV_INDEL/high_af_variants.ht/index/part-0-6d901bc9-e51f-4a61-8373-97bc944ff192.idx/index b/...SNV_INDEL/high_af_variants.ht/index/part-0-6d901bc9-e51f-4a61-8373-97bc944ff192.idx/index
diff --git a/...igh_af_variants.ht/index/part-0-6d901bc9-e51f-4a61-8373-97bc944ff192.idx/metadata.json.gz b/...igh_af_variants.ht/index/part-0-6d901bc9-e51f-4a61-8373-97bc944ff192.idx/metadata.json.gz
diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/metadata.json.gz
diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/rows/.metadata.json.gz.crc
diff --git a/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh38/SNV_INDEL/high_af_variants.ht/rows/metadata.json.gz
diff --git a/...SNV_INDEL/high_af_variants.ht/rows/parts/.part-0-2e367afa-f8b5-4167-84b3-5abbd6837c8b.crc b/...SNV_INDEL/high_af_variants.ht/rows/parts/.part-0-2e367afa-f8b5-4167-84b3-5abbd6837c8b.crc
diff --git a/...SNV_INDEL/high_af_variants.ht/rows/parts/.part-0-6d901bc9-e51f-4a61-8373-97bc944ff192.crc b/...SNV_INDEL/high_af_variants.ht/rows/parts/.part-0-6d901bc9-e51f-4a61-8373-97bc944ff192.crc
diff --git a/...Ch38/SNV_INDEL/high_af_variants.ht/rows/parts/part-0-2e367afa-f8b5-4167-84b3-5abbd6837c8b b/...Ch38/SNV_INDEL/high_af_variants.ht/rows/parts/part-0-2e367afa-f8b5-4167-84b3-5abbd6837c8b
diff --git a/...Ch38/SNV_INDEL/high_af_variants.ht/rows/parts/part-0-6d901bc9-e51f-4a61-8373-97bc944ff192 b/...Ch38/SNV_INDEL/high_af_variants.ht/rows/parts/part-0-6d901bc9-e51f-4a61-8373-97bc944ff192
diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py
@@ -17,6 +17,11 @@
 # Estimated based on behavior for several representative gene lists
 MAX_GENE_INTERVALS = 100
 
+# Optimal number of entry table partitions, balancing parallelization with partition overhead
+# Experimentally determined based on compound het search performance:
+# https://github.com/broadinstitute/seqr-private/issues/1283#issuecomment-1973392719
+MAX_PARTITIONS = 12
+
 logger = logging.getLogger(__name__)
 
 
@@ -217,7 +222,7 @@ def __init__(self, sample_data, sort=XPOS, sort_metadata=None, num_results=100,
         self._has_secondary_annotations = False
         self._is_multi_data_type_comp_het = False
         self.max_unaffected_samples = None
-        self._load_table_kwargs = {'_n_partitions': (os.cpu_count() or 2)-1}
+        self._load_table_kwargs = {'_n_partitions': min(MAX_PARTITIONS, (os.cpu_count() or 2)-1)}
         self.entry_samples_by_family_guid = {}
 
         if sample_data:
@@ -811,7 +816,10 @@ def _filter_compound_hets(self):
         ch_ht = self._comp_het_ht
 
         # Get possible pairs of variants within the same gene
-        ch_ht = ch_ht.annotate(gene_ids=self._gene_ids_expr(ch_ht))
+        def key(v):
+            ks = [v[k] for k in self.KEY_FIELD]
+            return ks[0] if len(self.KEY_FIELD) == 1 else hl.tuple(ks)
+        ch_ht = ch_ht.annotate(key_=key(ch_ht.row), gene_ids=self._gene_ids_expr(ch_ht))
         ch_ht = ch_ht.explode(ch_ht.gene_ids)
 
         # Filter allowed transcripts to the grouped gene
@@ -824,108 +832,105 @@ def _filter_compound_hets(self):
 
         if transcript_annotations or self._has_secondary_annotations:
             primary_filters = self._get_annotation_filters(ch_ht)
-            secondary_filters = self._get_annotation_filters(ch_ht, is_secondary=True) if self._has_secondary_annotations else []
-            self.unfiltered_comp_het_ht = ch_ht.filter(hl.any(primary_filters + secondary_filters))
+            ch_ht = ch_ht.annotate(is_primary=hl.coalesce(hl.any(primary_filters), False))
+            if self._has_secondary_annotations and not self._is_multi_data_type_comp_het:
+                secondary_filters = self._get_annotation_filters(ch_ht, is_secondary=True)
+                is_secondary = hl.coalesce(hl.any(secondary_filters), False)
+            else:
+                is_secondary = ch_ht.is_primary
+            ch_ht = ch_ht.annotate(is_secondary=is_secondary)
+            if transcript_annotations:
+                ch_ht = ch_ht.filter(ch_ht.is_primary | ch_ht.is_secondary)
         else:
-            self.unfiltered_comp_het_ht = ch_ht
+            ch_ht = ch_ht.annotate(is_primary=True, is_secondary=True)
+        self.unfiltered_comp_het_ht = ch_ht
 
         if self._is_multi_data_type_comp_het:
             # In cases where comp het pairs must have different data types, there are no single data type results
             return None
 
-        if self._has_secondary_annotations:
-            primary_variants = hl.agg.filter(hl.any(primary_filters), hl.agg.collect(ch_ht.row))
-            row_agg = ch_ht.row
-            if ALLOWED_TRANSCRIPTS in row_agg and ALLOWED_SECONDARY_TRANSCRIPTS in row_agg:
-                # Ensure main transcripts are properly selected for primary/secondary annotations in variant pairs
-                row_agg = row_agg.annotate(**{ALLOWED_TRANSCRIPTS: row_agg[ALLOWED_SECONDARY_TRANSCRIPTS]})
-            secondary_variants = hl.agg.filter(hl.any(secondary_filters), hl.agg.collect(row_agg))
-        else:
-            if transcript_annotations:
-                ch_ht = ch_ht.filter(hl.any(self._get_annotation_filters(ch_ht)))
-            primary_variants = hl.agg.collect(ch_ht.row)
-            secondary_variants = primary_variants
-
-        ch_ht = ch_ht.group_by('gene_ids').aggregate(v1=primary_variants, v2=secondary_variants)
-
-        # Compute all distinct variant pairs
         """ Assume a table with the following data
-         gene_ids | v1     | v2
-         A        | [1, 2] | [1, 2, 3]
-         B        | [2]    | [2, 3]
+        key_ | gene_ids | is_primary | is_secondary
+        1    | A        | true       | true
+        2    | A        | true       | true
+        2    | B        | true       | true
+        3    | A        | false      | true
+        3    | B        | false      | true
         """
-        key_expr = lambda v: v[self.KEY_FIELD[0]] if len(self.KEY_FIELD) == 1 else hl.tuple([v[k] for k in self.KEY_FIELD])
-        ch_ht = ch_ht.annotate(
-            v1_set=hl.set(ch_ht.v1.map(key_expr)),
-            v2=ch_ht.v2.group_by(key_expr).map_values(lambda x: x[0]),
+
+        variants = ch_ht.collect(_localize=False)
+        variants = variants.group_by(lambda v: v.gene_ids)
+        variants = variants.items().flatmap(lambda gvs:
+            hl.rbind(gvs[0], gvs[1], lambda gene_id, variants:
+                hl.rbind(
+                    variants.filter(lambda v: v.is_primary),
+                    variants.filter(lambda v: v.is_secondary),
+                    lambda v1s, v2s:
+                        v1s.flatmap(lambda v1:
+                            v2s \
+                            .filter(lambda v2: ~v2.is_primary | ~v1.is_secondary | (v1.key_ < v2.key_)) \
+                            .map(lambda v2: hl.tuple([gene_id, v1, v2]))
+                        )
+                )
+            )
         )
-        ch_ht = ch_ht.explode(ch_ht.v1)
-        """ After annotating and exploding by v1:
-         gene_ids | v1 | v2                          | v1_set 
-         A        | 1  | {id_1: 1, id_2: 2, id_3: 3} | {id_1, id_2}
-         A        | 2  | {id_2: 2, id_3: 3}          | {id_1, id_2}
-         B        | 2  | {id_2: 2, id_3: 3}          | {id_2}
-        """
 
-        v1_key = key_expr(ch_ht.v1)
-        ch_ht = ch_ht.annotate(v2=ch_ht.v2.items().filter(
-            lambda x: ~ch_ht.v2.contains(v1_key) | ~ch_ht.v1_set.contains(x[0]) | (x[0] > v1_key)
-        ))
-        """ After filtering v2:
-         gene_ids | v1 | v2                     | v1_set 
-         A        | 1  | [(id_2, 2), (id_3, 3)] | {id_1, id_2}
-         A        | 2  | [(id_3, 3)]            | {id_1, id_2}
-         B        | 2  | [(id_3, 3)]            | {id_2}
+        """ After grouping by gene and pairing/filtering have array of tuples (gene_id, v1, v2)
+        (A, 1, 2)
+        (A, 1, 3)
+        (A, 2, 3)
+        (B, 2, 3)
         """
 
-        ch_ht = ch_ht.group_by('v1').aggregate(
-            comp_het_gene_ids=hl.agg.collect_as_set(ch_ht.gene_ids),
-            v2_items=hl.agg.collect(ch_ht.v2).flatmap(lambda x: x),
-        )
-        ch_ht = ch_ht.annotate(v2=hl.dict(ch_ht.v2_items).values())
-        """ After grouping by v1:
-         v1 | v2     | comp_het_gene_ids
-         1  | [2, 3] | {A}
-         2  | [3]    | {A, B}             
-        """
+        variants = variants.group_by(lambda v: hl.tuple([v[1].key_, v[2].key_]))
+        variants = variants.values().map(lambda v: hl.rbind(
+            hl.set(v.map(lambda v: v[0])),
+            lambda comp_het_gene_ids: hl.struct(
+                v1=v[0][1].annotate(comp_het_gene_ids=comp_het_gene_ids),
+                v2=v[0][2].annotate(comp_het_gene_ids=comp_het_gene_ids),
+            )
+        ))
 
-        ch_ht = ch_ht.explode(ch_ht.v2)._key_by_assert_sorted()
-        """ After exploding by v2:
-         v1 | v2 | comp_het_gene_ids
-         1  | 2  | {A}
-         1  | 3  | {A}
-         2  | 3  | {A, B}             
+        """ After grouping by pair have array of structs (v1, v2) annotated with comp_het_gene_ids
+        v1 | v2 | v<1/2>.comp_het_gene_ids
+         1 | 2  | {A}
+         1 | 3  | {A}
+         2 | 3  | {A, B} 
         """
 
-        ch_ht = self._filter_grouped_compound_hets(ch_ht)
+        variants = self._filter_comp_het_families(variants)
 
-        # Return pairs formatted as lists
-        return ch_ht.select(**{GROUPED_VARIANTS_FIELD: hl.array([ch_ht.v1, ch_ht.v2])})
+        return hl.Table.parallelize(
+            variants.map(lambda v: hl.struct(**{GROUPED_VARIANTS_FIELD: hl.array([v.v1, v.v2])}))
+        )
 
-    def _filter_grouped_compound_hets(self, ch_ht):
-        # Filter variant pairs for family and genotype
-        ch_ht = ch_ht.annotate(valid_families=hl.enumerate(ch_ht.v1.family_entries).map(
-            lambda x: self._is_valid_comp_het_family(ch_ht, x[1], ch_ht.v2.family_entries[x[0]])
+    def _filter_comp_het_families(self, variants, set_secondary_annotations=True):
+        return variants.map(lambda v: v.annotate(
+            valid_family_indices=hl.enumerate(v.v1.family_entries).map(lambda x: x[0]).filter(
+                lambda i: self._is_valid_comp_het_family(v.v1, v.v2, i)
+            )
+        )).filter(
+            lambda v: v.valid_family_indices.any(hl.is_defined)
+        ).map(lambda v: v.select(
+            v1=self._annotated_comp_het_variant(v.v1, v.valid_family_indices),
+            v2=self._annotated_comp_het_variant(v.v2, v.valid_family_indices, is_secondary=set_secondary_annotations),
         ))
-        ch_ht = ch_ht.filter(ch_ht.valid_families.any(lambda x: x))
-        ch_ht = ch_ht.select(**{k: self._annotated_comp_het_variant(ch_ht, k) for k in ['v1', 'v2']})
 
-        return ch_ht
+    def _annotated_comp_het_variant(self, variant, valid_family_indices, is_secondary=False):
+        if is_secondary and self._has_secondary_annotations and ALLOWED_TRANSCRIPTS in variant and ALLOWED_SECONDARY_TRANSCRIPTS in variant:
+            variant = variant.annotate(**{ALLOWED_TRANSCRIPTS: variant[ALLOWED_SECONDARY_TRANSCRIPTS]})
 
-    @staticmethod
-    def _annotated_comp_het_variant(ch_ht, field):
-        variant = ch_ht[field]
         return variant.annotate(
-            comp_het_gene_ids=ch_ht.comp_het_gene_ids,
-            family_entries=hl.enumerate(ch_ht.valid_families).filter(
-                lambda x: x[1]).map(lambda x: variant.family_entries[x[0]]),
+            family_entries=valid_family_indices.map(lambda i: variant.family_entries[i]),
         )
 
     @classmethod
     def _gene_ids_expr(cls, ht):
         return hl.set(ht[cls.TRANSCRIPTS_FIELD].map(lambda t: t.gene_id))
 
-    def _is_valid_comp_het_family(self, ch_ht, entries_1, entries_2):
+    def _is_valid_comp_het_family(self, v1, v2, family_index):
+        entries_1 = v1.family_entries[family_index]
+        entries_2 = v2.family_entries[family_index]
         family_filters = [hl.is_defined(entries_1), hl.is_defined(entries_2)]
         if self.max_unaffected_samples > 0:
             family_filters.append(hl.enumerate(entries_1).all(lambda x: hl.any([
@@ -939,9 +944,10 @@ def _comp_het_entry_has_ref(self, gt1, gt2):
         return [self.GENOTYPE_QUERY_MAP[REF_REF](gt1), self.GENOTYPE_QUERY_MAP[REF_REF](gt2)]
 
     def _format_comp_het_results(self, ch_ht, annotation_fields):
-        formatted_grouped_variants = ch_ht[GROUPED_VARIANTS_FIELD].map(
-            lambda v: self._format_results(v, annotation_fields=annotation_fields)
-        )
+        formatted_grouped_variants = hl.array([
+            self._format_results(ch_ht[GROUPED_VARIANTS_FIELD][0], annotation_fields=annotation_fields),
+            self._format_results(ch_ht[GROUPED_VARIANTS_FIELD][1], annotation_fields=annotation_fields),
+        ])
         ch_ht = ch_ht.annotate(**{GROUPED_VARIANTS_FIELD: hl.sorted(formatted_grouped_variants, key=lambda x: x._sort)})
         return ch_ht.annotate(_sort=ch_ht[GROUPED_VARIANTS_FIELD][0]._sort)
 

diff --git a/hail_search/queries/mito.py b/hail_search/queries/mito.py
@@ -46,7 +46,7 @@ class MitoHailTableQuery(BaseHailTableQuery):
         'hmtvar': PredictionPath('hmtvar', 'score'),
         'mitotip': PredictionPath('mitotip', 'trna_prediction'),
         'mut_taster': PredictionPath('dbnsfp_mito', 'MutationTaster_pred'),
-        'sift': PredictionPath('dbnsfp_mito', 'SIFT_pred'),
+        'sift': PredictionPath('dbnsfp_mito', 'SIFT_score'),
     }
 
     PATHOGENICITY_FILTERS = {
@@ -144,8 +144,7 @@ def _parse_intervals(self, intervals, exclude_intervals=False, **kwargs):
 
     def _get_family_passes_quality_filter(self, quality_filter, ht=None, pathogenicity=None, **kwargs):
         passes_quality = super()._get_family_passes_quality_filter(quality_filter)
-        clinvar_path_ht = False if passes_quality is None else self._get_loaded_filter_ht(
-            CLINVAR_KEY, 'clinvar_path_variants.ht', self._get_clinvar_prefilter, pathogenicity=pathogenicity)
+        clinvar_path_ht = False if passes_quality is None else self._get_loaded_clinvar_prefilter_ht(pathogenicity)
         if not clinvar_path_ht:
             return passes_quality
 
@@ -159,20 +158,24 @@ def _get_loaded_filter_ht(self, key, table_path, get_filters, **kwargs):
             else:
                 ht = self._read_table(table_path)
                 if ht_filter is not True:
-                    ht = ht.filter(ht[ht_filter])
+                    ht = ht.filter(ht_filter(ht))
                 self._filter_hts[key] = ht
 
         return self._filter_hts[key]
 
+    def _get_loaded_clinvar_prefilter_ht(self, pathogenicity):
+        return self._get_loaded_filter_ht(
+            CLINVAR_KEY, 'clinvar_path_variants.ht', self._get_clinvar_prefilter, pathogenicity=pathogenicity)
+
     def _get_clinvar_prefilter(self, pathogenicity=None):
         clinvar_path_filters = self._get_clinvar_path_filters(pathogenicity)
         if not clinvar_path_filters:
             return False
 
         if CLINVAR_LIKELY_PATH_FILTER not in clinvar_path_filters:
-            return 'is_pathogenic'
+            return lambda ht: ht.is_pathogenic
         elif CLINVAR_PATH_FILTER not in clinvar_path_filters:
-            return 'is_likely_pathogenic'
+            return lambda ht: ht.is_likely_pathogenic
         return True
 
     def _filter_variant_ids(self, ht, variant_ids):

diff --git a/hail_search/queries/multi_data_types.py b/hail_search/queries/multi_data_types.py
@@ -81,26 +81,34 @@ def _filter_data_type_comp_hets(self, variant_query, variant_families, sv_query)
         variant_ch_ht = variant_ht.group_by('gene_ids').aggregate(v1=hl.agg.collect(variant_ht.row))
         sv_ch_ht = sv_ht.group_by('gene_ids').aggregate(v2=hl.agg.collect(sv_ht.row))
 
-        ch_ht = variant_ch_ht.join(sv_ch_ht)
-        ch_ht = ch_ht.explode(ch_ht.v1)
-        ch_ht = ch_ht.explode(ch_ht.v2)
-        ch_ht = ch_ht.annotate(comp_het_gene_ids=hl.set({ch_ht.gene_ids}))
-        return self._filter_grouped_compound_hets(ch_ht)
+        variants = variant_ch_ht.join(sv_ch_ht).collect(_localize=False)
+        variants = variants.flatmap(lambda gvs: hl.rbind(
+            hl.set({gvs.gene_ids}),
+            lambda comp_het_gene_ids: gvs.v1.flatmap(
+                lambda v1: gvs.v2.map(lambda v2: hl.struct(
+                    v1=v1.annotate(comp_het_gene_ids=comp_het_gene_ids),
+                    v2=v2.annotate(comp_het_gene_ids=comp_het_gene_ids),
+                ))
+            )
+        ))
+        variants = self._filter_comp_het_families(variants, set_secondary_annotations=False)
+        return hl.Table.parallelize(variants)
 
     @staticmethod
     def _family_filtered_ch_ht(ht, overlapped_families, families):
         family_indices = hl.array([families.index(family_guid) for family_guid in overlapped_families])
         return ht.annotate(family_entries=family_indices.map(lambda i: ht.family_entries[i]))
 
-    def _is_valid_comp_het_family(self, ch_ht, entries_1, entries_2):
-        is_valid = super()._is_valid_comp_het_family(ch_ht, entries_1, entries_2)
+    def _is_valid_comp_het_family(self, v1, v2, family_index):
+        is_valid = super()._is_valid_comp_het_family(v1, v2, family_index)
 
         # SNPs overlapped by trans deletions may be incorrectly called as hom alt, and should be
         # considered comp hets with said deletions. Any other hom alt variants are not valid comp hets
+        entries_1 = v1.family_entries[family_index]
         is_allowed_hom_alt = entries_1.all(lambda g: ~self.GENOTYPE_QUERY_MAP[ALT_ALT](g.GT)) | hl.all([
-            ch_ht.v2.sv_type_id == self._sv_type_del_id,
-            ch_ht.v2.start_locus.position <= ch_ht.v1.locus.position,
-            ch_ht.v1.locus.position <= ch_ht.v2.end_locus.position,
+            v2.sv_type_id == self._sv_type_del_id,
+            v2.start_locus.position <= v1.locus.position,
+            v1.locus.position <= v2.end_locus.position,
         ])
         return is_valid & is_allowed_hom_alt
 

diff --git a/hail_search/queries/ont_snv_indel.py b/hail_search/queries/ont_snv_indel.py
@@ -8,12 +8,5 @@ class OntSnvIndelHailTableQuery(SnvIndelHailTableQuery):
 
     CORE_FIELDS = BaseHailTableQuery.CORE_FIELDS
 
-    PREDICTION_FIELDS_CONFIG = {
-        **SnvIndelHailTableQuery.PREDICTION_FIELDS_CONFIG,
-        'fathmm': PredictionPath('dbnsfp', 'fathmm_MKL_coding_pred'),
-        'polyphen': PredictionPath('dbnsfp', 'Polyphen2_HVAR_pred'),
-        'sift': PredictionPath('dbnsfp', 'SIFT_pred'),
-    }
-
     def _get_loaded_filter_ht(self, *args, **kwargs):
         return None