From 57cf46bd2824f34ebaa0d30afca794c28574c79c Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 4 Aug 2023 17:22:07 -0400 Subject: [PATCH 1/5] abstract logic for filter table loading --- hail_search/constants.py | 7 +- .../clinvar_path_variants.ht/.README.txt.crc | Bin 0 -> 12 bytes .../clinvar_path_variants.ht/._SUCCESS.crc | Bin 0 -> 8 bytes .../.metadata.json.gz.crc | Bin 0 -> 12 bytes .../clinvar_path_variants.ht/README.txt | 3 + .../clinvar_path_variants.ht/_SUCCESS | 0 .../globals/.metadata.json.gz.crc | Bin 0 -> 12 bytes .../globals/metadata.json.gz | Bin 0 -> 240 bytes .../globals/parts/.part-0.crc | Bin 0 -> 12 bytes .../globals/parts/part-0 | Bin 0 -> 11 bytes .../.index.crc | Bin 0 -> 12 bytes .../.metadata.json.gz.crc | Bin 0 -> 12 bytes .../index | Bin 0 -> 57 bytes .../metadata.json.gz | Bin 0 -> 184 bytes .../clinvar_path_variants.ht/metadata.json.gz | Bin 0 -> 309 bytes .../rows/.metadata.json.gz.crc | Bin 0 -> 16 bytes .../rows/metadata.json.gz | Bin 0 -> 604 bytes ...0-23b3a09d-4799-4bcb-9e13-c2812c6915c5.crc | Bin 0 -> 12 bytes ...art-0-23b3a09d-4799-4bcb-9e13-c2812c6915c5 | Bin 0 -> 28 bytes hail_search/hail_search_query.py | 81 +++++++++++++----- 20 files changed, 69 insertions(+), 22 deletions(-) create mode 100644 hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/.README.txt.crc create mode 100644 hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/._SUCCESS.crc create mode 100644 hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/.metadata.json.gz.crc create mode 100644 hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/README.txt create mode 100644 hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/_SUCCESS create mode 100644 hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/globals/.metadata.json.gz.crc create mode 100644 hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/globals/metadata.json.gz create mode 100644 hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/globals/parts/.part-0.crc create mode 100644 hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/globals/parts/part-0 create mode 100644 hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/index/part-0-23b3a09d-4799-4bcb-9e13-c2812c6915c5.idx/.index.crc create mode 100644 hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/index/part-0-23b3a09d-4799-4bcb-9e13-c2812c6915c5.idx/.metadata.json.gz.crc create mode 100644 hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/index/part-0-23b3a09d-4799-4bcb-9e13-c2812c6915c5.idx/index create mode 100644 hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/index/part-0-23b3a09d-4799-4bcb-9e13-c2812c6915c5.idx/metadata.json.gz create mode 100644 hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/metadata.json.gz create mode 100644 hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/rows/.metadata.json.gz.crc create mode 100644 hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/rows/metadata.json.gz create mode 100644 hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/rows/parts/.part-0-23b3a09d-4799-4bcb-9e13-c2812c6915c5.crc create mode 100644 hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/rows/parts/part-0-23b3a09d-4799-4bcb-9e13-c2812c6915c5 diff --git a/hail_search/constants.py b/hail_search/constants.py index 618de684fd..a90011d9a6 100644 --- a/hail_search/constants.py +++ b/hail_search/constants.py @@ -52,9 +52,12 @@ }, } +CLINVAR_PATH_FILTER = 'pathogenic' +CLINVAR_LIKELY_PATH_FILTER = 'likely_pathogenic' +CLINVAR_PATH_SIGNIFICANCES = {CLINVAR_PATH_FILTER, CLINVAR_LIKELY_PATH_FILTER} CLINVAR_PATH_RANGES = [ - ('pathogenic', 'Pathogenic', 'Pathogenic/Likely_risk_allele'), - ('likely_pathogenic', 'Pathogenic/Likely_pathogenic', 'Likely_risk_allele'), + (CLINVAR_PATH_FILTER, 'Pathogenic', 'Pathogenic/Likely_risk_allele'), + (CLINVAR_LIKELY_PATH_FILTER, 'Pathogenic/Likely_pathogenic', 'Likely_risk_allele'), ('vus_or_conflicting', 'Conflicting_interpretations_of_pathogenicity', 'No_pathogenic_assertion'), ('likely_benign', 'Likely_benign', 'Benign/Likely_benign'), ('benign', 'Benign/Likely_benign', 'Benign'), diff --git a/hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/.README.txt.crc b/hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/.README.txt.crc new file mode 100644 index 0000000000000000000000000000000000000000..9e5f9ce902db224a761ad24436147d818ccf1c72 GIT binary patch literal 12 TcmYc;N@ieSU}8w&{KW|X5hwzy literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/._SUCCESS.crc b/hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/._SUCCESS.crc new file mode 100644 index 0000000000000000000000000000000000000000..3b7b044936a890cd8d651d349a752d819d71d22c GIT binary patch literal 8 PcmYc;N@ieSU}69O2$TUk literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/.metadata.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..72936da220a756902106ff4cf2923af6473b3ad8 GIT binary patch literal 12 TcmYc;N@ieSU}D%kn@J4-5@!Or literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/README.txt b/hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/README.txt new file mode 100644 index 0000000000..183eac112f --- /dev/null +++ b/hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/README.txt @@ -0,0 +1,3 @@ +This folder comprises a Hail (www.hail.is) native Table or MatrixTable. + Written with version 0.2.109-b71b065e4bb6 + Created at 2023/08/04 13:56:14 \ No newline at end of file diff --git a/hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/_SUCCESS b/hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/_SUCCESS new file mode 100644 index 0000000000..e69de29bb2 diff --git a/hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/globals/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/globals/.metadata.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..365dde7902284e848e7648852136bc58f7e9fe1c GIT binary patch literal 12 TcmYc;N@ieSU}E@i#9t8r6iWkN literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/globals/metadata.json.gz b/hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/globals/metadata.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..6552f3d13deb7c9c65441f45174c6c33cf1ee0fe GIT binary patch literal 240 zcmVo$LKYZ%S1MoCOip+5tZm99LHDwB8i|i!AbwF3)Xt>($s*EB9gN-nF>P3k z%{Li3LBBWnADb?_li@Y{W{}>D^7xvp`Oq(N>HOHFSWr{ImsT6NvHe`yEsCtPno$LX qCrE24gjU}MV^vyeYU+xp8+>RHdswM?oAllloT)F9m%=l!0RRBnh;U8- literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/globals/parts/.part-0.crc b/hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/globals/parts/.part-0.crc new file mode 100644 index 0000000000000000000000000000000000000000..55eea218ee499b787b2cb720b377be82d6990f12 GIT binary patch literal 12 TcmYc;N@ieSU}E@EdgwU-6&eIh literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/globals/parts/part-0 b/hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/globals/parts/part-0 new file mode 100644 index 0000000000000000000000000000000000000000..9e0f935fd8edd5e298b11bc76f7b74937ce374c2 GIT binary patch literal 11 QcmZQ)U|?VZVg*J90077UD*ylh literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/index/part-0-23b3a09d-4799-4bcb-9e13-c2812c6915c5.idx/.index.crc b/hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/index/part-0-23b3a09d-4799-4bcb-9e13-c2812c6915c5.idx/.index.crc new file mode 100644 index 0000000000000000000000000000000000000000..b6b7333780cb5e841c36129f1c382aeddeb54ea7 GIT binary patch literal 12 TcmYc;N@ieSU}ET=_>Kbr6GH<{ literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/index/part-0-23b3a09d-4799-4bcb-9e13-c2812c6915c5.idx/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/index/part-0-23b3a09d-4799-4bcb-9e13-c2812c6915c5.idx/.metadata.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..134eb40272a66060289fca40fd7b699d205487ba GIT binary patch literal 12 TcmYc;N@ieSU}Cs%#P}%y6juZ1 literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/index/part-0-23b3a09d-4799-4bcb-9e13-c2812c6915c5.idx/index b/hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/index/part-0-23b3a09d-4799-4bcb-9e13-c2812c6915c5.idx/index new file mode 100644 index 0000000000000000000000000000000000000000..b0a36c8f7d0d8da36786a02033f46401761187c6 GIT binary patch literal 57 ucmb1OU|CKwV)u3#zs z?_S+pj)V8P=iUw~>+ps#+2BED%T+NGP)6movA*6aO(B9@Oo~i^L0cOF$;1oWYx_o! zaG}^}>RixGl80fAcISLG^{~`p(FH2rnuH+& zf>|b79Y!EJ^u2#xz0S(*?ZfrR(I>5$TLe(Z%`y?DLa_sMaf#Z16r4UQg}`KQ{#CsX z6S>+qN`kb8(2o4Bi?5RH`_;3rmvThr4e)sI<7r3U2UQ*ZpNnt^HB4J!!!=i2wZn#? z;A6DP=v; HDFOfhZ?TuW literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/rows/.metadata.json.gz.crc b/hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/rows/.metadata.json.gz.crc new file mode 100644 index 0000000000000000000000000000000000000000..5e7c307ede75a4e442c4a200e2c857924743411e GIT binary patch literal 16 XcmYc;N@ieSU}AV|yLHvx#TpC%C+r16 literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/rows/metadata.json.gz new file mode 100644 index 0000000000000000000000000000000000000000..74645cb7cd422c7d3beffdda305e7a4624760f9b GIT binary patch literal 604 zcmV-i0;ByOiwFP!000000Ns{RZ`v>vh5w76HZAE$LMh}880(O#X@Y7Gt59T|8}M*# zquC(={-kI3!xZmV^&P zD7H!QnF;{{QZmCOrea{Wk%YU$g!}Cp;$gXfU0_SN@?|ujDNK#tFtf|Yh1CjE((pxy zU0hPVb=E57hAWAcu{437IeY1F)YqDB9}b-}ho_O?D-hdgP0dm*Kyrq&l@dUuU2Q05 z*4n?lwG&f!_WRTAWVj5vpG(m~6C1f)yr9@DSk;?vk_2?^#O_7|&+iV7OAcX6F5*hd z0G+K+_@rrWi&Op|8v_5P(>>pI_ow}1nD4!CBgO>!Sl^l152oN2l3IxIp_VhFLHxoK z!1~qI*Sy_U@febvj91}OWh3D_H4%@W3K%+V2dPZ)p(Nqhi!0%~p_Vx+Nm4MS4=SZH zbicVGk!7U!qOL2ZiiLXZ#1E}q&O@0|P)j>mI4B_nqD znric%+uIp0Z2RE0TtAoxwAG8<_D!$nw&!f__P`5V=65}xb$Z@4yUs83zY{7KFi;yA z+xTanw$vWlIWSB@ujREn+40{2>i363Z0Qqx=>%l_%0cMI^E%zy qtb6Ra%_nim#3n|(b8i?P%G$_^*h*&OP-C>!%CcvyFbrVb2mk<`LM|!* literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/rows/parts/.part-0-23b3a09d-4799-4bcb-9e13-c2812c6915c5.crc b/hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/rows/parts/.part-0-23b3a09d-4799-4bcb-9e13-c2812c6915c5.crc new file mode 100644 index 0000000000000000000000000000000000000000..16180a7a8d9330e6903182e2d0a9a5e2b3690681 GIT binary patch literal 12 TcmYc;N@ieSU}7ki5Zev_5O)HT literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/rows/parts/part-0-23b3a09d-4799-4bcb-9e13-c2812c6915c5 b/hail_search/fixtures/GRCh38/VARIANTS/clinvar_path_variants.ht/rows/parts/part-0-23b3a09d-4799-4bcb-9e13-c2812c6915c5 new file mode 100644 index 0000000000000000000000000000000000000000..bd62c00d06cf27c77cd6b9158714d7e138b43581 GIT binary patch literal 28 hcmb1OU|3@phRMTW-%nV1}%866oJ7yvtn1atrZ literal 0 HcmV?d00001 diff --git a/hail_search/hail_search_query.py b/hail_search/hail_search_query.py index 871f9e0d6c..e85fbadc74 100644 --- a/hail_search/hail_search_query.py +++ b/hail_search/hail_search_query.py @@ -7,7 +7,8 @@ from hail_search.constants import AFFECTED, UNAFFECTED, AFFECTED_ID, UNAFFECTED_ID, MALE, VARIANT_DATASET, \ VARIANT_KEY_FIELD, GNOMAD_GENOMES_FIELD, XPOS, GENOME_VERSION_GRCh38_DISPLAY, INHERITANCE_FILTERS, \ ANY_AFFECTED, X_LINKED_RECESSIVE, REF_REF, REF_ALT, COMP_HET_ALT, ALT_ALT, HAS_ALT, HAS_REF, \ - ANNOTATION_OVERRIDE_FIELDS, SCREEN_KEY, SPLICE_AI_FIELD, CLINVAR_KEY, HGMD_KEY, CLINVAR_PATH_RANGES, HGMD_PATH_RANGES + ANNOTATION_OVERRIDE_FIELDS, SCREEN_KEY, SPLICE_AI_FIELD, CLINVAR_KEY, HGMD_KEY, CLINVAR_PATH_SIGNIFICANCES, \ + CLINVAR_PATH_FILTER, CLINVAR_LIKELY_PATH_FILTER, CLINVAR_PATH_RANGES, HGMD_PATH_RANGES DATASETS_DIR = os.environ.get('DATASETS_DIR', '/hail_datasets') @@ -142,42 +143,46 @@ def __init__(self, data_type, sample_data, genome_version, sort=XPOS, num_result self._genome_version = genome_version self._sort = sort self._num_results = num_results + self._data_type = data_type self._ht = None self._enums = None self._globals = None - self._load_filtered_table(data_type, sample_data, **kwargs) + self._load_filtered_table(sample_data, **kwargs) - def _load_filtered_table(self, data_type, sample_data, intervals=None, exclude_intervals=False, variant_ids=None, **kwargs): + def _load_filtered_table(self, sample_data, intervals=None, exclude_intervals=False, variant_ids=None, **kwargs): parsed_intervals, variant_ids = self._parse_intervals(intervals, variant_ids) excluded_intervals = None if exclude_intervals: excluded_intervals = parsed_intervals parsed_intervals = None + self._load_table_kwargs = {'_intervals': parsed_intervals, '_filter_intervals': bool(parsed_intervals)} self.import_filtered_table( - data_type, sample_data, intervals=parsed_intervals, excluded_intervals=excluded_intervals, - variant_ids=variant_ids, **kwargs) + sample_data, excluded_intervals=excluded_intervals, variant_ids=variant_ids, **kwargs) - def import_filtered_table(self, data_type, sample_data, intervals=None, **kwargs): - tables_path = f'{DATASETS_DIR}/{self._genome_version}/{data_type}' - load_table_kwargs = {'_intervals': intervals, '_filter_intervals': bool(intervals)} + def _get_table_path(self, path): + return f'{DATASETS_DIR}/{self._genome_version}/{self._data_type}/{path}' + def _read_table(self, path): + return hl.read_table(self._get_table_path(path), **self._load_table_kwargs) + + def import_filtered_table(self, sample_data, **kwargs): family_samples = defaultdict(list) project_samples = defaultdict(list) for s in sample_data: family_samples[s['family_guid']].append(s) project_samples[s['project_guid']].append(s) - logger.info(f'Loading {data_type} data for {len(family_samples)} families in {len(project_samples)} projects') + logger.info(f'Loading {self._data_type} data for {len(family_samples)} families in {len(project_samples)} projects') if len(family_samples) == 1: family_guid, family_sample_data = list(family_samples.items())[0] - family_ht = hl.read_table(f'{tables_path}/families/{family_guid}.ht', **load_table_kwargs) + family_ht = self._read_table(f'families/{family_guid}.ht') families_ht = self._filter_entries_table(family_ht, family_sample_data, **kwargs) else: filtered_project_hts = [] exception_messages = set() for project_guid, project_sample_data in project_samples.items(): - project_ht = hl.read_table(f'{tables_path}/projects/{project_guid}.ht', **load_table_kwargs) + project_ht = self._read_table(f'projects/{project_guid}.ht') try: filtered_project_hts.append(self._filter_entries_table(project_ht, project_sample_data, **kwargs)) except HTTPBadRequest as e: @@ -199,7 +204,7 @@ def import_filtered_table(self, data_type, sample_data, intervals=None, **kwargs ), ) - annotations_ht_path = f'{tables_path}/annotations.ht' + annotations_ht_path = self._get_table_path('annotations.ht') annotation_ht_query_result = hl.query_table( annotations_ht_path, families_ht.key).first().drop(*families_ht.key) ht = families_ht.annotate(**annotation_ht_query_result) @@ -220,7 +225,7 @@ def import_filtered_table(self, data_type, sample_data, intervals=None, **kwargs ) self._filter_annotated_table(**kwargs) - def _filter_entries_table(self, ht, sample_data, inheritance_mode=None, inheritance_filter=None, quality_filter=None, + def _filter_entries_table(self, ht, sample_data, inheritance_mode=None, inheritance_filter=None, quality_filter=None, excluded_intervals=None, variant_ids=None, **kwargs): if excluded_intervals: ht = hl.filter_intervals(ht, excluded_intervals, keep=False) @@ -238,10 +243,10 @@ def _filter_entries_table(self, ht, sample_data, inheritance_mode=None, inherit if quality_filter.get('vcf_filter'): ht = self._filter_vcf_filters(ht) - passes_quality_filter = self._get_genotype_passes_quality_filter(quality_filter) + passes_quality_filter = self._get_family_passes_quality_filter(quality_filter, ht=ht, **kwargs) if passes_quality_filter is not None: ht = ht.annotate(family_entries=ht.family_entries.map( - lambda entries: hl.or_missing(entries.all(passes_quality_filter), entries) + lambda entries: hl.or_missing(passes_quality_filter(entries), entries) )) ht = ht.filter(ht.family_entries.any(hl.is_defined)) @@ -353,19 +358,18 @@ def _valid_genotype_family_entries(cls, entries, gentoype_entry_indices, genotyp ) return hl.or_missing(is_valid, entries) - @classmethod - def _get_genotype_passes_quality_filter(cls, quality_filter): + def _get_family_passes_quality_filter(self, quality_filter, **kwargs): affected_only = quality_filter.get('affected_only') passes_quality_filters = [] for filter_k, value in quality_filter.items(): - field = cls.GENOTYPE_FIELDS.get(filter_k.replace('min_', '')) + field = self.GENOTYPE_FIELDS.get(filter_k.replace('min_', '')) if field and value: - passes_quality_filters.append(cls._get_genotype_passes_quality_field(field, value, affected_only)) + passes_quality_filters.append(self._get_genotype_passes_quality_field(field, value, affected_only)) if not passes_quality_filters: return None - return lambda gt: hl.all([f(gt) for f in passes_quality_filters]) + return lambda entries: entries.all(lambda gt: hl.all([f(gt) for f in passes_quality_filters])) @classmethod def _get_genotype_passes_quality_field(cls, field, value, affected_only): @@ -646,6 +650,10 @@ def _selected_main_transcript_expr(ht): return hl.or_else(matched_transcript, main_transcript) + def __init__(self, *args, **kwargs): + self._filter_hts = {} + super(VariantHailTableQuery, self).__init__(*args, **kwargs) + def import_filtered_table(self, *args, **kwargs): super(VariantHailTableQuery, self).import_filtered_table(*args, **kwargs) self._ht = self._ht.key_by(**{VARIANT_KEY_FIELD: self._ht.variant_id}) @@ -658,6 +666,33 @@ def _format_transcript_args(self): }) return args + def _get_family_passes_quality_filter(self, quality_filter, ht=None, pathogenicity=None, **kwargs): + passes_quality = super(VariantHailTableQuery, self)._get_family_passes_quality_filter(quality_filter) + clinvar_path_ht = False if passes_quality is None else self._get_clinvar_filter_ht(pathogenicity) + if not clinvar_path_ht: + return passes_quality + + # TODO add ht clinvar filter - hl.is_defined(clinvar_path_ht[ht.key]) + return lambda entries: passes_quality(entries) + + def _get_clinvar_filter_ht(self, pathogenicity): + if self._filter_hts.get(CLINVAR_KEY) is not None: + return self._filter_hts[CLINVAR_KEY] + + clinvar_path_filters = self._get_clinvar_path_filters(pathogenicity) + if not clinvar_path_filters: + self._filter_hts[CLINVAR_KEY] = False + return False + + clinvar_path_ht = self._read_table('clinvar_path_variants.ht') + if CLINVAR_LIKELY_PATH_FILTER not in clinvar_path_filters: + clinvar_path_ht = clinvar_path_ht.filter(clinvar_path_ht.is_pathogenic) + elif CLINVAR_PATH_FILTER not in clinvar_path_filters: + clinvar_path_ht = clinvar_path_ht.filter(clinvar_path_ht.is_likely_pathogenic) + self._filter_hts[CLINVAR_KEY] = clinvar_path_ht + + return clinvar_path_ht + def _get_gene_id_filter(self, gene_ids): self._ht = self._ht.annotate( gene_transcripts=self._ht.sorted_transcript_consequences.filter(lambda t: gene_ids.contains(t.gene_id)) @@ -704,6 +739,12 @@ def _get_annotation_override_filters(self, pathogenicity, annotations): return annotation_filters + @staticmethod + def _get_clinvar_path_filters(pathogenicity): + return { + f for f in (pathogenicity or {}).get(CLINVAR_KEY) or [] if f in CLINVAR_PATH_SIGNIFICANCES + } + def _has_terms_range_expr(self, terms, field, subfield, range_configs): enum_lookup = self._get_enum_lookup(field, subfield) From 5497ab400994341e8bf721df10b160d76ac686ba Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Fri, 4 Aug 2023 17:38:24 -0400 Subject: [PATCH 2/5] path quality override --- hail_search/hail_search_query.py | 3 +-- hail_search/test_search.py | 14 +++++++++++++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/hail_search/hail_search_query.py b/hail_search/hail_search_query.py index e85fbadc74..a7066146ed 100644 --- a/hail_search/hail_search_query.py +++ b/hail_search/hail_search_query.py @@ -672,8 +672,7 @@ def _get_family_passes_quality_filter(self, quality_filter, ht=None, pathogenici if not clinvar_path_ht: return passes_quality - # TODO add ht clinvar filter - hl.is_defined(clinvar_path_ht[ht.key]) - return lambda entries: passes_quality(entries) + return lambda entries: hl.is_defined(clinvar_path_ht[ht.key]) | passes_quality(entries) def _get_clinvar_filter_ht(self, pathogenicity): if self._filter_hts.get(CLINVAR_KEY) is not None: diff --git a/hail_search/test_search.py b/hail_search/test_search.py index a7c54b1a8b..46d81b7d1f 100644 --- a/hail_search/test_search.py +++ b/hail_search/test_search.py @@ -182,8 +182,20 @@ async def test_quality_filter(self): omit_sample_type='SV_WES', ) + quality_filter = {'min_gq': 40, 'min_ab': 50} await self._assert_expected_search( - [VARIANT2, FAMILY_3_VARIANT], quality_filter={'min_gq': 40, 'min_ab': 50}, omit_sample_type='SV_WES', + [VARIANT2, FAMILY_3_VARIANT], quality_filter=quality_filter, omit_sample_type='SV_WES', + ) + + annotations = {'splice_ai': '0.0'} # Ensures no variants are filtered out by annotation/path filters + await self._assert_expected_search( + [VARIANT1, VARIANT2, FAMILY_3_VARIANT], quality_filter=quality_filter, omit_sample_type='SV_WES', + annotations=annotations, pathogenicity={'clinvar': ['likely_pathogenic', 'vus_or_conflicting']}, + ) + + await self._assert_expected_search( + [VARIANT2, FAMILY_3_VARIANT], quality_filter=quality_filter, omit_sample_type='SV_WES', + annotations=annotations, pathogenicity={'clinvar': ['pathogenic']}, ) async def test_location_search(self): From f390b1fc4930c2c865cd7c7f655a63337e8447a6 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 7 Aug 2023 11:20:54 -0400 Subject: [PATCH 3/5] add freq path override --- hail_search/constants.py | 1 + hail_search/hail_search_query.py | 40 ++++++++++++++++++++++---------- 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/hail_search/constants.py b/hail_search/constants.py index a90011d9a6..e3ff0b92fc 100644 --- a/hail_search/constants.py +++ b/hail_search/constants.py @@ -52,6 +52,7 @@ }, } +PATH_FREQ_OVERRIDE_CUTOFF = 0.05 CLINVAR_PATH_FILTER = 'pathogenic' CLINVAR_LIKELY_PATH_FILTER = 'likely_pathogenic' CLINVAR_PATH_SIGNIFICANCES = {CLINVAR_PATH_FILTER, CLINVAR_LIKELY_PATH_FILTER} diff --git a/hail_search/hail_search_query.py b/hail_search/hail_search_query.py index a7066146ed..cf342a6fa7 100644 --- a/hail_search/hail_search_query.py +++ b/hail_search/hail_search_query.py @@ -8,7 +8,7 @@ VARIANT_KEY_FIELD, GNOMAD_GENOMES_FIELD, XPOS, GENOME_VERSION_GRCh38_DISPLAY, INHERITANCE_FILTERS, \ ANY_AFFECTED, X_LINKED_RECESSIVE, REF_REF, REF_ALT, COMP_HET_ALT, ALT_ALT, HAS_ALT, HAS_REF, \ ANNOTATION_OVERRIDE_FIELDS, SCREEN_KEY, SPLICE_AI_FIELD, CLINVAR_KEY, HGMD_KEY, CLINVAR_PATH_SIGNIFICANCES, \ - CLINVAR_PATH_FILTER, CLINVAR_LIKELY_PATH_FILTER, CLINVAR_PATH_RANGES, HGMD_PATH_RANGES + CLINVAR_PATH_FILTER, CLINVAR_LIKELY_PATH_FILTER, CLINVAR_PATH_RANGES, HGMD_PATH_RANGES, PATH_FREQ_OVERRIDE_CUTOFF DATASETS_DIR = os.environ.get('DATASETS_DIR', '/hail_datasets') @@ -416,7 +416,7 @@ def _filter_annotated_table(self, gene_ids=None, rs_ids=None, frequencies=None, if rs_ids: self._filter_rs_ids(rs_ids) - self._filter_by_frequency(frequencies) + self._filter_by_frequency(frequencies, pathogenicity) self._filter_by_in_silico(in_silico) @@ -462,18 +462,23 @@ def _parse_intervals(self, intervals, variant_ids): return parsed_intervals, variant_ids - def _filter_by_frequency(self, frequencies): + def _filter_by_frequency(self, frequencies, pathogenicity): frequencies = {k: v for k, v in (frequencies or {}).items() if k in self.POPULATIONS} if not frequencies: return + path_override_filter = self._frequency_override_filter(pathogenicity) + filters = [] for pop, freqs in sorted(frequencies.items()): pop_filters = [] pop_expr = self._ht[self.POPULATION_FIELDS.get(pop, pop)] pop_config = self._format_population_config(self.POPULATIONS[pop]) if freqs.get('af') is not None: af_field = pop_config.get('filter_af') or pop_config['af'] - pop_filters.append(pop_expr[af_field] <= freqs['af']) + pop_filter = pop_expr[af_field] <= freqs['af'] + if path_override_filter is not None and freqs['af'] < PATH_FREQ_OVERRIDE_CUTOFF: + pop_filter |= path_override_filter & (pop_expr[af_field] <= PATH_FREQ_OVERRIDE_CUTOFF) + pop_filters.append(pop_filter) elif freqs.get('ac') is not None: ac_field = pop_config['ac'] if ac_field: @@ -488,7 +493,13 @@ def _filter_by_frequency(self, frequencies): pop_filters.append(pop_expr[hemi_field] <= freqs['hh']) if pop_filters: - self._ht = self._ht.filter(hl.is_missing(pop_expr) | hl.all(pop_filters)) + filters.append(hl.is_missing(pop_expr) | hl.all(pop_filters)) + + if filters: + self._ht = self._ht.filter(hl.all(filters)) + + def _frequency_override_filter(self, pathogenicity): + return None def _filter_by_in_silico(self, in_silico_filters): in_silico_filters = in_silico_filters or {} @@ -596,10 +607,10 @@ class VariantHailTableQuery(BaseHailTableQuery): 'revel': PredictionPath('dbnsfp', 'REVEL_score'), 'sift': PredictionPath('dbnsfp', 'SIFT_pred'), } - PATHOGENICITY_FILTERS = [ - (CLINVAR_KEY, 'pathogenicity', CLINVAR_PATH_RANGES), - (HGMD_KEY, 'class', HGMD_PATH_RANGES), - ] + PATHOGENICITY_FILTERS = { + CLINVAR_KEY: ('pathogenicity', CLINVAR_PATH_RANGES), + HGMD_KEY: ('class', HGMD_PATH_RANGES), + } GLOBALS = BaseHailTableQuery.GLOBALS + ['versions'] CORE_FIELDS = BaseHailTableQuery.CORE_FIELDS + ['rsid'] @@ -724,10 +735,10 @@ def _annotate_allowed_consequences(self, annotations, annotation_filters): def _get_annotation_override_filters(self, pathogenicity, annotations): annotation_filters = [] - for key, *args in self.PATHOGENICITY_FILTERS: + for key in self.PATHOGENICITY_FILTERS.keys(): path_terms = (pathogenicity or {}).get(key) if path_terms: - annotation_filters.append(self._has_terms_range_expr(path_terms, key, *args)) + annotation_filters.append(self._has_path_expr(path_terms, key)) if annotations.get(SCREEN_KEY): screen_enum = self._get_enum_lookup(SCREEN_KEY.lower(), 'region_type') allowed_consequences = hl.set({screen_enum[c] for c in annotations[SCREEN_KEY]}) @@ -738,13 +749,18 @@ def _get_annotation_override_filters(self, pathogenicity, annotations): return annotation_filters + def _frequency_override_filter(self, pathogenicity): + path_terms = self._get_clinvar_path_filters(pathogenicity) + return self._has_path_expr(path_terms, CLINVAR_KEY) if path_terms else None + @staticmethod def _get_clinvar_path_filters(pathogenicity): return { f for f in (pathogenicity or {}).get(CLINVAR_KEY) or [] if f in CLINVAR_PATH_SIGNIFICANCES } - def _has_terms_range_expr(self, terms, field, subfield, range_configs): + def _has_path_expr(self, terms, field): + subfield, range_configs = self.PATHOGENICITY_FILTERS[field] enum_lookup = self._get_enum_lookup(field, subfield) ranges = [[None, None]] From 30c6fbacfcd5a5bacbc39893247b853598c67a28 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 7 Aug 2023 13:02:12 -0400 Subject: [PATCH 4/5] pdate freq override tests --- .../VARIANTS/annotations.ht/.README.txt.crc | Bin 12 -> 12 bytes .../annotations.ht/.metadata.json.gz.crc | Bin 16 -> 16 bytes .../GRCh38/VARIANTS/annotations.ht/README.txt | 2 +- .../.index.crc | Bin 0 -> 12 bytes .../.metadata.json.gz.crc | Bin .../index | Bin 0 -> 124 bytes .../metadata.json.gz | Bin .../.index.crc | Bin 12 -> 0 bytes .../index | Bin 124 -> 0 bytes .../VARIANTS/annotations.ht/metadata.json.gz | Bin 995 -> 1003 bytes .../annotations.ht/rows/.metadata.json.gz.crc | Bin 20 -> 20 bytes .../annotations.ht/rows/metadata.json.gz | Bin 1331 -> 1346 bytes ...0-0f6272b7-11d0-44b2-a095-6d84fd6f4a97.crc | Bin 0 -> 20 bytes ...0-bd787778-d7c2-4828-835f-ca93e860ce89.crc | Bin 20 -> 0 bytes ...art-0-0f6272b7-11d0-44b2-a095-6d84fd6f4a97 | Bin 0 -> 1432 bytes ...art-0-bd787778-d7c2-4828-835f-ca93e860ce89 | Bin 1426 -> 0 bytes hail_search/test_search.py | 13 ++++++++++++- hail_search/test_utils.py | 4 ++-- 18 files changed, 15 insertions(+), 4 deletions(-) create mode 100644 hail_search/fixtures/GRCh38/VARIANTS/annotations.ht/index/part-0-0f6272b7-11d0-44b2-a095-6d84fd6f4a97.idx/.index.crc rename hail_search/fixtures/GRCh38/VARIANTS/annotations.ht/index/{part-0-bd787778-d7c2-4828-835f-ca93e860ce89.idx => part-0-0f6272b7-11d0-44b2-a095-6d84fd6f4a97.idx}/.metadata.json.gz.crc (100%) create mode 100644 hail_search/fixtures/GRCh38/VARIANTS/annotations.ht/index/part-0-0f6272b7-11d0-44b2-a095-6d84fd6f4a97.idx/index rename hail_search/fixtures/GRCh38/VARIANTS/annotations.ht/index/{part-0-bd787778-d7c2-4828-835f-ca93e860ce89.idx => part-0-0f6272b7-11d0-44b2-a095-6d84fd6f4a97.idx}/metadata.json.gz (100%) delete mode 100644 hail_search/fixtures/GRCh38/VARIANTS/annotations.ht/index/part-0-bd787778-d7c2-4828-835f-ca93e860ce89.idx/.index.crc delete mode 100644 hail_search/fixtures/GRCh38/VARIANTS/annotations.ht/index/part-0-bd787778-d7c2-4828-835f-ca93e860ce89.idx/index create mode 100644 hail_search/fixtures/GRCh38/VARIANTS/annotations.ht/rows/parts/.part-0-0f6272b7-11d0-44b2-a095-6d84fd6f4a97.crc delete mode 100644 hail_search/fixtures/GRCh38/VARIANTS/annotations.ht/rows/parts/.part-0-bd787778-d7c2-4828-835f-ca93e860ce89.crc create mode 100644 hail_search/fixtures/GRCh38/VARIANTS/annotations.ht/rows/parts/part-0-0f6272b7-11d0-44b2-a095-6d84fd6f4a97 delete mode 100644 hail_search/fixtures/GRCh38/VARIANTS/annotations.ht/rows/parts/part-0-bd787778-d7c2-4828-835f-ca93e860ce89 diff --git a/hail_search/fixtures/GRCh38/VARIANTS/annotations.ht/.README.txt.crc b/hail_search/fixtures/GRCh38/VARIANTS/annotations.ht/.README.txt.crc index 0410f9ffd47be8779da8add96b515610a0654fce..7509d5e155441e88136d599f56a7ef9eb1da94fb 100644 GIT binary patch literal 12 TcmYc;N@ieSU}E5VqZI%E5g-Du literal 12 TcmYc;N@ieSU}AVPwiE;!i9L3~UT6$r(k4i~O0G9i5#S9T@}|CLIrCVsZqs6ZjbxJ$k~*#2CWp ze1%bf!SM6@^MVXU?kAZA88$V3yr9j*=+5YPj8~9hL)-R?Kwb!=`%3{SpvfXY{DGB` J5lEpK4**ZH9sB?Q literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/VARIANTS/annotations.ht/index/part-0-bd787778-d7c2-4828-835f-ca93e860ce89.idx/metadata.json.gz b/hail_search/fixtures/GRCh38/VARIANTS/annotations.ht/index/part-0-0f6272b7-11d0-44b2-a095-6d84fd6f4a97.idx/metadata.json.gz similarity index 100% rename from hail_search/fixtures/GRCh38/VARIANTS/annotations.ht/index/part-0-bd787778-d7c2-4828-835f-ca93e860ce89.idx/metadata.json.gz rename to hail_search/fixtures/GRCh38/VARIANTS/annotations.ht/index/part-0-0f6272b7-11d0-44b2-a095-6d84fd6f4a97.idx/metadata.json.gz diff --git a/hail_search/fixtures/GRCh38/VARIANTS/annotations.ht/index/part-0-bd787778-d7c2-4828-835f-ca93e860ce89.idx/.index.crc b/hail_search/fixtures/GRCh38/VARIANTS/annotations.ht/index/part-0-bd787778-d7c2-4828-835f-ca93e860ce89.idx/.index.crc deleted file mode 100644 index 60183f50e1de3c80c12d9a08b69df8e9e9f8d33c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 12 TcmYc;N@ieSU}Ctc)$$4e6GQ{3 diff --git a/hail_search/fixtures/GRCh38/VARIANTS/annotations.ht/index/part-0-bd787778-d7c2-4828-835f-ca93e860ce89.idx/index b/hail_search/fixtures/GRCh38/VARIANTS/annotations.ht/index/part-0-bd787778-d7c2-4828-835f-ca93e860ce89.idx/index deleted file mode 100644 index befb8d34e858d12dcb74e0dc8e5da495f0dfa9ca..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 124 zcma!IU|>iE;!i9L3~UT6$r(k4i~O0G9i5#S9T@}|CLIrCVsZqsBlsB>J$k~*#2CWp ze3Vgu!SM6@^MVXU?wgne88$V3yr9j*=+5YPlvj{pL)-R?Kwb!=`vUGD;q_LWES+CtN4Nb#FX$uKK7l{U*+adz~*%xSgY@%vz|D$sf9gzF12aWqa zhTu4b+hPir#~HVWc&s%&JeFD35j6(1MU>n6V}PhjvAUk!lU(Bx+u;i(4*3q`Ik|cm zFHd>g>@9Wq@2Rn%PkC&yy(}X)U*;a6zSFUpX}Lr{)l%xswWSUwTk5OXmO^{FrLHw! zr>YdeAiDiEJD^PA_dqcnfiQOiezq~HPU@}UWDO}oS7k|F?6 z`{-SS9FV;6Xw(^9X_|4V2xUB}&orZmHb+{Hucf^>$8{$ay%Z-EBoH1P;wQsGp{AFh zeB)jr>oh)j8&u*NDLX72))6x~(Zh6gf7#I?!!3}oI@+2so6;x9 zvN4|E$;Nn^B5Qkx9j8tp`OdLuJ&BMo$1}3HTilHQuDc!2n$HOtCts_=C|rRJ1eJxB zjC<*}33`Q&!xFc3lIkPK>w6=2OY(Vs_i;S0<7Q|K(lqs7qk8X*r{I@rV%!gE2G_{V zV$wj}zfrn}t9!j9AS~@{p#GH;%)5Qt)Jr;rakI>NlI`1xGUQJ8&}!aRcu0a`sU-$C zr|Flw?bqG;a(Fx7yP2shR}!Q(gZO1oKADS8Ni+A9UOs%9c$ZZUtay#rPjK&lM6LS% z_PD|sgK~;Ty*TUzSA+=`-ZUvJ-DKYXakzGQw8|Z8Kr2J7tYX?;2{%Y=%!)K6FCo$|GSRgP6ni|V& zB~nY$Y23j7eTR}rQIdn^vbO?3qWK=aAs?BS!Io!0ov}}7ehCg-W&=*%e@s|(oAk> z7O#xjGxM@95^6xlD=?3>iqS07V5|ohX;dir4VWS6o+q{LxQ=Ww9s7vHnIe{P@kEsq zla0{Z!b!kW5MFqu%zdXq<}@MjEOXEuC5D9BykJdhJjd<{FqxnR{|Idq<-)IBGX+hxgO%${@;Z{V_n)rdVCi?n$9=iS6)(%nrp4#5uWo z7%xwG-0Tgt`R}PVpiX&gvAsMeH(%!tp}y0xnQ65|H`Pk&&9$WtCR^$2*;Yb(x|Oaq zU#qGW!63T*H9Me6;r2i|oeW{_2K;Jc)SdXPVPyp=LRr#KH{GvyjZLuOvL74;o&<-K zcg&gG5@c7jbc!C2(RVn+kC|k99U8>u=h9o7-o#6l}*ia+ruDKnrVK)W9iRK~CGRVBPc@UU|#!gihu zsjYc<`KFEblV~Ot#IsCNoAUL2*J}#tCLo{1)pGi|Nz-AXYV_9uL|Z&p276ir9L2k% zY!O(9@E%v-Fi2&VtPEAUw!RYN3Gc*0+MKtXK1}CrT%w2R>i)8$Lxx)*VRf{gF`LpS z$g(k>;K{~#nj*cu!;VuckYZ<9)K4NL%<+sY?iM%We|)#&S@Ss|{A2&l|kfy2s8rA>5HG^y9W-)1??%ycg z!_~cB637hgY@q&?6U@7P+xR7&T5q#VKgss(L>Y3YeP}grD?B7YvD6Tan^X78-S+G5 zTsgcQ@ZHR0UPuAL=t2B4sGiK_r=YogN-rNiPn^rD09L-n{S(~zA5klRygjaT#-M`Y zQ7;dB$rWLOg)>b`OE;PGe;lrD9wiIQ8qmt%m32(pE8!Z6jhXE56%LRLVgwKDmFeQk R|02rJmTaC+?={LU2GlzH!TJS diff --git a/hail_search/fixtures/GRCh38/VARIANTS/annotations.ht/rows/metadata.json.gz b/hail_search/fixtures/GRCh38/VARIANTS/annotations.ht/rows/metadata.json.gz index 68f277de116f3a6fe8c5aa0776e683b85f62dde9..1361095c3cd14b1710d650dbedfd9fb699628d73 100644 GIT binary patch literal 1346 zcmV-I1-<$oiwFP!000000Nt2dPvbZgfd5OLcC=xZmQD+KYdbBVEl{-xtrnrkO`Oz2 z;$&?X(Lwp|dz`E7q}-%=nGqVr_VKy-Uci-13_WG7>xsP~n zIrmnScsmYb6h%0~+NQqX11Kgmagj%SUTg6XO(^kD2*F#LPt61HJPu$Z+ zX+2V0QFd%dA|RvrRAr*}(-oM-ZthU&Z!Yp+6Al>)T#7>Y16@fHVMJ{?7mx^cb!}1{ zfTzbgaS!$rVgymmuV=__?$K~yhctLUS^PU-`6%QFIKbficT*g6g<{sifX!4lNdzA6X5I;iERQ0MLeh% zt>BvNsUox2&SD24Hi-(8$X^S@cqMRXBjOx0S)QFC`WwoFR107=yPGYH^x1c__3gl% zyxNH+gVHvp*O&8KH?#11BWv+rEBwf5kTvm*+8bD-m>|Xu7s198h z$ZSkP<7j7c#!a^%sGJvmMxG`%p1aGN<<yM$#44oT0#>cjL{+)HA(aNIa<$@AIV>=x4AN?#L#muq81)LG z3J*I0b%0OW4wFtDY%1N!(ew^78Bj8rbO1-HQzeWv4z)0366j3;X-C-thzg!Ib`DUd z3p#orC2~4w(*>Ju)^wugRnoi)nzuNkVWxvHI$&CCQP5(LMFWe*6&))YRXU;4^mClV~2aWffB8gcZ0;RSs>)&HXD3&*_7~5xY30TjTjxk=wQX`sL)B#H0ioCw{qNs zS)XLdhZY*DHd&Fu1VhW*b=&T?GFR|mvkL;uj2qKpHtF?GC(V*=d0P_sipT^|@CeRm zLK```CO<*oq(2&-wzn<*9b8QFy&v2NpE~?xO2;;J!KNla?P>66VkUs0UAG`}o@*l1i*`O<7rsI8xSpFyBXTB_;UFQHw3TK51H8S%_T3Z! E0NinpIRF3v literal 1331 zcmV-31)WrJgy4Z?TkdzV zx12jGjPWjoF$zNxlGM6~B^-MMOF_$dt~A{26XMP41&|_g3RZrfPfuuUlpA=qzpXPWQuCKP?P9~6bo>6g~T?7gQ6Jf zMJu>wd!oqfwX@hkh)tq`IP^9G@w5^+v=OB#;j%nCL-Zb4>L*$N>-p_`X{67-oo{ZA z&B@E1SaB$AV|sJ3xN#G?(0H+m!+o+PY~Y^XPS)lsC_>W38jIfgXkZ-eT+X=376g^^B2NfwVv~itx?WvQ{%X9RER^S&JMq3%e!%FQ2z3-k+FC^G z;gzy6tIC!<#Suvj2NY{pjkfAZHBtHMUa#G=<@I#3Z1yf-Rn?_9Q=5m3{6F@iM9=!= zYNi}Y!fAvAaS_#oTJLWHZ&6FiBcVWM0hXd_`%;QU#7RW-_qvI6Orr0xM4tw|rTqW5v-NB{9Dk~zR=n>Z z2=9$XG(~*$sUTJ%^%}5hjV7wf?G33kP?f6{r^;c0DP@pU107Q3q{65d5LI~C38({n z(sr11>R?mpPL8HGkja3O$)p1~Qk^Pcq;aT)A(KF_0!TZ`7C=<+w6Sx5I$hAw11XWy zL7OhvbhD-tH7}CpMbNy)84WWXgwX-hVvB+ngDe_YG_L4a(Wuf1m8PF7?W^wQO&dGh z%Z*m9b@FbI{3kXgJQQwpVM8NE2QWHV@iHoOQZ!Av?#!)tn=tE>EcwtvL)9iLGMHd! znY(J+-B#ub9z5)PpK#;Gw46`-gYmRkvMs!&p{IyU0R`W|8BORxPHyNA5Eu?dC*$_E z<=@BWvwZJ6H$rFxMdr>-{(~r}yd)YyTvT)UospAJwlJ9g{P~YuT+Tv6_w$?Ko34v0 zcYVrJ+0UPf4{K=Sa4St=(D7joE6z{5YG!uq`zaB%ABeHIXjQt=EW7Xe0%!tJ!qB?A zbpO4d3$<4*pA2t)mHGoFFce-v*?g>?a(kSx?h48aR(^F5*EsEV9Tqb`x#kKZoIpqP zx?Or_8P@S@rPq$zjM|L({e{{U%7H+B$=Zdieeh0@C!tA?t|hai@&s|Qpz_x_=gyLY z-cj$t8;wV!(YWW0@St}*9*leA;TOM$(dm$kPmVAdpXNX2eOn_IkSTmcdUF4>1(f4L z23Gv^2M9$Vaw?s^^Z@o=GQ^6Kbg%yWMaXgTZDOURCL<^((j*)4P7@N pbCzqO42L_9Yzp5Xcv8>Jr4c!k$#9SmT-wUAe*uUTd->fI003k?mPh~q diff --git a/hail_search/fixtures/GRCh38/VARIANTS/annotations.ht/rows/parts/.part-0-0f6272b7-11d0-44b2-a095-6d84fd6f4a97.crc b/hail_search/fixtures/GRCh38/VARIANTS/annotations.ht/rows/parts/.part-0-0f6272b7-11d0-44b2-a095-6d84fd6f4a97.crc new file mode 100644 index 0000000000000000000000000000000000000000..07e11f02752cadbff80fe8bd702d59a3b20a5d9c GIT binary patch literal 20 bcmYc;N@ieSU}9iTE-~2|z21U-bIx-BGqeU( literal 0 HcmV?d00001 diff --git a/hail_search/fixtures/GRCh38/VARIANTS/annotations.ht/rows/parts/.part-0-bd787778-d7c2-4828-835f-ca93e860ce89.crc b/hail_search/fixtures/GRCh38/VARIANTS/annotations.ht/rows/parts/.part-0-bd787778-d7c2-4828-835f-ca93e860ce89.crc deleted file mode 100644 index 7f51fce1fc3b875b2778823fd96550bb71a5f2db..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 20 bcmYc;N@ieSU}7i;Rb3&GWj@hM)5Z?~F~P`n~wlh)hr?Dw{1x61)nOn^3&h&4i6Fnri64HQ2T zj71_*lxS%Y1x;%~P(A`Ektm7+ktR?gA_iMUu)tr6T#FO~RveA<$Ik5gJ~OlXyuX^fBURCAoa5NXm=%~qyVwM^4= zOk^&^c3pGW!g0Un+5)6Arq*q)7~A$15uma5==S%#u5+X_pwu8OPU_s@ov+*H0-L+Z z?yX%@hT5ZHe1weJGrhf2I1+4ZLEFGpks~!EK|<)lWQjzk&z_r92o9>6ttd{6z1Bo= zH>jJYt|_un+9a!{9#;*^_T6Np0I$0oa`I3pRU$amvMjw4o^I1hfE0jKVu+A1jXNx4 z$7|w=fj`rCu=e7lVGm`OB2SRE%g3~Ux9PYjEGKEKn>^kdSl-P1$hvrMmVFzEEI?XD zCbZ%f=*=773}bk7lF-+_{!Ywb&e#g28Vxi1Bz}5{PXbH)`jRDy`jc2qElW|9aXT>- z)GX7m4cRPRBvV~+d^a(k{Z>^z?uw?tWHK#nmXnREbxZLZ>m#M8FC++qYeFQ2(Viw- zDTs4SOV&~dM%g1qQpl}v6xmL}Rc*5$V~iBys*^&2qp&=eLY}9}W(tyL$@Jf8Dhu-| zj9IL!nyx5G5>8>BV>^o7k31&iN&zMjn54?o*>@gAq#szJJDTD!jrhMnOd*_;r-9lk z+o{YjXd?{O(PofxYU_=dV5@zINqxvI(ak=#ziP5T#(v$L#37s;D3lrydeG9#$g4$` z!-|-Q z<>)$fSaFj#*Bu$)ar)GePT_#2?efvu#>f=b@q<&5MGz55kRTu|_-v-z5S{N5jt3f| zs3_RoecqiC+Tvc^!i^ju>G(T`0@!f7-95QCa`~{ZzxL@9=iIlW?F{9|mc8L(Yu)c| zpD>s!;KdPsdSs2u$nKf;$4w5w#4N{VNEkfwYj4f$bOnOO`$vT4wS#AiIg(r=-63!8 zyj~n)@MyYj>838+6Q5BWOVP~-c)re7HAgmXlReZZX2nUmek6vOjE9^Nk(&W&MC@es zGgVWjN)iHB03_j<Mv1CYbblF0v6KVrVMlFgAx9Ab}*R9m}M2fVhT2lOQofi#AAu z**tU4p@yPbDvc-O{@Ry5e_259!&5xQ^DDkmf!PfQyn#SEd*}QgypHaScTezp7qVRI z!9zcLvzw1GJ3<#GmUH;~x;XdXr~Akt|KS$0_e&mvv}bpbM@VpS&UTVmfB4(tE~#qU zZ`X_Y%~J1-k^~8~)U=eeBwMmcEN=%POLf(#ag&z?pIu?jR=%@p>627f6VI(WN7#nwraG5Fa`G_%OdGRWd!|W_X`XGvFGO7=AY{jzr5FjBA?GksKR8eZ$<_CPi z`qRfr>#|Kxx!^$d@W0&fk=`YFSKfczE4x&3hE!wXkJ)~0u+^@o-tb?+!Ym94b2wyk)ntsrWFDyl4!7W+X@m8bu|8&%$)Dc>zp(5x&(~( z1E~svWnpn{<095RoeEw&PCUZ9fklfo%WyQN84MXL!W5>P5nhgX?bmN!76^d48ym}T zVit9}U!ceihl*!xJ8W^h76cx_^CRb=x}%y>Z#Gh-qna6jQY8@qJP;VVYG zjvKR(%AQoWscgi$Eks0P{Xw_wA~{-lO=z^fbuZO@1Na2#wdXrK zODXX+C9$sOXNi&;5+{DNVZ2O&71L%U6oNylW-E#l=EIsy-1q6GscTBqC~A(XrXEoZ z%l6zvFc)vQ6j|BGlPlqyWLcJ8iI{G)N`T~oRAPXT0E;*rvLiK-c+cI`AW}oGjJp#< z@zr1%f;$|0{V7Sr=!w7DJx{lA5i+j)J=N|*JOhwx$rzv)Y_RXUTeveULD-rzKS=3Z z4qIVNV*#$2EL0SFByGM|UpPNre*#OGWhsg>Y6k{M?QKEJd zu4|t4bZkelyOGU_ zvbEN+XQWAlzYz@p-`A)@#!J1&Gjy9Q;Fk5fJ7+dW^ zSnfioP&d2y{;J7=^aHv%jv^3B%aa@6f7H^8$?AN|;ReQoIatVR!$ZDuWPE8jT#n`_ zGYz90FB;ASf)VL)3{h>xh@-?-v^bRi`l*orAs%dIIl9gqZfs(3#qk~i=e{{!#vReL zJsw_K$MYzz5c=mJgCHmoA3>V<#IY%EL*^WpPyuMjL|N|M_DgQ5f3tgKGktKJq$1l7 zr{T3fHn?Z^2d^C!57a(;`l9=G<_4bQrQNICuiMj4e@5?3^p{_7rIRZ;`q+XuU2Ltp z<@SmFArUV>5GsPpTpsq0^4|w3eDSG{&68pX$f>IF_QDqeDWSt!hrx$e{a}QNTS*xLz=fDLf`w0hvyQ9FTW% zTTRuBG9>|#<^l=CNS?eF>0?ZGlLps9LKw|?6eF_eU6PhSwPTr#-6gJ}umngLnfY7f zRFS|%b(o>3mdYZDh_`&@7q5uuoLCy;qQ2@WWti4*Fb4I^DR*Og`a7qE{ZFphecPSZ z@)g&?|Kr#aiobfL8HYaKP5OloH zcTO&hleE^F*230AYmkJqw-eH+t458RxFvf019LtQc2+e$#dI~kbny`a(k{pNoF+)& zQKtwUl7x_hFpsbxT_({0e;dIrNYXFlp)O3f70c>E8le>L%JdkiV$8J7BSP+~bH_;C zg7we1p!vRjs%d>5QhVgs_1%xhimw*NYJ`D@$Nn_Z!`bxJ&YeGfd02rnMQhJjIl!m+ UGh=u!OPhc;5wx~GaHItI2YH`#3;+NC diff --git a/hail_search/test_search.py b/hail_search/test_search.py index 46d81b7d1f..6a66ad455d 100644 --- a/hail_search/test_search.py +++ b/hail_search/test_search.py @@ -247,7 +247,7 @@ async def test_frequency_filter(self): ) await self._assert_expected_search( - [VARIANT4], frequencies={'gnomad_genomes': {'af': 0.41, 'hh': 1}}, omit_sample_type='SV_WES', + [VARIANT2, VARIANT4], frequencies={'gnomad_genomes': {'af': 0.41, 'hh': 1}}, omit_sample_type='SV_WES', ) await self._assert_expected_search( @@ -260,6 +260,17 @@ async def test_frequency_filter(self): omit_sample_type='SV_WES', ) + annotations = {'splice_ai': '0.0'} # Ensures no variants are filtered out by annotation/path filters + await self._assert_expected_search( + [VARIANT1, VARIANT2, VARIANT4], frequencies={'gnomad_genomes': {'af': 0.01}}, omit_sample_type='SV_WES', + annotations=annotations, pathogenicity={'clinvar': ['likely_pathogenic', 'vus_or_conflicting']}, + ) + + await self._assert_expected_search( + [VARIANT2, VARIANT4], frequencies={'gnomad_genomes': {'af': 0.01}}, omit_sample_type='SV_WES', + annotations=annotations, pathogenicity={'clinvar': ['pathogenic', 'vus_or_conflicting']}, + ) + async def test_annotations_filter(self): await self._assert_expected_search([VARIANT2], pathogenicity={'hgmd': ['hgmd_other']}, omit_sample_type='SV_WES') diff --git a/hail_search/test_utils.py b/hail_search/test_utils.py index 83e1365c1f..27ef225dc8 100644 --- a/hail_search/test_utils.py +++ b/hail_search/test_utils.py @@ -97,7 +97,7 @@ 'topmed': {'af': 0.0784199982881546, 'ac': 20757, 'an': 264690, 'hom': 0, 'het': 20757}, 'exac': {'af': 0.0, 'ac': 0, 'an': 0, 'hom': 0, 'hemi': 0, 'het': 0, 'filter_af': 0.0}, 'gnomad_exomes': {'af': 0.0, 'ac': 0, 'an': 0, 'hom': 0, 'hemi': 0, 'filter_af': 0.0}, - 'gnomad_genomes': {'af': 0.34449315071105957, 'ac': 9271, 'an': 26912, 'hom': 480, 'hemi': 0, 'filter_af': 0.40276646614074707}, + 'gnomad_genomes': {'af': 0.034449315071105957, 'ac': 927, 'an': 26912, 'hom': 48, 'hemi': 0, 'filter_af': 0.040276646614074707}, }, 'predictions': { 'cadd': 4.668000221252441, @@ -168,7 +168,7 @@ 'topmed': {'af': 0.24615199863910675, 'ac': 65154, 'an': 264690, 'hom': 8775, 'het': 47604}, 'exac': {'af': 0.29499998688697815, 'ac': 35805, 'an': 121372, 'hom': 5872, 'hemi': 0, 'het': 24061, 'filter_af': 0.4153035283088684}, 'gnomad_exomes': {'af': 0.28899794816970825, 'ac': 72672, 'an': 251462, 'hom': 11567, 'hemi': 0, 'filter_af': 0.4116474986076355}, - 'gnomad_genomes': {'af': 0.2633855640888214, 'ac': 40003, 'an': 151880, 'hom': 5754, 'hemi': 0, 'filter_af': 0.4067690968513489}, + 'gnomad_genomes': {'af': 0, 'ac': 0, 'an': 0, 'hom': 0, 'hemi': 0, 'filter_af': 0}, }, 'predictions': { 'cadd': 20.899999618530273, From 3156430e87ed7e4d645c345f2ed5ac27d6f33c10 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 7 Aug 2023 13:27:58 -0400 Subject: [PATCH 5/5] fix unit tests --- seqr/views/apis/variant_search_api_tests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/seqr/views/apis/variant_search_api_tests.py b/seqr/views/apis/variant_search_api_tests.py index 78dfc8317d..37db1aeb93 100644 --- a/seqr/views/apis/variant_search_api_tests.py +++ b/seqr/views/apis/variant_search_api_tests.py @@ -387,7 +387,7 @@ def test_query_variants(self, mock_get_variants, mock_get_gene_counts, mock_erro ['12', '48367227', 'TC', 'T', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '2', 'Known gene for phenotype (None)|Excluded (None)', 'a later note (None)|test n\xf8te (None)', '', '', '', '', '', '', '', '', '', '', '', '', '', '', ''], - ['1', '11794419', 'T', 'G', 'ENSG00000177000', 'missense_variant', '0.31111112236976624', '0.29499998688697815', '0.2633855640888214', + ['1', '11794419', 'T', 'G', 'ENSG00000177000', 'missense_variant', '0.31111112236976624', '0.29499998688697815', '0', '0.28899794816970825', '0.24615199863910675', '20.899999618530273', '0.19699999690055847', '2.000999927520752', '0.0', '', 'tolerated', '', 'damaging', 'rs1801131', 'ENST00000376585.6:c.1409A>C', 'ENSP00000365770.1:p.Glu470Ala', 'Conflicting_interpretations_of_pathogenicity', '1', '', '2', '', '', '', '', '', 'HG00731', '2', '99', '1.0', @@ -421,7 +421,7 @@ def test_query_variants(self, mock_get_variants, mock_get_gene_counts, mock_erro ['12', '48367227', 'TC', 'T', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '2', 'Known gene for phenotype (None)|Excluded (None)', 'a later note (None)|test n\xf8te (None)', '', '', '', '', '', '', '', '', '', '', '', '',], - ['1', '11794419', 'T', 'G', 'ENSG00000177000', 'missense_variant', '0.31111112236976624', '0.29499998688697815', '0.2633855640888214', + ['1', '11794419', 'T', 'G', 'ENSG00000177000', 'missense_variant', '0.31111112236976624', '0.29499998688697815', '0', '0.28899794816970825', '0.24615199863910675', '20.899999618530273', '0.19699999690055847', '2.000999927520752', '0.0', '', 'tolerated', '', 'damaging', 'rs1801131', 'ENST00000376585.6:c.1409A>C', 'ENSP00000365770.1:p.Glu470Ala', 'Conflicting_interpretations_of_pathogenicity', '1', '', '2', '', '', 'HG00731', '2', '99', '1.0',