From 4b56a07d51b6204e7b47395205387b176b7086b9 Mon Sep 17 00:00:00 2001
From: Hana Snow
Date: Sun, 11 Feb 2024 23:53:47 -0500
Subject: [PATCH 1/7] maintain separate comp het and main hts

---
 hail_search/queries/base.py             | 203 +++++++++++-------------
 hail_search/queries/mito.py             |  12 +-
 hail_search/queries/multi_data_types.py |  29 +++-
 hail_search/queries/snv_indel.py        |   8 +-
 hail_search/queries/snv_indel_37.py     |   4 +-
 hail_search/queries/sv.py               |  22 +--
 hail_search/web_app.py                  |   2 +-
 7 files changed, 149 insertions(+), 131 deletions(-)

diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py
index ddae69ea30..b449d58b48 100644
--- a/hail_search/queries/base.py
+++ b/hail_search/queries/base.py
@@ -233,20 +233,6 @@ def _load_filtered_table(self, sample_data, intervals=None, **kwargs):
         self.import_filtered_table(
             sample_data, parsed_intervals=parsed_intervals, **kwargs)

-        if self._has_comp_het_search:
-            self._comp_het_ht = self._filter_compound_hets()
-            if self._is_recessive_search:
-                self._ht = self._ht.filter(self._ht.family_entries.any(hl.is_defined))
-                if self._has_secondary_annotations:
-                    annotation_filters = self._get_annotation_filters(self._ht)
-                    if annotation_filters:
-                        self._ht = self._ht.filter(hl.any(annotation_filters))
-                    else:
-                        # Data type only has annotations for second hit
-                        self._ht = None
-            else:
-                self._ht = None
-
     @classmethod
     def _get_table_path(cls, path, use_ssd_dir=False):
         return f'{SSD_DATASETS_DIR if use_ssd_dir else DATASETS_DIR}/{cls.GENOME_VERSION}/{cls.DATA_TYPE}/{path}'
@@ -292,7 +278,7 @@ def _load_filtered_project_hts(self, project_samples, skip_all_missing=False, **
                 continue
             try:
                 filtered_project_hts.append(
-                    (self._filter_entries_table(project_ht, project_sample_data, **kwargs), len(project_sample_data))
+                    (*self._filter_entries_table(project_ht, project_sample_data, **kwargs), len(project_sample_data))
                 )
             except HTTPBadRequest as e:
                 exception_messages.add(e.reason)
@@ -312,37 +298,43 @@ def import_filtered_table(self, sample_data, intervals=None, **kwargs):
             family_ht = family_ht.annotate_globals(
                 family_guids=[family_guid], family_samples={family_guid: family_ht.sample_ids},
             )
-            families_ht = self._filter_entries_table(family_ht, family_sample_data, **kwargs)
+            families_ht, comp_het_families_ht = self._filter_entries_table(family_ht, family_sample_data, **kwargs)
         else:
             filtered_project_hts = self._load_filtered_project_hts(project_samples, **kwargs)
-            families_ht, num_families = filtered_project_hts[0]
-            entry_type = families_ht.family_entries.dtype.element_type
-            for project_ht, num_project_families in filtered_project_hts[1:]:
-                families_ht = families_ht.join(project_ht, how='outer')
-                families_ht = families_ht.select_globals(
-                    family_guids=families_ht.family_guids.extend(families_ht.family_guids_1)
-                )
-                select_fields = {
-                    'filters': families_ht.filters.union(families_ht.filters_1),
-                    'family_entries': hl.bind(
-                        lambda a1, a2: a1.extend(a2),
-                        hl.or_else(families_ht.family_entries, hl.empty_array(entry_type)),
-                        hl.or_else(families_ht.family_entries_1, hl.empty_array(entry_type)),
-                    ),
-                }
-                if 'comp_het_family_entries_1' in families_ht.row:
-                    missing_arr = lambda count: hl.range(count).map(lambda i: hl.missing(entry_type))
-                    select_fields['comp_het_family_entries'] = hl.bind(
-                        lambda a1, a2: a1.extend(a2),
-                        hl.or_else(families_ht.comp_het_family_entries, missing_arr(num_families)),
-                        hl.or_else(families_ht.comp_het_family_entries_1, missing_arr(num_project_families)),
-                    )
-                families_ht = families_ht.select(**select_fields)
+            families_ht, 
comp_het_families_ht, num_families = filtered_project_hts[0] + main_ht = comp_het_families_ht if families_ht is None else families_ht + entry_type = main_ht.family_entries.dtype.element_type + for project_ht, comp_het_project_ht, num_project_families in filtered_project_hts[1:]: + if families_ht is not None: + families_ht = _add_project_ht(self, families_ht, project_ht, entry_type) + if comp_het_families_ht is not None: + comp_het_families_ht = _add_project_ht(self, comp_het_families_ht, comp_het_project_ht, entry_type) num_families += num_project_families - self._ht = self._query_table_annotations(families_ht, self._get_table_path('annotations.ht')) + # TODO add pre-processing for annotations so do not even read in tables if not going to have vaild annotations + if comp_het_families_ht is not None: + comp_het_ht = self._query_table_annotations(comp_het_families_ht, self._get_table_path('annotations.ht')) + self._comp_het_ht = self._filter_annotated_table(comp_het_ht, is_comp_het=True, **kwargs) + self._comp_het_ht = self._filter_compound_hets() - self._filter_annotated_table(**kwargs) + if families_ht is not None: + ht = self._query_table_annotations(families_ht, self._get_table_path('annotations.ht')) + self._ht = self._filter_annotated_table(ht, **kwargs) + + def _add_project_ht(self, families_ht, project_ht, entry_type): + families_ht = families_ht.join(project_ht, how='outer') + families_ht = families_ht.select_globals( + family_guids=families_ht.family_guids.extend(families_ht.family_guids_1) + ) + select_fields = { + 'filters': families_ht.filters.union(families_ht.filters_1), + 'family_entries': hl.bind( + lambda a1, a2: a1.extend(a2), + hl.or_else(families_ht.family_entries, hl.empty_array(entry_type)), + hl.or_else(families_ht.family_entries_1, hl.empty_array(entry_type)), + ), + } + return families_ht.select(**select_fields) def _filter_entries_table(self, ht, sample_data, inheritance_mode=None, inheritance_filter=None, quality_filter=None, **kwargs): @@ -362,11 +354,11 @@ def _filter_entries_table(self, ht, sample_data, inheritance_mode=None, inherita )) ht = ht.filter(ht.family_entries.any(hl.is_defined)) - ht = self._filter_inheritance( + ht, ch_ht = self._filter_inheritance( ht, inheritance_mode, inheritance_filter, sorted_family_sample_data, ) - return ht.select_globals('family_guids') + return ht, ch_ht @classmethod def _add_entry_sample_families(cls, ht, sample_data): @@ -432,25 +424,20 @@ def _filter_inheritance(self, ht, inheritance_mode, inheritance_filter, sorted_f lambda entries: hl.or_missing(entries.any(any_valid_entry), entries)) ) - filter_mode_map = {} - if (inheritance_filter or inheritance_mode) and not is_any_affected: - filter_mode_map[inheritance_mode] = 'family_entries' + comp_het_ht = None if self._has_comp_het_search: - filter_mode_map[COMPOUND_HET] = 'comp_het_family_entries' - - for mode, field in sorted(filter_mode_map.items()): - ht = self._filter_families_inheritance( - ht, mode, inheritance_filter, sorted_family_sample_data, field, + comp_het_ht = self._filter_families_inheritance( + ht, COMPOUND_HET, inheritance_filter, sorted_family_sample_data, ) - filter_expr = ht.family_entries.any(hl.is_defined) - if self._has_comp_het_search: - ch_filter = ht.comp_het_family_entries.any(hl.is_defined) - filter_expr = (filter_expr | ch_filter) if self._is_recessive_search else ch_filter + if (inheritance_filter or inheritance_mode) and not is_any_affected: + ht = None if inheritance_mode == COMPOUND_HET else self._filter_families_inheritance( + ht, 
inheritance_mode, inheritance_filter, sorted_family_sample_data, + ) - return ht.filter(filter_expr) + return ht, comp_het_ht - def _filter_families_inheritance(self, ht, inheritance_mode, inheritance_filter, sorted_family_sample_data, field): + def _filter_families_inheritance(self, ht, inheritance_mode, inheritance_filter, sorted_family_sample_data): individual_genotype_filter = (inheritance_filter or {}).get('genotype') entry_indices_by_gt = defaultdict(lambda: defaultdict(list)) @@ -467,12 +454,11 @@ def _filter_families_inheritance(self, ht, inheritance_mode, inheritance_filter, for genotype, entry_indices in entry_indices_by_gt.items(): entry_indices = hl.dict(entry_indices) - family_entries = ht[field] if field in ht.row else ht.family_entries - ht = ht.annotate(**{field: hl.enumerate(family_entries).map( + ht = ht.annotate(family_entries=hl.enumerate(ht.family_entries).map( lambda x: self._valid_genotype_family_entries(x[1], entry_indices.get(x[0]), genotype, inheritance_mode) - )}) + )) - return ht + return ht.filter(ht.family_entries.any(hl.is_defined)).select_globals('family_guids') @classmethod def _valid_genotype_family_entries(cls, entries, gentoype_entry_indices, genotype, inheritance_mode): @@ -525,30 +511,30 @@ def _parse_variant_keys(self, variant_keys=None, **kwargs): def _prefilter_entries_table(self, ht, **kwargs): return ht - def _filter_annotated_table(self, gene_ids=None, rs_ids=None, frequencies=None, in_silico=None, pathogenicity=None, - annotations=None, annotations_secondary=None, **kwargs): + def _filter_annotated_table(self, ht, gene_ids=None, rs_ids=None, frequencies=None, in_silico=None, pathogenicity=None, + annotations=None, annotations_secondary=None, is_comp_het=False, **kwargs): if gene_ids: - self._filter_by_gene_ids(gene_ids) + ht = self._filter_by_gene_ids(ht, gene_ids) if rs_ids: - self._filter_rs_ids(rs_ids) + ht = self._filter_rs_ids(ht, rs_ids) - self._filter_by_frequency(frequencies, pathogenicity) + ht = self._filter_by_frequency(ht, frequencies, pathogenicity) - self._filter_by_in_silico(in_silico) + ht = self._filter_by_in_silico(ht, in_silico) - self._filter_by_annotations(pathogenicity, annotations, annotations_secondary) + return self._filter_by_annotations(ht, pathogenicity, annotations, annotations_secondary, is_comp_het) - def _filter_by_gene_ids(self, gene_ids): + def _filter_by_gene_ids(self, ht, gene_ids): gene_ids = hl.set(gene_ids) - self._ht = self._ht.annotate( - gene_transcripts=self._ht[self.TRANSCRIPTS_FIELD].filter(lambda t: gene_ids.contains(t.gene_id)) + ht = ht.annotate( + gene_transcripts=ht[self.TRANSCRIPTS_FIELD].filter(lambda t: gene_ids.contains(t.gene_id)) ) - self._ht = self._ht.filter(hl.is_defined(self._ht.gene_transcripts.first())) + return ht.filter(hl.is_defined(ht.gene_transcripts.first())) - def _filter_rs_ids(self, rs_ids): + def _filter_rs_ids(self, ht, rs_ids): rs_id_set = hl.set(rs_ids) - self._ht = self._ht.filter(rs_id_set.contains(self._ht.rsid)) + return ht.filter(rs_id_set.contains(ht.rsid)) def _parse_intervals(self, intervals, **kwargs): parsed_variant_keys = self._parse_variant_keys(**kwargs) @@ -584,16 +570,16 @@ def _parse_intervals(self, intervals, **kwargs): def _should_add_chr_prefix(self): return True - def _filter_by_frequency(self, frequencies, pathogenicity): + def _filter_by_frequency(self, ht, frequencies, pathogenicity): frequencies = {k: v for k, v in (frequencies or {}).items() if k in self.POPULATIONS} if not frequencies: - return + return ht - path_override_filter = 
self._frequency_override_filter(pathogenicity) + path_override_filter = self._frequency_override_filter(ht, pathogenicity) filters = [] for pop, freqs in sorted(frequencies.items()): pop_filters = [] - pop_expr = self._ht[self.POPULATION_FIELDS.get(pop, pop)] + pop_expr = ht[self.POPULATION_FIELDS.get(pop, pop)] pop_config = self._format_population_config(self.POPULATIONS[pop]) if freqs.get('af') is not None: af_field = pop_config.get('filter_af') or pop_config['af'] @@ -618,22 +604,23 @@ def _filter_by_frequency(self, frequencies, pathogenicity): filters.append(hl.is_missing(pop_expr) | hl.all(pop_filters)) if filters: - self._ht = self._ht.filter(hl.all(filters)) + ht = ht.filter(hl.all(filters)) + return ht - def _frequency_override_filter(self, pathogenicity): + def _frequency_override_filter(self, ht, pathogenicity): return None - def _filter_by_in_silico(self, in_silico_filters): + def _filter_by_in_silico(self, ht, in_silico_filters): in_silico_filters = in_silico_filters or {} require_score = in_silico_filters.get('requireScore', False) in_silico_filters = {k: v for k, v in in_silico_filters.items() if k in self.PREDICTION_FIELDS_CONFIG and v} if not in_silico_filters: - return + return ht in_silico_qs = [] missing_qs = [] for in_silico, value in in_silico_filters.items(): - score_filter, ht_value = self._get_in_silico_filter(in_silico, value) + score_filter, ht_value = self._get_in_silico_filter(ht, in_silico, value) in_silico_qs.append(score_filter) if not require_score: missing_qs.append(hl.is_missing(ht_value)) @@ -641,41 +628,46 @@ def _filter_by_in_silico(self, in_silico_filters): if missing_qs: in_silico_qs.append(hl.all(missing_qs)) - self._ht = self._ht.filter(hl.any(in_silico_qs)) + return ht.filter(hl.any(in_silico_qs)) - def _get_in_silico_filter(self, in_silico, value): + def _get_in_silico_filter(self, ht, in_silico, value): score_path = self.PREDICTION_FIELDS_CONFIG[in_silico] enum_lookup = self._get_enum_lookup(*score_path[:2]) if enum_lookup is not None: - ht_value = self._ht[score_path.source][f'{score_path.field}_id'] + ht_value = ht[score_path.source][f'{score_path.field}_id'] score_filter = ht_value == enum_lookup[value] else: - ht_value = self._ht[score_path.source][score_path.field] + ht_value = ht[score_path.source][score_path.field] score_filter = ht_value >= float(value) return score_filter, ht_value - def _filter_by_annotations(self, pathogenicity, annotations, annotations_secondary): + def _filter_by_annotations(self, ht, pathogenicity, annotations, annotations_secondary, is_comp_het): annotations = annotations or {} - annotation_override_filters = self._get_annotation_override_filters(annotations, pathogenicity=pathogenicity) + annotation_override_filters = self._get_annotation_override_filters(ht, annotations, pathogenicity=pathogenicity) - annotation_exprs, _ = self._get_allowed_consequences_annotations(annotations, annotation_override_filters) - if self._has_comp_het_search: + annotation_exprs, _ = self._get_allowed_consequences_annotations(ht, annotations, annotation_override_filters) + if is_comp_het or (self._has_comp_het_search and not annotation_exprs): secondary_exprs, allowed_secondary_consequences = self._get_allowed_consequences_annotations( - annotations_secondary or {}, annotation_override_filters, is_secondary=True) + ht, annotations_secondary or {}, annotation_override_filters, is_secondary=True) if secondary_exprs: annotation_exprs.update({f'{k}_secondary': v for k, v in secondary_exprs.items()}) if secondary_exprs or 
allowed_secondary_consequences: self._has_secondary_annotations = True + if self._has_secondary_annotations and not is_comp_het: + # Data type only has annotations for second hit, so no query to be done on the main ht + return None if not annotation_exprs: - return + return ht - self._ht = self._ht.annotate(**annotation_exprs) - annotation_filters = self._get_annotation_filters(self._ht) + self._get_annotation_filters(self._ht, is_secondary=True) - self._ht = self._ht.filter(hl.any(annotation_filters)) + ht = ht.annotate(**annotation_exprs) + annotation_filters = self._get_annotation_filters(ht) + if is_comp_het: + annotation_filters += self._get_annotation_filters(ht, is_secondary=True) + return ht.filter(hl.any(annotation_filters)) - def _get_allowed_consequences_annotations(self, annotations, annotation_filters, is_secondary=False): + def _get_allowed_consequences_annotations(self, ht, annotations, annotation_filters, is_secondary=False): allowed_consequences = { ann for field, anns in annotations.items() if anns and (field not in ANNOTATION_OVERRIDE_FIELDS) for ann in anns @@ -686,7 +678,7 @@ def _get_allowed_consequences_annotations(self, annotations, annotation_filters, transcript_consequence_filter = self._get_transcript_consequence_filter(allowed_consequence_ids, allowed_consequences) has_consequence_filter = transcript_consequence_filter is not None if has_consequence_filter: - allowed_transcripts = self._ht[self.TRANSCRIPTS_FIELD].filter(transcript_consequence_filter) + allowed_transcripts = ht[self.TRANSCRIPTS_FIELD].filter(transcript_consequence_filter) annotation_exprs[ALLOWED_TRANSCRIPTS] = allowed_transcripts if annotation_filters: annotation_exprs[HAS_ANNOTATION_OVERRIDE] = hl.any(annotation_filters) @@ -702,11 +694,12 @@ def _get_transcript_consequence_filter(self, allowed_consequence_ids, allowed_co allowed_consequence_ids = hl.set(allowed_consequence_ids) return lambda gc: allowed_consequence_ids.contains(gc.major_consequence_id) - def _get_annotation_override_filters(self, annotations, **kwargs): + def _get_annotation_override_filters(self, ht, annotations, **kwargs): return [] @staticmethod def _get_annotation_filters(ht, is_secondary=False): + # TODO not needed for anything except comp het search, just directly filter for everything else suffix = '_secondary' if is_secondary else '' annotation_filters = [] @@ -722,9 +715,7 @@ def _get_annotation_filters(ht, is_secondary=False): def _filter_compound_hets(self): # pylint: disable=pointless-string-statement - ch_ht = self._ht - if self._is_recessive_search: - ch_ht = ch_ht.filter(ch_ht.comp_het_family_entries.any(hl.is_defined)) + ch_ht = self._comp_het_ht # Get possible pairs of variants within the same gene ch_ht = ch_ht.annotate(gene_ids=self._gene_ids_expr(ch_ht, comp_het=True)) @@ -813,8 +804,8 @@ def _filter_compound_hets(self): def _filter_grouped_compound_hets(self, ch_ht): # Filter variant pairs for family and genotype - ch_ht = ch_ht.annotate(valid_families=hl.enumerate(ch_ht.v1.comp_het_family_entries).map( - lambda x: self._is_valid_comp_het_family(ch_ht, x[1], ch_ht.v2.comp_het_family_entries[x[0]]) + ch_ht = ch_ht.annotate(valid_families=hl.enumerate(ch_ht.v1.family_entries).map( + lambda x: self._is_valid_comp_het_family(ch_ht, x[1], ch_ht.v2.family_entries[x[0]]) )) ch_ht = ch_ht.filter(ch_ht.valid_families.any(lambda x: x)) ch_ht = ch_ht.select(**{k: self._annotated_comp_het_variant(ch_ht, k) for k in ['v1', 'v2']}) @@ -827,7 +818,7 @@ def _annotated_comp_het_variant(ch_ht, field): return 
variant.annotate( comp_het_gene_ids=ch_ht.comp_het_gene_ids, family_entries=hl.enumerate(ch_ht.valid_families).filter( - lambda x: x[1]).map(lambda x: variant.comp_het_family_entries[x[0]]), + lambda x: x[1]).map(lambda x: variant.family_entries[x[0]]), ) @classmethod @@ -974,7 +965,7 @@ def lookup_variant(self, variant_id, sample_data=None): if sample_data: project_samples, _ = self._parse_sample_data(sample_data) - for pht, _ in self._load_filtered_project_hts(project_samples, skip_all_missing=True): + for pht, _, _ in self._load_filtered_project_hts(project_samples, skip_all_missing=True): project_entries = pht.aggregate(hl.agg.take(hl.struct(**{k: v(pht) for k, v in entry_annotations.items()}), 1)) variant[FAMILY_GUID_FIELD] += project_entries[0][FAMILY_GUID_FIELD] variant[GENOTYPES_FIELD].update(project_entries[0][GENOTYPES_FIELD]) diff --git a/hail_search/queries/mito.py b/hail_search/queries/mito.py index d0c3830bd5..46e5525fd8 100644 --- a/hail_search/queries/mito.py +++ b/hail_search/queries/mito.py @@ -219,19 +219,19 @@ def _get_transcript_consequence_filter(self, allowed_consequence_ids, allowed_co if canonical_consequences else allowed_consequence_ids ).contains) - def _get_annotation_override_filters(self, annotations, pathogenicity=None, **kwargs): + def _get_annotation_override_filters(self, ht, annotations, pathogenicity=None, **kwargs): annotation_filters = [] for key in self.PATHOGENICITY_FILTERS.keys(): path_terms = (pathogenicity or {}).get(key) if path_terms: - annotation_filters.append(self._has_path_expr(path_terms, key)) + annotation_filters.append(self._has_path_expr(ht,path_terms, key)) return annotation_filters - def _frequency_override_filter(self, pathogenicity): + def _frequency_override_filter(self, ht, pathogenicity): path_terms = self._get_clinvar_path_filters(pathogenicity) - return self._has_path_expr(path_terms, CLINVAR_KEY) if path_terms else None + return self._has_path_expr(ht, path_terms, CLINVAR_KEY) if path_terms else None @staticmethod def _get_clinvar_path_filters(pathogenicity): @@ -239,7 +239,7 @@ def _get_clinvar_path_filters(pathogenicity): f for f in (pathogenicity or {}).get(CLINVAR_KEY) or [] if f in CLINVAR_PATH_SIGNIFICANCES } - def _has_path_expr(self, terms, field): + def _has_path_expr(self, ht, terms, field): subfield, range_configs = self.PATHOGENICITY_FILTERS[field] field_name = self.PATHOGENICITY_FIELD_MAP.get(field, field) enum_lookup = self._get_enum_lookup(field_name, subfield) @@ -254,7 +254,7 @@ def _has_path_expr(self, terms, field): ranges.append([None, None]) ranges = [r for r in ranges if r[0] is not None] - value = self._ht[field_name][f'{subfield}_id'] + value = ht[field_name][f'{subfield}_id'] return hl.any(lambda r: (value >= r[0]) & (value <= r[1]), ranges) def _format_results(self, ht, *args, **kwargs): diff --git a/hail_search/queries/multi_data_types.py b/hail_search/queries/multi_data_types.py index 44d74a382b..cee14dbac6 100644 --- a/hail_search/queries/multi_data_types.py +++ b/hail_search/queries/multi_data_types.py @@ -71,7 +71,7 @@ def _filter_data_type_comp_hets(self, variant_ht, variant_families, sv_query): @staticmethod def _family_filtered_ch_ht(ht, overlapped_families, families, key): family_indices = hl.array([families.index(family_guid) for family_guid in overlapped_families]) - ht = ht.annotate(comp_het_family_entries=family_indices.map(lambda i: ht.comp_het_family_entries[i])) + ht = ht.annotate(family_entries=family_indices.map(lambda i: ht.family_entries[i])) return 
ht.group_by('gene_ids').aggregate(**{key: hl.agg.collect(ht.row)}) def _is_valid_comp_het_family(self, ch_ht, entries_1, entries_2): @@ -93,6 +93,9 @@ def _comp_het_entry_has_ref(self, gt1, gt2): def format_search_ht(self): hts = [] + import logging + import time + logger = logging.getLogger(__name__) for data_type, query in self._data_type_queries.items(): dt_ht = query.format_search_ht() if dt_ht is None: @@ -101,6 +104,28 @@ def format_search_ht(self): if merged_sort_expr is not None: dt_ht = dt_ht.annotate(_sort=merged_sort_expr) hts.append(dt_ht.select('_sort', **{data_type: dt_ht.row})) + # start = time.perf_counter() + # logger.info(f'{data_type}: {dt_ht.count()} ({time.perf_counter() - start:0.4f}s)') + """ + Hom-recessive only: + SV_WGS: 0 (5.9890s) + MITO: 0 (2.4309s) + SNV_INDEL: 3 (16.8396s) + + All recessive (with comp het) + SV_WGS: 0 (14.6799s) + MITO: 0 (8.7807s) + SNV_INDEL: 11 (170.8936s) + comp het SV_WGS: 0 (86.7876s) + Actual total: ~304s + + With updates: + SV_WGS: 0 (20.0788s) + MITO: 0 (9.6441s) + SNV_INDEL: 11 (106.1276s) + SV_WGS: 0 (82.6384s) + Actual total: ~217s + """ for data_type, ch_ht in self._comp_het_hts.items(): ch_ht = ch_ht.annotate( @@ -111,6 +136,8 @@ def format_search_ht(self): _sort=hl.sorted([ch_ht.v1._sort, ch_ht.v2._sort])[0], **{f'comp_het_{data_type}': ch_ht.row}, )) + # start = time.perf_counter() + # logger.info(f'comp het {data_type}: {ch_ht.count()} ({time.perf_counter() - start:0.4f}s)') ht = hts[0] for sub_ht in hts[1:]: diff --git a/hail_search/queries/snv_indel.py b/hail_search/queries/snv_indel.py index bcc06d0015..0eac76c7f9 100644 --- a/hail_search/queries/snv_indel.py +++ b/hail_search/queries/snv_indel.py @@ -93,14 +93,14 @@ def _get_gnomad_af_prefilter(self, frequencies=None, pathogenicity=None, **kwarg return 'is_gt_10_percent' if af_cutoff > PREFILTER_FREQ_CUTOFF else True - def _get_annotation_override_filters(self, annotations, *args, **kwargs): - annotation_filters = super()._get_annotation_override_filters(annotations, *args, **kwargs) + def _get_annotation_override_filters(self, ht, annotations, *args, **kwargs): + annotation_filters = super()._get_annotation_override_filters(ht, annotations, *args, **kwargs) if annotations.get(SCREEN_KEY): allowed_consequences = hl.set(self._get_enum_terms_ids(SCREEN_KEY.lower(), 'region_type', annotations[SCREEN_KEY])) - annotation_filters.append(allowed_consequences.contains(self._ht.screen.region_type_ids.first())) + annotation_filters.append(allowed_consequences.contains(ht.screen.region_type_ids.first())) if annotations.get(SPLICE_AI_FIELD): - score_filter, _ = self._get_in_silico_filter(SPLICE_AI_FIELD, annotations[SPLICE_AI_FIELD]) + score_filter, _ = self._get_in_silico_filter(ht, SPLICE_AI_FIELD, annotations[SPLICE_AI_FIELD]) annotation_filters.append(score_filter) return annotation_filters diff --git a/hail_search/queries/snv_indel_37.py b/hail_search/queries/snv_indel_37.py index b52b14f53a..2ef261692b 100644 --- a/hail_search/queries/snv_indel_37.py +++ b/hail_search/queries/snv_indel_37.py @@ -11,6 +11,6 @@ class SnvIndelHailTableQuery37(SnvIndelHailTableQuery): def _should_add_chr_prefix(self): return False - def _get_annotation_override_filters(self, annotations, *args, **kwargs): + def _get_annotation_override_filters(self, ht, annotations, *args, **kwargs): annotations = {k: v for k, v in annotations.items() if k != SCREEN_KEY} - return super()._get_annotation_override_filters(annotations, *args, **kwargs) + return super()._get_annotation_override_filters(ht, annotations, 
*args, **kwargs) diff --git a/hail_search/queries/sv.py b/hail_search/queries/sv.py index 5e363adbfc..f88f722a27 100644 --- a/hail_search/queries/sv.py +++ b/hail_search/queries/sv.py @@ -54,18 +54,18 @@ class SvHailTableQuery(BaseHailTableQuery): def _get_sample_type(cls, *args): return cls.DATA_TYPE.split('_')[-1] - def _filter_annotated_table(self, *args, parsed_intervals=None, exclude_intervals=False, **kwargs): + def _filter_annotated_table(self, ht, *args, parsed_intervals=None, exclude_intervals=False, **kwargs): if parsed_intervals: interval_filter = hl.array(parsed_intervals).any(lambda interval: hl.if_else( - self._ht.start_locus.contig == self._ht.end_locus.contig, - interval.overlaps(hl.interval(self._ht.start_locus, self._ht.end_locus)), - interval.contains(self._ht.start_locus) | interval.contains(self._ht.end_locus), + ht.start_locus.contig == ht.end_locus.contig, + interval.overlaps(hl.interval(ht.start_locus, ht.end_locus)), + interval.contains(ht.start_locus) | interval.contains(ht.end_locus), )) if exclude_intervals: interval_filter = ~interval_filter - self._ht = self._ht.filter(interval_filter) + ht = ht.filter(interval_filter) - return super()._filter_annotated_table(*args, **kwargs) + return super()._filter_annotated_table(ht, *args, **kwargs) def _get_family_passes_quality_filter(self, quality_filter, annotations=None, **kwargs): passes_quality = super()._get_family_passes_quality_filter(quality_filter) @@ -78,18 +78,18 @@ def _get_family_passes_quality_filter(self, quality_filter, annotations=None, ** return lambda entries: entries_has_new_call(entries) & passes_quality(entries) - def _get_allowed_consequences_annotations(self, annotations, annotation_filters, is_secondary=False): + def _get_allowed_consequences_annotations(self, ht, annotations, annotation_filters, is_secondary=False): if is_secondary: # SV search can specify secondary SV types, as well as secondary consequences - annotation_filters = self._get_annotation_override_filters(annotations) - return super()._get_allowed_consequences_annotations(annotations, annotation_filters) + annotation_filters = self._get_annotation_override_filters(ht, annotations) + return super()._get_allowed_consequences_annotations(ht, annotations, annotation_filters) - def _get_annotation_override_filters(self, annotations, **kwargs): + def _get_annotation_override_filters(self, ht, annotations, **kwargs): annotation_filters = [] if annotations.get(STRUCTURAL_ANNOTATION_FIELD): allowed_type_ids = self.get_allowed_sv_type_ids(annotations[STRUCTURAL_ANNOTATION_FIELD]) if allowed_type_ids: - annotation_filters.append(hl.set(allowed_type_ids).contains(self._ht.sv_type_id)) + annotation_filters.append(hl.set(allowed_type_ids).contains(ht.sv_type_id)) return annotation_filters diff --git a/hail_search/web_app.py b/hail_search/web_app.py index 4d091df91c..fa171b8a18 100644 --- a/hail_search/web_app.py +++ b/hail_search/web_app.py @@ -65,7 +65,7 @@ async def init_web_app(): spark_conf['spark.driver.memory'] = f'{int((int(MACHINE_MEM)-11)*JVM_MEMORY_FRACTION)}g' if JAVA_OPTS_XSS: spark_conf.update({f'spark.{field}.extraJavaOptions': f'-Xss{JAVA_OPTS_XSS}' for field in ['driver', 'executor']}) - hl.init(idempotent=True, spark_conf=spark_conf or None) + hl.init(idempotent=True, spark_conf=spark_conf or None, backend='local') load_globals() app = web.Application(middlewares=[error_middleware], client_max_size=(1024**2)*10) app.add_routes([ From 414f87d790836cb57a73dcb9cf2cee9650bce6f2 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 12 
Feb 2024 00:23:11 -0500 Subject: [PATCH 2/7] fix bugs --- hail_search/queries/base.py | 12 ++++-------- hail_search/queries/gcnv.py | 9 ++++----- hail_search/queries/multi_data_types.py | 2 ++ 3 files changed, 10 insertions(+), 13 deletions(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index b449d58b48..d32a6d73ea 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -212,10 +212,6 @@ def __init__(self, sample_data, sort=XPOS, sort_metadata=None, num_results=100, if sample_data: self._load_filtered_table(sample_data, inheritance_mode=inheritance_mode, **kwargs) - @property - def _is_recessive_search(self): - return self._inheritance_mode == RECESSIVE - @property def _has_comp_het_search(self): return self._inheritance_mode in {RECESSIVE, COMPOUND_HET} @@ -306,9 +302,9 @@ def import_filtered_table(self, sample_data, intervals=None, **kwargs): entry_type = main_ht.family_entries.dtype.element_type for project_ht, comp_het_project_ht, num_project_families in filtered_project_hts[1:]: if families_ht is not None: - families_ht = _add_project_ht(self, families_ht, project_ht, entry_type) + families_ht = self._add_project_ht(self, families_ht, project_ht, entry_type) if comp_het_families_ht is not None: - comp_het_families_ht = _add_project_ht(self, comp_het_families_ht, comp_het_project_ht, entry_type) + comp_het_families_ht = self._add_project_ht(self, comp_het_families_ht, comp_het_project_ht, entry_type) num_families += num_project_families # TODO add pre-processing for annotations so do not even read in tables if not going to have vaild annotations @@ -718,7 +714,7 @@ def _filter_compound_hets(self): ch_ht = self._comp_het_ht # Get possible pairs of variants within the same gene - ch_ht = ch_ht.annotate(gene_ids=self._gene_ids_expr(ch_ht, comp_het=True)) + ch_ht = ch_ht.annotate(gene_ids=self._gene_ids_expr(ch_ht)) ch_ht = ch_ht.explode(ch_ht.gene_ids) # Filter allowed transcripts to the grouped gene @@ -822,7 +818,7 @@ def _annotated_comp_het_variant(ch_ht, field): ) @classmethod - def _gene_ids_expr(cls, ht, comp_het=False): + def _gene_ids_expr(cls, ht): return hl.set(ht[cls.TRANSCRIPTS_FIELD].map(lambda t: t.gene_id)) def _is_valid_comp_het_family(self, ch_ht, entries_1, entries_2): diff --git a/hail_search/queries/gcnv.py b/hail_search/queries/gcnv.py index 3ac38ef235..868020e3fe 100644 --- a/hail_search/queries/gcnv.py +++ b/hail_search/queries/gcnv.py @@ -62,10 +62,10 @@ class GcnvHailTableQuery(SvHailTableQuery): POPULATIONS = {k: v for k, v in SvHailTableQuery.POPULATIONS.items() if k != 'gnomad_svs'} @classmethod - def _get_genotype_override_field(cls, r, field, family_entries_field=None): + def _get_genotype_override_field(cls, r, field): agg, get_default = cls.GENOTYPE_OVERRIDE_FIELDS[field] sample_field = f'sample_{field}' - entries = r[family_entries_field or 'family_entries'].flatmap(lambda x: x) + entries = r.family_entries.flatmap(lambda x: x) return hl.if_else( entries.any(lambda g: hl.is_defined(g.GT) & hl.is_missing(g[sample_field])), get_default(r), agg(entries.map(lambda g: g[sample_field])) @@ -85,10 +85,9 @@ def get_allowed_sv_type_ids(self, sv_types): ]) @classmethod - def _gene_ids_expr(cls, ht, comp_het=False): - family_entries_field = 'comp_het_family_entries' if comp_het else None + def _gene_ids_expr(cls, ht): return hl.or_else( - cls._get_genotype_override_field(ht, 'gene_ids', family_entries_field=family_entries_field), + cls._get_genotype_override_field(ht, 'gene_ids'), super()._gene_ids_expr(ht), ) diff 
--git a/hail_search/queries/multi_data_types.py b/hail_search/queries/multi_data_types.py index cee14dbac6..01420934e9 100644 --- a/hail_search/queries/multi_data_types.py +++ b/hail_search/queries/multi_data_types.py @@ -70,6 +70,7 @@ def _filter_data_type_comp_hets(self, variant_ht, variant_families, sv_query): @staticmethod def _family_filtered_ch_ht(ht, overlapped_families, families, key): + # TODO only remap families if different family_indices = hl.array([families.index(family_guid) for family_guid in overlapped_families]) ht = ht.annotate(family_entries=family_indices.map(lambda i: ht.family_entries[i])) return ht.group_by('gene_ids').aggregate(**{key: hl.agg.collect(ht.row)}) @@ -125,6 +126,7 @@ def format_search_ht(self): SNV_INDEL: 11 (106.1276s) SV_WGS: 0 (82.6384s) Actual total: ~217s + (actual-actual: 244.699374) """ for data_type, ch_ht in self._comp_het_hts.items(): From f14e486325ea849b409884a73b024fcbdff154d1 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 12 Feb 2024 01:09:59 -0500 Subject: [PATCH 3/7] fix multi project comp het search --- hail_search/queries/base.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index d32a6d73ea..969367c730 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -302,9 +302,13 @@ def import_filtered_table(self, sample_data, intervals=None, **kwargs): entry_type = main_ht.family_entries.dtype.element_type for project_ht, comp_het_project_ht, num_project_families in filtered_project_hts[1:]: if families_ht is not None: - families_ht = self._add_project_ht(self, families_ht, project_ht, entry_type) + families_ht = self._add_project_ht(families_ht, project_ht, default=hl.empty_array(entry_type)) if comp_het_families_ht is not None: - comp_het_families_ht = self._add_project_ht(self, comp_het_families_ht, comp_het_project_ht, entry_type) + comp_het_families_ht = self._add_project_ht( + comp_het_families_ht, comp_het_project_ht, + default=hl.range(num_families).map(lambda i: hl.missing(entry_type)), + default_1=hl.range(num_project_families).map(lambda i: hl.missing(entry_type)), + ) num_families += num_project_families # TODO add pre-processing for annotations so do not even read in tables if not going to have vaild annotations @@ -317,20 +321,22 @@ def import_filtered_table(self, sample_data, intervals=None, **kwargs): ht = self._query_table_annotations(families_ht, self._get_table_path('annotations.ht')) self._ht = self._filter_annotated_table(ht, **kwargs) - def _add_project_ht(self, families_ht, project_ht, entry_type): + def _add_project_ht(self, families_ht, project_ht, default, default_1=None): + if default_1 is None: + default_1 = default + families_ht = families_ht.join(project_ht, how='outer') families_ht = families_ht.select_globals( family_guids=families_ht.family_guids.extend(families_ht.family_guids_1) ) - select_fields = { - 'filters': families_ht.filters.union(families_ht.filters_1), - 'family_entries': hl.bind( + return families_ht.select( + filters=families_ht.filters.union(families_ht.filters_1), + family_entries=hl.bind( lambda a1, a2: a1.extend(a2), - hl.or_else(families_ht.family_entries, hl.empty_array(entry_type)), - hl.or_else(families_ht.family_entries_1, hl.empty_array(entry_type)), + hl.or_else(families_ht.family_entries, default), + hl.or_else(families_ht.family_entries_1, default_1), ), - } - return families_ht.select(**select_fields) + ) def _filter_entries_table(self, ht, 
sample_data, inheritance_mode=None, inheritance_filter=None, quality_filter=None, **kwargs): @@ -567,6 +573,7 @@ def _should_add_chr_prefix(self): return True def _filter_by_frequency(self, ht, frequencies, pathogenicity): + # TODO do not filter if af == 1 frequencies = {k: v for k, v in (frequencies or {}).items() if k in self.POPULATIONS} if not frequencies: return ht @@ -642,6 +649,8 @@ def _filter_by_annotations(self, ht, pathogenicity, annotations, annotations_sec annotations = annotations or {} annotation_override_filters = self._get_annotation_override_filters(ht, annotations, pathogenicity=pathogenicity) + # TODO confirm primary and secondary annotations are actually different before annotating etc - + # ignore empty arrays and data-type specific fields annotation_exprs, _ = self._get_allowed_consequences_annotations(ht, annotations, annotation_override_filters) if is_comp_het or (self._has_comp_het_search and not annotation_exprs): secondary_exprs, allowed_secondary_consequences = self._get_allowed_consequences_annotations( From e5cb704dc26f81d43e4d0c6229ae0df8a59eede3 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 12 Feb 2024 01:47:18 -0500 Subject: [PATCH 4/7] fix no inheritance search --- hail_search/queries/base.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index 969367c730..27e947659f 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -432,10 +432,13 @@ def _filter_inheritance(self, ht, inheritance_mode, inheritance_filter, sorted_f ht, COMPOUND_HET, inheritance_filter, sorted_family_sample_data, ) - if (inheritance_filter or inheritance_mode) and not is_any_affected: - ht = None if inheritance_mode == COMPOUND_HET else self._filter_families_inheritance( - ht, inheritance_mode, inheritance_filter, sorted_family_sample_data, - ) + if is_any_affected or not (inheritance_filter and inheritance_mode): + # No sample-specific inheritance filtering needed + sorted_family_sample_data = [] + + ht = None if inheritance_mode == COMPOUND_HET else self._filter_families_inheritance( + ht, inheritance_mode, inheritance_filter, sorted_family_sample_data, + ) return ht, comp_het_ht @@ -650,7 +653,9 @@ def _filter_by_annotations(self, ht, pathogenicity, annotations, annotations_sec annotation_override_filters = self._get_annotation_override_filters(ht, annotations, pathogenicity=pathogenicity) # TODO confirm primary and secondary annotations are actually different before annotating etc - - # ignore empty arrays and data-type specific fields + # ignore empty arrays and data-type specific fields from other data types and different sorts + # Run _get_allowed_consequence_ids on both before loading to determine if different + # also check diff overrides somehow annotation_exprs, _ = self._get_allowed_consequences_annotations(ht, annotations, annotation_override_filters) if is_comp_het or (self._has_comp_het_search and not annotation_exprs): secondary_exprs, allowed_secondary_consequences = self._get_allowed_consequences_annotations( From d624bec8aa2b1e1e9fd603027dc22de2ce9ff758 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 12 Feb 2024 02:05:29 -0500 Subject: [PATCH 5/7] oop --- hail_search/queries/base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index 27e947659f..a274dc3d05 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -432,7 +432,7 @@ def 
_filter_inheritance(self, ht, inheritance_mode, inheritance_filter, sorted_f ht, COMPOUND_HET, inheritance_filter, sorted_family_sample_data, ) - if is_any_affected or not (inheritance_filter and inheritance_mode): + if is_any_affected or not (inheritance_filter or inheritance_mode): # No sample-specific inheritance filtering needed sorted_family_sample_data = [] @@ -654,8 +654,8 @@ def _filter_by_annotations(self, ht, pathogenicity, annotations, annotations_sec # TODO confirm primary and secondary annotations are actually different before annotating etc - # ignore empty arrays and data-type specific fields from other data types and different sorts - # Run _get_allowed_consequence_ids on both before loading to determine if different - # also check diff overrides somehow + # Run _get_allowed_consequence_ids on both before loading to determine if different + # also check diff overrides somehow annotation_exprs, _ = self._get_allowed_consequences_annotations(ht, annotations, annotation_override_filters) if is_comp_het or (self._has_comp_het_search and not annotation_exprs): secondary_exprs, allowed_secondary_consequences = self._get_allowed_consequences_annotations( From fe1c7dc540153f59f887e44823bf95e8ee85b09a Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 12 Feb 2024 02:08:59 -0500 Subject: [PATCH 6/7] clean up --- hail_search/queries/base.py | 7 ------ hail_search/queries/multi_data_types.py | 30 ------------------------- hail_search/web_app.py | 2 +- 3 files changed, 1 insertion(+), 38 deletions(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index a274dc3d05..bd8364ce3e 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -311,7 +311,6 @@ def import_filtered_table(self, sample_data, intervals=None, **kwargs): ) num_families += num_project_families - # TODO add pre-processing for annotations so do not even read in tables if not going to have vaild annotations if comp_het_families_ht is not None: comp_het_ht = self._query_table_annotations(comp_het_families_ht, self._get_table_path('annotations.ht')) self._comp_het_ht = self._filter_annotated_table(comp_het_ht, is_comp_het=True, **kwargs) @@ -576,7 +575,6 @@ def _should_add_chr_prefix(self): return True def _filter_by_frequency(self, ht, frequencies, pathogenicity): - # TODO do not filter if af == 1 frequencies = {k: v for k, v in (frequencies or {}).items() if k in self.POPULATIONS} if not frequencies: return ht @@ -652,10 +650,6 @@ def _filter_by_annotations(self, ht, pathogenicity, annotations, annotations_sec annotations = annotations or {} annotation_override_filters = self._get_annotation_override_filters(ht, annotations, pathogenicity=pathogenicity) - # TODO confirm primary and secondary annotations are actually different before annotating etc - - # ignore empty arrays and data-type specific fields from other data types and different sorts - # Run _get_allowed_consequence_ids on both before loading to determine if different - # also check diff overrides somehow annotation_exprs, _ = self._get_allowed_consequences_annotations(ht, annotations, annotation_override_filters) if is_comp_het or (self._has_comp_het_search and not annotation_exprs): secondary_exprs, allowed_secondary_consequences = self._get_allowed_consequences_annotations( @@ -709,7 +703,6 @@ def _get_annotation_override_filters(self, ht, annotations, **kwargs): @staticmethod def _get_annotation_filters(ht, is_secondary=False): - # TODO not needed for anything except comp het search, just directly filter for 
everything else suffix = '_secondary' if is_secondary else '' annotation_filters = [] diff --git a/hail_search/queries/multi_data_types.py b/hail_search/queries/multi_data_types.py index 01420934e9..dd950954f0 100644 --- a/hail_search/queries/multi_data_types.py +++ b/hail_search/queries/multi_data_types.py @@ -70,7 +70,6 @@ def _filter_data_type_comp_hets(self, variant_ht, variant_families, sv_query): @staticmethod def _family_filtered_ch_ht(ht, overlapped_families, families, key): - # TODO only remap families if different family_indices = hl.array([families.index(family_guid) for family_guid in overlapped_families]) ht = ht.annotate(family_entries=family_indices.map(lambda i: ht.family_entries[i])) return ht.group_by('gene_ids').aggregate(**{key: hl.agg.collect(ht.row)}) @@ -94,9 +93,6 @@ def _comp_het_entry_has_ref(self, gt1, gt2): def format_search_ht(self): hts = [] - import logging - import time - logger = logging.getLogger(__name__) for data_type, query in self._data_type_queries.items(): dt_ht = query.format_search_ht() if dt_ht is None: @@ -105,30 +101,6 @@ def format_search_ht(self): if merged_sort_expr is not None: dt_ht = dt_ht.annotate(_sort=merged_sort_expr) hts.append(dt_ht.select('_sort', **{data_type: dt_ht.row})) - # start = time.perf_counter() - # logger.info(f'{data_type}: {dt_ht.count()} ({time.perf_counter() - start:0.4f}s)') - """ - Hom-recessive only: - SV_WGS: 0 (5.9890s) - MITO: 0 (2.4309s) - SNV_INDEL: 3 (16.8396s) - - All recessive (with comp het) - SV_WGS: 0 (14.6799s) - MITO: 0 (8.7807s) - SNV_INDEL: 11 (170.8936s) - comp het SV_WGS: 0 (86.7876s) - Actual total: ~304s - - With updates: - SV_WGS: 0 (20.0788s) - MITO: 0 (9.6441s) - SNV_INDEL: 11 (106.1276s) - SV_WGS: 0 (82.6384s) - Actual total: ~217s - (actual-actual: 244.699374) - """ - for data_type, ch_ht in self._comp_het_hts.items(): ch_ht = ch_ht.annotate( v1=self._format_comp_het_result(ch_ht.v1, SNV_INDEL_DATA_TYPE), @@ -138,8 +110,6 @@ def format_search_ht(self): _sort=hl.sorted([ch_ht.v1._sort, ch_ht.v2._sort])[0], **{f'comp_het_{data_type}': ch_ht.row}, )) - # start = time.perf_counter() - # logger.info(f'comp het {data_type}: {ch_ht.count()} ({time.perf_counter() - start:0.4f}s)') ht = hts[0] for sub_ht in hts[1:]: diff --git a/hail_search/web_app.py b/hail_search/web_app.py index fa171b8a18..4d091df91c 100644 --- a/hail_search/web_app.py +++ b/hail_search/web_app.py @@ -65,7 +65,7 @@ async def init_web_app(): spark_conf['spark.driver.memory'] = f'{int((int(MACHINE_MEM)-11)*JVM_MEMORY_FRACTION)}g' if JAVA_OPTS_XSS: spark_conf.update({f'spark.{field}.extraJavaOptions': f'-Xss{JAVA_OPTS_XSS}' for field in ['driver', 'executor']}) - hl.init(idempotent=True, spark_conf=spark_conf or None, backend='local') + hl.init(idempotent=True, spark_conf=spark_conf or None) load_globals() app = web.Application(middlewares=[error_middleware], client_max_size=(1024**2)*10) app.add_routes([ From 5cbe0d440f9906df9b4b3299a74294f6445f3fb7 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Mon, 12 Feb 2024 02:18:59 -0500 Subject: [PATCH 7/7] clean up --- hail_search/queries/multi_data_types.py | 1 + 1 file changed, 1 insertion(+) diff --git a/hail_search/queries/multi_data_types.py b/hail_search/queries/multi_data_types.py index dd950954f0..f0ee5be221 100644 --- a/hail_search/queries/multi_data_types.py +++ b/hail_search/queries/multi_data_types.py @@ -101,6 +101,7 @@ def format_search_ht(self): if merged_sort_expr is not None: dt_ht = dt_ht.annotate(_sort=merged_sort_expr) hts.append(dt_ht.select('_sort', **{data_type: 
dt_ht.row}))
+
         for data_type, ch_ht in self._comp_het_hts.items():
             ch_ht = ch_ht.annotate(
                 v1=self._format_comp_het_result(ch_ht.v1, SNV_INDEL_DATA_TYPE),
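The series above keeps candidate compound hets in their own Hail table and only ever pairs variants that share a gene before validating each pair per family. A rough, self-contained sketch of that grouping-and-pairing idea follows (toy rows and illustrative field names, not code from these patches; the real queries also check per-family genotype entries and annotations before a pair is returned):

import hail as hl

# Toy variant table: each row lists the genes the variant overlaps.
ht = hl.Table.parallelize(
    [
        {'variant_id': 'v1', 'gene_ids': ['GENE_A']},
        {'variant_id': 'v2', 'gene_ids': ['GENE_A', 'GENE_B']},
        {'variant_id': 'v3', 'gene_ids': ['GENE_B']},
    ],
    hl.tstruct(variant_id=hl.tstr, gene_ids=hl.tarray(hl.tstr)),
)

# One row per (variant, gene), then collect the variants that share each gene.
ht = ht.explode(ht.gene_ids)
grouped = ht.group_by('gene_ids').aggregate(variants=hl.agg.collect(ht.row))

# Enumerate unordered pairs of distinct variants within a gene - the candidate
# compound-het pairs that a dedicated comp-het table would then filter per family.
pairs = grouped.select(pairs=hl.range(hl.len(grouped.variants)).flatmap(
    lambda i: hl.range(i + 1, hl.len(grouped.variants)).map(
        lambda j: hl.struct(v1=grouped.variants[i].variant_id, v2=grouped.variants[j].variant_id),
    ),
))
pairs = pairs.explode(pairs.pairs)
pairs.show()  # expect (v1, v2) for GENE_A and (v2, v3) for GENE_B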