Skip to content

Commit

Permalink
Merge pull request #3873 from broadinstitute/recessive-search-split-tables
Browse files: browse the repository at this point in the history

Recessive search split tables
  • Loading branch information
hanars authored Feb 12, 2024
2 parents 7bc3882 + 5cbe0d4 commit 0fa6699
Show file tree
Hide file tree
Showing 7 changed files with 133 additions and 140 deletions.
216 changes: 105 additions & 111 deletions hail_search/queries/base.py

Large diffs are not rendered by default.

9 changes: 4 additions & 5 deletions hail_search/queries/gcnv.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,10 @@ class GcnvHailTableQuery(SvHailTableQuery):
# Reuse every population config from SvHailTableQuery except 'gnomad_svs'.
POPULATIONS = {
    name: config
    for name, config in SvHailTableQuery.POPULATIONS.items()
    if name != 'gnomad_svs'
}

@classmethod
def _get_genotype_override_field(cls, r, field, family_entries_field=None):
def _get_genotype_override_field(cls, r, field):
agg, get_default = cls.GENOTYPE_OVERRIDE_FIELDS[field]
sample_field = f'sample_{field}'
entries = r[family_entries_field or 'family_entries'].flatmap(lambda x: x)
entries = r.family_entries.flatmap(lambda x: x)
return hl.if_else(
entries.any(lambda g: hl.is_defined(g.GT) & hl.is_missing(g[sample_field])),
get_default(r), agg(entries.map(lambda g: g[sample_field]))
Expand All @@ -85,10 +85,9 @@ def get_allowed_sv_type_ids(self, sv_types):
])

@classmethod
def _gene_ids_expr(cls, ht):
    """Return the gene IDs expression for ``ht``.

    Prefers the ``gene_ids`` genotype override field, read from the table's
    ``family_entries``, and falls back to the parent class's gene IDs
    expression when the override value is missing.
    """
    return hl.or_else(
        cls._get_genotype_override_field(ht, 'gene_ids'),
        super()._gene_ids_expr(ht),
    )

Expand Down
12 changes: 6 additions & 6 deletions hail_search/queries/mito.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,27 +219,27 @@ def _get_transcript_consequence_filter(self, allowed_consequence_ids, allowed_co
if canonical_consequences else allowed_consequence_ids
).contains)

def _get_annotation_override_filters(self, annotations, pathogenicity=None, **kwargs):
def _get_annotation_override_filters(self, ht, annotations, pathogenicity=None, **kwargs):
annotation_filters = []

for key in self.PATHOGENICITY_FILTERS.keys():
path_terms = (pathogenicity or {}).get(key)
if path_terms:
annotation_filters.append(self._has_path_expr(path_terms, key))
annotation_filters.append(self._has_path_expr(ht,path_terms, key))

return annotation_filters

def _frequency_override_filter(self, pathogenicity):
def _frequency_override_filter(self, ht, pathogenicity):
path_terms = self._get_clinvar_path_filters(pathogenicity)
return self._has_path_expr(path_terms, CLINVAR_KEY) if path_terms else None
return self._has_path_expr(ht, path_terms, CLINVAR_KEY) if path_terms else None

@staticmethod
def _get_clinvar_path_filters(pathogenicity):
    """Return the set of requested ClinVar terms found in CLINVAR_PATH_SIGNIFICANCES.

    A missing ``pathogenicity`` mapping, or a missing/empty ClinVar entry in
    it, yields an empty set.
    """
    requested_terms = (pathogenicity or {}).get(CLINVAR_KEY) or []
    return {term for term in requested_terms if term in CLINVAR_PATH_SIGNIFICANCES}

def _has_path_expr(self, terms, field):
def _has_path_expr(self, ht, terms, field):
subfield, range_configs = self.PATHOGENICITY_FILTERS[field]
field_name = self.PATHOGENICITY_FIELD_MAP.get(field, field)
enum_lookup = self._get_enum_lookup(field_name, subfield)
Expand All @@ -254,7 +254,7 @@ def _has_path_expr(self, terms, field):
ranges.append([None, None])

ranges = [r for r in ranges if r[0] is not None]
value = self._ht[field_name][f'{subfield}_id']
value = ht[field_name][f'{subfield}_id']
return hl.any(lambda r: (value >= r[0]) & (value <= r[1]), ranges)

def _format_results(self, ht, *args, **kwargs):
Expand Down
2 changes: 1 addition & 1 deletion hail_search/queries/multi_data_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def _filter_data_type_comp_hets(self, variant_ht, variant_families, sv_query):
@staticmethod
def _family_filtered_ch_ht(ht, overlapped_families, families, key):
    """Restrict ``family_entries`` to the overlapped families and group rows by gene.

    Args:
        ht: Table with ``family_entries`` and ``gene_ids`` fields.
        overlapped_families: Family GUIDs to keep, in output order.
        families: Family GUIDs; each overlapped GUID is located with
            ``families.index`` and that position is used to index
            ``family_entries``.
        key: Name of the aggregated field that holds the collected rows.

    Returns:
        A table grouped by ``gene_ids`` whose ``key`` field collects the rows
        of each group.
    """
    family_indices = hl.array([families.index(family_guid) for family_guid in overlapped_families])
    ht = ht.annotate(family_entries=family_indices.map(lambda i: ht.family_entries[i]))
    return ht.group_by('gene_ids').aggregate(**{key: hl.agg.collect(ht.row)})

def _is_valid_comp_het_family(self, ch_ht, entries_1, entries_2):
Expand Down
8 changes: 4 additions & 4 deletions hail_search/queries/snv_indel.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,14 +93,14 @@ def _get_gnomad_af_prefilter(self, frequencies=None, pathogenicity=None, **kwarg

return 'is_gt_10_percent' if af_cutoff > PREFILTER_FREQ_CUTOFF else True

def _get_annotation_override_filters(self, ht, annotations, *args, **kwargs):
    """Extend the parent override filters with SCREEN and SpliceAI filters.

    Args:
        ht: Table the filter expressions are built against.
        annotations: Mapping of annotation category to requested values.
        *args, **kwargs: Forwarded unchanged to the parent implementation.

    Returns:
        The parent's list of filter expressions, plus a SCREEN region-type
        filter and/or a SpliceAI score filter when ``annotations`` has a
        truthy entry for the corresponding key.
    """
    annotation_filters = super()._get_annotation_override_filters(ht, annotations, *args, **kwargs)

    if annotations.get(SCREEN_KEY):
        allowed_consequences = hl.set(
            self._get_enum_terms_ids(SCREEN_KEY.lower(), 'region_type', annotations[SCREEN_KEY])
        )
        # Only the first region type id of each row is compared.
        annotation_filters.append(allowed_consequences.contains(ht.screen.region_type_ids.first()))
    if annotations.get(SPLICE_AI_FIELD):
        # _get_in_silico_filter returns a pair; only the first element is used here.
        score_filter, _ = self._get_in_silico_filter(ht, SPLICE_AI_FIELD, annotations[SPLICE_AI_FIELD])
        annotation_filters.append(score_filter)

    return annotation_filters
4 changes: 2 additions & 2 deletions hail_search/queries/snv_indel_37.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@ class SnvIndelHailTableQuery37(SnvIndelHailTableQuery):
def _should_add_chr_prefix(self):
return False

def _get_annotation_override_filters(self, ht, annotations, *args, **kwargs):
    """Delegate to the parent implementation with the SCREEN entry removed.

    ``annotations`` is copied without its ``SCREEN_KEY`` entry, so the parent
    never builds a SCREEN filter for this class; all other arguments are
    forwarded unchanged.
    """
    annotations = {k: v for k, v in annotations.items() if k != SCREEN_KEY}
    return super()._get_annotation_override_filters(ht, annotations, *args, **kwargs)
22 changes: 11 additions & 11 deletions hail_search/queries/sv.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,18 +54,18 @@ class SvHailTableQuery(BaseHailTableQuery):
def _get_sample_type(cls, *args):
return cls.DATA_TYPE.split('_')[-1]

def _filter_annotated_table(self, ht, *args, parsed_intervals=None, exclude_intervals=False, **kwargs):
    """Apply the interval filter to ``ht``, then run the parent's filtering.

    Args:
        ht: Table with ``start_locus`` and ``end_locus`` fields.
        *args, **kwargs: Forwarded unchanged to the parent implementation.
        parsed_intervals: Optional intervals to filter on. When falsy, no
            interval filtering is applied here.
        exclude_intervals: When true, keep only rows that do NOT match any of
            the intervals.

    Returns:
        Whatever the parent ``_filter_annotated_table`` returns for the
        interval-filtered table.
    """
    if parsed_intervals:
        interval_filter = hl.array(parsed_intervals).any(lambda interval: hl.if_else(
            ht.start_locus.contig == ht.end_locus.contig,
            # Both ends on one contig: test overlap with the start-to-end interval.
            interval.overlaps(hl.interval(ht.start_locus, ht.end_locus)),
            # Ends on different contigs: match if either endpoint is in the interval.
            interval.contains(ht.start_locus) | interval.contains(ht.end_locus),
        ))
        if exclude_intervals:
            interval_filter = ~interval_filter
        ht = ht.filter(interval_filter)

    return super()._filter_annotated_table(ht, *args, **kwargs)

def _get_family_passes_quality_filter(self, quality_filter, annotations=None, **kwargs):
passes_quality = super()._get_family_passes_quality_filter(quality_filter)
Expand All @@ -78,18 +78,18 @@ def _get_family_passes_quality_filter(self, quality_filter, annotations=None, **

return lambda entries: entries_has_new_call(entries) & passes_quality(entries)

def _get_allowed_consequences_annotations(self, ht, annotations, annotation_filters, is_secondary=False):
    """Delegate to the parent, rebuilding the override filters for secondary annotations.

    Args:
        ht: Table the filter expressions are built against.
        annotations: Mapping of annotation category to requested values.
        annotation_filters: Override filters to pass to the parent; replaced
            when ``is_secondary`` is true.
        is_secondary: Whether ``annotations`` are the secondary annotations.
            Not forwarded to the parent.
    """
    if is_secondary:
        # SV search can specify secondary SV types, as well as secondary consequences
        annotation_filters = self._get_annotation_override_filters(ht, annotations)
    return super()._get_allowed_consequences_annotations(ht, annotations, annotation_filters)

def _get_annotation_override_filters(self, ht, annotations, **kwargs):
    """Build the SV-type override filter for ``ht``.

    Args:
        ht: Table with an ``sv_type_id`` field.
        annotations: Mapping of annotation category to requested values.
        **kwargs: Ignored.

    Returns:
        A list holding one expression that tests ``ht.sv_type_id`` against the
        allowed type ids, or an empty list when no structural annotations are
        requested or ``get_allowed_sv_type_ids`` returns nothing truthy.
    """
    annotation_filters = []
    if annotations.get(STRUCTURAL_ANNOTATION_FIELD):
        allowed_type_ids = self.get_allowed_sv_type_ids(annotations[STRUCTURAL_ANNOTATION_FIELD])
        if allowed_type_ids:
            annotation_filters.append(hl.set(allowed_type_ids).contains(ht.sv_type_id))

    return annotation_filters

Expand Down

0 comments on commit 0fa6699

Please sign in to comment.