-
Notifications
You must be signed in to change notification settings - Fork 88
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Hail backend - In-silico filter #3530
Changes from 6 commits
26a3c91
693d04c
5c3eea4
e80cb1b
9b1d052
5c2d792
0b94b58
e9b7682
5e31264
794fa4a
df30e8e
f0aed20
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -105,6 +105,12 @@ def _format_transcript_args(self): | |
'format_value': lambda value: value.rename({k: _to_camel_case(k) for k in value.keys()}), | ||
} | ||
|
||
def _get_enum_lookup(self, field, subfield): | ||
enum_field = self._enums.get(field, {}).get(subfield) | ||
if enum_field is None: | ||
return None | ||
return {v: i for i, v in enumerate(enum_field)} | ||
|
||
@staticmethod | ||
def _enum_field(value, enum, globals=None, annotate_value=None, format_value=None, drop_fields=None, **kwargs): | ||
annotations = {} | ||
|
@@ -377,9 +383,11 @@ def _filter_vcf_filters(ht): | |
def get_x_chrom_filter(ht, x_interval): | ||
return x_interval.contains(ht.locus) | ||
|
||
def _filter_annotated_table(self, frequencies=None, **kwargs): | ||
def _filter_annotated_table(self, frequencies=None, in_silico=None, **kwargs): | ||
self._filter_by_frequency(frequencies) | ||
|
||
self._filter_by_in_silico(in_silico) | ||
|
||
def _filter_by_frequency(self, frequencies): | ||
frequencies = {k: v for k, v in (frequencies or {}).items() if k in self.POPULATIONS} | ||
if not frequencies: | ||
|
@@ -411,6 +419,44 @@ def _filter_by_frequency(self, frequencies): | |
pop_filter &= pf | ||
self._ht = self._ht.filter(hl.is_missing(pop_expr) | pop_filter) | ||
|
||
def _filter_by_in_silico(self, in_silico_filters): | ||
in_silico_filters = in_silico_filters or {} | ||
require_score = in_silico_filters.get('requireScore', False) | ||
in_silico_filters = { | ||
k: v for k, v in in_silico_filters.items() | ||
if k in self.PREDICTION_FIELDS_CONFIG and v is not None and len(v) != 0 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah, this was copy-pasted from elasticsearch code, but it does not seem to be neccessary |
||
} | ||
if not in_silico_filters: | ||
return | ||
|
||
in_silico_qs = [] | ||
missing_qs = [] | ||
for in_silico, value in in_silico_filters.items(): | ||
score_path = self.PREDICTION_FIELDS_CONFIG[in_silico] | ||
enum_lookup = self._get_enum_lookup(*score_path) | ||
if enum_lookup is not None: | ||
ht_value = self._ht[score_path.source][f'{score_path.field}_id'] | ||
score_filter = ht_value == enum_lookup[value] | ||
else: | ||
ht_value = self._ht[score_path.source][score_path.field] | ||
score_filter = ht_value >= float(value) | ||
|
||
in_silico_qs.append(score_filter) | ||
if not require_score: | ||
missing_qs.append(hl.is_missing(ht_value)) | ||
|
||
if missing_qs: | ||
missing_q = missing_qs[0] | ||
for q in missing_qs[1:]: | ||
missing_q &= q | ||
in_silico_qs.append(missing_q) | ||
|
||
in_silico_q = in_silico_qs[0] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can this block be replaced by There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Much cleaner 🎉! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I feel like I was frustrated that there was no easy way to do this in hail, and now I'm like "oh duh, of course there is" |
||
for q in in_silico_qs[1:]: | ||
in_silico_q |= q | ||
|
||
self._ht = self._ht.filter(in_silico_q) | ||
|
||
def _format_results(self, ht): | ||
annotations = {k: v(ht) for k, v in self.annotation_fields.items()} | ||
annotations.update({ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using
includes_missing_score
for therequireScore
is more understandable.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
but this is passed from the ui so its not something we can change. I think its clearer to name the variable what its called in the UI code and what its passed in the json as rather than to rename it to something else that means the same thing