From 114c35ba8d083dc5f563d5c0a837a899ebf0d8b9 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 2 Aug 2023 16:48:36 -0400 Subject: [PATCH 1/2] save table globals to class instance --- hail_search/hail_search_query.py | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/hail_search/hail_search_query.py b/hail_search/hail_search_query.py index a98e054fca..4fe665b156 100644 --- a/hail_search/hail_search_query.py +++ b/hail_search/hail_search_query.py @@ -65,27 +65,24 @@ def _format_population_config(cls, pop_config): @property def annotation_fields(self): - ht_globals = {k: hl.eval(self._ht[k]) for k in self.GLOBALS} - enums = ht_globals.pop('enums') - annotation_fields = { 'populations': lambda r: hl.struct(**{ population: self.population_expression(r, population) for population in self.POPULATIONS.keys() }), 'predictions': lambda r: hl.struct(**{ - prediction: hl.array(enums[path.source][path.field])[r[path.source][f'{path.field}_id']] - if enums.get(path.source, {}).get(path.field) else r[path.source][path.field] + prediction: hl.array(self._enums[path.source][path.field])[r[path.source][f'{path.field}_id']] + if self._enums.get(path.source, {}).get(path.field) else r[path.source][path.field] for prediction, path in self.PREDICTION_FIELDS_CONFIG.items() }), 'transcripts': lambda r: hl.or_else( r.sorted_transcript_consequences, hl.empty_array(r.sorted_transcript_consequences.dtype.element_type) ).map( - lambda t: self._enum_field(t, enums['sorted_transcript_consequences'], **self._format_transcript_args()) + lambda t: self._enum_field(t, self._enums['sorted_transcript_consequences'], **self._format_transcript_args()) ).group_by(lambda t: t.geneId), } annotation_fields.update(self.BASE_ANNOTATION_FIELDS) - format_enum = lambda k, enum_config: lambda r: self._enum_field(r[k], enums[k], ht_globals=ht_globals, **enum_config) + format_enum = lambda k, enum_config: lambda r: self._enum_field(r[k], self._enums[k], globals=self._globals, **enum_config) annotation_fields.update({ enum_config.get('response_key', k): format_enum(k, enum_config) for k, enum_config in self.ENUM_ANNOTATION_FIELDS.items() @@ -109,7 +106,7 @@ def _format_transcript_args(self): } @staticmethod - def _enum_field(value, enum, ht_globals=None, annotate_value=None, format_value=None, drop_fields=None, **kwargs): + def _enum_field(value, enum, globals=None, annotate_value=None, format_value=None, drop_fields=None, **kwargs): annotations = {} drop = [] + (drop_fields or []) value_keys = value.keys() @@ -126,7 +123,7 @@ def _enum_field(value, enum, ht_globals=None, annotate_value=None, format_value= value = value.annotate(**annotations) if annotate_value: - annotations = annotate_value(value, enum, ht_globals) + annotations = annotate_value(value, enum, globals) value = value.annotate(**annotations) value = value.drop(*drop) @@ -140,6 +137,8 @@ def __init__(self, data_type, sample_data, genome_version, sort=XPOS, num_result self._sort = sort self._num_results = num_results self._ht = None + self._enums = None + self._globals = None self._load_filtered_table(data_type, sample_data, **kwargs) @@ -190,8 +189,11 @@ def import_filtered_table(self, data_type, sample_data, **kwargs): annotation_ht_query_result = hl.query_table( annotations_ht_path, families_ht.key).first().drop(*families_ht.key) ht = families_ht.annotate(**annotation_ht_query_result) - # Add globals - ht = ht.join(hl.read_table(annotations_ht_path).head(0).select().select_globals(*self.GLOBALS), how='left') + + # Get globals + annotation_globals_ht = hl.read_table(annotations_ht_path).head(0).select() + self._globals = {k: hl.eval(annotation_globals_ht[k]) for k in self.GLOBALS} + self._enums = self._globals.pop('enums') self._ht = ht.transmute( genotypes=ht.family_entries.flatmap(lambda x: x).filter( @@ -482,11 +484,11 @@ class VariantHailTableQuery(BaseHailTableQuery): } BASE_ANNOTATION_FIELDS.update(BaseHailTableQuery.BASE_ANNOTATION_FIELDS) ENUM_ANNOTATION_FIELDS = { - 'clinvar': {'annotate_value': lambda value, enum, ht_globals: { + 'clinvar': {'annotate_value': lambda value, enum, globals: { 'conflictingPathogenicities': value.conflictingPathogenicities.map( lambda p: VariantHailTableQuery._enum_field(p, {k: enum[k] for k in ['pathogenicity']}) ), - 'version': ht_globals['versions'].clinvar, + 'version': globals['versions'].clinvar, }}, 'hgmd': {}, 'screen': { From f8a37076ecb48101a0cd8f846fec69f909622480 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Wed, 2 Aug 2023 16:55:44 -0400 Subject: [PATCH 2/2] codacy fix --- hail_search/hail_search_query.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/hail_search/hail_search_query.py b/hail_search/hail_search_query.py index 4fe665b156..9c0491d2ca 100644 --- a/hail_search/hail_search_query.py +++ b/hail_search/hail_search_query.py @@ -82,7 +82,7 @@ def annotation_fields(self): } annotation_fields.update(self.BASE_ANNOTATION_FIELDS) - format_enum = lambda k, enum_config: lambda r: self._enum_field(r[k], self._enums[k], globals=self._globals, **enum_config) + format_enum = lambda k, enum_config: lambda r: self._enum_field(r[k], self._enums[k], ht_globals=self._globals, **enum_config) annotation_fields.update({ enum_config.get('response_key', k): format_enum(k, enum_config) for k, enum_config in self.ENUM_ANNOTATION_FIELDS.items() @@ -106,7 +106,7 @@ def _format_transcript_args(self): } @staticmethod - def _enum_field(value, enum, globals=None, annotate_value=None, format_value=None, drop_fields=None, **kwargs): + def _enum_field(value, enum, ht_globals=None, annotate_value=None, format_value=None, drop_fields=None, **kwargs): annotations = {} drop = [] + (drop_fields or []) value_keys = value.keys() @@ -123,7 +123,7 @@ def _enum_field(value, enum, globals=None, annotate_value=None, format_value=Non value = value.annotate(**annotations) if annotate_value: - annotations = annotate_value(value, enum, globals) + annotations = annotate_value(value, enum, ht_globals) value = value.annotate(**annotations) value = value.drop(*drop) @@ -484,11 +484,11 @@ class VariantHailTableQuery(BaseHailTableQuery): } BASE_ANNOTATION_FIELDS.update(BaseHailTableQuery.BASE_ANNOTATION_FIELDS) ENUM_ANNOTATION_FIELDS = { - 'clinvar': {'annotate_value': lambda value, enum, globals: { + 'clinvar': {'annotate_value': lambda value, enum, ht_globals: { 'conflictingPathogenicities': value.conflictingPathogenicities.map( lambda p: VariantHailTableQuery._enum_field(p, {k: enum[k] for k in ['pathogenicity']}) ), - 'version': globals['versions'].clinvar, + 'version': ht_globals['versions'].clinvar, }}, 'hgmd': {}, 'screen': {