-
Notifications
You must be signed in to change notification settings - Fork 88
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #3597 from broadinstitute/dev
Dev
- Loading branch information
Showing
68 changed files
with
991 additions
and
454 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Binary file not shown.
Binary file added
BIN
+12 Bytes
hail_search/fixtures/GRCh38/SV_WES/annotations.ht/.metadata.json.gz.crc
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
This folder comprises a Hail (www.hail.is) native Table or MatrixTable. | ||
Written with version 0.2.109-b71b065e4bb6 | ||
Created at 2023/08/24 17:42:53 |
Empty file.
Binary file added
BIN
+12 Bytes
hail_search/fixtures/GRCh38/SV_WES/annotations.ht/globals/.metadata.json.gz.crc
Binary file not shown.
Binary file added
BIN
+366 Bytes
hail_search/fixtures/GRCh38/SV_WES/annotations.ht/globals/metadata.json.gz
Binary file not shown.
Binary file added
BIN
+12 Bytes
hail_search/fixtures/GRCh38/SV_WES/annotations.ht/globals/parts/.part-0.crc
Binary file not shown.
Binary file added
BIN
+330 Bytes
hail_search/fixtures/GRCh38/SV_WES/annotations.ht/globals/parts/part-0
Binary file not shown.
Binary file added
BIN
+12 Bytes
...S/annotations.ht/index/part-0-550-0-0-7fdcd45d-8bea-106b-f455-e20e0468960a.idx/.index.crc
Binary file not shown.
Binary file added
BIN
+12 Bytes
...ns.ht/index/part-0-550-0-0-7fdcd45d-8bea-106b-f455-e20e0468960a.idx/.metadata.json.gz.crc
Binary file not shown.
Binary file added
BIN
+104 Bytes
...SV_WES/annotations.ht/index/part-0-550-0-0-7fdcd45d-8bea-106b-f455-e20e0468960a.idx/index
Binary file not shown.
Binary file added
BIN
+161 Bytes
...tations.ht/index/part-0-550-0-0-7fdcd45d-8bea-106b-f455-e20e0468960a.idx/metadata.json.gz
Binary file not shown.
Binary file not shown.
Binary file added
BIN
+16 Bytes
hail_search/fixtures/GRCh38/SV_WES/annotations.ht/rows/.metadata.json.gz.crc
Binary file not shown.
Binary file added
BIN
+733 Bytes
hail_search/fixtures/GRCh38/SV_WES/annotations.ht/rows/metadata.json.gz
Binary file not shown.
Binary file added
BIN
+12 Bytes
...SV_WES/annotations.ht/rows/parts/.part-0-550-0-0-7fdcd45d-8bea-106b-f455-e20e0468960a.crc
Binary file not shown.
Binary file added
BIN
+366 Bytes
...Ch38/SV_WES/annotations.ht/rows/parts/part-0-550-0-0-7fdcd45d-8bea-106b-f455-e20e0468960a
Binary file not shown.
Binary file added
BIN
+12 Bytes
hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/.README.txt.crc
Binary file not shown.
Binary file added
BIN
+8 Bytes
hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/._SUCCESS.crc
Binary file not shown.
Binary file added
BIN
+12 Bytes
hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/.metadata.json.gz.crc
Binary file not shown.
3 changes: 3 additions & 0 deletions
3
hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/README.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
This folder comprises a Hail (www.hail.is) native Table or MatrixTable. | ||
Written with version 0.2.109-b71b065e4bb6 | ||
Created at 2023/08/25 10:47:38 |
Empty file.
Binary file added
BIN
+12 Bytes
hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/globals/.metadata.json.gz.crc
Binary file not shown.
Binary file added
BIN
+295 Bytes
hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/globals/metadata.json.gz
Binary file not shown.
Binary file added
BIN
+12 Bytes
hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/globals/parts/.part-0.crc
Binary file not shown.
Binary file added
BIN
+477 Bytes
hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/globals/parts/part-0
Binary file not shown.
Binary file added
BIN
+12 Bytes
...ies/F000002_2.ht/index/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.idx/.index.crc
Binary file not shown.
Binary file added
BIN
+12 Bytes
..._2.ht/index/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.idx/.metadata.json.gz.crc
Binary file not shown.
Binary file added
BIN
+104 Bytes
...families/F000002_2.ht/index/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.idx/index
Binary file not shown.
Binary file added
BIN
+161 Bytes
...00002_2.ht/index/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.idx/metadata.json.gz
Binary file not shown.
Binary file added
BIN
+368 Bytes
hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/metadata.json.gz
Binary file not shown.
Binary file added
BIN
+16 Bytes
hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/rows/.metadata.json.gz.crc
Binary file not shown.
Binary file added
BIN
+669 Bytes
hail_search/fixtures/GRCh38/SV_WES/families/F000002_2.ht/rows/metadata.json.gz
Binary file not shown.
Binary file added
BIN
+12 Bytes
...families/F000002_2.ht/rows/parts/.part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48.crc
Binary file not shown.
Binary file added
BIN
+200 Bytes
..._WES/families/F000002_2.ht/rows/parts/part-0-668-0-0-a8c75457-2bef-dd0d-b9b9-f76af029cc48
Binary file not shown.
Binary file added
BIN
+12 Bytes
hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/.README.txt.crc
Binary file not shown.
Binary file added
BIN
+8 Bytes
hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/._SUCCESS.crc
Binary file not shown.
Binary file added
BIN
+12 Bytes
hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/.metadata.json.gz.crc
Binary file not shown.
3 changes: 3 additions & 0 deletions
3
hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/README.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
This folder comprises a Hail (www.hail.is) native Table or MatrixTable. | ||
Written with version 0.2.109-b71b065e4bb6 | ||
Created at 2023/08/25 11:56:33 |
Empty file.
Binary file added
BIN
+12 Bytes
hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/globals/.metadata.json.gz.crc
Binary file not shown.
Binary file added
BIN
+295 Bytes
hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/globals/metadata.json.gz
Binary file not shown.
Binary file added
BIN
+12 Bytes
hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/globals/parts/.part-0.crc
Binary file not shown.
Binary file added
BIN
+485 Bytes
hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/globals/parts/part-0
Binary file not shown.
Binary file added
BIN
+12 Bytes
...cts/R0001_1kg.ht/index/part-0-702-0-0-4d0242f4-15be-5c06-45f9-2e54e9c95dfa.idx/.index.crc
Binary file not shown.
Binary file added
BIN
+12 Bytes
...kg.ht/index/part-0-702-0-0-4d0242f4-15be-5c06-45f9-2e54e9c95dfa.idx/.metadata.json.gz.crc
Binary file not shown.
Binary file added
BIN
+104 Bytes
...projects/R0001_1kg.ht/index/part-0-702-0-0-4d0242f4-15be-5c06-45f9-2e54e9c95dfa.idx/index
Binary file not shown.
Binary file added
BIN
+161 Bytes
...001_1kg.ht/index/part-0-702-0-0-4d0242f4-15be-5c06-45f9-2e54e9c95dfa.idx/metadata.json.gz
Binary file not shown.
Binary file added
BIN
+368 Bytes
hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/metadata.json.gz
Binary file not shown.
Binary file added
BIN
+16 Bytes
hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/rows/.metadata.json.gz.crc
Binary file not shown.
Binary file added
BIN
+668 Bytes
hail_search/fixtures/GRCh38/SV_WES/projects/R0001_1kg.ht/rows/metadata.json.gz
Binary file not shown.
Binary file added
BIN
+12 Bytes
...projects/R0001_1kg.ht/rows/parts/.part-0-702-0-0-4d0242f4-15be-5c06-45f9-2e54e9c95dfa.crc
Binary file not shown.
Binary file added
BIN
+235 Bytes
..._WES/projects/R0001_1kg.ht/rows/parts/part-0-702-0-0-4d0242f4-15be-5c06-45f9-2e54e9c95dfa
Binary file not shown.
Empty file.
436 changes: 35 additions & 401 deletions
436
hail_search/hail_search_query.py → hail_search/queries/base.py
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
import hail as hl | ||
|
||
from hail_search.constants import COMP_HET_ALT, HAS_ALT, HAS_REF, REF_REF | ||
from hail_search.queries.sv import SvHailTableQuery | ||
|
||
|
||
class GcnvHailTableQuery(SvHailTableQuery): | ||
|
||
DATA_TYPE = 'SV_WES' | ||
|
||
# gCNV data has no ref/ref calls so a missing entry indicates ref/ref | ||
GENOTYPE_QUERY_MAP = { | ||
**SvHailTableQuery.GENOTYPE_QUERY_MAP, | ||
REF_REF: hl.is_missing, | ||
HAS_REF: lambda gt: hl.is_missing(gt) | gt.is_het_ref(), | ||
HAS_ALT: hl.is_defined, | ||
COMP_HET_ALT: hl.is_defined, | ||
} | ||
MISSING_NUM_ALT = 0 | ||
|
||
GENOTYPE_FIELDS = { | ||
**SvHailTableQuery.GENOTYPE_FIELDS, | ||
**{f.lower(): f for f in ['QS', 'defragged']}, | ||
} | ||
del GENOTYPE_FIELDS['gq'] | ||
GENOTYPE_QUERY_FIELDS = {} | ||
GENOTYPE_OVERRIDE_FIELDS = { | ||
'start': (hl.min, lambda r: r.start_locus.position), | ||
'end': (hl.max, lambda r: r.end_locus.position), | ||
'num_exon': (hl.max, lambda r: r.num_exon), | ||
'gene_ids': ( | ||
lambda entry_gene_ids: entry_gene_ids.fold(lambda s1, s2: s1.union(s2), hl.empty_set(hl.tstr)), | ||
lambda r: hl.missing(hl.tset(hl.tstr)), | ||
), | ||
} | ||
COMPUTED_GENOTYPE_FIELDS = { | ||
**SvHailTableQuery.COMPUTED_GENOTYPE_FIELDS, | ||
**{k: lambda entry, field, r: hl.or_missing( | ||
hl.is_missing(r[field]) | (r[field] != entry[f'sample_{field}']), entry[f'sample_{field}'] | ||
) for k in GENOTYPE_OVERRIDE_FIELDS.keys()}, | ||
} | ||
COMPUTED_GENOTYPE_FIELDS['prev_overlap'] = COMPUTED_GENOTYPE_FIELDS.pop('prev_num_alt') | ||
|
||
CORE_FIELDS = SvHailTableQuery.CORE_FIELDS[:-1] | ||
BASE_ANNOTATION_FIELDS = { | ||
**SvHailTableQuery.BASE_ANNOTATION_FIELDS, | ||
'pos': lambda r: r.start, | ||
'end': lambda r: r.end, | ||
'numExon': lambda r: r.num_exon, | ||
} | ||
del BASE_ANNOTATION_FIELDS['bothsidesSupport'] | ||
|
||
TRANSCRIPTS_ENUM_FIELD = SvHailTableQuery.ENUM_ANNOTATION_FIELDS[SvHailTableQuery.TRANSCRIPTS_FIELD] | ||
ENUM_ANNOTATION_FIELDS = {SvHailTableQuery.TRANSCRIPTS_FIELD: { | ||
**TRANSCRIPTS_ENUM_FIELD, | ||
'format_array_values': lambda values, r: GcnvHailTableQuery.TRANSCRIPTS_ENUM_FIELD['format_array_values']( | ||
hl.if_else(hl.is_missing(r.gene_ids), values, values.filter(lambda t: r.gene_ids.contains(t.geneId))), r, | ||
), | ||
}} | ||
|
||
POPULATIONS = {k: v for k, v in SvHailTableQuery.POPULATIONS.items() if k != 'gnomad_svs'} | ||
|
||
@classmethod | ||
def _get_genotype_override_field(cls, r, field, family_entries_field=None): | ||
agg, get_default = cls.GENOTYPE_OVERRIDE_FIELDS[field] | ||
sample_field = f'sample_{field}' | ||
entries = r[family_entries_field or 'family_entries'].flatmap(lambda x: x) | ||
return hl.if_else( | ||
entries.any(lambda g: hl.is_defined(g.GT) & hl.is_missing(g[sample_field])), | ||
get_default(r), agg(entries.map(lambda g: g[sample_field])) | ||
) | ||
|
||
def _format_results(self, ht, annotation_fields): | ||
ht = ht.annotate(**{k: self._get_genotype_override_field(ht, k) for k in self.GENOTYPE_OVERRIDE_FIELDS}) | ||
return super()._format_results(ht, annotation_fields) | ||
|
||
def get_allowed_sv_type_ids(self, sv_types): | ||
return super().get_allowed_sv_type_ids([ | ||
type.replace('gCNV_', '') for type in sv_types if type.startswith('gCNV_') | ||
]) | ||
|
||
@classmethod | ||
def _gene_ids_expr(cls, ht, comp_het=False): | ||
family_entries_field = 'comp_het_family_entries' if comp_het else None | ||
return hl.or_else( | ||
cls._get_genotype_override_field(ht, 'gene_ids', family_entries_field=family_entries_field), | ||
super()._gene_ids_expr(ht), | ||
) | ||
|
||
def _additional_annotation_fields(self): | ||
return {} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
import hail as hl | ||
|
||
from hail_search.constants import CONSEQUENCE_SORT, OMIM_SORT | ||
from hail_search.queries.base import BaseHailTableQuery | ||
from hail_search.queries.variants import VariantHailTableQuery | ||
from hail_search.queries.sv import SvHailTableQuery | ||
from hail_search.queries.gcnv import GcnvHailTableQuery | ||
|
||
QUERY_CLASS_MAP = {cls.DATA_TYPE: cls for cls in [VariantHailTableQuery, SvHailTableQuery, GcnvHailTableQuery]} | ||
|
||
|
||
class MultiDataTypeHailTableQuery(BaseHailTableQuery): | ||
|
||
def __init__(self, sample_data, *args, **kwargs): | ||
self._data_type_queries = {k: QUERY_CLASS_MAP[k](v, *args, **kwargs) for k, v in sample_data.items()} | ||
super().__init__(sample_data, *args, **kwargs) | ||
|
||
def _load_filtered_table(self, *args, **kwargs): | ||
pass | ||
|
||
def format_search_ht(self): | ||
ht = None | ||
for data_type, query in self._data_type_queries.items(): | ||
dt_ht = query.format_search_ht() | ||
merged_sort_expr = self._merged_sort_expr(data_type, dt_ht) | ||
if merged_sort_expr is not None: | ||
dt_ht = dt_ht.annotate(_sort=merged_sort_expr) | ||
dt_ht = dt_ht.select('_sort', **{data_type: dt_ht.row}) | ||
if ht is None: | ||
ht = dt_ht | ||
else: | ||
ht = ht.join(dt_ht, 'outer') | ||
ht = ht.transmute(_sort=hl.or_else(ht._sort, ht._sort_1)) | ||
return ht | ||
|
||
def _merged_sort_expr(self, data_type, ht): | ||
# Certain sorts have an extra element for variant-type data, so need to add an element for SV data | ||
if not data_type.startswith('SV'): | ||
return None | ||
|
||
if self._sort == CONSEQUENCE_SORT: | ||
return hl.array([hl.float64(4.5)]).extend(ht._sort.map(hl.float64)) | ||
elif self._sort == OMIM_SORT: | ||
return hl.array([hl.int64(0)]).extend(ht._sort) | ||
elif self._sort_metadata: | ||
return ht._sort[:1].extend(ht._sort) | ||
|
||
return None | ||
|
||
def _format_collected_rows(self, collected): | ||
return super()._format_collected_rows([ | ||
next(row.get(data_type) for data_type in self._data_type_queries if row.get(data_type)) | ||
for row in collected | ||
]) | ||
|
||
def format_gene_counts_ht(self): | ||
hts = [query.format_gene_counts_ht() for query in self._data_type_queries.values()] | ||
ht = hts[0] | ||
for dt_ht in hts[1:]: | ||
ht = ht.join(dt_ht, 'outer') | ||
ht = ht.transmute(**{k: hl.or_else(ht[k], ht[f'{k}_1']) for k in ['gene_ids', 'families']}) | ||
return ht |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
import hail as hl | ||
|
||
|
||
from hail_search.constants import CONSEQUENCE_SORT, NEW_SV_FIELD, STRUCTURAL_ANNOTATION_FIELD | ||
from hail_search.queries.base import BaseHailTableQuery, PredictionPath | ||
|
||
|
||
class SvHailTableQuery(BaseHailTableQuery): | ||
|
||
DATA_TYPE = 'SV_WGS' | ||
|
||
GENOTYPE_FIELDS = {f.lower(): f for f in ['CN', 'GQ']} | ||
COMPUTED_GENOTYPE_FIELDS = { | ||
k: lambda entry, field, *args: entry.concordance[field] for k in ['new_call', 'prev_call', 'prev_num_alt'] | ||
} | ||
GENOTYPE_QUERY_FIELDS = {'gq_sv': 'GQ', 'gq': None} | ||
|
||
TRANSCRIPTS_FIELD = 'sorted_gene_consequences' | ||
TRANSCRIPT_CONSEQUENCE_FIELD = 'major_consequence' | ||
CORE_FIELDS = BaseHailTableQuery.CORE_FIELDS + ['algorithms'] | ||
BASE_ANNOTATION_FIELDS = { | ||
'bothsidesSupport': lambda r: r.bothsides_support, | ||
'chrom': lambda r: r.start_locus.contig.replace('^chr', ''), | ||
'pos': lambda r: r.start_locus.position, | ||
'end': lambda r: r.end_locus.position, | ||
'rg37LocusEnd': lambda r: hl.or_missing( | ||
hl.is_defined(r.rg37_locus_end), hl.struct(contig=r.rg37_locus_end.contig, position=r.rg37_locus_end.position) | ||
), | ||
**BaseHailTableQuery.BASE_ANNOTATION_FIELDS, | ||
} | ||
ENUM_ANNOTATION_FIELDS = { | ||
TRANSCRIPTS_FIELD: BaseHailTableQuery.ENUM_ANNOTATION_FIELDS['transcripts'], | ||
} | ||
|
||
POPULATIONS = { | ||
'sv_callset': {'hemi': None, 'sort': 'callset_af'}, | ||
'gnomad_svs': {'id': 'ID', 'ac': None, 'an': None, 'hom': None, 'hemi': None, 'het': None, 'sort': 'gnomad'}, | ||
} | ||
POPULATION_FIELDS = {'sv_callset': 'gt_stats'} | ||
PREDICTION_FIELDS_CONFIG = { | ||
'strvctvre': PredictionPath('strvctvre', 'score'), | ||
} | ||
|
||
SORTS = { | ||
**BaseHailTableQuery.SORTS, | ||
CONSEQUENCE_SORT: lambda r: [hl.min(r.sorted_gene_consequences.map(lambda g: g.major_consequence_id))], | ||
'size': lambda r: [hl.if_else( | ||
r.start_locus.contig == r.end_locus.contig, r.start_locus.position - r.end_locus.position, -50, | ||
)], | ||
} | ||
|
||
def _parse_intervals(self, intervals, variant_ids, variant_keys=None, **kwargs): | ||
parsed_intervals, _ = super()._parse_intervals(intervals, variant_ids=None, **kwargs) | ||
return parsed_intervals, variant_keys | ||
|
||
def _filter_annotated_table(self, *args, parsed_intervals=None, exclude_intervals=False, **kwargs): | ||
if parsed_intervals: | ||
interval_filter = hl.array(parsed_intervals).any(lambda interval: hl.if_else( | ||
self._ht.start_locus.contig == self._ht.end_locus.contig, | ||
interval.overlaps(hl.interval(self._ht.start_locus, self._ht.end_locus)), | ||
interval.contains(self._ht.start_locus) | interval.contains(self._ht.end_locus), | ||
)) | ||
if exclude_intervals: | ||
interval_filter = ~interval_filter | ||
self._ht = self._ht.filter(interval_filter) | ||
|
||
return super()._filter_annotated_table(*args, **kwargs) | ||
|
||
def _get_family_passes_quality_filter(self, quality_filter, annotations=None, **kwargs): | ||
passes_quality = super()._get_family_passes_quality_filter(quality_filter) | ||
if not (annotations or {}).get(NEW_SV_FIELD): | ||
return passes_quality | ||
|
||
entries_has_new_call = lambda entries: entries.any(lambda x: x.concordance.new_call) | ||
if passes_quality is None: | ||
return entries_has_new_call | ||
|
||
return lambda entries: entries_has_new_call(entries) & passes_quality(entries) | ||
|
||
def _get_allowed_consequences_annotations(self, annotations, annotation_filters, is_secondary=False): | ||
if is_secondary: | ||
# SV search can specify secondary SV types, as well as secondary consequences | ||
annotation_filters = self._get_annotation_override_filters(annotations) | ||
return super()._get_allowed_consequences_annotations(annotations, annotation_filters) | ||
|
||
def _get_annotation_override_filters(self, annotations, **kwargs): | ||
annotation_filters = [] | ||
if annotations.get(STRUCTURAL_ANNOTATION_FIELD): | ||
allowed_type_ids = self.get_allowed_sv_type_ids(annotations[STRUCTURAL_ANNOTATION_FIELD]) | ||
if allowed_type_ids: | ||
annotation_filters.append(hl.set(allowed_type_ids).contains(self._ht.sv_type_id)) | ||
|
||
return annotation_filters | ||
|
||
def get_allowed_sv_type_ids(self, sv_types): | ||
return self._get_enum_terms_ids('sv_type', None, sv_types) | ||
|
||
def _additional_annotation_fields(self): | ||
sv_type_enum = self._enums['sv_type'] | ||
insertion_type_id = sv_type_enum.index('INS') | ||
get_end_chrom = lambda r: hl.or_missing(r.start_locus.contig != r.end_locus.contig, r.end_locus.contig.replace('^chr', '')) | ||
return { | ||
'cpxIntervals': lambda r: self._format_enum( | ||
r, 'cpx_intervals', {'type': sv_type_enum}, annotate_value=lambda val, *args: { | ||
'chrom': val.start.contig, | ||
'start': val.start.position, | ||
'end': val.end.position, | ||
}, | ||
), | ||
# For insertions, end_locus represents the svSourceDetail, otherwise represents the endChrom | ||
'endChrom': lambda r: hl.or_missing(r.sv_type_id != insertion_type_id, get_end_chrom(r)), | ||
'svSourceDetail': lambda r: hl.or_missing(r.sv_type_id == insertion_type_id, hl.bind( | ||
lambda end_chrom: hl.or_missing(hl.is_defined(end_chrom), hl.struct(chrom=end_chrom)), | ||
get_end_chrom(r), | ||
)), | ||
} |
Oops, something went wrong.