From 0f8cbcb76ee5be9cfbff0ae71691ec7c0c27c2c9 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 13 Feb 2024 13:58:24 -0500 Subject: [PATCH 01/11] use lookup for sv seqr seach --- ui/shared/components/panel/variants/Annotations.jsx | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/ui/shared/components/panel/variants/Annotations.jsx b/ui/shared/components/panel/variants/Annotations.jsx index 040f6dffef..15b0a619ce 100644 --- a/ui/shared/components/panel/variants/Annotations.jsx +++ b/ui/shared/components/panel/variants/Annotations.jsx @@ -296,8 +296,13 @@ const VARIANT_LINKS = [ }, ] +const getSampleType = (genotypes) => { + const sampleTypes = [...new Set(Object.values(genotypes || {}).map(({ sampleType }) => sampleType).filter(s => s))] + return sampleTypes.length === 1 ? sampleTypes[0] : '' +} + const variantSearchLinks = (variant, mainTranscript, genesById, user, elasticsearchEnabled) => { - const { chrom, endChrom, pos, end, ref, alt, genomeVersion, svType, variantId, transcripts } = variant + const { chrom, endChrom, pos, end, ref, alt, genomeVersion, genotypes, svType, variantId, transcripts } = variant const mainGene = genesById[mainTranscript.geneId] let genes @@ -331,7 +336,7 @@ const variantSearchLinks = (variant, mainTranscript, genesById, user, elasticsea const linkVariant = { genes, variations, hgvsc, ...variant } - const seqrSearchLink = (elasticsearchEnabled || svType) ? ( + const seqrSearchLink = elasticsearchEnabled ? ( ) : ( seqr From e1ec96f02e1314a6635aa60547af19a00dc06753 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 13 Feb 2024 14:18:36 -0500 Subject: [PATCH 02/11] send valid sv lookup query --- hail_search/queries/base.py | 21 +++++++++++---------- seqr/utils/search/hail_search_utils.py | 11 +++++++++-- seqr/utils/search/utils.py | 7 +++---- 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index bd8364ce3e..25dc4ce28b 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -265,22 +265,23 @@ def _load_filtered_project_hts(self, project_samples, skip_all_missing=False, ** filtered_project_hts = [] exception_messages = set() for i, (project_guid, project_sample_data) in enumerate(project_samples.items()): - project_ht = self._read_table( - f'projects/{project_guid}.ht', - use_ssd_dir=True, - skip_missing_field='family_entries' if skip_all_missing or i > 0 else None, - ) - if project_ht is None: - continue try: + project_ht = self._read_table( + f'projects/{project_guid}.ht', + use_ssd_dir=True, + skip_missing_field='family_entries' if skip_all_missing or i > 0 else None, + ) + if project_ht is None: + continue filtered_project_hts.append( (*self._filter_entries_table(project_ht, project_sample_data, **kwargs), len(project_sample_data)) ) - except HTTPBadRequest as e: - exception_messages.add(e.reason) + except Exception as e: + exception_messages.add(str(e)) if exception_messages: - raise HTTPBadRequest(reason='; '.join(exception_messages)) + logger.info(f'Error in {len(exception_messages)} projects') + #raise HTTPBadRequest(reason='; '.join(exception_messages)) return filtered_project_hts diff --git a/seqr/utils/search/hail_search_utils.py b/seqr/utils/search/hail_search_utils.py index c4b700e2ed..a275174a90 100644 --- a/seqr/utils/search/hail_search_utils.py +++ b/seqr/utils/search/hail_search_utils.py @@ -74,13 +74,20 @@ def get_hail_variants_for_variant_ids(samples, genome_version, parsed_variant_id return response_json['results'] -def hail_variant_lookup(user, variant_id, samples=None, **kwargs): +def hail_variant_lookup(user, variant_id, samples=None, dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS, sample_type=None, **kwargs): + if dataset_type == Sample.DATASET_TYPE_SV_CALLS: + if not sample_type: + from seqr.utils.search.utils import InvalidSearchException + raise InvalidSearchException('Sample type must be specified to look up a structural variant') + dataset_type = f'{dataset_type}_{sample_type}' + # TODO run reciprocal overlap query on other sample type body = { 'variant_id': variant_id, + 'data_type': dataset_type, **kwargs, } if samples: - body['sample_data'] = _get_sample_data(samples)[Sample.DATASET_TYPE_VARIANT_CALLS] + body['sample_data'] = _get_sample_data(samples)[dataset_type] return _execute_search(body, user, path='lookup', exception_map={404: 'Variant not present in seqr'}) diff --git a/seqr/utils/search/utils.py b/seqr/utils/search/utils.py index b69a760b40..c9cc5753fe 100644 --- a/seqr/utils/search/utils.py +++ b/seqr/utils/search/utils.py @@ -161,15 +161,14 @@ def variant_lookup(user, variant_id, families=None, genome_version=None, **kwarg return variant parsed_variant_id = _parse_variant_id(variant_id) - if not parsed_variant_id: - raise InvalidSearchException(f'Invalid variant {variant_id}') + dataset_type = DATASET_TYPE_SNP_INDEL_ONLY if parsed_variant_id else Sample.DATASET_TYPE_SV_CALLS if families: - samples, _ = _get_families_search_data(families, dataset_type=DATASET_TYPE_SNP_INDEL_ONLY) + samples, _ = _get_families_search_data(families, dataset_type=dataset_type) kwargs['samples'] = samples lookup_func = backend_specific_call(_raise_search_error('Hail backend is disabled'), hail_variant_lookup) - variant = lookup_func(user, parsed_variant_id, genome_version=GENOME_VERSION_LOOKUP[genome_version], **kwargs) + variant = lookup_func(user, parsed_variant_id or variant_id, genome_version=GENOME_VERSION_LOOKUP[genome_version], dataset_type=dataset_type, **kwargs) safe_redis_set_json(cache_key, variant, expire=timedelta(weeks=2)) return variant From 781d6232f1f0b0fa4dbbb24697bfe7f2e3087150 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 13 Feb 2024 14:39:43 -0500 Subject: [PATCH 03/11] fix lookup genotype overrides --- hail_search/queries/base.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index 25dc4ce28b..fd65471ea1 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -43,6 +43,7 @@ class BaseHailTableQuery(object): GENOTYPE_FIELDS = {} COMPUTED_GENOTYPE_FIELDS = {} + GENOTYPE_OVERRIDE_FIELDS = {} GENOTYPE_QUERY_FIELDS = {} QUALITY_FILTER_FORMAT = {} POPULATIONS = {} @@ -90,7 +91,7 @@ def _format_population_config(cls, pop_config): base_pop_config.pop('sort', None) return base_pop_config - def annotation_fields(self): + def annotation_fields(self, include_genotype_overrides=True): annotation_fields = { GENOTYPES_FIELD: lambda r: r.family_entries.flatmap(lambda x: x).filter( lambda gt: hl.is_defined(gt.individualGuid) @@ -98,7 +99,8 @@ def annotation_fields(self): 'sampleId', 'sampleType', 'individualGuid', 'familyGuid', numAlt=hl.if_else(hl.is_defined(x[0].GT), x[0].GT.n_alt_alleles(), self.MISSING_NUM_ALT), **{k: x[0][field] for k, field in self.GENOTYPE_FIELDS.items()}, - **{_to_camel_case(k): v(x[0], k, r) for k, v in self.COMPUTED_GENOTYPE_FIELDS.items()}, + **{_to_camel_case(k): v(x[0], k, r) for k, v in self.COMPUTED_GENOTYPE_FIELDS.items() + if include_genotype_overrides or k not in self.GENOTYPE_OVERRIDE_FIELDS}, )), 'populations': lambda r: hl.struct(**{ population: self.population_expression(r, population) for population in self.POPULATIONS.keys() @@ -952,7 +954,7 @@ def lookup_variant(self, variant_id, sample_data=None): ht = self._read_table('annotations.ht', drop_globals=['paths', 'versions']) ht = ht.filter(hl.is_defined(ht[XPOS])) - annotation_fields = self.annotation_fields() + annotation_fields = self.annotation_fields(include_genotype_overrides=False) entry_annotations = {k: annotation_fields[k] for k in [FAMILY_GUID_FIELD, GENOTYPES_FIELD]} annotation_fields.update({ FAMILY_GUID_FIELD: lambda ht: hl.empty_array(hl.tstr), @@ -960,7 +962,7 @@ def lookup_variant(self, variant_id, sample_data=None): 'genotypeFilters': lambda ht: hl.str(''), }) - formatted = self._format_results(ht.key_by(), annotation_fields=annotation_fields, include_genotype_overrides=bool(sample_data)) + formatted = self._format_results(ht.key_by(), annotation_fields=annotation_fields, include_genotype_overrides=False) variants = formatted.aggregate(hl.agg.take(formatted.row, 1)) if not variants: From 24ae9ec9703752d6b1b7e7725e76a98c662cb00f Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 13 Feb 2024 14:49:25 -0500 Subject: [PATCH 04/11] supprt multiple variants in lookup resonse --- seqr/utils/search/hail_search_utils.py | 3 ++- seqr/views/apis/variant_search_api.py | 8 ++++---- ui/pages/SummaryData/components/VariantLookup.jsx | 4 ++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/seqr/utils/search/hail_search_utils.py b/seqr/utils/search/hail_search_utils.py index a275174a90..64f5f9009f 100644 --- a/seqr/utils/search/hail_search_utils.py +++ b/seqr/utils/search/hail_search_utils.py @@ -88,7 +88,8 @@ def hail_variant_lookup(user, variant_id, samples=None, dataset_type=Sample.DATA } if samples: body['sample_data'] = _get_sample_data(samples)[dataset_type] - return _execute_search(body, user, path='lookup', exception_map={404: 'Variant not present in seqr'}) + variant = _execute_search(body, user, path='lookup', exception_map={404: 'Variant not present in seqr'}) + return [variant] def _format_search_body(samples, genome_version, num_results, search): diff --git a/seqr/views/apis/variant_search_api.py b/seqr/views/apis/variant_search_api.py index 02976a0e45..035b133029 100644 --- a/seqr/views/apis/variant_search_api.py +++ b/seqr/views/apis/variant_search_api.py @@ -541,12 +541,12 @@ def variant_lookup_handler(request): kwargs.get('genome_version', GENOME_VERSION_GRCh38), request.user, ) - variant = variant_lookup(request.user, families=families, **kwargs) - saved_variants, _ = _get_saved_variant_models([variant], families) if families else (None, None) + variants = variant_lookup(request.user, families=families, **kwargs) + saved_variants, _ = _get_saved_variant_models(variants, families) if families else (None, None) response = get_variants_response( - request, saved_variants=saved_variants, response_variants=[variant], + request, saved_variants=saved_variants, response_variants=variants, add_all_context=include_genotypes, add_locus_list_detail=include_genotypes, ) - response['variant'] = variant + response['variants'] = variants return create_json_response(response) diff --git a/ui/pages/SummaryData/components/VariantLookup.jsx b/ui/pages/SummaryData/components/VariantLookup.jsx index 3b646ee5b2..07236bcd84 100644 --- a/ui/pages/SummaryData/components/VariantLookup.jsx +++ b/ui/pages/SummaryData/components/VariantLookup.jsx @@ -31,10 +31,10 @@ const FIELDS = [ { required: true, ...GENOME_VERSION_FIELD }, ] -const VariantDisplay = ({ variant }) => (variant ? : null) +const VariantDisplay = ({ variants }) => (variants || []).map(variant => ) VariantDisplay.propTypes = { - variant: PropTypes.object, + variants: PropTypes.arrayOf(PropTypes.object), } const onSubmit = updateQueryParams => (data) => { From 711fe32d1166577d64961442c7a0eee1450c003f Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 13 Feb 2024 16:05:54 -0500 Subject: [PATCH 05/11] run second sample type search for svs --- seqr/utils/search/hail_search_utils.py | 29 ++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/seqr/utils/search/hail_search_utils.py b/seqr/utils/search/hail_search_utils.py index 64f5f9009f..5dcb1fe863 100644 --- a/seqr/utils/search/hail_search_utils.py +++ b/seqr/utils/search/hail_search_utils.py @@ -75,21 +75,42 @@ def get_hail_variants_for_variant_ids(samples, genome_version, parsed_variant_id def hail_variant_lookup(user, variant_id, samples=None, dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS, sample_type=None, **kwargs): - if dataset_type == Sample.DATASET_TYPE_SV_CALLS: + is_sv = dataset_type == Sample.DATASET_TYPE_SV_CALLS + if is_sv: if not sample_type: from seqr.utils.search.utils import InvalidSearchException raise InvalidSearchException('Sample type must be specified to look up a structural variant') dataset_type = f'{dataset_type}_{sample_type}' - # TODO run reciprocal overlap query on other sample type + body = { 'variant_id': variant_id, 'data_type': dataset_type, **kwargs, } + sample_data = None if samples: - body['sample_data'] = _get_sample_data(samples)[dataset_type] + sample_data = _get_sample_data(samples) + body['sample_data'] = sample_data.pop(dataset_type) variant = _execute_search(body, user, path='lookup', exception_map={404: 'Variant not present in seqr'}) - return [variant] + variants = [variant] + + if is_sv and sample_data: + start = variant['pos'] + end = variant['end'] + offset = 0.2 + if variant.get('endChrom'): + start -= 50 + end += 50 + offset = None + del body['variant_id'] + body.update({ + 'sample_data': sample_data, + 'intervals': [_format_interval(chrom=variant['chrom'], start=start, end=end, offset=offset)], + 'annotations': {'structural': [variant['svType'], f"gCNV_{variant['svType']}"]} + }) + variants += _execute_search(body, user)['results'] + + return variants def _format_search_body(samples, genome_version, num_results, search): From 2b54908c85ee848460f586cd0aead345ef83af05 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 13 Feb 2024 16:26:48 -0500 Subject: [PATCH 06/11] clean up --- seqr/utils/search/hail_search_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/seqr/utils/search/hail_search_utils.py b/seqr/utils/search/hail_search_utils.py index 5dcb1fe863..55736fa4bd 100644 --- a/seqr/utils/search/hail_search_utils.py +++ b/seqr/utils/search/hail_search_utils.py @@ -94,7 +94,7 @@ def hail_variant_lookup(user, variant_id, samples=None, dataset_type=Sample.DATA variant = _execute_search(body, user, path='lookup', exception_map={404: 'Variant not present in seqr'}) variants = [variant] - if is_sv and sample_data: + if is_sv and sample_data and variant['svType'] in {'DEL', 'DUP'}: start = variant['pos'] end = variant['end'] offset = 0.2 From f6b511fff363cb8a18694b156b3170e07b6f01f1 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 13 Feb 2024 16:29:59 -0500 Subject: [PATCH 07/11] remove debig code --- hail_search/queries/base.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/hail_search/queries/base.py b/hail_search/queries/base.py index 6b44297e16..972b33dd15 100644 --- a/hail_search/queries/base.py +++ b/hail_search/queries/base.py @@ -272,23 +272,22 @@ def _load_filtered_project_hts(self, project_samples, skip_all_missing=False, ** filtered_project_hts = [] exception_messages = set() for i, (project_guid, project_sample_data) in enumerate(project_samples.items()): + project_ht = self._read_table( + f'projects/{project_guid}.ht', + use_ssd_dir=True, + skip_missing_field='family_entries' if skip_all_missing or i > 0 else None, + ) + if project_ht is None: + continue try: - project_ht = self._read_table( - f'projects/{project_guid}.ht', - use_ssd_dir=True, - skip_missing_field='family_entries' if skip_all_missing or i > 0 else None, - ) - if project_ht is None: - continue filtered_project_hts.append( (*self._filter_entries_table(project_ht, project_sample_data, **kwargs), len(project_sample_data)) ) - except Exception as e: - exception_messages.add(str(e)) + except HTTPBadRequest as e: + exception_messages.add(e.reason) if exception_messages: - logger.info(f'Error in {len(exception_messages)} projects') - #raise HTTPBadRequest(reason='; '.join(exception_messages)) + raise HTTPBadRequest(reason='; '.join(exception_messages)) return filtered_project_hts From ef4a8bc0948e128878077154e29889249707d2f5 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 13 Feb 2024 16:37:58 -0500 Subject: [PATCH 08/11] fix test --- seqr/utils/search/hail_search_utils.py | 2 +- seqr/views/apis/variant_search_api_tests.py | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/seqr/utils/search/hail_search_utils.py b/seqr/utils/search/hail_search_utils.py index 55736fa4bd..be5509282f 100644 --- a/seqr/utils/search/hail_search_utils.py +++ b/seqr/utils/search/hail_search_utils.py @@ -84,7 +84,7 @@ def hail_variant_lookup(user, variant_id, samples=None, dataset_type=Sample.DATA body = { 'variant_id': variant_id, - 'data_type': dataset_type, + 'data_type': dataset_type.replace('_only', ''), **kwargs, } sample_data = None diff --git a/seqr/views/apis/variant_search_api_tests.py b/seqr/views/apis/variant_search_api_tests.py index 6a1a2081b0..e93d221300 100644 --- a/seqr/views/apis/variant_search_api_tests.py +++ b/seqr/views/apis/variant_search_api_tests.py @@ -761,7 +761,7 @@ def _assert_expected_single_variant_results_context(self, response_json, omit_fi @mock.patch('seqr.views.apis.variant_search_api.variant_lookup') def test_variant_lookup(self, mock_variant_lookup): - mock_variant_lookup.return_value = VARIANT_LOOKUP_VARIANT + mock_variant_lookup.return_value = [VARIANT_LOOKUP_VARIANT] url = f'{reverse(variant_lookup_handler)}?variantId=1-10439-AC-A&genomeVersion=38' self.check_require_login(url) @@ -776,15 +776,15 @@ def test_variant_lookup(self, mock_variant_lookup): 'rnaSeqData': {}, 'savedVariantsByGuid': {}, 'transcriptsById': {}, - 'variant': VARIANT_LOOKUP_VARIANT, + 'variants': [VARIANT_LOOKUP_VARIANT], } self.assertDictEqual(response.json(), expected_body) mock_variant_lookup.assert_called_with(self.no_access_user, variant_id='1-10439-AC-A', genome_version='38', families=None) variant = {**VARIANTS[0], 'familyGuids': [], 'genotypes': {}} - mock_variant_lookup.return_value = variant + mock_variant_lookup.return_value = [variant] expected_body.update({ - 'variant': variant, + 'variants': [variant], 'genesById': {'ENSG00000227232': EXPECTED_GENE, 'ENSG00000268903': EXPECTED_GENE}, 'transcriptsById': EXPECTED_SEARCH_RESPONSE['transcriptsById'], }) @@ -794,11 +794,11 @@ def test_variant_lookup(self, mock_variant_lookup): self.assertDictEqual(response.json(), expected_body) self.login_collaborator() - mock_variant_lookup.return_value = SINGLE_FAMILY_VARIANT + mock_variant_lookup.return_value = [SINGLE_FAMILY_VARIANT] response = self.client.get(f'{url.replace("38", "37")}&include_genotypes=true') self.assertEqual(response.status_code, 200) self._assert_expected_single_variant_results_context( - response.json(), variant=SINGLE_FAMILY_VARIANT, omit_fields={'searchedVariants'}, + response.json(), variants=[SINGLE_FAMILY_VARIANT], omit_fields={'searchedVariants'}, ) mock_variant_lookup.assert_called_with( self.collaborator_user, variant_id='1-10439-AC-A', genome_version='37', families=mock.ANY, From 03332b03baa7b672387d54ef36b2090f1a7dcef7 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 13 Feb 2024 16:43:35 -0500 Subject: [PATCH 09/11] fix tests --- seqr/utils/search/hail_search_utils.py | 9 +++++---- seqr/utils/search/hail_search_utils_tests.py | 11 ++++++----- seqr/utils/search/search_utils_tests.py | 10 ++++------ 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/seqr/utils/search/hail_search_utils.py b/seqr/utils/search/hail_search_utils.py index be5509282f..f9ac9d4cbf 100644 --- a/seqr/utils/search/hail_search_utils.py +++ b/seqr/utils/search/hail_search_utils.py @@ -75,22 +75,23 @@ def get_hail_variants_for_variant_ids(samples, genome_version, parsed_variant_id def hail_variant_lookup(user, variant_id, samples=None, dataset_type=Sample.DATASET_TYPE_VARIANT_CALLS, sample_type=None, **kwargs): - is_sv = dataset_type == Sample.DATASET_TYPE_SV_CALLS + data_type = dataset_type.replace('_only', '') + is_sv = data_type == Sample.DATASET_TYPE_SV_CALLS if is_sv: if not sample_type: from seqr.utils.search.utils import InvalidSearchException raise InvalidSearchException('Sample type must be specified to look up a structural variant') - dataset_type = f'{dataset_type}_{sample_type}' + data_type = f'{data_type}_{sample_type}' body = { 'variant_id': variant_id, - 'data_type': dataset_type.replace('_only', ''), + 'data_type': data_type, **kwargs, } sample_data = None if samples: sample_data = _get_sample_data(samples) - body['sample_data'] = sample_data.pop(dataset_type) + body['sample_data'] = sample_data.pop(data_type) variant = _execute_search(body, user, path='lookup', exception_map={404: 'Variant not present in seqr'}) variants = [variant] diff --git a/seqr/utils/search/hail_search_utils_tests.py b/seqr/utils/search/hail_search_utils_tests.py index b8332ecb2e..2f59682070 100644 --- a/seqr/utils/search/hail_search_utils_tests.py +++ b/seqr/utils/search/hail_search_utils_tests.py @@ -214,20 +214,21 @@ def test_get_variant_query_gene_counts(self): def test_variant_lookup(self): responses.add(responses.POST, f'{MOCK_HOST}:5000/lookup', status=200, json=VARIANT_LOOKUP_VARIANT) variant = variant_lookup(self.user, '1-10439-AC-A', genome_version='37', foo='bar') - self.assertDictEqual(variant, VARIANT_LOOKUP_VARIANT) + self.assertListEqual(variant, [VARIANT_LOOKUP_VARIANT]) self._test_minimal_search_call(expected_search_body={ - 'variant_id': ['1', 10439, 'AC', 'A'], 'genome_version': 'GRCh37', 'foo': 'bar', + 'variant_id': ['1', 10439, 'AC', 'A'], 'genome_version': 'GRCh37', 'foo': 'bar', 'data_type': 'SNV_INDEL', }) variant_lookup(self.user, '1-10439-AC-A', genome_version='37', families=self.families) self._test_minimal_search_call(expected_search_body={ 'variant_id': ['1', 10439, 'AC', 'A'], 'genome_version': 'GRCh37', - 'sample_data': ALL_AFFECTED_SAMPLE_DATA['SNV_INDEL'], + 'sample_data': ALL_AFFECTED_SAMPLE_DATA['SNV_INDEL'], 'data_type': 'SNV_INDEL', }) with self.assertRaises(InvalidSearchException) as cm: variant_lookup(self.user, 'prefix_123_DEL') - self.assertEqual(str(cm.exception), 'Invalid variant prefix_123_DEL') + self.assertEqual(str(cm.exception), 'Sample type must be specified to look up a structural variant') + # TODO add test for SVs responses.add(responses.POST, f'{MOCK_HOST}:5000/lookup', status=404) with self.assertRaises(HTTPError) as cm: @@ -235,7 +236,7 @@ def test_variant_lookup(self): self.assertEqual(cm.exception.response.status_code, 404) self.assertEqual(str(cm.exception), 'Variant not present in seqr') self._test_minimal_search_call(expected_search_body={ - 'variant_id': ['1', 10439, 'AC', 'A'], 'genome_version': 'GRCh38' + 'variant_id': ['1', 10439, 'AC', 'A'], 'genome_version': 'GRCh38', 'data_type': 'SNV_INDEL', }) @responses.activate diff --git a/seqr/utils/search/search_utils_tests.py b/seqr/utils/search/search_utils_tests.py index ab649e0834..cde3599ea7 100644 --- a/seqr/utils/search/search_utils_tests.py +++ b/seqr/utils/search/search_utils_tests.py @@ -55,20 +55,18 @@ def test_variant_lookup(self, mock_variant_lookup): mock_variant_lookup.return_value = VARIANT_LOOKUP_VARIANT variant = variant_lookup(self.user, '1-10439-AC-A', genome_version='38') self.assertDictEqual(variant, VARIANT_LOOKUP_VARIANT) - mock_variant_lookup.assert_called_with(self.user, ('1', 10439, 'AC', 'A'), genome_version='GRCh38') + mock_variant_lookup.assert_called_with(self.user, ('1', 10439, 'AC', 'A'), genome_version='GRCh38', + dataset_type='SNV_INDEL_only') cache_key = 'variant_lookup_results__1-10439-AC-A__38__test_user' self.assert_cached_results(variant, cache_key=cache_key) variant = variant_lookup(self.user, '1-10439-AC-A', genome_version='37', families=self.families) self.assertDictEqual(variant, VARIANT_LOOKUP_VARIANT) - mock_variant_lookup.assert_called_with(self.user, ('1', 10439, 'AC', 'A'), genome_version='GRCh37', samples=mock.ANY) + mock_variant_lookup.assert_called_with(self.user, ('1', 10439, 'AC', 'A'), genome_version='GRCh37', samples=mock.ANY, + dataset_type='SNV_INDEL_only') expected_samples = {s for s in self.search_samples if s.guid not in NON_SNP_INDEL_SAMPLES} self.assertSetEqual(set(mock_variant_lookup.call_args.kwargs['samples']), expected_samples) - with self.assertRaises(InvalidSearchException) as cm: - variant_lookup(self.user, '100-10439-AC-A') - self.assertEqual(str(cm.exception), 'Invalid variant 100-10439-AC-A') - mock_variant_lookup.reset_mock() self.set_cache(variant) cached_variant = variant_lookup(self.user, '1-10439-AC-A', genome_version='38') From 0b3a872150b494384fb271d2f1841226f30bc7f9 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 13 Feb 2024 17:10:25 -0500 Subject: [PATCH 10/11] add tests and fix bugs --- hail_search/queries/sv.py | 2 +- hail_search/test_search.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/hail_search/queries/sv.py b/hail_search/queries/sv.py index 526cc52319..bc887410f6 100644 --- a/hail_search/queries/sv.py +++ b/hail_search/queries/sv.py @@ -76,7 +76,7 @@ def _parse_annotations(self, annotations, *args, **kwargs): def _get_family_passes_quality_filter(self, quality_filter, parsed_annotations=None, **kwargs): passes_quality = super()._get_family_passes_quality_filter(quality_filter) - if not parsed_annotations[NEW_SV_FIELD]: + if not (parsed_annotations or {}).get(NEW_SV_FIELD): return passes_quality entries_has_new_call = lambda entries: entries.any(lambda x: x.concordance.new_call) diff --git a/hail_search/test_search.py b/hail_search/test_search.py index 2bc9337fbe..c02e1c0935 100644 --- a/hail_search/test_search.py +++ b/hail_search/test_search.py @@ -662,6 +662,11 @@ async def test_variant_lookup(self): resp_json = await resp.json() self.assertDictEqual(resp_json, {**SV_VARIANT4, 'familyGuids': [], 'genotypes': {}, 'genotypeFilters': ''}) + async with self.client.request('POST', '/lookup', json={**body, 'sample_data': SV_WGS_SAMPLE_DATA['SV_WGS']}) as resp: + self.assertEqual(resp.status, 200) + resp_json = await resp.json() + self.assertDictEqual(resp_json, SV_VARIANT4) + body.update({'variant_id': 'suffix_140608_DUP', 'data_type': 'SV_WES'}) async with self.client.request('POST', '/lookup', json=body) as resp: self.assertEqual(resp.status, 200) @@ -670,6 +675,16 @@ async def test_variant_lookup(self): **GCNV_VARIANT4, 'numExon': 8, 'end': 38736268, 'familyGuids': [], 'genotypes': {}, 'genotypeFilters': '', }) + async with self.client.request('POST', '/lookup', json={**body, 'sample_data': EXPECTED_SAMPLE_DATA['SV_WES']}) as resp: + self.assertEqual(resp.status, 200) + resp_json = await resp.json() + self.assertDictEqual(resp_json, { + **GCNV_VARIANT4, 'numExon': 8, 'end': 38736268, 'genotypes': { + individual: {k: v for k, v in genotype.items() if k not in {'start', 'end', 'numExon', 'geneIds'}} + for individual, genotype in GCNV_VARIANT4['genotypes'].items() + } + }) + async def test_frequency_filter(self): sv_callset_filter = {'sv_callset': {'af': 0.05}} await self._assert_expected_search( From e81d7951f3194f15c417d2e0235c3a1df5597e41 Mon Sep 17 00:00:00 2001 From: Hana Snow Date: Tue, 13 Feb 2024 17:31:10 -0500 Subject: [PATCH 11/11] add test --- seqr/utils/search/hail_search_utils_tests.py | 52 +++++++++++++++----- 1 file changed, 41 insertions(+), 11 deletions(-) diff --git a/seqr/utils/search/hail_search_utils_tests.py b/seqr/utils/search/hail_search_utils_tests.py index 2f59682070..3c72f4bb57 100644 --- a/seqr/utils/search/hail_search_utils_tests.py +++ b/seqr/utils/search/hail_search_utils_tests.py @@ -12,9 +12,15 @@ from hail_search.test_utils import get_hail_search_body, EXPECTED_SAMPLE_DATA, FAMILY_1_SAMPLE_DATA, \ FAMILY_2_ALL_SAMPLE_DATA, ALL_AFFECTED_SAMPLE_DATA, CUSTOM_AFFECTED_SAMPLE_DATA, HAIL_BACKEND_VARIANTS, \ LOCATION_SEARCH, EXCLUDE_LOCATION_SEARCH, VARIANT_ID_SEARCH, RSID_SEARCH, GENE_COUNTS, FAMILY_2_VARIANT_SAMPLE_DATA, \ - FAMILY_2_MITO_SAMPLE_DATA, EXPECTED_SAMPLE_DATA_WITH_SEX, VARIANT_LOOKUP_VARIANT, MULTI_PROJECT_SAMPLE_DATA + FAMILY_2_MITO_SAMPLE_DATA, EXPECTED_SAMPLE_DATA_WITH_SEX, VARIANT_LOOKUP_VARIANT, MULTI_PROJECT_SAMPLE_DATA, \ + GCNV_VARIANT4, SV_VARIANT2 MOCK_HOST = 'http://test-hail-host' +SV_WGS_SAMPLE_DATA = [{ + 'individual_guid': 'I000018_na21234', 'family_guid': 'F000014_14', 'project_guid': 'R0004_non_analyst_project', + 'affected': 'A', 'sample_id': 'NA21234', +}] + @mock.patch('seqr.utils.search.hail_search_utils.HAIL_BACKEND_SERVICE_HOSTNAME', MOCK_HOST) class HailSearchUtilsTests(SearchTestHelper, TestCase): @@ -28,11 +34,12 @@ def setUp(self): 'results': HAIL_BACKEND_VARIANTS, 'total': 5, }) - def _test_minimal_search_call(self, expected_search_body=None, **kwargs): + def _test_minimal_search_call(self, expected_search_body=None, call_offset=-1, url_path='search', **kwargs): expected_search = expected_search_body or get_hail_search_body(genome_version='GRCh37', **kwargs) - executed_request = responses.calls[-1].request + executed_request = responses.calls[call_offset].request self.assertEqual(executed_request.headers.get('From'), 'test_user@broadinstitute.org') + self.assertEqual(executed_request.url.split('/')[-1], url_path) self.assertDictEqual(json.loads(executed_request.body), expected_search) def _test_expected_search_call(self, search_fields=None, gene_ids=None, intervals=None, exclude_intervals= None, @@ -149,8 +156,7 @@ def test_query_variants(self): query_variants(self.results_model, user=self.user) sv_sample_data = { 'SV_WES': FAMILY_2_VARIANT_SAMPLE_DATA['SNV_INDEL'], - 'SV_WGS': [{'individual_guid': 'I000018_na21234', 'family_guid': 'F000014_14', - 'project_guid': 'R0004_non_analyst_project', 'affected': 'A', 'sample_id': 'NA21234'}], + 'SV_WGS': SV_WGS_SAMPLE_DATA, } self._test_expected_search_call(search_fields=['annotations'], dataset_type='SV', sample_data=sv_sample_data) @@ -208,34 +214,58 @@ def test_get_variant_query_gene_counts(self): gene_counts = get_variant_query_gene_counts(self.results_model, self.user) self.assertDictEqual(gene_counts, GENE_COUNTS) self.assert_cached_results({'gene_aggs': gene_counts}) - self._test_expected_search_call(sort=None) + self._test_expected_search_call(url_path='gene_counts', sort=None) @responses.activate def test_variant_lookup(self): responses.add(responses.POST, f'{MOCK_HOST}:5000/lookup', status=200, json=VARIANT_LOOKUP_VARIANT) variant = variant_lookup(self.user, '1-10439-AC-A', genome_version='37', foo='bar') self.assertListEqual(variant, [VARIANT_LOOKUP_VARIANT]) - self._test_minimal_search_call(expected_search_body={ + self._test_minimal_search_call(url_path='lookup', expected_search_body={ 'variant_id': ['1', 10439, 'AC', 'A'], 'genome_version': 'GRCh37', 'foo': 'bar', 'data_type': 'SNV_INDEL', }) variant_lookup(self.user, '1-10439-AC-A', genome_version='37', families=self.families) - self._test_minimal_search_call(expected_search_body={ + self._test_minimal_search_call(url_path='lookup', expected_search_body={ 'variant_id': ['1', 10439, 'AC', 'A'], 'genome_version': 'GRCh37', 'sample_data': ALL_AFFECTED_SAMPLE_DATA['SNV_INDEL'], 'data_type': 'SNV_INDEL', }) with self.assertRaises(InvalidSearchException) as cm: - variant_lookup(self.user, 'prefix_123_DEL') + variant_lookup(self.user, 'suffix_140608_DUP') self.assertEqual(str(cm.exception), 'Sample type must be specified to look up a structural variant') - # TODO add test for SVs + + responses.add(responses.POST, f'{MOCK_HOST}:5000/lookup', status=200, json=GCNV_VARIANT4) + variant_lookup(self.user, 'suffix_140608_DUP', sample_type='WES') + self._test_minimal_search_call(url_path='lookup', expected_search_body={ + 'variant_id': 'suffix_140608_DUP', 'genome_version': 'GRCh38', 'data_type': 'SV_WES', + }) + + sv_families = Family.objects.filter(id__in=[2, 14]) + variant_lookup(self.user, 'suffix_140608_DUP', sample_type='WES', families=sv_families) + self._test_minimal_search_call(url_path='lookup', call_offset=-2, expected_search_body={ + 'variant_id': 'suffix_140608_DUP', 'genome_version': 'GRCh38', 'data_type': 'SV_WES', + 'sample_data': ALL_AFFECTED_SAMPLE_DATA['SV_WES'] + }) + self._test_minimal_search_call(expected_search_body={ + 'genome_version': 'GRCh38', 'data_type': 'SV_WES', 'annotations': {'structural': ['DEL', 'gCNV_DEL']}, + 'intervals': ['17:38718997-38738487'], 'sample_data': {'SV_WGS': SV_WGS_SAMPLE_DATA}, + }) + + # No second lookup call is made for non DELs/DUPs + responses.add(responses.POST, f'{MOCK_HOST}:5000/lookup', status=200, json=SV_VARIANT2) + variant_lookup(self.user, 'cohort_2911.chr1.final_cleanup_INS_chr1_160', sample_type='WGS', families=sv_families) + self._test_minimal_search_call(url_path='lookup', expected_search_body={ + 'variant_id': 'cohort_2911.chr1.final_cleanup_INS_chr1_160', 'genome_version': 'GRCh38', 'data_type': 'SV_WGS', + 'sample_data': SV_WGS_SAMPLE_DATA + }) responses.add(responses.POST, f'{MOCK_HOST}:5000/lookup', status=404) with self.assertRaises(HTTPError) as cm: variant_lookup(self.user, '1-10439-AC-A') self.assertEqual(cm.exception.response.status_code, 404) self.assertEqual(str(cm.exception), 'Variant not present in seqr') - self._test_minimal_search_call(expected_search_body={ + self._test_minimal_search_call(url_path='lookup', expected_search_body={ 'variant_id': ['1', 10439, 'AC', 'A'], 'genome_version': 'GRCh38', 'data_type': 'SNV_INDEL', })